rbbi.h

Go to the documentation of this file.
00001 /* 00002 *************************************************************************** 00003 * Copyright (C) 1999-2004 International Business Machines Corporation * 00004 * and others. All rights reserved. * 00005 *************************************************************************** 00006 00007 ********************************************************************** 00008 * Date Name Description 00009 * 10/22/99 alan Creation. 00010 * 11/11/99 rgillam Complete port from Java. 00011 ********************************************************************** 00012 */ 00013 00014 #ifndef RBBI_H 00015 #define RBBI_H 00016 00017 #include "unicode/utypes.h" 00018 00019 #if !UCONFIG_NO_BREAK_ITERATION 00020 00021 #include "unicode/brkiter.h" 00022 #include "unicode/udata.h" 00023 #include "unicode/parseerr.h" 00024 00025 struct UTrie; 00026 00027 U_NAMESPACE_BEGIN 00028 00030 struct RBBIDataHeader; 00031 class RuleBasedBreakIteratorTables; 00032 class BreakIterator; 00033 class RBBIDataWrapper; 00034 struct RBBIStateTable; 00035 00036 00037 00052 class U_COMMON_API RuleBasedBreakIterator : public BreakIterator { 00053 00054 protected: 00059 CharacterIterator* fText; 00060 00065 RBBIDataWrapper *fData; 00066 00070 int32_t fLastRuleStatusIndex; 00071 00078 UBool fLastStatusIndexValid; 00079 00087 uint32_t fDictionaryCharCount; 00088 00093 static UBool fTrace; 00094 00095 00096 protected: 00097 //======================================================================= 00098 // constructors 00099 //======================================================================= 00100 00111 RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status); 00112 00113 friend class RBBIRuleBuilder; 00114 friend class BreakIterator; 00115 00116 00117 00118 public: 00119 00124 RuleBasedBreakIterator(); 00125 00132 RuleBasedBreakIterator(const RuleBasedBreakIterator& that); 00133 00142 RuleBasedBreakIterator( const UnicodeString &rules, 00143 UParseError &parseError, 00144 UErrorCode 
&status); 00145 00146 00159 RuleBasedBreakIterator(UDataMemory* image, UErrorCode &status); 00160 00165 virtual ~RuleBasedBreakIterator(); 00166 00174 RuleBasedBreakIterator& operator=(const RuleBasedBreakIterator& that); 00175 00184 virtual UBool operator==(const BreakIterator& that) const; 00185 00193 UBool operator!=(const BreakIterator& that) const; 00194 00205 virtual BreakIterator* clone() const; 00206 00212 virtual int32_t hashCode(void) const; 00213 00219 virtual const UnicodeString& getRules(void) const; 00220 00221 //======================================================================= 00222 // BreakIterator overrides 00223 //======================================================================= 00224 00233 virtual const CharacterIterator& getText(void) const; 00234 00235 00243 virtual void adoptText(CharacterIterator* newText); 00244 00251 virtual void setText(const UnicodeString& newText); 00252 00259 virtual int32_t first(void); 00260 00267 virtual int32_t last(void); 00268 00279 virtual int32_t next(int32_t n); 00280 00286 virtual int32_t next(void); 00287 00293 virtual int32_t previous(void); 00294 00302 virtual int32_t following(int32_t offset); 00303 00311 virtual int32_t preceding(int32_t offset); 00312 00321 virtual UBool isBoundary(int32_t offset); 00322 00328 virtual int32_t current(void) const; 00329 00330 00363 virtual int32_t getRuleStatus() const; 00364 00388 virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status); 00389 00401 virtual UClassID getDynamicClassID(void) const; 00402 00414 static UClassID getStaticClassID(void); 00415 00416 /* 00417 * Create a clone (copy) of this break iterator in memory provided 00418 * by the caller. The idea is to increase performance by avoiding 00419 * a storage allocation. Use of this function is NOT RECOMMENDED. 00420 * Performance gains are minimal, and correct buffer management is 00421 * tricky. Use clone() instead. 
00422 * 00423 * @param stackBuffer The pointer to the memory into which the cloned object 00424 * should be placed. If NULL, allocate heap memory 00425 * for the cloned object. 00426 * @param BufferSize The size of the buffer. If zero, return the required 00427 * buffer size, but do not clone the object. If the 00428 * size was too small (but not zero), allocate heap 00429 * storage for the cloned object. 00430 * 00431 * @param status Error status. U_SAFECLONE_ALLOCATED_WARNING will be 00432 * returned if the provided buffer was too small, and 00433 * the clone was therefore put on the heap. 00434 * 00435 * @return Pointer to the clone object. This may differ from the stackBuffer 00436 * address if the byte alignment of the stack buffer was not suitable 00437 * or if the stackBuffer was too small to hold the clone. 00438 * @stable ICU 2.0 00439 */ 00440 virtual BreakIterator * createBufferClone(void *stackBuffer, 00441 int32_t &BufferSize, 00442 UErrorCode &status); 00443 00444 00462 virtual const uint8_t *getBinaryRules(uint32_t &length); 00463 00464 00465 protected: 00466 //======================================================================= 00467 // implementation 00468 //======================================================================= 00477 virtual int32_t handleNext(void); 00478 00487 virtual int32_t handlePrevious(void); 00488 00495 virtual void reset(void); 00496 00505 virtual UBool isDictionaryChar(UChar32); 00506 00512 void init(); 00513 00514 private: 00515 00525 int32_t handlePrevious(const RBBIStateTable *statetable); 00526 00536 int32_t handleNext(const RBBIStateTable *statetable); 00537 00541 void makeRuleStatusValid(); 00542 00543 }; 00544 00545 //------------------------------------------------------------------------------ 00546 // 00547 // Inline Functions Definitions ... 
00548 // 00549 //------------------------------------------------------------------------------ 00550 00551 inline UBool RuleBasedBreakIterator::operator!=(const BreakIterator& that) const { 00552 return !operator==(that); 00553 } 00554 00555 U_NAMESPACE_END 00556 00557 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ 00558 00559 #endif

Generated on Fri Jun 18 12:35:58 2004 for ICU by doxygen 1.3.7