/* ******************************************************************** * COPYRIGHT: * Copyright (c) 1996-1999, International Business Machines Corporation and * others. All Rights Reserved. ******************************************************************** */ #ifndef NORMLZR_H #define NORMLZR_H #include "unicode/utypes.h" #include "unicode/unistr.h" #include "unicode/chariter.h" /* forward declaration */ class ComposedCharIter; class U_COMMON_API Normalizer { public: // This tells us what the bits in the "mode" mean. enum { COMPAT_BIT = 1, DECOMP_BIT = 2, COMPOSE_BIT = 4 }; enum { DONE=0xffff }; enum EMode { NO_OP = 0, COMPOSE = COMPOSE_BIT, COMPOSE_COMPAT = COMPOSE_BIT | COMPAT_BIT, DECOMP = DECOMP_BIT, DECOMP_COMPAT = DECOMP_BIT | COMPAT_BIT }; enum { IGNORE_HANGUL = 0x001 }; // Constructors Normalizer(const UnicodeString& str, EMode mode); Normalizer(const UnicodeString& str, EMode mode, int32_t opt); Normalizer(const UChar* str, int32_t length, EMode mode); Normalizer(const UChar* str, int32_t length, EMode mode, int32_t option); Normalizer(const CharacterIterator& iter, EMode mode); Normalizer(const CharacterIterator& iter, EMode mode, int32_t opt); Normalizer(const Normalizer& copy); ~Normalizer(); //------------------------------------------------------------------------- // Static utility methods //------------------------------------------------------------------------- static void normalize(const UnicodeString& source, EMode mode, int32_t options, UnicodeString& result, UErrorCode &status); static void compose(const UnicodeString& source, UBool compat, int32_t options, UnicodeString& result, UErrorCode &status); static void decompose(const UnicodeString& source, UBool compat, int32_t options, UnicodeString& result, UErrorCode &status); //------------------------------------------------------------------------- // CharacterIterator overrides //------------------------------------------------------------------------- UChar32 current(void) const; UChar32 first(void); UChar32 last(void); UChar32 next(void); UChar32 previous(void); UChar32 setIndex(UTextOffset index); void reset(void); UTextOffset getIndex(void) const; UTextOffset startIndex(void) const; UTextOffset endIndex(void) const; // virtual UBool operator==(const CharacterIterator& that) const; UBool operator==(const Normalizer& that) const; inline UBool operator!=(const Normalizer& that) const; Normalizer* clone(void) const; int32_t hashCode(void) const; //------------------------------------------------------------------------- // Property access methods //------------------------------------------------------------------------- void setMode(EMode newMode); EMode getMode(void) const; void setOption(int32_t option, UBool value); UBool getOption(int32_t option) const; void setText(const UnicodeString& newText, UErrorCode &status); void setText(const CharacterIterator& newText, UErrorCode &status); void setText(const UChar* newText, int32_t length, UErrorCode &status); void getText(UnicodeString& result); const UChar* getText(int32_t& count); private: // Private utility methods for iteration // For documentation, see the source code UChar nextCompose(void); UChar prevCompose(void); UChar nextDecomp(void); UChar prevDecomp(void); UChar curForward(void); UChar curBackward(void); void init(CharacterIterator* iter, EMode mode, int32_t option); void initBuffer(void); void clearBuffer(void); // Utilities used by Compose static void bubbleAppend(UnicodeString& target, UChar ch, uint32_t cclass); static uint32_t getComposeClass(UChar ch); static uint16_t composeLookup(UChar ch); static uint16_t composeAction(uint16_t baseIndex, uint16_t comIndex); static void explode(UnicodeString& target, uint16_t index); static UChar pairExplode(UnicodeString& target, uint16_t action); // Utilities used by Decompose static void fixCanonical(UnicodeString& result); // Reorders combining marks static uint8_t getClass(UChar ch); // Gets char's combining class // Other static utility methods static void doAppend(const UChar source[], uint16_t offset, UnicodeString& dest); static void doInsert(const UChar source[], uint16_t offset, UnicodeString& dest, UTextOffset pos); static uint16_t doReplace(const UChar source[], uint16_t offset, UnicodeString& dest, UTextOffset pos); static void hangulToJamo(UChar ch, UnicodeString& result, uint16_t decompLimit); static void jamoAppend(UChar ch, uint16_t decompLimit, UnicodeString& dest); static void jamoToHangul(UnicodeString& buffer, UTextOffset start); //------------------------------------------------------------------------- // Private data //------------------------------------------------------------------------- EMode fMode; int32_t fOptions; int16_t minDecomp; // The input text and our position in it CharacterIterator* text; // A buffer for holding intermediate results UnicodeString buffer; UTextOffset bufferPos; UTextOffset bufferLimit; UChar currentChar; // Another buffer for use during iterative composition UnicodeString explodeBuf; enum { EMPTY = -1, STR_INDEX_SHIFT = 2, //Must agree with the constants used in NormalizerBuilder STR_LENGTH_MASK = 0x0003 }; enum { HANGUL_BASE = 0xac00, HANGUL_LIMIT = 0xd7a4, JAMO_LBASE = 0x1100, JAMO_VBASE = 0x1161, JAMO_TBASE = 0x11a7, JAMO_LCOUNT = 19, JAMO_VCOUNT = 21, JAMO_TCOUNT = 28, JAMO_NCOUNT = JAMO_VCOUNT * JAMO_TCOUNT }; friend class ComposedCharIter; }; inline UBool Normalizer::operator!= (const Normalizer& other) const { return ! operator==(other); } #endif // _NORMLZR