00001 /* 00002 * Copyright © {1999}, International Business Machines Corporation and others. All Rights Reserved. 00003 ********************************************************************** 00004 * Date Name Description 00005 * 11/17/99 aliu Creation. 00006 ********************************************************************** 00007 */ 00008 #ifndef TRANSLIT_H 00009 #define TRANSLIT_H 00010 00011 #include "unicode/unistr.h" 00012 #include "unicode/parseerr.h" 00013 #include "unicode/utrans.h" // UTransPosition, UTransDirection 00014 00015 class Replaceable; 00016 class UnicodeFilter; 00017 class TransliterationRuleData; 00018 class Hashtable; 00019 class U_I18N_API UVector; 00020 class CompoundTransliterator; 00021 00225 class U_I18N_API Transliterator { 00226 00227 private: 00228 00232 UnicodeString ID; 00233 00240 UnicodeFilter* filter; 00241 00242 int32_t maximumContextLength; 00243 00266 static Hashtable* cache; 00267 00271 static UMTX cacheMutex; 00272 00282 static UBool cacheInitialized; 00283 00297 struct CacheEntry { 00298 enum Type { 00299 RULE_BASED_PLACEHOLDER, 00300 REVERSE_RULE_BASED_PLACEHOLDER, 00301 PROTOTYPE, 00302 RBT_DATA, 00303 NONE // Only used for uninitialized entries 00304 } entryType; 00305 UnicodeString rbFile; // For *PLACEHOLDER 00306 union { 00307 Transliterator* prototype; // For PROTOTYPE 00308 TransliterationRuleData* data; // For RBT_DATA 00309 } u; 00310 CacheEntry(); 00311 ~CacheEntry(); 00312 void adoptPrototype(Transliterator* adopted); 00313 }; 00314 00320 static const char* RB_DISPLAY_NAME_PREFIX; 00321 00327 static const char* RB_SCRIPT_DISPLAY_NAME_PREFIX; 00328 00335 static const char* RB_DISPLAY_NAME_PATTERN; 00336 00343 static const char* RB_RULE_BASED_IDS; 00344 00348 static const char* RB_RULE; 00349 00350 protected: 00351 00360 Transliterator(const UnicodeString& ID, UnicodeFilter* adoptedFilter); 00361 00365 Transliterator(const Transliterator&); 00366 00370 Transliterator& operator=(const Transliterator&); 00371 00372 public: 00373 00378 virtual ~Transliterator(); 00379 00393 virtual Transliterator* clone() const { return 0; } 00394 00413 virtual int32_t transliterate(Replaceable& text, 00414 int32_t start, int32_t limit) const; 00415 00421 virtual void transliterate(Replaceable& text) const; 00422 00485 virtual void transliterate(Replaceable& text, UTransPosition& index, 00486 const UnicodeString& insertion, 00487 UErrorCode& status) const; 00488 00505 virtual void transliterate(Replaceable& text, UTransPosition& index, 00506 UChar insertion, 00507 UErrorCode& status) const; 00508 00521 virtual void transliterate(Replaceable& text, UTransPosition& index, 00522 UErrorCode& status) const; 00523 00535 virtual void finishTransliteration(Replaceable& text, 00536 UTransPosition& index) const; 00537 00538 private: 00539 00547 void _transliterate(Replaceable& text, 00548 UTransPosition& index, 00549 const UnicodeString* insertion, 00550 UErrorCode &status) const; 00551 00552 protected: 00553 00577 virtual void handleTransliterate(Replaceable& text, 00578 UTransPosition& index, 00579 UBool incremental) const = 0; 00580 00581 // C++ requires this friend declaration so CompoundTransliterator 00582 // can access handleTransliterate. Alternatively, we could 00583 // make handleTransliterate public. 00584 friend class CompoundTransliterator; 00585 00586 public: 00587 00601 int32_t getMaximumContextLength(void) const; 00602 00603 protected: 00604 00609 void setMaximumContextLength(int32_t maxContextLength); 00610 00611 public: 00612 00622 virtual const UnicodeString& getID(void) const; 00623 00630 static UnicodeString& getDisplayName(const UnicodeString& ID, 00631 UnicodeString& result); 00632 00652 static UnicodeString& getDisplayName(const UnicodeString& ID, 00653 const Locale& inLocale, 00654 UnicodeString& result); 00655 00661 virtual const UnicodeFilter* getFilter(void) const; 00662 00669 UnicodeFilter* orphanFilter(void); 00670 00680 virtual void adoptFilter(UnicodeFilter* adoptedFilter); 00681 00703 Transliterator* createInverse(void) const; 00704 00718 static Transliterator* createInstance(const UnicodeString& ID, 00719 UTransDirection dir = UTRANS_FORWARD, 00720 UParseError* parseError = 0); 00721 00722 private: 00723 00728 static Transliterator* _createInstance(const UnicodeString& ID, 00729 UParseError* parseError = 0); 00730 00731 public: 00732 00750 static void registerInstance(Transliterator* adoptedObj, 00751 UErrorCode& status); 00752 00753 private: 00754 00759 static void _registerInstance(Transliterator* adoptedPrototype, 00760 UErrorCode &status); 00761 00762 public: 00763 00775 static void unregister(const UnicodeString& ID); 00776 00777 private: 00778 00784 static void _unregister(const UnicodeString& ID); 00785 00797 // virtual Enumeration getAvailableIDs(); 00798 00802 static UVector cacheIDs; 00803 00804 public: 00805 00812 static int32_t countAvailableIDs(void); 00813 00820 static const UnicodeString& getAvailableID(int32_t index); 00821 00822 protected: 00823 00828 UChar filteredCharAt(const Replaceable& text, int32_t i) const; 00829 00834 void setID(const UnicodeString& id); 00835 00836 private: 00841 static UBool compareIDs(void* a, void* b); 00842 00843 static void initializeCache(void); 00844 00845 /* IDs take the form <source> ID_SEP <target>, where 00846 * <source> and <target> are (usually) script names. 00847 * Compound IDs take the form <ID> ( ID_DELIM <ID> )+. 00848 */ 00849 static const UChar ID_SEP; // ((UChar)0x002D) /*-*/ 00850 static const UChar ID_DELIM; // ((UChar)0x003B) /*;*/ 00851 }; 00852 00853 inline int32_t Transliterator::getMaximumContextLength(void) const { 00854 return maximumContextLength; 00855 } 00856 00857 inline void Transliterator::setID(const UnicodeString& id) { 00858 ID = id; 00859 } 00860 00861 #endif