alphaindex.cpp 41 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249
  1. // Copyright (C) 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. *******************************************************************************
  5. * Copyright (C) 2009-2014, International Business Machines Corporation and
  6. * others. All Rights Reserved.
  7. *******************************************************************************
  8. */
  9. #include "unicode/utypes.h"
  10. #if !UCONFIG_NO_COLLATION
  11. #include "unicode/alphaindex.h"
  12. #include "unicode/coll.h"
  13. #include "unicode/localpointer.h"
  14. #include "unicode/normalizer2.h"
  15. #include "unicode/tblcoll.h"
  16. #include "unicode/uchar.h"
  17. #include "unicode/ulocdata.h"
  18. #include "unicode/uniset.h"
  19. #include "unicode/uobject.h"
  20. #include "unicode/usetiter.h"
  21. #include "unicode/utf16.h"
  22. #include "cmemory.h"
  23. #include "cstring.h"
  24. #include "uassert.h"
  25. #include "uvector.h"
  26. #include "uvectr64.h"
  27. //#include <string>
  28. //#include <iostream>
  29. U_NAMESPACE_BEGIN
  30. namespace {
  31. /**
  32. * Prefix string for Chinese index buckets.
  33. * See http://unicode.org/repos/cldr/trunk/specs/ldml/tr35-collation.html#Collation_Indexes
  34. */
  35. const UChar BASE[1] = { 0xFDD0 };
  36. const int32_t BASE_LENGTH = 1;
  37. UBool isOneLabelBetterThanOther(const Normalizer2 &nfkdNormalizer,
  38. const UnicodeString &one, const UnicodeString &other);
  39. } // namespace
  40. static int32_t U_CALLCONV
  41. collatorComparator(const void *context, const void *left, const void *right);
  42. static int32_t U_CALLCONV
  43. recordCompareFn(const void *context, const void *left, const void *right);
  44. // UVector<Record *> support function, delete a Record.
  45. static void U_CALLCONV
  46. alphaIndex_deleteRecord(void *obj) {
  47. delete static_cast<AlphabeticIndex::Record *>(obj);
  48. }
  49. namespace {
  50. UnicodeString *ownedString(const UnicodeString &s, LocalPointer<UnicodeString> &owned,
  51. UErrorCode &errorCode) {
  52. if (U_FAILURE(errorCode)) { return NULL; }
  53. if (owned.isValid()) {
  54. return owned.orphan();
  55. }
  56. UnicodeString *p = new UnicodeString(s);
  57. if (p == NULL) {
  58. errorCode = U_MEMORY_ALLOCATION_ERROR;
  59. }
  60. return p;
  61. }
  62. inline UnicodeString *getString(const UVector &list, int32_t i) {
  63. return static_cast<UnicodeString *>(list[i]);
  64. }
  65. inline AlphabeticIndex::Bucket *getBucket(const UVector &list, int32_t i) {
  66. return static_cast<AlphabeticIndex::Bucket *>(list[i]);
  67. }
  68. inline AlphabeticIndex::Record *getRecord(const UVector &list, int32_t i) {
  69. return static_cast<AlphabeticIndex::Record *>(list[i]);
  70. }
  71. /**
  72. * Like Java Collections.binarySearch(List, String, Comparator).
  73. *
  74. * @return the index>=0 where the item was found,
  75. * or the index<0 for inserting the string at ~index in sorted order
  76. */
  77. int32_t binarySearch(const UVector &list, const UnicodeString &s, const Collator &coll) {
  78. if (list.size() == 0) { return ~0; }
  79. int32_t start = 0;
  80. int32_t limit = list.size();
  81. for (;;) {
  82. int32_t i = (start + limit) / 2;
  83. const UnicodeString *si = static_cast<UnicodeString *>(list.elementAt(i));
  84. UErrorCode errorCode = U_ZERO_ERROR;
  85. UCollationResult cmp = coll.compare(s, *si, errorCode);
  86. if (cmp == UCOL_EQUAL) {
  87. return i;
  88. } else if (cmp < 0) {
  89. if (i == start) {
  90. return ~start; // insert s before *si
  91. }
  92. limit = i;
  93. } else {
  94. if (i == start) {
  95. return ~(start + 1); // insert s after *si
  96. }
  97. start = i;
  98. }
  99. }
  100. }
  101. } // namespace
  102. // The BucketList is not in the anonymous namespace because only Clang
  103. // seems to support its use in other classes from there.
  104. // However, we also don't need U_I18N_API because it is not used from outside the i18n library.
  105. class BucketList : public UObject {
  106. public:
  107. BucketList(UVector *bucketList, UVector *publicBucketList)
  108. : bucketList_(bucketList), immutableVisibleList_(publicBucketList) {
  109. int32_t displayIndex = 0;
  110. for (int32_t i = 0; i < publicBucketList->size(); ++i) {
  111. getBucket(*publicBucketList, i)->displayIndex_ = displayIndex++;
  112. }
  113. }
  114. // The virtual destructor must not be inline.
  115. // See ticket #8454 for details.
  116. virtual ~BucketList();
  117. int32_t getBucketCount() const {
  118. return immutableVisibleList_->size();
  119. }
  120. int32_t getBucketIndex(const UnicodeString &name, const Collator &collatorPrimaryOnly,
  121. UErrorCode &errorCode) {
  122. // binary search
  123. int32_t start = 0;
  124. int32_t limit = bucketList_->size();
  125. while ((start + 1) < limit) {
  126. int32_t i = (start + limit) / 2;
  127. const AlphabeticIndex::Bucket *bucket = getBucket(*bucketList_, i);
  128. UCollationResult nameVsBucket =
  129. collatorPrimaryOnly.compare(name, bucket->lowerBoundary_, errorCode);
  130. if (nameVsBucket < 0) {
  131. limit = i;
  132. } else {
  133. start = i;
  134. }
  135. }
  136. const AlphabeticIndex::Bucket *bucket = getBucket(*bucketList_, start);
  137. if (bucket->displayBucket_ != NULL) {
  138. bucket = bucket->displayBucket_;
  139. }
  140. return bucket->displayIndex_;
  141. }
  142. /** All of the buckets, visible and invisible. */
  143. UVector *bucketList_;
  144. /** Just the visible buckets. */
  145. UVector *immutableVisibleList_;
  146. };
  147. BucketList::~BucketList() {
  148. delete bucketList_;
  149. if (immutableVisibleList_ != bucketList_) {
  150. delete immutableVisibleList_;
  151. }
  152. }
  153. AlphabeticIndex::ImmutableIndex::~ImmutableIndex() {
  154. delete buckets_;
  155. delete collatorPrimaryOnly_;
  156. }
  157. int32_t
  158. AlphabeticIndex::ImmutableIndex::getBucketCount() const {
  159. return buckets_->getBucketCount();
  160. }
  161. int32_t
  162. AlphabeticIndex::ImmutableIndex::getBucketIndex(
  163. const UnicodeString &name, UErrorCode &errorCode) const {
  164. return buckets_->getBucketIndex(name, *collatorPrimaryOnly_, errorCode);
  165. }
  166. const AlphabeticIndex::Bucket *
  167. AlphabeticIndex::ImmutableIndex::getBucket(int32_t index) const {
  168. if (0 <= index && index < buckets_->getBucketCount()) {
  169. return icu::getBucket(*buckets_->immutableVisibleList_, index);
  170. } else {
  171. return NULL;
  172. }
  173. }
  174. AlphabeticIndex::AlphabeticIndex(const Locale &locale, UErrorCode &status)
  175. : inputList_(NULL),
  176. labelsIterIndex_(-1), itemsIterIndex_(0), currentBucket_(NULL),
  177. maxLabelCount_(99),
  178. initialLabels_(NULL), firstCharsInScripts_(NULL),
  179. collator_(NULL), collatorPrimaryOnly_(NULL),
  180. buckets_(NULL) {
  181. init(&locale, status);
  182. }
  183. AlphabeticIndex::AlphabeticIndex(RuleBasedCollator *collator, UErrorCode &status)
  184. : inputList_(NULL),
  185. labelsIterIndex_(-1), itemsIterIndex_(0), currentBucket_(NULL),
  186. maxLabelCount_(99),
  187. initialLabels_(NULL), firstCharsInScripts_(NULL),
  188. collator_(collator), collatorPrimaryOnly_(NULL),
  189. buckets_(NULL) {
  190. init(NULL, status);
  191. }
  192. AlphabeticIndex::~AlphabeticIndex() {
  193. delete collator_;
  194. delete collatorPrimaryOnly_;
  195. delete firstCharsInScripts_;
  196. delete buckets_;
  197. delete inputList_;
  198. delete initialLabels_;
  199. }
  200. AlphabeticIndex &AlphabeticIndex::addLabels(const UnicodeSet &additions, UErrorCode &status) {
  201. if (U_FAILURE(status)) {
  202. return *this;
  203. }
  204. initialLabels_->addAll(additions);
  205. clearBuckets();
  206. return *this;
  207. }
  208. AlphabeticIndex &AlphabeticIndex::addLabels(const Locale &locale, UErrorCode &status) {
  209. addIndexExemplars(locale, status);
  210. clearBuckets();
  211. return *this;
  212. }
  213. AlphabeticIndex::ImmutableIndex *AlphabeticIndex::buildImmutableIndex(UErrorCode &errorCode) {
  214. if (U_FAILURE(errorCode)) { return NULL; }
  215. // In C++, the ImmutableIndex must own its copy of the BucketList,
  216. // even if it contains no records, for proper memory management.
  217. // We could clone the buckets_ if they are not NULL,
  218. // but that would be worth it only if this method is called multiple times,
  219. // or called after using the old-style bucket iterator API.
  220. LocalPointer<BucketList> immutableBucketList(createBucketList(errorCode));
  221. LocalPointer<RuleBasedCollator> coll(
  222. static_cast<RuleBasedCollator *>(collatorPrimaryOnly_->clone()));
  223. if (immutableBucketList.isNull() || coll.isNull()) {
  224. errorCode = U_MEMORY_ALLOCATION_ERROR;
  225. return NULL;
  226. }
  227. ImmutableIndex *immIndex = new ImmutableIndex(immutableBucketList.getAlias(), coll.getAlias());
  228. if (immIndex == NULL) {
  229. errorCode = U_MEMORY_ALLOCATION_ERROR;
  230. return NULL;
  231. }
  232. // The ImmutableIndex adopted its parameter objects.
  233. immutableBucketList.orphan();
  234. coll.orphan();
  235. return immIndex;
  236. }
  237. int32_t AlphabeticIndex::getBucketCount(UErrorCode &status) {
  238. initBuckets(status);
  239. if (U_FAILURE(status)) {
  240. return 0;
  241. }
  242. return buckets_->getBucketCount();
  243. }
  244. int32_t AlphabeticIndex::getRecordCount(UErrorCode &status) {
  245. if (U_FAILURE(status) || inputList_ == NULL) {
  246. return 0;
  247. }
  248. return inputList_->size();
  249. }
  250. void AlphabeticIndex::initLabels(UVector &indexCharacters, UErrorCode &errorCode) const {
  251. const Normalizer2 *nfkdNormalizer = Normalizer2::getNFKDInstance(errorCode);
  252. if (U_FAILURE(errorCode)) { return; }
  253. const UnicodeString &firstScriptBoundary = *getString(*firstCharsInScripts_, 0);
  254. const UnicodeString &overflowBoundary =
  255. *getString(*firstCharsInScripts_, firstCharsInScripts_->size() - 1);
  256. // We make a sorted array of elements.
  257. // Some of the input may be redundant.
  258. // That is, we might have c, ch, d, where "ch" sorts just like "c", "h".
  259. // We filter out those cases.
  260. UnicodeSetIterator iter(*initialLabels_);
  261. while (iter.next()) {
  262. const UnicodeString *item = &iter.getString();
  263. LocalPointer<UnicodeString> ownedItem;
  264. UBool checkDistinct;
  265. int32_t itemLength = item->length();
  266. if (!item->hasMoreChar32Than(0, itemLength, 1)) {
  267. checkDistinct = FALSE;
  268. } else if(item->charAt(itemLength - 1) == 0x2a && // '*'
  269. item->charAt(itemLength - 2) != 0x2a) {
  270. // Use a label if it is marked with one trailing star,
  271. // even if the label string sorts the same when all contractions are suppressed.
  272. ownedItem.adoptInstead(new UnicodeString(*item, 0, itemLength - 1));
  273. item = ownedItem.getAlias();
  274. if (item == NULL) {
  275. errorCode = U_MEMORY_ALLOCATION_ERROR;
  276. return;
  277. }
  278. checkDistinct = FALSE;
  279. } else {
  280. checkDistinct = TRUE;
  281. }
  282. if (collatorPrimaryOnly_->compare(*item, firstScriptBoundary, errorCode) < 0) {
  283. // Ignore a primary-ignorable or non-alphabetic index character.
  284. } else if (collatorPrimaryOnly_->compare(*item, overflowBoundary, errorCode) >= 0) {
  285. // Ignore an index character that will land in the overflow bucket.
  286. } else if (checkDistinct &&
  287. collatorPrimaryOnly_->compare(*item, separated(*item), errorCode) == 0) {
  288. // Ignore a multi-code point index character that does not sort distinctly
  289. // from the sequence of its separate characters.
  290. } else {
  291. int32_t insertionPoint = binarySearch(indexCharacters, *item, *collatorPrimaryOnly_);
  292. if (insertionPoint < 0) {
  293. indexCharacters.insertElementAt(
  294. ownedString(*item, ownedItem, errorCode), ~insertionPoint, errorCode);
  295. } else {
  296. const UnicodeString &itemAlreadyIn = *getString(indexCharacters, insertionPoint);
  297. if (isOneLabelBetterThanOther(*nfkdNormalizer, *item, itemAlreadyIn)) {
  298. indexCharacters.setElementAt(
  299. ownedString(*item, ownedItem, errorCode), insertionPoint);
  300. }
  301. }
  302. }
  303. }
  304. if (U_FAILURE(errorCode)) { return; }
  305. // if the result is still too large, cut down to maxLabelCount_ elements, by removing every nth element
  306. int32_t size = indexCharacters.size() - 1;
  307. if (size > maxLabelCount_) {
  308. int32_t count = 0;
  309. int32_t old = -1;
  310. for (int32_t i = 0; i < indexCharacters.size();) {
  311. ++count;
  312. int32_t bump = count * maxLabelCount_ / size;
  313. if (bump == old) {
  314. indexCharacters.removeElementAt(i);
  315. } else {
  316. old = bump;
  317. ++i;
  318. }
  319. }
  320. }
  321. }
  322. namespace {
  323. const UnicodeString &fixLabel(const UnicodeString &current, UnicodeString &temp) {
  324. if (!current.startsWith(BASE, BASE_LENGTH)) {
  325. return current;
  326. }
  327. UChar rest = current.charAt(BASE_LENGTH);
  328. if (0x2800 < rest && rest <= 0x28FF) { // stroke count
  329. int32_t count = rest-0x2800;
  330. temp.setTo((UChar)(0x30 + count % 10));
  331. if (count >= 10) {
  332. count /= 10;
  333. temp.insert(0, (UChar)(0x30 + count % 10));
  334. if (count >= 10) {
  335. count /= 10;
  336. temp.insert(0, (UChar)(0x30 + count));
  337. }
  338. }
  339. return temp.append((UChar)0x5283);
  340. }
  341. return temp.setTo(current, BASE_LENGTH);
  342. }
  343. UBool hasMultiplePrimaryWeights(
  344. const RuleBasedCollator &coll, uint32_t variableTop,
  345. const UnicodeString &s, UVector64 &ces, UErrorCode &errorCode) {
  346. ces.removeAllElements();
  347. coll.internalGetCEs(s, ces, errorCode);
  348. if (U_FAILURE(errorCode)) { return FALSE; }
  349. UBool seenPrimary = FALSE;
  350. for (int32_t i = 0; i < ces.size(); ++i) {
  351. int64_t ce = ces.elementAti(i);
  352. uint32_t p = (uint32_t)(ce >> 32);
  353. if (p > variableTop) {
  354. // not primary ignorable
  355. if (seenPrimary) {
  356. return TRUE;
  357. }
  358. seenPrimary = TRUE;
  359. }
  360. }
  361. return FALSE;
  362. }
  363. } // namespace
  364. BucketList *AlphabeticIndex::createBucketList(UErrorCode &errorCode) const {
  365. // Initialize indexCharacters.
  366. UVector indexCharacters(errorCode);
  367. indexCharacters.setDeleter(uprv_deleteUObject);
  368. initLabels(indexCharacters, errorCode);
  369. if (U_FAILURE(errorCode)) { return NULL; }
  370. // Variables for hasMultiplePrimaryWeights().
  371. UVector64 ces(errorCode);
  372. uint32_t variableTop;
  373. if (collatorPrimaryOnly_->getAttribute(UCOL_ALTERNATE_HANDLING, errorCode) == UCOL_SHIFTED) {
  374. variableTop = collatorPrimaryOnly_->getVariableTop(errorCode);
  375. } else {
  376. variableTop = 0;
  377. }
  378. UBool hasInvisibleBuckets = FALSE;
  379. // Helper arrays for Chinese Pinyin collation.
  380. Bucket *asciiBuckets[26] = {
  381. NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
  382. NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
  383. };
  384. Bucket *pinyinBuckets[26] = {
  385. NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
  386. NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
  387. };
  388. UBool hasPinyin = FALSE;
  389. LocalPointer<UVector> bucketList(new UVector(errorCode), errorCode);
  390. if (U_FAILURE(errorCode)) {
  391. return NULL;
  392. }
  393. bucketList->setDeleter(uprv_deleteUObject);
  394. // underflow bucket
  395. Bucket *bucket = new Bucket(getUnderflowLabel(), emptyString_, U_ALPHAINDEX_UNDERFLOW);
  396. if (bucket == NULL) {
  397. errorCode = U_MEMORY_ALLOCATION_ERROR;
  398. return NULL;
  399. }
  400. bucketList->addElement(bucket, errorCode);
  401. if (U_FAILURE(errorCode)) { return NULL; }
  402. UnicodeString temp;
  403. // fix up the list, adding underflow, additions, overflow
  404. // Insert inflow labels as needed.
  405. int32_t scriptIndex = -1;
  406. const UnicodeString *scriptUpperBoundary = &emptyString_;
  407. for (int32_t i = 0; i < indexCharacters.size(); ++i) {
  408. UnicodeString &current = *getString(indexCharacters, i);
  409. if (collatorPrimaryOnly_->compare(current, *scriptUpperBoundary, errorCode) >= 0) {
  410. // We crossed the script boundary into a new script.
  411. const UnicodeString &inflowBoundary = *scriptUpperBoundary;
  412. UBool skippedScript = FALSE;
  413. for (;;) {
  414. scriptUpperBoundary = getString(*firstCharsInScripts_, ++scriptIndex);
  415. if (collatorPrimaryOnly_->compare(current, *scriptUpperBoundary, errorCode) < 0) {
  416. break;
  417. }
  418. skippedScript = TRUE;
  419. }
  420. if (skippedScript && bucketList->size() > 1) {
  421. // We are skipping one or more scripts,
  422. // and we are not just getting out of the underflow label.
  423. bucket = new Bucket(getInflowLabel(), inflowBoundary, U_ALPHAINDEX_INFLOW);
  424. if (bucket == NULL) {
  425. errorCode = U_MEMORY_ALLOCATION_ERROR;
  426. return NULL;
  427. }
  428. bucketList->addElement(bucket, errorCode);
  429. }
  430. }
  431. // Add a bucket with the current label.
  432. bucket = new Bucket(fixLabel(current, temp), current, U_ALPHAINDEX_NORMAL);
  433. if (bucket == NULL) {
  434. errorCode = U_MEMORY_ALLOCATION_ERROR;
  435. return NULL;
  436. }
  437. bucketList->addElement(bucket, errorCode);
  438. // Remember ASCII and Pinyin buckets for Pinyin redirects.
  439. UChar c;
  440. if (current.length() == 1 && 0x41 <= (c = current.charAt(0)) && c <= 0x5A) { // A-Z
  441. asciiBuckets[c - 0x41] = bucket;
  442. } else if (current.length() == BASE_LENGTH + 1 && current.startsWith(BASE, BASE_LENGTH) &&
  443. 0x41 <= (c = current.charAt(BASE_LENGTH)) && c <= 0x5A) {
  444. pinyinBuckets[c - 0x41] = bucket;
  445. hasPinyin = TRUE;
  446. }
  447. // Check for multiple primary weights.
  448. if (!current.startsWith(BASE, BASE_LENGTH) &&
  449. hasMultiplePrimaryWeights(*collatorPrimaryOnly_, variableTop, current,
  450. ces, errorCode) &&
  451. current.charAt(current.length() - 1) != 0xFFFF /* !current.endsWith("\uffff") */) {
  452. // "AE-ligature" or "Sch" etc.
  453. for (int32_t i = bucketList->size() - 2;; --i) {
  454. Bucket *singleBucket = getBucket(*bucketList, i);
  455. if (singleBucket->labelType_ != U_ALPHAINDEX_NORMAL) {
  456. // There is no single-character bucket since the last
  457. // underflow or inflow label.
  458. break;
  459. }
  460. if (singleBucket->displayBucket_ == NULL &&
  461. !hasMultiplePrimaryWeights(*collatorPrimaryOnly_, variableTop,
  462. singleBucket->lowerBoundary_,
  463. ces, errorCode)) {
  464. // Add an invisible bucket that redirects strings greater than the expansion
  465. // to the previous single-character bucket.
  466. // For example, after ... Q R S Sch we add Sch\uFFFF->S
  467. // and after ... Q R S Sch Sch\uFFFF St we add St\uFFFF->S.
  468. bucket = new Bucket(emptyString_,
  469. UnicodeString(current).append((UChar)0xFFFF),
  470. U_ALPHAINDEX_NORMAL);
  471. if (bucket == NULL) {
  472. errorCode = U_MEMORY_ALLOCATION_ERROR;
  473. return NULL;
  474. }
  475. bucket->displayBucket_ = singleBucket;
  476. bucketList->addElement(bucket, errorCode);
  477. hasInvisibleBuckets = TRUE;
  478. break;
  479. }
  480. }
  481. }
  482. }
  483. if (U_FAILURE(errorCode)) { return NULL; }
  484. if (bucketList->size() == 1) {
  485. // No real labels, show only the underflow label.
  486. BucketList *bl = new BucketList(bucketList.getAlias(), bucketList.getAlias());
  487. if (bl == NULL) {
  488. errorCode = U_MEMORY_ALLOCATION_ERROR;
  489. return NULL;
  490. }
  491. bucketList.orphan();
  492. return bl;
  493. }
  494. // overflow bucket
  495. bucket = new Bucket(getOverflowLabel(), *scriptUpperBoundary, U_ALPHAINDEX_OVERFLOW);
  496. if (bucket == NULL) {
  497. errorCode = U_MEMORY_ALLOCATION_ERROR;
  498. return NULL;
  499. }
  500. bucketList->addElement(bucket, errorCode); // final
  501. if (hasPinyin) {
  502. // Redirect Pinyin buckets.
  503. Bucket *asciiBucket = NULL;
  504. for (int32_t i = 0; i < 26; ++i) {
  505. if (asciiBuckets[i] != NULL) {
  506. asciiBucket = asciiBuckets[i];
  507. }
  508. if (pinyinBuckets[i] != NULL && asciiBucket != NULL) {
  509. pinyinBuckets[i]->displayBucket_ = asciiBucket;
  510. hasInvisibleBuckets = TRUE;
  511. }
  512. }
  513. }
  514. if (U_FAILURE(errorCode)) { return NULL; }
  515. if (!hasInvisibleBuckets) {
  516. BucketList *bl = new BucketList(bucketList.getAlias(), bucketList.getAlias());
  517. if (bl == NULL) {
  518. errorCode = U_MEMORY_ALLOCATION_ERROR;
  519. return NULL;
  520. }
  521. bucketList.orphan();
  522. return bl;
  523. }
  524. // Merge inflow buckets that are visually adjacent.
  525. // Iterate backwards: Merge inflow into overflow rather than the other way around.
  526. int32_t i = bucketList->size() - 1;
  527. Bucket *nextBucket = getBucket(*bucketList, i);
  528. while (--i > 0) {
  529. bucket = getBucket(*bucketList, i);
  530. if (bucket->displayBucket_ != NULL) {
  531. continue; // skip invisible buckets
  532. }
  533. if (bucket->labelType_ == U_ALPHAINDEX_INFLOW) {
  534. if (nextBucket->labelType_ != U_ALPHAINDEX_NORMAL) {
  535. bucket->displayBucket_ = nextBucket;
  536. continue;
  537. }
  538. }
  539. nextBucket = bucket;
  540. }
  541. LocalPointer<UVector> publicBucketList(new UVector(errorCode), errorCode);
  542. if (U_FAILURE(errorCode)) {
  543. return NULL;
  544. }
  545. // Do not call publicBucketList->setDeleter():
  546. // This vector shares its objects with the bucketList.
  547. for (int32_t i = 0; i < bucketList->size(); ++i) {
  548. bucket = getBucket(*bucketList, i);
  549. if (bucket->displayBucket_ == NULL) {
  550. publicBucketList->addElement(bucket, errorCode);
  551. }
  552. }
  553. if (U_FAILURE(errorCode)) { return NULL; }
  554. BucketList *bl = new BucketList(bucketList.getAlias(), publicBucketList.getAlias());
  555. if (bl == NULL) {
  556. errorCode = U_MEMORY_ALLOCATION_ERROR;
  557. return NULL;
  558. }
  559. bucketList.orphan();
  560. publicBucketList.orphan();
  561. return bl;
  562. }
  563. /**
  564. * Creates an index, and buckets and sorts the list of records into the index.
  565. */
  566. void AlphabeticIndex::initBuckets(UErrorCode &errorCode) {
  567. if (U_FAILURE(errorCode) || buckets_ != NULL) {
  568. return;
  569. }
  570. buckets_ = createBucketList(errorCode);
  571. if (U_FAILURE(errorCode) || inputList_ == NULL || inputList_->isEmpty()) {
  572. return;
  573. }
  574. // Sort the records by name.
  575. // Stable sort preserves input order of collation duplicates.
  576. inputList_->sortWithUComparator(recordCompareFn, collator_, errorCode);
  577. // Now, we traverse all of the input, which is now sorted.
  578. // If the item doesn't go in the current bucket, we find the next bucket that contains it.
  579. // This makes the process order n*log(n), since we just sort the list and then do a linear process.
  580. // However, if the user adds an item at a time and then gets the buckets, this isn't efficient, so
  581. // we need to improve it for that case.
  582. Bucket *currentBucket = getBucket(*buckets_->bucketList_, 0);
  583. int32_t bucketIndex = 1;
  584. Bucket *nextBucket;
  585. const UnicodeString *upperBoundary;
  586. if (bucketIndex < buckets_->bucketList_->size()) {
  587. nextBucket = getBucket(*buckets_->bucketList_, bucketIndex++);
  588. upperBoundary = &nextBucket->lowerBoundary_;
  589. } else {
  590. nextBucket = NULL;
  591. upperBoundary = NULL;
  592. }
  593. for (int32_t i = 0; i < inputList_->size(); ++i) {
  594. Record *r = getRecord(*inputList_, i);
  595. // if the current bucket isn't the right one, find the one that is
  596. // We have a special flag for the last bucket so that we don't look any further
  597. while (upperBoundary != NULL &&
  598. collatorPrimaryOnly_->compare(r->name_, *upperBoundary, errorCode) >= 0) {
  599. currentBucket = nextBucket;
  600. // now reset the boundary that we compare against
  601. if (bucketIndex < buckets_->bucketList_->size()) {
  602. nextBucket = getBucket(*buckets_->bucketList_, bucketIndex++);
  603. upperBoundary = &nextBucket->lowerBoundary_;
  604. } else {
  605. upperBoundary = NULL;
  606. }
  607. }
  608. // now put the record into the bucket.
  609. Bucket *bucket = currentBucket;
  610. if (bucket->displayBucket_ != NULL) {
  611. bucket = bucket->displayBucket_;
  612. }
  613. if (bucket->records_ == NULL) {
  614. bucket->records_ = new UVector(errorCode);
  615. if (bucket->records_ == NULL) {
  616. errorCode = U_MEMORY_ALLOCATION_ERROR;
  617. return;
  618. }
  619. }
  620. bucket->records_->addElement(r, errorCode);
  621. }
  622. }
  623. void AlphabeticIndex::clearBuckets() {
  624. if (buckets_ != NULL) {
  625. delete buckets_;
  626. buckets_ = NULL;
  627. internalResetBucketIterator();
  628. }
  629. }
  630. void AlphabeticIndex::internalResetBucketIterator() {
  631. labelsIterIndex_ = -1;
  632. currentBucket_ = NULL;
  633. }
  634. void AlphabeticIndex::addIndexExemplars(const Locale &locale, UErrorCode &status) {
  635. LocalULocaleDataPointer uld(ulocdata_open(locale.getName(), &status));
  636. if (U_FAILURE(status)) {
  637. return;
  638. }
  639. UnicodeSet exemplars;
  640. ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_INDEX, &status);
  641. if (U_SUCCESS(status)) {
  642. initialLabels_->addAll(exemplars);
  643. return;
  644. }
  645. status = U_ZERO_ERROR; // Clear out U_MISSING_RESOURCE_ERROR
  646. // The locale data did not include explicit Index characters.
  647. // Synthesize a set of them from the locale's standard exemplar characters.
  648. ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_STANDARD, &status);
  649. if (U_FAILURE(status)) {
  650. return;
  651. }
  652. // question: should we add auxiliary exemplars?
  653. if (exemplars.containsSome(0x61, 0x7A) /* a-z */ || exemplars.size() == 0) {
  654. exemplars.add(0x61, 0x7A);
  655. }
  656. if (exemplars.containsSome(0xAC00, 0xD7A3)) { // Hangul syllables
  657. // cut down to small list
  658. exemplars.remove(0xAC00, 0xD7A3).
  659. add(0xAC00).add(0xB098).add(0xB2E4).add(0xB77C).
  660. add(0xB9C8).add(0xBC14).add(0xC0AC).add(0xC544).
  661. add(0xC790).add(0xCC28).add(0xCE74).add(0xD0C0).
  662. add(0xD30C).add(0xD558);
  663. }
  664. if (exemplars.containsSome(0x1200, 0x137F)) { // Ethiopic block
  665. // cut down to small list
  666. // make use of the fact that Ethiopic is allocated in 8's, where
  667. // the base is 0 mod 8.
  668. UnicodeSet ethiopic(
  669. UNICODE_STRING_SIMPLE("[[:Block=Ethiopic:]&[:Script=Ethiopic:]]"), status);
  670. UnicodeSetIterator it(ethiopic);
  671. while (it.next() && !it.isString()) {
  672. if ((it.getCodepoint() & 0x7) != 0) {
  673. exemplars.remove(it.getCodepoint());
  674. }
  675. }
  676. }
  677. // Upper-case any that aren't already so.
  678. // (We only do this for synthesized index characters.)
  679. UnicodeSetIterator it(exemplars);
  680. UnicodeString upperC;
  681. while (it.next()) {
  682. const UnicodeString &exemplarC = it.getString();
  683. upperC = exemplarC;
  684. upperC.toUpper(locale);
  685. initialLabels_->add(upperC);
  686. }
  687. }
  688. UBool AlphabeticIndex::addChineseIndexCharacters(UErrorCode &errorCode) {
  689. UnicodeSet contractions;
  690. collatorPrimaryOnly_->internalAddContractions(BASE[0], contractions, errorCode);
  691. if (U_FAILURE(errorCode) || contractions.isEmpty()) { return FALSE; }
  692. initialLabels_->addAll(contractions);
  693. UnicodeSetIterator iter(contractions);
  694. while (iter.next()) {
  695. const UnicodeString &s = iter.getString();
  696. U_ASSERT (s.startsWith(BASE, BASE_LENGTH));
  697. UChar c = s.charAt(s.length() - 1);
  698. if (0x41 <= c && c <= 0x5A) { // A-Z
  699. // There are Pinyin labels, add ASCII A-Z labels as well.
  700. initialLabels_->add(0x41, 0x5A); // A-Z
  701. break;
  702. }
  703. }
  704. return TRUE;
  705. }
  706. /*
  707. * Return the string with interspersed CGJs. Input must have more than 2 codepoints.
  708. */
  709. static const UChar CGJ = 0x034F;
  710. UnicodeString AlphabeticIndex::separated(const UnicodeString &item) {
  711. UnicodeString result;
  712. if (item.length() == 0) {
  713. return result;
  714. }
  715. int32_t i = 0;
  716. for (;;) {
  717. UChar32 cp = item.char32At(i);
  718. result.append(cp);
  719. i = item.moveIndex32(i, 1);
  720. if (i >= item.length()) {
  721. break;
  722. }
  723. result.append(CGJ);
  724. }
  725. return result;
  726. }
  727. UBool AlphabeticIndex::operator==(const AlphabeticIndex& /* other */) const {
  728. return FALSE;
  729. }
  730. UBool AlphabeticIndex::operator!=(const AlphabeticIndex& /* other */) const {
  731. return FALSE;
  732. }
  733. const RuleBasedCollator &AlphabeticIndex::getCollator() const {
  734. return *collator_;
  735. }
  736. const UnicodeString &AlphabeticIndex::getInflowLabel() const {
  737. return inflowLabel_;
  738. }
  739. const UnicodeString &AlphabeticIndex::getOverflowLabel() const {
  740. return overflowLabel_;
  741. }
  742. const UnicodeString &AlphabeticIndex::getUnderflowLabel() const {
  743. return underflowLabel_;
  744. }
  745. AlphabeticIndex &AlphabeticIndex::setInflowLabel(const UnicodeString &label, UErrorCode &/*status*/) {
  746. inflowLabel_ = label;
  747. clearBuckets();
  748. return *this;
  749. }
  750. AlphabeticIndex &AlphabeticIndex::setOverflowLabel(const UnicodeString &label, UErrorCode &/*status*/) {
  751. overflowLabel_ = label;
  752. clearBuckets();
  753. return *this;
  754. }
  755. AlphabeticIndex &AlphabeticIndex::setUnderflowLabel(const UnicodeString &label, UErrorCode &/*status*/) {
  756. underflowLabel_ = label;
  757. clearBuckets();
  758. return *this;
  759. }
  760. int32_t AlphabeticIndex::getMaxLabelCount() const {
  761. return maxLabelCount_;
  762. }
  763. AlphabeticIndex &AlphabeticIndex::setMaxLabelCount(int32_t maxLabelCount, UErrorCode &status) {
  764. if (U_FAILURE(status)) {
  765. return *this;
  766. }
  767. if (maxLabelCount <= 0) {
  768. status = U_ILLEGAL_ARGUMENT_ERROR;
  769. return *this;
  770. }
  771. maxLabelCount_ = maxLabelCount;
  772. clearBuckets();
  773. return *this;
  774. }
  775. //
  776. // init() - Common code for constructors.
  777. //
  778. void AlphabeticIndex::init(const Locale *locale, UErrorCode &status) {
  779. if (U_FAILURE(status)) { return; }
  780. if (locale == NULL && collator_ == NULL) {
  781. status = U_ILLEGAL_ARGUMENT_ERROR;
  782. return;
  783. }
  784. initialLabels_ = new UnicodeSet();
  785. if (initialLabels_ == NULL) {
  786. status = U_MEMORY_ALLOCATION_ERROR;
  787. return;
  788. }
  789. inflowLabel_.setTo((UChar)0x2026); // Ellipsis
  790. overflowLabel_ = inflowLabel_;
  791. underflowLabel_ = inflowLabel_;
  792. if (collator_ == NULL) {
  793. Collator *coll = Collator::createInstance(*locale, status);
  794. if (U_FAILURE(status)) {
  795. delete coll;
  796. return;
  797. }
  798. if (coll == NULL) {
  799. status = U_MEMORY_ALLOCATION_ERROR;
  800. return;
  801. }
  802. collator_ = dynamic_cast<RuleBasedCollator *>(coll);
  803. if (collator_ == NULL) {
  804. delete coll;
  805. status = U_UNSUPPORTED_ERROR;
  806. return;
  807. }
  808. }
  809. collatorPrimaryOnly_ = static_cast<RuleBasedCollator *>(collator_->clone());
  810. if (collatorPrimaryOnly_ == NULL) {
  811. status = U_MEMORY_ALLOCATION_ERROR;
  812. return;
  813. }
  814. collatorPrimaryOnly_->setAttribute(UCOL_STRENGTH, UCOL_PRIMARY, status);
  815. firstCharsInScripts_ = firstStringsInScript(status);
  816. if (U_FAILURE(status)) { return; }
  817. firstCharsInScripts_->sortWithUComparator(collatorComparator, collatorPrimaryOnly_, status);
  818. // Guard against a degenerate collator where
  819. // some script boundary strings are primary ignorable.
  820. for (;;) {
  821. if (U_FAILURE(status)) { return; }
  822. if (firstCharsInScripts_->isEmpty()) {
  823. // AlphabeticIndex requires some non-ignorable script boundary strings.
  824. status = U_ILLEGAL_ARGUMENT_ERROR;
  825. return;
  826. }
  827. if (collatorPrimaryOnly_->compare(
  828. *static_cast<UnicodeString *>(firstCharsInScripts_->elementAt(0)),
  829. emptyString_, status) == UCOL_EQUAL) {
  830. firstCharsInScripts_->removeElementAt(0);
  831. } else {
  832. break;
  833. }
  834. }
  835. // Chinese index characters, which are specific to each of the several Chinese tailorings,
  836. // take precedence over the single locale data exemplar set per language.
  837. if (!addChineseIndexCharacters(status) && locale != NULL) {
  838. addIndexExemplars(*locale, status);
  839. }
  840. }
  841. //
  842. // Comparison function for UVector<UnicodeString *> sorting with a collator.
  843. //
  844. static int32_t U_CALLCONV
  845. collatorComparator(const void *context, const void *left, const void *right) {
  846. const UElement *leftElement = static_cast<const UElement *>(left);
  847. const UElement *rightElement = static_cast<const UElement *>(right);
  848. const UnicodeString *leftString = static_cast<const UnicodeString *>(leftElement->pointer);
  849. const UnicodeString *rightString = static_cast<const UnicodeString *>(rightElement->pointer);
  850. if (leftString == rightString) {
  851. // Catches case where both are NULL
  852. return 0;
  853. }
  854. if (leftString == NULL) {
  855. return 1;
  856. };
  857. if (rightString == NULL) {
  858. return -1;
  859. }
  860. const Collator *col = static_cast<const Collator *>(context);
  861. UErrorCode errorCode = U_ZERO_ERROR;
  862. return col->compare(*leftString, *rightString, errorCode);
  863. }
  864. //
  865. // Comparison function for UVector<Record *> sorting with a collator.
  866. //
  867. static int32_t U_CALLCONV
  868. recordCompareFn(const void *context, const void *left, const void *right) {
  869. const UElement *leftElement = static_cast<const UElement *>(left);
  870. const UElement *rightElement = static_cast<const UElement *>(right);
  871. const AlphabeticIndex::Record *leftRec = static_cast<const AlphabeticIndex::Record *>(leftElement->pointer);
  872. const AlphabeticIndex::Record *rightRec = static_cast<const AlphabeticIndex::Record *>(rightElement->pointer);
  873. const Collator *col = static_cast<const Collator *>(context);
  874. UErrorCode errorCode = U_ZERO_ERROR;
  875. return col->compare(leftRec->name_, rightRec->name_, errorCode);
  876. }
  877. UVector *AlphabeticIndex::firstStringsInScript(UErrorCode &status) {
  878. if (U_FAILURE(status)) {
  879. return NULL;
  880. }
  881. LocalPointer<UVector> dest(new UVector(status), status);
  882. if (U_FAILURE(status)) {
  883. return NULL;
  884. }
  885. dest->setDeleter(uprv_deleteUObject);
  886. // Fetch the script-first-primary contractions which are defined in the root collator.
  887. // They all start with U+FDD1.
  888. UnicodeSet set;
  889. collatorPrimaryOnly_->internalAddContractions(0xFDD1, set, status);
  890. if (U_FAILURE(status)) {
  891. return NULL;
  892. }
  893. if (set.isEmpty()) {
  894. status = U_UNSUPPORTED_ERROR;
  895. return NULL;
  896. }
  897. UnicodeSetIterator iter(set);
  898. while (iter.next()) {
  899. const UnicodeString &boundary = iter.getString();
  900. uint32_t gcMask = U_GET_GC_MASK(boundary.char32At(1));
  901. if ((gcMask & (U_GC_L_MASK | U_GC_CN_MASK)) == 0) {
  902. // Ignore boundaries for the special reordering groups.
  903. // Take only those for "real scripts" (where the sample character is a Letter,
  904. // and the one for unassigned implicit weights (Cn).
  905. continue;
  906. }
  907. UnicodeString *s = new UnicodeString(boundary);
  908. if (s == NULL) {
  909. status = U_MEMORY_ALLOCATION_ERROR;
  910. return NULL;
  911. }
  912. dest->addElement(s, status);
  913. }
  914. return dest.orphan();
  915. }
  916. namespace {
  917. /**
  918. * Returns true if one index character string is "better" than the other.
  919. * Shorter NFKD is better, and otherwise NFKD-binary-less-than is
  920. * better, and otherwise binary-less-than is better.
  921. */
  922. UBool isOneLabelBetterThanOther(const Normalizer2 &nfkdNormalizer,
  923. const UnicodeString &one, const UnicodeString &other) {
  924. // This is called with primary-equal strings, but never with one.equals(other).
  925. UErrorCode status = U_ZERO_ERROR;
  926. UnicodeString n1 = nfkdNormalizer.normalize(one, status);
  927. UnicodeString n2 = nfkdNormalizer.normalize(other, status);
  928. if (U_FAILURE(status)) { return FALSE; }
  929. int32_t result = n1.countChar32() - n2.countChar32();
  930. if (result != 0) {
  931. return result < 0;
  932. }
  933. result = n1.compareCodePointOrder(n2);
  934. if (result != 0) {
  935. return result < 0;
  936. }
  937. return one.compareCodePointOrder(other) < 0;
  938. }
  939. } // namespace
  940. //
  941. // Constructor & Destructor for AlphabeticIndex::Record
  942. //
  943. // Records are internal only, instances are not directly surfaced in the public API.
  944. // This class is mostly struct-like, with all public fields.
  945. AlphabeticIndex::Record::Record(const UnicodeString &name, const void *data)
  946. : name_(name), data_(data) {}
  947. AlphabeticIndex::Record::~Record() {
  948. }
  949. AlphabeticIndex & AlphabeticIndex::addRecord(const UnicodeString &name, const void *data, UErrorCode &status) {
  950. if (U_FAILURE(status)) {
  951. return *this;
  952. }
  953. if (inputList_ == NULL) {
  954. inputList_ = new UVector(status);
  955. if (inputList_ == NULL) {
  956. status = U_MEMORY_ALLOCATION_ERROR;
  957. return *this;
  958. }
  959. inputList_->setDeleter(alphaIndex_deleteRecord);
  960. }
  961. Record *r = new Record(name, data);
  962. if (r == NULL) {
  963. status = U_MEMORY_ALLOCATION_ERROR;
  964. return *this;
  965. }
  966. inputList_->addElement(r, status);
  967. clearBuckets();
  968. //std::string ss;
  969. //std::string ss2;
  970. //std::cout << "added record: name = \"" << r->name_.toUTF8String(ss) << "\"" <<
  971. // " sortingName = \"" << r->sortingName_.toUTF8String(ss2) << "\"" << std::endl;
  972. return *this;
  973. }
  974. AlphabeticIndex &AlphabeticIndex::clearRecords(UErrorCode &status) {
  975. if (U_SUCCESS(status) && inputList_ != NULL && !inputList_->isEmpty()) {
  976. inputList_->removeAllElements();
  977. clearBuckets();
  978. }
  979. return *this;
  980. }
  981. int32_t AlphabeticIndex::getBucketIndex(const UnicodeString &name, UErrorCode &status) {
  982. initBuckets(status);
  983. if (U_FAILURE(status)) {
  984. return 0;
  985. }
  986. return buckets_->getBucketIndex(name, *collatorPrimaryOnly_, status);
  987. }
  988. int32_t AlphabeticIndex::getBucketIndex() const {
  989. return labelsIterIndex_;
  990. }
  991. UBool AlphabeticIndex::nextBucket(UErrorCode &status) {
  992. if (U_FAILURE(status)) {
  993. return FALSE;
  994. }
  995. if (buckets_ == NULL && currentBucket_ != NULL) {
  996. status = U_ENUM_OUT_OF_SYNC_ERROR;
  997. return FALSE;
  998. }
  999. initBuckets(status);
  1000. if (U_FAILURE(status)) {
  1001. return FALSE;
  1002. }
  1003. ++labelsIterIndex_;
  1004. if (labelsIterIndex_ >= buckets_->getBucketCount()) {
  1005. labelsIterIndex_ = buckets_->getBucketCount();
  1006. return FALSE;
  1007. }
  1008. currentBucket_ = getBucket(*buckets_->immutableVisibleList_, labelsIterIndex_);
  1009. resetRecordIterator();
  1010. return TRUE;
  1011. }
  1012. const UnicodeString &AlphabeticIndex::getBucketLabel() const {
  1013. if (currentBucket_ != NULL) {
  1014. return currentBucket_->label_;
  1015. } else {
  1016. return emptyString_;
  1017. }
  1018. }
  1019. UAlphabeticIndexLabelType AlphabeticIndex::getBucketLabelType() const {
  1020. if (currentBucket_ != NULL) {
  1021. return currentBucket_->labelType_;
  1022. } else {
  1023. return U_ALPHAINDEX_NORMAL;
  1024. }
  1025. }
  1026. int32_t AlphabeticIndex::getBucketRecordCount() const {
  1027. if (currentBucket_ != NULL && currentBucket_->records_ != NULL) {
  1028. return currentBucket_->records_->size();
  1029. } else {
  1030. return 0;
  1031. }
  1032. }
  1033. AlphabeticIndex &AlphabeticIndex::resetBucketIterator(UErrorCode &status) {
  1034. if (U_FAILURE(status)) {
  1035. return *this;
  1036. }
  1037. internalResetBucketIterator();
  1038. return *this;
  1039. }
  1040. UBool AlphabeticIndex::nextRecord(UErrorCode &status) {
  1041. if (U_FAILURE(status)) {
  1042. return FALSE;
  1043. }
  1044. if (currentBucket_ == NULL) {
  1045. // We are trying to iterate over the items in a bucket, but there is no
  1046. // current bucket from the enumeration of buckets.
  1047. status = U_INVALID_STATE_ERROR;
  1048. return FALSE;
  1049. }
  1050. if (buckets_ == NULL) {
  1051. status = U_ENUM_OUT_OF_SYNC_ERROR;
  1052. return FALSE;
  1053. }
  1054. if (currentBucket_->records_ == NULL) {
  1055. return FALSE;
  1056. }
  1057. ++itemsIterIndex_;
  1058. if (itemsIterIndex_ >= currentBucket_->records_->size()) {
  1059. itemsIterIndex_ = currentBucket_->records_->size();
  1060. return FALSE;
  1061. }
  1062. return TRUE;
  1063. }
  1064. const UnicodeString &AlphabeticIndex::getRecordName() const {
  1065. const UnicodeString *retStr = &emptyString_;
  1066. if (currentBucket_ != NULL && currentBucket_->records_ != NULL &&
  1067. itemsIterIndex_ >= 0 &&
  1068. itemsIterIndex_ < currentBucket_->records_->size()) {
  1069. Record *item = static_cast<Record *>(currentBucket_->records_->elementAt(itemsIterIndex_));
  1070. retStr = &item->name_;
  1071. }
  1072. return *retStr;
  1073. }
  1074. const void *AlphabeticIndex::getRecordData() const {
  1075. const void *retPtr = NULL;
  1076. if (currentBucket_ != NULL && currentBucket_->records_ != NULL &&
  1077. itemsIterIndex_ >= 0 &&
  1078. itemsIterIndex_ < currentBucket_->records_->size()) {
  1079. Record *item = static_cast<Record *>(currentBucket_->records_->elementAt(itemsIterIndex_));
  1080. retPtr = item->data_;
  1081. }
  1082. return retPtr;
  1083. }
  1084. AlphabeticIndex & AlphabeticIndex::resetRecordIterator() {
  1085. itemsIterIndex_ = -1;
  1086. return *this;
  1087. }
  1088. AlphabeticIndex::Bucket::Bucket(const UnicodeString &label,
  1089. const UnicodeString &lowerBoundary,
  1090. UAlphabeticIndexLabelType type)
  1091. : label_(label), lowerBoundary_(lowerBoundary), labelType_(type),
  1092. displayBucket_(NULL), displayIndex_(-1),
  1093. records_(NULL) {
  1094. }
  1095. AlphabeticIndex::Bucket::~Bucket() {
  1096. delete records_;
  1097. }
  1098. U_NAMESPACE_END
  1099. #endif // !UCONFIG_NO_COLLATION