uspoof.cpp 30 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926
  1. // © 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. ***************************************************************************
  5. * Copyright (C) 2008-2015, International Business Machines Corporation
  6. * and others. All Rights Reserved.
  7. ***************************************************************************
  8. * file name: uspoof.cpp
  9. * encoding: UTF-8
  10. * tab size: 8 (not used)
  11. * indentation:4
  12. *
  13. * created on: 2008Feb13
  14. * created by: Andy Heninger
  15. *
  16. * Unicode Spoof Detection
  17. */
  18. #include "unicode/ubidi.h"
  19. #include "unicode/utypes.h"
  20. #include "unicode/normalizer2.h"
  21. #include "unicode/uspoof.h"
  22. #include "unicode/ustring.h"
  23. #include "unicode/utf16.h"
  24. #include "cmemory.h"
  25. #include "cstring.h"
  26. #include "mutex.h"
  27. #include "scriptset.h"
  28. #include "uassert.h"
  29. #include "ucln_in.h"
  30. #include "uspoof_impl.h"
  31. #include "umutex.h"
  32. #if !UCONFIG_NO_NORMALIZATION
  33. U_NAMESPACE_USE
  34. //
  35. // Static Objects used by the spoof impl, their thread safe initialization and their cleanup.
  36. //
  37. static UnicodeSet *gInclusionSet = nullptr;
  38. static UnicodeSet *gRecommendedSet = nullptr;
  39. static const Normalizer2 *gNfdNormalizer = nullptr;
  40. static UInitOnce gSpoofInitStaticsOnce {};
  41. namespace {
  42. UBool U_CALLCONV
  43. uspoof_cleanup() {
  44. delete gInclusionSet;
  45. gInclusionSet = nullptr;
  46. delete gRecommendedSet;
  47. gRecommendedSet = nullptr;
  48. gNfdNormalizer = nullptr;
  49. gSpoofInitStaticsOnce.reset();
  50. return true;
  51. }
  52. void U_CALLCONV initializeStatics(UErrorCode &status) {
  53. gInclusionSet = new UnicodeSet();
  54. gRecommendedSet = new UnicodeSet();
  55. if (gInclusionSet == nullptr || gRecommendedSet == nullptr) {
  56. status = U_MEMORY_ALLOCATION_ERROR;
  57. delete gInclusionSet;
  58. gInclusionSet = nullptr;
  59. delete gRecommendedSet;
  60. gRecommendedSet = nullptr;
  61. return;
  62. }
  63. gInclusionSet->applyIntPropertyValue(UCHAR_IDENTIFIER_TYPE, U_ID_TYPE_INCLUSION, status);
  64. gRecommendedSet->applyIntPropertyValue(UCHAR_IDENTIFIER_TYPE, U_ID_TYPE_RECOMMENDED, status);
  65. if (U_FAILURE(status)) {
  66. delete gInclusionSet;
  67. gInclusionSet = nullptr;
  68. delete gRecommendedSet;
  69. gRecommendedSet = nullptr;
  70. return;
  71. }
  72. gInclusionSet->freeze();
  73. gRecommendedSet->freeze();
  74. gNfdNormalizer = Normalizer2::getNFDInstance(status);
  75. ucln_i18n_registerCleanup(UCLN_I18N_SPOOF, uspoof_cleanup);
  76. }
  77. } // namespace
  78. U_CFUNC void uspoof_internalInitStatics(UErrorCode *status) {
  79. umtx_initOnce(gSpoofInitStaticsOnce, &initializeStatics, *status);
  80. }
  81. U_CAPI USpoofChecker * U_EXPORT2
  82. uspoof_open(UErrorCode *status) {
  83. umtx_initOnce(gSpoofInitStaticsOnce, &initializeStatics, *status);
  84. if (U_FAILURE(*status)) {
  85. return nullptr;
  86. }
  87. SpoofImpl *si = new SpoofImpl(*status);
  88. if (si == nullptr) {
  89. *status = U_MEMORY_ALLOCATION_ERROR;
  90. return nullptr;
  91. }
  92. if (U_FAILURE(*status)) {
  93. delete si;
  94. return nullptr;
  95. }
  96. return si->asUSpoofChecker();
  97. }
  98. U_CAPI USpoofChecker * U_EXPORT2
  99. uspoof_openFromSerialized(const void *data, int32_t length, int32_t *pActualLength,
  100. UErrorCode *status) {
  101. if (U_FAILURE(*status)) {
  102. return nullptr;
  103. }
  104. if (data == nullptr) {
  105. *status = U_ILLEGAL_ARGUMENT_ERROR;
  106. return nullptr;
  107. }
  108. umtx_initOnce(gSpoofInitStaticsOnce, &initializeStatics, *status);
  109. if (U_FAILURE(*status))
  110. {
  111. return nullptr;
  112. }
  113. SpoofData *sd = new SpoofData(data, length, *status);
  114. if (sd == nullptr) {
  115. *status = U_MEMORY_ALLOCATION_ERROR;
  116. return nullptr;
  117. }
  118. if (U_FAILURE(*status)) {
  119. delete sd;
  120. return nullptr;
  121. }
  122. SpoofImpl *si = new SpoofImpl(sd, *status);
  123. if (si == nullptr) {
  124. *status = U_MEMORY_ALLOCATION_ERROR;
  125. delete sd; // explicit delete as the destructor for si won't be called.
  126. return nullptr;
  127. }
  128. if (U_FAILURE(*status)) {
  129. delete si; // no delete for sd, as the si destructor will delete it.
  130. return nullptr;
  131. }
  132. if (pActualLength != nullptr) {
  133. *pActualLength = sd->size();
  134. }
  135. return si->asUSpoofChecker();
  136. }
  137. U_CAPI USpoofChecker * U_EXPORT2
  138. uspoof_clone(const USpoofChecker *sc, UErrorCode *status) {
  139. const SpoofImpl *src = SpoofImpl::validateThis(sc, *status);
  140. if (src == nullptr) {
  141. return nullptr;
  142. }
  143. SpoofImpl *result = new SpoofImpl(*src, *status); // copy constructor
  144. if (result == nullptr) {
  145. *status = U_MEMORY_ALLOCATION_ERROR;
  146. return nullptr;
  147. }
  148. if (U_FAILURE(*status)) {
  149. delete result;
  150. result = nullptr;
  151. }
  152. return result->asUSpoofChecker();
  153. }
  154. U_CAPI void U_EXPORT2
  155. uspoof_close(USpoofChecker *sc) {
  156. UErrorCode status = U_ZERO_ERROR;
  157. SpoofImpl *This = SpoofImpl::validateThis(sc, status);
  158. delete This;
  159. }
  160. U_CAPI void U_EXPORT2
  161. uspoof_setChecks(USpoofChecker *sc, int32_t checks, UErrorCode *status) {
  162. SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
  163. if (This == nullptr) {
  164. return;
  165. }
  166. // Verify that the requested checks are all ones (bits) that
  167. // are acceptable, known values.
  168. if (checks & ~(USPOOF_ALL_CHECKS | USPOOF_AUX_INFO)) {
  169. *status = U_ILLEGAL_ARGUMENT_ERROR;
  170. return;
  171. }
  172. This->fChecks = checks;
  173. }
  174. U_CAPI int32_t U_EXPORT2
  175. uspoof_getChecks(const USpoofChecker *sc, UErrorCode *status) {
  176. const SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
  177. if (This == nullptr) {
  178. return 0;
  179. }
  180. return This->fChecks;
  181. }
  182. U_CAPI void U_EXPORT2
  183. uspoof_setRestrictionLevel(USpoofChecker *sc, URestrictionLevel restrictionLevel) {
  184. UErrorCode status = U_ZERO_ERROR;
  185. SpoofImpl *This = SpoofImpl::validateThis(sc, status);
  186. if (This != nullptr) {
  187. This->fRestrictionLevel = restrictionLevel;
  188. This->fChecks |= USPOOF_RESTRICTION_LEVEL;
  189. }
  190. }
  191. U_CAPI URestrictionLevel U_EXPORT2
  192. uspoof_getRestrictionLevel(const USpoofChecker *sc) {
  193. UErrorCode status = U_ZERO_ERROR;
  194. const SpoofImpl *This = SpoofImpl::validateThis(sc, status);
  195. if (This == nullptr) {
  196. return USPOOF_UNRESTRICTIVE;
  197. }
  198. return This->fRestrictionLevel;
  199. }
  200. U_CAPI void U_EXPORT2
  201. uspoof_setAllowedLocales(USpoofChecker *sc, const char *localesList, UErrorCode *status) {
  202. SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
  203. if (This == nullptr) {
  204. return;
  205. }
  206. This->setAllowedLocales(localesList, *status);
  207. }
  208. U_CAPI const char * U_EXPORT2
  209. uspoof_getAllowedLocales(USpoofChecker *sc, UErrorCode *status) {
  210. SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
  211. if (This == nullptr) {
  212. return nullptr;
  213. }
  214. return This->getAllowedLocales(*status);
  215. }
  216. U_CAPI const USet * U_EXPORT2
  217. uspoof_getAllowedChars(const USpoofChecker *sc, UErrorCode *status) {
  218. const UnicodeSet *result = uspoof_getAllowedUnicodeSet(sc, status);
  219. return result->toUSet();
  220. }
  221. U_CAPI const UnicodeSet * U_EXPORT2
  222. uspoof_getAllowedUnicodeSet(const USpoofChecker *sc, UErrorCode *status) {
  223. const SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
  224. if (This == nullptr) {
  225. return nullptr;
  226. }
  227. return This->fAllowedCharsSet;
  228. }
  229. U_CAPI void U_EXPORT2
  230. uspoof_setAllowedChars(USpoofChecker *sc, const USet *chars, UErrorCode *status) {
  231. const UnicodeSet *set = UnicodeSet::fromUSet(chars);
  232. uspoof_setAllowedUnicodeSet(sc, set, status);
  233. }
  234. U_CAPI void U_EXPORT2
  235. uspoof_setAllowedUnicodeSet(USpoofChecker *sc, const UnicodeSet *chars, UErrorCode *status) {
  236. SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
  237. if (This == nullptr) {
  238. return;
  239. }
  240. if (chars->isBogus()) {
  241. *status = U_ILLEGAL_ARGUMENT_ERROR;
  242. return;
  243. }
  244. UnicodeSet *clonedSet = chars->clone();
  245. if (clonedSet == nullptr || clonedSet->isBogus()) {
  246. *status = U_MEMORY_ALLOCATION_ERROR;
  247. return;
  248. }
  249. clonedSet->freeze();
  250. delete This->fAllowedCharsSet;
  251. This->fAllowedCharsSet = clonedSet;
  252. This->fChecks |= USPOOF_CHAR_LIMIT;
  253. }
  254. U_CAPI int32_t U_EXPORT2
  255. uspoof_check(const USpoofChecker *sc,
  256. const char16_t *id, int32_t length,
  257. int32_t *position,
  258. UErrorCode *status) {
  259. // Backwards compatibility:
  260. if (position != nullptr) {
  261. *position = 0;
  262. }
  263. // Delegate to uspoof_check2
  264. return uspoof_check2(sc, id, length, nullptr, status);
  265. }
  266. U_CAPI int32_t U_EXPORT2
  267. uspoof_check2(const USpoofChecker *sc,
  268. const char16_t* id, int32_t length,
  269. USpoofCheckResult* checkResult,
  270. UErrorCode *status) {
  271. const SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
  272. if (This == nullptr) {
  273. return 0;
  274. }
  275. if (length < -1) {
  276. *status = U_ILLEGAL_ARGUMENT_ERROR;
  277. return 0;
  278. }
  279. UnicodeString idStr((length == -1), id, length); // Aliasing constructor.
  280. int32_t result = uspoof_check2UnicodeString(sc, idStr, checkResult, status);
  281. return result;
  282. }
  283. U_CAPI int32_t U_EXPORT2
  284. uspoof_checkUTF8(const USpoofChecker *sc,
  285. const char *id, int32_t length,
  286. int32_t *position,
  287. UErrorCode *status) {
  288. // Backwards compatibility:
  289. if (position != nullptr) {
  290. *position = 0;
  291. }
  292. // Delegate to uspoof_check2
  293. return uspoof_check2UTF8(sc, id, length, nullptr, status);
  294. }
  295. U_CAPI int32_t U_EXPORT2
  296. uspoof_check2UTF8(const USpoofChecker *sc,
  297. const char *id, int32_t length,
  298. USpoofCheckResult* checkResult,
  299. UErrorCode *status) {
  300. if (U_FAILURE(*status)) {
  301. return 0;
  302. }
  303. UnicodeString idStr = UnicodeString::fromUTF8(StringPiece(id, length>=0 ? length : static_cast<int32_t>(uprv_strlen(id))));
  304. int32_t result = uspoof_check2UnicodeString(sc, idStr, checkResult, status);
  305. return result;
  306. }
  307. U_CAPI int32_t U_EXPORT2
  308. uspoof_areConfusable(const USpoofChecker *sc,
  309. const char16_t *id1, int32_t length1,
  310. const char16_t *id2, int32_t length2,
  311. UErrorCode *status) {
  312. SpoofImpl::validateThis(sc, *status);
  313. if (U_FAILURE(*status)) {
  314. return 0;
  315. }
  316. if (length1 < -1 || length2 < -1) {
  317. *status = U_ILLEGAL_ARGUMENT_ERROR;
  318. return 0;
  319. }
  320. UnicodeString id1Str((length1==-1), id1, length1); // Aliasing constructor
  321. UnicodeString id2Str((length2==-1), id2, length2); // Aliasing constructor
  322. return uspoof_areConfusableUnicodeString(sc, id1Str, id2Str, status);
  323. }
  324. U_CAPI int32_t U_EXPORT2
  325. uspoof_areConfusableUTF8(const USpoofChecker *sc,
  326. const char *id1, int32_t length1,
  327. const char *id2, int32_t length2,
  328. UErrorCode *status) {
  329. SpoofImpl::validateThis(sc, *status);
  330. if (U_FAILURE(*status)) {
  331. return 0;
  332. }
  333. if (length1 < -1 || length2 < -1) {
  334. *status = U_ILLEGAL_ARGUMENT_ERROR;
  335. return 0;
  336. }
  337. UnicodeString id1Str = UnicodeString::fromUTF8(StringPiece(id1, length1>=0? length1 : static_cast<int32_t>(uprv_strlen(id1))));
  338. UnicodeString id2Str = UnicodeString::fromUTF8(StringPiece(id2, length2>=0? length2 : static_cast<int32_t>(uprv_strlen(id2))));
  339. int32_t results = uspoof_areConfusableUnicodeString(sc, id1Str, id2Str, status);
  340. return results;
  341. }
  342. U_CAPI int32_t U_EXPORT2
  343. uspoof_areConfusableUnicodeString(const USpoofChecker *sc,
  344. const icu::UnicodeString &id1,
  345. const icu::UnicodeString &id2,
  346. UErrorCode *status) {
  347. const SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
  348. if (U_FAILURE(*status)) {
  349. return 0;
  350. }
  351. //
  352. // See section 4 of UAX 39 for the algorithm for checking whether two strings are confusable,
  353. // and for definitions of the types (single, whole, mixed-script) of confusables.
  354. // We only care about a few of the check flags. Ignore the others.
  355. // If no tests relevant to this function have been specified, return an error.
  356. // TODO: is this really the right thing to do? It's probably an error on the caller's part,
  357. // but logically we would just return 0 (no error).
  358. if ((This->fChecks & USPOOF_CONFUSABLE) == 0) {
  359. *status = U_INVALID_STATE_ERROR;
  360. return 0;
  361. }
  362. // Compute the skeletons and check for confusability.
  363. UnicodeString id1Skeleton;
  364. uspoof_getSkeletonUnicodeString(sc, 0 /* deprecated */, id1, id1Skeleton, status);
  365. UnicodeString id2Skeleton;
  366. uspoof_getSkeletonUnicodeString(sc, 0 /* deprecated */, id2, id2Skeleton, status);
  367. if (U_FAILURE(*status)) { return 0; }
  368. if (id1Skeleton != id2Skeleton) {
  369. return 0;
  370. }
  371. // If we get here, the strings are confusable. Now we just need to set the flags for the appropriate classes
  372. // of confusables according to UTS 39 section 4.
  373. // Start by computing the resolved script sets of id1 and id2.
  374. ScriptSet id1RSS;
  375. This->getResolvedScriptSet(id1, id1RSS, *status);
  376. ScriptSet id2RSS;
  377. This->getResolvedScriptSet(id2, id2RSS, *status);
  378. // Turn on all applicable flags
  379. int32_t result = 0;
  380. if (id1RSS.intersects(id2RSS)) {
  381. result |= USPOOF_SINGLE_SCRIPT_CONFUSABLE;
  382. } else {
  383. result |= USPOOF_MIXED_SCRIPT_CONFUSABLE;
  384. if (!id1RSS.isEmpty() && !id2RSS.isEmpty()) {
  385. result |= USPOOF_WHOLE_SCRIPT_CONFUSABLE;
  386. }
  387. }
  388. // Turn off flags that the user doesn't want
  389. if ((This->fChecks & USPOOF_SINGLE_SCRIPT_CONFUSABLE) == 0) {
  390. result &= ~USPOOF_SINGLE_SCRIPT_CONFUSABLE;
  391. }
  392. if ((This->fChecks & USPOOF_MIXED_SCRIPT_CONFUSABLE) == 0) {
  393. result &= ~USPOOF_MIXED_SCRIPT_CONFUSABLE;
  394. }
  395. if ((This->fChecks & USPOOF_WHOLE_SCRIPT_CONFUSABLE) == 0) {
  396. result &= ~USPOOF_WHOLE_SCRIPT_CONFUSABLE;
  397. }
  398. return result;
  399. }
  400. U_CAPI uint32_t U_EXPORT2 uspoof_areBidiConfusable(const USpoofChecker *sc, UBiDiDirection direction,
  401. const char16_t *id1, int32_t length1,
  402. const char16_t *id2, int32_t length2,
  403. UErrorCode *status) {
  404. UnicodeString id1Str((length1 == -1), id1, length1); // Aliasing constructor
  405. UnicodeString id2Str((length2 == -1), id2, length2); // Aliasing constructor
  406. if (id1Str.isBogus() || id2Str.isBogus()) {
  407. *status = U_ILLEGAL_ARGUMENT_ERROR;
  408. return 0;
  409. }
  410. return uspoof_areBidiConfusableUnicodeString(sc, direction, id1Str, id2Str, status);
  411. }
  412. U_CAPI uint32_t U_EXPORT2 uspoof_areBidiConfusableUTF8(const USpoofChecker *sc, UBiDiDirection direction,
  413. const char *id1, int32_t length1, const char *id2,
  414. int32_t length2, UErrorCode *status) {
  415. if (length1 < -1 || length2 < -1) {
  416. *status = U_ILLEGAL_ARGUMENT_ERROR;
  417. return 0;
  418. }
  419. UnicodeString id1Str = UnicodeString::fromUTF8(
  420. StringPiece(id1, length1 >= 0 ? length1 : static_cast<int32_t>(uprv_strlen(id1))));
  421. UnicodeString id2Str = UnicodeString::fromUTF8(
  422. StringPiece(id2, length2 >= 0 ? length2 : static_cast<int32_t>(uprv_strlen(id2))));
  423. return uspoof_areBidiConfusableUnicodeString(sc, direction, id1Str, id2Str, status);
  424. }
  425. U_CAPI uint32_t U_EXPORT2 uspoof_areBidiConfusableUnicodeString(const USpoofChecker *sc,
  426. UBiDiDirection direction,
  427. const icu::UnicodeString &id1,
  428. const icu::UnicodeString &id2,
  429. UErrorCode *status) {
  430. const SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
  431. if (U_FAILURE(*status)) {
  432. return 0;
  433. }
  434. //
  435. // See section 4 of UTS 39 for the algorithm for checking whether two strings are confusable,
  436. // and for definitions of the types (single, whole, mixed-script) of confusables.
  437. // We only care about a few of the check flags. Ignore the others.
  438. // If no tests relevant to this function have been specified, return an error.
  439. // TODO: is this really the right thing to do? It's probably an error on the caller's part,
  440. // but logically we would just return 0 (no error).
  441. if ((This->fChecks & USPOOF_CONFUSABLE) == 0) {
  442. *status = U_INVALID_STATE_ERROR;
  443. return 0;
  444. }
  445. // Compute the skeletons and check for confusability.
  446. UnicodeString id1Skeleton;
  447. uspoof_getBidiSkeletonUnicodeString(sc, direction, id1, id1Skeleton, status);
  448. UnicodeString id2Skeleton;
  449. uspoof_getBidiSkeletonUnicodeString(sc, direction, id2, id2Skeleton, status);
  450. if (U_FAILURE(*status)) {
  451. return 0;
  452. }
  453. if (id1Skeleton != id2Skeleton) {
  454. return 0;
  455. }
  456. // If we get here, the strings are confusable. Now we just need to set the flags for the appropriate
  457. // classes of confusables according to UTS 39 section 4. Start by computing the resolved script sets
  458. // of id1 and id2.
  459. ScriptSet id1RSS;
  460. This->getResolvedScriptSet(id1, id1RSS, *status);
  461. ScriptSet id2RSS;
  462. This->getResolvedScriptSet(id2, id2RSS, *status);
  463. // Turn on all applicable flags
  464. uint32_t result = 0;
  465. if (id1RSS.intersects(id2RSS)) {
  466. result |= USPOOF_SINGLE_SCRIPT_CONFUSABLE;
  467. } else {
  468. result |= USPOOF_MIXED_SCRIPT_CONFUSABLE;
  469. if (!id1RSS.isEmpty() && !id2RSS.isEmpty()) {
  470. result |= USPOOF_WHOLE_SCRIPT_CONFUSABLE;
  471. }
  472. }
  473. // Turn off flags that the user doesn't want
  474. return result & This->fChecks;
  475. }
  476. U_CAPI int32_t U_EXPORT2
  477. uspoof_checkUnicodeString(const USpoofChecker *sc,
  478. const icu::UnicodeString &id,
  479. int32_t *position,
  480. UErrorCode *status) {
  481. // Backwards compatibility:
  482. if (position != nullptr) {
  483. *position = 0;
  484. }
  485. // Delegate to uspoof_check2
  486. return uspoof_check2UnicodeString(sc, id, nullptr, status);
  487. }
  488. namespace {
  489. int32_t checkImpl(const SpoofImpl* This, const UnicodeString& id, CheckResult* checkResult, UErrorCode* status) {
  490. U_ASSERT(This != nullptr);
  491. U_ASSERT(checkResult != nullptr);
  492. checkResult->clear();
  493. int32_t result = 0;
  494. if (0 != (This->fChecks & USPOOF_RESTRICTION_LEVEL)) {
  495. URestrictionLevel idRestrictionLevel = This->getRestrictionLevel(id, *status);
  496. if (idRestrictionLevel > This->fRestrictionLevel) {
  497. result |= USPOOF_RESTRICTION_LEVEL;
  498. }
  499. checkResult->fRestrictionLevel = idRestrictionLevel;
  500. }
  501. if (0 != (This->fChecks & USPOOF_MIXED_NUMBERS)) {
  502. UnicodeSet numerics;
  503. This->getNumerics(id, numerics, *status);
  504. if (numerics.size() > 1) {
  505. result |= USPOOF_MIXED_NUMBERS;
  506. }
  507. checkResult->fNumerics = numerics; // UnicodeSet::operator=
  508. }
  509. if (0 != (This->fChecks & USPOOF_HIDDEN_OVERLAY)) {
  510. int32_t index = This->findHiddenOverlay(id, *status);
  511. if (index != -1) {
  512. result |= USPOOF_HIDDEN_OVERLAY;
  513. }
  514. }
  515. if (0 != (This->fChecks & USPOOF_CHAR_LIMIT)) {
  516. int32_t i;
  517. UChar32 c;
  518. int32_t length = id.length();
  519. for (i=0; i<length ;) {
  520. c = id.char32At(i);
  521. i += U16_LENGTH(c);
  522. if (!This->fAllowedCharsSet->contains(c)) {
  523. result |= USPOOF_CHAR_LIMIT;
  524. break;
  525. }
  526. }
  527. }
  528. if (0 != (This->fChecks & USPOOF_INVISIBLE)) {
  529. // This check needs to be done on NFD input
  530. UnicodeString nfdText;
  531. gNfdNormalizer->normalize(id, nfdText, *status);
  532. int32_t nfdLength = nfdText.length();
  533. // scan for more than one occurrence of the same non-spacing mark
  534. // in a sequence of non-spacing marks.
  535. int32_t i;
  536. UChar32 c;
  537. UChar32 firstNonspacingMark = 0;
  538. UBool haveMultipleMarks = false;
  539. UnicodeSet marksSeenSoFar; // Set of combining marks in a single combining sequence.
  540. for (i=0; i<nfdLength ;) {
  541. c = nfdText.char32At(i);
  542. i += U16_LENGTH(c);
  543. if (u_charType(c) != U_NON_SPACING_MARK) {
  544. firstNonspacingMark = 0;
  545. if (haveMultipleMarks) {
  546. marksSeenSoFar.clear();
  547. haveMultipleMarks = false;
  548. }
  549. continue;
  550. }
  551. if (firstNonspacingMark == 0) {
  552. firstNonspacingMark = c;
  553. continue;
  554. }
  555. if (!haveMultipleMarks) {
  556. marksSeenSoFar.add(firstNonspacingMark);
  557. haveMultipleMarks = true;
  558. }
  559. if (marksSeenSoFar.contains(c)) {
  560. // report the error, and stop scanning.
  561. // No need to find more than the first failure.
  562. result |= USPOOF_INVISIBLE;
  563. break;
  564. }
  565. marksSeenSoFar.add(c);
  566. }
  567. }
  568. checkResult->fChecks = result;
  569. return checkResult->toCombinedBitmask(This->fChecks);
  570. }
  571. } // namespace
  572. U_CAPI int32_t U_EXPORT2
  573. uspoof_check2UnicodeString(const USpoofChecker *sc,
  574. const icu::UnicodeString &id,
  575. USpoofCheckResult* checkResult,
  576. UErrorCode *status) {
  577. const SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
  578. if (This == nullptr) {
  579. return false;
  580. }
  581. if (checkResult != nullptr) {
  582. CheckResult* ThisCheckResult = CheckResult::validateThis(checkResult, *status);
  583. if (ThisCheckResult == nullptr) {
  584. return false;
  585. }
  586. return checkImpl(This, id, ThisCheckResult, status);
  587. } else {
  588. // Stack-allocate the checkResult since this method doesn't return it
  589. CheckResult stackCheckResult;
  590. return checkImpl(This, id, &stackCheckResult, status);
  591. }
  592. }
  593. U_CAPI int32_t U_EXPORT2
  594. uspoof_getSkeleton(const USpoofChecker *sc,
  595. uint32_t type,
  596. const char16_t *id, int32_t length,
  597. char16_t *dest, int32_t destCapacity,
  598. UErrorCode *status) {
  599. SpoofImpl::validateThis(sc, *status);
  600. if (U_FAILURE(*status)) {
  601. return 0;
  602. }
  603. if (length<-1 || destCapacity<0 || (destCapacity==0 && dest!=nullptr)) {
  604. *status = U_ILLEGAL_ARGUMENT_ERROR;
  605. return 0;
  606. }
  607. UnicodeString idStr((length==-1), id, length); // Aliasing constructor
  608. UnicodeString destStr;
  609. uspoof_getSkeletonUnicodeString(sc, type, idStr, destStr, status);
  610. destStr.extract(dest, destCapacity, *status);
  611. return destStr.length();
  612. }
  613. U_CAPI int32_t U_EXPORT2 uspoof_getBidiSkeleton(const USpoofChecker *sc, UBiDiDirection direction,
  614. const UChar *id, int32_t length, UChar *dest,
  615. int32_t destCapacity, UErrorCode *status) {
  616. UnicodeString idStr((length == -1), id, length); // Aliasing constructor
  617. if (idStr.isBogus()) {
  618. *status = U_ILLEGAL_ARGUMENT_ERROR;
  619. return 0;
  620. }
  621. UnicodeString destStr;
  622. uspoof_getBidiSkeletonUnicodeString(sc, direction, idStr, destStr, status);
  623. return destStr.extract(dest, destCapacity, *status);
  624. }
  625. U_I18N_API UnicodeString &U_EXPORT2 uspoof_getBidiSkeletonUnicodeString(const USpoofChecker *sc,
  626. UBiDiDirection direction,
  627. const UnicodeString &id,
  628. UnicodeString &dest,
  629. UErrorCode *status) {
  630. dest.remove();
  631. if (direction != UBIDI_LTR && direction != UBIDI_RTL) {
  632. *status = U_ILLEGAL_ARGUMENT_ERROR;
  633. return dest;
  634. }
  635. UBiDi *bidi = ubidi_open();
  636. ubidi_setPara(bidi, id.getBuffer(), id.length(), direction,
  637. /*embeddingLevels*/ nullptr, status);
  638. if (U_FAILURE(*status)) {
  639. ubidi_close(bidi);
  640. return dest;
  641. }
  642. UnicodeString reordered;
  643. int32_t const size = ubidi_getProcessedLength(bidi);
  644. UChar* const reorderedBuffer = reordered.getBuffer(size);
  645. if (reorderedBuffer == nullptr) {
  646. *status = U_MEMORY_ALLOCATION_ERROR;
  647. ubidi_close(bidi);
  648. return dest;
  649. }
  650. ubidi_writeReordered(bidi, reorderedBuffer, size,
  651. UBIDI_KEEP_BASE_COMBINING | UBIDI_DO_MIRRORING, status);
  652. reordered.releaseBuffer(size);
  653. ubidi_close(bidi);
  654. if (U_FAILURE(*status)) {
  655. return dest;
  656. }
  657. // The type parameter is deprecated since ICU 58; any number may be passed.
  658. constexpr uint32_t deprecatedType = 58;
  659. return uspoof_getSkeletonUnicodeString(sc, deprecatedType, reordered, dest, status);
  660. }
  661. U_I18N_API UnicodeString & U_EXPORT2
  662. uspoof_getSkeletonUnicodeString(const USpoofChecker *sc,
  663. uint32_t /*type*/,
  664. const UnicodeString &id,
  665. UnicodeString &dest,
  666. UErrorCode *status) {
  667. const SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
  668. if (U_FAILURE(*status)) {
  669. return dest;
  670. }
  671. UnicodeString nfdId;
  672. gNfdNormalizer->normalize(id, nfdId, *status);
  673. // Apply the skeleton mapping to the NFD normalized input string
  674. // Accumulate the skeleton, possibly unnormalized, in a UnicodeString.
  675. int32_t inputIndex = 0;
  676. UnicodeString skelStr;
  677. int32_t normalizedLen = nfdId.length();
  678. for (inputIndex=0; inputIndex < normalizedLen; ) {
  679. UChar32 c = nfdId.char32At(inputIndex);
  680. inputIndex += U16_LENGTH(c);
  681. if (!u_hasBinaryProperty(c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT)) {
  682. This->fSpoofData->confusableLookup(c, skelStr);
  683. }
  684. }
  685. gNfdNormalizer->normalize(skelStr, dest, *status);
  686. return dest;
  687. }
  688. U_CAPI int32_t U_EXPORT2 uspoof_getSkeletonUTF8(const USpoofChecker *sc, uint32_t type, const char *id,
  689. int32_t length, char *dest, int32_t destCapacity,
  690. UErrorCode *status) {
  691. SpoofImpl::validateThis(sc, *status);
  692. if (U_FAILURE(*status)) {
  693. return 0;
  694. }
  695. if (length<-1 || destCapacity<0 || (destCapacity==0 && dest!=nullptr)) {
  696. *status = U_ILLEGAL_ARGUMENT_ERROR;
  697. return 0;
  698. }
  699. UnicodeString srcStr = UnicodeString::fromUTF8(
  700. StringPiece(id, length >= 0 ? length : static_cast<int32_t>(uprv_strlen(id))));
  701. UnicodeString destStr;
  702. uspoof_getSkeletonUnicodeString(sc, type, srcStr, destStr, status);
  703. if (U_FAILURE(*status)) {
  704. return 0;
  705. }
  706. int32_t lengthInUTF8 = 0;
  707. u_strToUTF8(dest, destCapacity, &lengthInUTF8, destStr.getBuffer(), destStr.length(), status);
  708. return lengthInUTF8;
  709. }
  710. U_CAPI int32_t U_EXPORT2 uspoof_getBidiSkeletonUTF8(const USpoofChecker *sc, UBiDiDirection direction,
  711. const char *id, int32_t length, char *dest,
  712. int32_t destCapacity, UErrorCode *status) {
  713. if (length < -1) {
  714. *status = U_ILLEGAL_ARGUMENT_ERROR;
  715. return 0;
  716. }
  717. UnicodeString srcStr = UnicodeString::fromUTF8(
  718. StringPiece(id, length >= 0 ? length : static_cast<int32_t>(uprv_strlen(id))));
  719. UnicodeString destStr;
  720. uspoof_getBidiSkeletonUnicodeString(sc, direction, srcStr, destStr, status);
  721. if (U_FAILURE(*status)) {
  722. return 0;
  723. }
  724. int32_t lengthInUTF8 = 0;
  725. u_strToUTF8(dest, destCapacity, &lengthInUTF8, destStr.getBuffer(), destStr.length(), status);
  726. return lengthInUTF8;
  727. }
  728. U_CAPI int32_t U_EXPORT2
  729. uspoof_serialize(USpoofChecker *sc,void *buf, int32_t capacity, UErrorCode *status) {
  730. SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
  731. if (This == nullptr) {
  732. U_ASSERT(U_FAILURE(*status));
  733. return 0;
  734. }
  735. return This->fSpoofData->serialize(buf, capacity, *status);
  736. }
  737. U_CAPI const USet * U_EXPORT2
  738. uspoof_getInclusionSet(UErrorCode *status) {
  739. umtx_initOnce(gSpoofInitStaticsOnce, &initializeStatics, *status);
  740. return gInclusionSet->toUSet();
  741. }
  742. U_CAPI const USet * U_EXPORT2
  743. uspoof_getRecommendedSet(UErrorCode *status) {
  744. umtx_initOnce(gSpoofInitStaticsOnce, &initializeStatics, *status);
  745. return gRecommendedSet->toUSet();
  746. }
  747. U_I18N_API const UnicodeSet * U_EXPORT2
  748. uspoof_getInclusionUnicodeSet(UErrorCode *status) {
  749. umtx_initOnce(gSpoofInitStaticsOnce, &initializeStatics, *status);
  750. return gInclusionSet;
  751. }
  752. U_I18N_API const UnicodeSet * U_EXPORT2
  753. uspoof_getRecommendedUnicodeSet(UErrorCode *status) {
  754. umtx_initOnce(gSpoofInitStaticsOnce, &initializeStatics, *status);
  755. return gRecommendedSet;
  756. }
  757. //------------------
  758. // CheckResult APIs
  759. //------------------
  760. U_CAPI USpoofCheckResult* U_EXPORT2
  761. uspoof_openCheckResult(UErrorCode *status) {
  762. CheckResult* checkResult = new CheckResult();
  763. if (checkResult == nullptr) {
  764. *status = U_MEMORY_ALLOCATION_ERROR;
  765. return nullptr;
  766. }
  767. return checkResult->asUSpoofCheckResult();
  768. }
  769. U_CAPI void U_EXPORT2
  770. uspoof_closeCheckResult(USpoofCheckResult* checkResult) {
  771. UErrorCode status = U_ZERO_ERROR;
  772. CheckResult* This = CheckResult::validateThis(checkResult, status);
  773. delete This;
  774. }
  775. U_CAPI int32_t U_EXPORT2
  776. uspoof_getCheckResultChecks(const USpoofCheckResult *checkResult, UErrorCode *status) {
  777. const CheckResult* This = CheckResult::validateThis(checkResult, *status);
  778. if (U_FAILURE(*status)) { return 0; }
  779. return This->fChecks;
  780. }
  781. U_CAPI URestrictionLevel U_EXPORT2
  782. uspoof_getCheckResultRestrictionLevel(const USpoofCheckResult *checkResult, UErrorCode *status) {
  783. const CheckResult* This = CheckResult::validateThis(checkResult, *status);
  784. if (U_FAILURE(*status)) { return USPOOF_UNRESTRICTIVE; }
  785. return This->fRestrictionLevel;
  786. }
  787. U_CAPI const USet* U_EXPORT2
  788. uspoof_getCheckResultNumerics(const USpoofCheckResult *checkResult, UErrorCode *status) {
  789. const CheckResult* This = CheckResult::validateThis(checkResult, *status);
  790. if (U_FAILURE(*status)) { return nullptr; }
  791. return This->fNumerics.toUSet();
  792. }
  793. #endif // !UCONFIG_NO_NORMALIZATION