affixpatternparser.h 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403
  1. // Copyright (C) 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. *******************************************************************************
  5. * Copyright (C) 2015, International Business Machines
  6. * Corporation and others. All Rights Reserved.
  7. *******************************************************************************
  8. * affixpatternparser.h
  9. *
  10. * created on: 2015jan06
  11. * created by: Travis Keep
  12. */
  13. #ifndef __AFFIX_PATTERN_PARSER_H__
  14. #define __AFFIX_PATTERN_PARSER_H__
  15. #include "unicode/utypes.h"
  16. #if !UCONFIG_NO_FORMATTING
  17. #include "unicode/unistr.h"
  18. #include "unicode/uobject.h"
  19. #include "pluralaffix.h"
  20. U_NAMESPACE_BEGIN
  21. class PluralRules;
  22. class FixedPrecision;
  23. class DecimalFormatSymbols;
  24. /**
  25. * A representation of the various forms of a particular currency according
  26. * to some locale and usage context.
  27. *
  28. * Includes the symbol, ISO code form, and long form(s) of the currency name
  29. * for each plural variation.
  30. */
  31. class U_I18N_API CurrencyAffixInfo : public UMemory {
  32. public:
  33. /**
  34. * Symbol is \u00a4; ISO form is \u00a4\u00a4;
  35. * long form is \u00a4\u00a4\u00a4.
  36. */
  37. CurrencyAffixInfo();
  38. const UnicodeString &getSymbol() const { return fSymbol; }
  39. const UnicodeString &getISO() const { return fISO; }
  40. const PluralAffix &getLong() const { return fLong; }
  41. void setSymbol(const UnicodeString &symbol) {
  42. fSymbol = symbol;
  43. fIsDefault = FALSE;
  44. }
  45. void setISO(const UnicodeString &iso) {
  46. fISO = iso;
  47. fIsDefault = FALSE;
  48. }
  49. UBool
  50. equals(const CurrencyAffixInfo &other) const {
  51. return (fSymbol == other.fSymbol)
  52. && (fISO == other.fISO)
  53. && (fLong.equals(other.fLong))
  54. && (fIsDefault == other.fIsDefault);
  55. }
  56. /**
  57. * Intializes this instance.
  58. *
  59. * @param locale the locale for the currency forms.
  60. * @param rules The plural rules for the locale.
  61. * @param currency the null terminated, 3 character ISO code of the
  62. * currency. If NULL, resets this instance as if it were just created.
  63. * In this case, the first 2 parameters may be NULL as well.
  64. * @param status any error returned here.
  65. */
  66. void set(
  67. const char *locale, const PluralRules *rules,
  68. const UChar *currency, UErrorCode &status);
  69. /**
  70. * Returns true if this instance is the default. That is has no real
  71. * currency. For instance never initialized with set()
  72. * or reset with set(NULL, NULL, NULL, status).
  73. */
  74. UBool isDefault() const { return fIsDefault; }
  75. /**
  76. * Adjusts the precision used for a particular currency.
  77. * @param currency the null terminated, 3 character ISO code of the
  78. * currency.
  79. * @param usage the usage of the currency
  80. * @param precision min/max fraction digits and rounding increment
  81. * adjusted.
  82. * @params status any error reported here.
  83. */
  84. static void adjustPrecision(
  85. const UChar *currency, const UCurrencyUsage usage,
  86. FixedPrecision &precision, UErrorCode &status);
  87. private:
  88. /**
  89. * The symbol form of the currency.
  90. */
  91. UnicodeString fSymbol;
  92. /**
  93. * The ISO form of the currency, usually three letter abbreviation.
  94. */
  95. UnicodeString fISO;
  96. /**
  97. * The long forms of the currency keyed by plural variation.
  98. */
  99. PluralAffix fLong;
  100. UBool fIsDefault;
  101. };
  102. class AffixPatternIterator;
  103. /**
  104. * A locale agnostic representation of an affix pattern.
  105. */
  106. class U_I18N_API AffixPattern : public UMemory {
  107. public:
  108. /**
  109. * The token types that can appear in an affix pattern.
  110. */
  111. enum ETokenType {
  112. kLiteral,
  113. kPercent,
  114. kPerMill,
  115. kCurrency,
  116. kNegative,
  117. kPositive
  118. };
  119. /**
  120. * An empty affix pattern.
  121. */
  122. AffixPattern()
  123. : tokens(), literals(), hasCurrencyToken(FALSE),
  124. hasPercentToken(FALSE), hasPermillToken(FALSE), char32Count(0) {
  125. }
  126. /**
  127. * Adds a string literal to this affix pattern.
  128. */
  129. void addLiteral(const UChar *, int32_t start, int32_t len);
  130. /**
  131. * Adds a token to this affix pattern. t must not be kLiteral as
  132. * the addLiteral() method adds literals.
  133. * @param t the token type to add
  134. */
  135. void add(ETokenType t);
  136. /**
  137. * Adds a currency token with specific count to this affix pattern.
  138. * @param count the token count. Used to distinguish between
  139. * one, two, or three currency symbols. Note that adding a currency
  140. * token with count=2 (Use ISO code) is different than adding two
  141. * currency tokens each with count=1 (two currency symbols).
  142. */
  143. void addCurrency(uint8_t count);
  144. /**
  145. * Makes this instance be an empty affix pattern.
  146. */
  147. void remove();
  148. /**
  149. * Provides an iterator over the tokens in this instance.
  150. * @param result this is initialized to point just before the
  151. * first token of this instance. Caller must call nextToken()
  152. * on the iterator once it is set up to have it actually point
  153. * to the first token. This first call to nextToken() will return
  154. * FALSE if the AffixPattern being iterated over is empty.
  155. * @return result
  156. */
  157. AffixPatternIterator &iterator(AffixPatternIterator &result) const;
  158. /**
  159. * Returns TRUE if this instance has currency tokens in it.
  160. */
  161. UBool usesCurrency() const {
  162. return hasCurrencyToken;
  163. }
  164. UBool usesPercent() const {
  165. return hasPercentToken;
  166. }
  167. UBool usesPermill() const {
  168. return hasPermillToken;
  169. }
  170. /**
  171. * Returns the number of code points a string of this instance
  172. * would have if none of the special tokens were escaped.
  173. * Used to compute the padding size.
  174. */
  175. int32_t countChar32() const {
  176. return char32Count;
  177. }
  178. /**
  179. * Appends other to this instance mutating this instance in place.
  180. * @param other The pattern appended to the end of this one.
  181. * @return a reference to this instance for chaining.
  182. */
  183. AffixPattern &append(const AffixPattern &other);
  184. /**
  185. * Converts this AffixPattern back into a user string.
  186. * It is the inverse of parseUserAffixString.
  187. */
  188. UnicodeString &toUserString(UnicodeString &appendTo) const;
  189. /**
  190. * Converts this AffixPattern back into a string.
  191. * It is the inverse of parseAffixString.
  192. */
  193. UnicodeString &toString(UnicodeString &appendTo) const;
  194. /**
  195. * Parses an affix pattern string appending it to an AffixPattern.
  196. * Parses affix pattern strings produced from using
  197. * DecimalFormatPatternParser to parse a format pattern. Affix patterns
  198. * include the positive prefix and suffix and the negative prefix
  199. * and suffix. This method expects affix patterns strings to be in the
  200. * same format that DecimalFormatPatternParser produces. Namely special
  201. * characters in the affix that correspond to a field type must be
  202. * prefixed with an apostrophe ('). These special character sequences
  203. * inluce minus (-), percent (%), permile (U+2030), plus (+),
  204. * short currency (U+00a4), medium currency (u+00a4 * 2),
  205. * long currency (u+a4 * 3), and apostrophe (')
  206. * (apostrophe does not correspond to a field type but has to be escaped
  207. * because it itself is the escape character).
  208. * Since the expansion of these special character
  209. * sequences is locale dependent, these sequences are not expanded in
  210. * an AffixPattern instance.
  211. * If these special characters are not prefixed with an apostrophe in
  212. * the affix pattern string, then they are treated verbatim just as
  213. * any other character. If an apostrophe prefixes a non special
  214. * character in the affix pattern, the apostrophe is simply ignored.
  215. *
  216. * @param affixStr the string from DecimalFormatPatternParser
  217. * @param appendTo parsed result appended here.
  218. * @param status any error parsing returned here.
  219. */
  220. static AffixPattern &parseAffixString(
  221. const UnicodeString &affixStr,
  222. AffixPattern &appendTo,
  223. UErrorCode &status);
  224. /**
  225. * Parses an affix pattern string appending it to an AffixPattern.
  226. * Parses affix pattern strings as the user would supply them.
  227. * In this function, quoting makes special characters like normal
  228. * characters whereas in parseAffixString, quoting makes special
  229. * characters special.
  230. *
  231. * @param affixStr the string from the user
  232. * @param appendTo parsed result appended here.
  233. * @param status any error parsing returned here.
  234. */
  235. static AffixPattern &parseUserAffixString(
  236. const UnicodeString &affixStr,
  237. AffixPattern &appendTo,
  238. UErrorCode &status);
  239. UBool equals(const AffixPattern &other) const {
  240. return (tokens == other.tokens)
  241. && (literals == other.literals)
  242. && (hasCurrencyToken == other.hasCurrencyToken)
  243. && (hasPercentToken == other.hasPercentToken)
  244. && (hasPermillToken == other.hasPermillToken)
  245. && (char32Count == other.char32Count);
  246. }
  247. private:
  248. /*
  249. * Tokens stored here. Each UChar generally stands for one token. A
  250. * Each token is of form 'etttttttllllllll' llllllll is the length of
  251. * the token and ranges from 0-255. ttttttt is the token type and ranges
  252. * from 0-127. If e is set it means this is an extendo token (to be
  253. * described later). To accomodate token lengths above 255, each normal
  254. * token (e=0) can be followed by 0 or more extendo tokens (e=1) with
  255. * the same type. Right now only kLiteral Tokens have extendo tokens.
  256. * Each extendo token provides the next 8 higher bits for the length.
  257. * If a kLiteral token is followed by 2 extendo tokens then, then the
  258. * llllllll of the next extendo token contains bits 8-15 of the length
  259. * and the last extendo token contains bits 16-23 of the length.
  260. */
  261. UnicodeString tokens;
  262. /*
  263. * The characters of the kLiteral tokens are concatenated together here.
  264. * The first characters go with the first kLiteral token, the next
  265. * characters go with the next kLiteral token etc.
  266. */
  267. UnicodeString literals;
  268. UBool hasCurrencyToken;
  269. UBool hasPercentToken;
  270. UBool hasPermillToken;
  271. int32_t char32Count;
  272. void add(ETokenType t, uint8_t count);
  273. };
  274. /**
  275. * An iterator over the tokens in an AffixPattern instance.
  276. */
  277. class U_I18N_API AffixPatternIterator : public UMemory {
  278. public:
  279. /**
  280. * Using an iterator without first calling iterator on an AffixPattern
  281. * instance to initialize the iterator results in
  282. * undefined behavior.
  283. */
  284. AffixPatternIterator() : nextLiteralIndex(0), lastLiteralLength(0), nextTokenIndex(0), tokens(NULL), literals(NULL) { }
  285. /**
  286. * Advances this iterator to the next token. Returns FALSE when there
  287. * are no more tokens. Calling the other methods after nextToken()
  288. * returns FALSE results in undefined behavior.
  289. */
  290. UBool nextToken();
  291. /**
  292. * Returns the type of token.
  293. */
  294. AffixPattern::ETokenType getTokenType() const;
  295. /**
  296. * For literal tokens, returns the literal string. Calling this for
  297. * other token types results in undefined behavior.
  298. * @param result replaced with a read-only alias to the literal string.
  299. * @return result
  300. */
  301. UnicodeString &getLiteral(UnicodeString &result) const;
  302. /**
  303. * Returns the token length. Usually 1, but for currency tokens may
  304. * be 2 for ISO code and 3 for long form.
  305. */
  306. int32_t getTokenLength() const;
  307. private:
  308. int32_t nextLiteralIndex;
  309. int32_t lastLiteralLength;
  310. int32_t nextTokenIndex;
  311. const UnicodeString *tokens;
  312. const UnicodeString *literals;
  313. friend class AffixPattern;
  314. AffixPatternIterator(const AffixPatternIterator &);
  315. AffixPatternIterator &operator=(const AffixPatternIterator &);
  316. };
  317. /**
  318. * A locale aware class that converts locale independent AffixPattern
  319. * instances into locale dependent PluralAffix instances.
  320. */
  321. class U_I18N_API AffixPatternParser : public UMemory {
  322. public:
  323. AffixPatternParser();
  324. AffixPatternParser(const DecimalFormatSymbols &symbols);
  325. void setDecimalFormatSymbols(const DecimalFormatSymbols &symbols);
  326. /**
  327. * Parses affixPattern appending the result to appendTo.
  328. * @param affixPattern The affix pattern.
  329. * @param currencyAffixInfo contains the currency forms.
  330. * @param appendTo The result of parsing affixPattern is appended here.
  331. * @param status any error returned here.
  332. * @return appendTo.
  333. */
  334. PluralAffix &parse(
  335. const AffixPattern &affixPattern,
  336. const CurrencyAffixInfo &currencyAffixInfo,
  337. PluralAffix &appendTo,
  338. UErrorCode &status) const;
  339. UBool equals(const AffixPatternParser &other) const {
  340. return (fPercent == other.fPercent)
  341. && (fPermill == other.fPermill)
  342. && (fNegative == other.fNegative)
  343. && (fPositive == other.fPositive);
  344. }
  345. private:
  346. UnicodeString fPercent;
  347. UnicodeString fPermill;
  348. UnicodeString fNegative;
  349. UnicodeString fPositive;
  350. };
  351. U_NAMESPACE_END
  352. #endif /* #if !UCONFIG_NO_FORMATTING */
  353. #endif // __AFFIX_PATTERN_PARSER_H__