juce_CharacterFunctions.h 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651
  1. /*
  2. ==============================================================================
  3. This file is part of the juce_core module of the JUCE library.
  4. Copyright (c) 2015 - ROLI Ltd.
  5. Permission to use, copy, modify, and/or distribute this software for any purpose with
  6. or without fee is hereby granted, provided that the above copyright notice and this
  7. permission notice appear in all copies.
  8. THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
  9. TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN
  10. NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
  11. DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
  12. IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
  13. CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  14. ------------------------------------------------------------------------------
  15. NOTE! This permissive ISC license applies ONLY to files within the juce_core module!
  16. All other JUCE modules are covered by a dual GPL/commercial license, so if you are
  17. using any other modules, be sure to check that you also comply with their license.
  18. For more details, visit www.juce.com
  19. ==============================================================================
  20. */
  21. #ifndef JUCE_CHARACTERFUNCTIONS_H_INCLUDED
  22. #define JUCE_CHARACTERFUNCTIONS_H_INCLUDED
  23. //==============================================================================
  24. #if JUCE_WINDOWS && ! DOXYGEN
  // Windows builds (outside of Doxygen runs): wchar_t is flagged as a 16-bit type.
  25. #define JUCE_NATIVE_WCHAR_IS_UTF8 0
  26. #define JUCE_NATIVE_WCHAR_IS_UTF16 1
  27. #define JUCE_NATIVE_WCHAR_IS_UTF32 0
  28. #else
  // Every other build, and Doxygen runs: wchar_t is flagged as a 32-bit type.
  29. /** This macro will be set to 1 if the compiler's native wchar_t is an 8-bit type. */
  30. #define JUCE_NATIVE_WCHAR_IS_UTF8 0
  31. /** This macro will be set to 1 if the compiler's native wchar_t is a 16-bit type. */
  32. #define JUCE_NATIVE_WCHAR_IS_UTF16 0
  33. /** This macro will be set to 1 if the compiler's native wchar_t is a 32-bit type. */
  34. #define JUCE_NATIVE_WCHAR_IS_UTF32 1
  35. #endif
  36. #if JUCE_NATIVE_WCHAR_IS_UTF32 || DOXYGEN
  37. /** A platform-independent 32-bit unicode character type.
      It is an alias for wchar_t when that type is flagged as 32-bit above.
  */
  38. typedef wchar_t juce_wchar;
  39. #else
  // wchar_t is not flagged as 32-bit here, so the uint32 type (declared outside
  // this file) is used for juce_wchar instead.
  40. typedef uint32 juce_wchar;
  41. #endif
  42. #ifndef DOXYGEN
  43. /** This macro is deprecated, but preserved for compatibility with old code.
      It pastes an L prefix onto the given string literal, turning it into a
      wide-character literal.
  */
  44. #define JUCE_T(stringLiteral) (L##stringLiteral)
  45. #endif
  46. #if JUCE_DEFINE_T_MACRO
  47. /** The 'T' macro is an alternative for using the "L" prefix in front of a string literal.
  48. This macro is deprecated, but available for compatibility with old code if you set
  49. JUCE_DEFINE_T_MACRO = 1. The fastest, most portable and best way to write your string
  50. literals is as standard char strings, using escaped utf-8 character sequences for extended
  51. characters, rather than trying to store them as wide-char strings.
  52. */
  // T simply forwards to JUCE_T, so it is only usable where JUCE_T is defined.
  53. #define T(stringLiteral) JUCE_T(stringLiteral)
  54. #endif
  55. //==============================================================================
  56. /**
  57. A collection of functions for manipulating characters and character strings.
  58. Most of these methods are designed for internal use by the String and CharPointer
  59. classes, but some of them may be useful to call directly.
  60. @see String, CharPointer_UTF8, CharPointer_UTF16, CharPointer_UTF32
  61. */
  62. class JUCE_API CharacterFunctions
  63. {
  64. public:
  65. //==============================================================================
  66. /** Converts a character to upper-case. */
  67. static juce_wchar toUpperCase (juce_wchar character) noexcept;
  68. /** Converts a character to lower-case. */
  69. static juce_wchar toLowerCase (juce_wchar character) noexcept;
  70. /** Checks whether a unicode character is upper-case. */
  71. static bool isUpperCase (juce_wchar character) noexcept;
  72. /** Checks whether a unicode character is lower-case. */
  73. static bool isLowerCase (juce_wchar character) noexcept;
  74. /** Checks whether a character is whitespace. */
  75. static bool isWhitespace (char character) noexcept;
  76. /** Checks whether a character is whitespace. */
  77. static bool isWhitespace (juce_wchar character) noexcept;
  78. /** Checks whether a character is a digit. */
  79. static bool isDigit (char character) noexcept;
  80. /** Checks whether a character is a digit. */
  81. static bool isDigit (juce_wchar character) noexcept;
  82. /** Checks whether a character is alphabetic. */
  83. static bool isLetter (char character) noexcept;
  84. /** Checks whether a character is alphabetic. */
  85. static bool isLetter (juce_wchar character) noexcept;
  86. /** Checks whether a character is alphabetic or numeric. */
  87. static bool isLetterOrDigit (char character) noexcept;
  88. /** Checks whether a character is alphabetic or numeric. */
  89. static bool isLetterOrDigit (juce_wchar character) noexcept;
  90. /** Checks whether a character is a printable character, i.e. alphabetic, numeric,
  91. a punctuation character or a space.
  92. */
  93. static bool isPrintable (char character) noexcept;
  94. /** Checks whether a character is a printable character, i.e. alphabetic, numeric,
  95. a punctuation character or a space.
  96. */
  97. static bool isPrintable (juce_wchar character) noexcept;
  98. /** Returns 0 to 15 for '0' to 'F', or -1 for characters that aren't a legal hex digit. */
  99. static int getHexDigitValue (juce_wchar digit) noexcept;
  100. /** Converts a byte of Windows 1252 codepage to unicode. */
  101. static juce_wchar getUnicodeCharFromWindows1252Codepage (uint8 windows1252Char) noexcept;
  102. //==============================================================================
  103. /** Parses a character string to read a floating-point number.
  104. Note that this will advance the pointer that is passed in, leaving it at
  105. the end of the number.
  106. */
  107. template <typename CharPointerType>
  108. static double readDoubleValue (CharPointerType& text) noexcept
  109. {
  110. double result[3] = { 0 }, accumulator[2] = { 0 };
  111. int exponentAdjustment[2] = { 0 }, exponentAccumulator[2] = { -1, -1 };
  112. int exponent = 0, decPointIndex = 0, digit = 0;
  113. int lastDigit = 0, numSignificantDigits = 0;
  114. bool isNegative = false, digitsFound = false;
  115. const int maxSignificantDigits = 15 + 2;
  116. text = text.findEndOfWhitespace();
  117. juce_wchar c = *text;
  118. switch (c)
  119. {
  120. case '-': isNegative = true; // fall-through..
  121. case '+': c = *++text;
  122. }
  123. switch (c)
  124. {
  125. case 'n':
  126. case 'N':
  127. if ((text[1] == 'a' || text[1] == 'A') && (text[2] == 'n' || text[2] == 'N'))
  128. return std::numeric_limits<double>::quiet_NaN();
  129. break;
  130. case 'i':
  131. case 'I':
  132. if ((text[1] == 'n' || text[1] == 'N') && (text[2] == 'f' || text[2] == 'F'))
  133. return std::numeric_limits<double>::infinity();
  134. break;
  135. }
  136. for (;;)
  137. {
  138. if (text.isDigit())
  139. {
  140. lastDigit = digit;
  141. digit = (int) text.getAndAdvance() - '0';
  142. digitsFound = true;
  143. if (decPointIndex != 0)
  144. exponentAdjustment[1]++;
  145. if (numSignificantDigits == 0 && digit == 0)
  146. continue;
  147. if (++numSignificantDigits > maxSignificantDigits)
  148. {
  149. if (digit > 5)
  150. ++accumulator [decPointIndex];
  151. else if (digit == 5 && (lastDigit & 1) != 0)
  152. ++accumulator [decPointIndex];
  153. if (decPointIndex > 0)
  154. exponentAdjustment[1]--;
  155. else
  156. exponentAdjustment[0]++;
  157. while (text.isDigit())
  158. {
  159. ++text;
  160. if (decPointIndex == 0)
  161. exponentAdjustment[0]++;
  162. }
  163. }
  164. else
  165. {
  166. const double maxAccumulatorValue = (double) ((std::numeric_limits<unsigned int>::max() - 9) / 10);
  167. if (accumulator [decPointIndex] > maxAccumulatorValue)
  168. {
  169. result [decPointIndex] = mulexp10 (result [decPointIndex], exponentAccumulator [decPointIndex])
  170. + accumulator [decPointIndex];
  171. accumulator [decPointIndex] = 0;
  172. exponentAccumulator [decPointIndex] = 0;
  173. }
  174. accumulator [decPointIndex] = accumulator[decPointIndex] * 10 + digit;
  175. exponentAccumulator [decPointIndex]++;
  176. }
  177. }
  178. else if (decPointIndex == 0 && *text == '.')
  179. {
  180. ++text;
  181. decPointIndex = 1;
  182. if (numSignificantDigits > maxSignificantDigits)
  183. {
  184. while (text.isDigit())
  185. ++text;
  186. break;
  187. }
  188. }
  189. else
  190. {
  191. break;
  192. }
  193. }
  194. result[0] = mulexp10 (result[0], exponentAccumulator[0]) + accumulator[0];
  195. if (decPointIndex != 0)
  196. result[1] = mulexp10 (result[1], exponentAccumulator[1]) + accumulator[1];
  197. c = *text;
  198. if ((c == 'e' || c == 'E') && digitsFound)
  199. {
  200. bool negativeExponent = false;
  201. switch (*++text)
  202. {
  203. case '-': negativeExponent = true; // fall-through..
  204. case '+': ++text;
  205. }
  206. while (text.isDigit())
  207. exponent = (exponent * 10) + ((int) text.getAndAdvance() - '0');
  208. if (negativeExponent)
  209. exponent = -exponent;
  210. }
  211. double r = mulexp10 (result[0], exponent + exponentAdjustment[0]);
  212. if (decPointIndex != 0)
  213. r += mulexp10 (result[1], exponent - exponentAdjustment[1]);
  214. return isNegative ? -r : r;
  215. }
  /** Reads a floating-point value from the start of a character string.

      The pointer is received by value, so the caller's own pointer is not moved.
      @see readDoubleValue
  */
  template <typename CharPointerType>
  static double getDoubleValue (CharPointerType text) noexcept
  {
      const double parsedValue = readDoubleValue (text);
      return parsedValue;
  }
  222. //==============================================================================
  223. /** Parses a character string, to read an integer value. */
  224. template <typename IntType, typename CharPointerType>
  225. static IntType getIntValue (const CharPointerType text) noexcept
  226. {
  227. IntType v = 0;
  228. CharPointerType s (text.findEndOfWhitespace());
  229. const bool isNeg = *s == '-';
  230. if (isNeg)
  231. ++s;
  232. for (;;)
  233. {
  234. const juce_wchar c = s.getAndAdvance();
  235. if (c >= '0' && c <= '9')
  236. v = v * 10 + (IntType) (c - '0');
  237. else
  238. break;
  239. }
  240. return isNeg ? -v : v;
  241. }
  242. template <typename ResultType>
  243. struct HexParser
  244. {
  245. template <typename CharPointerType>
  246. static ResultType parse (CharPointerType t) noexcept
  247. {
  248. ResultType result = 0;
  249. while (! t.isEmpty())
  250. {
  251. const int hexValue = CharacterFunctions::getHexDigitValue (t.getAndAdvance());
  252. if (hexValue >= 0)
  253. result = (result << 4) | hexValue;
  254. }
  255. return result;
  256. }
  257. };
  258. //==============================================================================
  /** Counts the characters in a string, giving up once a maximum count is reached.

      @returns the number of characters before the null terminator, or
               maxCharsToCount if the terminator wasn't reached within that many
  */
  template <typename CharPointerType>
  static size_t lengthUpTo (CharPointerType text, const size_t maxCharsToCount) noexcept
  {
      size_t count = 0;

      for (; count < maxCharsToCount; ++count)
          if (text.getAndAdvance() == 0)
              break;

      return count;
  }
  /** Counts the characters in a string, giving up once an end-pointer is reached.

      @returns the number of characters read before hitting either the null
               terminator or the end position
  */
  template <typename CharPointerType>
  static size_t lengthUpTo (CharPointerType start, const CharPointerType end) noexcept
  {
      size_t count = 0;

      for (;;)
      {
          if (! (start < end))
              break;

          if (start.getAndAdvance() == 0)
              break;

          ++count;
      }

      return count;
  }
  279. /** Copies null-terminated characters from one string to another. */
  280. template <typename DestCharPointerType, typename SrcCharPointerType>
  281. static void copyAll (DestCharPointerType& dest, SrcCharPointerType src) noexcept
  282. {
  283. while (juce_wchar c = src.getAndAdvance())
  284. dest.write (c);
  285. dest.writeNull();
  286. }
  287. /** Copies characters from one string to another, up to a null terminator
  288. or a given byte size limit. */
  289. template <typename DestCharPointerType, typename SrcCharPointerType>
  290. static size_t copyWithDestByteLimit (DestCharPointerType& dest, SrcCharPointerType src, size_t maxBytesToWrite) noexcept
  291. {
  292. typename DestCharPointerType::CharType const* const startAddress = dest.getAddress();
  293. ssize_t maxBytes = (ssize_t) maxBytesToWrite;
  294. maxBytes -= sizeof (typename DestCharPointerType::CharType); // (allow for a terminating null)
  295. for (;;)
  296. {
  297. const juce_wchar c = src.getAndAdvance();
  298. const size_t bytesNeeded = DestCharPointerType::getBytesRequiredFor (c);
  299. maxBytes -= bytesNeeded;
  300. if (c == 0 || maxBytes < 0)
  301. break;
  302. dest.write (c);
  303. }
  304. dest.writeNull();
  305. return (size_t) getAddressDifference (dest.getAddress(), startAddress)
  306. + sizeof (typename DestCharPointerType::CharType);
  307. }
  308. /** Copies characters from one string to another, up to a null terminator
  309. or a given maximum number of characters. */
  310. template <typename DestCharPointerType, typename SrcCharPointerType>
  311. static void copyWithCharLimit (DestCharPointerType& dest, SrcCharPointerType src, int maxChars) noexcept
  312. {
  313. while (--maxChars > 0)
  314. {
  315. const juce_wchar c = src.getAndAdvance();
  316. if (c == 0)
  317. break;
  318. dest.write (c);
  319. }
  320. dest.writeNull();
  321. }
  322. /** Compares two characters. */
  323. static inline int compare (juce_wchar char1, juce_wchar char2) noexcept
  324. {
  325. if (int diff = static_cast<int> (char1) - static_cast<int> (char2))
  326. return diff < 0 ? -1 : 1;
  327. return 0;
  328. }
  329. /** Compares two null-terminated character strings. */
  330. template <typename CharPointerType1, typename CharPointerType2>
  331. static int compare (CharPointerType1 s1, CharPointerType2 s2) noexcept
  332. {
  333. for (;;)
  334. {
  335. const juce_wchar c1 = s1.getAndAdvance();
  336. if (int diff = compare (c1, s2.getAndAdvance()))
  337. return diff;
  338. if (c1 == 0)
  339. break;
  340. }
  341. return 0;
  342. }
  343. /** Compares two null-terminated character strings, up to a given number of characters. */
  344. template <typename CharPointerType1, typename CharPointerType2>
  345. static int compareUpTo (CharPointerType1 s1, CharPointerType2 s2, int maxChars) noexcept
  346. {
  347. while (--maxChars >= 0)
  348. {
  349. const juce_wchar c1 = s1.getAndAdvance();
  350. if (int diff = compare (c1, s2.getAndAdvance()))
  351. return diff;
  352. if (c1 == 0)
  353. break;
  354. }
  355. return 0;
  356. }
  357. /** Compares two characters, using a case-independant match. */
  358. static inline int compareIgnoreCase (juce_wchar char1, juce_wchar char2) noexcept
  359. {
  360. return char1 != char2 ? compare (toUpperCase (char1), toUpperCase (char2)) : 0;
  361. }
  362. /** Compares two null-terminated character strings, using a case-independant match. */
  363. template <typename CharPointerType1, typename CharPointerType2>
  364. static int compareIgnoreCase (CharPointerType1 s1, CharPointerType2 s2) noexcept
  365. {
  366. for (;;)
  367. {
  368. const juce_wchar c1 = s1.getAndAdvance();
  369. if (int diff = compareIgnoreCase (c1, s2.getAndAdvance()))
  370. return diff;
  371. if (c1 == 0)
  372. break;
  373. }
  374. return 0;
  375. }
  376. /** Compares two null-terminated character strings, using a case-independent match. */
  377. template <typename CharPointerType1, typename CharPointerType2>
  378. static int compareIgnoreCaseUpTo (CharPointerType1 s1, CharPointerType2 s2, int maxChars) noexcept
  379. {
  380. while (--maxChars >= 0)
  381. {
  382. const juce_wchar c1 = s1.getAndAdvance();
  383. if (int diff = compareIgnoreCase (c1, s2.getAndAdvance()))
  384. return diff;
  385. if (c1 == 0)
  386. break;
  387. }
  388. return 0;
  389. }
  /** Searches a string for a substring and reports where it starts.
      @returns the character index of the first match, or -1 if the substring
               is not found
  */
  template <typename CharPointerType1, typename CharPointerType2>
  static int indexOf (CharPointerType1 textToSearch, const CharPointerType2 substringToLookFor) noexcept
  {
      const int needleLength = (int) substringToLookFor.length();

      for (int position = 0;; ++position)
      {
          if (textToSearch.compareUpTo (substringToLookFor, needleLength) == 0)
              return position;

          // Hitting the terminator means every start position has been tried.
          if (textToSearch.getAndAdvance() == 0)
              return -1;
      }
  }
  /** Searches a string for a substring and returns a pointer to where it starts.
      If the substring is not found, the returned pointer is at the string's
      null terminator.
  */
  template <typename CharPointerType1, typename CharPointerType2>
  static CharPointerType1 find (CharPointerType1 textToSearch, const CharPointerType2 substringToLookFor) noexcept
  {
      const int needleLength = (int) substringToLookFor.length();

      for (;;)
      {
          if (textToSearch.compareUpTo (substringToLookFor, needleLength) == 0)
              break;

          if (textToSearch.isEmpty())
              break;

          ++textToSearch;
      }

      return textToSearch;
  }
  420. /** Returns a pointer to the first occurrence of a substring in a string.
  421. If the substring is not found, this will return a pointer to the string's
  422. null terminator.
  423. */
  424. template <typename CharPointerType>
  425. static CharPointerType find (CharPointerType textToSearch, const juce_wchar charToLookFor) noexcept
  426. {
  427. for (;; ++textToSearch)
  428. {
  429. const juce_wchar c = *textToSearch;
  430. if (c == charToLookFor || c == 0)
  431. break;
  432. }
  433. return textToSearch;
  434. }
  /** Searches a string for a substring, using a case-independent match, and
      reports where it starts.
      @returns the character index of the first match, or -1 if the substring
               is not found
  */
  template <typename CharPointerType1, typename CharPointerType2>
  static int indexOfIgnoreCase (CharPointerType1 haystack, const CharPointerType2 needle) noexcept
  {
      const int charsToMatch = (int) needle.length();

      for (int position = 0;; ++position)
      {
          if (haystack.compareIgnoreCaseUpTo (needle, charsToMatch) == 0)
              return position;

          // Hitting the terminator means every start position has been tried.
          if (haystack.getAndAdvance() == 0)
              return -1;
      }
  }
  453. /** Finds the character index of a given character in another string.
  454. Returns -1 if the character is not found.
  455. */
  456. template <typename Type>
  457. static int indexOfChar (Type text, const juce_wchar charToFind) noexcept
  458. {
  459. int i = 0;
  460. while (! text.isEmpty())
  461. {
  462. if (text.getAndAdvance() == charToFind)
  463. return i;
  464. ++i;
  465. }
  466. return -1;
  467. }
  468. /** Finds the character index of a given character in another string, using
  469. a case-independent match.
  470. Returns -1 if the character is not found.
  471. */
  472. template <typename Type>
  473. static int indexOfCharIgnoreCase (Type text, juce_wchar charToFind) noexcept
  474. {
  475. charToFind = CharacterFunctions::toLowerCase (charToFind);
  476. int i = 0;
  477. while (! text.isEmpty())
  478. {
  479. if (text.toLowerCase() == charToFind)
  480. return i;
  481. ++text;
  482. ++i;
  483. }
  484. return -1;
  485. }
  /** Skips any whitespace at the start of a string.
      @returns a pointer to the first character for which isWhitespace() is false
  */
  template <typename Type>
  static Type findEndOfWhitespace (Type text) noexcept
  {
      for (; text.isWhitespace(); ++text)
      {
      }

      return text;
  }
  497. /** Returns a pointer to the first character in the string which is found in
  498. the breakCharacters string.
  499. */
  500. template <typename Type, typename BreakType>
  501. static Type findEndOfToken (Type text, const BreakType breakCharacters, const Type quoteCharacters)
  502. {
  503. juce_wchar currentQuoteChar = 0;
  504. while (! text.isEmpty())
  505. {
  506. const juce_wchar c = text.getAndAdvance();
  507. if (currentQuoteChar == 0 && breakCharacters.indexOf (c) >= 0)
  508. {
  509. --text;
  510. break;
  511. }
  512. if (quoteCharacters.indexOf (c) >= 0)
  513. {
  514. if (currentQuoteChar == 0)
  515. currentQuoteChar = c;
  516. else if (currentQuoteChar == c)
  517. currentQuoteChar = 0;
  518. }
  519. }
  520. return text;
  521. }
  522. private:
  523. static double mulexp10 (const double value, int exponent) noexcept;
  524. };
  525. #endif // JUCE_CHARACTERFUNCTIONS_H_INCLUDED