/* NUMstring.cpp
 *
 * Copyright (C) 2012-2017 David Weenink
 *
 * This code is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or (at
 * your option) any later version.
 *
 * This code is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this work. If not, see <http://www.gnu.org/licenses/>.
 */
/*
 djmw 20121005 First version
*/
  21. #include <ctype.h>
  22. #include <wctype.h>
  23. #include "Interpreter.h"
  24. #include "NUM2.h"
  25. autoVEC VEC_createFromString (conststring32 s) {
  26. autostring32vector tokens = STRVECtokenize (s);
  27. if (tokens.size < 1)
  28. Melder_throw (U"Empty string.");
  29. autoVEC numbers = VECraw (tokens.size);
  30. for (integer inum = 1; inum <= tokens.size; inum ++)
  31. Interpreter_numericExpression (0, tokens [inum].get(), & numbers [inum]);
  32. return numbers;
  33. }
  34. char32 *strstr_regexp (conststring32 string, conststring32 search_regexp) {
  35. char32 *charp = nullptr;
  36. regexp *compiled_regexp = CompileRE_throwable (search_regexp, 0);
  37. if (ExecRE (compiled_regexp, nullptr, string, nullptr, false, U'\0', U'\0', nullptr, nullptr)) {
  38. charp = compiled_regexp -> startp [0];
  39. }
  40. free (compiled_regexp);
  41. return charp;
  42. }
  43. static autostring32vector string32vector_searchAndReplace_literal (string32vector me,
  44. conststring32 search, conststring32 replace, int maximumNumberOfReplaces,
  45. integer *out_numberOfMatches, integer *out_numberOfStringMatches)
  46. {
  47. if (! search || ! replace)
  48. return autostring32vector();
  49. autostring32vector result (me.size);
  50. integer nmatches_sub = 0, nmatches = 0, nstringmatches = 0;
  51. for (integer i = 1; i <= me.size; i ++) {
  52. conststring32 string = ( me [i] ? me [i] : U"" ); // treat null as an empty string
  53. result [i] = STRreplace (string, search, replace, maximumNumberOfReplaces, & nmatches_sub);
  54. if (nmatches_sub > 0) {
  55. nmatches += nmatches_sub;
  56. nstringmatches ++;
  57. }
  58. }
  59. if (out_numberOfMatches)
  60. *out_numberOfMatches = nmatches;
  61. if (out_numberOfStringMatches)
  62. *out_numberOfStringMatches = nstringmatches;
  63. return result;
  64. }
  65. static autostring32vector string32vector_searchAndReplace_regexp (string32vector me,
  66. conststring32 searchRE, conststring32 replaceRE, int maximumNumberOfReplaces,
  67. integer *out_numberOfMatches, integer *out_numberOfStringMatches)
  68. {
  69. if (! searchRE || ! replaceRE)
  70. return autostring32vector();
  71. integer nmatches_sub = 0;
  72. regexp *compiledRE = CompileRE_throwable (searchRE, 0);
  73. autostring32vector result (me.size);
  74. integer nmatches = 0, nstringmatches = 0;
  75. for (integer i = 1; i <= me.size; i ++) {
  76. conststring32 string = ( me [i] ? me [i] : U"" ); // treat null as an empty string
  77. result [i] = STRreplace_regex (string, compiledRE, replaceRE, maximumNumberOfReplaces, & nmatches_sub);
  78. if (nmatches_sub > 0) {
  79. nmatches += nmatches_sub;
  80. nstringmatches ++;
  81. }
  82. }
  83. if (out_numberOfMatches)
  84. *out_numberOfMatches = nmatches;
  85. if (out_numberOfStringMatches)
  86. *out_numberOfStringMatches = nstringmatches;
  87. return result;
  88. }
  89. autostring32vector string32vector_searchAndReplace (string32vector me,
  90. conststring32 search, conststring32 replace, int maximumNumberOfReplaces,
  91. integer *nmatches, integer *nstringmatches, bool use_regexp)
  92. {
  93. return use_regexp ?
  94. string32vector_searchAndReplace_regexp (me, search, replace, maximumNumberOfReplaces, nmatches, nstringmatches) :
  95. string32vector_searchAndReplace_literal (me, search, replace, maximumNumberOfReplaces, nmatches, nstringmatches);
  96. }
  97. /*
  98. * Acceptable ranges e.g. "1 4 2 3:7 4:3 3:5:2" -->
  99. * 1, 4, 2, 3, 4, 5, 6, 7, 4, 3, 3, 4, 5, 4, 3, 2
  100. * Overlap is allowed. Ranges can go up and down.
  101. */
  102. static autoINTVEC getElementsOfRanges (conststring32 ranges, integer maximumElement, conststring32 elementType) {
  103. /*
  104. Count the elements.
  105. */
  106. integer previousElement = 0;
  107. integer numberOfElements = 0;
  108. const char32 *p = & ranges [0];
  109. for (;;) {
  110. while (Melder_isHorizontalSpace (*p)) p ++;
  111. if (*p == U'\0')
  112. break;
  113. if (Melder_isAsciiDecimalNumber (*p)) {
  114. integer currentElement = Melder_atoi (p);
  115. Melder_require (currentElement != 0,
  116. U"No such ", elementType, U": 0 (minimum is 1).");
  117. Melder_require (currentElement <= maximumElement,
  118. U"No such ", elementType, U": ", currentElement, U" (maximum is ", maximumElement, U").");
  119. numberOfElements += 1;
  120. previousElement = currentElement;
  121. do { p ++; } while (Melder_isAsciiDecimalNumber (*p));
  122. } else if (*p == ':') {
  123. Melder_require (previousElement != 0, U"The range should not start with a colon.");
  124. do { p ++; } while (Melder_isHorizontalSpace (*p));
  125. Melder_require (*p != U'\0',
  126. U"The range should not end with a colon.");
  127. Melder_require (Melder_isAsciiDecimalNumber (*p),
  128. U"End of range should be a positive whole number.");
  129. integer currentElement = Melder_atoi (p);
  130. Melder_require (currentElement != 0,
  131. U"No such ", elementType, U": 0 (minimum is 1).");
  132. Melder_require (currentElement <= maximumElement,
  133. U"No such ", elementType, U": ", currentElement, U" (maximum is ", maximumElement, U").");
  134. if (currentElement > previousElement) {
  135. numberOfElements += currentElement - previousElement;
  136. } else {
  137. numberOfElements += previousElement - currentElement;
  138. }
  139. previousElement = currentElement;
  140. do { p ++; } while (Melder_isAsciiDecimalNumber (*p));
  141. } else {
  142. Melder_throw (U"Start of range should be a positive whole number.");
  143. }
  144. }
  145. /*
  146. Create room for the elements.
  147. */
  148. if (numberOfElements == 0)
  149. Melder_throw (U"No element(s) found");
  150. autoINTVEC elements = INTVECraw (numberOfElements);
  151. /*
  152. Store the elements.
  153. */
  154. previousElement = 0;
  155. numberOfElements = 0;
  156. p = & ranges [0];
  157. for (;;) {
  158. while (Melder_isHorizontalSpace (*p)) p ++;
  159. if (*p == U'\0')
  160. break;
  161. if (Melder_isAsciiDecimalNumber (*p)) {
  162. integer currentElement = Melder_atoi (p);
  163. elements [++ numberOfElements] = currentElement;
  164. previousElement = currentElement;
  165. do { p ++; } while (Melder_isAsciiDecimalNumber (*p));
  166. } else if (*p == U':') {
  167. do { p ++; } while (Melder_isHorizontalSpace (*p));
  168. integer currentElement = Melder_atoi (p);
  169. if (currentElement > previousElement) {
  170. for (integer ielement = previousElement + 1; ielement <= currentElement; ielement ++)
  171. elements [++ numberOfElements] = ielement;
  172. } else {
  173. for (integer ielement = previousElement - 1; ielement >= currentElement; ielement --)
  174. elements [++ numberOfElements] = ielement;
  175. }
  176. previousElement = currentElement;
  177. do { p ++; } while (Melder_isAsciiDecimalNumber (*p));
  178. }
  179. }
  180. return elements;
  181. }
  182. static void NUMlvector_getUniqueNumbers (integer numbers[], integer *inout_numberOfElements) {
  183. Melder_assert (inout_numberOfElements);
  184. autoNUMvector< integer> sorted (NUMvector_copy <integer> (numbers, 1, *inout_numberOfElements), 1);
  185. NUMsort_integer (*inout_numberOfElements, sorted.peek());
  186. integer numberOfMultiples = 0;
  187. numbers [1] = sorted [1];
  188. integer numberOfUniques = 1;
  189. for (integer i = 2; i <= *inout_numberOfElements; i ++) {
  190. if (sorted [i] != sorted [i - 1]) {
  191. numbers [++ numberOfUniques] = sorted [i];
  192. } else {
  193. numberOfMultiples ++;
  194. }
  195. }
  196. *inout_numberOfElements = numberOfUniques;
  197. }
  198. autoINTVEC NUMstring_getElementsOfRanges (conststring32 ranges, integer maximumElement, conststring32 elementType, bool sortedUniques)
  199. {
  200. autoINTVEC elements = getElementsOfRanges (ranges, maximumElement, elementType);
  201. if (sortedUniques) {
  202. integer size = elements.size;
  203. NUMlvector_getUniqueNumbers (elements.at, & size);
  204. elements.resize (size);
  205. }
  206. return elements;
  207. }
  208. char32 * NUMstring_timeNoDot (double time) {
  209. static char32 string [100];
  210. integer seconds = Melder_ifloor (time);
  211. integer ms = Melder_iround ((time - seconds) * 1000.0);
  212. Melder_sprint (string,100, U"_", seconds, U"_", ms);
  213. return string;
  214. }
  215. /* End of file NUMstring.cpp */