Strings_extensions.cpp 9.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293
  1. /* Strings_extensions.cpp
  2. *
  3. * Copyright (C) 1993-2017 David Weenink
  4. *
  5. * This code is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation; either version 2 of the License, or (at
  8. * your option) any later version.
  9. *
  10. * This code is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  13. * General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License
  16. * along with this work. If not, see <http://www.gnu.org/licenses/>.
  17. */
  18. /*
  19. djmw 20011003
  20. djmw 20020813 GPL header
  21. djmw 20030107 Added Strings_setString
  22. djmw 20031212 Added Strings_extractPart
  23. djmw 20040301 Added Strings_createFixedLength.
  24. djmw 20040308 Corrected bug in strings_to_Strings.
  25. djmw 20040427 Strings_append added.
  26. djmw 20040629 Strings_append now accepts an Ordered of Strings.
  27. djmw 20050714 New: Strings_to_Permutation, Strings_and_Permutation_permuteStrings.
  28. djmw 20050721 Extra argument in Strings_to_Permutation.
  29. djmw 20101007 StringsIndex Stringses_to_StringsIndex (Strings me, Strings classes)
  30. djmw 20120407 + Strings_createFromCharacters
  31. djmw 20120813 -Strings_setString
  32. */
  33. #include "Strings_extensions.h"
  34. #include "NUM2.h"
  35. autoStrings Strings_createFixedLength (integer numberOfStrings) {
  36. try {
  37. Melder_require (numberOfStrings > 0, U"The number of strings should be positive.");
  38. autoStrings me = Thing_new (Strings);
  39. my strings = autostring32vector (numberOfStrings);
  40. my numberOfStrings = numberOfStrings;
  41. return me;
  42. } catch (MelderError) {
  43. Melder_throw (U"Strings not created.");
  44. }
  45. }
  46. autoStrings Strings_createAsCharacters (conststring32 string) {
  47. try {
  48. autoStrings me = Thing_new (Strings);
  49. my numberOfStrings = str32len (string);
  50. my strings = autostring32vector (my numberOfStrings);
  51. for (integer i = 1; i <= my numberOfStrings; i ++) {
  52. my strings [i] = Melder_dup (Melder_character (*string ++));
  53. }
  54. return me;
  55. } catch (MelderError) {
  56. Melder_throw (U"Strings from characters not created.");
  57. }
  58. }
  59. autoStrings Strings_createAsTokens (conststring32 token_string, conststring32 separator_string) {
  60. try {
  61. autoStrings me = Thing_new (Strings);
  62. /*
  63. * 1. make a copy
  64. * 2. replace all separators by 0 in the copy
  65. * 3. count the items in the copy
  66. * 4. copy the tokens from the copy to the Strings object
  67. *
  68. * The algorithm is not the most efficient one since the token string is processed 4 times.
  69. * However the steps taken are easy to follow.
  70. */
  71. if (! token_string || token_string [0] == U'\0')
  72. return me;
  73. conststring32 separators = ( separator_string && separator_string [0] != U'\0' ? separator_string : U" " );
  74. autostring32 copy = Melder_dup (token_string);
  75. mutablestring32 tokens = copy.get();
  76. char32 *index = & tokens [0];
  77. integer numberOfTokens = 0;
  78. for (const char32 *indexs = & token_string [0]; *indexs != U'\0'; indexs ++, index ++) {
  79. for (const char32 *s = & separators [0]; *s != U'\0'; s ++) {
  80. if (*index == *s) {
  81. *index = U'\0';
  82. if (index > tokens && *(index - 1) != U'\0')
  83. numberOfTokens ++;
  84. break;
  85. }
  86. }
  87. }
  88. if (*(index - 1) != U'\0') { // if token_string ends with a non-separator
  89. numberOfTokens ++;
  90. }
  91. my numberOfStrings = numberOfTokens;
  92. my strings = autostring32vector (my numberOfStrings);
  93. numberOfTokens = 0;
  94. char32 *start = tokens;
  95. index = & tokens [0];
  96. for (const char32 *indexs = & token_string [0]; *indexs != U'\0'; indexs ++, index ++) {
  97. if (*index == U'\0' && index > tokens && *(index - 1) != U'\0')
  98. my strings [++ numberOfTokens] = Melder_dup (start);
  99. if (*index != U'\0' && index > tokens && *(index - 1) == U'\0')
  100. start = index;
  101. }
  102. if (*(index - 1) != U'\0')
  103. my strings [++ numberOfTokens] = Melder_dup (start);
  104. return me;
  105. } catch (MelderError) {
  106. Melder_throw (U"Strings as tokens not created.");
  107. }
  108. }
  109. integer Strings_findString (Strings me, conststring32 string) {
  110. for (integer i = 1; i <= my numberOfStrings; i ++) {
  111. if (Melder_equ (my strings [i].get(), string)) {
  112. return i;
  113. }
  114. }
  115. return 0;
  116. }
  117. autoStrings Strings_append (OrderedOf<structStrings>* me) {
  118. try {
  119. integer index = 1, numberOfStrings = 0;
  120. for (integer i = 1; i <= my size; i ++) {
  121. Strings s = my at [i];
  122. numberOfStrings += s -> numberOfStrings;
  123. }
  124. autoStrings thee = Strings_createFixedLength (numberOfStrings);
  125. for (integer i = 1; i <= my size; i ++) {
  126. Strings s = my at [i];
  127. for (integer j = 1; j <= s -> numberOfStrings; j ++, index ++)
  128. thy strings [index] = Melder_dup (s -> strings [j].get());
  129. }
  130. return thee;
  131. } catch (MelderError) {
  132. Melder_throw (me, U": not appended.");
  133. }
  134. }
  135. autoStrings Strings_change (Strings me, conststring32 search, conststring32 replace, int maximumNumberOfReplaces, integer *nmatches, integer *nstringmatches, bool use_regexp) {
  136. try {
  137. autoStrings thee = Thing_new (Strings);
  138. autostring32vector strings = string32vector_searchAndReplace (my strings.get(),
  139. search, replace, maximumNumberOfReplaces, nmatches, nstringmatches, use_regexp);
  140. thy numberOfStrings = my numberOfStrings;
  141. thy strings = std::move (strings);
  142. return thee;
  143. } catch (MelderError) {
  144. Melder_throw (me, U": not changed.");
  145. }
  146. }
  147. autoStrings strings_to_Strings (char32 **strings, integer from, integer to) {
  148. try {
  149. autoStrings thee = Strings_createFixedLength (to - from + 1);
  150. for (integer i = from; i <= to; i ++) {
  151. thy strings [i - from + 1] = Melder_dup (strings [i]);
  152. }
  153. return thee;
  154. } catch (MelderError) {
  155. Melder_throw (U"Strings not created.");
  156. }
  157. }
  158. autoStrings Strings_extractPart (Strings me, integer from, integer to) {
  159. try {
  160. Melder_require (from > 0 && from <= to && to <= my numberOfStrings,
  161. U"Strings_extractPart: begin and end should be in interval [1, ", my numberOfStrings, U"].");
  162. return strings_to_Strings (my strings.peek2(), from, to);
  163. } catch (MelderError) {
  164. Melder_throw (me, U": no part extracted.");
  165. }
  166. }
  167. autoPermutation Strings_to_Permutation (Strings me, int sort) { // TODO sort
  168. try {
  169. autoPermutation thee = Permutation_create (my numberOfStrings);
  170. if (sort != 0) {
  171. autoINTVEC index = NUMindexx_s (my strings.get ()); // TODO inplace version
  172. vectorcopy_preallocated (thy p.get(), index.get());
  173. }
  174. return thee;
  175. } catch (MelderError) {
  176. Melder_throw (me, U": no Permutation created.");
  177. }
  178. }
  179. autoStrings Strings_Permutation_permuteStrings (Strings me, Permutation thee) {
  180. try {
  181. Melder_require (my numberOfStrings == thy numberOfElements,
  182. U"Strings_Permutation_permuteStrings: The number of strings and the number of elements in the Permutation should be equal.");
  183. autoStrings him = Strings_createFixedLength (my numberOfStrings);
  184. for (integer i = 1; i <= thy numberOfElements; i ++) {
  185. integer index = thy p [i];
  186. his strings [i] = Melder_dup (my strings [index].get());
  187. }
  188. return him;
  189. } catch (MelderError) {
  190. Melder_throw (me, U": no permuted Strings created.");
  191. }
  192. }
  193. autoStringsIndex Stringses_to_StringsIndex (Strings me, Strings classes) {
  194. try {
  195. autoStringsIndex tmp = Strings_to_StringsIndex (classes);
  196. integer numberOfClasses = tmp -> classes->size;
  197. autoStringsIndex him = StringsIndex_create (my numberOfStrings);
  198. for (integer i = 1; i <= numberOfClasses; i ++) {
  199. SimpleString t = (SimpleString) tmp -> classes->at [i]; // FIXME cast
  200. autoSimpleString t2 = Data_copy (t);
  201. his classes -> addItem_move (t2.move());
  202. }
  203. for (integer j = 1; j <= my numberOfStrings; j ++) {
  204. integer index = 0;
  205. conststring32 stringsj = my strings [j].get();
  206. for (integer i = 1; i <= numberOfClasses; i ++) {
  207. SimpleString ss = (SimpleString) his classes->at [i]; // FIXME cast
  208. if (Melder_equ (stringsj, ss -> string.get())) {
  209. index = i;
  210. break;
  211. }
  212. }
  213. his classIndex [j] = index;
  214. }
  215. return him;
  216. } catch (MelderError) {
  217. Melder_throw (me, U": no StringsIndex created.");
  218. }
  219. }
  220. autoStringsIndex Strings_to_StringsIndex (Strings me) {
  221. try {
  222. autoStringsIndex thee = StringsIndex_create (my numberOfStrings);
  223. autoPermutation sorted = Strings_to_Permutation (me, 1);
  224. integer numberOfClasses = 0;
  225. conststring32 strings = nullptr;
  226. for (integer i = 1; i <= sorted -> numberOfElements; i ++) {
  227. integer index = sorted -> p [i];
  228. conststring32 stringsi = my strings [index].get();
  229. if (i == 1 || ! Melder_equ (strings, stringsi)) {
  230. numberOfClasses ++;
  231. autoSimpleString him = SimpleString_create (stringsi);
  232. thy classes -> addItem_move (him.move());
  233. strings = stringsi;
  234. }
  235. thy classIndex [index] = numberOfClasses;
  236. }
  237. return thee;
  238. } catch (MelderError) {
  239. Melder_throw (me, U": no StringsIndex created.");
  240. }
  241. }
  242. autoStrings StringsIndex_to_Strings (StringsIndex me) {
  243. try {
  244. autoStrings thee = Strings_createFixedLength (my numberOfItems);
  245. for (integer i = 1; i <= thy numberOfStrings; i ++) {
  246. SimpleString s = (SimpleString) my classes->at [my classIndex [i]]; // FIXME cast, FIXME classIndex
  247. thy strings [i] = Melder_dup (s -> string.get());
  248. }
  249. return thee;
  250. } catch (MelderError) {
  251. Melder_throw (me, U": no Strings created.");
  252. }
  253. }
  254. autoStringsIndex Table_to_StringsIndex_column (Table me, integer column) {
  255. try {
  256. Melder_require (column > 0 && column <= my numberOfColumns, U"Invalid column number.");
  257. integer numberOfRows = my rows.size;
  258. Table_numericize_Assert (me, column);
  259. autoNUMvector<char32 *> groupLabels (1, numberOfRows);
  260. for (integer irow = 1; irow <= numberOfRows; irow ++) {
  261. groupLabels [irow] = my rows.at [irow] -> cells [column]. string.get();
  262. }
  263. autoStrings thee = strings_to_Strings (groupLabels.peek(), 1, numberOfRows);
  264. autoStringsIndex him = Strings_to_StringsIndex (thee.get());
  265. return him;
  266. } catch (MelderError) {
  267. Melder_throw (me, U"No StringsIndex created from column ", column, U".");
  268. }
  269. }
  270. /* End of file Strings_extensions.cpp */