STR.cpp 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242
  1. /* STR.cpp
  2. *
  3. * Copyright (C) 2012-2017 David Weenink, 2008,2018 Paul Boersma
  4. *
  5. * This code is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation; either version 2 of the License, or (at
  8. * your option) any later version.
  9. *
  10. * This code is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  13. * See the GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License
  16. * along with this work. If not, see <http://www.gnu.org/licenses/>.
  17. */
  18. #include "melder.h"
  19. autostring32 STRleft (conststring32 str, integer newLength) {
  20. integer length = str32len (str);
  21. if (newLength < 0)
  22. newLength = 0;
  23. if (newLength > length)
  24. newLength = length;
  25. autostring32 result (newLength);
  26. str32ncpy (result.get(), str, newLength);
  27. return result;
  28. }
  29. autostring32 STRmid (conststring32 str, integer startingPosition_1, integer numberOfCharacters) {
  30. integer length = str32len (str), endPosition_1 = startingPosition_1 + numberOfCharacters - 1;
  31. if (startingPosition_1 < 1)
  32. startingPosition_1 = 1;
  33. if (endPosition_1 > length)
  34. endPosition_1 = length;
  35. integer newLength = endPosition_1 - startingPosition_1 + 1;
  36. if (newLength <= 0)
  37. return Melder_dup (U"");
  38. autostring32 result (newLength);
  39. str32ncpy (result.get(), & str [startingPosition_1-1], newLength);
  40. return result;
  41. }
  42. autostring32 STRreplace (conststring32 string,
  43. conststring32 search, conststring32 replace, integer maximumNumberOfReplaces,
  44. integer *out_numberOfMatches)
  45. {
  46. if (string == 0 || search == 0 || replace == 0)
  47. return autostring32();
  48. integer len_string = str32len (string);
  49. if (len_string == 0)
  50. maximumNumberOfReplaces = 1;
  51. integer len_search = str32len (search);
  52. if (len_search == 0)
  53. maximumNumberOfReplaces = 1;
  54. /*
  55. To allocate memory for 'result' only once, we have to know how many
  56. matches will occur.
  57. */
  58. const char32 *pos = & string [0]; // current position / start of current match
  59. integer numberOfMatches = 0;
  60. if (maximumNumberOfReplaces <= 0)
  61. maximumNumberOfReplaces = INTEGER_MAX;
  62. if (len_search == 0) { /* Search is empty string... */
  63. if (len_string == 0)
  64. numberOfMatches = 1; /* ...only matches empty string */
  65. } else {
  66. if (len_string != 0) { /* Because empty string always matches */
  67. while (!! (pos = str32str (pos, search)) && numberOfMatches < maximumNumberOfReplaces) {
  68. pos += len_search;
  69. numberOfMatches ++;
  70. }
  71. }
  72. }
  73. integer len_replace = str32len (replace);
  74. integer len_result = len_string + numberOfMatches * (len_replace - len_search);
  75. autostring32 result (len_result);
  76. const char32 *posp = pos = & string [0];
  77. integer nchar = 0, result_nchar = 0;
  78. for (integer i = 1; i <= numberOfMatches; i ++) {
  79. pos = str32str (pos, search);
  80. /*
  81. Copy gap between end of previous match and start of current.
  82. */
  83. nchar = pos - posp;
  84. if (nchar > 0) {
  85. str32ncpy (& result [result_nchar], posp, nchar);
  86. result_nchar += nchar;
  87. }
  88. /*
  89. Insert the replace string in result.
  90. */
  91. str32ncpy (& result [result_nchar], replace, len_replace);
  92. result_nchar += len_replace;
  93. /*
  94. Next search starts after the match.
  95. */
  96. pos += len_search;
  97. posp = pos;
  98. }
  99. /*
  100. Copy gap between end of match and end of string.
  101. */
  102. pos = string + len_string;
  103. nchar = pos - posp;
  104. if (nchar > 0)
  105. str32ncpy (& result [result_nchar], posp, nchar);
  106. if (out_numberOfMatches)
  107. *out_numberOfMatches = numberOfMatches;
  108. return result;
  109. }
  110. autostring32 STRreplace_regex (conststring32 string,
  111. regexp *compiledSearchRE, conststring32 replaceRE, integer maximumNumberOfReplaces,
  112. integer *out_numberOfMatches)
  113. {
  114. integer buf_nchar = 0; // number of characters in 'buf'
  115. integer gap_copied = 0;
  116. integer nchar;
  117. bool reverse = false;
  118. int errorType;
  119. char32 prev_char = U'\0';
  120. const char32 *pos; // current position in 'string' / start of current match
  121. const char32 *posp; // end of previous match
  122. autostring32 buf;
  123. if (out_numberOfMatches)
  124. *out_numberOfMatches = 0;
  125. if (string == 0 || compiledSearchRE == 0 || replaceRE == 0)
  126. return 0;
  127. integer string_length = str32len (string);
  128. //int replace_length = str32len (replaceRE);
  129. if (string_length == 0)
  130. maximumNumberOfReplaces = 1;
  131. integer i = ( maximumNumberOfReplaces > 0 ? 0 : - string_length );
  132. /*
  133. We do not know the size of the replaced string in advance,
  134. therefore we allocate a replace buffer twice the size of the
  135. original string. After all replaces have taken place we do a
  136. final realloc to the then exactly known size.
  137. If during the replace, the size of the buffer happens to be too
  138. small (this is signalled by the replaceRE function),
  139. we double its size and restart the replace.
  140. */
  141. integer bufferLength = 2 * string_length;
  142. bufferLength = bufferLength < 100 ? 100 : bufferLength;
  143. buf.resize (bufferLength);
  144. pos = posp = string;
  145. while (ExecRE (compiledSearchRE, nullptr, pos, nullptr, reverse, prev_char, U'\0', nullptr, nullptr) &&
  146. i ++ < maximumNumberOfReplaces) {
  147. /*
  148. Copy gap between the end of the previous match and the start
  149. of the current match.
  150. Check buffer overflow. pos == posp ? '\0' : pos [-1],
  151. */
  152. pos = compiledSearchRE -> startp [0];
  153. nchar = pos - posp;
  154. if (nchar > 0 && ! gap_copied) {
  155. if (buf_nchar + nchar > bufferLength) {
  156. bufferLength *= 2;
  157. buf.resize (bufferLength);
  158. }
  159. str32ncpy (buf.get() + buf_nchar, posp, nchar);
  160. buf_nchar += nchar;
  161. }
  162. gap_copied = 1;
  163. /*
  164. Do the substitution. We can only check afterwards for buffer overflow.
  165. SubstituteRE puts null byte at last replaced position and signals when overflow.
  166. */
  167. if (! SubstituteRE (compiledSearchRE, replaceRE, buf.get() + buf_nchar, bufferLength + 1 - buf_nchar, & errorType)) {
  168. if (errorType == 1) { // not enough memory
  169. bufferLength *= 2;
  170. buf.resize (bufferLength);
  171. Melder_clearError ();
  172. i --; // retry
  173. continue;
  174. }
  175. Melder_throw (U"Error during substitution.");
  176. }
  177. // Buffer is not full, get number of characters added;
  178. nchar = str32len (buf.get() + buf_nchar);
  179. buf_nchar += nchar;
  180. // Update next start position in search string.
  181. posp = pos;
  182. pos = (char32 *) compiledSearchRE -> endp [0];
  183. if (pos != posp)
  184. prev_char = pos [-1];
  185. gap_copied = 0;
  186. posp = pos; //pb 20080121
  187. if (out_numberOfMatches)
  188. (*out_numberOfMatches) ++;
  189. // at end of string?
  190. // we need this because .* matches at the end of a string
  191. if (pos - string == string_length)
  192. break;
  193. }
  194. // Copy last part of string to destination string
  195. nchar = (string + string_length) - pos;
  196. bufferLength = buf_nchar + nchar;
  197. buf.resize (bufferLength);
  198. str32ncpy (buf.get() + buf_nchar, pos, nchar);
  199. return buf;
  200. }
  201. autostring32 STRright (conststring32 str, integer newLength) {
  202. integer length = str32len (str);
  203. if (newLength < 0)
  204. newLength = 0;
  205. if (newLength > length)
  206. newLength = length;
  207. return Melder_dup (str + length - newLength);
  208. }
  209. /* End of file STR.cpp */