strings.c 66 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751175217531754175517561757175817591760176117621763176417651766176717681769177017711772177317741775177617771778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483248424852486248724882489249024912492249324942495249624972498249925002501250225032504
  1. /* Copyright 1995-1996,1998,2000-2001,2004,2006,2008-2016,2018-2019
  2. Free Software Foundation, Inc.
  3. This file is part of Guile.
  4. Guile is free software: you can redistribute it and/or modify it
  5. under the terms of the GNU Lesser General Public License as published
  6. by the Free Software Foundation, either version 3 of the License, or
  7. (at your option) any later version.
  8. Guile is distributed in the hope that it will be useful, but WITHOUT
  9. ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  10. FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
  11. License for more details.
  12. You should have received a copy of the GNU Lesser General Public
  13. License along with Guile. If not, see
  14. <https://www.gnu.org/licenses/>. */
  15. #ifdef HAVE_CONFIG_H
  16. # include <config.h>
  17. #endif
  18. #include <alloca.h>
  19. #include <string.h>
  20. #include <stdio.h>
  21. #include <ctype.h>
  22. #include <errno.h>
  23. #include <uninorm.h>
  24. #include <unistr.h>
  25. #include <uniconv.h>
  26. #include <c-strcase.h>
  27. #include <intprops.h>
  28. #include "chars.h"
  29. #include "deprecation.h"
  30. #include "error.h"
  31. #include "generalized-vectors.h"
  32. #include "gsubr.h"
  33. #include "numbers.h"
  34. #include "pairs.h"
  35. #include "ports-internal.h"
  36. #include "ports.h"
  37. #include "private-options.h"
  38. #include "striconveh.h"
  39. #include "symbols.h"
  40. #include "threads.h"
  41. #include "strings.h"
  42. #ifndef SCM_MAX_ALLOCA
  43. # define SCM_MAX_ALLOCA 4096 /* Max bytes per string to allocate via alloca */
  44. #endif
  45. /* {Strings}
  46. */
  47. SCM_SYMBOL (sym_UTF_8, "UTF-8");
  48. SCM_SYMBOL (sym_ISO_8859_1, "ISO-8859-1");
  49. SCM_SYMBOL (sym_error, "error");
  50. /* A stringbuf is a linear buffer of characters. Every string has a
  51. stringbuf. Strings may reference just a slice of a stringbuf; that's
  52. often the case for strings made by the "substring" function.
  53. Stringbufs may hold either 8-bit characters or 32-bit characters. In
  54. either case the characters are Unicode codepoints. "Narrow"
  55. stringbufs thus have the ISO-8859-1 (Latin-1) encoding, and "wide"
  56. stringbufs have the UTF-32 (UCS-4) encoding.
  57. By default, stringbufs are immutable. This enables an O(1)
  58. "substring" operation with no synchronization. A string-set! will
  59. first ensure that the string's stringbuf is mutable, copying the
  60. stringbuf if necessary. This is therefore a copy-on-write
  61. representation. However, taking a substring of a mutable stringbuf
  62. is an O(n) operation as it has to create a new immutable stringbuf.
  63. There are also mutation-sharing substrings as well. */
  64. /* The size in words of the stringbuf header (type tag + size). */
  65. #define STRINGBUF_HEADER_SIZE 2U
  66. #define STRINGBUF_HEADER_BYTES (STRINGBUF_HEADER_SIZE * sizeof (SCM))
  67. #define STRINGBUF_F_WIDE SCM_I_STRINGBUF_F_WIDE
  68. #define STRINGBUF_F_MUTABLE SCM_I_STRINGBUF_F_MUTABLE
  69. #define STRINGBUF_TAG scm_tc7_stringbuf
  70. #define STRINGBUF_WIDE(buf) (SCM_CELL_WORD_0(buf) & STRINGBUF_F_WIDE)
  71. #define STRINGBUF_MUTABLE(buf) (SCM_CELL_WORD_0(buf) & STRINGBUF_F_MUTABLE)
  72. #define STRINGBUF_SET_MUTABLE(buf) \
  73. SCM_SET_CELL_WORD_0 (buf, SCM_CELL_WORD_0 (buf) | STRINGBUF_F_MUTABLE)
  74. #define STRINGBUF_CONTENTS(buf) ((void *) \
  75. SCM_CELL_OBJECT_LOC (buf, \
  76. STRINGBUF_HEADER_SIZE))
  77. #define STRINGBUF_CHARS(buf) ((unsigned char *) STRINGBUF_CONTENTS (buf))
  78. #define STRINGBUF_WIDE_CHARS(buf) ((scm_t_wchar *) STRINGBUF_CONTENTS (buf))
  79. #define STRINGBUF_LENGTH(buf) (SCM_CELL_WORD_1 (buf))
  80. #ifdef SCM_STRING_LENGTH_HISTOGRAM
  81. static size_t lenhist[1001];
  82. #endif
  83. /* Make a stringbuf with space for LEN 8-bit Latin-1-encoded
  84. characters. */
  85. static SCM
  86. make_stringbuf (size_t len)
  87. {
  88. /* XXX - for the benefit of SCM_STRING_CHARS, SCM_SYMBOL_CHARS and
  89. scm_i_symbol_chars, all stringbufs are null-terminated. Once
  90. SCM_STRING_CHARS and SCM_SYMBOL_CHARS are removed and the code
  91. has been changed for scm_i_symbol_chars, this null-termination
  92. can be dropped.
  93. */
  94. SCM buf;
  95. #ifdef SCM_STRING_LENGTH_HISTOGRAM
  96. if (len < 1000)
  97. lenhist[len]++;
  98. else
  99. lenhist[1000]++;
  100. #endif
  101. /* Make sure that the total allocation size will not overflow size_t,
  102. with ~30 extra bytes to spare to avoid an overflow within the
  103. allocator. */
  104. if (INT_ADD_OVERFLOW (len, STRINGBUF_HEADER_BYTES + 32))
  105. scm_num_overflow ("make_stringbuf");
  106. buf = SCM_PACK_POINTER (scm_gc_malloc_pointerless (STRINGBUF_HEADER_BYTES + len + 1,
  107. "string"));
  108. SCM_SET_CELL_TYPE (buf, STRINGBUF_TAG);
  109. SCM_SET_CELL_WORD_1 (buf, (scm_t_bits) len);
  110. STRINGBUF_CHARS (buf)[len] = 0;
  111. return buf;
  112. }
  113. /* Make a stringbuf with space for LEN 32-bit UCS-4-encoded
  114. characters. */
  115. static SCM
  116. make_wide_stringbuf (size_t len)
  117. {
  118. SCM buf;
  119. size_t raw_len;
  120. #ifdef SCM_STRING_LENGTH_HISTOGRAM
  121. if (len < 1000)
  122. lenhist[len]++;
  123. else
  124. lenhist[1000]++;
  125. #endif
  126. /* Make sure that the total allocation size will not overflow size_t,
  127. with ~30 extra bytes to spare to avoid an overflow within the
  128. allocator. */
  129. if (len > (((size_t) -(STRINGBUF_HEADER_BYTES + 32 + sizeof (scm_t_wchar)))
  130. / sizeof (scm_t_wchar)))
  131. scm_num_overflow ("make_wide_stringbuf");
  132. raw_len = (len + 1) * sizeof (scm_t_wchar);
  133. buf = SCM_PACK_POINTER (scm_gc_malloc_pointerless (STRINGBUF_HEADER_BYTES + raw_len,
  134. "string"));
  135. SCM_SET_CELL_TYPE (buf, STRINGBUF_TAG | STRINGBUF_F_WIDE);
  136. SCM_SET_CELL_WORD_1 (buf, (scm_t_bits) len);
  137. STRINGBUF_WIDE_CHARS (buf)[len] = 0;
  138. return buf;
  139. }
  140. /* Return a UCS-4-encoded stringbuf containing the (possibly Latin-1-encoded)
  141. characters from BUF. */
  142. static SCM
  143. wide_stringbuf (SCM buf)
  144. {
  145. SCM new_buf;
  146. if (STRINGBUF_WIDE (buf))
  147. new_buf = buf;
  148. else
  149. {
  150. size_t i, len;
  151. scm_t_wchar *mem;
  152. len = STRINGBUF_LENGTH (buf);
  153. new_buf = make_wide_stringbuf (len);
  154. mem = STRINGBUF_WIDE_CHARS (new_buf);
  155. for (i = 0; i < len; i++)
  156. mem[i] = (scm_t_wchar) STRINGBUF_CHARS (buf)[i];
  157. mem[len] = 0;
  158. }
  159. return new_buf;
  160. }
  161. /* Return a Latin-1-encoded stringbuf containing the (possibly UCS-4-encoded)
  162. characters from BUF, if possible. */
  163. static SCM
  164. narrow_stringbuf (SCM buf)
  165. {
  166. SCM new_buf;
  167. if (!STRINGBUF_WIDE (buf))
  168. new_buf = buf;
  169. else
  170. {
  171. size_t i, len;
  172. scm_t_wchar *wmem;
  173. unsigned char *mem;
  174. len = STRINGBUF_LENGTH (buf);
  175. wmem = STRINGBUF_WIDE_CHARS (buf);
  176. for (i = 0; i < len; i++)
  177. if (wmem[i] > 0xFF)
  178. /* BUF cannot be narrowed. */
  179. return buf;
  180. new_buf = make_stringbuf (len);
  181. mem = STRINGBUF_CHARS (new_buf);
  182. for (i = 0; i < len; i++)
  183. mem[i] = (unsigned char) wmem[i];
  184. mem[len] = 0;
  185. }
  186. return new_buf;
  187. }
  188. /* Copy-on-write strings.
  189. */
  190. #define STRING_TAG scm_tc7_string
  191. #define STRING_STRINGBUF(str) (SCM_CELL_OBJECT_1(str))
  192. #define STRING_START(str) ((size_t)SCM_CELL_WORD_2(str))
  193. #define STRING_LENGTH(str) ((size_t)SCM_CELL_WORD_3(str))
  194. #define SET_STRING_STRINGBUF(str,buf) (SCM_SET_CELL_OBJECT_1(str,buf))
  195. #define SET_STRING_START(str,start) (SCM_SET_CELL_WORD_2(str,start))
  196. #define IS_STRING(str) (SCM_HAS_TYP7 (str, STRING_TAG))
  197. /* Read-only strings.
  198. */
  199. #define RO_STRING_TAG scm_tc7_ro_string
  200. #define IS_RO_STRING(str) (SCM_CELL_TYPE(str)==RO_STRING_TAG)
  201. /* Mutation-sharing substrings
  202. */
  203. #define SH_STRING_TAG (scm_tc7_string + 0x100)
  204. #define SH_STRING_STRING(sh) (SCM_CELL_OBJECT_1(sh))
  205. /* START and LENGTH as for STRINGs. */
  206. #define IS_SH_STRING(str) (SCM_CELL_TYPE(str)==SH_STRING_TAG)
  207. void
  208. scm_i_print_stringbuf (SCM exp, SCM port, scm_print_state *pstate)
  209. {
  210. SCM str = scm_double_cell (STRING_TAG, SCM_UNPACK(exp), 0,
  211. STRINGBUF_LENGTH (exp));
  212. scm_puts ("#<stringbuf ", port);
  213. scm_iprin1 (str, port, pstate);
  214. scm_puts (">", port);
  215. }
  216. SCM scm_nullstr;
  217. static SCM null_stringbuf;
  218. static void
  219. init_null_stringbuf (void)
  220. {
  221. null_stringbuf = make_stringbuf (0);
  222. }
  223. /* Create a scheme string with space for LEN 8-bit Latin-1-encoded
  224. characters. CHARSP, if not NULL, will be set to location of the
  225. char array. If READ_ONLY_P, the returned string is read-only;
  226. otherwise it is writable. */
  227. SCM
  228. scm_i_make_string (size_t len, char **charsp, int read_only_p)
  229. {
  230. SCM buf;
  231. SCM res;
  232. if (len == 0)
  233. {
  234. static scm_i_pthread_once_t once = SCM_I_PTHREAD_ONCE_INIT;
  235. scm_i_pthread_once (&once, init_null_stringbuf);
  236. buf = null_stringbuf;
  237. }
  238. else
  239. buf = make_stringbuf (len);
  240. if (charsp)
  241. *charsp = (char *) STRINGBUF_CHARS (buf);
  242. res = scm_double_cell (read_only_p ? RO_STRING_TAG : STRING_TAG,
  243. SCM_UNPACK (buf),
  244. (scm_t_bits) 0, (scm_t_bits) len);
  245. return res;
  246. }
  247. /* Create a scheme string with space for LEN 32-bit UCS-4-encoded
  248. characters. CHARSP, if not NULL, will be set to location of the
  249. character array. If READ_ONLY_P, the returned string is read-only;
  250. otherwise it is writable. */
  251. SCM
  252. scm_i_make_wide_string (size_t len, scm_t_wchar **charsp, int read_only_p)
  253. {
  254. SCM buf = make_wide_stringbuf (len);
  255. SCM res;
  256. if (charsp)
  257. *charsp = STRINGBUF_WIDE_CHARS (buf);
  258. res = scm_double_cell (read_only_p ? RO_STRING_TAG : STRING_TAG,
  259. SCM_UNPACK (buf),
  260. (scm_t_bits) 0, (scm_t_bits) len);
  261. return res;
  262. }
  263. static void
  264. validate_substring_args (SCM str, size_t start, size_t end)
  265. {
  266. if (!IS_STRING (str))
  267. scm_wrong_type_arg_msg (NULL, 0, str, "string");
  268. if (start > STRING_LENGTH (str))
  269. scm_out_of_range (NULL, scm_from_size_t (start));
  270. if (end > STRING_LENGTH (str) || end < start)
  271. scm_out_of_range (NULL, scm_from_size_t (end));
  272. }
  273. static inline void
  274. get_str_buf_start (SCM *str, SCM *buf, size_t *start)
  275. {
  276. *start = STRING_START (*str);
  277. if (IS_SH_STRING (*str))
  278. {
  279. *str = SH_STRING_STRING (*str);
  280. *start += STRING_START (*str);
  281. }
  282. *buf = STRING_STRINGBUF (*str);
  283. }
  284. static SCM
  285. substring_with_immutable_stringbuf (SCM str, size_t start, size_t end,
  286. int force_copy_p, int read_only_p)
  287. {
  288. SCM buf;
  289. size_t str_start, len;
  290. scm_t_bits tag = read_only_p ? RO_STRING_TAG : STRING_TAG;
  291. get_str_buf_start (&str, &buf, &str_start);
  292. len = end - start;
  293. start += str_start;
  294. if (len == 0)
  295. return scm_i_make_string (0, NULL, read_only_p);
  296. else if (!force_copy_p && SCM_LIKELY (!STRINGBUF_MUTABLE (buf)))
  297. return scm_double_cell (tag, SCM_UNPACK (buf), start, len);
  298. else
  299. {
  300. SCM new_buf, new_str;
  301. if (STRINGBUF_WIDE (buf))
  302. {
  303. new_buf = make_wide_stringbuf (len);
  304. u32_cpy ((uint32_t *) STRINGBUF_WIDE_CHARS (new_buf),
  305. (uint32_t *) (STRINGBUF_WIDE_CHARS (buf) + start), len);
  306. new_str = scm_double_cell (tag, SCM_UNPACK (new_buf), 0, len);
  307. scm_i_try_narrow_string (new_str);
  308. }
  309. else
  310. {
  311. new_buf = make_stringbuf (len);
  312. memcpy (STRINGBUF_CHARS (new_buf),
  313. STRINGBUF_CHARS (buf) + start, len);
  314. new_str = scm_double_cell (tag, SCM_UNPACK (new_buf), 0, len);
  315. }
  316. return new_str;
  317. }
  318. }
  319. SCM
  320. scm_i_substring (SCM str, size_t start, size_t end)
  321. {
  322. return substring_with_immutable_stringbuf (str, start, end, 0, 0);
  323. }
  324. SCM
  325. scm_i_substring_read_only (SCM str, size_t start, size_t end)
  326. {
  327. return substring_with_immutable_stringbuf (str, start, end, 0, 1);
  328. }
  329. SCM
  330. scm_i_substring_copy (SCM str, size_t start, size_t end)
  331. {
  332. return substring_with_immutable_stringbuf (str, start, end, 1, 0);
  333. }
  334. static void
  335. scm_i_string_ensure_mutable_x (SCM str)
  336. {
  337. SCM buf;
  338. if (IS_SH_STRING (str))
  339. {
  340. /* Shared-mutation strings always have mutable stringbufs. */
  341. buf = STRING_STRINGBUF (SH_STRING_STRING (str));
  342. if (!STRINGBUF_MUTABLE (buf))
  343. abort ();
  344. return;
  345. }
  346. if (IS_RO_STRING (str))
  347. scm_misc_error (NULL, "string is read-only: ~s", scm_list_1 (str));
  348. buf = STRING_STRINGBUF (str);
  349. if (STRINGBUF_MUTABLE (buf))
  350. return;
  351. /* Otherwise copy and mark the fresh stringbuf as mutable. Note that
  352. we copy the whole stringbuf so that the start/len offsets from the
  353. original string keep working, so that concurrent accessors on this
  354. string don't see things in an inconsistent state. */
  355. {
  356. SCM new_buf;
  357. size_t len = STRINGBUF_LENGTH (buf);
  358. if (STRINGBUF_WIDE (buf))
  359. {
  360. new_buf = make_wide_stringbuf (len);
  361. u32_cpy ((uint32_t *) STRINGBUF_WIDE_CHARS (new_buf),
  362. (uint32_t *) STRINGBUF_WIDE_CHARS (buf), len);
  363. }
  364. else
  365. {
  366. new_buf = make_stringbuf (len);
  367. memcpy (STRINGBUF_CHARS (new_buf), STRINGBUF_CHARS (buf), len);
  368. }
  369. STRINGBUF_SET_MUTABLE (new_buf);
  370. SET_STRING_STRINGBUF (str, new_buf);
  371. }
  372. }
  373. SCM
  374. scm_i_substring_shared (SCM str, size_t start, size_t end)
  375. {
  376. if (start == 0 && end == STRING_LENGTH (str))
  377. return str;
  378. else if (start == end)
  379. return scm_i_make_string (0, NULL, 0);
  380. else if (IS_RO_STRING (str))
  381. return scm_i_substring_read_only (str, start, end);
  382. else
  383. {
  384. size_t len = end - start;
  385. if (IS_SH_STRING (str))
  386. {
  387. start += STRING_START (str);
  388. str = SH_STRING_STRING (str);
  389. }
  390. scm_i_string_ensure_mutable_x (str);
  391. return scm_double_cell (SH_STRING_TAG, SCM_UNPACK(str),
  392. (scm_t_bits)start, (scm_t_bits) len);
  393. }
  394. }
  395. SCM
  396. scm_c_substring (SCM str, size_t start, size_t end)
  397. {
  398. validate_substring_args (str, start, end);
  399. return scm_i_substring (str, start, end);
  400. }
  401. SCM
  402. scm_c_substring_read_only (SCM str, size_t start, size_t end)
  403. {
  404. validate_substring_args (str, start, end);
  405. return scm_i_substring_read_only (str, start, end);
  406. }
  407. SCM
  408. scm_c_substring_copy (SCM str, size_t start, size_t end)
  409. {
  410. validate_substring_args (str, start, end);
  411. return scm_i_substring_copy (str, start, end);
  412. }
  413. SCM
  414. scm_c_substring_shared (SCM str, size_t start, size_t end)
  415. {
  416. validate_substring_args (str, start, end);
  417. return scm_i_substring_shared (str, start, end);
  418. }
  419. /* Internal accessors
  420. */
  421. /* Returns the number of characters in STR. This may be different
  422. than the memory size of the string storage. */
  423. size_t
  424. scm_i_string_length (SCM str)
  425. {
  426. return STRING_LENGTH (str);
  427. }
  428. int
  429. scm_i_string_is_mutable (SCM str)
  430. {
  431. return !IS_RO_STRING (str);
  432. }
  433. /* True if the string is 'narrow', meaning it has a 8-bit Latin-1
  434. encoding. False if it is 'wide', having a 32-bit UCS-4
  435. encoding. */
  436. int
  437. scm_i_is_narrow_string (SCM str)
  438. {
  439. if (IS_SH_STRING (str))
  440. str = SH_STRING_STRING (str);
  441. return !STRINGBUF_WIDE (STRING_STRINGBUF (str));
  442. }
  443. /* Try to coerce a string to be narrow. It if is narrow already, do
  444. nothing. If it is wide, shrink it to narrow if none of its
  445. characters are above 0xFF. Return true if the string is narrow or
  446. was made to be narrow. */
  447. int
  448. scm_i_try_narrow_string (SCM str)
  449. {
  450. if (IS_SH_STRING (str))
  451. str = SH_STRING_STRING (str);
  452. SET_STRING_STRINGBUF (str, narrow_stringbuf (STRING_STRINGBUF (str)));
  453. return scm_i_is_narrow_string (str);
  454. }
  455. /* Return a pointer to the raw data of the string, which can be either Latin-1
  456. or UCS-4 encoded data, depending on `scm_i_is_narrow_string (STR)'. */
  457. const void *
  458. scm_i_string_data (SCM str)
  459. {
  460. SCM buf;
  461. size_t start;
  462. const char *data;
  463. get_str_buf_start (&str, &buf, &start);
  464. data = STRINGBUF_CONTENTS (buf);
  465. data += start * (scm_i_is_narrow_string (str) ? 1 : 4);
  466. return data;
  467. }
  468. /* Returns a pointer to the 8-bit Latin-1 encoded character array of
  469. STR. */
  470. const char *
  471. scm_i_string_chars (SCM str)
  472. {
  473. SCM buf;
  474. size_t start;
  475. get_str_buf_start (&str, &buf, &start);
  476. if (scm_i_is_narrow_string (str))
  477. return (const char *) STRINGBUF_CHARS (buf) + start;
  478. else
  479. scm_misc_error (NULL, "Invalid read access of chars of wide string: ~s",
  480. scm_list_1 (str));
  481. return NULL;
  482. }
  483. /* Returns a pointer to the 32-bit UCS-4 encoded character array of
  484. STR. */
  485. const scm_t_wchar *
  486. scm_i_string_wide_chars (SCM str)
  487. {
  488. SCM buf;
  489. size_t start;
  490. get_str_buf_start (&str, &buf, &start);
  491. if (!scm_i_is_narrow_string (str))
  492. return (const scm_t_wchar *) STRINGBUF_WIDE_CHARS (buf) + start;
  493. else
  494. scm_misc_error (NULL, "Invalid read access of chars of narrow string: ~s",
  495. scm_list_1 (str));
  496. }
  497. /* If the buffer in ORIG_STR is immutable, copy ORIG_STR's characters to
  498. a new string buffer, so that it can be modified without modifying
  499. other strings. */
  500. SCM
  501. scm_i_string_start_writing (SCM orig_str)
  502. {
  503. scm_i_string_ensure_mutable_x (orig_str);
  504. return orig_str;
  505. }
  506. /* Return a pointer to the 8-bit Latin-1 chars of a string. */
  507. char *
  508. scm_i_string_writable_chars (SCM str)
  509. {
  510. SCM buf;
  511. size_t start;
  512. get_str_buf_start (&str, &buf, &start);
  513. if (scm_i_is_narrow_string (str))
  514. return (char *) STRINGBUF_CHARS (buf) + start;
  515. else
  516. scm_misc_error (NULL, "Invalid write access of chars of wide string: ~s",
  517. scm_list_1 (str));
  518. return NULL;
  519. }
  520. /* Return a pointer to the UCS-4 codepoints of a string. */
  521. static scm_t_wchar *
  522. scm_i_string_writable_wide_chars (SCM str)
  523. {
  524. SCM buf;
  525. size_t start;
  526. get_str_buf_start (&str, &buf, &start);
  527. if (!scm_i_is_narrow_string (str))
  528. return STRINGBUF_WIDE_CHARS (buf) + start;
  529. else
  530. scm_misc_error (NULL, "Invalid write access of chars of narrow string: ~s",
  531. scm_list_1 (str));
  532. }
  533. /* Unlock the string mutex that was locked when
  534. scm_i_string_start_writing was called. */
  535. void
  536. scm_i_string_stop_writing (void)
  537. {
  538. }
  539. /* Return the Xth character of STR as a UCS-4 codepoint. */
  540. scm_t_wchar
  541. scm_i_string_ref (SCM str, size_t x)
  542. {
  543. if (scm_i_is_narrow_string (str))
  544. return (scm_t_wchar) (unsigned char) (scm_i_string_chars (str)[x]);
  545. else
  546. return scm_i_string_wide_chars (str)[x];
  547. }
  548. /* Returns index+1 of the first char in STR that matches C, or
  549. 0 if the char is not found. */
  550. int
  551. scm_i_string_contains_char (SCM str, char ch)
  552. {
  553. size_t i;
  554. size_t len = scm_i_string_length (str);
  555. i = 0;
  556. if (scm_i_is_narrow_string (str))
  557. {
  558. while (i < len)
  559. {
  560. if (scm_i_string_chars (str)[i] == ch)
  561. return i+1;
  562. i++;
  563. }
  564. }
  565. else
  566. {
  567. while (i < len)
  568. {
  569. if (scm_i_string_wide_chars (str)[i]
  570. == (unsigned char) ch)
  571. return i+1;
  572. i++;
  573. }
  574. }
  575. return 0;
  576. }
  577. int
  578. scm_i_string_strcmp (SCM sstr, size_t start_x, const char *cstr)
  579. {
  580. if (scm_i_is_narrow_string (sstr))
  581. {
  582. const char *a = scm_i_string_chars (sstr) + start_x;
  583. const char *b = cstr;
  584. return strncmp (a, b, strlen(b));
  585. }
  586. else
  587. {
  588. size_t i;
  589. const scm_t_wchar *a = scm_i_string_wide_chars (sstr) + start_x;
  590. const char *b = cstr;
  591. for (i = 0; i < strlen (b); i++)
  592. {
  593. if (a[i] != (unsigned char) b[i])
  594. return 1;
  595. }
  596. }
  597. return 0;
  598. }
  599. /* Set the Pth character of STR to UCS-4 codepoint CHR. */
  600. void
  601. scm_i_string_set_x (SCM str, size_t p, scm_t_wchar chr)
  602. {
  603. if (IS_SH_STRING (str))
  604. {
  605. p += STRING_START (str);
  606. str = SH_STRING_STRING (str);
  607. }
  608. if (chr > 0xFF && scm_i_is_narrow_string (str))
  609. SET_STRING_STRINGBUF (str, wide_stringbuf (STRING_STRINGBUF (str)));
  610. if (scm_i_is_narrow_string (str))
  611. {
  612. char *dst = scm_i_string_writable_chars (str);
  613. dst[p] = chr;
  614. }
  615. else
  616. {
  617. scm_t_wchar *dst = scm_i_string_writable_wide_chars (str);
  618. dst[p] = chr;
  619. }
  620. }
  621. /* Symbols.
  622. Basic symbol creation and accessing is done here, the rest is in
  623. symbols.[hc]. This has been done to keep stringbufs and the
  624. internals of strings and string-like objects confined to this file.
  625. */
  626. #define SYMBOL_STRINGBUF SCM_CELL_OBJECT_1
  627. SCM
  628. scm_i_make_symbol (SCM name, scm_t_bits flags, unsigned long hash)
  629. {
  630. SCM buf, symbol;
  631. size_t start, length = STRING_LENGTH (name);
  632. get_str_buf_start (&name, &buf, &start);
  633. if (SCM_UNLIKELY (STRINGBUF_MUTABLE (buf)
  634. || start != 0
  635. || STRINGBUF_LENGTH (buf) != length))
  636. {
  637. name = scm_i_substring_copy (name, 0, length);
  638. buf = STRING_STRINGBUF (name);
  639. }
  640. symbol = scm_words (scm_tc7_symbol | flags, 3);
  641. SCM_SET_CELL_WORD_1 (symbol, SCM_UNPACK (buf));
  642. SCM_SET_CELL_WORD_2 (symbol, hash);
  643. return symbol;
  644. }
  645. /* Returns the number of characters in SYM. This may be different
  646. from the memory size of SYM. */
  647. size_t
  648. scm_i_symbol_length (SCM sym)
  649. {
  650. return STRINGBUF_LENGTH (SYMBOL_STRINGBUF (sym));
  651. }
  652. size_t
  653. scm_c_symbol_length (SCM sym)
  654. #define FUNC_NAME "scm_c_symbol_length"
  655. {
  656. SCM_VALIDATE_SYMBOL (1, sym);
  657. return STRINGBUF_LENGTH (SYMBOL_STRINGBUF (sym));
  658. }
  659. #undef FUNC_NAME
  660. /* True if the name of SYM is stored as a Latin-1 encoded string.
  661. False if it is stored as a 32-bit UCS-4-encoded string. */
  662. int
  663. scm_i_is_narrow_symbol (SCM sym)
  664. {
  665. SCM buf;
  666. buf = SYMBOL_STRINGBUF (sym);
  667. return !STRINGBUF_WIDE (buf);
  668. }
  669. /* Returns a pointer to the 8-bit Latin-1 encoded character array that
  670. contains the name of SYM. */
  671. const char *
  672. scm_i_symbol_chars (SCM sym)
  673. {
  674. SCM buf;
  675. buf = SYMBOL_STRINGBUF (sym);
  676. if (!STRINGBUF_WIDE (buf))
  677. return (const char *) STRINGBUF_CHARS (buf);
  678. else
  679. scm_misc_error (NULL, "Invalid access of chars of a wide symbol ~S",
  680. scm_list_1 (sym));
  681. }
  682. /* Return a pointer to the 32-bit UCS-4-encoded character array of a
  683. symbol's name. */
  684. const scm_t_wchar *
  685. scm_i_symbol_wide_chars (SCM sym)
  686. {
  687. SCM buf;
  688. buf = SYMBOL_STRINGBUF (sym);
  689. if (STRINGBUF_WIDE (buf))
  690. return (const scm_t_wchar *) STRINGBUF_WIDE_CHARS (buf);
  691. else
  692. scm_misc_error (NULL, "Invalid access of chars of a narrow symbol ~S",
  693. scm_list_1 (sym));
  694. }
  695. SCM
  696. scm_i_symbol_substring (SCM sym, size_t start, size_t end)
  697. {
  698. SCM buf = SYMBOL_STRINGBUF (sym);
  699. return scm_double_cell (RO_STRING_TAG, SCM_UNPACK (buf),
  700. (scm_t_bits)start, (scm_t_bits) end - start);
  701. }
  702. /* Returns the Xth character of symbol SYM as a UCS-4 codepoint. */
  703. scm_t_wchar
  704. scm_i_symbol_ref (SCM sym, size_t x)
  705. {
  706. if (scm_i_is_narrow_symbol (sym))
  707. return (scm_t_wchar) (unsigned char) (scm_i_symbol_chars (sym)[x]);
  708. else
  709. return scm_i_symbol_wide_chars (sym)[x];
  710. }
  711. /* Debugging
  712. */
  713. SCM_DEFINE (scm_sys_string_dump, "%string-dump", 1, 0, 0, (SCM str),
  714. "Returns an association list containing debugging information\n"
  715. "for @var{str}. The association list has the following entries."
  716. "@table @code\n"
  717. "@item string\n"
  718. "The string itself.\n"
  719. "@item start\n"
  720. "The start index of the string into its stringbuf\n"
  721. "@item length\n"
  722. "The length of the string\n"
  723. "@item shared\n"
  724. "If this string is a substring, it returns its parent string.\n"
  725. "Otherwise, it returns @code{#f}\n"
  726. "@item read-only\n"
  727. "@code{#t} if the string is read-only\n"
  728. "@item stringbuf-chars\n"
  729. "A new string containing this string's stringbuf's characters\n"
  730. "@item stringbuf-length\n"
  731. "The number of characters in this stringbuf\n"
  732. "@item stringbuf-mutable\n"
  733. "@code{#t} if this stringbuf is mutable\n"
  734. "@item stringbuf-wide\n"
  735. "@code{#t} if this stringbuf's characters are stored in a\n"
  736. "32-bit buffer, or @code{#f} if they are stored in an 8-bit\n"
  737. "buffer\n"
  738. "@end table")
  739. #define FUNC_NAME s_scm_sys_string_dump
  740. {
  741. SCM e1, e2, e3, e4, e5, e6, e7, e8, e9;
  742. SCM buf;
  743. SCM_VALIDATE_STRING (1, str);
  744. /* String info */
  745. e1 = scm_cons (scm_from_latin1_symbol ("string"),
  746. str);
  747. e2 = scm_cons (scm_from_latin1_symbol ("start"),
  748. scm_from_size_t (STRING_START (str)));
  749. e3 = scm_cons (scm_from_latin1_symbol ("length"),
  750. scm_from_size_t (STRING_LENGTH (str)));
  751. if (IS_SH_STRING (str))
  752. {
  753. e4 = scm_cons (scm_from_latin1_symbol ("shared"),
  754. SH_STRING_STRING (str));
  755. buf = STRING_STRINGBUF (SH_STRING_STRING (str));
  756. }
  757. else
  758. {
  759. e4 = scm_cons (scm_from_latin1_symbol ("shared"),
  760. SCM_BOOL_F);
  761. buf = STRING_STRINGBUF (str);
  762. }
  763. if (IS_RO_STRING (str))
  764. e5 = scm_cons (scm_from_latin1_symbol ("read-only"),
  765. SCM_BOOL_T);
  766. else
  767. e5 = scm_cons (scm_from_latin1_symbol ("read-only"),
  768. SCM_BOOL_F);
  769. /* Stringbuf info */
  770. if (!STRINGBUF_WIDE (buf))
  771. {
  772. size_t len = STRINGBUF_LENGTH (buf);
  773. char *cbuf;
  774. SCM sbc = scm_i_make_string (len, &cbuf, 0);
  775. memcpy (cbuf, STRINGBUF_CHARS (buf), len);
  776. e6 = scm_cons (scm_from_latin1_symbol ("stringbuf-chars"),
  777. sbc);
  778. }
  779. else
  780. {
  781. size_t len = STRINGBUF_LENGTH (buf);
  782. scm_t_wchar *cbuf;
  783. SCM sbc = scm_i_make_wide_string (len, &cbuf, 0);
  784. u32_cpy ((uint32_t *) cbuf,
  785. (uint32_t *) STRINGBUF_WIDE_CHARS (buf), len);
  786. e6 = scm_cons (scm_from_latin1_symbol ("stringbuf-chars"),
  787. sbc);
  788. }
  789. e7 = scm_cons (scm_from_latin1_symbol ("stringbuf-length"),
  790. scm_from_size_t (STRINGBUF_LENGTH (buf)));
  791. if (STRINGBUF_MUTABLE (buf))
  792. e8 = scm_cons (scm_from_latin1_symbol ("stringbuf-mutable"),
  793. SCM_BOOL_T);
  794. else
  795. e8 = scm_cons (scm_from_latin1_symbol ("stringbuf-mutable"),
  796. SCM_BOOL_F);
  797. if (STRINGBUF_WIDE (buf))
  798. e9 = scm_cons (scm_from_latin1_symbol ("stringbuf-wide"),
  799. SCM_BOOL_T);
  800. else
  801. e9 = scm_cons (scm_from_latin1_symbol ("stringbuf-wide"),
  802. SCM_BOOL_F);
  803. return scm_list_n (e1, e2, e3, e4, e5, e6, e7, e8, e9, SCM_UNDEFINED);
  804. }
  805. #undef FUNC_NAME
  806. SCM_DEFINE (scm_sys_symbol_dump, "%symbol-dump", 1, 0, 0, (SCM sym),
  807. "Returns an association list containing debugging information\n"
  808. "for @var{sym}. The association list has the following entries."
  809. "@table @code\n"
  810. "@item symbol\n"
  811. "The symbol itself\n"
  812. "@item hash\n"
  813. "Its hash value\n"
  814. "@item interned\n"
  815. "@code{#t} if it is an interned symbol\n"
  816. "@item stringbuf-chars\n"
  817. "A new string containing this symbols's stringbuf's characters\n"
  818. "@item stringbuf-length\n"
  819. "The number of characters in this stringbuf\n"
  820. "@item stringbuf-mutable\n"
  821. "@code{#t} if this stringbuf is mutable\n"
  822. "@item stringbuf-wide\n"
  823. "@code{#t} if this stringbuf's characters are stored in a\n"
  824. "32-bit buffer, or @code{#f} if they are stored in an 8-bit\n"
  825. "buffer\n"
  826. "@end table")
  827. #define FUNC_NAME s_scm_sys_symbol_dump
  828. {
  829. SCM e1, e2, e3, e4, e5, e6, e7;
  830. SCM buf;
  831. SCM_VALIDATE_SYMBOL (1, sym);
  832. e1 = scm_cons (scm_from_latin1_symbol ("symbol"),
  833. sym);
  834. e2 = scm_cons (scm_from_latin1_symbol ("hash"),
  835. scm_from_ulong (scm_i_symbol_hash (sym)));
  836. e3 = scm_cons (scm_from_latin1_symbol ("interned"),
  837. scm_symbol_interned_p (sym));
  838. buf = SYMBOL_STRINGBUF (sym);
  839. /* Stringbuf info */
  840. if (!STRINGBUF_WIDE (buf))
  841. {
  842. size_t len = STRINGBUF_LENGTH (buf);
  843. char *cbuf;
  844. SCM sbc = scm_i_make_string (len, &cbuf, 0);
  845. memcpy (cbuf, STRINGBUF_CHARS (buf), len);
  846. e4 = scm_cons (scm_from_latin1_symbol ("stringbuf-chars"),
  847. sbc);
  848. }
  849. else
  850. {
  851. size_t len = STRINGBUF_LENGTH (buf);
  852. scm_t_wchar *cbuf;
  853. SCM sbc = scm_i_make_wide_string (len, &cbuf, 0);
  854. u32_cpy ((uint32_t *) cbuf,
  855. (uint32_t *) STRINGBUF_WIDE_CHARS (buf), len);
  856. e4 = scm_cons (scm_from_latin1_symbol ("stringbuf-chars"),
  857. sbc);
  858. }
  859. e5 = scm_cons (scm_from_latin1_symbol ("stringbuf-length"),
  860. scm_from_size_t (STRINGBUF_LENGTH (buf)));
  861. if (STRINGBUF_MUTABLE (buf))
  862. e6 = scm_cons (scm_from_latin1_symbol ("stringbuf-mutable"),
  863. SCM_BOOL_T);
  864. else
  865. e6 = scm_cons (scm_from_latin1_symbol ("stringbuf-mutable"),
  866. SCM_BOOL_F);
  867. if (STRINGBUF_WIDE (buf))
  868. e7 = scm_cons (scm_from_latin1_symbol ("stringbuf-wide"),
  869. SCM_BOOL_T);
  870. else
  871. e7 = scm_cons (scm_from_latin1_symbol ("stringbuf-wide"),
  872. SCM_BOOL_F);
  873. return scm_list_n (e1, e2, e3, e4, e5, e6, e7, SCM_UNDEFINED);
  874. }
  875. #undef FUNC_NAME
  876. #ifdef SCM_STRING_LENGTH_HISTOGRAM
  877. SCM_DEFINE (scm_sys_stringbuf_hist, "%stringbuf-hist", 0, 0, 0, (void), "")
  878. #define FUNC_NAME s_scm_sys_stringbuf_hist
  879. {
  880. int i;
  881. for (i = 0; i < 1000; i++)
  882. if (lenhist[i])
  883. fprintf (stderr, " %3d: %u\n", i, lenhist[i]);
  884. fprintf (stderr, ">999: %u\n", lenhist[1000]);
  885. return SCM_UNSPECIFIED;
  886. }
  887. #undef FUNC_NAME
  888. #endif
  889. SCM_DEFINE (scm_string_p, "string?", 1, 0, 0,
  890. (SCM obj),
  891. "Return @code{#t} if @var{obj} is a string, else @code{#f}.")
  892. #define FUNC_NAME s_scm_string_p
  893. {
  894. return scm_from_bool (IS_STRING (obj));
  895. }
  896. #undef FUNC_NAME
  897. SCM_REGISTER_PROC (s_scm_list_to_string, "list->string", 1, 0, 0, scm_string);
  898. SCM_DEFINE (scm_string, "string", 0, 0, 1,
  899. (SCM chrs),
  900. "@deffnx {Scheme Procedure} list->string chrs\n"
  901. "Return a newly allocated string composed of the arguments,\n"
  902. "@var{chrs}.")
  903. #define FUNC_NAME s_scm_string
  904. {
  905. SCM result = SCM_BOOL_F;
  906. SCM rest;
  907. size_t len;
  908. size_t p = 0;
  909. long i;
  910. int wide = 0;
  911. /* Verify that this is a list of chars. */
  912. i = scm_ilength (chrs);
  913. SCM_ASSERT (i >= 0, chrs, SCM_ARG1, FUNC_NAME);
  914. len = (size_t) i;
  915. rest = chrs;
  916. while (len > 0 && scm_is_pair (rest))
  917. {
  918. SCM elt = SCM_CAR (rest);
  919. SCM_VALIDATE_CHAR (SCM_ARGn, elt);
  920. if (SCM_CHAR (elt) > 0xFF)
  921. wide = 1;
  922. rest = SCM_CDR (rest);
  923. len--;
  924. scm_remember_upto_here_1 (elt);
  925. }
  926. /* Construct a string containing this list of chars. */
  927. len = (size_t) i;
  928. rest = chrs;
  929. if (wide == 0)
  930. {
  931. char *buf;
  932. result = scm_i_make_string (len, NULL, 0);
  933. buf = scm_i_string_writable_chars (result);
  934. while (len > 0 && scm_is_pair (rest))
  935. {
  936. SCM elt = SCM_CAR (rest);
  937. buf[p] = (unsigned char) SCM_CHAR (elt);
  938. p++;
  939. rest = SCM_CDR (rest);
  940. len--;
  941. scm_remember_upto_here_1 (elt);
  942. }
  943. }
  944. else
  945. {
  946. scm_t_wchar *buf;
  947. result = scm_i_make_wide_string (len, NULL, 0);
  948. buf = scm_i_string_writable_wide_chars (result);
  949. while (len > 0 && scm_is_pair (rest))
  950. {
  951. SCM elt = SCM_CAR (rest);
  952. buf[p] = SCM_CHAR (elt);
  953. p++;
  954. rest = SCM_CDR (rest);
  955. len--;
  956. scm_remember_upto_here_1 (elt);
  957. }
  958. }
  959. if (len > 0)
  960. scm_misc_error (NULL, "list changed while constructing string", SCM_EOL);
  961. if (!scm_is_null (rest))
  962. scm_wrong_type_arg_msg (NULL, 0, chrs, "proper list");
  963. return result;
  964. }
  965. #undef FUNC_NAME
  966. SCM_DEFINE (scm_make_string, "make-string", 1, 1, 0,
  967. (SCM k, SCM chr),
  968. "Return a newly allocated string of\n"
  969. "length @var{k}. If @var{chr} is given, then all elements of\n"
  970. "the string are initialized to @var{chr}, otherwise the contents\n"
  971. "of the string are all set to @code{#\nul}.")
  972. #define FUNC_NAME s_scm_make_string
  973. {
  974. SCM ret = scm_c_make_string (scm_to_size_t (k), chr);
  975. /* Given that make-string is mostly used by Scheme to prepare a
  976. mutable string buffer, let's go ahead and mark this as mutable to
  977. avoid a copy when this buffer is next written to. */
  978. STRINGBUF_SET_MUTABLE (STRING_STRINGBUF (ret));
  979. return ret;
  980. }
  981. #undef FUNC_NAME
  982. SCM
  983. scm_c_make_string (size_t len, SCM chr)
  984. #define FUNC_NAME NULL
  985. {
  986. size_t p;
  987. char *contents = NULL;
  988. SCM res = scm_i_make_string (len, &contents, 0);
  989. /* If no char is given, initialize string contents to NULL. */
  990. if (SCM_UNBNDP (chr))
  991. memset (contents, 0, len);
  992. else
  993. {
  994. SCM_VALIDATE_CHAR (0, chr);
  995. for (p = 0; p < len; p++)
  996. scm_i_string_set_x (res, p, SCM_CHAR (chr));
  997. }
  998. return res;
  999. }
  1000. #undef FUNC_NAME
  1001. SCM_DEFINE (scm_string_length, "string-length", 1, 0, 0,
  1002. (SCM string),
  1003. "Return the number of characters in @var{string}.")
  1004. #define FUNC_NAME s_scm_string_length
  1005. {
  1006. SCM_VALIDATE_STRING (1, string);
  1007. return scm_from_size_t (STRING_LENGTH (string));
  1008. }
  1009. #undef FUNC_NAME
  1010. SCM_DEFINE (scm_string_bytes_per_char, "string-bytes-per-char", 1, 0, 0,
  1011. (SCM string),
  1012. "Return the bytes used to represent a character in @var{string}."
  1013. "This will return 1 or 4.")
  1014. #define FUNC_NAME s_scm_string_bytes_per_char
  1015. {
  1016. SCM_VALIDATE_STRING (1, string);
  1017. if (!scm_i_is_narrow_string (string))
  1018. return scm_from_int (4);
  1019. return scm_from_int (1);
  1020. }
  1021. #undef FUNC_NAME
  1022. size_t
  1023. scm_c_string_length (SCM string)
  1024. {
  1025. if (!IS_STRING (string))
  1026. scm_wrong_type_arg_msg (NULL, 0, string, "string");
  1027. return STRING_LENGTH (string);
  1028. }
  1029. SCM_DEFINE (scm_string_ref, "string-ref", 2, 0, 0,
  1030. (SCM str, SCM k),
  1031. "Return character @var{k} of @var{str} using zero-origin\n"
  1032. "indexing. @var{k} must be a valid index of @var{str}.")
  1033. #define FUNC_NAME s_scm_string_ref
  1034. {
  1035. size_t len;
  1036. unsigned long idx;
  1037. SCM_VALIDATE_STRING (1, str);
  1038. len = scm_i_string_length (str);
  1039. if (SCM_LIKELY (len > 0))
  1040. idx = scm_to_unsigned_integer (k, 0, len - 1);
  1041. else
  1042. scm_out_of_range (NULL, k);
  1043. if (scm_i_is_narrow_string (str))
  1044. return scm_c_make_char (scm_i_string_chars (str)[idx]);
  1045. else
  1046. return scm_c_make_char (scm_i_string_wide_chars (str)[idx]);
  1047. }
  1048. #undef FUNC_NAME
  1049. SCM
  1050. scm_c_string_ref (SCM str, size_t p)
  1051. {
  1052. if (p >= scm_i_string_length (str))
  1053. scm_out_of_range (NULL, scm_from_size_t (p));
  1054. if (scm_i_is_narrow_string (str))
  1055. return scm_c_make_char (scm_i_string_chars (str)[p]);
  1056. else
  1057. return scm_c_make_char (scm_i_string_wide_chars (str)[p]);
  1058. }
  1059. SCM_DEFINE (scm_string_set_x, "string-set!", 3, 0, 0,
  1060. (SCM str, SCM k, SCM chr),
  1061. "Store @var{chr} in element @var{k} of @var{str} and return\n"
  1062. "an unspecified value. @var{k} must be a valid index of\n"
  1063. "@var{str}.")
  1064. #define FUNC_NAME s_scm_string_set_x
  1065. {
  1066. size_t len;
  1067. unsigned long idx;
  1068. SCM_VALIDATE_STRING (1, str);
  1069. len = scm_i_string_length (str);
  1070. if (SCM_LIKELY (len > 0))
  1071. idx = scm_to_unsigned_integer (k, 0, len - 1);
  1072. else
  1073. scm_out_of_range (NULL, k);
  1074. SCM_VALIDATE_CHAR (3, chr);
  1075. str = scm_i_string_start_writing (str);
  1076. scm_i_string_set_x (str, idx, SCM_CHAR (chr));
  1077. scm_i_string_stop_writing ();
  1078. return SCM_UNSPECIFIED;
  1079. }
  1080. #undef FUNC_NAME
  1081. void
  1082. scm_c_string_set_x (SCM str, size_t p, SCM chr)
  1083. {
  1084. if (p >= scm_i_string_length (str))
  1085. scm_out_of_range (NULL, scm_from_size_t (p));
  1086. str = scm_i_string_start_writing (str);
  1087. scm_i_string_set_x (str, p, SCM_CHAR (chr));
  1088. scm_i_string_stop_writing ();
  1089. }
  1090. SCM_DEFINE (scm_substring, "substring", 2, 1, 0,
  1091. (SCM str, SCM start, SCM end),
  1092. "Return a newly allocated string formed from the characters\n"
  1093. "of @var{str} beginning with index @var{start} (inclusive) and\n"
  1094. "ending with index @var{end} (exclusive).\n"
  1095. "@var{str} must be a string, @var{start} and @var{end} must be\n"
  1096. "exact integers satisfying:\n\n"
  1097. "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
  1098. #define FUNC_NAME s_scm_substring
  1099. {
  1100. size_t len, from, to;
  1101. SCM_VALIDATE_STRING (1, str);
  1102. len = scm_i_string_length (str);
  1103. from = scm_to_unsigned_integer (start, 0, len);
  1104. if (SCM_UNBNDP (end))
  1105. to = len;
  1106. else
  1107. to = scm_to_unsigned_integer (end, from, len);
  1108. return scm_i_substring (str, from, to);
  1109. }
  1110. #undef FUNC_NAME
  1111. SCM_DEFINE (scm_substring_read_only, "substring/read-only", 2, 1, 0,
  1112. (SCM str, SCM start, SCM end),
  1113. "Return a newly allocated string formed from the characters\n"
  1114. "of @var{str} beginning with index @var{start} (inclusive) and\n"
  1115. "ending with index @var{end} (exclusive).\n"
  1116. "@var{str} must be a string, @var{start} and @var{end} must be\n"
  1117. "exact integers satisfying:\n"
  1118. "\n"
  1119. "0 <= @var{start} <= @var{end} <= (string-length @var{str}).\n"
  1120. "\n"
  1121. "The returned string is read-only.\n")
  1122. #define FUNC_NAME s_scm_substring_read_only
  1123. {
  1124. size_t len, from, to;
  1125. SCM_VALIDATE_STRING (1, str);
  1126. len = scm_i_string_length (str);
  1127. from = scm_to_unsigned_integer (start, 0, len);
  1128. if (SCM_UNBNDP (end))
  1129. to = len;
  1130. else
  1131. to = scm_to_unsigned_integer (end, from, len);
  1132. return scm_i_substring_read_only (str, from, to);
  1133. }
  1134. #undef FUNC_NAME
  1135. SCM_DEFINE (scm_substring_copy, "substring/copy", 2, 1, 0,
  1136. (SCM str, SCM start, SCM end),
  1137. "Return a newly allocated string formed from the characters\n"
  1138. "of @var{str} beginning with index @var{start} (inclusive) and\n"
  1139. "ending with index @var{end} (exclusive).\n"
  1140. "@var{str} must be a string, @var{start} and @var{end} must be\n"
  1141. "exact integers satisfying:\n\n"
  1142. "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
  1143. #define FUNC_NAME s_scm_substring_copy
  1144. {
  1145. /* For the Scheme version, START is mandatory, but for the C
  1146. version, it is optional. See scm_string_copy in srfi-13.c for a
  1147. rationale.
  1148. */
  1149. size_t from, to;
  1150. SCM_VALIDATE_STRING (1, str);
  1151. scm_i_get_substring_spec (scm_i_string_length (str),
  1152. start, &from, end, &to);
  1153. return scm_i_substring_copy (str, from, to);
  1154. }
  1155. #undef FUNC_NAME
  1156. SCM_DEFINE (scm_substring_shared, "substring/shared", 2, 1, 0,
  1157. (SCM str, SCM start, SCM end),
  1158. "Return string that indirectly refers to the characters\n"
  1159. "of @var{str} beginning with index @var{start} (inclusive) and\n"
  1160. "ending with index @var{end} (exclusive).\n"
  1161. "@var{str} must be a string, @var{start} and @var{end} must be\n"
  1162. "exact integers satisfying:\n\n"
  1163. "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
  1164. #define FUNC_NAME s_scm_substring_shared
  1165. {
  1166. size_t len, from, to;
  1167. SCM_VALIDATE_STRING (1, str);
  1168. len = scm_i_string_length (str);
  1169. from = scm_to_unsigned_integer (start, 0, len);
  1170. if (SCM_UNBNDP (end))
  1171. to = len;
  1172. else
  1173. to = scm_to_unsigned_integer (end, from, len);
  1174. return scm_i_substring_shared (str, from, to);
  1175. }
  1176. #undef FUNC_NAME
  1177. SCM_DEFINE (scm_string_append, "string-append", 0, 0, 1,
  1178. (SCM args),
  1179. "Return a newly allocated string whose characters form the\n"
  1180. "concatenation of the given strings, @var{args}.")
  1181. #define FUNC_NAME s_scm_string_append
  1182. {
  1183. SCM res;
  1184. size_t total = 0;
  1185. size_t len;
  1186. int wide = 0;
  1187. SCM l, s;
  1188. size_t i;
  1189. union
  1190. {
  1191. char *narrow;
  1192. scm_t_wchar *wide;
  1193. } data;
  1194. SCM_VALIDATE_REST_ARGUMENT (args);
  1195. for (l = args; !scm_is_null (l); l = SCM_CDR (l))
  1196. {
  1197. s = SCM_CAR (l);
  1198. SCM_VALIDATE_STRING (SCM_ARGn, s);
  1199. len = scm_i_string_length (s);
  1200. if (INT_ADD_OVERFLOW (total, len))
  1201. scm_num_overflow (FUNC_NAME);
  1202. total += len;
  1203. if (!scm_i_is_narrow_string (s))
  1204. wide = 1;
  1205. }
  1206. data.narrow = NULL;
  1207. if (!wide)
  1208. res = scm_i_make_string (total, &data.narrow, 0);
  1209. else
  1210. res = scm_i_make_wide_string (total, &data.wide, 0);
  1211. for (l = args; !scm_is_null (l); l = SCM_CDR (l))
  1212. {
  1213. size_t len;
  1214. s = SCM_CAR (l);
  1215. SCM_VALIDATE_STRING (SCM_ARGn, s);
  1216. len = scm_i_string_length (s);
  1217. if (len > total)
  1218. SCM_MISC_ERROR ("list changed during string-append", SCM_EOL);
  1219. if (!wide)
  1220. {
  1221. memcpy (data.narrow, scm_i_string_chars (s), len);
  1222. data.narrow += len;
  1223. }
  1224. else
  1225. {
  1226. if (scm_i_is_narrow_string (s))
  1227. {
  1228. const char *src = scm_i_string_chars (s);
  1229. for (i = 0; i < len; i++)
  1230. data.wide[i] = (unsigned char) src[i];
  1231. }
  1232. else
  1233. u32_cpy ((uint32_t *) data.wide,
  1234. (uint32_t *) scm_i_string_wide_chars (s), len);
  1235. data.wide += len;
  1236. }
  1237. total -= len;
  1238. scm_remember_upto_here_1 (s);
  1239. }
  1240. if (total != 0)
  1241. SCM_MISC_ERROR ("list changed during string-append", SCM_EOL);
  1242. return res;
  1243. }
  1244. #undef FUNC_NAME
  1245. /* Charset conversion error handling. */
  1246. SCM_SYMBOL (scm_encoding_error_key, "encoding-error");
  1247. SCM_SYMBOL (scm_decoding_error_key, "decoding-error");
  1248. /* Raise an exception informing that character CHR could not be written
  1249. to PORT in its current encoding. */
  1250. void
  1251. scm_encoding_error (const char *subr, int err, const char *message,
  1252. SCM port, SCM chr)
  1253. {
  1254. scm_throw (scm_encoding_error_key,
  1255. scm_list_n (scm_from_latin1_string (subr),
  1256. scm_from_latin1_string (message),
  1257. scm_from_int (err),
  1258. port, chr,
  1259. SCM_UNDEFINED));
  1260. }
  1261. /* Raise an exception informing of an encoding error on PORT. This
  1262. means that a character could not be written in PORT's encoding. */
  1263. void
  1264. scm_decoding_error (const char *subr, int err, const char *message, SCM port)
  1265. {
  1266. scm_throw (scm_decoding_error_key,
  1267. scm_list_n (scm_from_latin1_string (subr),
  1268. scm_from_latin1_string (message),
  1269. scm_from_int (err),
  1270. port,
  1271. SCM_UNDEFINED));
  1272. }
  1273. /* String conversion to/from C. */
  1274. static void
  1275. decoding_error (const char *func_name, int errno_save,
  1276. const char *str, size_t len)
  1277. {
  1278. /* Raise an error and pass the raw C string as a bytevector to the `throw'
  1279. handler. */
  1280. SCM bv;
  1281. signed char *buf;
  1282. buf = scm_gc_malloc_pointerless (len, "bytevector");
  1283. memcpy (buf, str, len);
  1284. bv = scm_c_take_gc_bytevector (buf, len, SCM_BOOL_F);
  1285. scm_decoding_error (func_name, errno_save,
  1286. "input locale conversion error", bv);
  1287. }
  1288. SCM
  1289. scm_from_stringn (const char *str, size_t len, const char *encoding,
  1290. scm_t_string_failed_conversion_handler handler)
  1291. {
  1292. size_t u32len, i;
  1293. scm_t_wchar *u32;
  1294. int wide = 0;
  1295. SCM res;
  1296. /* The order of these checks is important. */
  1297. if (!str && len != 0)
  1298. scm_misc_error ("scm_from_stringn", "NULL string pointer", SCM_EOL);
  1299. if (len == (size_t) -1)
  1300. len = strlen (str);
  1301. if (c_strcasecmp (encoding, "ISO-8859-1") == 0 || len == 0)
  1302. return scm_from_latin1_stringn (str, len);
  1303. else if (c_strcasecmp (encoding, "UTF-8") == 0
  1304. && handler == SCM_FAILED_CONVERSION_ERROR)
  1305. return scm_from_utf8_stringn (str, len);
  1306. u32len = 0;
  1307. u32 = (scm_t_wchar *) u32_conv_from_encoding (encoding,
  1308. (enum iconv_ilseq_handler)
  1309. handler,
  1310. str, len,
  1311. NULL,
  1312. NULL, &u32len);
  1313. if (SCM_UNLIKELY (u32 == NULL))
  1314. decoding_error (__func__, errno, str, len);
  1315. i = 0;
  1316. while (i < u32len)
  1317. if (u32[i++] > 0xFF)
  1318. {
  1319. wide = 1;
  1320. break;
  1321. }
  1322. if (!wide)
  1323. {
  1324. char *dst;
  1325. res = scm_i_make_string (u32len, &dst, 0);
  1326. for (i = 0; i < u32len; i ++)
  1327. dst[i] = (unsigned char) u32[i];
  1328. dst[u32len] = '\0';
  1329. }
  1330. else
  1331. {
  1332. scm_t_wchar *wdst;
  1333. res = scm_i_make_wide_string (u32len, &wdst, 0);
  1334. u32_cpy ((uint32_t *) wdst, (uint32_t *) u32, u32len);
  1335. wdst[u32len] = 0;
  1336. }
  1337. free (u32);
  1338. return res;
  1339. }
  1340. SCM
  1341. scm_from_locale_string (const char *str)
  1342. {
  1343. return scm_from_locale_stringn (str, -1);
  1344. }
  1345. scm_t_string_failed_conversion_handler
  1346. scm_i_default_string_failed_conversion_handler (void)
  1347. {
  1348. return scm_i_string_failed_conversion_handler
  1349. (scm_i_default_port_conversion_strategy ());
  1350. }
  1351. SCM
  1352. scm_from_locale_stringn (const char *str, size_t len)
  1353. {
  1354. return scm_from_stringn (str, len, locale_charset (),
  1355. scm_i_default_string_failed_conversion_handler ());
  1356. }
  1357. SCM
  1358. scm_from_latin1_string (const char *str)
  1359. {
  1360. return scm_from_latin1_stringn (str, -1);
  1361. }
  1362. SCM
  1363. scm_from_latin1_stringn (const char *str, size_t len)
  1364. {
  1365. char *buf;
  1366. SCM result;
  1367. if (len == (size_t) -1)
  1368. len = strlen (str);
  1369. /* Make a narrow string and copy STR as is. */
  1370. result = scm_i_make_string (len, &buf, 0);
  1371. memcpy (buf, str, len);
  1372. return result;
  1373. }
  1374. SCM
  1375. scm_from_utf8_string (const char *str)
  1376. {
  1377. return scm_from_utf8_stringn (str, -1);
  1378. }
  1379. SCM
  1380. scm_from_utf8_stringn (const char *str, size_t len)
  1381. {
  1382. size_t i, char_len;
  1383. const uint8_t *ustr = (const uint8_t *) str;
  1384. int ascii = 1, narrow = 1;
  1385. SCM res;
  1386. if (len == (size_t) -1)
  1387. len = strlen (str);
  1388. i = 0;
  1389. char_len = 0;
  1390. while (i < len)
  1391. {
  1392. if (ustr[i] <= 127)
  1393. {
  1394. char_len++;
  1395. i++;
  1396. }
  1397. else
  1398. {
  1399. ucs4_t c;
  1400. int nbytes;
  1401. ascii = 0;
  1402. nbytes = u8_mbtoucr (&c, ustr + i, len - i);
  1403. if (nbytes < 0)
  1404. /* Bad UTF-8. */
  1405. decoding_error (__func__, errno, str, len);
  1406. if (c > 255)
  1407. narrow = 0;
  1408. char_len++;
  1409. i += nbytes;
  1410. }
  1411. }
  1412. if (ascii)
  1413. {
  1414. char *dst;
  1415. res = scm_i_make_string (char_len, &dst, 0);
  1416. memcpy (dst, str, len);
  1417. }
  1418. else if (narrow)
  1419. {
  1420. char *dst;
  1421. size_t j;
  1422. ucs4_t c;
  1423. res = scm_i_make_string (char_len, &dst, 0);
  1424. for (i = 0, j = 0; i < len; j++)
  1425. {
  1426. i += u8_mbtouc_unsafe (&c, ustr + i, len - i);
  1427. dst[j] = (signed char) c;
  1428. }
  1429. }
  1430. else
  1431. {
  1432. scm_t_wchar *dst;
  1433. size_t j;
  1434. ucs4_t c;
  1435. res = scm_i_make_wide_string (char_len, &dst, 0);
  1436. for (i = 0, j = 0; i < len; j++)
  1437. {
  1438. i += u8_mbtouc_unsafe (&c, ustr + i, len - i);
  1439. dst[j] = c;
  1440. }
  1441. }
  1442. return res;
  1443. }
  1444. SCM
  1445. scm_from_utf32_string (const scm_t_wchar *str)
  1446. {
  1447. return scm_from_utf32_stringn (str, -1);
  1448. }
  1449. SCM
  1450. scm_from_utf32_stringn (const scm_t_wchar *str, size_t len)
  1451. {
  1452. SCM result;
  1453. scm_t_wchar *buf;
  1454. if (len == (size_t) -1)
  1455. len = u32_strlen ((uint32_t *) str);
  1456. result = scm_i_make_wide_string (len, &buf, 0);
  1457. memcpy (buf, str, len * sizeof (scm_t_wchar));
  1458. scm_i_try_narrow_string (result);
  1459. return result;
  1460. }
  1461. SCM
  1462. scm_from_port_string (const char *str, SCM port)
  1463. {
  1464. return scm_from_port_stringn (str, -1, port);
  1465. }
  1466. SCM
  1467. scm_from_port_stringn (const char *str, size_t len, SCM port)
  1468. {
  1469. scm_t_port *pt = SCM_PORT (port);
  1470. if (scm_is_eq (pt->encoding, sym_ISO_8859_1))
  1471. return scm_from_latin1_stringn (str, len);
  1472. else if (scm_is_eq (pt->encoding, sym_UTF_8)
  1473. && (scm_is_eq (pt->conversion_strategy, sym_error)
  1474. || (u8_check ((uint8_t *) str, len) == NULL)))
  1475. return scm_from_utf8_stringn (str, len);
  1476. else
  1477. return scm_from_stringn (str, len, scm_i_symbol_chars (pt->encoding),
  1478. scm_i_string_failed_conversion_handler
  1479. (SCM_PORT (port)->conversion_strategy));
  1480. }
  1481. /* Create a new scheme string from the C string STR. The memory of
  1482. STR may be used directly as storage for the new string. */
  1483. /* FIXME: GC-wise, the only way to use the memory area pointed to by STR
  1484. would be to register a finalizer to eventually free(3) STR, which isn't
  1485. worth it. Should we just deprecate the `scm_take_' functions? */
  1486. SCM
  1487. scm_take_locale_stringn (char *str, size_t len)
  1488. {
  1489. SCM res;
  1490. res = scm_from_locale_stringn (str, len);
  1491. free (str);
  1492. return res;
  1493. }
  1494. SCM
  1495. scm_take_locale_string (char *str)
  1496. {
  1497. return scm_take_locale_stringn (str, -1);
  1498. }
  1499. /* Change libunistring escapes (`\uXXXX' and `\UXXXXXXXX') in BUF, a
  1500. *LENP-byte locale-encoded string, to `\xXX', `\uXXXX', or `\UXXXXXX'.
  1501. Set *LENP to the size of the resulting string.
  1502. FIXME: This is a hack we should get rid of. See
  1503. <http://lists.gnu.org/archive/html/bug-libunistring/2010-09/msg00004.html>
  1504. for details. */
  1505. static void
  1506. unistring_escapes_to_guile_escapes (char *buf, size_t *lenp)
  1507. {
  1508. char *before, *after;
  1509. size_t i, j;
  1510. before = buf;
  1511. after = buf;
  1512. i = 0;
  1513. j = 0;
  1514. while (i < *lenp)
  1515. {
  1516. if ((i <= *lenp - 6)
  1517. && before[i] == '\\'
  1518. && before[i + 1] == 'u'
  1519. && before[i + 2] == '0' && before[i + 3] == '0')
  1520. {
  1521. /* Convert \u00NN to \xNN */
  1522. after[j] = '\\';
  1523. after[j + 1] = 'x';
  1524. after[j + 2] = tolower ((int) before[i + 4]);
  1525. after[j + 3] = tolower ((int) before[i + 5]);
  1526. i += 6;
  1527. j += 4;
  1528. }
  1529. else if ((i <= *lenp - 10)
  1530. && before[i] == '\\'
  1531. && before[i + 1] == 'U'
  1532. && before[i + 2] == '0' && before[i + 3] == '0')
  1533. {
  1534. /* Convert \U00NNNNNN to \UNNNNNN */
  1535. after[j] = '\\';
  1536. after[j + 1] = 'U';
  1537. after[j + 2] = tolower ((int) before[i + 4]);
  1538. after[j + 3] = tolower ((int) before[i + 5]);
  1539. after[j + 4] = tolower ((int) before[i + 6]);
  1540. after[j + 5] = tolower ((int) before[i + 7]);
  1541. after[j + 6] = tolower ((int) before[i + 8]);
  1542. after[j + 7] = tolower ((int) before[i + 9]);
  1543. i += 10;
  1544. j += 8;
  1545. }
  1546. else
  1547. {
  1548. after[j] = before[i];
  1549. i++;
  1550. j++;
  1551. }
  1552. }
  1553. *lenp = j;
  1554. }
  1555. /* Change libunistring escapes (`\uXXXX' and `\UXXXXXXXX') in BUF, a
  1556. *LENP-byte locale-encoded string, to `\xXXXX;'. Set *LEN to the size
  1557. of the resulting string. BUF must be large enough to handle the
  1558. worst case when `\uXXXX' escapes (6 characters) are replaced by
  1559. `\xXXXX;' (7 characters). */
  1560. static void
  1561. unistring_escapes_to_r6rs_escapes (char *buf, size_t *lenp)
  1562. {
  1563. char *before, *after;
  1564. int malloc_p;
  1565. size_t i, j;
  1566. /* The worst case is if the input string contains all 4-digit hex escapes.
  1567. "\uXXXX" (six characters) becomes "\xXXXX;" (seven characters) */
  1568. size_t max_out_len = (*lenp * 7) / 6 + 1;
  1569. size_t nzeros, ndigits;
  1570. before = buf;
  1571. malloc_p = (max_out_len > SCM_MAX_ALLOCA);
  1572. after = malloc_p ? malloc (max_out_len) : alloca (max_out_len);
  1573. i = 0;
  1574. j = 0;
  1575. while (i < *lenp)
  1576. {
  1577. if (((i <= *lenp - 6) && before[i] == '\\' && before[i + 1] == 'u')
  1578. || ((i <= *lenp - 10) && before[i] == '\\' && before[i + 1] == 'U'))
  1579. {
  1580. if (before[i + 1] == 'u')
  1581. ndigits = 4;
  1582. else if (before[i + 1] == 'U')
  1583. ndigits = 8;
  1584. else
  1585. abort ();
  1586. /* Add the R6RS hex escape initial sequence. */
  1587. after[j] = '\\';
  1588. after[j + 1] = 'x';
  1589. /* Move string positions to the start of the hex numbers. */
  1590. i += 2;
  1591. j += 2;
  1592. /* Find the number of initial zeros in this hex number. */
  1593. nzeros = 0;
  1594. while (before[i + nzeros] == '0' && nzeros < ndigits)
  1595. nzeros++;
  1596. /* Copy the number, skipping initial zeros, and then move the string
  1597. positions. */
  1598. if (nzeros == ndigits)
  1599. {
  1600. after[j] = '0';
  1601. i += ndigits;
  1602. j += 1;
  1603. }
  1604. else
  1605. {
  1606. int pos;
  1607. for (pos = 0; pos < ndigits - nzeros; pos++)
  1608. after[j + pos] = tolower ((int) before[i + nzeros + pos]);
  1609. i += ndigits;
  1610. j += (ndigits - nzeros);
  1611. }
  1612. /* Add terminating semicolon. */
  1613. after[j] = ';';
  1614. j++;
  1615. }
  1616. else
  1617. {
  1618. after[j] = before[i];
  1619. i++;
  1620. j++;
  1621. }
  1622. }
  1623. *lenp = j;
  1624. memcpy (before, after, j);
  1625. if (malloc_p)
  1626. free (after);
  1627. }
  1628. char *
  1629. scm_to_locale_string (SCM str)
  1630. {
  1631. return scm_to_locale_stringn (str, NULL);
  1632. }
  1633. char *
  1634. scm_to_locale_stringn (SCM str, size_t *lenp)
  1635. {
  1636. return scm_to_stringn (str, lenp,
  1637. locale_charset (),
  1638. scm_i_default_string_failed_conversion_handler ());
  1639. }
  1640. char *
  1641. scm_to_latin1_string (SCM str)
  1642. {
  1643. return scm_to_latin1_stringn (str, NULL);
  1644. }
  1645. char *
  1646. scm_to_latin1_stringn (SCM str, size_t *lenp)
  1647. #define FUNC_NAME "scm_to_latin1_stringn"
  1648. {
  1649. char *result;
  1650. SCM_VALIDATE_STRING (1, str);
  1651. if (scm_i_is_narrow_string (str))
  1652. {
  1653. size_t len = scm_i_string_length (str);
  1654. if (lenp)
  1655. *lenp = len;
  1656. result = scm_strndup (scm_i_string_data (str), len);
  1657. }
  1658. else
  1659. result = scm_to_stringn (str, lenp, NULL,
  1660. SCM_FAILED_CONVERSION_ERROR);
  1661. return result;
  1662. }
  1663. #undef FUNC_NAME
  1664. char *
  1665. scm_to_utf8_string (SCM str)
  1666. {
  1667. return scm_to_utf8_stringn (str, NULL);
  1668. }
  1669. static size_t
  1670. latin1_u8_strlen (const uint8_t *str, size_t len)
  1671. {
  1672. size_t ret, i;
  1673. for (i = 0, ret = 0; i < len; i++)
  1674. ret += (str[i] < 128) ? 1 : 2;
  1675. return ret;
  1676. }
  1677. static uint8_t*
  1678. latin1_to_u8 (const uint8_t *str, size_t latin_len,
  1679. uint8_t *u8_result, size_t *u8_lenp)
  1680. {
  1681. size_t i, n;
  1682. size_t u8_len = latin1_u8_strlen (str, latin_len);
  1683. if (!(u8_result && u8_lenp && *u8_lenp > u8_len))
  1684. u8_result = scm_malloc (u8_len + 1);
  1685. if (u8_lenp)
  1686. *u8_lenp = u8_len;
  1687. for (i = 0, n = 0; i < latin_len; i++)
  1688. n += u8_uctomb (u8_result + n, str[i], u8_len - n);
  1689. if (n != u8_len)
  1690. abort ();
  1691. u8_result[n] = 0;
  1692. return u8_result;
  1693. }
  1694. /* UTF-8 code table
  1695. (Note that this includes code points that are not allowed by Unicode,
  1696. but since this function has no way to report an error, and its
  1697. purpose is to determine the size of destination buffers for
  1698. libunicode conversion functions, we err on the safe side and handle
  1699. everything that libunicode might conceivably handle, now or in the
  1700. future.)
  1701. Char. number range | UTF-8 octet sequence
  1702. (hexadecimal) | (binary)
  1703. --------------------+------------------------------------------------------
  1704. 0000 0000-0000 007F | 0xxxxxxx
  1705. 0000 0080-0000 07FF | 110xxxxx 10xxxxxx
  1706. 0000 0800-0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx
  1707. 0001 0000-001F FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
  1708. 0020 0000-03FF FFFF | 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
  1709. 0400 0000-7FFF FFFF | 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
  1710. */
  1711. static size_t
  1712. u32_u8_length_in_bytes (const uint32_t *str, size_t len)
  1713. {
  1714. size_t ret, i;
  1715. for (i = 0, ret = 0; i < len; i++)
  1716. {
  1717. uint32_t c = str[i];
  1718. if (c <= 0x7f)
  1719. ret += 1;
  1720. else if (c <= 0x7ff)
  1721. ret += 2;
  1722. else if (c <= 0xffff)
  1723. ret += 3;
  1724. else if (c <= 0x1fffff)
  1725. ret += 4;
  1726. else if (c <= 0x3ffffff)
  1727. ret += 5;
  1728. else
  1729. ret += 6;
  1730. }
  1731. return ret;
  1732. }
  1733. static size_t
  1734. utf8_length (SCM str)
  1735. {
  1736. if (scm_i_is_narrow_string (str))
  1737. return latin1_u8_strlen ((uint8_t *) scm_i_string_chars (str),
  1738. scm_i_string_length (str));
  1739. else
  1740. return u32_u8_length_in_bytes
  1741. ((uint32_t *) scm_i_string_wide_chars (str),
  1742. scm_i_string_length (str));
  1743. }
  1744. size_t
  1745. scm_c_string_utf8_length (SCM string)
  1746. #define FUNC_NAME "scm_c_string_utf8_length"
  1747. {
  1748. SCM_VALIDATE_STRING (1, string);
  1749. return utf8_length (string);
  1750. }
  1751. #undef FUNC_NAME
  1752. SCM_DEFINE (scm_string_utf8_length, "string-utf8-length", 1, 0, 0,
  1753. (SCM string),
  1754. "Returns the number of bytes in the UTF-8 representation of "
  1755. "@var{string}.")
  1756. #define FUNC_NAME s_scm_string_utf8_length
  1757. {
  1758. SCM_VALIDATE_STRING (1, string);
  1759. return scm_from_size_t (utf8_length (string));
  1760. }
  1761. #undef FUNC_NAME
  1762. char *
  1763. scm_to_utf8_stringn (SCM str, size_t *lenp)
  1764. #define FUNC_NAME "scm_to_utf8_stringn"
  1765. {
  1766. SCM_VALIDATE_STRING (1, str);
  1767. if (scm_i_is_narrow_string (str))
  1768. return (char *) latin1_to_u8 ((uint8_t *) scm_i_string_chars (str),
  1769. scm_i_string_length (str),
  1770. NULL, lenp);
  1771. else
  1772. {
  1773. uint32_t *chars = (uint32_t *) scm_i_string_wide_chars (str);
  1774. uint8_t *buf, *ret;
  1775. size_t num_chars = scm_i_string_length (str);
  1776. size_t num_bytes_predicted, num_bytes_actual;
  1777. num_bytes_predicted = u32_u8_length_in_bytes (chars, num_chars);
  1778. if (lenp)
  1779. {
  1780. *lenp = num_bytes_predicted;
  1781. buf = scm_malloc (num_bytes_predicted);
  1782. }
  1783. else
  1784. {
  1785. buf = scm_malloc (num_bytes_predicted + 1);
  1786. buf[num_bytes_predicted] = 0;
  1787. }
  1788. num_bytes_actual = num_bytes_predicted;
  1789. ret = u32_to_u8 (chars, num_chars, buf, &num_bytes_actual);
  1790. if (SCM_LIKELY (ret == buf && num_bytes_actual == num_bytes_predicted))
  1791. return (char *) ret;
  1792. /* An error: a bad codepoint. */
  1793. {
  1794. int saved_errno = errno;
  1795. free (buf);
  1796. if (!saved_errno)
  1797. abort ();
  1798. scm_decoding_error ("scm_to_utf8_stringn", errno,
  1799. "invalid codepoint in string", str);
  1800. /* Not reached. */
  1801. return NULL;
  1802. }
  1803. }
  1804. }
  1805. #undef FUNC_NAME
  1806. scm_t_wchar *
  1807. scm_to_utf32_string (SCM str)
  1808. {
  1809. return scm_to_utf32_stringn (str, NULL);
  1810. }
  1811. scm_t_wchar *
  1812. scm_to_utf32_stringn (SCM str, size_t *lenp)
  1813. #define FUNC_NAME "scm_to_utf32_stringn"
  1814. {
  1815. scm_t_wchar *result;
  1816. SCM_VALIDATE_STRING (1, str);
  1817. if (scm_i_is_narrow_string (str))
  1818. {
  1819. uint8_t *codepoints;
  1820. size_t i, len;
  1821. codepoints = (uint8_t*) scm_i_string_chars (str);
  1822. len = scm_i_string_length (str);
  1823. if (lenp)
  1824. *lenp = len;
  1825. result = scm_malloc ((len + 1) * sizeof (scm_t_wchar));
  1826. for (i = 0; i < len; i++)
  1827. result[i] = codepoints[i];
  1828. result[len] = 0;
  1829. }
  1830. else
  1831. {
  1832. size_t len;
  1833. len = scm_i_string_length (str);
  1834. if (lenp)
  1835. *lenp = len;
  1836. result = scm_malloc ((len + 1) * sizeof (scm_t_wchar));
  1837. memcpy (result, scm_i_string_wide_chars (str),
  1838. len * sizeof (scm_t_wchar));
  1839. result[len] = 0;
  1840. }
  1841. return result;
  1842. }
  1843. #undef FUNC_NAME
  1844. char *
  1845. scm_to_port_string (SCM str, SCM port)
  1846. {
  1847. return scm_to_port_stringn (str, NULL, port);
  1848. }
  1849. char *
  1850. scm_to_port_stringn (SCM str, size_t *lenp, SCM port)
  1851. {
  1852. scm_t_port *pt = SCM_PORT (port);
  1853. if (scm_is_eq (pt->encoding, sym_ISO_8859_1)
  1854. && scm_is_eq (pt->conversion_strategy, sym_error))
  1855. return scm_to_latin1_stringn (str, lenp);
  1856. else if (scm_is_eq (pt->encoding, sym_UTF_8))
  1857. return scm_to_utf8_stringn (str, lenp);
  1858. else
  1859. return scm_to_stringn (str, lenp, scm_i_symbol_chars (pt->encoding),
  1860. scm_i_string_failed_conversion_handler
  1861. (SCM_PORT (port)->conversion_strategy));
  1862. }
  1863. /* Return a malloc(3)-allocated buffer containing the contents of STR encoded
  1864. according to ENCODING. If LENP is non-NULL, set it to the size in bytes of
  1865. the returned buffer. If the conversion to ENCODING fails, apply the strategy
  1866. defined by HANDLER. */
  1867. char *
  1868. scm_to_stringn (SCM str, size_t *lenp, const char *encoding,
  1869. scm_t_string_failed_conversion_handler handler)
  1870. {
  1871. char *buf;
  1872. size_t ilen, len, i;
  1873. int ret;
  1874. if (!scm_is_string (str))
  1875. scm_wrong_type_arg_msg (NULL, 0, str, "string");
  1876. if (encoding == NULL)
  1877. encoding = "ISO-8859-1";
  1878. if (c_strcasecmp (encoding, "UTF-8") == 0)
  1879. /* This is the most common case--e.g., when calling libc bindings
  1880. while using a UTF-8 locale. */
  1881. return scm_to_utf8_stringn (str, lenp);
  1882. ilen = scm_i_string_length (str);
  1883. if (ilen == 0)
  1884. {
  1885. buf = scm_malloc (1);
  1886. buf[0] = '\0';
  1887. if (lenp)
  1888. *lenp = 0;
  1889. return buf;
  1890. }
  1891. if (lenp == NULL)
  1892. for (i = 0; i < ilen; i++)
  1893. if (scm_i_string_ref (str, i) == '\0')
  1894. scm_misc_error (NULL,
  1895. "string contains #\\nul character: ~S",
  1896. scm_list_1 (str));
  1897. if (scm_i_is_narrow_string (str)
  1898. && c_strcasecmp (encoding, "ISO-8859-1") == 0)
  1899. {
  1900. /* If using native Latin-1 encoding, just copy the string
  1901. contents. */
  1902. if (lenp)
  1903. {
  1904. buf = scm_malloc (ilen);
  1905. memcpy (buf, scm_i_string_chars (str), ilen);
  1906. *lenp = ilen;
  1907. return buf;
  1908. }
  1909. else
  1910. {
  1911. buf = scm_malloc (ilen + 1);
  1912. memcpy (buf, scm_i_string_chars (str), ilen);
  1913. buf[ilen] = '\0';
  1914. return buf;
  1915. }
  1916. }
  1917. buf = NULL;
  1918. len = 0;
  1919. if (scm_i_is_narrow_string (str))
  1920. {
  1921. ret = mem_iconveh (scm_i_string_chars (str), ilen,
  1922. "ISO-8859-1", encoding,
  1923. (enum iconv_ilseq_handler) handler, NULL,
  1924. &buf, &len);
  1925. if (ret != 0)
  1926. scm_encoding_error (__func__, errno,
  1927. "cannot convert narrow string to output locale",
  1928. SCM_BOOL_F,
  1929. /* FIXME: Faulty character unknown. */
  1930. SCM_BOOL_F);
  1931. }
  1932. else
  1933. {
  1934. buf = u32_conv_to_encoding (encoding,
  1935. (enum iconv_ilseq_handler) handler,
  1936. (uint32_t *) scm_i_string_wide_chars (str),
  1937. ilen,
  1938. NULL,
  1939. NULL, &len);
  1940. if (buf == NULL)
  1941. scm_encoding_error (__func__, errno,
  1942. "cannot convert wide string to output locale",
  1943. SCM_BOOL_F,
  1944. /* FIXME: Faulty character unknown. */
  1945. SCM_BOOL_F);
  1946. }
  1947. if (handler == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE)
  1948. {
  1949. if (SCM_R6RS_ESCAPES_P)
  1950. {
  1951. /* The worst case is if the input string contains all 4-digit
  1952. hex escapes. "\uXXXX" (six characters) becomes "\xXXXX;"
  1953. (seven characters). Make BUF large enough to hold
  1954. that. */
  1955. buf = scm_realloc (buf, (len * 7) / 6 + 1);
  1956. unistring_escapes_to_r6rs_escapes (buf, &len);
  1957. }
  1958. else
  1959. unistring_escapes_to_guile_escapes (buf, &len);
  1960. buf = scm_realloc (buf, len);
  1961. }
  1962. if (lenp)
  1963. *lenp = len;
  1964. else
  1965. {
  1966. buf = scm_realloc (buf, len + 1);
  1967. buf[len] = '\0';
  1968. }
  1969. scm_remember_upto_here_1 (str);
  1970. return buf;
  1971. }
  1972. size_t
  1973. scm_to_locale_stringbuf (SCM str, char *buf, size_t max_len)
  1974. {
  1975. size_t len, copy_len;
  1976. char *result = NULL;
  1977. if (!scm_is_string (str))
  1978. scm_wrong_type_arg_msg (NULL, 0, str, "string");
  1979. result = scm_to_locale_stringn (str, &len);
  1980. copy_len = (len > max_len) ? max_len : len;
  1981. if (copy_len != 0)
  1982. /* Some users of 'scm_to_locale_stringbuf' may pass NULL for buf
  1983. when max_len is zero, and yet we must avoid passing NULL to
  1984. memcpy to avoid undefined behavior. */
  1985. memcpy (buf, result, copy_len);
  1986. free (result);
  1987. scm_remember_upto_here_1 (str);
  1988. return len;
  1989. }
  1990. /* Unicode string normalization. */
  1991. /* This function is a partial clone of SCM_STRING_TO_U32_BUF from
  1992. libguile/i18n.c. It would be useful to have this factored out into a more
  1993. convenient location, but its use of alloca makes that tricky to do. */
  1994. static SCM
  1995. normalize_str (SCM string, uninorm_t form)
  1996. {
  1997. SCM ret;
  1998. uint32_t *w_str;
  1999. uint32_t *w_norm_str;
  2000. scm_t_wchar *cbuf;
  2001. int malloc_p;
  2002. size_t norm_len, len = scm_i_string_length (string);
  2003. if (scm_i_is_narrow_string (string))
  2004. {
  2005. size_t i, bytes;
  2006. const char *buf = scm_i_string_chars (string);
  2007. bytes = (len + 1) * sizeof (scm_t_wchar);
  2008. malloc_p = (bytes > SCM_MAX_ALLOCA);
  2009. w_str = malloc_p ? malloc (bytes) : alloca (bytes);
  2010. for (i = 0; i < len; i ++)
  2011. w_str[i] = (unsigned char) buf[i];
  2012. w_str[len] = 0;
  2013. }
  2014. else
  2015. {
  2016. malloc_p = 0;
  2017. w_str = (uint32_t *) scm_i_string_wide_chars (string);
  2018. }
  2019. w_norm_str = u32_normalize (form, w_str, len, NULL, &norm_len);
  2020. ret = scm_i_make_wide_string (norm_len, &cbuf, 0);
  2021. u32_cpy ((uint32_t *) cbuf, w_norm_str, norm_len);
  2022. free (w_norm_str);
  2023. if (malloc_p)
  2024. free (w_str);
  2025. scm_i_try_narrow_string (ret);
  2026. return ret;
  2027. }
  2028. SCM_DEFINE (scm_string_normalize_nfc, "string-normalize-nfc", 1, 0, 0,
  2029. (SCM string),
  2030. "Returns the NFC normalized form of @var{string}.")
  2031. #define FUNC_NAME s_scm_string_normalize_nfc
  2032. {
  2033. SCM_VALIDATE_STRING (1, string);
  2034. return normalize_str (string, UNINORM_NFC);
  2035. }
  2036. #undef FUNC_NAME
  2037. SCM_DEFINE (scm_string_normalize_nfd, "string-normalize-nfd", 1, 0, 0,
  2038. (SCM string),
  2039. "Returns the NFD normalized form of @var{string}.")
  2040. #define FUNC_NAME s_scm_string_normalize_nfd
  2041. {
  2042. SCM_VALIDATE_STRING (1, string);
  2043. return normalize_str (string, UNINORM_NFD);
  2044. }
  2045. #undef FUNC_NAME
  2046. SCM_DEFINE (scm_string_normalize_nfkc, "string-normalize-nfkc", 1, 0, 0,
  2047. (SCM string),
  2048. "Returns the NFKC normalized form of @var{string}.")
  2049. #define FUNC_NAME s_scm_string_normalize_nfkc
  2050. {
  2051. SCM_VALIDATE_STRING (1, string);
  2052. return normalize_str (string, UNINORM_NFKC);
  2053. }
  2054. #undef FUNC_NAME
  2055. SCM_DEFINE (scm_string_normalize_nfkd, "string-normalize-nfkd", 1, 0, 0,
  2056. (SCM string),
  2057. "Returns the NFKD normalized form of @var{string}.")
  2058. #define FUNC_NAME s_scm_string_normalize_nfkd
  2059. {
  2060. SCM_VALIDATE_STRING (1, string);
  2061. return normalize_str (string, UNINORM_NFKD);
  2062. }
  2063. #undef FUNC_NAME
  2064. /* converts C scm_array of strings to SCM scm_list of strings.
  2065. If argc < 0, a null terminated scm_array is assumed.
  2066. The current locale encoding is assumed */
  2067. SCM
  2068. scm_makfromstrs (int argc, char **argv)
  2069. {
  2070. int i = argc;
  2071. SCM lst = SCM_EOL;
  2072. if (0 > i)
  2073. for (i = 0; argv[i]; i++);
  2074. while (i--)
  2075. lst = scm_cons (scm_from_locale_string (argv[i]), lst);
  2076. return lst;
  2077. }
  2078. /* Return a newly allocated array of char pointers to each of the strings
  2079. in args, with a terminating NULL pointer. The strings are encoded using
  2080. the current locale. */
  2081. char **
  2082. scm_i_allocate_string_pointers (SCM list)
  2083. #define FUNC_NAME "scm_i_allocate_string_pointers"
  2084. {
  2085. char **result;
  2086. int list_len = scm_ilength (list);
  2087. int i;
  2088. if (list_len < 0)
  2089. scm_wrong_type_arg_msg (NULL, 0, list, "proper list");
  2090. result = scm_gc_malloc ((list_len + 1) * sizeof (char *),
  2091. "string pointers");
  2092. result[list_len] = NULL;
  2093. /* The list might have been modified in another thread, so
  2094. we check LIST before each access.
  2095. */
  2096. for (i = 0; i < list_len && scm_is_pair (list); i++)
  2097. {
  2098. SCM str = SCM_CAR (list);
  2099. size_t len; /* String length in bytes */
  2100. char *c_str = scm_to_locale_stringn (str, &len);
  2101. /* OPTIMIZE-ME: Right now, scm_to_locale_stringn always uses
  2102. scm_malloc to allocate the returned string, which must be
  2103. explicitly deallocated. This forces us to copy the string a
  2104. second time into a new buffer. Ideally there would be variants
  2105. of scm_to_*_stringn that can return garbage-collected buffers. */
  2106. result[i] = scm_gc_malloc_pointerless (len + 1, "string");
  2107. memcpy (result[i], c_str, len);
  2108. result[i][len] = '\0';
  2109. free (c_str);
  2110. list = SCM_CDR (list);
  2111. }
  2112. return result;
  2113. }
  2114. #undef FUNC_NAME
  2115. void
  2116. scm_i_get_substring_spec (size_t len,
  2117. SCM start, size_t *cstart,
  2118. SCM end, size_t *cend)
  2119. {
  2120. if (SCM_UNBNDP (start))
  2121. *cstart = 0;
  2122. else
  2123. *cstart = scm_to_unsigned_integer (start, 0, len);
  2124. if (SCM_UNBNDP (end))
  2125. *cend = len;
  2126. else
  2127. *cend = scm_to_unsigned_integer (end, *cstart, len);
  2128. }
  2129. SCM_VECTOR_IMPLEMENTATION (SCM_ARRAY_ELEMENT_TYPE_CHAR, scm_make_string)
  2130. void
  2131. scm_init_strings ()
  2132. {
  2133. scm_nullstr = scm_i_make_string (0, NULL, 0);
  2134. #include "strings.x"
  2135. }