srfi-14.c 57 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112
  1. /* srfi-14.c --- SRFI-14 procedures for Guile
  2. *
  3. * Copyright (C) 2001, 2004, 2006, 2007, 2009, 2011 Free Software Foundation, Inc.
  4. *
  5. * This library is free software; you can redistribute it and/or
  6. * modify it under the terms of the GNU Lesser General Public License
  7. * as published by the Free Software Foundation; either version 3 of
  8. * the License, or (at your option) any later version.
  9. *
  10. * This library is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  13. * Lesser General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU Lesser General Public
  16. * License along with this library; if not, write to the Free Software
  17. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  18. * 02110-1301 USA
  19. */
  20. #ifdef HAVE_CONFIG_H
  21. # include <config.h>
  22. #endif
  23. #include <string.h>
  24. #include <unictype.h>
  25. #include "libguile.h"
  26. #include "libguile/srfi-14.h"
  27. #include "libguile/strings.h"
  28. #include "libguile/chars.h"
  29. /* Include the pre-computed standard charset data. */
  30. #include "libguile/srfi-14.i.c"
  31. scm_t_char_range cs_full_ranges[] = {
  32. {0x0000, SCM_CODEPOINT_SURROGATE_START - 1}
  33. ,
  34. {SCM_CODEPOINT_SURROGATE_END + 1, SCM_CODEPOINT_MAX}
  35. };
  36. scm_t_char_set cs_full = {
  37. 2,
  38. cs_full_ranges
  39. };
  40. #define SCM_CHARSET_DATA(charset) ((scm_t_char_set *) SCM_SMOB_DATA (charset))
  41. #define SCM_CHARSET_SET(cs, idx) \
  42. scm_i_charset_set (SCM_CHARSET_DATA (cs), idx)
  43. #define SCM_CHARSET_UNSET(cs, idx) \
  44. scm_i_charset_unset (SCM_CHARSET_DATA (cs), idx)
  45. /* Smob type code for character sets. */
  46. int scm_tc16_charset = 0;
  47. int scm_tc16_charset_cursor = 0;
  48. /* True if N exists in charset CS. */
  49. int
  50. scm_i_charset_get (scm_t_char_set *cs, scm_t_wchar n)
  51. {
  52. size_t i;
  53. i = 0;
  54. while (i < cs->len)
  55. {
  56. if (cs->ranges[i].lo <= n && n <= cs->ranges[i].hi)
  57. return 1;
  58. i++;
  59. }
  60. return 0;
  61. }
  62. /* Put N into charset CS. */
  63. void
  64. scm_i_charset_set (scm_t_char_set *cs, scm_t_wchar n)
  65. {
  66. size_t i;
  67. size_t len;
  68. len = cs->len;
  69. i = 0;
  70. while (i < len)
  71. {
  72. /* Already in this range */
  73. if (cs->ranges[i].lo <= n && n <= cs->ranges[i].hi)
  74. {
  75. return;
  76. }
  77. if (n == cs->ranges[i].lo - 1)
  78. {
  79. /* This char is one below the current range. */
  80. if (i > 0 && cs->ranges[i - 1].hi + 1 == n)
  81. {
  82. /* It is also one above the previous range. */
  83. /* This is an impossible condition: in the previous
  84. iteration, the test for 'one above the current range'
  85. should already have inserted the character here. */
  86. abort ();
  87. }
  88. else
  89. {
  90. /* Expand the range down by one. */
  91. cs->ranges[i].lo = n;
  92. return;
  93. }
  94. }
  95. else if (n == cs->ranges[i].hi + 1)
  96. {
  97. /* This char is one above the current range. */
  98. if (i < len - 1 && cs->ranges[i + 1].lo - 1 == n)
  99. {
  100. /* It is also one below the next range, so combine them. */
  101. cs->ranges[i].hi = cs->ranges[i + 1].hi;
  102. if (i < len - 2)
  103. memmove (cs->ranges + (i + 1), cs->ranges + (i + 2),
  104. sizeof (scm_t_char_range) * (len - i - 2));
  105. cs->ranges = scm_gc_realloc (cs->ranges,
  106. sizeof (scm_t_char_range) * len,
  107. sizeof (scm_t_char_range) * (len -
  108. 1),
  109. "character-set");
  110. cs->len = len - 1;
  111. return;
  112. }
  113. else
  114. {
  115. /* Expand the range up by one. */
  116. cs->ranges[i].hi = n;
  117. return;
  118. }
  119. }
  120. else if (n < cs->ranges[i].lo - 1)
  121. {
  122. /* This is a new range below the current one. */
  123. cs->ranges = scm_gc_realloc (cs->ranges,
  124. sizeof (scm_t_char_range) * len,
  125. sizeof (scm_t_char_range) * (len + 1),
  126. "character-set");
  127. memmove (cs->ranges + (i + 1), cs->ranges + i,
  128. sizeof (scm_t_char_range) * (len - i));
  129. cs->ranges[i].lo = n;
  130. cs->ranges[i].hi = n;
  131. cs->len = len + 1;
  132. return;
  133. }
  134. i++;
  135. }
  136. /* This is a new range above all previous ranges. */
  137. if (len == 0)
  138. {
  139. cs->ranges = scm_gc_malloc (sizeof (scm_t_char_range), "character-set");
  140. }
  141. else
  142. {
  143. cs->ranges = scm_gc_realloc (cs->ranges,
  144. sizeof (scm_t_char_range) * len,
  145. sizeof (scm_t_char_range) * (len + 1),
  146. "character-set");
  147. }
  148. cs->ranges[len].lo = n;
  149. cs->ranges[len].hi = n;
  150. cs->len = len + 1;
  151. return;
  152. }
  153. /* Put LO to HI inclusive into charset CS. */
  154. static void
  155. scm_i_charset_set_range (scm_t_char_set *cs, scm_t_wchar lo, scm_t_wchar hi)
  156. {
  157. size_t i;
  158. i = 0;
  159. while (i < cs->len)
  160. {
  161. /* Already in this range */
  162. if (cs->ranges[i].lo <= lo && cs->ranges[i].hi >= hi)
  163. return;
  164. /* cur: +---+
  165. new: +---+
  166. */
  167. if (cs->ranges[i].lo - 1 > hi)
  168. {
  169. /* Add a new range below the current one. */
  170. cs->ranges = scm_gc_realloc (cs->ranges,
  171. sizeof (scm_t_char_range) * cs->len,
  172. sizeof (scm_t_char_range) * (cs->len + 1),
  173. "character-set");
  174. memmove (cs->ranges + (i + 1), cs->ranges + i,
  175. sizeof (scm_t_char_range) * (cs->len - i));
  176. cs->ranges[i].lo = lo;
  177. cs->ranges[i].hi = hi;
  178. cs->len += 1;
  179. return;
  180. }
  181. /* cur: +---+ or +---+ or +---+
  182. new: +---+ +---+ +---+
  183. */
  184. if (cs->ranges[i].lo > lo
  185. && (cs->ranges[i].lo - 1 <= hi && cs->ranges[i].hi >= hi))
  186. {
  187. cs->ranges[i].lo = lo;
  188. return;
  189. }
  190. /* cur: +---+ or +---+ or +---+
  191. new: +---+ +---+ +---+
  192. */
  193. else if (cs->ranges[i].hi + 1 >= lo && cs->ranges[i].hi < hi)
  194. {
  195. if (cs->ranges[i].lo > lo)
  196. cs->ranges[i].lo = lo;
  197. if (cs->ranges[i].hi < hi)
  198. cs->ranges[i].hi = hi;
  199. while (i < cs->len - 1)
  200. {
  201. /* cur: --+ +---+
  202. new: -----+
  203. */
  204. if (cs->ranges[i + 1].lo - 1 > hi)
  205. break;
  206. /* cur: --+ +---+ or --+ +---+ or --+ +--+
  207. new: -----+ ------+ ---------+
  208. */
  209. /* Combine this range with the previous one. */
  210. if (cs->ranges[i + 1].hi > hi)
  211. cs->ranges[i].hi = cs->ranges[i + 1].hi;
  212. if (i + 1 < cs->len)
  213. memmove (cs->ranges + i + 1, cs->ranges + i + 2,
  214. sizeof (scm_t_char_range) * (cs->len - i - 2));
  215. cs->ranges = scm_gc_realloc (cs->ranges,
  216. sizeof (scm_t_char_range) * cs->len,
  217. sizeof (scm_t_char_range) * (cs->len - 1),
  218. "character-set");
  219. cs->len -= 1;
  220. }
  221. return;
  222. }
  223. i ++;
  224. }
  225. /* This is a new range above all previous ranges. */
  226. if (cs->len == 0)
  227. {
  228. cs->ranges = scm_gc_malloc (sizeof (scm_t_char_range), "character-set");
  229. }
  230. else
  231. {
  232. cs->ranges = scm_gc_realloc (cs->ranges,
  233. sizeof (scm_t_char_range) * cs->len,
  234. sizeof (scm_t_char_range) * (cs->len + 1),
  235. "character-set");
  236. }
  237. cs->len += 1;
  238. cs->ranges[cs->len - 1].lo = lo;
  239. cs->ranges[cs->len - 1].hi = hi;
  240. return;
  241. }
  242. /* If N is in charset CS, remove it. */
  243. void
  244. scm_i_charset_unset (scm_t_char_set *cs, scm_t_wchar n)
  245. {
  246. size_t i;
  247. size_t len;
  248. len = cs->len;
  249. i = 0;
  250. while (i < len)
  251. {
  252. if (n < cs->ranges[i].lo)
  253. /* Not in this set. */
  254. return;
  255. if (n == cs->ranges[i].lo && n == cs->ranges[i].hi)
  256. {
  257. /* Remove this one-character range. */
  258. if (len == 1)
  259. {
  260. scm_gc_free (cs->ranges,
  261. sizeof (scm_t_char_range) * cs->len,
  262. "character-set");
  263. cs->ranges = NULL;
  264. cs->len = 0;
  265. return;
  266. }
  267. else if (i < len - 1)
  268. {
  269. memmove (cs->ranges + i, cs->ranges + (i + 1),
  270. sizeof (scm_t_char_range) * (len - i - 1));
  271. cs->ranges = scm_gc_realloc (cs->ranges,
  272. sizeof (scm_t_char_range) * len,
  273. sizeof (scm_t_char_range) * (len -
  274. 1),
  275. "character-set");
  276. cs->len = len - 1;
  277. return;
  278. }
  279. else if (i == len - 1)
  280. {
  281. cs->ranges = scm_gc_realloc (cs->ranges,
  282. sizeof (scm_t_char_range) * len,
  283. sizeof (scm_t_char_range) * (len -
  284. 1),
  285. "character-set");
  286. cs->len = len - 1;
  287. return;
  288. }
  289. }
  290. else if (n == cs->ranges[i].lo)
  291. {
  292. /* Shrink this range from the left. */
  293. cs->ranges[i].lo = n + 1;
  294. return;
  295. }
  296. else if (n == cs->ranges[i].hi)
  297. {
  298. /* Shrink this range from the right. */
  299. cs->ranges[i].hi = n - 1;
  300. return;
  301. }
  302. else if (n > cs->ranges[i].lo && n < cs->ranges[i].hi)
  303. {
  304. /* Split this range into two pieces. */
  305. cs->ranges = scm_gc_realloc (cs->ranges,
  306. sizeof (scm_t_char_range) * len,
  307. sizeof (scm_t_char_range) * (len + 1),
  308. "character-set");
  309. if (i < len - 1)
  310. memmove (cs->ranges + (i + 2), cs->ranges + (i + 1),
  311. sizeof (scm_t_char_range) * (len - i - 1));
  312. cs->ranges[i + 1].hi = cs->ranges[i].hi;
  313. cs->ranges[i + 1].lo = n + 1;
  314. cs->ranges[i].hi = n - 1;
  315. cs->len = len + 1;
  316. return;
  317. }
  318. i++;
  319. }
  320. /* This value is above all ranges, so do nothing here. */
  321. return;
  322. }
  323. static int
  324. charsets_equal (scm_t_char_set *a, scm_t_char_set *b)
  325. {
  326. if (a->len != b->len)
  327. return 0;
  328. if (memcmp (a->ranges, b->ranges, sizeof (scm_t_char_range) * a->len) != 0)
  329. return 0;
  330. return 1;
  331. }
  332. /* Return true if every character in A is also in B. */
  333. static int
  334. charsets_leq (scm_t_char_set *a, scm_t_char_set *b)
  335. {
  336. size_t i = 0, j = 0;
  337. scm_t_wchar alo, ahi;
  338. if (a->len == 0)
  339. return 1;
  340. if (b->len == 0)
  341. return 0;
  342. while (i < a->len)
  343. {
  344. alo = a->ranges[i].lo;
  345. ahi = a->ranges[i].hi;
  346. while (b->ranges[j].hi < alo)
  347. {
  348. if (j < b->len - 1)
  349. j++;
  350. else
  351. return 0;
  352. }
  353. if (alo < b->ranges[j].lo || ahi > b->ranges[j].hi)
  354. return 0;
  355. i++;
  356. }
  357. return 1;
  358. }
  359. /* Merge B into A. */
  360. static void
  361. charsets_union (scm_t_char_set *a, scm_t_char_set *b)
  362. {
  363. size_t i = 0;
  364. scm_t_wchar blo, bhi;
  365. if (b->len == 0)
  366. return;
  367. if (a->len == 0)
  368. {
  369. a->len = b->len;
  370. a->ranges = scm_gc_malloc (sizeof (scm_t_char_range) * b->len,
  371. "character-set");
  372. memcpy (a->ranges, b->ranges, sizeof (scm_t_char_range) * b->len);
  373. return;
  374. }
  375. while (i < b->len)
  376. {
  377. blo = b->ranges[i].lo;
  378. bhi = b->ranges[i].hi;
  379. scm_i_charset_set_range (a, blo, bhi);
  380. i++;
  381. }
  382. return;
  383. }
  384. /* Remove elements not both in A and B from A. */
  385. static void
  386. charsets_intersection (scm_t_char_set *a, scm_t_char_set *b)
  387. {
  388. size_t i = 0;
  389. scm_t_wchar blo, bhi, n;
  390. scm_t_char_set *c;
  391. if (a->len == 0)
  392. return;
  393. if (b->len == 0)
  394. {
  395. scm_gc_free (a->ranges, sizeof (scm_t_char_range) * a->len,
  396. "character-set");
  397. a->len = 0;
  398. return;
  399. }
  400. c = (scm_t_char_set *) scm_malloc (sizeof (scm_t_char_set));
  401. c->len = 0;
  402. c->ranges = NULL;
  403. while (i < b->len)
  404. {
  405. blo = b->ranges[i].lo;
  406. bhi = b->ranges[i].hi;
  407. for (n = blo; n <= bhi; n++)
  408. if (scm_i_charset_get (a, n))
  409. scm_i_charset_set (c, n);
  410. i++;
  411. }
  412. scm_gc_free (a->ranges, sizeof (scm_t_char_range) * a->len,
  413. "character-set");
  414. a->len = c->len;
  415. if (c->len != 0)
  416. a->ranges = c->ranges;
  417. else
  418. a->ranges = NULL;
  419. free (c);
  420. return;
  421. }
  422. #define SCM_ADD_RANGE(low, high) \
  423. do { \
  424. p->ranges[idx].lo = (low); \
  425. p->ranges[idx++].hi = (high); \
  426. } while (0)
  427. #define SCM_ADD_RANGE_SKIP_SURROGATES(low, high) \
  428. do { \
  429. p->ranges[idx].lo = (low); \
  430. p->ranges[idx++].hi = SCM_CODEPOINT_SURROGATE_START - 1; \
  431. p->ranges[idx].lo = SCM_CODEPOINT_SURROGATE_END + 1; \
  432. p->ranges[idx++].hi = (high); \
  433. } while (0)
  434. /* Make P the compelement of Q. */
  435. static void
  436. charsets_complement (scm_t_char_set *p, scm_t_char_set *q)
  437. {
  438. int k, idx;
  439. idx = 0;
  440. if (q->len == 0)
  441. {
  442. /* Fill with all valid codepoints. */
  443. p->len = 2;
  444. p->ranges = scm_gc_malloc (sizeof (scm_t_char_range) * 2,
  445. "character-set");
  446. SCM_ADD_RANGE_SKIP_SURROGATES (0, SCM_CODEPOINT_MAX);
  447. return;
  448. }
  449. if (p->len > 0)
  450. scm_gc_free (p->ranges, sizeof (scm_t_char_set) * p->len,
  451. "character-set");
  452. /* Count the number of ranges needed for the output. */
  453. p->len = 0;
  454. if (q->ranges[0].lo > 0)
  455. p->len++;
  456. if (q->ranges[q->len - 1].hi < SCM_CODEPOINT_MAX)
  457. p->len++;
  458. p->len += q->len;
  459. p->ranges =
  460. (scm_t_char_range *) scm_gc_malloc (sizeof (scm_t_char_range) * p->len,
  461. "character-set");
  462. if (q->ranges[0].lo > 0)
  463. {
  464. if (q->ranges[0].lo > SCM_CODEPOINT_SURROGATE_END)
  465. SCM_ADD_RANGE_SKIP_SURROGATES (0, q->ranges[0].lo - 1);
  466. else
  467. SCM_ADD_RANGE (0, q->ranges[0].lo - 1);
  468. }
  469. for (k = 1; k < q->len; k++)
  470. {
  471. if (q->ranges[k - 1].hi < SCM_CODEPOINT_SURROGATE_START
  472. && q->ranges[k].lo - 1 > SCM_CODEPOINT_SURROGATE_END)
  473. SCM_ADD_RANGE_SKIP_SURROGATES (q->ranges[k - 1].hi + 1, q->ranges[k].lo - 1);
  474. else
  475. SCM_ADD_RANGE (q->ranges[k - 1].hi + 1, q->ranges[k].lo - 1);
  476. }
  477. if (q->ranges[q->len - 1].hi < SCM_CODEPOINT_MAX)
  478. {
  479. if (q->ranges[q->len - 1].hi < SCM_CODEPOINT_SURROGATE_START)
  480. SCM_ADD_RANGE_SKIP_SURROGATES (q->ranges[q->len - 1].hi + 1, SCM_CODEPOINT_MAX);
  481. else
  482. SCM_ADD_RANGE (q->ranges[q->len - 1].hi + 1, SCM_CODEPOINT_MAX);
  483. }
  484. return;
  485. }
  486. #undef SCM_ADD_RANGE
  487. #undef SCM_ADD_RANGE_SKIP_SURROGATES
  488. /* Replace A with elements only found in one of A or B. */
  489. static void
  490. charsets_xor (scm_t_char_set *a, scm_t_char_set *b)
  491. {
  492. size_t i = 0;
  493. scm_t_wchar blo, bhi, n;
  494. if (b->len == 0)
  495. {
  496. return;
  497. }
  498. if (a->len == 0)
  499. {
  500. a->ranges =
  501. (scm_t_char_range *) scm_gc_malloc (sizeof (scm_t_char_range) *
  502. b->len, "character-set");
  503. a->len = b->len;
  504. memcpy (a->ranges, b->ranges, sizeof (scm_t_char_range) * a->len);
  505. return;
  506. }
  507. while (i < b->len)
  508. {
  509. blo = b->ranges[i].lo;
  510. bhi = b->ranges[i].hi;
  511. for (n = blo; n <= bhi; n++)
  512. {
  513. if (scm_i_charset_get (a, n))
  514. scm_i_charset_unset (a, n);
  515. else
  516. scm_i_charset_set (a, n);
  517. }
  518. i++;
  519. }
  520. return;
  521. }
  522. /* Smob print hook for character sets. */
  523. static int
  524. charset_print (SCM charset, SCM port, scm_print_state *pstate SCM_UNUSED)
  525. {
  526. size_t i;
  527. int first = 1;
  528. scm_t_char_set *p;
  529. const size_t max_ranges_to_print = 50;
  530. p = SCM_CHARSET_DATA (charset);
  531. scm_puts_unlocked ("#<charset {", port);
  532. for (i = 0; i < p->len; i++)
  533. {
  534. if (first)
  535. first = 0;
  536. else
  537. scm_puts_unlocked (" ", port);
  538. scm_write (SCM_MAKE_CHAR (p->ranges[i].lo), port);
  539. if (p->ranges[i].lo != p->ranges[i].hi)
  540. {
  541. scm_puts_unlocked ("..", port);
  542. scm_write (SCM_MAKE_CHAR (p->ranges[i].hi), port);
  543. }
  544. if (i >= max_ranges_to_print)
  545. {
  546. /* Too many to print here. Quit early. */
  547. scm_puts_unlocked (" ...", port);
  548. break;
  549. }
  550. }
  551. scm_puts_unlocked ("}>", port);
  552. return 1;
  553. }
  554. /* Smob print hook for character sets cursors. */
  555. static int
  556. charset_cursor_print (SCM cursor, SCM port,
  557. scm_print_state *pstate SCM_UNUSED)
  558. {
  559. scm_t_char_set_cursor *cur;
  560. cur = (scm_t_char_set_cursor *) SCM_SMOB_DATA (cursor);
  561. scm_puts_unlocked ("#<charset-cursor ", port);
  562. if (cur->range == (size_t) (-1))
  563. scm_puts_unlocked ("(empty)", port);
  564. else
  565. {
  566. scm_write (scm_from_size_t (cur->range), port);
  567. scm_puts_unlocked (":", port);
  568. scm_write (scm_from_int32 (cur->n), port);
  569. }
  570. scm_puts_unlocked (">", port);
  571. return 1;
  572. }
  573. /* Create a new, empty character set. */
  574. static SCM
  575. make_char_set (const char *func_name)
  576. {
  577. scm_t_char_set *p;
  578. p = scm_gc_malloc (sizeof (scm_t_char_set), "character-set");
  579. memset (p, 0, sizeof (scm_t_char_set));
  580. SCM_RETURN_NEWSMOB (scm_tc16_charset, p);
  581. }
  582. SCM_DEFINE (scm_char_set_p, "char-set?", 1, 0, 0,
  583. (SCM obj),
  584. "Return @code{#t} if @var{obj} is a character set, @code{#f}\n"
  585. "otherwise.")
  586. #define FUNC_NAME s_scm_char_set_p
  587. {
  588. return scm_from_bool (SCM_SMOB_PREDICATE (scm_tc16_charset, obj));
  589. }
  590. #undef FUNC_NAME
  591. SCM_DEFINE (scm_char_set_eq, "char-set=", 0, 0, 1,
  592. (SCM char_sets),
  593. "Return @code{#t} if all given character sets are equal.")
  594. #define FUNC_NAME s_scm_char_set_eq
  595. {
  596. int argnum = 1;
  597. scm_t_char_set *cs1_data = NULL;
  598. SCM_VALIDATE_REST_ARGUMENT (char_sets);
  599. while (!scm_is_null (char_sets))
  600. {
  601. SCM csi = SCM_CAR (char_sets);
  602. scm_t_char_set *csi_data;
  603. SCM_VALIDATE_SMOB (argnum, csi, charset);
  604. argnum++;
  605. csi_data = SCM_CHARSET_DATA (csi);
  606. if (cs1_data == NULL)
  607. cs1_data = csi_data;
  608. else if (!charsets_equal (cs1_data, csi_data))
  609. return SCM_BOOL_F;
  610. char_sets = SCM_CDR (char_sets);
  611. }
  612. return SCM_BOOL_T;
  613. }
  614. #undef FUNC_NAME
  615. SCM_DEFINE (scm_char_set_leq, "char-set<=", 0, 0, 1,
  616. (SCM char_sets),
  617. "Return @code{#t} if every character set @var{char_set}i is a subset\n"
  618. "of character set @var{char_set}i+1.")
  619. #define FUNC_NAME s_scm_char_set_leq
  620. {
  621. int argnum = 1;
  622. scm_t_char_set *prev_data = NULL;
  623. SCM_VALIDATE_REST_ARGUMENT (char_sets);
  624. while (!scm_is_null (char_sets))
  625. {
  626. SCM csi = SCM_CAR (char_sets);
  627. scm_t_char_set *csi_data;
  628. SCM_VALIDATE_SMOB (argnum, csi, charset);
  629. argnum++;
  630. csi_data = SCM_CHARSET_DATA (csi);
  631. if (prev_data)
  632. {
  633. if (!charsets_leq (prev_data, csi_data))
  634. return SCM_BOOL_F;
  635. }
  636. prev_data = csi_data;
  637. char_sets = SCM_CDR (char_sets);
  638. }
  639. return SCM_BOOL_T;
  640. }
  641. #undef FUNC_NAME
  642. SCM_DEFINE (scm_char_set_hash, "char-set-hash", 1, 1, 0,
  643. (SCM cs, SCM bound),
  644. "Compute a hash value for the character set @var{cs}. If\n"
  645. "@var{bound} is given and non-zero, it restricts the\n"
  646. "returned value to the range 0 @dots{} @var{bound} - 1.")
  647. #define FUNC_NAME s_scm_char_set_hash
  648. {
  649. const unsigned long default_bnd = 871;
  650. unsigned long bnd;
  651. scm_t_char_set *p;
  652. unsigned long val = 0;
  653. int k;
  654. scm_t_wchar c;
  655. SCM_VALIDATE_SMOB (1, cs, charset);
  656. if (SCM_UNBNDP (bound))
  657. bnd = default_bnd;
  658. else
  659. {
  660. bnd = scm_to_ulong (bound);
  661. if (bnd == 0)
  662. bnd = default_bnd;
  663. }
  664. p = SCM_CHARSET_DATA (cs);
  665. for (k = 0; k < p->len; k++)
  666. {
  667. for (c = p->ranges[k].lo; c <= p->ranges[k].hi; c++)
  668. val = c + (val << 1);
  669. }
  670. return scm_from_ulong (val % bnd);
  671. }
  672. #undef FUNC_NAME
  673. SCM_DEFINE (scm_char_set_cursor, "char-set-cursor", 1, 0, 0,
  674. (SCM cs), "Return a cursor into the character set @var{cs}.")
  675. #define FUNC_NAME s_scm_char_set_cursor
  676. {
  677. scm_t_char_set *cs_data;
  678. scm_t_char_set_cursor *cur_data;
  679. SCM_VALIDATE_SMOB (1, cs, charset);
  680. cs_data = SCM_CHARSET_DATA (cs);
  681. cur_data =
  682. (scm_t_char_set_cursor *) scm_gc_malloc (sizeof (scm_t_char_set_cursor),
  683. "charset-cursor");
  684. if (cs_data->len == 0)
  685. {
  686. cur_data->range = (size_t) (-1);
  687. cur_data->n = 0;
  688. }
  689. else
  690. {
  691. cur_data->range = 0;
  692. cur_data->n = cs_data->ranges[0].lo;
  693. }
  694. SCM_RETURN_NEWSMOB (scm_tc16_charset_cursor, cur_data);
  695. }
  696. #undef FUNC_NAME
  697. SCM_DEFINE (scm_char_set_ref, "char-set-ref", 2, 0, 0,
  698. (SCM cs, SCM cursor),
  699. "Return the character at the current cursor position\n"
  700. "@var{cursor} in the character set @var{cs}. It is an error to\n"
  701. "pass a cursor for which @code{end-of-char-set?} returns true.")
  702. #define FUNC_NAME s_scm_char_set_ref
  703. {
  704. scm_t_char_set *cs_data;
  705. scm_t_char_set_cursor *cur_data;
  706. size_t i;
  707. SCM_VALIDATE_SMOB (1, cs, charset);
  708. SCM_VALIDATE_SMOB (2, cursor, charset_cursor);
  709. cs_data = SCM_CHARSET_DATA (cs);
  710. cur_data = (scm_t_char_set_cursor *) SCM_SMOB_DATA (cursor);
  711. /* Validate that this cursor is still true. */
  712. i = cur_data->range;
  713. if (i == (size_t) (-1)
  714. || i >= cs_data->len
  715. || cur_data->n < cs_data->ranges[i].lo
  716. || cur_data->n > cs_data->ranges[i].hi)
  717. SCM_MISC_ERROR ("invalid character set cursor: ~A", scm_list_1 (cursor));
  718. return SCM_MAKE_CHAR (cur_data->n);
  719. }
  720. #undef FUNC_NAME
  721. SCM_DEFINE (scm_char_set_cursor_next, "char-set-cursor-next", 2, 0, 0,
  722. (SCM cs, SCM cursor),
  723. "Advance the character set cursor @var{cursor} to the next\n"
  724. "character in the character set @var{cs}. It is an error if the\n"
  725. "cursor given satisfies @code{end-of-char-set?}.")
  726. #define FUNC_NAME s_scm_char_set_cursor_next
  727. {
  728. scm_t_char_set *cs_data;
  729. scm_t_char_set_cursor *cur_data;
  730. size_t i;
  731. SCM_VALIDATE_SMOB (1, cs, charset);
  732. SCM_VALIDATE_SMOB (2, cursor, charset_cursor);
  733. cs_data = SCM_CHARSET_DATA (cs);
  734. cur_data = (scm_t_char_set_cursor *) SCM_SMOB_DATA (cursor);
  735. /* Validate that this cursor is still true. */
  736. i = cur_data->range;
  737. if (i == (size_t) (-1)
  738. || i >= cs_data->len
  739. || cur_data->n < cs_data->ranges[i].lo
  740. || cur_data->n > cs_data->ranges[i].hi)
  741. SCM_MISC_ERROR ("invalid character set cursor: ~A", scm_list_1 (cursor));
  742. /* Increment the cursor. */
  743. if (cur_data->n == cs_data->ranges[i].hi)
  744. {
  745. if (i + 1 < cs_data->len)
  746. {
  747. cur_data->range = i + 1;
  748. cur_data->n = cs_data->ranges[i + 1].lo;
  749. }
  750. else
  751. {
  752. /* This is the end of the road. */
  753. cur_data->range = (size_t) (-1);
  754. cur_data->n = 0;
  755. }
  756. }
  757. else
  758. {
  759. cur_data->n = cur_data->n + 1;
  760. }
  761. return cursor;
  762. }
  763. #undef FUNC_NAME
  764. SCM_DEFINE (scm_end_of_char_set_p, "end-of-char-set?", 1, 0, 0,
  765. (SCM cursor),
  766. "Return @code{#t} if @var{cursor} has reached the end of a\n"
  767. "character set, @code{#f} otherwise.")
  768. #define FUNC_NAME s_scm_end_of_char_set_p
  769. {
  770. scm_t_char_set_cursor *cur_data;
  771. SCM_VALIDATE_SMOB (1, cursor, charset_cursor);
  772. cur_data = (scm_t_char_set_cursor *) SCM_SMOB_DATA (cursor);
  773. if (cur_data->range == (size_t) (-1))
  774. return SCM_BOOL_T;
  775. return SCM_BOOL_F;
  776. }
  777. #undef FUNC_NAME
  778. SCM_DEFINE (scm_char_set_fold, "char-set-fold", 3, 0, 0,
  779. (SCM kons, SCM knil, SCM cs),
  780. "Fold the procedure @var{kons} over the character set @var{cs},\n"
  781. "initializing it with @var{knil}.")
  782. #define FUNC_NAME s_scm_char_set_fold
  783. {
  784. scm_t_char_set *cs_data;
  785. int k;
  786. scm_t_wchar n;
  787. SCM_VALIDATE_PROC (1, kons);
  788. SCM_VALIDATE_SMOB (3, cs, charset);
  789. cs_data = SCM_CHARSET_DATA (cs);
  790. if (cs_data->len == 0)
  791. return knil;
  792. for (k = 0; k < cs_data->len; k++)
  793. for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++)
  794. {
  795. knil = scm_call_2 (kons, SCM_MAKE_CHAR (n), knil);
  796. }
  797. return knil;
  798. }
  799. #undef FUNC_NAME
  800. SCM_DEFINE (scm_char_set_unfold, "char-set-unfold", 4, 1, 0,
  801. (SCM p, SCM f, SCM g, SCM seed, SCM base_cs),
  802. "This is a fundamental constructor for character sets.\n"
  803. "@itemize @bullet\n"
  804. "@item @var{g} is used to generate a series of ``seed'' values\n"
  805. "from the initial seed: @var{seed}, (@var{g} @var{seed}),\n"
  806. "(@var{g}^2 @var{seed}), (@var{g}^3 @var{seed}), @dots{}\n"
  807. "@item @var{p} tells us when to stop -- when it returns true\n"
  808. "when applied to one of the seed values.\n"
  809. "@item @var{f} maps each seed value to a character. These\n"
  810. "characters are added to the base character set @var{base_cs} to\n"
  811. "form the result; @var{base_cs} defaults to the empty set.\n"
  812. "@end itemize")
  813. #define FUNC_NAME s_scm_char_set_unfold
  814. {
  815. SCM result, tmp;
  816. SCM_VALIDATE_PROC (1, p);
  817. SCM_VALIDATE_PROC (2, f);
  818. SCM_VALIDATE_PROC (3, g);
  819. if (!SCM_UNBNDP (base_cs))
  820. {
  821. SCM_VALIDATE_SMOB (5, base_cs, charset);
  822. result = scm_char_set_copy (base_cs);
  823. }
  824. else
  825. result = make_char_set (FUNC_NAME);
  826. tmp = scm_call_1 (p, seed);
  827. while (scm_is_false (tmp))
  828. {
  829. SCM ch = scm_call_1 (f, seed);
  830. if (!SCM_CHARP (ch))
  831. SCM_MISC_ERROR ("procedure ~S returned non-char", scm_list_1 (f));
  832. SCM_CHARSET_SET (result, SCM_CHAR (ch));
  833. seed = scm_call_1 (g, seed);
  834. tmp = scm_call_1 (p, seed);
  835. }
  836. return result;
  837. }
  838. #undef FUNC_NAME
  839. SCM_DEFINE (scm_char_set_unfold_x, "char-set-unfold!", 5, 0, 0,
  840. (SCM p, SCM f, SCM g, SCM seed, SCM base_cs),
  841. "This is a fundamental constructor for character sets.\n"
  842. "@itemize @bullet\n"
  843. "@item @var{g} is used to generate a series of ``seed'' values\n"
  844. "from the initial seed: @var{seed}, (@var{g} @var{seed}),\n"
  845. "(@var{g}^2 @var{seed}), (@var{g}^3 @var{seed}), @dots{}\n"
  846. "@item @var{p} tells us when to stop -- when it returns true\n"
  847. "when applied to one of the seed values.\n"
  848. "@item @var{f} maps each seed value to a character. These\n"
  849. "characters are added to the base character set @var{base_cs} to\n"
  850. "form the result; @var{base_cs} defaults to the empty set.\n"
  851. "@end itemize")
  852. #define FUNC_NAME s_scm_char_set_unfold_x
  853. {
  854. SCM tmp;
  855. SCM_VALIDATE_PROC (1, p);
  856. SCM_VALIDATE_PROC (2, f);
  857. SCM_VALIDATE_PROC (3, g);
  858. SCM_VALIDATE_SMOB (5, base_cs, charset);
  859. tmp = scm_call_1 (p, seed);
  860. while (scm_is_false (tmp))
  861. {
  862. SCM ch = scm_call_1 (f, seed);
  863. if (!SCM_CHARP (ch))
  864. SCM_MISC_ERROR ("procedure ~S returned non-char", scm_list_1 (f));
  865. SCM_CHARSET_SET (base_cs, SCM_CHAR (ch));
  866. seed = scm_call_1 (g, seed);
  867. tmp = scm_call_1 (p, seed);
  868. }
  869. return base_cs;
  870. }
  871. #undef FUNC_NAME
  872. SCM_DEFINE (scm_char_set_for_each, "char-set-for-each", 2, 0, 0,
  873. (SCM proc, SCM cs),
  874. "Apply @var{proc} to every character in the character set\n"
  875. "@var{cs}. The return value is not specified.")
  876. #define FUNC_NAME s_scm_char_set_for_each
  877. {
  878. scm_t_char_set *cs_data;
  879. int k;
  880. scm_t_wchar n;
  881. SCM_VALIDATE_PROC (1, proc);
  882. SCM_VALIDATE_SMOB (2, cs, charset);
  883. cs_data = SCM_CHARSET_DATA (cs);
  884. if (cs_data->len == 0)
  885. return SCM_UNSPECIFIED;
  886. for (k = 0; k < cs_data->len; k++)
  887. for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++)
  888. {
  889. scm_call_1 (proc, SCM_MAKE_CHAR (n));
  890. }
  891. return SCM_UNSPECIFIED;
  892. }
  893. #undef FUNC_NAME
  894. SCM_DEFINE (scm_char_set_map, "char-set-map", 2, 0, 0,
  895. (SCM proc, SCM cs),
  896. "Map the procedure @var{proc} over every character in @var{cs}.\n"
  897. "@var{proc} must be a character -> character procedure.")
  898. #define FUNC_NAME s_scm_char_set_map
  899. {
  900. SCM result;
  901. int k;
  902. scm_t_char_set *cs_data;
  903. scm_t_wchar n;
  904. SCM_VALIDATE_PROC (1, proc);
  905. SCM_VALIDATE_SMOB (2, cs, charset);
  906. result = make_char_set (FUNC_NAME);
  907. cs_data = SCM_CHARSET_DATA (cs);
  908. if (cs_data->len == 0)
  909. return result;
  910. for (k = 0; k < cs_data->len; k++)
  911. for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++)
  912. {
  913. SCM ch = scm_call_1 (proc, SCM_MAKE_CHAR (n));
  914. if (!SCM_CHARP (ch))
  915. SCM_MISC_ERROR ("procedure ~S returned non-char",
  916. scm_list_1 (proc));
  917. SCM_CHARSET_SET (result, SCM_CHAR (ch));
  918. }
  919. return result;
  920. }
  921. #undef FUNC_NAME
  922. SCM_DEFINE (scm_char_set_copy, "char-set-copy", 1, 0, 0,
  923. (SCM cs),
  924. "Return a newly allocated character set containing all\n"
  925. "characters in @var{cs}.")
  926. #define FUNC_NAME s_scm_char_set_copy
  927. {
  928. SCM ret;
  929. scm_t_char_set *p1, *p2;
  930. SCM_VALIDATE_SMOB (1, cs, charset);
  931. ret = make_char_set (FUNC_NAME);
  932. p1 = SCM_CHARSET_DATA (cs);
  933. p2 = SCM_CHARSET_DATA (ret);
  934. p2->len = p1->len;
  935. if (p1->len == 0)
  936. p2->ranges = NULL;
  937. else
  938. {
  939. p2->ranges = scm_gc_malloc (sizeof (scm_t_char_range) * p1->len,
  940. "character-set");
  941. memcpy (p2->ranges, p1->ranges, sizeof (scm_t_char_range) * p1->len);
  942. }
  943. return ret;
  944. }
  945. #undef FUNC_NAME
  946. SCM_DEFINE (scm_char_set, "char-set", 0, 0, 1,
  947. (SCM rest),
  948. "Return a character set containing all given characters.")
  949. #define FUNC_NAME s_scm_char_set
  950. {
  951. SCM cs;
  952. int argnum = 1;
  953. SCM_VALIDATE_REST_ARGUMENT (rest);
  954. cs = make_char_set (FUNC_NAME);
  955. while (!scm_is_null (rest))
  956. {
  957. scm_t_wchar c;
  958. SCM_VALIDATE_CHAR_COPY (argnum, SCM_CAR (rest), c);
  959. argnum++;
  960. rest = SCM_CDR (rest);
  961. SCM_CHARSET_SET (cs, c);
  962. }
  963. return cs;
  964. }
  965. #undef FUNC_NAME
  966. SCM_DEFINE (scm_list_to_char_set, "list->char-set", 1, 1, 0,
  967. (SCM list, SCM base_cs),
  968. "Convert the character list @var{list} to a character set. If\n"
  969. "the character set @var{base_cs} is given, the character in this\n"
  970. "set are also included in the result.")
  971. #define FUNC_NAME s_scm_list_to_char_set
  972. {
  973. SCM cs;
  974. SCM_VALIDATE_LIST (1, list);
  975. if (SCM_UNBNDP (base_cs))
  976. cs = make_char_set (FUNC_NAME);
  977. else
  978. {
  979. SCM_VALIDATE_SMOB (2, base_cs, charset);
  980. cs = scm_char_set_copy (base_cs);
  981. }
  982. while (!scm_is_null (list))
  983. {
  984. SCM chr = SCM_CAR (list);
  985. scm_t_wchar c;
  986. SCM_VALIDATE_CHAR_COPY (0, chr, c);
  987. list = SCM_CDR (list);
  988. SCM_CHARSET_SET (cs, c);
  989. }
  990. return cs;
  991. }
  992. #undef FUNC_NAME
  993. SCM_DEFINE (scm_list_to_char_set_x, "list->char-set!", 2, 0, 0,
  994. (SCM list, SCM base_cs),
  995. "Convert the character list @var{list} to a character set. The\n"
  996. "characters are added to @var{base_cs} and @var{base_cs} is\n"
  997. "returned.")
  998. #define FUNC_NAME s_scm_list_to_char_set_x
  999. {
  1000. SCM_VALIDATE_LIST (1, list);
  1001. SCM_VALIDATE_SMOB (2, base_cs, charset);
  1002. while (!scm_is_null (list))
  1003. {
  1004. SCM chr = SCM_CAR (list);
  1005. scm_t_wchar c;
  1006. SCM_VALIDATE_CHAR_COPY (0, chr, c);
  1007. list = SCM_CDR (list);
  1008. SCM_CHARSET_SET (base_cs, c);
  1009. }
  1010. return base_cs;
  1011. }
  1012. #undef FUNC_NAME
  1013. SCM_DEFINE (scm_string_to_char_set, "string->char-set", 1, 1, 0,
  1014. (SCM str, SCM base_cs),
  1015. "Convert the string @var{str} to a character set. If the\n"
  1016. "character set @var{base_cs} is given, the characters in this\n"
  1017. "set are also included in the result.")
  1018. #define FUNC_NAME s_scm_string_to_char_set
  1019. {
  1020. SCM cs;
  1021. size_t k = 0, len;
  1022. SCM_VALIDATE_STRING (1, str);
  1023. if (SCM_UNBNDP (base_cs))
  1024. cs = make_char_set (FUNC_NAME);
  1025. else
  1026. {
  1027. SCM_VALIDATE_SMOB (2, base_cs, charset);
  1028. cs = scm_char_set_copy (base_cs);
  1029. }
  1030. len = scm_i_string_length (str);
  1031. while (k < len)
  1032. {
  1033. scm_t_wchar c = scm_i_string_ref (str, k++);
  1034. SCM_CHARSET_SET (cs, c);
  1035. }
  1036. scm_remember_upto_here_1 (str);
  1037. return cs;
  1038. }
  1039. #undef FUNC_NAME
  1040. SCM_DEFINE (scm_string_to_char_set_x, "string->char-set!", 2, 0, 0,
  1041. (SCM str, SCM base_cs),
  1042. "Convert the string @var{str} to a character set. The\n"
  1043. "characters from the string are added to @var{base_cs}, and\n"
  1044. "@var{base_cs} is returned.")
  1045. #define FUNC_NAME s_scm_string_to_char_set_x
  1046. {
  1047. size_t k = 0, len;
  1048. SCM_VALIDATE_STRING (1, str);
  1049. SCM_VALIDATE_SMOB (2, base_cs, charset);
  1050. len = scm_i_string_length (str);
  1051. while (k < len)
  1052. {
  1053. scm_t_wchar c = scm_i_string_ref (str, k++);
  1054. SCM_CHARSET_SET (base_cs, c);
  1055. }
  1056. scm_remember_upto_here_1 (str);
  1057. return base_cs;
  1058. }
  1059. #undef FUNC_NAME
  1060. SCM_DEFINE (scm_char_set_filter, "char-set-filter", 2, 1, 0,
  1061. (SCM pred, SCM cs, SCM base_cs),
  1062. "Return a character set containing every character from @var{cs}\n"
  1063. "so that it satisfies @var{pred}. If provided, the characters\n"
  1064. "from @var{base_cs} are added to the result.")
  1065. #define FUNC_NAME s_scm_char_set_filter
  1066. {
  1067. SCM ret;
  1068. int k;
  1069. scm_t_wchar n;
  1070. scm_t_char_set *p;
  1071. SCM_VALIDATE_PROC (1, pred);
  1072. SCM_VALIDATE_SMOB (2, cs, charset);
  1073. if (!SCM_UNBNDP (base_cs))
  1074. {
  1075. SCM_VALIDATE_SMOB (3, base_cs, charset);
  1076. ret = scm_char_set_copy (base_cs);
  1077. }
  1078. else
  1079. ret = make_char_set (FUNC_NAME);
  1080. p = SCM_CHARSET_DATA (cs);
  1081. if (p->len == 0)
  1082. return ret;
  1083. for (k = 0; k < p->len; k++)
  1084. for (n = p->ranges[k].lo; n <= p->ranges[k].hi; n++)
  1085. {
  1086. SCM res = scm_call_1 (pred, SCM_MAKE_CHAR (n));
  1087. if (scm_is_true (res))
  1088. SCM_CHARSET_SET (ret, n);
  1089. }
  1090. return ret;
  1091. }
  1092. #undef FUNC_NAME
  1093. SCM_DEFINE (scm_char_set_filter_x, "char-set-filter!", 3, 0, 0,
  1094. (SCM pred, SCM cs, SCM base_cs),
  1095. "Return a character set containing every character from @var{cs}\n"
  1096. "so that it satisfies @var{pred}. The characters are added to\n"
  1097. "@var{base_cs} and @var{base_cs} is returned.")
  1098. #define FUNC_NAME s_scm_char_set_filter_x
  1099. {
  1100. int k;
  1101. scm_t_wchar n;
  1102. scm_t_char_set *p;
  1103. SCM_VALIDATE_PROC (1, pred);
  1104. SCM_VALIDATE_SMOB (2, cs, charset);
  1105. SCM_VALIDATE_SMOB (3, base_cs, charset);
  1106. p = SCM_CHARSET_DATA (cs);
  1107. if (p->len == 0)
  1108. return base_cs;
  1109. for (k = 0; k < p->len; k++)
  1110. for (n = p->ranges[k].lo; n <= p->ranges[k].hi; n++)
  1111. {
  1112. SCM res = scm_call_1 (pred, SCM_MAKE_CHAR (n));
  1113. if (scm_is_true (res))
  1114. SCM_CHARSET_SET (base_cs, n);
  1115. }
  1116. return base_cs;
  1117. }
  1118. #undef FUNC_NAME
  1119. /* Return a character set containing all the characters from [LOWER,UPPER),
  1120. giving range errors if ERROR, adding chars from BASE_CS, and recycling
  1121. BASE_CS if REUSE is true. */
  1122. static SCM
  1123. scm_i_ucs_range_to_char_set (const char *FUNC_NAME, SCM lower, SCM upper,
  1124. SCM error, SCM base_cs, int reuse)
  1125. {
  1126. SCM cs;
  1127. size_t clower, cupper;
  1128. clower = scm_to_size_t (lower);
  1129. cupper = scm_to_size_t (upper) - 1;
  1130. SCM_ASSERT_RANGE (2, upper, cupper >= clower);
  1131. if (!SCM_UNBNDP (error))
  1132. {
  1133. if (scm_is_true (error))
  1134. {
  1135. SCM_ASSERT_RANGE (1, lower, SCM_IS_UNICODE_CHAR (clower));
  1136. SCM_ASSERT_RANGE (2, upper, SCM_IS_UNICODE_CHAR (cupper));
  1137. if (clower < SCM_CODEPOINT_SURROGATE_START
  1138. && cupper > SCM_CODEPOINT_SURROGATE_END)
  1139. scm_error(scm_out_of_range_key,
  1140. FUNC_NAME, "invalid range - contains surrogate characters: ~S to ~S",
  1141. scm_list_2 (lower, upper), scm_list_1 (upper));
  1142. }
  1143. }
  1144. if (SCM_UNBNDP (base_cs))
  1145. cs = make_char_set (FUNC_NAME);
  1146. else
  1147. {
  1148. SCM_VALIDATE_SMOB (3, base_cs, charset);
  1149. if (reuse)
  1150. cs = base_cs;
  1151. else
  1152. cs = scm_char_set_copy (base_cs);
  1153. }
  1154. if ((clower >= SCM_CODEPOINT_SURROGATE_START && clower <= SCM_CODEPOINT_SURROGATE_END)
  1155. && (cupper >= SCM_CODEPOINT_SURROGATE_START && cupper <= SCM_CODEPOINT_SURROGATE_END))
  1156. return cs;
  1157. if (clower > SCM_CODEPOINT_MAX)
  1158. clower = SCM_CODEPOINT_MAX;
  1159. if (clower >= SCM_CODEPOINT_SURROGATE_START && clower <= SCM_CODEPOINT_SURROGATE_END)
  1160. clower = SCM_CODEPOINT_SURROGATE_END + 1;
  1161. if (cupper > SCM_CODEPOINT_MAX)
  1162. cupper = SCM_CODEPOINT_MAX;
  1163. if (cupper >= SCM_CODEPOINT_SURROGATE_START && cupper <= SCM_CODEPOINT_SURROGATE_END)
  1164. cupper = SCM_CODEPOINT_SURROGATE_START - 1;
  1165. if (clower < SCM_CODEPOINT_SURROGATE_START && cupper > SCM_CODEPOINT_SURROGATE_END)
  1166. {
  1167. scm_i_charset_set_range (SCM_CHARSET_DATA (cs), clower, SCM_CODEPOINT_SURROGATE_START - 1);
  1168. scm_i_charset_set_range (SCM_CHARSET_DATA (cs), SCM_CODEPOINT_SURROGATE_END + 1, cupper);
  1169. }
  1170. else
  1171. scm_i_charset_set_range (SCM_CHARSET_DATA (cs), clower, cupper);
  1172. return cs;
  1173. }
  1174. SCM_DEFINE (scm_ucs_range_to_char_set, "ucs-range->char-set", 2, 2, 0,
  1175. (SCM lower, SCM upper, SCM error, SCM base_cs),
  1176. "Return a character set containing all characters whose\n"
  1177. "character codes lie in the half-open range\n"
  1178. "[@var{lower},@var{upper}).\n"
  1179. "\n"
  1180. "If @var{error} is a true value, an error is signalled if the\n"
  1181. "specified range contains characters which are not valid\n"
  1182. "Unicode code points. If @var{error} is @code{#f},\n"
  1183. "these characters are silently left out of the resulting\n"
  1184. "character set.\n"
  1185. "\n"
  1186. "The characters in @var{base_cs} are added to the result, if\n"
  1187. "given.")
  1188. #define FUNC_NAME s_scm_ucs_range_to_char_set
  1189. {
  1190. return scm_i_ucs_range_to_char_set (FUNC_NAME, lower, upper,
  1191. error, base_cs, 0);
  1192. }
  1193. #undef FUNC_NAME
  1194. SCM_DEFINE (scm_ucs_range_to_char_set_x, "ucs-range->char-set!", 4, 0, 0,
  1195. (SCM lower, SCM upper, SCM error, SCM base_cs),
  1196. "Return a character set containing all characters whose\n"
  1197. "character codes lie in the half-open range\n"
  1198. "[@var{lower},@var{upper}).\n"
  1199. "\n"
  1200. "If @var{error} is a true value, an error is signalled if the\n"
  1201. "specified range contains characters which are not contained in\n"
  1202. "the implemented character range. If @var{error} is @code{#f},\n"
  1203. "these characters are silently left out of the resulting\n"
  1204. "character set.\n"
  1205. "\n"
  1206. "The characters are added to @var{base_cs} and @var{base_cs} is\n"
  1207. "returned.")
  1208. #define FUNC_NAME s_scm_ucs_range_to_char_set_x
  1209. {
  1210. SCM_VALIDATE_SMOB (4, base_cs, charset);
  1211. return scm_i_ucs_range_to_char_set (FUNC_NAME, lower, upper,
  1212. error, base_cs, 1);
  1213. }
  1214. #undef FUNC_NAME
  1215. SCM_DEFINE (scm_to_char_set, "->char-set", 1, 0, 0,
  1216. (SCM x),
  1217. "Coerces x into a char-set. @var{x} may be a string, character or char-set. A string is converted to the set of its constituent characters; a character is converted to a singleton set; a char-set is returned as-is.")
  1218. #define FUNC_NAME s_scm_to_char_set
  1219. {
  1220. if (scm_is_string (x))
  1221. return scm_string_to_char_set (x, SCM_UNDEFINED);
  1222. else if (SCM_CHARP (x))
  1223. return scm_char_set (scm_list_1 (x));
  1224. else if (SCM_SMOB_PREDICATE (scm_tc16_charset, x))
  1225. return x;
  1226. else
  1227. scm_wrong_type_arg (NULL, 0, x);
  1228. }
  1229. #undef FUNC_NAME
  1230. SCM_DEFINE (scm_char_set_size, "char-set-size", 1, 0, 0,
  1231. (SCM cs),
  1232. "Return the number of elements in character set @var{cs}.")
  1233. #define FUNC_NAME s_scm_char_set_size
  1234. {
  1235. int k, count = 0;
  1236. scm_t_char_set *cs_data;
  1237. SCM_VALIDATE_SMOB (1, cs, charset);
  1238. cs_data = SCM_CHARSET_DATA (cs);
  1239. if (cs_data->len == 0)
  1240. return scm_from_int (0);
  1241. for (k = 0; k < cs_data->len; k++)
  1242. count += cs_data->ranges[k].hi - cs_data->ranges[k].lo + 1;
  1243. return scm_from_int (count);
  1244. }
  1245. #undef FUNC_NAME
  1246. SCM_DEFINE (scm_char_set_count, "char-set-count", 2, 0, 0,
  1247. (SCM pred, SCM cs),
  1248. "Return the number of the elements int the character set\n"
  1249. "@var{cs} which satisfy the predicate @var{pred}.")
  1250. #define FUNC_NAME s_scm_char_set_count
  1251. {
  1252. int k, count = 0;
  1253. scm_t_wchar n;
  1254. scm_t_char_set *cs_data;
  1255. SCM_VALIDATE_PROC (1, pred);
  1256. SCM_VALIDATE_SMOB (2, cs, charset);
  1257. cs_data = SCM_CHARSET_DATA (cs);
  1258. if (cs_data->len == 0)
  1259. return scm_from_int (0);
  1260. for (k = 0; k < cs_data->len; k++)
  1261. for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++)
  1262. {
  1263. SCM res = scm_call_1 (pred, SCM_MAKE_CHAR (n));
  1264. if (scm_is_true (res))
  1265. count++;
  1266. }
  1267. return SCM_I_MAKINUM (count);
  1268. }
  1269. #undef FUNC_NAME
  1270. SCM_DEFINE (scm_char_set_to_list, "char-set->list", 1, 0, 0,
  1271. (SCM cs),
  1272. "Return a list containing the elements of the character set\n"
  1273. "@var{cs}.")
  1274. #define FUNC_NAME s_scm_char_set_to_list
  1275. {
  1276. int k;
  1277. scm_t_wchar n;
  1278. SCM result = SCM_EOL;
  1279. scm_t_char_set *p;
  1280. SCM_VALIDATE_SMOB (1, cs, charset);
  1281. p = SCM_CHARSET_DATA (cs);
  1282. if (p->len == 0)
  1283. return SCM_EOL;
  1284. for (k = p->len - 1; k >= 0; k--)
  1285. for (n = p->ranges[k].hi; n >= p->ranges[k].lo; n--)
  1286. result = scm_cons (SCM_MAKE_CHAR (n), result);
  1287. return result;
  1288. }
  1289. #undef FUNC_NAME
  1290. SCM_DEFINE (scm_char_set_to_string, "char-set->string", 1, 0, 0,
  1291. (SCM cs),
  1292. "Return a string containing the elements of the character set\n"
  1293. "@var{cs}. The order in which the characters are placed in the\n"
  1294. "string is not defined.")
  1295. #define FUNC_NAME s_scm_char_set_to_string
  1296. {
  1297. int k;
  1298. int count = 0;
  1299. int idx = 0;
  1300. int wide = 0;
  1301. SCM result;
  1302. scm_t_wchar n;
  1303. scm_t_char_set *cs_data;
  1304. char *buf;
  1305. scm_t_wchar *wbuf;
  1306. SCM_VALIDATE_SMOB (1, cs, charset);
  1307. cs_data = SCM_CHARSET_DATA (cs);
  1308. if (cs_data->len == 0)
  1309. return scm_nullstr;
  1310. if (cs_data->ranges[cs_data->len - 1].hi > 255)
  1311. wide = 1;
  1312. count = scm_to_int (scm_char_set_size (cs));
  1313. if (wide)
  1314. result = scm_i_make_wide_string (count, &wbuf, 0);
  1315. else
  1316. result = scm_i_make_string (count, &buf, 0);
  1317. for (k = 0; k < cs_data->len; k++)
  1318. for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++)
  1319. {
  1320. if (wide)
  1321. wbuf[idx++] = n;
  1322. else
  1323. buf[idx++] = n;
  1324. }
  1325. return result;
  1326. }
  1327. #undef FUNC_NAME
  1328. SCM_DEFINE (scm_char_set_contains_p, "char-set-contains?", 2, 0, 0,
  1329. (SCM cs, SCM ch),
  1330. "Return @code{#t} iff the character @var{ch} is contained in the\n"
  1331. "character set @var{cs}.")
  1332. #define FUNC_NAME s_scm_char_set_contains_p
  1333. {
  1334. SCM_VALIDATE_SMOB (1, cs, charset);
  1335. SCM_VALIDATE_CHAR (2, ch);
  1336. return scm_from_bool (SCM_CHARSET_GET (cs, SCM_CHAR (ch)));
  1337. }
  1338. #undef FUNC_NAME
  1339. SCM_DEFINE (scm_char_set_every, "char-set-every", 2, 0, 0,
  1340. (SCM pred, SCM cs),
  1341. "Return a true value if every character in the character set\n"
  1342. "@var{cs} satisfies the predicate @var{pred}.")
  1343. #define FUNC_NAME s_scm_char_set_every
  1344. {
  1345. int k;
  1346. scm_t_wchar n;
  1347. SCM res = SCM_BOOL_T;
  1348. scm_t_char_set *cs_data;
  1349. SCM_VALIDATE_PROC (1, pred);
  1350. SCM_VALIDATE_SMOB (2, cs, charset);
  1351. cs_data = SCM_CHARSET_DATA (cs);
  1352. if (cs_data->len == 0)
  1353. return SCM_BOOL_T;
  1354. for (k = 0; k < cs_data->len; k++)
  1355. for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++)
  1356. {
  1357. res = scm_call_1 (pred, SCM_MAKE_CHAR (n));
  1358. if (scm_is_false (res))
  1359. return res;
  1360. }
  1361. return SCM_BOOL_T;
  1362. }
  1363. #undef FUNC_NAME
  1364. SCM_DEFINE (scm_char_set_any, "char-set-any", 2, 0, 0,
  1365. (SCM pred, SCM cs),
  1366. "Return a true value if any character in the character set\n"
  1367. "@var{cs} satisfies the predicate @var{pred}.")
  1368. #define FUNC_NAME s_scm_char_set_any
  1369. {
  1370. int k;
  1371. scm_t_wchar n;
  1372. scm_t_char_set *cs_data;
  1373. SCM_VALIDATE_PROC (1, pred);
  1374. SCM_VALIDATE_SMOB (2, cs, charset);
  1375. cs_data = SCM_CHARSET_DATA (cs);
  1376. if (cs_data->len == 0)
  1377. return SCM_BOOL_T;
  1378. for (k = 0; k < cs_data->len; k++)
  1379. for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++)
  1380. {
  1381. SCM res = scm_call_1 (pred, SCM_MAKE_CHAR (n));
  1382. if (scm_is_true (res))
  1383. return res;
  1384. }
  1385. return SCM_BOOL_F;
  1386. }
  1387. #undef FUNC_NAME
  1388. SCM_DEFINE (scm_char_set_adjoin, "char-set-adjoin", 1, 0, 1,
  1389. (SCM cs, SCM rest),
  1390. "Add all character arguments to the first argument, which must\n"
  1391. "be a character set.")
  1392. #define FUNC_NAME s_scm_char_set_adjoin
  1393. {
  1394. SCM_VALIDATE_SMOB (1, cs, charset);
  1395. SCM_VALIDATE_REST_ARGUMENT (rest);
  1396. cs = scm_char_set_copy (cs);
  1397. while (!scm_is_null (rest))
  1398. {
  1399. SCM chr = SCM_CAR (rest);
  1400. scm_t_wchar c;
  1401. SCM_VALIDATE_CHAR_COPY (1, chr, c);
  1402. rest = SCM_CDR (rest);
  1403. SCM_CHARSET_SET (cs, c);
  1404. }
  1405. return cs;
  1406. }
  1407. #undef FUNC_NAME
  1408. SCM_DEFINE (scm_char_set_delete, "char-set-delete", 1, 0, 1,
  1409. (SCM cs, SCM rest),
  1410. "Delete all character arguments from the first argument, which\n"
  1411. "must be a character set.")
  1412. #define FUNC_NAME s_scm_char_set_delete
  1413. {
  1414. SCM_VALIDATE_SMOB (1, cs, charset);
  1415. SCM_VALIDATE_REST_ARGUMENT (rest);
  1416. cs = scm_char_set_copy (cs);
  1417. while (!scm_is_null (rest))
  1418. {
  1419. SCM chr = SCM_CAR (rest);
  1420. scm_t_wchar c;
  1421. SCM_VALIDATE_CHAR_COPY (1, chr, c);
  1422. rest = SCM_CDR (rest);
  1423. SCM_CHARSET_UNSET (cs, c);
  1424. }
  1425. return cs;
  1426. }
  1427. #undef FUNC_NAME
  1428. SCM_DEFINE (scm_char_set_adjoin_x, "char-set-adjoin!", 1, 0, 1,
  1429. (SCM cs, SCM rest),
  1430. "Add all character arguments to the first argument, which must\n"
  1431. "be a character set.")
  1432. #define FUNC_NAME s_scm_char_set_adjoin_x
  1433. {
  1434. SCM_VALIDATE_SMOB (1, cs, charset);
  1435. SCM_VALIDATE_REST_ARGUMENT (rest);
  1436. while (!scm_is_null (rest))
  1437. {
  1438. SCM chr = SCM_CAR (rest);
  1439. scm_t_wchar c;
  1440. SCM_VALIDATE_CHAR_COPY (1, chr, c);
  1441. rest = SCM_CDR (rest);
  1442. SCM_CHARSET_SET (cs, c);
  1443. }
  1444. return cs;
  1445. }
  1446. #undef FUNC_NAME
  1447. SCM_DEFINE (scm_char_set_delete_x, "char-set-delete!", 1, 0, 1,
  1448. (SCM cs, SCM rest),
  1449. "Delete all character arguments from the first argument, which\n"
  1450. "must be a character set.")
  1451. #define FUNC_NAME s_scm_char_set_delete_x
  1452. {
  1453. SCM_VALIDATE_SMOB (1, cs, charset);
  1454. SCM_VALIDATE_REST_ARGUMENT (rest);
  1455. while (!scm_is_null (rest))
  1456. {
  1457. SCM chr = SCM_CAR (rest);
  1458. scm_t_wchar c;
  1459. SCM_VALIDATE_CHAR_COPY (1, chr, c);
  1460. rest = SCM_CDR (rest);
  1461. SCM_CHARSET_UNSET (cs, c);
  1462. }
  1463. return cs;
  1464. }
  1465. #undef FUNC_NAME
  1466. SCM_DEFINE (scm_char_set_complement, "char-set-complement", 1, 0, 0,
  1467. (SCM cs), "Return the complement of the character set @var{cs}.")
  1468. #define FUNC_NAME s_scm_char_set_complement
  1469. {
  1470. SCM res;
  1471. scm_t_char_set *p, *q;
  1472. SCM_VALIDATE_SMOB (1, cs, charset);
  1473. res = make_char_set (FUNC_NAME);
  1474. p = SCM_CHARSET_DATA (res);
  1475. q = SCM_CHARSET_DATA (cs);
  1476. charsets_complement (p, q);
  1477. return res;
  1478. }
  1479. #undef FUNC_NAME
  1480. SCM_DEFINE (scm_char_set_union, "char-set-union", 0, 0, 1,
  1481. (SCM rest),
  1482. "Return the union of all argument character sets.")
  1483. #define FUNC_NAME s_scm_char_set_union
  1484. {
  1485. int c = 1;
  1486. SCM res;
  1487. scm_t_char_set *p;
  1488. SCM_VALIDATE_REST_ARGUMENT (rest);
  1489. res = make_char_set (FUNC_NAME);
  1490. p = SCM_CHARSET_DATA (res);
  1491. while (!scm_is_null (rest))
  1492. {
  1493. SCM cs = SCM_CAR (rest);
  1494. SCM_VALIDATE_SMOB (c, cs, charset);
  1495. c++;
  1496. rest = SCM_CDR (rest);
  1497. charsets_union (p, (scm_t_char_set *) SCM_SMOB_DATA (cs));
  1498. }
  1499. return res;
  1500. }
  1501. #undef FUNC_NAME
  1502. SCM_DEFINE (scm_char_set_intersection, "char-set-intersection", 0, 0, 1,
  1503. (SCM rest),
  1504. "Return the intersection of all argument character sets.")
  1505. #define FUNC_NAME s_scm_char_set_intersection
  1506. {
  1507. SCM res;
  1508. SCM_VALIDATE_REST_ARGUMENT (rest);
  1509. if (scm_is_null (rest))
  1510. res = make_char_set (FUNC_NAME);
  1511. else
  1512. {
  1513. scm_t_char_set *p;
  1514. int argnum = 2;
  1515. res = scm_char_set_copy (SCM_CAR (rest));
  1516. p = SCM_CHARSET_DATA (res);
  1517. rest = SCM_CDR (rest);
  1518. while (scm_is_pair (rest))
  1519. {
  1520. SCM cs = SCM_CAR (rest);
  1521. scm_t_char_set *cs_data;
  1522. SCM_VALIDATE_SMOB (argnum, cs, charset);
  1523. argnum++;
  1524. cs_data = SCM_CHARSET_DATA (cs);
  1525. rest = SCM_CDR (rest);
  1526. charsets_intersection (p, cs_data);
  1527. }
  1528. }
  1529. return res;
  1530. }
  1531. #undef FUNC_NAME
  1532. SCM_DEFINE (scm_char_set_difference, "char-set-difference", 1, 0, 1,
  1533. (SCM cs1, SCM rest),
  1534. "Return the difference of all argument character sets.")
  1535. #define FUNC_NAME s_scm_char_set_difference
  1536. {
  1537. int c = 2;
  1538. SCM res, compl;
  1539. scm_t_char_set *p, *q;
  1540. SCM_VALIDATE_SMOB (1, cs1, charset);
  1541. SCM_VALIDATE_REST_ARGUMENT (rest);
  1542. res = scm_char_set_copy (cs1);
  1543. p = SCM_CHARSET_DATA (res);
  1544. compl = make_char_set (FUNC_NAME);
  1545. q = SCM_CHARSET_DATA (compl);
  1546. while (!scm_is_null (rest))
  1547. {
  1548. SCM cs = SCM_CAR (rest);
  1549. SCM_VALIDATE_SMOB (c, cs, charset);
  1550. c++;
  1551. rest = SCM_CDR (rest);
  1552. charsets_complement (q, SCM_CHARSET_DATA (cs));
  1553. charsets_intersection (p, q);
  1554. }
  1555. return res;
  1556. }
  1557. #undef FUNC_NAME
  1558. SCM_DEFINE (scm_char_set_xor, "char-set-xor", 0, 0, 1,
  1559. (SCM rest),
  1560. "Return the exclusive-or of all argument character sets.")
  1561. #define FUNC_NAME s_scm_char_set_xor
  1562. {
  1563. SCM res;
  1564. SCM_VALIDATE_REST_ARGUMENT (rest);
  1565. if (scm_is_null (rest))
  1566. res = make_char_set (FUNC_NAME);
  1567. else
  1568. {
  1569. int argnum = 2;
  1570. scm_t_char_set *p;
  1571. res = scm_char_set_copy (SCM_CAR (rest));
  1572. p = SCM_CHARSET_DATA (res);
  1573. rest = SCM_CDR (rest);
  1574. while (scm_is_pair (rest))
  1575. {
  1576. SCM cs = SCM_CAR (rest);
  1577. scm_t_char_set *cs_data;
  1578. SCM_VALIDATE_SMOB (argnum, cs, charset);
  1579. argnum++;
  1580. cs_data = SCM_CHARSET_DATA (cs);
  1581. rest = SCM_CDR (rest);
  1582. charsets_xor (p, cs_data);
  1583. }
  1584. }
  1585. return res;
  1586. }
  1587. #undef FUNC_NAME
  1588. SCM_DEFINE (scm_char_set_diff_plus_intersection, "char-set-diff+intersection", 1, 0, 1,
  1589. (SCM cs1, SCM rest),
  1590. "Return the difference and the intersection of all argument\n"
  1591. "character sets.")
  1592. #define FUNC_NAME s_scm_char_set_diff_plus_intersection
  1593. {
  1594. int c = 2;
  1595. SCM res1, res2;
  1596. scm_t_char_set *p, *q;
  1597. SCM_VALIDATE_SMOB (1, cs1, charset);
  1598. SCM_VALIDATE_REST_ARGUMENT (rest);
  1599. res1 = scm_char_set_copy (cs1);
  1600. res2 = make_char_set (FUNC_NAME);
  1601. p = SCM_CHARSET_DATA (res1);
  1602. q = SCM_CHARSET_DATA (res2);
  1603. while (!scm_is_null (rest))
  1604. {
  1605. SCM cs = SCM_CAR (rest);
  1606. scm_t_char_set *r;
  1607. SCM_VALIDATE_SMOB (c, cs, charset);
  1608. c++;
  1609. r = SCM_CHARSET_DATA (cs);
  1610. charsets_union (q, r);
  1611. charsets_intersection (p, r);
  1612. rest = SCM_CDR (rest);
  1613. }
  1614. return scm_values (scm_list_2 (res1, res2));
  1615. }
  1616. #undef FUNC_NAME
  1617. SCM_DEFINE (scm_char_set_complement_x, "char-set-complement!", 1, 0, 0,
  1618. (SCM cs), "Return the complement of the character set @var{cs}.")
  1619. #define FUNC_NAME s_scm_char_set_complement_x
  1620. {
  1621. SCM_VALIDATE_SMOB (1, cs, charset);
  1622. cs = scm_char_set_complement (cs);
  1623. return cs;
  1624. }
  1625. #undef FUNC_NAME
  1626. SCM_DEFINE (scm_char_set_union_x, "char-set-union!", 1, 0, 1,
  1627. (SCM cs1, SCM rest),
  1628. "Return the union of all argument character sets.")
  1629. #define FUNC_NAME s_scm_char_set_union_x
  1630. {
  1631. SCM_VALIDATE_SMOB (1, cs1, charset);
  1632. SCM_VALIDATE_REST_ARGUMENT (rest);
  1633. cs1 = scm_char_set_union (scm_cons (cs1, rest));
  1634. return cs1;
  1635. }
  1636. #undef FUNC_NAME
  1637. SCM_DEFINE (scm_char_set_intersection_x, "char-set-intersection!", 1, 0, 1,
  1638. (SCM cs1, SCM rest),
  1639. "Return the intersection of all argument character sets.")
  1640. #define FUNC_NAME s_scm_char_set_intersection_x
  1641. {
  1642. SCM_VALIDATE_SMOB (1, cs1, charset);
  1643. SCM_VALIDATE_REST_ARGUMENT (rest);
  1644. cs1 = scm_char_set_intersection (scm_cons (cs1, rest));
  1645. return cs1;
  1646. }
  1647. #undef FUNC_NAME
  1648. SCM_DEFINE (scm_char_set_difference_x, "char-set-difference!", 1, 0, 1,
  1649. (SCM cs1, SCM rest),
  1650. "Return the difference of all argument character sets.")
  1651. #define FUNC_NAME s_scm_char_set_difference_x
  1652. {
  1653. SCM_VALIDATE_SMOB (1, cs1, charset);
  1654. SCM_VALIDATE_REST_ARGUMENT (rest);
  1655. cs1 = scm_char_set_difference (cs1, rest);
  1656. return cs1;
  1657. }
  1658. #undef FUNC_NAME
  1659. SCM_DEFINE (scm_char_set_xor_x, "char-set-xor!", 1, 0, 1,
  1660. (SCM cs1, SCM rest),
  1661. "Return the exclusive-or of all argument character sets.")
  1662. #define FUNC_NAME s_scm_char_set_xor_x
  1663. {
  1664. /* a side-effecting variant should presumably give consistent results:
  1665. (define a (char-set #\a))
  1666. (char-set-xor a a a) -> char set #\a
  1667. (char-set-xor! a a a) -> char set #\a
  1668. */
  1669. cs1 = scm_char_set_xor (scm_cons (cs1, rest));
  1670. return cs1;
  1671. }
  1672. #undef FUNC_NAME
  1673. SCM_DEFINE (scm_char_set_diff_plus_intersection_x,
  1674. "char-set-diff+intersection!", 2, 0, 1, (SCM cs1, SCM cs2,
  1675. SCM rest),
  1676. "Return the difference and the intersection of all argument\n"
  1677. "character sets.")
  1678. #define FUNC_NAME s_scm_char_set_diff_plus_intersection_x
  1679. {
  1680. SCM diff, intersect;
  1681. diff = scm_char_set_difference (cs1, scm_cons (cs2, rest));
  1682. intersect =
  1683. scm_char_set_intersection (scm_cons (cs1, scm_cons (cs2, rest)));
  1684. cs1 = diff;
  1685. cs2 = intersect;
  1686. return scm_values (scm_list_2 (cs1, cs2));
  1687. }
  1688. #undef FUNC_NAME
  1689. /* Standard character sets. */
  1690. SCM scm_char_set_lower_case;
  1691. SCM scm_char_set_upper_case;
  1692. SCM scm_char_set_title_case;
  1693. SCM scm_char_set_letter;
  1694. SCM scm_char_set_digit;
  1695. SCM scm_char_set_letter_and_digit;
  1696. SCM scm_char_set_graphic;
  1697. SCM scm_char_set_printing;
  1698. SCM scm_char_set_whitespace;
  1699. SCM scm_char_set_iso_control;
  1700. SCM scm_char_set_punctuation;
  1701. SCM scm_char_set_symbol;
  1702. SCM scm_char_set_hex_digit;
  1703. SCM scm_char_set_blank;
  1704. SCM scm_char_set_ascii;
  1705. SCM scm_char_set_empty;
  1706. SCM scm_char_set_designated;
  1707. SCM scm_char_set_full;
  1708. /* Create an empty character set and return it after binding it to NAME. */
  1709. static inline SCM
  1710. define_charset (const char *name, const scm_t_char_set *p)
  1711. {
  1712. SCM cs;
  1713. SCM_NEWSMOB (cs, scm_tc16_charset, p);
  1714. scm_c_define (name, cs);
  1715. return cs;
  1716. }
  1717. SCM_DEFINE (scm_sys_char_set_dump, "%char-set-dump", 1, 0, 0, (SCM charset),
  1718. "Returns an association list containing debugging information\n"
  1719. "for @var{charset}. The association list has the following entries."
  1720. "@table @code\n"
  1721. "@item char-set\n"
  1722. "The char-set itself.\n"
  1723. "@item len\n"
  1724. "The number of character ranges the char-set contains\n"
  1725. "@item ranges\n"
  1726. "A list of lists where each sublist a range of code points\n"
  1727. "and their associated characters"
  1728. "@end table")
  1729. #define FUNC_NAME s_scm_sys_char_set_dump
  1730. {
  1731. SCM e1, e2, e3;
  1732. SCM ranges = SCM_EOL, elt;
  1733. size_t i;
  1734. scm_t_char_set *cs;
  1735. char codepoint_string_lo[9], codepoint_string_hi[9];
  1736. SCM_VALIDATE_SMOB (1, charset, charset);
  1737. cs = SCM_CHARSET_DATA (charset);
  1738. e1 = scm_cons (scm_from_latin1_symbol ("char-set"),
  1739. charset);
  1740. e2 = scm_cons (scm_from_latin1_symbol ("n"),
  1741. scm_from_size_t (cs->len));
  1742. for (i = 0; i < cs->len; i++)
  1743. {
  1744. if (cs->ranges[i].lo > 0xFFFF)
  1745. sprintf (codepoint_string_lo, "U+%06x", cs->ranges[i].lo);
  1746. else
  1747. sprintf (codepoint_string_lo, "U+%04x", cs->ranges[i].lo);
  1748. if (cs->ranges[i].hi > 0xFFFF)
  1749. sprintf (codepoint_string_hi, "U+%06x", cs->ranges[i].hi);
  1750. else
  1751. sprintf (codepoint_string_hi, "U+%04x", cs->ranges[i].hi);
  1752. elt = scm_list_4 (SCM_MAKE_CHAR (cs->ranges[i].lo),
  1753. SCM_MAKE_CHAR (cs->ranges[i].hi),
  1754. scm_from_locale_string (codepoint_string_lo),
  1755. scm_from_locale_string (codepoint_string_hi));
  1756. ranges = scm_append (scm_list_2 (ranges,
  1757. scm_list_1 (elt)));
  1758. }
  1759. e3 = scm_cons (scm_from_latin1_symbol ("ranges"),
  1760. ranges);
  1761. return scm_list_3 (e1, e2, e3);
  1762. }
  1763. #undef FUNC_NAME
  1764. void
  1765. scm_init_srfi_14 (void)
  1766. {
  1767. scm_tc16_charset = scm_make_smob_type ("character-set", 0);
  1768. scm_set_smob_print (scm_tc16_charset, charset_print);
  1769. scm_tc16_charset_cursor = scm_make_smob_type ("char-set-cursor", 0);
  1770. scm_set_smob_print (scm_tc16_charset_cursor, charset_cursor_print);
  1771. scm_char_set_upper_case =
  1772. define_charset ("char-set:upper-case", &cs_upper_case);
  1773. scm_char_set_lower_case =
  1774. define_charset ("char-set:lower-case", &cs_lower_case);
  1775. scm_char_set_title_case =
  1776. define_charset ("char-set:title-case", &cs_title_case);
  1777. scm_char_set_letter = define_charset ("char-set:letter", &cs_letter);
  1778. scm_char_set_digit = define_charset ("char-set:digit", &cs_digit);
  1779. scm_char_set_letter_and_digit =
  1780. define_charset ("char-set:letter+digit", &cs_letter_plus_digit);
  1781. scm_char_set_graphic = define_charset ("char-set:graphic", &cs_graphic);
  1782. scm_char_set_printing = define_charset ("char-set:printing", &cs_printing);
  1783. scm_char_set_whitespace =
  1784. define_charset ("char-set:whitespace", &cs_whitespace);
  1785. scm_char_set_iso_control =
  1786. define_charset ("char-set:iso-control", &cs_iso_control);
  1787. scm_char_set_punctuation =
  1788. define_charset ("char-set:punctuation", &cs_punctuation);
  1789. scm_char_set_symbol = define_charset ("char-set:symbol", &cs_symbol);
  1790. scm_char_set_hex_digit =
  1791. define_charset ("char-set:hex-digit", &cs_hex_digit);
  1792. scm_char_set_blank = define_charset ("char-set:blank", &cs_blank);
  1793. scm_char_set_ascii = define_charset ("char-set:ascii", &cs_ascii);
  1794. scm_char_set_empty = define_charset ("char-set:empty", &cs_empty);
  1795. scm_char_set_designated = define_charset ("char-set:designated", &cs_designated);
  1796. scm_char_set_full = define_charset ("char-set:full", &cs_full);
  1797. #include "libguile/srfi-14.x"
  1798. }
  1799. /* End of srfi-14.c. */