srfi-14.c 57 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095209620972098209921002101210221032104210521062107210821092110211121122113211421152116211721182119
  1. /* srfi-14.c --- SRFI-14 procedures for Guile
  2. *
  3. * Copyright (C) 2001, 2004, 2006, 2007, 2009, 2011,
  4. * 2019 Free Software Foundation, Inc.
  5. *
  6. * This library is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU Lesser General Public License
  8. * as published by the Free Software Foundation; either version 3 of
  9. * the License, or (at your option) any later version.
  10. *
  11. * This library is distributed in the hope that it will be useful, but
  12. * WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. * Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public
  17. * License along with this library; if not, write to the Free Software
  18. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19. * 02110-1301 USA
  20. */
  21. #ifdef HAVE_CONFIG_H
  22. # include <config.h>
  23. #endif
  24. #include <string.h>
  25. #include <unictype.h>
  26. #include "libguile.h"
  27. #include "libguile/srfi-14.h"
  28. #include "libguile/strings.h"
  29. #include "libguile/chars.h"
  30. /* Include the pre-computed standard charset data. */
  31. #include "libguile/srfi-14.i.c"
  32. scm_t_char_range cs_full_ranges[] = {
  33. {0x0000, SCM_CODEPOINT_SURROGATE_START - 1}
  34. ,
  35. {SCM_CODEPOINT_SURROGATE_END + 1, SCM_CODEPOINT_MAX}
  36. };
  37. scm_t_char_set cs_full = {
  38. 2,
  39. cs_full_ranges
  40. };
  /* Fetch the scm_t_char_set stored as the smob data of CHARSET.  */
  #define SCM_CHARSET_DATA(charset) \
    ((scm_t_char_set *) SCM_SMOB_DATA (charset))

  /* Add codepoint IDX to the set held by the smob CS.  */
  #define SCM_CHARSET_SET(cs, idx) \
    scm_i_charset_set (SCM_CHARSET_DATA (cs), idx)

  /* Remove codepoint IDX from the set held by the smob CS.  */
  #define SCM_CHARSET_UNSET(cs, idx) \
    scm_i_charset_unset (SCM_CHARSET_DATA (cs), idx)

  /* Smob type codes for character sets and for character set cursors.  */
  int scm_tc16_charset = 0;
  int scm_tc16_charset_cursor = 0;
  49. /* True if N exists in charset CS. */
  50. int
  51. scm_i_charset_get (scm_t_char_set *cs, scm_t_wchar n)
  52. {
  53. size_t i;
  54. i = 0;
  55. while (i < cs->len)
  56. {
  57. if (cs->ranges[i].lo <= n && n <= cs->ranges[i].hi)
  58. return 1;
  59. i++;
  60. }
  61. return 0;
  62. }
  63. /* Put N into charset CS. */
  64. void
  65. scm_i_charset_set (scm_t_char_set *cs, scm_t_wchar n)
  66. {
  67. size_t i;
  68. size_t len;
  69. len = cs->len;
  70. i = 0;
  71. while (i < len)
  72. {
  73. /* Already in this range */
  74. if (cs->ranges[i].lo <= n && n <= cs->ranges[i].hi)
  75. {
  76. return;
  77. }
  78. if (n == cs->ranges[i].lo - 1)
  79. {
  80. /* This char is one below the current range. */
  81. if (i > 0 && cs->ranges[i - 1].hi + 1 == n)
  82. {
  83. /* It is also one above the previous range. */
  84. /* This is an impossible condition: in the previous
  85. iteration, the test for 'one above the current range'
  86. should already have inserted the character here. */
  87. abort ();
  88. }
  89. else
  90. {
  91. /* Expand the range down by one. */
  92. cs->ranges[i].lo = n;
  93. return;
  94. }
  95. }
  96. else if (n == cs->ranges[i].hi + 1)
  97. {
  98. /* This char is one above the current range. */
  99. if (i < len - 1 && cs->ranges[i + 1].lo - 1 == n)
  100. {
  101. /* It is also one below the next range, so combine them. */
  102. cs->ranges[i].hi = cs->ranges[i + 1].hi;
  103. if (i < len - 2)
  104. memmove (cs->ranges + (i + 1), cs->ranges + (i + 2),
  105. sizeof (scm_t_char_range) * (len - i - 2));
  106. cs->ranges = scm_gc_realloc (cs->ranges,
  107. sizeof (scm_t_char_range) * len,
  108. sizeof (scm_t_char_range) * (len -
  109. 1),
  110. "character-set");
  111. cs->len = len - 1;
  112. return;
  113. }
  114. else
  115. {
  116. /* Expand the range up by one. */
  117. cs->ranges[i].hi = n;
  118. return;
  119. }
  120. }
  121. else if (n < cs->ranges[i].lo - 1)
  122. {
  123. /* This is a new range below the current one. */
  124. cs->ranges = scm_gc_realloc (cs->ranges,
  125. sizeof (scm_t_char_range) * len,
  126. sizeof (scm_t_char_range) * (len + 1),
  127. "character-set");
  128. memmove (cs->ranges + (i + 1), cs->ranges + i,
  129. sizeof (scm_t_char_range) * (len - i));
  130. cs->ranges[i].lo = n;
  131. cs->ranges[i].hi = n;
  132. cs->len = len + 1;
  133. return;
  134. }
  135. i++;
  136. }
  137. /* This is a new range above all previous ranges. */
  138. if (len == 0)
  139. {
  140. cs->ranges = scm_gc_malloc (sizeof (scm_t_char_range), "character-set");
  141. }
  142. else
  143. {
  144. cs->ranges = scm_gc_realloc (cs->ranges,
  145. sizeof (scm_t_char_range) * len,
  146. sizeof (scm_t_char_range) * (len + 1),
  147. "character-set");
  148. }
  149. cs->ranges[len].lo = n;
  150. cs->ranges[len].hi = n;
  151. cs->len = len + 1;
  152. return;
  153. }
  154. /* Put LO to HI inclusive into charset CS. */
  155. static void
  156. scm_i_charset_set_range (scm_t_char_set *cs, scm_t_wchar lo, scm_t_wchar hi)
  157. {
  158. size_t i;
  159. i = 0;
  160. while (i < cs->len)
  161. {
  162. /* Already in this range */
  163. if (cs->ranges[i].lo <= lo && cs->ranges[i].hi >= hi)
  164. return;
  165. /* cur: +---+
  166. new: +---+
  167. */
  168. if (cs->ranges[i].lo - 1 > hi)
  169. {
  170. /* Add a new range below the current one. */
  171. cs->ranges = scm_gc_realloc (cs->ranges,
  172. sizeof (scm_t_char_range) * cs->len,
  173. sizeof (scm_t_char_range) * (cs->len + 1),
  174. "character-set");
  175. memmove (cs->ranges + (i + 1), cs->ranges + i,
  176. sizeof (scm_t_char_range) * (cs->len - i));
  177. cs->ranges[i].lo = lo;
  178. cs->ranges[i].hi = hi;
  179. cs->len += 1;
  180. return;
  181. }
  182. /* cur: +---+ or +---+ or +---+
  183. new: +---+ +---+ +---+
  184. */
  185. if (cs->ranges[i].lo > lo
  186. && (cs->ranges[i].lo - 1 <= hi && cs->ranges[i].hi >= hi))
  187. {
  188. cs->ranges[i].lo = lo;
  189. return;
  190. }
  191. /* cur: +---+ or +---+ or +---+
  192. new: +---+ +---+ +---+
  193. */
  194. else if (cs->ranges[i].hi + 1 >= lo && cs->ranges[i].hi < hi)
  195. {
  196. if (cs->ranges[i].lo > lo)
  197. cs->ranges[i].lo = lo;
  198. if (cs->ranges[i].hi < hi)
  199. cs->ranges[i].hi = hi;
  200. while (i < cs->len - 1)
  201. {
  202. /* cur: --+ +---+
  203. new: -----+
  204. */
  205. if (cs->ranges[i + 1].lo - 1 > hi)
  206. break;
  207. /* cur: --+ +---+ or --+ +---+ or --+ +--+
  208. new: -----+ ------+ ---------+
  209. */
  210. /* Combine this range with the previous one. */
  211. if (cs->ranges[i + 1].hi > hi)
  212. cs->ranges[i].hi = cs->ranges[i + 1].hi;
  213. if (i + 1 < cs->len)
  214. memmove (cs->ranges + i + 1, cs->ranges + i + 2,
  215. sizeof (scm_t_char_range) * (cs->len - i - 2));
  216. cs->ranges = scm_gc_realloc (cs->ranges,
  217. sizeof (scm_t_char_range) * cs->len,
  218. sizeof (scm_t_char_range) * (cs->len - 1),
  219. "character-set");
  220. cs->len -= 1;
  221. }
  222. return;
  223. }
  224. i ++;
  225. }
  226. /* This is a new range above all previous ranges. */
  227. if (cs->len == 0)
  228. {
  229. cs->ranges = scm_gc_malloc (sizeof (scm_t_char_range), "character-set");
  230. }
  231. else
  232. {
  233. cs->ranges = scm_gc_realloc (cs->ranges,
  234. sizeof (scm_t_char_range) * cs->len,
  235. sizeof (scm_t_char_range) * (cs->len + 1),
  236. "character-set");
  237. }
  238. cs->len += 1;
  239. cs->ranges[cs->len - 1].lo = lo;
  240. cs->ranges[cs->len - 1].hi = hi;
  241. return;
  242. }
  243. /* If N is in charset CS, remove it. */
  244. void
  245. scm_i_charset_unset (scm_t_char_set *cs, scm_t_wchar n)
  246. {
  247. size_t i;
  248. size_t len;
  249. len = cs->len;
  250. i = 0;
  251. while (i < len)
  252. {
  253. if (n < cs->ranges[i].lo)
  254. /* Not in this set. */
  255. return;
  256. if (n == cs->ranges[i].lo && n == cs->ranges[i].hi)
  257. {
  258. /* Remove this one-character range. */
  259. if (len == 1)
  260. {
  261. scm_gc_free (cs->ranges,
  262. sizeof (scm_t_char_range) * cs->len,
  263. "character-set");
  264. cs->ranges = NULL;
  265. cs->len = 0;
  266. return;
  267. }
  268. else if (i < len - 1)
  269. {
  270. memmove (cs->ranges + i, cs->ranges + (i + 1),
  271. sizeof (scm_t_char_range) * (len - i - 1));
  272. cs->ranges = scm_gc_realloc (cs->ranges,
  273. sizeof (scm_t_char_range) * len,
  274. sizeof (scm_t_char_range) * (len -
  275. 1),
  276. "character-set");
  277. cs->len = len - 1;
  278. return;
  279. }
  280. else if (i == len - 1)
  281. {
  282. cs->ranges = scm_gc_realloc (cs->ranges,
  283. sizeof (scm_t_char_range) * len,
  284. sizeof (scm_t_char_range) * (len -
  285. 1),
  286. "character-set");
  287. cs->len = len - 1;
  288. return;
  289. }
  290. }
  291. else if (n == cs->ranges[i].lo)
  292. {
  293. /* Shrink this range from the left. */
  294. cs->ranges[i].lo = n + 1;
  295. return;
  296. }
  297. else if (n == cs->ranges[i].hi)
  298. {
  299. /* Shrink this range from the right. */
  300. cs->ranges[i].hi = n - 1;
  301. return;
  302. }
  303. else if (n > cs->ranges[i].lo && n < cs->ranges[i].hi)
  304. {
  305. /* Split this range into two pieces. */
  306. cs->ranges = scm_gc_realloc (cs->ranges,
  307. sizeof (scm_t_char_range) * len,
  308. sizeof (scm_t_char_range) * (len + 1),
  309. "character-set");
  310. if (i < len - 1)
  311. memmove (cs->ranges + (i + 2), cs->ranges + (i + 1),
  312. sizeof (scm_t_char_range) * (len - i - 1));
  313. cs->ranges[i + 1].hi = cs->ranges[i].hi;
  314. cs->ranges[i + 1].lo = n + 1;
  315. cs->ranges[i].hi = n - 1;
  316. cs->len = len + 1;
  317. return;
  318. }
  319. i++;
  320. }
  321. /* This value is above all ranges, so do nothing here. */
  322. return;
  323. }
  324. static int
  325. charsets_equal (scm_t_char_set *a, scm_t_char_set *b)
  326. {
  327. if (a->len != b->len)
  328. return 0;
  329. /* Empty charsets may have ranges == NULL. We must avoid passing
  330. NULL to memcmp, even if the length is zero, to avoid undefined
  331. behavior. */
  332. if (a->len == 0)
  333. return 1;
  334. if (memcmp (a->ranges, b->ranges, sizeof (scm_t_char_range) * a->len) != 0)
  335. return 0;
  336. return 1;
  337. }
  338. /* Return true if every character in A is also in B. */
  339. static int
  340. charsets_leq (scm_t_char_set *a, scm_t_char_set *b)
  341. {
  342. size_t i = 0, j = 0;
  343. scm_t_wchar alo, ahi;
  344. if (a->len == 0)
  345. return 1;
  346. if (b->len == 0)
  347. return 0;
  348. while (i < a->len)
  349. {
  350. alo = a->ranges[i].lo;
  351. ahi = a->ranges[i].hi;
  352. while (b->ranges[j].hi < alo)
  353. {
  354. if (j < b->len - 1)
  355. j++;
  356. else
  357. return 0;
  358. }
  359. if (alo < b->ranges[j].lo || ahi > b->ranges[j].hi)
  360. return 0;
  361. i++;
  362. }
  363. return 1;
  364. }
  365. /* Merge B into A. */
  366. static void
  367. charsets_union (scm_t_char_set *a, scm_t_char_set *b)
  368. {
  369. size_t i = 0;
  370. scm_t_wchar blo, bhi;
  371. if (b->len == 0)
  372. return;
  373. if (a->len == 0)
  374. {
  375. a->len = b->len;
  376. a->ranges = scm_gc_malloc (sizeof (scm_t_char_range) * b->len,
  377. "character-set");
  378. memcpy (a->ranges, b->ranges, sizeof (scm_t_char_range) * b->len);
  379. return;
  380. }
  381. while (i < b->len)
  382. {
  383. blo = b->ranges[i].lo;
  384. bhi = b->ranges[i].hi;
  385. scm_i_charset_set_range (a, blo, bhi);
  386. i++;
  387. }
  388. return;
  389. }
  390. /* Remove elements not both in A and B from A. */
  391. static void
  392. charsets_intersection (scm_t_char_set *a, scm_t_char_set *b)
  393. {
  394. size_t i = 0;
  395. scm_t_wchar blo, bhi, n;
  396. scm_t_char_set *c;
  397. if (a->len == 0)
  398. return;
  399. if (b->len == 0)
  400. {
  401. scm_gc_free (a->ranges, sizeof (scm_t_char_range) * a->len,
  402. "character-set");
  403. a->len = 0;
  404. return;
  405. }
  406. c = (scm_t_char_set *) scm_malloc (sizeof (scm_t_char_set));
  407. c->len = 0;
  408. c->ranges = NULL;
  409. while (i < b->len)
  410. {
  411. blo = b->ranges[i].lo;
  412. bhi = b->ranges[i].hi;
  413. for (n = blo; n <= bhi; n++)
  414. if (scm_i_charset_get (a, n))
  415. scm_i_charset_set (c, n);
  416. i++;
  417. }
  418. scm_gc_free (a->ranges, sizeof (scm_t_char_range) * a->len,
  419. "character-set");
  420. a->len = c->len;
  421. if (c->len != 0)
  422. a->ranges = c->ranges;
  423. else
  424. a->ranges = NULL;
  425. free (c);
  426. return;
  427. }
  428. #define SCM_ADD_RANGE(low, high) \
  429. do { \
  430. p->ranges[idx].lo = (low); \
  431. p->ranges[idx++].hi = (high); \
  432. } while (0)
  433. #define SCM_ADD_RANGE_SKIP_SURROGATES(low, high) \
  434. do { \
  435. p->ranges[idx].lo = (low); \
  436. p->ranges[idx++].hi = SCM_CODEPOINT_SURROGATE_START - 1; \
  437. p->ranges[idx].lo = SCM_CODEPOINT_SURROGATE_END + 1; \
  438. p->ranges[idx++].hi = (high); \
  439. } while (0)
  440. /* Make P the compelement of Q. */
  441. static void
  442. charsets_complement (scm_t_char_set *p, scm_t_char_set *q)
  443. {
  444. int k, idx;
  445. idx = 0;
  446. if (q->len == 0)
  447. {
  448. /* Fill with all valid codepoints. */
  449. p->len = 2;
  450. p->ranges = scm_gc_malloc (sizeof (scm_t_char_range) * 2,
  451. "character-set");
  452. SCM_ADD_RANGE_SKIP_SURROGATES (0, SCM_CODEPOINT_MAX);
  453. return;
  454. }
  455. if (p->len > 0)
  456. scm_gc_free (p->ranges, sizeof (scm_t_char_set) * p->len,
  457. "character-set");
  458. /* Count the number of ranges needed for the output. */
  459. p->len = 0;
  460. if (q->ranges[0].lo > 0)
  461. p->len++;
  462. if (q->ranges[q->len - 1].hi < SCM_CODEPOINT_MAX)
  463. p->len++;
  464. p->len += q->len;
  465. p->ranges =
  466. (scm_t_char_range *) scm_gc_malloc (sizeof (scm_t_char_range) * p->len,
  467. "character-set");
  468. if (q->ranges[0].lo > 0)
  469. {
  470. if (q->ranges[0].lo > SCM_CODEPOINT_SURROGATE_END)
  471. SCM_ADD_RANGE_SKIP_SURROGATES (0, q->ranges[0].lo - 1);
  472. else
  473. SCM_ADD_RANGE (0, q->ranges[0].lo - 1);
  474. }
  475. for (k = 1; k < q->len; k++)
  476. {
  477. if (q->ranges[k - 1].hi < SCM_CODEPOINT_SURROGATE_START
  478. && q->ranges[k].lo - 1 > SCM_CODEPOINT_SURROGATE_END)
  479. SCM_ADD_RANGE_SKIP_SURROGATES (q->ranges[k - 1].hi + 1, q->ranges[k].lo - 1);
  480. else
  481. SCM_ADD_RANGE (q->ranges[k - 1].hi + 1, q->ranges[k].lo - 1);
  482. }
  483. if (q->ranges[q->len - 1].hi < SCM_CODEPOINT_MAX)
  484. {
  485. if (q->ranges[q->len - 1].hi < SCM_CODEPOINT_SURROGATE_START)
  486. SCM_ADD_RANGE_SKIP_SURROGATES (q->ranges[q->len - 1].hi + 1, SCM_CODEPOINT_MAX);
  487. else
  488. SCM_ADD_RANGE (q->ranges[q->len - 1].hi + 1, SCM_CODEPOINT_MAX);
  489. }
  490. return;
  491. }
  492. #undef SCM_ADD_RANGE
  493. #undef SCM_ADD_RANGE_SKIP_SURROGATES
  494. /* Replace A with elements only found in one of A or B. */
  495. static void
  496. charsets_xor (scm_t_char_set *a, scm_t_char_set *b)
  497. {
  498. size_t i = 0;
  499. scm_t_wchar blo, bhi, n;
  500. if (b->len == 0)
  501. {
  502. return;
  503. }
  504. if (a->len == 0)
  505. {
  506. a->ranges =
  507. (scm_t_char_range *) scm_gc_malloc (sizeof (scm_t_char_range) *
  508. b->len, "character-set");
  509. a->len = b->len;
  510. memcpy (a->ranges, b->ranges, sizeof (scm_t_char_range) * a->len);
  511. return;
  512. }
  513. while (i < b->len)
  514. {
  515. blo = b->ranges[i].lo;
  516. bhi = b->ranges[i].hi;
  517. for (n = blo; n <= bhi; n++)
  518. {
  519. if (scm_i_charset_get (a, n))
  520. scm_i_charset_unset (a, n);
  521. else
  522. scm_i_charset_set (a, n);
  523. }
  524. i++;
  525. }
  526. return;
  527. }
  528. /* Smob print hook for character sets. */
  529. static int
  530. charset_print (SCM charset, SCM port, scm_print_state *pstate SCM_UNUSED)
  531. {
  532. size_t i;
  533. int first = 1;
  534. scm_t_char_set *p;
  535. const size_t max_ranges_to_print = 50;
  536. p = SCM_CHARSET_DATA (charset);
  537. scm_puts ("#<charset {", port);
  538. for (i = 0; i < p->len; i++)
  539. {
  540. if (first)
  541. first = 0;
  542. else
  543. scm_puts (" ", port);
  544. scm_write (SCM_MAKE_CHAR (p->ranges[i].lo), port);
  545. if (p->ranges[i].lo != p->ranges[i].hi)
  546. {
  547. scm_puts ("..", port);
  548. scm_write (SCM_MAKE_CHAR (p->ranges[i].hi), port);
  549. }
  550. if (i >= max_ranges_to_print)
  551. {
  552. /* Too many to print here. Quit early. */
  553. scm_puts (" ...", port);
  554. break;
  555. }
  556. }
  557. scm_puts ("}>", port);
  558. return 1;
  559. }
  560. /* Smob print hook for character sets cursors. */
  561. static int
  562. charset_cursor_print (SCM cursor, SCM port,
  563. scm_print_state *pstate SCM_UNUSED)
  564. {
  565. scm_t_char_set_cursor *cur;
  566. cur = (scm_t_char_set_cursor *) SCM_SMOB_DATA (cursor);
  567. scm_puts ("#<charset-cursor ", port);
  568. if (cur->range == (size_t) (-1))
  569. scm_puts ("(empty)", port);
  570. else
  571. {
  572. scm_write (scm_from_size_t (cur->range), port);
  573. scm_puts (":", port);
  574. scm_write (scm_from_int32 (cur->n), port);
  575. }
  576. scm_puts (">", port);
  577. return 1;
  578. }
  579. /* Create a new, empty character set. */
  580. static SCM
  581. make_char_set (const char *func_name)
  582. {
  583. scm_t_char_set *p;
  584. p = scm_gc_malloc (sizeof (scm_t_char_set), "character-set");
  585. memset (p, 0, sizeof (scm_t_char_set));
  586. SCM_RETURN_NEWSMOB (scm_tc16_charset, p);
  587. }
  588. SCM_DEFINE (scm_char_set_p, "char-set?", 1, 0, 0,
  589. (SCM obj),
  590. "Return @code{#t} if @var{obj} is a character set, @code{#f}\n"
  591. "otherwise.")
  592. #define FUNC_NAME s_scm_char_set_p
  593. {
  594. return scm_from_bool (SCM_SMOB_PREDICATE (scm_tc16_charset, obj));
  595. }
  596. #undef FUNC_NAME
  597. SCM_DEFINE (scm_char_set_eq, "char-set=", 0, 0, 1,
  598. (SCM char_sets),
  599. "Return @code{#t} if all given character sets are equal.")
  600. #define FUNC_NAME s_scm_char_set_eq
  601. {
  602. int argnum = 1;
  603. scm_t_char_set *cs1_data = NULL;
  604. SCM_VALIDATE_REST_ARGUMENT (char_sets);
  605. while (!scm_is_null (char_sets))
  606. {
  607. SCM csi = SCM_CAR (char_sets);
  608. scm_t_char_set *csi_data;
  609. SCM_VALIDATE_SMOB (argnum, csi, charset);
  610. argnum++;
  611. csi_data = SCM_CHARSET_DATA (csi);
  612. if (cs1_data == NULL)
  613. cs1_data = csi_data;
  614. else if (!charsets_equal (cs1_data, csi_data))
  615. return SCM_BOOL_F;
  616. char_sets = SCM_CDR (char_sets);
  617. }
  618. return SCM_BOOL_T;
  619. }
  620. #undef FUNC_NAME
  621. SCM_DEFINE (scm_char_set_leq, "char-set<=", 0, 0, 1,
  622. (SCM char_sets),
  623. "Return @code{#t} if every character set @var{char_set}i is a subset\n"
  624. "of character set @var{char_set}i+1.")
  625. #define FUNC_NAME s_scm_char_set_leq
  626. {
  627. int argnum = 1;
  628. scm_t_char_set *prev_data = NULL;
  629. SCM_VALIDATE_REST_ARGUMENT (char_sets);
  630. while (!scm_is_null (char_sets))
  631. {
  632. SCM csi = SCM_CAR (char_sets);
  633. scm_t_char_set *csi_data;
  634. SCM_VALIDATE_SMOB (argnum, csi, charset);
  635. argnum++;
  636. csi_data = SCM_CHARSET_DATA (csi);
  637. if (prev_data)
  638. {
  639. if (!charsets_leq (prev_data, csi_data))
  640. return SCM_BOOL_F;
  641. }
  642. prev_data = csi_data;
  643. char_sets = SCM_CDR (char_sets);
  644. }
  645. return SCM_BOOL_T;
  646. }
  647. #undef FUNC_NAME
  648. SCM_DEFINE (scm_char_set_hash, "char-set-hash", 1, 1, 0,
  649. (SCM cs, SCM bound),
  650. "Compute a hash value for the character set @var{cs}. If\n"
  651. "@var{bound} is given and non-zero, it restricts the\n"
  652. "returned value to the range 0 @dots{} @var{bound} - 1.")
  653. #define FUNC_NAME s_scm_char_set_hash
  654. {
  655. const unsigned long default_bnd = 871;
  656. unsigned long bnd;
  657. scm_t_char_set *p;
  658. unsigned long val = 0;
  659. int k;
  660. scm_t_wchar c;
  661. SCM_VALIDATE_SMOB (1, cs, charset);
  662. if (SCM_UNBNDP (bound))
  663. bnd = default_bnd;
  664. else
  665. {
  666. bnd = scm_to_ulong (bound);
  667. if (bnd == 0)
  668. bnd = default_bnd;
  669. }
  670. p = SCM_CHARSET_DATA (cs);
  671. for (k = 0; k < p->len; k++)
  672. {
  673. for (c = p->ranges[k].lo; c <= p->ranges[k].hi; c++)
  674. val = c + (val << 1);
  675. }
  676. return scm_from_ulong (val % bnd);
  677. }
  678. #undef FUNC_NAME
  679. SCM_DEFINE (scm_char_set_cursor, "char-set-cursor", 1, 0, 0,
  680. (SCM cs), "Return a cursor into the character set @var{cs}.")
  681. #define FUNC_NAME s_scm_char_set_cursor
  682. {
  683. scm_t_char_set *cs_data;
  684. scm_t_char_set_cursor *cur_data;
  685. SCM_VALIDATE_SMOB (1, cs, charset);
  686. cs_data = SCM_CHARSET_DATA (cs);
  687. cur_data =
  688. (scm_t_char_set_cursor *) scm_gc_malloc (sizeof (scm_t_char_set_cursor),
  689. "charset-cursor");
  690. if (cs_data->len == 0)
  691. {
  692. cur_data->range = (size_t) (-1);
  693. cur_data->n = 0;
  694. }
  695. else
  696. {
  697. cur_data->range = 0;
  698. cur_data->n = cs_data->ranges[0].lo;
  699. }
  700. SCM_RETURN_NEWSMOB (scm_tc16_charset_cursor, cur_data);
  701. }
  702. #undef FUNC_NAME
  703. SCM_DEFINE (scm_char_set_ref, "char-set-ref", 2, 0, 0,
  704. (SCM cs, SCM cursor),
  705. "Return the character at the current cursor position\n"
  706. "@var{cursor} in the character set @var{cs}. It is an error to\n"
  707. "pass a cursor for which @code{end-of-char-set?} returns true.")
  708. #define FUNC_NAME s_scm_char_set_ref
  709. {
  710. scm_t_char_set *cs_data;
  711. scm_t_char_set_cursor *cur_data;
  712. size_t i;
  713. SCM_VALIDATE_SMOB (1, cs, charset);
  714. SCM_VALIDATE_SMOB (2, cursor, charset_cursor);
  715. cs_data = SCM_CHARSET_DATA (cs);
  716. cur_data = (scm_t_char_set_cursor *) SCM_SMOB_DATA (cursor);
  717. /* Validate that this cursor is still true. */
  718. i = cur_data->range;
  719. if (i == (size_t) (-1)
  720. || i >= cs_data->len
  721. || cur_data->n < cs_data->ranges[i].lo
  722. || cur_data->n > cs_data->ranges[i].hi)
  723. SCM_MISC_ERROR ("invalid character set cursor: ~A", scm_list_1 (cursor));
  724. return SCM_MAKE_CHAR (cur_data->n);
  725. }
  726. #undef FUNC_NAME
  727. SCM_DEFINE (scm_char_set_cursor_next, "char-set-cursor-next", 2, 0, 0,
  728. (SCM cs, SCM cursor),
  729. "Advance the character set cursor @var{cursor} to the next\n"
  730. "character in the character set @var{cs}. It is an error if the\n"
  731. "cursor given satisfies @code{end-of-char-set?}.")
  732. #define FUNC_NAME s_scm_char_set_cursor_next
  733. {
  734. scm_t_char_set *cs_data;
  735. scm_t_char_set_cursor *cur_data;
  736. size_t i;
  737. SCM_VALIDATE_SMOB (1, cs, charset);
  738. SCM_VALIDATE_SMOB (2, cursor, charset_cursor);
  739. cs_data = SCM_CHARSET_DATA (cs);
  740. cur_data = (scm_t_char_set_cursor *) SCM_SMOB_DATA (cursor);
  741. /* Validate that this cursor is still true. */
  742. i = cur_data->range;
  743. if (i == (size_t) (-1)
  744. || i >= cs_data->len
  745. || cur_data->n < cs_data->ranges[i].lo
  746. || cur_data->n > cs_data->ranges[i].hi)
  747. SCM_MISC_ERROR ("invalid character set cursor: ~A", scm_list_1 (cursor));
  748. /* Increment the cursor. */
  749. if (cur_data->n == cs_data->ranges[i].hi)
  750. {
  751. if (i + 1 < cs_data->len)
  752. {
  753. cur_data->range = i + 1;
  754. cur_data->n = cs_data->ranges[i + 1].lo;
  755. }
  756. else
  757. {
  758. /* This is the end of the road. */
  759. cur_data->range = (size_t) (-1);
  760. cur_data->n = 0;
  761. }
  762. }
  763. else
  764. {
  765. cur_data->n = cur_data->n + 1;
  766. }
  767. return cursor;
  768. }
  769. #undef FUNC_NAME
  770. SCM_DEFINE (scm_end_of_char_set_p, "end-of-char-set?", 1, 0, 0,
  771. (SCM cursor),
  772. "Return @code{#t} if @var{cursor} has reached the end of a\n"
  773. "character set, @code{#f} otherwise.")
  774. #define FUNC_NAME s_scm_end_of_char_set_p
  775. {
  776. scm_t_char_set_cursor *cur_data;
  777. SCM_VALIDATE_SMOB (1, cursor, charset_cursor);
  778. cur_data = (scm_t_char_set_cursor *) SCM_SMOB_DATA (cursor);
  779. if (cur_data->range == (size_t) (-1))
  780. return SCM_BOOL_T;
  781. return SCM_BOOL_F;
  782. }
  783. #undef FUNC_NAME
  784. SCM_DEFINE (scm_char_set_fold, "char-set-fold", 3, 0, 0,
  785. (SCM kons, SCM knil, SCM cs),
  786. "Fold the procedure @var{kons} over the character set @var{cs},\n"
  787. "initializing it with @var{knil}.")
  788. #define FUNC_NAME s_scm_char_set_fold
  789. {
  790. scm_t_char_set *cs_data;
  791. int k;
  792. scm_t_wchar n;
  793. SCM_VALIDATE_PROC (1, kons);
  794. SCM_VALIDATE_SMOB (3, cs, charset);
  795. cs_data = SCM_CHARSET_DATA (cs);
  796. if (cs_data->len == 0)
  797. return knil;
  798. for (k = 0; k < cs_data->len; k++)
  799. for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++)
  800. {
  801. knil = scm_call_2 (kons, SCM_MAKE_CHAR (n), knil);
  802. }
  803. return knil;
  804. }
  805. #undef FUNC_NAME
  806. SCM_DEFINE (scm_char_set_unfold, "char-set-unfold", 4, 1, 0,
  807. (SCM p, SCM f, SCM g, SCM seed, SCM base_cs),
  808. "This is a fundamental constructor for character sets.\n"
  809. "@itemize @bullet\n"
  810. "@item @var{g} is used to generate a series of ``seed'' values\n"
  811. "from the initial seed: @var{seed}, (@var{g} @var{seed}),\n"
  812. "(@var{g}^2 @var{seed}), (@var{g}^3 @var{seed}), @dots{}\n"
  813. "@item @var{p} tells us when to stop -- when it returns true\n"
  814. "when applied to one of the seed values.\n"
  815. "@item @var{f} maps each seed value to a character. These\n"
  816. "characters are added to the base character set @var{base_cs} to\n"
  817. "form the result; @var{base_cs} defaults to the empty set.\n"
  818. "@end itemize")
  819. #define FUNC_NAME s_scm_char_set_unfold
  820. {
  821. SCM result, tmp;
  822. SCM_VALIDATE_PROC (1, p);
  823. SCM_VALIDATE_PROC (2, f);
  824. SCM_VALIDATE_PROC (3, g);
  825. if (!SCM_UNBNDP (base_cs))
  826. {
  827. SCM_VALIDATE_SMOB (5, base_cs, charset);
  828. result = scm_char_set_copy (base_cs);
  829. }
  830. else
  831. result = make_char_set (FUNC_NAME);
  832. tmp = scm_call_1 (p, seed);
  833. while (scm_is_false (tmp))
  834. {
  835. SCM ch = scm_call_1 (f, seed);
  836. if (!SCM_CHARP (ch))
  837. SCM_MISC_ERROR ("procedure ~S returned non-char", scm_list_1 (f));
  838. SCM_CHARSET_SET (result, SCM_CHAR (ch));
  839. seed = scm_call_1 (g, seed);
  840. tmp = scm_call_1 (p, seed);
  841. }
  842. return result;
  843. }
  844. #undef FUNC_NAME
  845. SCM_DEFINE (scm_char_set_unfold_x, "char-set-unfold!", 5, 0, 0,
  846. (SCM p, SCM f, SCM g, SCM seed, SCM base_cs),
  847. "This is a fundamental constructor for character sets.\n"
  848. "@itemize @bullet\n"
  849. "@item @var{g} is used to generate a series of ``seed'' values\n"
  850. "from the initial seed: @var{seed}, (@var{g} @var{seed}),\n"
  851. "(@var{g}^2 @var{seed}), (@var{g}^3 @var{seed}), @dots{}\n"
  852. "@item @var{p} tells us when to stop -- when it returns true\n"
  853. "when applied to one of the seed values.\n"
  854. "@item @var{f} maps each seed value to a character. These\n"
  855. "characters are added to the base character set @var{base_cs} to\n"
  856. "form the result; @var{base_cs} defaults to the empty set.\n"
  857. "@end itemize")
  858. #define FUNC_NAME s_scm_char_set_unfold_x
  859. {
  860. SCM tmp;
  861. SCM_VALIDATE_PROC (1, p);
  862. SCM_VALIDATE_PROC (2, f);
  863. SCM_VALIDATE_PROC (3, g);
  864. SCM_VALIDATE_SMOB (5, base_cs, charset);
  865. tmp = scm_call_1 (p, seed);
  866. while (scm_is_false (tmp))
  867. {
  868. SCM ch = scm_call_1 (f, seed);
  869. if (!SCM_CHARP (ch))
  870. SCM_MISC_ERROR ("procedure ~S returned non-char", scm_list_1 (f));
  871. SCM_CHARSET_SET (base_cs, SCM_CHAR (ch));
  872. seed = scm_call_1 (g, seed);
  873. tmp = scm_call_1 (p, seed);
  874. }
  875. return base_cs;
  876. }
  877. #undef FUNC_NAME
  878. SCM_DEFINE (scm_char_set_for_each, "char-set-for-each", 2, 0, 0,
  879. (SCM proc, SCM cs),
  880. "Apply @var{proc} to every character in the character set\n"
  881. "@var{cs}. The return value is not specified.")
  882. #define FUNC_NAME s_scm_char_set_for_each
  883. {
  884. scm_t_char_set *cs_data;
  885. int k;
  886. scm_t_wchar n;
  887. SCM_VALIDATE_PROC (1, proc);
  888. SCM_VALIDATE_SMOB (2, cs, charset);
  889. cs_data = SCM_CHARSET_DATA (cs);
  890. if (cs_data->len == 0)
  891. return SCM_UNSPECIFIED;
  892. for (k = 0; k < cs_data->len; k++)
  893. for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++)
  894. {
  895. scm_call_1 (proc, SCM_MAKE_CHAR (n));
  896. }
  897. return SCM_UNSPECIFIED;
  898. }
  899. #undef FUNC_NAME
  900. SCM_DEFINE (scm_char_set_map, "char-set-map", 2, 0, 0,
  901. (SCM proc, SCM cs),
  902. "Map the procedure @var{proc} over every character in @var{cs}.\n"
  903. "@var{proc} must be a character -> character procedure.")
  904. #define FUNC_NAME s_scm_char_set_map
  905. {
  906. SCM result;
  907. int k;
  908. scm_t_char_set *cs_data;
  909. scm_t_wchar n;
  910. SCM_VALIDATE_PROC (1, proc);
  911. SCM_VALIDATE_SMOB (2, cs, charset);
  912. result = make_char_set (FUNC_NAME);
  913. cs_data = SCM_CHARSET_DATA (cs);
  914. if (cs_data->len == 0)
  915. return result;
  916. for (k = 0; k < cs_data->len; k++)
  917. for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++)
  918. {
  919. SCM ch = scm_call_1 (proc, SCM_MAKE_CHAR (n));
  920. if (!SCM_CHARP (ch))
  921. SCM_MISC_ERROR ("procedure ~S returned non-char",
  922. scm_list_1 (proc));
  923. SCM_CHARSET_SET (result, SCM_CHAR (ch));
  924. }
  925. return result;
  926. }
  927. #undef FUNC_NAME
  928. SCM_DEFINE (scm_char_set_copy, "char-set-copy", 1, 0, 0,
  929. (SCM cs),
  930. "Return a newly allocated character set containing all\n"
  931. "characters in @var{cs}.")
  932. #define FUNC_NAME s_scm_char_set_copy
  933. {
  934. SCM ret;
  935. scm_t_char_set *p1, *p2;
  936. SCM_VALIDATE_SMOB (1, cs, charset);
  937. ret = make_char_set (FUNC_NAME);
  938. p1 = SCM_CHARSET_DATA (cs);
  939. p2 = SCM_CHARSET_DATA (ret);
  940. p2->len = p1->len;
  941. if (p1->len == 0)
  942. p2->ranges = NULL;
  943. else
  944. {
  945. p2->ranges = scm_gc_malloc (sizeof (scm_t_char_range) * p1->len,
  946. "character-set");
  947. memcpy (p2->ranges, p1->ranges, sizeof (scm_t_char_range) * p1->len);
  948. }
  949. return ret;
  950. }
  951. #undef FUNC_NAME
  952. SCM_DEFINE (scm_char_set, "char-set", 0, 0, 1,
  953. (SCM rest),
  954. "Return a character set containing all given characters.")
  955. #define FUNC_NAME s_scm_char_set
  956. {
  957. SCM cs;
  958. int argnum = 1;
  959. SCM_VALIDATE_REST_ARGUMENT (rest);
  960. cs = make_char_set (FUNC_NAME);
  961. while (!scm_is_null (rest))
  962. {
  963. scm_t_wchar c;
  964. SCM_VALIDATE_CHAR_COPY (argnum, SCM_CAR (rest), c);
  965. argnum++;
  966. rest = SCM_CDR (rest);
  967. SCM_CHARSET_SET (cs, c);
  968. }
  969. return cs;
  970. }
  971. #undef FUNC_NAME
  972. SCM_DEFINE (scm_list_to_char_set, "list->char-set", 1, 1, 0,
  973. (SCM list, SCM base_cs),
  974. "Convert the character list @var{list} to a character set. If\n"
  975. "the character set @var{base_cs} is given, the character in this\n"
  976. "set are also included in the result.")
  977. #define FUNC_NAME s_scm_list_to_char_set
  978. {
  979. SCM cs;
  980. SCM_VALIDATE_LIST (1, list);
  981. if (SCM_UNBNDP (base_cs))
  982. cs = make_char_set (FUNC_NAME);
  983. else
  984. {
  985. SCM_VALIDATE_SMOB (2, base_cs, charset);
  986. cs = scm_char_set_copy (base_cs);
  987. }
  988. while (!scm_is_null (list))
  989. {
  990. SCM chr = SCM_CAR (list);
  991. scm_t_wchar c;
  992. SCM_VALIDATE_CHAR_COPY (0, chr, c);
  993. list = SCM_CDR (list);
  994. SCM_CHARSET_SET (cs, c);
  995. }
  996. return cs;
  997. }
  998. #undef FUNC_NAME
  999. SCM_DEFINE (scm_list_to_char_set_x, "list->char-set!", 2, 0, 0,
  1000. (SCM list, SCM base_cs),
  1001. "Convert the character list @var{list} to a character set. The\n"
  1002. "characters are added to @var{base_cs} and @var{base_cs} is\n"
  1003. "returned.")
  1004. #define FUNC_NAME s_scm_list_to_char_set_x
  1005. {
  1006. SCM_VALIDATE_LIST (1, list);
  1007. SCM_VALIDATE_SMOB (2, base_cs, charset);
  1008. while (!scm_is_null (list))
  1009. {
  1010. SCM chr = SCM_CAR (list);
  1011. scm_t_wchar c;
  1012. SCM_VALIDATE_CHAR_COPY (0, chr, c);
  1013. list = SCM_CDR (list);
  1014. SCM_CHARSET_SET (base_cs, c);
  1015. }
  1016. return base_cs;
  1017. }
  1018. #undef FUNC_NAME
  1019. SCM_DEFINE (scm_string_to_char_set, "string->char-set", 1, 1, 0,
  1020. (SCM str, SCM base_cs),
  1021. "Convert the string @var{str} to a character set. If the\n"
  1022. "character set @var{base_cs} is given, the characters in this\n"
  1023. "set are also included in the result.")
  1024. #define FUNC_NAME s_scm_string_to_char_set
  1025. {
  1026. SCM cs;
  1027. size_t k = 0, len;
  1028. SCM_VALIDATE_STRING (1, str);
  1029. if (SCM_UNBNDP (base_cs))
  1030. cs = make_char_set (FUNC_NAME);
  1031. else
  1032. {
  1033. SCM_VALIDATE_SMOB (2, base_cs, charset);
  1034. cs = scm_char_set_copy (base_cs);
  1035. }
  1036. len = scm_i_string_length (str);
  1037. while (k < len)
  1038. {
  1039. scm_t_wchar c = scm_i_string_ref (str, k++);
  1040. SCM_CHARSET_SET (cs, c);
  1041. }
  1042. scm_remember_upto_here_1 (str);
  1043. return cs;
  1044. }
  1045. #undef FUNC_NAME
  1046. SCM_DEFINE (scm_string_to_char_set_x, "string->char-set!", 2, 0, 0,
  1047. (SCM str, SCM base_cs),
  1048. "Convert the string @var{str} to a character set. The\n"
  1049. "characters from the string are added to @var{base_cs}, and\n"
  1050. "@var{base_cs} is returned.")
  1051. #define FUNC_NAME s_scm_string_to_char_set_x
  1052. {
  1053. size_t k = 0, len;
  1054. SCM_VALIDATE_STRING (1, str);
  1055. SCM_VALIDATE_SMOB (2, base_cs, charset);
  1056. len = scm_i_string_length (str);
  1057. while (k < len)
  1058. {
  1059. scm_t_wchar c = scm_i_string_ref (str, k++);
  1060. SCM_CHARSET_SET (base_cs, c);
  1061. }
  1062. scm_remember_upto_here_1 (str);
  1063. return base_cs;
  1064. }
  1065. #undef FUNC_NAME
  1066. SCM_DEFINE (scm_char_set_filter, "char-set-filter", 2, 1, 0,
  1067. (SCM pred, SCM cs, SCM base_cs),
  1068. "Return a character set containing every character from @var{cs}\n"
  1069. "so that it satisfies @var{pred}. If provided, the characters\n"
  1070. "from @var{base_cs} are added to the result.")
  1071. #define FUNC_NAME s_scm_char_set_filter
  1072. {
  1073. SCM ret;
  1074. int k;
  1075. scm_t_wchar n;
  1076. scm_t_char_set *p;
  1077. SCM_VALIDATE_PROC (1, pred);
  1078. SCM_VALIDATE_SMOB (2, cs, charset);
  1079. if (!SCM_UNBNDP (base_cs))
  1080. {
  1081. SCM_VALIDATE_SMOB (3, base_cs, charset);
  1082. ret = scm_char_set_copy (base_cs);
  1083. }
  1084. else
  1085. ret = make_char_set (FUNC_NAME);
  1086. p = SCM_CHARSET_DATA (cs);
  1087. if (p->len == 0)
  1088. return ret;
  1089. for (k = 0; k < p->len; k++)
  1090. for (n = p->ranges[k].lo; n <= p->ranges[k].hi; n++)
  1091. {
  1092. SCM res = scm_call_1 (pred, SCM_MAKE_CHAR (n));
  1093. if (scm_is_true (res))
  1094. SCM_CHARSET_SET (ret, n);
  1095. }
  1096. return ret;
  1097. }
  1098. #undef FUNC_NAME
  1099. SCM_DEFINE (scm_char_set_filter_x, "char-set-filter!", 3, 0, 0,
  1100. (SCM pred, SCM cs, SCM base_cs),
  1101. "Return a character set containing every character from @var{cs}\n"
  1102. "so that it satisfies @var{pred}. The characters are added to\n"
  1103. "@var{base_cs} and @var{base_cs} is returned.")
  1104. #define FUNC_NAME s_scm_char_set_filter_x
  1105. {
  1106. int k;
  1107. scm_t_wchar n;
  1108. scm_t_char_set *p;
  1109. SCM_VALIDATE_PROC (1, pred);
  1110. SCM_VALIDATE_SMOB (2, cs, charset);
  1111. SCM_VALIDATE_SMOB (3, base_cs, charset);
  1112. p = SCM_CHARSET_DATA (cs);
  1113. if (p->len == 0)
  1114. return base_cs;
  1115. for (k = 0; k < p->len; k++)
  1116. for (n = p->ranges[k].lo; n <= p->ranges[k].hi; n++)
  1117. {
  1118. SCM res = scm_call_1 (pred, SCM_MAKE_CHAR (n));
  1119. if (scm_is_true (res))
  1120. SCM_CHARSET_SET (base_cs, n);
  1121. }
  1122. return base_cs;
  1123. }
  1124. #undef FUNC_NAME
  1125. /* Return a character set containing all the characters from [LOWER,UPPER),
  1126. giving range errors if ERROR, adding chars from BASE_CS, and recycling
  1127. BASE_CS if REUSE is true. */
  1128. static SCM
  1129. scm_i_ucs_range_to_char_set (const char *FUNC_NAME, SCM lower, SCM upper,
  1130. SCM error, SCM base_cs, int reuse)
  1131. {
  1132. SCM cs;
  1133. size_t clower, cupper;
  1134. clower = scm_to_size_t (lower);
  1135. cupper = scm_to_size_t (upper) - 1;
  1136. SCM_ASSERT_RANGE (2, upper, cupper >= clower);
  1137. if (!SCM_UNBNDP (error))
  1138. {
  1139. if (scm_is_true (error))
  1140. {
  1141. SCM_ASSERT_RANGE (1, lower, SCM_IS_UNICODE_CHAR (clower));
  1142. SCM_ASSERT_RANGE (2, upper, SCM_IS_UNICODE_CHAR (cupper));
  1143. if (clower < SCM_CODEPOINT_SURROGATE_START
  1144. && cupper > SCM_CODEPOINT_SURROGATE_END)
  1145. scm_error(scm_out_of_range_key,
  1146. FUNC_NAME, "invalid range - contains surrogate characters: ~S to ~S",
  1147. scm_list_2 (lower, upper), scm_list_1 (upper));
  1148. }
  1149. }
  1150. if (SCM_UNBNDP (base_cs))
  1151. cs = make_char_set (FUNC_NAME);
  1152. else
  1153. {
  1154. SCM_VALIDATE_SMOB (3, base_cs, charset);
  1155. if (reuse)
  1156. cs = base_cs;
  1157. else
  1158. cs = scm_char_set_copy (base_cs);
  1159. }
  1160. if ((clower >= SCM_CODEPOINT_SURROGATE_START && clower <= SCM_CODEPOINT_SURROGATE_END)
  1161. && (cupper >= SCM_CODEPOINT_SURROGATE_START && cupper <= SCM_CODEPOINT_SURROGATE_END))
  1162. return cs;
  1163. if (clower > SCM_CODEPOINT_MAX)
  1164. clower = SCM_CODEPOINT_MAX;
  1165. if (clower >= SCM_CODEPOINT_SURROGATE_START && clower <= SCM_CODEPOINT_SURROGATE_END)
  1166. clower = SCM_CODEPOINT_SURROGATE_END + 1;
  1167. if (cupper > SCM_CODEPOINT_MAX)
  1168. cupper = SCM_CODEPOINT_MAX;
  1169. if (cupper >= SCM_CODEPOINT_SURROGATE_START && cupper <= SCM_CODEPOINT_SURROGATE_END)
  1170. cupper = SCM_CODEPOINT_SURROGATE_START - 1;
  1171. if (clower < SCM_CODEPOINT_SURROGATE_START && cupper > SCM_CODEPOINT_SURROGATE_END)
  1172. {
  1173. scm_i_charset_set_range (SCM_CHARSET_DATA (cs), clower, SCM_CODEPOINT_SURROGATE_START - 1);
  1174. scm_i_charset_set_range (SCM_CHARSET_DATA (cs), SCM_CODEPOINT_SURROGATE_END + 1, cupper);
  1175. }
  1176. else
  1177. scm_i_charset_set_range (SCM_CHARSET_DATA (cs), clower, cupper);
  1178. return cs;
  1179. }
  1180. SCM_DEFINE (scm_ucs_range_to_char_set, "ucs-range->char-set", 2, 2, 0,
  1181. (SCM lower, SCM upper, SCM error, SCM base_cs),
  1182. "Return a character set containing all characters whose\n"
  1183. "character codes lie in the half-open range\n"
  1184. "[@var{lower},@var{upper}).\n"
  1185. "\n"
  1186. "If @var{error} is a true value, an error is signalled if the\n"
  1187. "specified range contains characters which are not valid\n"
  1188. "Unicode code points. If @var{error} is @code{#f},\n"
  1189. "these characters are silently left out of the resulting\n"
  1190. "character set.\n"
  1191. "\n"
  1192. "The characters in @var{base_cs} are added to the result, if\n"
  1193. "given.")
  1194. #define FUNC_NAME s_scm_ucs_range_to_char_set
  1195. {
  1196. return scm_i_ucs_range_to_char_set (FUNC_NAME, lower, upper,
  1197. error, base_cs, 0);
  1198. }
  1199. #undef FUNC_NAME
  1200. SCM_DEFINE (scm_ucs_range_to_char_set_x, "ucs-range->char-set!", 4, 0, 0,
  1201. (SCM lower, SCM upper, SCM error, SCM base_cs),
  1202. "Return a character set containing all characters whose\n"
  1203. "character codes lie in the half-open range\n"
  1204. "[@var{lower},@var{upper}).\n"
  1205. "\n"
  1206. "If @var{error} is a true value, an error is signalled if the\n"
  1207. "specified range contains characters which are not contained in\n"
  1208. "the implemented character range. If @var{error} is @code{#f},\n"
  1209. "these characters are silently left out of the resulting\n"
  1210. "character set.\n"
  1211. "\n"
  1212. "The characters are added to @var{base_cs} and @var{base_cs} is\n"
  1213. "returned.")
  1214. #define FUNC_NAME s_scm_ucs_range_to_char_set_x
  1215. {
  1216. SCM_VALIDATE_SMOB (4, base_cs, charset);
  1217. return scm_i_ucs_range_to_char_set (FUNC_NAME, lower, upper,
  1218. error, base_cs, 1);
  1219. }
  1220. #undef FUNC_NAME
  1221. SCM_DEFINE (scm_to_char_set, "->char-set", 1, 0, 0,
  1222. (SCM x),
  1223. "Coerces x into a char-set. @var{x} may be a string, character or char-set. A string is converted to the set of its constituent characters; a character is converted to a singleton set; a char-set is returned as-is.")
  1224. #define FUNC_NAME s_scm_to_char_set
  1225. {
  1226. if (scm_is_string (x))
  1227. return scm_string_to_char_set (x, SCM_UNDEFINED);
  1228. else if (SCM_CHARP (x))
  1229. return scm_char_set (scm_list_1 (x));
  1230. else if (SCM_SMOB_PREDICATE (scm_tc16_charset, x))
  1231. return x;
  1232. else
  1233. scm_wrong_type_arg (NULL, 0, x);
  1234. }
  1235. #undef FUNC_NAME
  1236. SCM_DEFINE (scm_char_set_size, "char-set-size", 1, 0, 0,
  1237. (SCM cs),
  1238. "Return the number of elements in character set @var{cs}.")
  1239. #define FUNC_NAME s_scm_char_set_size
  1240. {
  1241. int k, count = 0;
  1242. scm_t_char_set *cs_data;
  1243. SCM_VALIDATE_SMOB (1, cs, charset);
  1244. cs_data = SCM_CHARSET_DATA (cs);
  1245. if (cs_data->len == 0)
  1246. return scm_from_int (0);
  1247. for (k = 0; k < cs_data->len; k++)
  1248. count += cs_data->ranges[k].hi - cs_data->ranges[k].lo + 1;
  1249. return scm_from_int (count);
  1250. }
  1251. #undef FUNC_NAME
  1252. SCM_DEFINE (scm_char_set_count, "char-set-count", 2, 0, 0,
  1253. (SCM pred, SCM cs),
  1254. "Return the number of the elements int the character set\n"
  1255. "@var{cs} which satisfy the predicate @var{pred}.")
  1256. #define FUNC_NAME s_scm_char_set_count
  1257. {
  1258. int k, count = 0;
  1259. scm_t_wchar n;
  1260. scm_t_char_set *cs_data;
  1261. SCM_VALIDATE_PROC (1, pred);
  1262. SCM_VALIDATE_SMOB (2, cs, charset);
  1263. cs_data = SCM_CHARSET_DATA (cs);
  1264. if (cs_data->len == 0)
  1265. return scm_from_int (0);
  1266. for (k = 0; k < cs_data->len; k++)
  1267. for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++)
  1268. {
  1269. SCM res = scm_call_1 (pred, SCM_MAKE_CHAR (n));
  1270. if (scm_is_true (res))
  1271. count++;
  1272. }
  1273. return SCM_I_MAKINUM (count);
  1274. }
  1275. #undef FUNC_NAME
  1276. SCM_DEFINE (scm_char_set_to_list, "char-set->list", 1, 0, 0,
  1277. (SCM cs),
  1278. "Return a list containing the elements of the character set\n"
  1279. "@var{cs}.")
  1280. #define FUNC_NAME s_scm_char_set_to_list
  1281. {
  1282. int k;
  1283. scm_t_wchar n;
  1284. SCM result = SCM_EOL;
  1285. scm_t_char_set *p;
  1286. SCM_VALIDATE_SMOB (1, cs, charset);
  1287. p = SCM_CHARSET_DATA (cs);
  1288. if (p->len == 0)
  1289. return SCM_EOL;
  1290. for (k = p->len - 1; k >= 0; k--)
  1291. for (n = p->ranges[k].hi; n >= p->ranges[k].lo; n--)
  1292. result = scm_cons (SCM_MAKE_CHAR (n), result);
  1293. return result;
  1294. }
  1295. #undef FUNC_NAME
  1296. SCM_DEFINE (scm_char_set_to_string, "char-set->string", 1, 0, 0,
  1297. (SCM cs),
  1298. "Return a string containing the elements of the character set\n"
  1299. "@var{cs}. The order in which the characters are placed in the\n"
  1300. "string is not defined.")
  1301. #define FUNC_NAME s_scm_char_set_to_string
  1302. {
  1303. int k;
  1304. int count = 0;
  1305. int idx = 0;
  1306. int wide = 0;
  1307. SCM result;
  1308. scm_t_wchar n;
  1309. scm_t_char_set *cs_data;
  1310. char *buf;
  1311. scm_t_wchar *wbuf;
  1312. SCM_VALIDATE_SMOB (1, cs, charset);
  1313. cs_data = SCM_CHARSET_DATA (cs);
  1314. if (cs_data->len == 0)
  1315. return scm_nullstr;
  1316. if (cs_data->ranges[cs_data->len - 1].hi > 255)
  1317. wide = 1;
  1318. count = scm_to_int (scm_char_set_size (cs));
  1319. if (wide)
  1320. result = scm_i_make_wide_string (count, &wbuf, 0);
  1321. else
  1322. result = scm_i_make_string (count, &buf, 0);
  1323. for (k = 0; k < cs_data->len; k++)
  1324. for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++)
  1325. {
  1326. if (wide)
  1327. wbuf[idx++] = n;
  1328. else
  1329. buf[idx++] = n;
  1330. }
  1331. return result;
  1332. }
  1333. #undef FUNC_NAME
  1334. SCM_DEFINE (scm_char_set_contains_p, "char-set-contains?", 2, 0, 0,
  1335. (SCM cs, SCM ch),
  1336. "Return @code{#t} iff the character @var{ch} is contained in the\n"
  1337. "character set @var{cs}.")
  1338. #define FUNC_NAME s_scm_char_set_contains_p
  1339. {
  1340. SCM_VALIDATE_SMOB (1, cs, charset);
  1341. SCM_VALIDATE_CHAR (2, ch);
  1342. return scm_from_bool (SCM_CHARSET_GET (cs, SCM_CHAR (ch)));
  1343. }
  1344. #undef FUNC_NAME
  1345. SCM_DEFINE (scm_char_set_every, "char-set-every", 2, 0, 0,
  1346. (SCM pred, SCM cs),
  1347. "Return a true value if every character in the character set\n"
  1348. "@var{cs} satisfies the predicate @var{pred}.")
  1349. #define FUNC_NAME s_scm_char_set_every
  1350. {
  1351. int k;
  1352. scm_t_wchar n;
  1353. SCM res = SCM_BOOL_T;
  1354. scm_t_char_set *cs_data;
  1355. SCM_VALIDATE_PROC (1, pred);
  1356. SCM_VALIDATE_SMOB (2, cs, charset);
  1357. cs_data = SCM_CHARSET_DATA (cs);
  1358. if (cs_data->len == 0)
  1359. return SCM_BOOL_T;
  1360. for (k = 0; k < cs_data->len; k++)
  1361. for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++)
  1362. {
  1363. res = scm_call_1 (pred, SCM_MAKE_CHAR (n));
  1364. if (scm_is_false (res))
  1365. return res;
  1366. }
  1367. return SCM_BOOL_T;
  1368. }
  1369. #undef FUNC_NAME
  1370. SCM_DEFINE (scm_char_set_any, "char-set-any", 2, 0, 0,
  1371. (SCM pred, SCM cs),
  1372. "Return a true value if any character in the character set\n"
  1373. "@var{cs} satisfies the predicate @var{pred}.")
  1374. #define FUNC_NAME s_scm_char_set_any
  1375. {
  1376. int k;
  1377. scm_t_wchar n;
  1378. scm_t_char_set *cs_data;
  1379. SCM_VALIDATE_PROC (1, pred);
  1380. SCM_VALIDATE_SMOB (2, cs, charset);
  1381. cs_data = SCM_CHARSET_DATA (cs);
  1382. if (cs_data->len == 0)
  1383. return SCM_BOOL_T;
  1384. for (k = 0; k < cs_data->len; k++)
  1385. for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++)
  1386. {
  1387. SCM res = scm_call_1 (pred, SCM_MAKE_CHAR (n));
  1388. if (scm_is_true (res))
  1389. return res;
  1390. }
  1391. return SCM_BOOL_F;
  1392. }
  1393. #undef FUNC_NAME
  1394. SCM_DEFINE (scm_char_set_adjoin, "char-set-adjoin", 1, 0, 1,
  1395. (SCM cs, SCM rest),
  1396. "Add all character arguments to the first argument, which must\n"
  1397. "be a character set.")
  1398. #define FUNC_NAME s_scm_char_set_adjoin
  1399. {
  1400. SCM_VALIDATE_SMOB (1, cs, charset);
  1401. SCM_VALIDATE_REST_ARGUMENT (rest);
  1402. cs = scm_char_set_copy (cs);
  1403. while (!scm_is_null (rest))
  1404. {
  1405. SCM chr = SCM_CAR (rest);
  1406. scm_t_wchar c;
  1407. SCM_VALIDATE_CHAR_COPY (1, chr, c);
  1408. rest = SCM_CDR (rest);
  1409. SCM_CHARSET_SET (cs, c);
  1410. }
  1411. return cs;
  1412. }
  1413. #undef FUNC_NAME
  1414. SCM_DEFINE (scm_char_set_delete, "char-set-delete", 1, 0, 1,
  1415. (SCM cs, SCM rest),
  1416. "Delete all character arguments from the first argument, which\n"
  1417. "must be a character set.")
  1418. #define FUNC_NAME s_scm_char_set_delete
  1419. {
  1420. SCM_VALIDATE_SMOB (1, cs, charset);
  1421. SCM_VALIDATE_REST_ARGUMENT (rest);
  1422. cs = scm_char_set_copy (cs);
  1423. while (!scm_is_null (rest))
  1424. {
  1425. SCM chr = SCM_CAR (rest);
  1426. scm_t_wchar c;
  1427. SCM_VALIDATE_CHAR_COPY (1, chr, c);
  1428. rest = SCM_CDR (rest);
  1429. SCM_CHARSET_UNSET (cs, c);
  1430. }
  1431. return cs;
  1432. }
  1433. #undef FUNC_NAME
  1434. SCM_DEFINE (scm_char_set_adjoin_x, "char-set-adjoin!", 1, 0, 1,
  1435. (SCM cs, SCM rest),
  1436. "Add all character arguments to the first argument, which must\n"
  1437. "be a character set.")
  1438. #define FUNC_NAME s_scm_char_set_adjoin_x
  1439. {
  1440. SCM_VALIDATE_SMOB (1, cs, charset);
  1441. SCM_VALIDATE_REST_ARGUMENT (rest);
  1442. while (!scm_is_null (rest))
  1443. {
  1444. SCM chr = SCM_CAR (rest);
  1445. scm_t_wchar c;
  1446. SCM_VALIDATE_CHAR_COPY (1, chr, c);
  1447. rest = SCM_CDR (rest);
  1448. SCM_CHARSET_SET (cs, c);
  1449. }
  1450. return cs;
  1451. }
  1452. #undef FUNC_NAME
  1453. SCM_DEFINE (scm_char_set_delete_x, "char-set-delete!", 1, 0, 1,
  1454. (SCM cs, SCM rest),
  1455. "Delete all character arguments from the first argument, which\n"
  1456. "must be a character set.")
  1457. #define FUNC_NAME s_scm_char_set_delete_x
  1458. {
  1459. SCM_VALIDATE_SMOB (1, cs, charset);
  1460. SCM_VALIDATE_REST_ARGUMENT (rest);
  1461. while (!scm_is_null (rest))
  1462. {
  1463. SCM chr = SCM_CAR (rest);
  1464. scm_t_wchar c;
  1465. SCM_VALIDATE_CHAR_COPY (1, chr, c);
  1466. rest = SCM_CDR (rest);
  1467. SCM_CHARSET_UNSET (cs, c);
  1468. }
  1469. return cs;
  1470. }
  1471. #undef FUNC_NAME
  1472. SCM_DEFINE (scm_char_set_complement, "char-set-complement", 1, 0, 0,
  1473. (SCM cs), "Return the complement of the character set @var{cs}.")
  1474. #define FUNC_NAME s_scm_char_set_complement
  1475. {
  1476. SCM res;
  1477. scm_t_char_set *p, *q;
  1478. SCM_VALIDATE_SMOB (1, cs, charset);
  1479. res = make_char_set (FUNC_NAME);
  1480. p = SCM_CHARSET_DATA (res);
  1481. q = SCM_CHARSET_DATA (cs);
  1482. charsets_complement (p, q);
  1483. return res;
  1484. }
  1485. #undef FUNC_NAME
  1486. SCM_DEFINE (scm_char_set_union, "char-set-union", 0, 0, 1,
  1487. (SCM rest),
  1488. "Return the union of all argument character sets.")
  1489. #define FUNC_NAME s_scm_char_set_union
  1490. {
  1491. int c = 1;
  1492. SCM res;
  1493. scm_t_char_set *p;
  1494. SCM_VALIDATE_REST_ARGUMENT (rest);
  1495. res = make_char_set (FUNC_NAME);
  1496. p = SCM_CHARSET_DATA (res);
  1497. while (!scm_is_null (rest))
  1498. {
  1499. SCM cs = SCM_CAR (rest);
  1500. SCM_VALIDATE_SMOB (c, cs, charset);
  1501. c++;
  1502. rest = SCM_CDR (rest);
  1503. charsets_union (p, (scm_t_char_set *) SCM_SMOB_DATA (cs));
  1504. }
  1505. return res;
  1506. }
  1507. #undef FUNC_NAME
  1508. SCM_DEFINE (scm_char_set_intersection, "char-set-intersection", 0, 0, 1,
  1509. (SCM rest),
  1510. "Return the intersection of all argument character sets.")
  1511. #define FUNC_NAME s_scm_char_set_intersection
  1512. {
  1513. SCM res;
  1514. SCM_VALIDATE_REST_ARGUMENT (rest);
  1515. if (scm_is_null (rest))
  1516. res = make_char_set (FUNC_NAME);
  1517. else
  1518. {
  1519. scm_t_char_set *p;
  1520. int argnum = 2;
  1521. res = scm_char_set_copy (SCM_CAR (rest));
  1522. p = SCM_CHARSET_DATA (res);
  1523. rest = SCM_CDR (rest);
  1524. while (scm_is_pair (rest))
  1525. {
  1526. SCM cs = SCM_CAR (rest);
  1527. scm_t_char_set *cs_data;
  1528. SCM_VALIDATE_SMOB (argnum, cs, charset);
  1529. argnum++;
  1530. cs_data = SCM_CHARSET_DATA (cs);
  1531. rest = SCM_CDR (rest);
  1532. charsets_intersection (p, cs_data);
  1533. }
  1534. }
  1535. return res;
  1536. }
  1537. #undef FUNC_NAME
  1538. SCM_DEFINE (scm_char_set_difference, "char-set-difference", 1, 0, 1,
  1539. (SCM cs1, SCM rest),
  1540. "Return the difference of all argument character sets.")
  1541. #define FUNC_NAME s_scm_char_set_difference
  1542. {
  1543. int c = 2;
  1544. SCM res, compl;
  1545. scm_t_char_set *p, *q;
  1546. SCM_VALIDATE_SMOB (1, cs1, charset);
  1547. SCM_VALIDATE_REST_ARGUMENT (rest);
  1548. res = scm_char_set_copy (cs1);
  1549. p = SCM_CHARSET_DATA (res);
  1550. compl = make_char_set (FUNC_NAME);
  1551. q = SCM_CHARSET_DATA (compl);
  1552. while (!scm_is_null (rest))
  1553. {
  1554. SCM cs = SCM_CAR (rest);
  1555. SCM_VALIDATE_SMOB (c, cs, charset);
  1556. c++;
  1557. rest = SCM_CDR (rest);
  1558. charsets_complement (q, SCM_CHARSET_DATA (cs));
  1559. charsets_intersection (p, q);
  1560. }
  1561. return res;
  1562. }
  1563. #undef FUNC_NAME
  1564. SCM_DEFINE (scm_char_set_xor, "char-set-xor", 0, 0, 1,
  1565. (SCM rest),
  1566. "Return the exclusive-or of all argument character sets.")
  1567. #define FUNC_NAME s_scm_char_set_xor
  1568. {
  1569. SCM res;
  1570. SCM_VALIDATE_REST_ARGUMENT (rest);
  1571. if (scm_is_null (rest))
  1572. res = make_char_set (FUNC_NAME);
  1573. else
  1574. {
  1575. int argnum = 2;
  1576. scm_t_char_set *p;
  1577. res = scm_char_set_copy (SCM_CAR (rest));
  1578. p = SCM_CHARSET_DATA (res);
  1579. rest = SCM_CDR (rest);
  1580. while (scm_is_pair (rest))
  1581. {
  1582. SCM cs = SCM_CAR (rest);
  1583. scm_t_char_set *cs_data;
  1584. SCM_VALIDATE_SMOB (argnum, cs, charset);
  1585. argnum++;
  1586. cs_data = SCM_CHARSET_DATA (cs);
  1587. rest = SCM_CDR (rest);
  1588. charsets_xor (p, cs_data);
  1589. }
  1590. }
  1591. return res;
  1592. }
  1593. #undef FUNC_NAME
  1594. SCM_DEFINE (scm_char_set_diff_plus_intersection, "char-set-diff+intersection", 1, 0, 1,
  1595. (SCM cs1, SCM rest),
  1596. "Return the difference and the intersection of all argument\n"
  1597. "character sets.")
  1598. #define FUNC_NAME s_scm_char_set_diff_plus_intersection
  1599. {
  1600. int c = 2;
  1601. SCM res1, res2;
  1602. scm_t_char_set *p, *q;
  1603. SCM_VALIDATE_SMOB (1, cs1, charset);
  1604. SCM_VALIDATE_REST_ARGUMENT (rest);
  1605. res1 = scm_char_set_copy (cs1);
  1606. res2 = make_char_set (FUNC_NAME);
  1607. p = SCM_CHARSET_DATA (res1);
  1608. q = SCM_CHARSET_DATA (res2);
  1609. while (!scm_is_null (rest))
  1610. {
  1611. SCM cs = SCM_CAR (rest);
  1612. scm_t_char_set *r;
  1613. SCM_VALIDATE_SMOB (c, cs, charset);
  1614. c++;
  1615. r = SCM_CHARSET_DATA (cs);
  1616. charsets_union (q, r);
  1617. charsets_intersection (p, r);
  1618. rest = SCM_CDR (rest);
  1619. }
  1620. return scm_values (scm_list_2 (res1, res2));
  1621. }
  1622. #undef FUNC_NAME
  1623. SCM_DEFINE (scm_char_set_complement_x, "char-set-complement!", 1, 0, 0,
  1624. (SCM cs), "Return the complement of the character set @var{cs}.")
  1625. #define FUNC_NAME s_scm_char_set_complement_x
  1626. {
  1627. SCM_VALIDATE_SMOB (1, cs, charset);
  1628. cs = scm_char_set_complement (cs);
  1629. return cs;
  1630. }
  1631. #undef FUNC_NAME
  1632. SCM_DEFINE (scm_char_set_union_x, "char-set-union!", 1, 0, 1,
  1633. (SCM cs1, SCM rest),
  1634. "Return the union of all argument character sets.")
  1635. #define FUNC_NAME s_scm_char_set_union_x
  1636. {
  1637. SCM_VALIDATE_SMOB (1, cs1, charset);
  1638. SCM_VALIDATE_REST_ARGUMENT (rest);
  1639. cs1 = scm_char_set_union (scm_cons (cs1, rest));
  1640. return cs1;
  1641. }
  1642. #undef FUNC_NAME
  1643. SCM_DEFINE (scm_char_set_intersection_x, "char-set-intersection!", 1, 0, 1,
  1644. (SCM cs1, SCM rest),
  1645. "Return the intersection of all argument character sets.")
  1646. #define FUNC_NAME s_scm_char_set_intersection_x
  1647. {
  1648. SCM_VALIDATE_SMOB (1, cs1, charset);
  1649. SCM_VALIDATE_REST_ARGUMENT (rest);
  1650. cs1 = scm_char_set_intersection (scm_cons (cs1, rest));
  1651. return cs1;
  1652. }
  1653. #undef FUNC_NAME
  1654. SCM_DEFINE (scm_char_set_difference_x, "char-set-difference!", 1, 0, 1,
  1655. (SCM cs1, SCM rest),
  1656. "Return the difference of all argument character sets.")
  1657. #define FUNC_NAME s_scm_char_set_difference_x
  1658. {
  1659. SCM_VALIDATE_SMOB (1, cs1, charset);
  1660. SCM_VALIDATE_REST_ARGUMENT (rest);
  1661. cs1 = scm_char_set_difference (cs1, rest);
  1662. return cs1;
  1663. }
  1664. #undef FUNC_NAME
  1665. SCM_DEFINE (scm_char_set_xor_x, "char-set-xor!", 1, 0, 1,
  1666. (SCM cs1, SCM rest),
  1667. "Return the exclusive-or of all argument character sets.")
  1668. #define FUNC_NAME s_scm_char_set_xor_x
  1669. {
  1670. /* a side-effecting variant should presumably give consistent results:
  1671. (define a (char-set #\a))
  1672. (char-set-xor a a a) -> char set #\a
  1673. (char-set-xor! a a a) -> char set #\a
  1674. */
  1675. cs1 = scm_char_set_xor (scm_cons (cs1, rest));
  1676. return cs1;
  1677. }
  1678. #undef FUNC_NAME
  1679. SCM_DEFINE (scm_char_set_diff_plus_intersection_x,
  1680. "char-set-diff+intersection!", 2, 0, 1, (SCM cs1, SCM cs2,
  1681. SCM rest),
  1682. "Return the difference and the intersection of all argument\n"
  1683. "character sets.")
  1684. #define FUNC_NAME s_scm_char_set_diff_plus_intersection_x
  1685. {
  1686. SCM diff, intersect;
  1687. diff = scm_char_set_difference (cs1, scm_cons (cs2, rest));
  1688. intersect =
  1689. scm_char_set_intersection (scm_cons (cs1, scm_cons (cs2, rest)));
  1690. cs1 = diff;
  1691. cs2 = intersect;
  1692. return scm_values (scm_list_2 (cs1, cs2));
  1693. }
  1694. #undef FUNC_NAME
  1695. /* Standard character sets. */
  1696. SCM scm_char_set_lower_case;
  1697. SCM scm_char_set_upper_case;
  1698. SCM scm_char_set_title_case;
  1699. SCM scm_char_set_letter;
  1700. SCM scm_char_set_digit;
  1701. SCM scm_char_set_letter_and_digit;
  1702. SCM scm_char_set_graphic;
  1703. SCM scm_char_set_printing;
  1704. SCM scm_char_set_whitespace;
  1705. SCM scm_char_set_iso_control;
  1706. SCM scm_char_set_punctuation;
  1707. SCM scm_char_set_symbol;
  1708. SCM scm_char_set_hex_digit;
  1709. SCM scm_char_set_blank;
  1710. SCM scm_char_set_ascii;
  1711. SCM scm_char_set_empty;
  1712. SCM scm_char_set_designated;
  1713. SCM scm_char_set_full;
  1714. /* Create an empty character set and return it after binding it to NAME. */
  1715. static inline SCM
  1716. define_charset (const char *name, const scm_t_char_set *p)
  1717. {
  1718. SCM cs;
  1719. SCM_NEWSMOB (cs, scm_tc16_charset, p);
  1720. scm_c_define (name, cs);
  1721. return cs;
  1722. }
  1723. SCM_DEFINE (scm_sys_char_set_dump, "%char-set-dump", 1, 0, 0, (SCM charset),
  1724. "Returns an association list containing debugging information\n"
  1725. "for @var{charset}. The association list has the following entries."
  1726. "@table @code\n"
  1727. "@item char-set\n"
  1728. "The char-set itself.\n"
  1729. "@item len\n"
  1730. "The number of character ranges the char-set contains\n"
  1731. "@item ranges\n"
  1732. "A list of lists where each sublist a range of code points\n"
  1733. "and their associated characters"
  1734. "@end table")
  1735. #define FUNC_NAME s_scm_sys_char_set_dump
  1736. {
  1737. SCM e1, e2, e3;
  1738. SCM ranges = SCM_EOL, elt;
  1739. size_t i;
  1740. scm_t_char_set *cs;
  1741. char codepoint_string_lo[11], codepoint_string_hi[11];
  1742. SCM_VALIDATE_SMOB (1, charset, charset);
  1743. cs = SCM_CHARSET_DATA (charset);
  1744. e1 = scm_cons (scm_from_latin1_symbol ("char-set"),
  1745. charset);
  1746. e2 = scm_cons (scm_from_latin1_symbol ("n"),
  1747. scm_from_size_t (cs->len));
  1748. for (i = 0; i < cs->len; i++)
  1749. {
  1750. if (cs->ranges[i].lo > 0xFFFF)
  1751. sprintf (codepoint_string_lo, "U+%06x", cs->ranges[i].lo);
  1752. else
  1753. sprintf (codepoint_string_lo, "U+%04x", cs->ranges[i].lo);
  1754. if (cs->ranges[i].hi > 0xFFFF)
  1755. sprintf (codepoint_string_hi, "U+%06x", cs->ranges[i].hi);
  1756. else
  1757. sprintf (codepoint_string_hi, "U+%04x", cs->ranges[i].hi);
  1758. elt = scm_list_4 (SCM_MAKE_CHAR (cs->ranges[i].lo),
  1759. SCM_MAKE_CHAR (cs->ranges[i].hi),
  1760. scm_from_locale_string (codepoint_string_lo),
  1761. scm_from_locale_string (codepoint_string_hi));
  1762. ranges = scm_append (scm_list_2 (ranges,
  1763. scm_list_1 (elt)));
  1764. }
  1765. e3 = scm_cons (scm_from_latin1_symbol ("ranges"),
  1766. ranges);
  1767. return scm_list_3 (e1, e2, e3);
  1768. }
  1769. #undef FUNC_NAME
  1770. void
  1771. scm_init_srfi_14 (void)
  1772. {
  1773. scm_tc16_charset = scm_make_smob_type ("character-set", 0);
  1774. scm_set_smob_print (scm_tc16_charset, charset_print);
  1775. scm_tc16_charset_cursor = scm_make_smob_type ("char-set-cursor", 0);
  1776. scm_set_smob_print (scm_tc16_charset_cursor, charset_cursor_print);
  1777. scm_char_set_upper_case =
  1778. define_charset ("char-set:upper-case", &cs_upper_case);
  1779. scm_char_set_lower_case =
  1780. define_charset ("char-set:lower-case", &cs_lower_case);
  1781. scm_char_set_title_case =
  1782. define_charset ("char-set:title-case", &cs_title_case);
  1783. scm_char_set_letter = define_charset ("char-set:letter", &cs_letter);
  1784. scm_char_set_digit = define_charset ("char-set:digit", &cs_digit);
  1785. scm_char_set_letter_and_digit =
  1786. define_charset ("char-set:letter+digit", &cs_letter_plus_digit);
  1787. scm_char_set_graphic = define_charset ("char-set:graphic", &cs_graphic);
  1788. scm_char_set_printing = define_charset ("char-set:printing", &cs_printing);
  1789. scm_char_set_whitespace =
  1790. define_charset ("char-set:whitespace", &cs_whitespace);
  1791. scm_char_set_iso_control =
  1792. define_charset ("char-set:iso-control", &cs_iso_control);
  1793. scm_char_set_punctuation =
  1794. define_charset ("char-set:punctuation", &cs_punctuation);
  1795. scm_char_set_symbol = define_charset ("char-set:symbol", &cs_symbol);
  1796. scm_char_set_hex_digit =
  1797. define_charset ("char-set:hex-digit", &cs_hex_digit);
  1798. scm_char_set_blank = define_charset ("char-set:blank", &cs_blank);
  1799. scm_char_set_ascii = define_charset ("char-set:ascii", &cs_ascii);
  1800. scm_char_set_empty = define_charset ("char-set:empty", &cs_empty);
  1801. scm_char_set_designated = define_charset ("char-set:designated", &cs_designated);
  1802. scm_char_set_full = define_charset ("char-set:full", &cs_full);
  1803. #include "libguile/srfi-14.x"
  1804. }
  1805. /* End of srfi-14.c. */