chars.c 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688
  1. /* Copyright 1995-1996,1998,2000-2001,2004,2006,2008-2011,2014,2018-2019
  2. Free Software Foundation, Inc.
  3. This file is part of Guile.
  4. Guile is free software: you can redistribute it and/or modify it
  5. under the terms of the GNU Lesser General Public License as published
  6. by the Free Software Foundation, either version 3 of the License, or
  7. (at your option) any later version.
  8. Guile is distributed in the hope that it will be useful, but WITHOUT
  9. ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  10. FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
  11. License for more details.
  12. You should have received a copy of the GNU Lesser General Public
  13. License along with Guile. If not, see
  14. <https://www.gnu.org/licenses/>. */
  15. #ifdef HAVE_CONFIG_H
  16. # include <config.h>
  17. #endif
  18. #include <ctype.h>
  19. #include <limits.h>
  20. #include <string.h>
  21. #include <unicase.h>
  22. #include <unictype.h>
  23. #include "boolean.h"
  24. #include "gsubr.h"
  25. #include "numbers.h"
  26. #include "pairs.h"
  27. #include "srfi-14.h"
  28. #include "symbols.h"
  29. #include "chars.h"
  30. SCM_DEFINE (scm_char_p, "char?", 1, 0, 0,
  31. (SCM x),
  32. "Return @code{#t} iff @var{x} is a character, else @code{#f}.")
  33. #define FUNC_NAME s_scm_char_p
  34. {
  35. return scm_from_bool (SCM_CHARP(x));
  36. }
  37. #undef FUNC_NAME
  38. static SCM scm_i_char_eq_p (SCM x, SCM y, SCM rest);
  39. SCM_DEFINE (scm_i_char_eq_p, "char=?", 0, 2, 1,
  40. (SCM x, SCM y, SCM rest),
  41. "Return @code{#t} if the Unicode code point of @var{x} is equal to the\n"
  42. "code point of @var{y}, else @code{#f}.\n")
  43. #define FUNC_NAME s_scm_i_char_eq_p
  44. {
  45. if (SCM_UNBNDP (x) || SCM_UNBNDP (y))
  46. return SCM_BOOL_T;
  47. while (!scm_is_null (rest))
  48. {
  49. if (scm_is_false (scm_char_eq_p (x, y)))
  50. return SCM_BOOL_F;
  51. x = y;
  52. y = scm_car (rest);
  53. rest = scm_cdr (rest);
  54. }
  55. return scm_char_eq_p (x, y);
  56. }
  57. #undef FUNC_NAME
  58. SCM scm_char_eq_p (SCM x, SCM y)
  59. #define FUNC_NAME s_scm_i_char_eq_p
  60. {
  61. SCM_VALIDATE_CHAR (1, x);
  62. SCM_VALIDATE_CHAR (2, y);
  63. return scm_from_bool (scm_is_eq (x, y));
  64. }
  65. #undef FUNC_NAME
  66. static SCM scm_i_char_less_p (SCM x, SCM y, SCM rest);
  67. SCM_DEFINE (scm_i_char_less_p, "char<?", 0, 2, 1,
  68. (SCM x, SCM y, SCM rest),
  69. "Return @code{#t} iff the code point of @var{x} is less than the code\n"
  70. "point of @var{y}, else @code{#f}.")
  71. #define FUNC_NAME s_scm_i_char_less_p
  72. {
  73. if (SCM_UNBNDP (x) || SCM_UNBNDP (y))
  74. return SCM_BOOL_T;
  75. while (!scm_is_null (rest))
  76. {
  77. if (scm_is_false (scm_char_less_p (x, y)))
  78. return SCM_BOOL_F;
  79. x = y;
  80. y = scm_car (rest);
  81. rest = scm_cdr (rest);
  82. }
  83. return scm_char_less_p (x, y);
  84. }
  85. #undef FUNC_NAME
  86. SCM scm_char_less_p (SCM x, SCM y)
  87. #define FUNC_NAME s_scm_i_char_less_p
  88. {
  89. SCM_VALIDATE_CHAR (1, x);
  90. SCM_VALIDATE_CHAR (2, y);
  91. return scm_from_bool (SCM_CHAR(x) < SCM_CHAR(y));
  92. }
  93. #undef FUNC_NAME
  94. static SCM scm_i_char_leq_p (SCM x, SCM y, SCM rest);
  95. SCM_DEFINE (scm_i_char_leq_p, "char<=?", 0, 2, 1,
  96. (SCM x, SCM y, SCM rest),
  97. "Return @code{#t} if the Unicode code point of @var{x} is less than or\n"
  98. "equal to the code point of @var{y}, else @code{#f}.")
  99. #define FUNC_NAME s_scm_i_char_leq_p
  100. {
  101. if (SCM_UNBNDP (x) || SCM_UNBNDP (y))
  102. return SCM_BOOL_T;
  103. while (!scm_is_null (rest))
  104. {
  105. if (scm_is_false (scm_char_leq_p (x, y)))
  106. return SCM_BOOL_F;
  107. x = y;
  108. y = scm_car (rest);
  109. rest = scm_cdr (rest);
  110. }
  111. return scm_char_leq_p (x, y);
  112. }
  113. #undef FUNC_NAME
  114. SCM scm_char_leq_p (SCM x, SCM y)
  115. #define FUNC_NAME s_scm_i_char_leq_p
  116. {
  117. SCM_VALIDATE_CHAR (1, x);
  118. SCM_VALIDATE_CHAR (2, y);
  119. return scm_from_bool (SCM_CHAR(x) <= SCM_CHAR(y));
  120. }
  121. #undef FUNC_NAME
  122. static SCM scm_i_char_gr_p (SCM x, SCM y, SCM rest);
  123. SCM_DEFINE (scm_i_char_gr_p, "char>?", 0, 2, 1,
  124. (SCM x, SCM y, SCM rest),
  125. "Return @code{#t} if the Unicode code point of @var{x} is greater than\n"
  126. "the code point of @var{y}, else @code{#f}.")
  127. #define FUNC_NAME s_scm_i_char_gr_p
  128. {
  129. if (SCM_UNBNDP (x) || SCM_UNBNDP (y))
  130. return SCM_BOOL_T;
  131. while (!scm_is_null (rest))
  132. {
  133. if (scm_is_false (scm_char_gr_p (x, y)))
  134. return SCM_BOOL_F;
  135. x = y;
  136. y = scm_car (rest);
  137. rest = scm_cdr (rest);
  138. }
  139. return scm_char_gr_p (x, y);
  140. }
  141. #undef FUNC_NAME
  142. SCM scm_char_gr_p (SCM x, SCM y)
  143. #define FUNC_NAME s_scm_i_char_gr_p
  144. {
  145. SCM_VALIDATE_CHAR (1, x);
  146. SCM_VALIDATE_CHAR (2, y);
  147. return scm_from_bool (SCM_CHAR(x) > SCM_CHAR(y));
  148. }
  149. #undef FUNC_NAME
  150. static SCM scm_i_char_geq_p (SCM x, SCM y, SCM rest);
  151. SCM_DEFINE (scm_i_char_geq_p, "char>=?", 0, 2, 1,
  152. (SCM x, SCM y, SCM rest),
  153. "Return @code{#t} if the Unicode code point of @var{x} is greater than\n"
  154. "or equal to the code point of @var{y}, else @code{#f}.")
  155. #define FUNC_NAME s_scm_i_char_geq_p
  156. {
  157. if (SCM_UNBNDP (x) || SCM_UNBNDP (y))
  158. return SCM_BOOL_T;
  159. while (!scm_is_null (rest))
  160. {
  161. if (scm_is_false (scm_char_geq_p (x, y)))
  162. return SCM_BOOL_F;
  163. x = y;
  164. y = scm_car (rest);
  165. rest = scm_cdr (rest);
  166. }
  167. return scm_char_geq_p (x, y);
  168. }
  169. #undef FUNC_NAME
  170. SCM scm_char_geq_p (SCM x, SCM y)
  171. #define FUNC_NAME s_scm_i_char_geq_p
  172. {
  173. SCM_VALIDATE_CHAR (1, x);
  174. SCM_VALIDATE_CHAR (2, y);
  175. return scm_from_bool (SCM_CHAR(x) >= SCM_CHAR(y));
  176. }
  177. #undef FUNC_NAME
/* FIXME?: R6RS specifies that these comparisons are case-folded.
   In practice this is the same as comparing the uppercased characters,
   but not in theory.  Unicode has a table containing its definition
   of case-folded character mappings.  A more correct implementation
   would use that table to provide a char-foldcase function.  */
  184. static SCM scm_i_char_ci_eq_p (SCM x, SCM y, SCM rest);
  185. SCM_DEFINE (scm_i_char_ci_eq_p, "char-ci=?", 0, 2, 1,
  186. (SCM x, SCM y, SCM rest),
  187. "Return @code{#t} if the case-folded Unicode code point of @var{x} is\n"
  188. "the same as the case-folded code point of @var{y}, else @code{#f}.")
  189. #define FUNC_NAME s_scm_i_char_ci_eq_p
  190. {
  191. if (SCM_UNBNDP (x) || SCM_UNBNDP (y))
  192. return SCM_BOOL_T;
  193. while (!scm_is_null (rest))
  194. {
  195. if (scm_is_false (scm_char_ci_eq_p (x, y)))
  196. return SCM_BOOL_F;
  197. x = y;
  198. y = scm_car (rest);
  199. rest = scm_cdr (rest);
  200. }
  201. return scm_char_ci_eq_p (x, y);
  202. }
  203. #undef FUNC_NAME
  204. SCM scm_char_ci_eq_p (SCM x, SCM y)
  205. #define FUNC_NAME s_scm_i_char_ci_eq_p
  206. {
  207. SCM_VALIDATE_CHAR (1, x);
  208. SCM_VALIDATE_CHAR (2, y);
  209. return scm_from_bool (scm_c_upcase(SCM_CHAR(x))==scm_c_upcase(SCM_CHAR(y)));
  210. }
  211. #undef FUNC_NAME
  212. static SCM scm_i_char_ci_less_p (SCM x, SCM y, SCM rest);
  213. SCM_DEFINE (scm_i_char_ci_less_p, "char-ci<?", 0, 2, 1,
  214. (SCM x, SCM y, SCM rest),
  215. "Return @code{#t} if the case-folded Unicode code point of @var{x} is\n"
  216. "less than the case-folded code point of @var{y}, else @code{#f}.")
  217. #define FUNC_NAME s_scm_i_char_ci_less_p
  218. {
  219. if (SCM_UNBNDP (x) || SCM_UNBNDP (y))
  220. return SCM_BOOL_T;
  221. while (!scm_is_null (rest))
  222. {
  223. if (scm_is_false (scm_char_ci_less_p (x, y)))
  224. return SCM_BOOL_F;
  225. x = y;
  226. y = scm_car (rest);
  227. rest = scm_cdr (rest);
  228. }
  229. return scm_char_ci_less_p (x, y);
  230. }
  231. #undef FUNC_NAME
  232. SCM scm_char_ci_less_p (SCM x, SCM y)
  233. #define FUNC_NAME s_scm_i_char_ci_less_p
  234. {
  235. SCM_VALIDATE_CHAR (1, x);
  236. SCM_VALIDATE_CHAR (2, y);
  237. return scm_from_bool ((scm_c_upcase(SCM_CHAR(x))) < scm_c_upcase(SCM_CHAR(y)));
  238. }
  239. #undef FUNC_NAME
  240. static SCM scm_i_char_ci_leq_p (SCM x, SCM y, SCM rest);
  241. SCM_DEFINE (scm_i_char_ci_leq_p, "char-ci<=?", 0, 2, 1,
  242. (SCM x, SCM y, SCM rest),
  243. "Return @code{#t} iff the case-folded Unicode code point of @var{x} is\n"
  244. "less than or equal to the case-folded code point of @var{y}, else\n"
  245. "@code{#f}")
  246. #define FUNC_NAME s_scm_i_char_ci_leq_p
  247. {
  248. if (SCM_UNBNDP (x) || SCM_UNBNDP (y))
  249. return SCM_BOOL_T;
  250. while (!scm_is_null (rest))
  251. {
  252. if (scm_is_false (scm_char_ci_leq_p (x, y)))
  253. return SCM_BOOL_F;
  254. x = y;
  255. y = scm_car (rest);
  256. rest = scm_cdr (rest);
  257. }
  258. return scm_char_ci_leq_p (x, y);
  259. }
  260. #undef FUNC_NAME
  261. SCM scm_char_ci_leq_p (SCM x, SCM y)
  262. #define FUNC_NAME s_scm_i_char_ci_leq_p
  263. {
  264. SCM_VALIDATE_CHAR (1, x);
  265. SCM_VALIDATE_CHAR (2, y);
  266. return scm_from_bool (scm_c_upcase(SCM_CHAR(x)) <= scm_c_upcase(SCM_CHAR(y)));
  267. }
  268. #undef FUNC_NAME
  269. static SCM scm_i_char_ci_gr_p (SCM x, SCM y, SCM rest);
  270. SCM_DEFINE (scm_i_char_ci_gr_p, "char-ci>?", 0, 2, 1,
  271. (SCM x, SCM y, SCM rest),
  272. "Return @code{#t} iff the case-folded code point of @var{x} is greater\n"
  273. "than the case-folded code point of @var{y}, else @code{#f}.")
  274. #define FUNC_NAME s_scm_i_char_ci_gr_p
  275. {
  276. if (SCM_UNBNDP (x) || SCM_UNBNDP (y))
  277. return SCM_BOOL_T;
  278. while (!scm_is_null (rest))
  279. {
  280. if (scm_is_false (scm_char_ci_gr_p (x, y)))
  281. return SCM_BOOL_F;
  282. x = y;
  283. y = scm_car (rest);
  284. rest = scm_cdr (rest);
  285. }
  286. return scm_char_ci_gr_p (x, y);
  287. }
  288. #undef FUNC_NAME
  289. SCM scm_char_ci_gr_p (SCM x, SCM y)
  290. #define FUNC_NAME s_scm_i_char_ci_gr_p
  291. {
  292. SCM_VALIDATE_CHAR (1, x);
  293. SCM_VALIDATE_CHAR (2, y);
  294. return scm_from_bool (scm_c_upcase(SCM_CHAR(x)) > scm_c_upcase(SCM_CHAR(y)));
  295. }
  296. #undef FUNC_NAME
  297. static SCM scm_i_char_ci_geq_p (SCM x, SCM y, SCM rest);
  298. SCM_DEFINE (scm_i_char_ci_geq_p, "char-ci>=?", 0, 2, 1,
  299. (SCM x, SCM y, SCM rest),
  300. "Return @code{#t} iff the case-folded Unicode code point of @var{x} is\n"
  301. "greater than or equal to the case-folded code point of @var{y}, else\n"
  302. "@code{#f}.")
  303. #define FUNC_NAME s_scm_i_char_ci_geq_p
  304. {
  305. if (SCM_UNBNDP (x) || SCM_UNBNDP (y))
  306. return SCM_BOOL_T;
  307. while (!scm_is_null (rest))
  308. {
  309. if (scm_is_false (scm_char_ci_geq_p (x, y)))
  310. return SCM_BOOL_F;
  311. x = y;
  312. y = scm_car (rest);
  313. rest = scm_cdr (rest);
  314. }
  315. return scm_char_ci_geq_p (x, y);
  316. }
  317. #undef FUNC_NAME
  318. SCM scm_char_ci_geq_p (SCM x, SCM y)
  319. #define FUNC_NAME s_scm_i_char_ci_geq_p
  320. {
  321. SCM_VALIDATE_CHAR (1, x);
  322. SCM_VALIDATE_CHAR (2, y);
  323. return scm_from_bool (scm_c_upcase(SCM_CHAR(x)) >= scm_c_upcase(SCM_CHAR(y)));
  324. }
  325. #undef FUNC_NAME
  326. SCM_DEFINE (scm_char_alphabetic_p, "char-alphabetic?", 1, 0, 0,
  327. (SCM chr),
  328. "Return @code{#t} iff @var{chr} is alphabetic, else @code{#f}.\n")
  329. #define FUNC_NAME s_scm_char_alphabetic_p
  330. {
  331. return scm_char_set_contains_p (scm_char_set_letter, chr);
  332. }
  333. #undef FUNC_NAME
  334. SCM_DEFINE (scm_char_numeric_p, "char-numeric?", 1, 0, 0,
  335. (SCM chr),
  336. "Return @code{#t} iff @var{chr} is numeric, else @code{#f}.\n")
  337. #define FUNC_NAME s_scm_char_numeric_p
  338. {
  339. return scm_char_set_contains_p (scm_char_set_digit, chr);
  340. }
  341. #undef FUNC_NAME
  342. SCM_DEFINE (scm_char_whitespace_p, "char-whitespace?", 1, 0, 0,
  343. (SCM chr),
  344. "Return @code{#t} iff @var{chr} is whitespace, else @code{#f}.\n")
  345. #define FUNC_NAME s_scm_char_whitespace_p
  346. {
  347. return scm_char_set_contains_p (scm_char_set_whitespace, chr);
  348. }
  349. #undef FUNC_NAME
  350. SCM_DEFINE (scm_char_upper_case_p, "char-upper-case?", 1, 0, 0,
  351. (SCM chr),
  352. "Return @code{#t} iff @var{chr} is uppercase, else @code{#f}.\n")
  353. #define FUNC_NAME s_scm_char_upper_case_p
  354. {
  355. return scm_char_set_contains_p (scm_char_set_upper_case, chr);
  356. }
  357. #undef FUNC_NAME
  358. SCM_DEFINE (scm_char_lower_case_p, "char-lower-case?", 1, 0, 0,
  359. (SCM chr),
  360. "Return @code{#t} iff @var{chr} is lowercase, else @code{#f}.\n")
  361. #define FUNC_NAME s_scm_char_lower_case_p
  362. {
  363. return scm_char_set_contains_p (scm_char_set_lower_case, chr);
  364. }
  365. #undef FUNC_NAME
  366. SCM_DEFINE (scm_char_is_both_p, "char-is-both?", 1, 0, 0,
  367. (SCM chr),
  368. "Return @code{#t} iff @var{chr} is either uppercase or lowercase, else\n"
  369. "@code{#f}.\n")
  370. #define FUNC_NAME s_scm_char_is_both_p
  371. {
  372. if (scm_is_true (scm_char_set_contains_p (scm_char_set_lower_case, chr)))
  373. return SCM_BOOL_T;
  374. return scm_char_set_contains_p (scm_char_set_upper_case, chr);
  375. }
  376. #undef FUNC_NAME
  377. SCM_DEFINE (scm_char_to_integer, "char->integer", 1, 0, 0,
  378. (SCM chr),
  379. "Return the Unicode code point of @var{chr}.")
  380. #define FUNC_NAME s_scm_char_to_integer
  381. {
  382. SCM_VALIDATE_CHAR (1, chr);
  383. return scm_from_uint32 (SCM_CHAR(chr));
  384. }
  385. #undef FUNC_NAME
  386. SCM_DEFINE (scm_integer_to_char, "integer->char", 1, 0, 0,
  387. (SCM n),
  388. "Return the character that has Unicode code point @var{n}. The integer\n"
  389. "@var{n} must be a valid code point. Valid code points are in the\n"
  390. "ranges 0 to @code{#xD7FF} inclusive or @code{#xE000} to\n"
  391. "@code{#x10FFFF} inclusive.")
  392. #define FUNC_NAME s_scm_integer_to_char
  393. {
  394. scm_t_wchar cn;
  395. cn = scm_to_wchar (n);
  396. /* Avoid the surrogates. */
  397. if (!SCM_IS_UNICODE_CHAR (cn))
  398. scm_out_of_range (FUNC_NAME, n);
  399. return SCM_MAKE_CHAR (cn);
  400. }
  401. #undef FUNC_NAME
  402. SCM_DEFINE (scm_char_upcase, "char-upcase", 1, 0, 0,
  403. (SCM chr),
  404. "Return the uppercase character version of @var{chr}.")
  405. #define FUNC_NAME s_scm_char_upcase
  406. {
  407. SCM_VALIDATE_CHAR (1, chr);
  408. return scm_c_make_char (scm_c_upcase (SCM_CHAR (chr)));
  409. }
  410. #undef FUNC_NAME
  411. SCM_DEFINE (scm_char_downcase, "char-downcase", 1, 0, 0,
  412. (SCM chr),
  413. "Return the lowercase character version of @var{chr}.")
  414. #define FUNC_NAME s_scm_char_downcase
  415. {
  416. SCM_VALIDATE_CHAR (1, chr);
  417. return scm_c_make_char (scm_c_downcase (SCM_CHAR(chr)));
  418. }
  419. #undef FUNC_NAME
  420. SCM_DEFINE (scm_char_titlecase, "char-titlecase", 1, 0, 0,
  421. (SCM chr),
  422. "Return the titlecase character version of @var{chr}.")
  423. #define FUNC_NAME s_scm_char_titlecase
  424. {
  425. SCM_VALIDATE_CHAR (1, chr);
  426. return scm_c_make_char (scm_c_titlecase (SCM_CHAR(chr)));
  427. }
  428. #undef FUNC_NAME
  429. SCM_DEFINE (scm_char_general_category, "char-general-category", 1, 0, 0,
  430. (SCM chr),
  431. "Return a symbol representing the Unicode general category of "
  432. "@var{chr} or @code{#f} if a named category cannot be found.")
  433. #define FUNC_NAME s_scm_char_general_category
  434. {
  435. const char *sym;
  436. uc_general_category_t cat;
  437. SCM_VALIDATE_CHAR (1, chr);
  438. cat = uc_general_category (SCM_CHAR (chr));
  439. sym = uc_general_category_name (cat);
  440. if (sym != NULL)
  441. return scm_from_utf8_symbol (sym);
  442. return SCM_BOOL_F;
  443. }
  444. #undef FUNC_NAME
  445. /*
  446. TODO: change name to scm_i_.. ? --hwn
  447. */
  448. scm_t_wchar
  449. scm_c_upcase (scm_t_wchar c)
  450. {
  451. return uc_toupper ((int) c);
  452. }
  453. scm_t_wchar
  454. scm_c_downcase (scm_t_wchar c)
  455. {
  456. return uc_tolower ((int) c);
  457. }
  458. scm_t_wchar
  459. scm_c_titlecase (scm_t_wchar c)
  460. {
  461. return uc_totitle ((int) c);
  462. }
/* There are several sets of character names: R5RS, R6RS, R7RS, Guile
   extensions for control characters, and leftover Guile extensions.
   They are listed in order of precedence.  */
  466. static const char *const scm_r5rs_charnames[] = {
  467. "space", "newline"
  468. };
  469. static const uint32_t scm_r5rs_charnums[] = {
  470. 0x20, 0x0a
  471. };
  472. #define SCM_N_R5RS_CHARNAMES (sizeof (scm_r5rs_charnames) / sizeof (char *))
  473. static const char *const scm_r6rs_charnames[] = {
  474. "nul", "alarm", "backspace", "tab", "linefeed", "vtab", "page",
  475. "return", "esc", "delete"
  476. /* 'space' and 'newline' are already included from the R5RS list. */
  477. };
  478. static const uint32_t scm_r6rs_charnums[] = {
  479. 0x00, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c,
  480. 0x0d, 0x1b, 0x7f
  481. };
  482. #define SCM_N_R6RS_CHARNAMES (sizeof (scm_r6rs_charnames) / sizeof (char *))
  483. static const char *const scm_r7rs_charnames[] = {
  484. "escape"
  485. };
  486. static const uint32_t scm_r7rs_charnums[] = {
  487. 0x1b
  488. };
  489. #define SCM_N_R7RS_CHARNAMES (sizeof (scm_r7rs_charnames) / sizeof (char *))
  490. /* The abbreviated names for control characters. */
  491. static const char *const scm_C0_control_charnames[] = {
  492. /* C0 controls */
  493. "nul", "soh", "stx", "etx", "eot", "enq", "ack", "bel",
  494. "bs", "ht", "lf", "vt", "ff", "cr", "so", "si",
  495. "dle", "dc1", "dc2", "dc3", "dc4", "nak", "syn", "etb",
  496. "can", "em", "sub", "esc", "fs", "gs", "rs", "us",
  497. "sp", "del"
  498. };
  499. static const uint32_t scm_C0_control_charnums[] = {
  500. 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
  501. 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
  502. 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
  503. 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
  504. 0x20, 0x7f
  505. };
  506. #define SCM_N_C0_CONTROL_CHARNAMES (sizeof (scm_C0_control_charnames) / sizeof (char *))
  507. static const char *const scm_alt_charnames[] = {
  508. "null", "nl", "np"
  509. };
  510. static const uint32_t scm_alt_charnums[] = {
  511. 0x00, 0x0a, 0x0c
  512. };
  513. #define SCM_N_ALT_CHARNAMES (sizeof (scm_alt_charnames) / sizeof (char *))
  514. /* Returns the string charname for a character if it exists, or NULL
  515. otherwise. */
  516. const char *
  517. scm_i_charname (SCM chr)
  518. {
  519. size_t c;
  520. uint32_t i = SCM_CHAR (chr);
  521. for (c = 0; c < SCM_N_R5RS_CHARNAMES; c++)
  522. if (scm_r5rs_charnums[c] == i)
  523. return scm_r5rs_charnames[c];
  524. for (c = 0; c < SCM_N_R6RS_CHARNAMES; c++)
  525. if (scm_r6rs_charnums[c] == i)
  526. return scm_r6rs_charnames[c];
  527. for (c = 0; c < SCM_N_R7RS_CHARNAMES; c++)
  528. if (scm_r7rs_charnums[c] == i)
  529. return scm_r7rs_charnames[c];
  530. for (c = 0; c < SCM_N_C0_CONTROL_CHARNAMES; c++)
  531. if (scm_C0_control_charnums[c] == i)
  532. return scm_C0_control_charnames[c];
  533. /* Since the characters in scm_alt_charnums is a subset of
  534. scm_C0_control_charnums, this code is never reached. */
  535. for (c = 0; c < SCM_N_ALT_CHARNAMES; c++)
  536. if (scm_alt_charnums[c] == i)
  537. return scm_alt_charnames[c];
  538. return NULL;
  539. }
  540. /* Return a character from a string charname. */
  541. SCM
  542. scm_i_charname_to_char (const char *charname, size_t charname_len)
  543. {
  544. size_t c;
  545. /* The R5RS charnames. These are supposed to be case insensitive. */
  546. for (c = 0; c < SCM_N_R5RS_CHARNAMES; c++)
  547. if ((strlen (scm_r5rs_charnames[c]) == charname_len)
  548. && (!strncasecmp (scm_r5rs_charnames[c], charname, charname_len)))
  549. return SCM_MAKE_CHAR (scm_r5rs_charnums[c]);
  550. /* The R6RS charnames. R6RS says that these should be case-sensitive.
  551. They are left as case-insensitive to avoid confusion. */
  552. for (c = 0; c < SCM_N_R6RS_CHARNAMES; c++)
  553. if ((strlen (scm_r6rs_charnames[c]) == charname_len)
  554. && (!strncasecmp (scm_r6rs_charnames[c], charname, charname_len)))
  555. return SCM_MAKE_CHAR (scm_r6rs_charnums[c]);
  556. /* The R7RS charnames. R7RS says that these should be case-sensitive.
  557. They are left as case-insensitive to avoid confusion. */
  558. for (c = 0; c < SCM_N_R7RS_CHARNAMES; c++)
  559. if ((strlen (scm_r7rs_charnames[c]) == charname_len)
  560. && (!strncasecmp (scm_r7rs_charnames[c], charname, charname_len)))
  561. return SCM_MAKE_CHAR (scm_r7rs_charnums[c]);
  562. /* Then come the controls. By Guile convention, these are not case
  563. sensitive. */
  564. for (c = 0; c < SCM_N_C0_CONTROL_CHARNAMES; c++)
  565. if ((strlen (scm_C0_control_charnames[c]) == charname_len)
  566. && (!strncasecmp (scm_C0_control_charnames[c], charname, charname_len)))
  567. return SCM_MAKE_CHAR (scm_C0_control_charnums[c]);
  568. /* Lastly are some old names carried over for compatibility. */
  569. for (c = 0; c < SCM_N_ALT_CHARNAMES; c++)
  570. if ((strlen (scm_alt_charnames[c]) == charname_len)
  571. && (!strncasecmp (scm_alt_charnames[c], charname, charname_len)))
  572. return SCM_MAKE_CHAR (scm_alt_charnums[c]);
  573. return SCM_BOOL_F;
  574. }
  575. void
  576. scm_init_chars ()
  577. {
  578. #include "chars.x"
  579. }