chars.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427
  1. /* Copyright (C) 1995,1996,1998, 2000, 2001, 2004, 2006, 2008, 2009 Free Software Foundation, Inc.
  2. *
  3. * This library is free software; you can redistribute it and/or
  4. * modify it under the terms of the GNU Lesser General Public License
  5. * as published by the Free Software Foundation; either version 3 of
  6. * the License, or (at your option) any later version.
  7. *
  8. * This library is distributed in the hope that it will be useful, but
  9. * WITHOUT ANY WARRANTY; without even the implied warranty of
  10. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  11. * Lesser General Public License for more details.
  12. *
  13. * You should have received a copy of the GNU Lesser General Public
  14. * License along with this library; if not, write to the Free Software
  15. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  16. * 02110-1301 USA
  17. */
  18. #ifdef HAVE_CONFIG_H
  19. # include <config.h>
  20. #endif
  21. #include <ctype.h>
  22. #include <limits.h>
  23. #include <unicase.h>
  24. #include "libguile/_scm.h"
  25. #include "libguile/validate.h"
  26. #include "libguile/chars.h"
  27. #include "libguile/srfi-14.h"
  28. SCM_DEFINE (scm_char_p, "char?", 1, 0, 0,
  29. (SCM x),
  30. "Return @code{#t} iff @var{x} is a character, else @code{#f}.")
  31. #define FUNC_NAME s_scm_char_p
  32. {
  33. return scm_from_bool (SCM_CHARP(x));
  34. }
  35. #undef FUNC_NAME
  36. SCM_DEFINE1 (scm_char_eq_p, "char=?", scm_tc7_rpsubr,
  37. (SCM x, SCM y),
  38. "Return @code{#t} iff @var{x} is the same character as @var{y}, else @code{#f}.")
  39. #define FUNC_NAME s_scm_char_eq_p
  40. {
  41. SCM_VALIDATE_CHAR (1, x);
  42. SCM_VALIDATE_CHAR (2, y);
  43. return scm_from_bool (scm_is_eq (x, y));
  44. }
  45. #undef FUNC_NAME
  46. SCM_DEFINE1 (scm_char_less_p, "char<?", scm_tc7_rpsubr,
  47. (SCM x, SCM y),
  48. "Return @code{#t} iff @var{x} is less than @var{y} in the Unicode sequence,\n"
  49. "else @code{#f}.")
  50. #define FUNC_NAME s_scm_char_less_p
  51. {
  52. SCM_VALIDATE_CHAR (1, x);
  53. SCM_VALIDATE_CHAR (2, y);
  54. return scm_from_bool (SCM_CHAR(x) < SCM_CHAR(y));
  55. }
  56. #undef FUNC_NAME
  57. SCM_DEFINE1 (scm_char_leq_p, "char<=?", scm_tc7_rpsubr,
  58. (SCM x, SCM y),
  59. "Return @code{#t} iff @var{x} is less than or equal to @var{y} in the\n"
  60. "Unicode sequence, else @code{#f}.")
  61. #define FUNC_NAME s_scm_char_leq_p
  62. {
  63. SCM_VALIDATE_CHAR (1, x);
  64. SCM_VALIDATE_CHAR (2, y);
  65. return scm_from_bool (SCM_CHAR(x) <= SCM_CHAR(y));
  66. }
  67. #undef FUNC_NAME
  68. SCM_DEFINE1 (scm_char_gr_p, "char>?", scm_tc7_rpsubr,
  69. (SCM x, SCM y),
  70. "Return @code{#t} iff @var{x} is greater than @var{y} in the Unicode\n"
  71. "sequence, else @code{#f}.")
  72. #define FUNC_NAME s_scm_char_gr_p
  73. {
  74. SCM_VALIDATE_CHAR (1, x);
  75. SCM_VALIDATE_CHAR (2, y);
  76. return scm_from_bool (SCM_CHAR(x) > SCM_CHAR(y));
  77. }
  78. #undef FUNC_NAME
  79. SCM_DEFINE1 (scm_char_geq_p, "char>=?", scm_tc7_rpsubr,
  80. (SCM x, SCM y),
  81. "Return @code{#t} iff @var{x} is greater than or equal to @var{y} in the\n"
  82. "Unicode sequence, else @code{#f}.")
  83. #define FUNC_NAME s_scm_char_geq_p
  84. {
  85. SCM_VALIDATE_CHAR (1, x);
  86. SCM_VALIDATE_CHAR (2, y);
  87. return scm_from_bool (SCM_CHAR(x) >= SCM_CHAR(y));
  88. }
  89. #undef FUNC_NAME
  90. SCM_DEFINE1 (scm_char_ci_eq_p, "char-ci=?", scm_tc7_rpsubr,
  91. (SCM x, SCM y),
  92. "Return @code{#t} iff @var{x} is the same character as @var{y} ignoring\n"
  93. "case, else @code{#f}. Case is locale free and not context sensitive.")
  94. #define FUNC_NAME s_scm_char_ci_eq_p
  95. {
  96. SCM_VALIDATE_CHAR (1, x);
  97. SCM_VALIDATE_CHAR (2, y);
  98. return scm_from_bool (scm_c_upcase(SCM_CHAR(x))==scm_c_upcase(SCM_CHAR(y)));
  99. }
  100. #undef FUNC_NAME
  101. SCM_DEFINE1 (scm_char_ci_less_p, "char-ci<?", scm_tc7_rpsubr,
  102. (SCM x, SCM y),
  103. "Return @code{#t} iff the Unicode uppercase form of @var{x} is less\n"
  104. "than the Unicode uppercase form @var{y} in the Unicode sequence,\n"
  105. "else @code{#f}.")
  106. #define FUNC_NAME s_scm_char_ci_less_p
  107. {
  108. SCM_VALIDATE_CHAR (1, x);
  109. SCM_VALIDATE_CHAR (2, y);
  110. return scm_from_bool ((scm_c_upcase(SCM_CHAR(x))) < scm_c_upcase(SCM_CHAR(y)));
  111. }
  112. #undef FUNC_NAME
  113. SCM_DEFINE1 (scm_char_ci_leq_p, "char-ci<=?", scm_tc7_rpsubr,
  114. (SCM x, SCM y),
  115. "Return @code{#t} iff the Unicode uppercase form of @var{x} is less\n"
  116. "than or equal to the Unicode uppercase form of @var{y} in the\n"
  117. "Unicode sequence, else @code{#f}.")
  118. #define FUNC_NAME s_scm_char_ci_leq_p
  119. {
  120. SCM_VALIDATE_CHAR (1, x);
  121. SCM_VALIDATE_CHAR (2, y);
  122. return scm_from_bool (scm_c_upcase(SCM_CHAR(x)) <= scm_c_upcase(SCM_CHAR(y)));
  123. }
  124. #undef FUNC_NAME
  125. SCM_DEFINE1 (scm_char_ci_gr_p, "char-ci>?", scm_tc7_rpsubr,
  126. (SCM x, SCM y),
  127. "Return @code{#t} iff the Unicode uppercase form of @var{x} is greater\n"
  128. "than the Unicode uppercase form of @var{y} in the Unicode\n"
  129. "sequence, else @code{#f}.")
  130. #define FUNC_NAME s_scm_char_ci_gr_p
  131. {
  132. SCM_VALIDATE_CHAR (1, x);
  133. SCM_VALIDATE_CHAR (2, y);
  134. return scm_from_bool (scm_c_upcase(SCM_CHAR(x)) > scm_c_upcase(SCM_CHAR(y)));
  135. }
  136. #undef FUNC_NAME
  137. SCM_DEFINE1 (scm_char_ci_geq_p, "char-ci>=?", scm_tc7_rpsubr,
  138. (SCM x, SCM y),
  139. "Return @code{#t} iff the Unicode uppercase form of @var{x} is greater\n"
  140. "than or equal to the Unicode uppercase form of @var{y} in the\n"
  141. "Unicode sequence, else @code{#f}.")
  142. #define FUNC_NAME s_scm_char_ci_geq_p
  143. {
  144. SCM_VALIDATE_CHAR (1, x);
  145. SCM_VALIDATE_CHAR (2, y);
  146. return scm_from_bool (scm_c_upcase(SCM_CHAR(x)) >= scm_c_upcase(SCM_CHAR(y)));
  147. }
  148. #undef FUNC_NAME
  149. SCM_DEFINE (scm_char_alphabetic_p, "char-alphabetic?", 1, 0, 0,
  150. (SCM chr),
  151. "Return @code{#t} iff @var{chr} is alphabetic, else @code{#f}.\n")
  152. #define FUNC_NAME s_scm_char_alphabetic_p
  153. {
  154. return scm_char_set_contains_p (scm_char_set_letter, chr);
  155. }
  156. #undef FUNC_NAME
  157. SCM_DEFINE (scm_char_numeric_p, "char-numeric?", 1, 0, 0,
  158. (SCM chr),
  159. "Return @code{#t} iff @var{chr} is numeric, else @code{#f}.\n")
  160. #define FUNC_NAME s_scm_char_numeric_p
  161. {
  162. return scm_char_set_contains_p (scm_char_set_digit, chr);
  163. }
  164. #undef FUNC_NAME
  165. SCM_DEFINE (scm_char_whitespace_p, "char-whitespace?", 1, 0, 0,
  166. (SCM chr),
  167. "Return @code{#t} iff @var{chr} is whitespace, else @code{#f}.\n")
  168. #define FUNC_NAME s_scm_char_whitespace_p
  169. {
  170. return scm_char_set_contains_p (scm_char_set_whitespace, chr);
  171. }
  172. #undef FUNC_NAME
  173. SCM_DEFINE (scm_char_upper_case_p, "char-upper-case?", 1, 0, 0,
  174. (SCM chr),
  175. "Return @code{#t} iff @var{chr} is uppercase, else @code{#f}.\n")
  176. #define FUNC_NAME s_scm_char_upper_case_p
  177. {
  178. return scm_char_set_contains_p (scm_char_set_upper_case, chr);
  179. }
  180. #undef FUNC_NAME
  181. SCM_DEFINE (scm_char_lower_case_p, "char-lower-case?", 1, 0, 0,
  182. (SCM chr),
  183. "Return @code{#t} iff @var{chr} is lowercase, else @code{#f}.\n")
  184. #define FUNC_NAME s_scm_char_lower_case_p
  185. {
  186. return scm_char_set_contains_p (scm_char_set_lower_case, chr);
  187. }
  188. #undef FUNC_NAME
  189. SCM_DEFINE (scm_char_is_both_p, "char-is-both?", 1, 0, 0,
  190. (SCM chr),
  191. "Return @code{#t} iff @var{chr} is either uppercase or lowercase, else @code{#f}.\n")
  192. #define FUNC_NAME s_scm_char_is_both_p
  193. {
  194. if (scm_is_true (scm_char_set_contains_p (scm_char_set_lower_case, chr)))
  195. return SCM_BOOL_T;
  196. return scm_char_set_contains_p (scm_char_set_upper_case, chr);
  197. }
  198. #undef FUNC_NAME
  199. SCM_DEFINE (scm_char_to_integer, "char->integer", 1, 0, 0,
  200. (SCM chr),
  201. "Return the number corresponding to ordinal position of @var{chr} in the\n"
  202. "ASCII sequence.")
  203. #define FUNC_NAME s_scm_char_to_integer
  204. {
  205. SCM_VALIDATE_CHAR (1, chr);
  206. return scm_from_uint32 (SCM_CHAR(chr));
  207. }
  208. #undef FUNC_NAME
  209. SCM_DEFINE (scm_integer_to_char, "integer->char", 1, 0, 0,
  210. (SCM n),
  211. "Return the character at position @var{n} in the ASCII sequence.")
  212. #define FUNC_NAME s_scm_integer_to_char
  213. {
  214. scm_t_wchar cn;
  215. cn = scm_to_wchar (n);
  216. /* Avoid the surrogates. */
  217. if (!SCM_IS_UNICODE_CHAR (cn))
  218. scm_out_of_range (FUNC_NAME, n);
  219. return SCM_MAKE_CHAR (cn);
  220. }
  221. #undef FUNC_NAME
  222. SCM_DEFINE (scm_char_upcase, "char-upcase", 1, 0, 0,
  223. (SCM chr),
  224. "Return the uppercase character version of @var{chr}.")
  225. #define FUNC_NAME s_scm_char_upcase
  226. {
  227. SCM_VALIDATE_CHAR (1, chr);
  228. return SCM_MAKE_CHAR (scm_c_upcase (SCM_CHAR (chr)));
  229. }
  230. #undef FUNC_NAME
  231. SCM_DEFINE (scm_char_downcase, "char-downcase", 1, 0, 0,
  232. (SCM chr),
  233. "Return the lowercase character version of @var{chr}.")
  234. #define FUNC_NAME s_scm_char_downcase
  235. {
  236. SCM_VALIDATE_CHAR (1, chr);
  237. return SCM_MAKE_CHAR (scm_c_downcase (SCM_CHAR(chr)));
  238. }
  239. #undef FUNC_NAME
  240. /*
  241. TODO: change name to scm_i_.. ? --hwn
  242. */
  243. scm_t_wchar
  244. scm_c_upcase (scm_t_wchar c)
  245. {
  246. if (c > 255)
  247. return c;
  248. return toupper ((int) c);
  249. }
  250. scm_t_wchar
  251. scm_c_downcase (scm_t_wchar c)
  252. {
  253. if (c > 255)
  254. return c;
  255. return tolower ((int) c);
  256. }
  257. /* There are a few sets of character names: R5RS, Guile
  258. extensions for control characters, and leftover Guile extensions.
  259. They are listed in order of precedence. */
  260. static const char *const scm_r5rs_charnames[] = {
  261. "space", "newline"
  262. };
  263. static const scm_t_uint32 const scm_r5rs_charnums[] = {
  264. 0x20, 0x0A
  265. };
  266. #define SCM_N_R5RS_CHARNAMES (sizeof (scm_r5rs_charnames) / sizeof (char *))
  267. /* The abbreviated names for control characters. */
  268. static const char *const scm_C0_control_charnames[] = {
  269. /* C0 controls */
  270. "nul", "soh", "stx", "etx", "eot", "enq", "ack", "bel",
  271. "bs", "ht", "lf", "vt", "ff", "cr", "so", "si",
  272. "dle", "dc1", "dc2", "dc3", "dc4", "nak", "syn", "etb",
  273. "can", "em", "sub", "esc", "fs", "gs", "rs", "us",
  274. "sp", "del"
  275. };
  276. static const scm_t_uint32 const scm_C0_control_charnums[] = {
  277. 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
  278. 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
  279. 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
  280. 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
  281. 0x20, 0x7f
  282. };
  283. #define SCM_N_C0_CONTROL_CHARNAMES (sizeof (scm_C0_control_charnames) / sizeof (char *))
  284. static const char *const scm_alt_charnames[] = {
  285. "null", "backspace", "tab", "nl", "newline", "np", "page", "return",
  286. };
  287. static const scm_t_uint32 const scm_alt_charnums[] = {
  288. 0x00, 0x08, 0x09, 0x0a, 0x0a, 0x0c, 0x0c, 0x0d
  289. };
  290. #define SCM_N_ALT_CHARNAMES (sizeof (scm_alt_charnames) / sizeof (char *))
  291. /* Returns the string charname for a character if it exists, or NULL
  292. otherwise. */
  293. const char *
  294. scm_i_charname (SCM chr)
  295. {
  296. size_t c;
  297. scm_t_uint32 i = SCM_CHAR (chr);
  298. for (c = 0; c < SCM_N_R5RS_CHARNAMES; c++)
  299. if (scm_r5rs_charnums[c] == i)
  300. return scm_r5rs_charnames[c];
  301. for (c = 0; c < SCM_N_C0_CONTROL_CHARNAMES; c++)
  302. if (scm_C0_control_charnums[c] == i)
  303. return scm_C0_control_charnames[c];
  304. for (c = 0; c < SCM_N_ALT_CHARNAMES; c++)
  305. if (scm_alt_charnums[c] == i)
  306. return scm_alt_charnames[i];
  307. return NULL;
  308. }
  309. /* Return a character from a string charname. */
  310. SCM
  311. scm_i_charname_to_char (const char *charname, size_t charname_len)
  312. {
  313. size_t c;
  314. /* The R5RS charnames. These are supposed to be case
  315. insensitive. */
  316. for (c = 0; c < SCM_N_R5RS_CHARNAMES; c++)
  317. if ((strlen (scm_r5rs_charnames[c]) == charname_len)
  318. && (!strncasecmp (scm_r5rs_charnames[c], charname, charname_len)))
  319. return SCM_MAKE_CHAR (scm_r5rs_charnums[c]);
  320. /* Then come the controls. These are not case sensitive. */
  321. for (c = 0; c < SCM_N_C0_CONTROL_CHARNAMES; c++)
  322. if ((strlen (scm_C0_control_charnames[c]) == charname_len)
  323. && (!strncasecmp (scm_C0_control_charnames[c], charname, charname_len)))
  324. return SCM_MAKE_CHAR (scm_C0_control_charnums[c]);
  325. /* Lastly are some old names carried over for compatibility. */
  326. for (c = 0; c < SCM_N_ALT_CHARNAMES; c++)
  327. if ((strlen (scm_alt_charnames[c]) == charname_len)
  328. && (!strncasecmp (scm_alt_charnames[c], charname, charname_len)))
  329. return SCM_MAKE_CHAR (scm_alt_charnums[c]);
  330. return SCM_BOOL_F;
  331. }
  332. void
  333. scm_init_chars ()
  334. {
  335. #include "libguile/chars.x"
  336. }
  337. /*
  338. Local Variables:
  339. c-file-style: "gnu"
  340. End:
  341. */