symbols.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569
  1. /* Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001, 2003, 2004,
  2. * 2006, 2009, 2011 Free Software Foundation, Inc.
  3. *
  4. * This library is free software; you can redistribute it and/or
  5. * modify it under the terms of the GNU Lesser General Public License
  6. * as published by the Free Software Foundation; either version 3 of
  7. * the License, or (at your option) any later version.
  8. *
  9. * This library is distributed in the hope that it will be useful, but
  10. * WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. * Lesser General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU Lesser General Public
  15. * License along with this library; if not, write to the Free Software
  16. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  17. * 02110-1301 USA
  18. */
  19. #ifdef HAVE_CONFIG_H
  20. # include <config.h>
  21. #endif
  22. #include <unistr.h>
  23. #include "libguile/_scm.h"
  24. #include "libguile/chars.h"
  25. #include "libguile/eval.h"
  26. #include "libguile/hash.h"
  27. #include "libguile/smob.h"
  28. #include "libguile/variable.h"
  29. #include "libguile/alist.h"
  30. #include "libguile/fluids.h"
  31. #include "libguile/strings.h"
  32. #include "libguile/vectors.h"
  33. #include "libguile/weak-set.h"
  34. #include "libguile/modules.h"
  35. #include "libguile/read.h"
  36. #include "libguile/srfi-13.h"
  37. #include "libguile/validate.h"
  38. #include "libguile/symbols.h"
  39. #include "libguile/private-options.h"
  40. #ifdef HAVE_STRING_H
  41. #include <string.h>
  42. #endif
  43. static SCM symbols;
  44. #ifdef GUILE_DEBUG
  45. SCM_DEFINE (scm_sys_symbols, "%symbols", 0, 0, 0,
  46. (),
  47. "Return the system symbol obarray.")
  48. #define FUNC_NAME s_scm_sys_symbols
  49. {
  50. return symbols;
  51. }
  52. #undef FUNC_NAME
  53. #endif
  54. /* {Symbols}
  55. */
  56. unsigned long
  57. scm_i_hash_symbol (SCM obj, unsigned long n, void *closure)
  58. {
  59. return scm_i_symbol_hash (obj) % n;
  60. }
  61. struct string_lookup_data
  62. {
  63. SCM string;
  64. unsigned long string_hash;
  65. };
  66. static int
  67. string_lookup_predicate_fn (SCM sym, void *closure)
  68. {
  69. struct string_lookup_data *data = closure;
  70. if (scm_i_symbol_hash (sym) == data->string_hash
  71. && scm_i_symbol_length (sym) == scm_i_string_length (data->string))
  72. {
  73. size_t n = scm_i_symbol_length (sym);
  74. while (n--)
  75. if (scm_i_symbol_ref (sym, n) != scm_i_string_ref (data->string, n))
  76. return 0;
  77. return 1;
  78. }
  79. else
  80. return 0;
  81. }
  82. static SCM
  83. lookup_interned_symbol (SCM name, unsigned long raw_hash)
  84. {
  85. struct string_lookup_data data;
  86. data.string = name;
  87. data.string_hash = raw_hash;
  88. return scm_c_weak_set_lookup (symbols, raw_hash,
  89. string_lookup_predicate_fn,
  90. &data, SCM_BOOL_F);
  91. }
  92. struct latin1_lookup_data
  93. {
  94. const char *str;
  95. size_t len;
  96. unsigned long string_hash;
  97. };
  98. static int
  99. latin1_lookup_predicate_fn (SCM sym, void *closure)
  100. {
  101. struct latin1_lookup_data *data = closure;
  102. return scm_i_symbol_hash (sym) == data->string_hash
  103. && scm_i_is_narrow_symbol (sym)
  104. && scm_i_symbol_length (sym) == data->len
  105. && strncmp (scm_i_symbol_chars (sym), data->str, data->len) == 0;
  106. }
  107. static SCM
  108. lookup_interned_latin1_symbol (const char *str, size_t len,
  109. unsigned long raw_hash)
  110. {
  111. struct latin1_lookup_data data;
  112. data.str = str;
  113. data.len = len;
  114. data.string_hash = raw_hash;
  115. return scm_c_weak_set_lookup (symbols, raw_hash,
  116. latin1_lookup_predicate_fn,
  117. &data, SCM_BOOL_F);
  118. }
  119. struct utf8_lookup_data
  120. {
  121. const char *str;
  122. size_t len;
  123. unsigned long string_hash;
  124. };
  125. static int
  126. utf8_string_equals_wide_string (const scm_t_uint8 *narrow, size_t nlen,
  127. const scm_t_wchar *wide, size_t wlen)
  128. {
  129. size_t byte_idx = 0, char_idx = 0;
  130. while (byte_idx < nlen && char_idx < wlen)
  131. {
  132. ucs4_t c;
  133. int nbytes;
  134. nbytes = u8_mbtouc (&c, narrow + byte_idx, nlen - byte_idx);
  135. if (nbytes == 0)
  136. break;
  137. else if (nbytes < 0)
  138. /* Bad UTF-8. */
  139. return 0;
  140. else if (c != wide[char_idx])
  141. return 0;
  142. byte_idx += nbytes;
  143. char_idx++;
  144. }
  145. return byte_idx == nlen && char_idx == wlen;
  146. }
  147. static int
  148. utf8_lookup_predicate_fn (SCM sym, void *closure)
  149. {
  150. struct utf8_lookup_data *data = closure;
  151. if (scm_i_symbol_hash (sym) != data->string_hash)
  152. return 0;
  153. if (scm_i_is_narrow_symbol (sym))
  154. return (scm_i_symbol_length (sym) == data->len
  155. && strncmp (scm_i_symbol_chars (sym), data->str, data->len) == 0);
  156. else
  157. return utf8_string_equals_wide_string ((const scm_t_uint8 *) data->str,
  158. data->len,
  159. scm_i_symbol_wide_chars (sym),
  160. scm_i_symbol_length (sym));
  161. }
  162. static SCM
  163. lookup_interned_utf8_symbol (const char *str, size_t len,
  164. unsigned long raw_hash)
  165. {
  166. struct utf8_lookup_data data;
  167. data.str = str;
  168. data.len = len;
  169. data.string_hash = raw_hash;
  170. return scm_c_weak_set_lookup (symbols, raw_hash,
  171. utf8_lookup_predicate_fn,
  172. &data, SCM_BOOL_F);
  173. }
  174. static int
  175. symbol_lookup_predicate_fn (SCM sym, void *closure)
  176. {
  177. SCM other = SCM_PACK_POINTER (closure);
  178. if (scm_i_symbol_hash (sym) == scm_i_symbol_hash (other)
  179. && scm_i_symbol_length (sym) == scm_i_symbol_length (other))
  180. {
  181. if (scm_i_is_narrow_symbol (sym))
  182. return scm_i_is_narrow_symbol (other)
  183. && (strncmp (scm_i_symbol_chars (sym),
  184. scm_i_symbol_chars (other),
  185. scm_i_symbol_length (other)) == 0);
  186. else
  187. return scm_is_true
  188. (scm_string_equal_p (scm_symbol_to_string (sym),
  189. scm_symbol_to_string (other)));
  190. }
  191. return 0;
  192. }
  193. static SCM
  194. scm_i_str2symbol (SCM str)
  195. {
  196. SCM symbol;
  197. size_t raw_hash = scm_i_string_hash (str);
  198. symbol = lookup_interned_symbol (str, raw_hash);
  199. if (scm_is_true (symbol))
  200. return symbol;
  201. else
  202. {
  203. /* The symbol was not found, create it. */
  204. symbol = scm_i_make_symbol (str, 0, raw_hash,
  205. scm_cons (SCM_BOOL_F, SCM_EOL));
  206. /* Might return a different symbol, if another one was interned at
  207. the same time. */
  208. return scm_c_weak_set_add_x (symbols, raw_hash,
  209. symbol_lookup_predicate_fn,
  210. SCM_UNPACK_POINTER (symbol), symbol);
  211. }
  212. }
  213. static SCM
  214. scm_i_str2uninterned_symbol (SCM str)
  215. {
  216. size_t raw_hash = scm_i_string_hash (str);
  217. return scm_i_make_symbol (str, SCM_I_F_SYMBOL_UNINTERNED,
  218. raw_hash, scm_cons (SCM_BOOL_F, SCM_EOL));
  219. }
  220. SCM_DEFINE (scm_symbol_p, "symbol?", 1, 0, 0,
  221. (SCM obj),
  222. "Return @code{#t} if @var{obj} is a symbol, otherwise return\n"
  223. "@code{#f}.")
  224. #define FUNC_NAME s_scm_symbol_p
  225. {
  226. return scm_from_bool (scm_is_symbol (obj));
  227. }
  228. #undef FUNC_NAME
  229. SCM_DEFINE (scm_symbol_interned_p, "symbol-interned?", 1, 0, 0,
  230. (SCM symbol),
  231. "Return @code{#t} if @var{symbol} is interned, otherwise return\n"
  232. "@code{#f}.")
  233. #define FUNC_NAME s_scm_symbol_interned_p
  234. {
  235. SCM_VALIDATE_SYMBOL (1, symbol);
  236. return scm_from_bool (scm_i_symbol_is_interned (symbol));
  237. }
  238. #undef FUNC_NAME
  239. SCM_DEFINE (scm_make_symbol, "make-symbol", 1, 0, 0,
  240. (SCM name),
  241. "Return a new uninterned symbol with the name @var{name}. "
  242. "The returned symbol is guaranteed to be unique and future "
  243. "calls to @code{string->symbol} will not return it.")
  244. #define FUNC_NAME s_scm_make_symbol
  245. {
  246. SCM_VALIDATE_STRING (1, name);
  247. return scm_i_str2uninterned_symbol (name);
  248. }
  249. #undef FUNC_NAME
  250. SCM_DEFINE (scm_symbol_to_string, "symbol->string", 1, 0, 0,
  251. (SCM s),
  252. "Return the name of @var{symbol} as a string. If the symbol was\n"
  253. "part of an object returned as the value of a literal expression\n"
  254. "(section @pxref{Literal expressions,,,r5rs, The Revised^5\n"
  255. "Report on Scheme}) or by a call to the @code{read} procedure,\n"
  256. "and its name contains alphabetic characters, then the string\n"
  257. "returned will contain characters in the implementation's\n"
  258. "preferred standard case---some implementations will prefer\n"
  259. "upper case, others lower case. If the symbol was returned by\n"
  260. "@code{string->symbol}, the case of characters in the string\n"
  261. "returned will be the same as the case in the string that was\n"
  262. "passed to @code{string->symbol}. It is an error to apply\n"
  263. "mutation procedures like @code{string-set!} to strings returned\n"
  264. "by this procedure.\n"
  265. "\n"
  266. "The following examples assume that the implementation's\n"
  267. "standard case is lower case:\n"
  268. "\n"
  269. "@lisp\n"
  270. "(symbol->string 'flying-fish) @result{} \"flying-fish\"\n"
  271. "(symbol->string 'Martin) @result{} \"martin\"\n"
  272. "(symbol->string\n"
  273. " (string->symbol \"Malvina\")) @result{} \"Malvina\"\n"
  274. "@end lisp")
  275. #define FUNC_NAME s_scm_symbol_to_string
  276. {
  277. SCM_VALIDATE_SYMBOL (1, s);
  278. return scm_i_symbol_substring (s, 0, scm_i_symbol_length (s));
  279. }
  280. #undef FUNC_NAME
  281. SCM_DEFINE (scm_string_to_symbol, "string->symbol", 1, 0, 0,
  282. (SCM string),
  283. "Return the symbol whose name is @var{string}. This procedure\n"
  284. "can create symbols with names containing special characters or\n"
  285. "letters in the non-standard case, but it is usually a bad idea\n"
  286. "to create such symbols because in some implementations of\n"
  287. "Scheme they cannot be read as themselves. See\n"
  288. "@code{symbol->string}.\n"
  289. "\n"
  290. "The following examples assume that the implementation's\n"
  291. "standard case is lower case:\n"
  292. "\n"
  293. "@lisp\n"
  294. "(eq? 'mISSISSIppi 'mississippi) @result{} #t\n"
  295. "(string->symbol \"mISSISSIppi\") @result{} @r{the symbol with name \"mISSISSIppi\"}\n"
  296. "(eq? 'bitBlt (string->symbol \"bitBlt\")) @result{} #f\n"
  297. "(eq? 'JollyWog\n"
  298. " (string->symbol (symbol->string 'JollyWog))) @result{} #t\n"
  299. "(string=? \"K. Harper, M.D.\"\n"
  300. " (symbol->string\n"
  301. " (string->symbol \"K. Harper, M.D.\"))) @result{}#t\n"
  302. "@end lisp")
  303. #define FUNC_NAME s_scm_string_to_symbol
  304. {
  305. SCM_VALIDATE_STRING (1, string);
  306. return scm_i_str2symbol (string);
  307. }
  308. #undef FUNC_NAME
  309. SCM_DEFINE (scm_string_ci_to_symbol, "string-ci->symbol", 1, 0, 0,
  310. (SCM str),
  311. "Return the symbol whose name is @var{str}. @var{str} is\n"
  312. "converted to lowercase before the conversion is done, if Guile\n"
  313. "is currently reading symbols case-insensitively.")
  314. #define FUNC_NAME s_scm_string_ci_to_symbol
  315. {
  316. return scm_string_to_symbol (SCM_CASE_INSENSITIVE_P
  317. ? scm_string_downcase(str)
  318. : str);
  319. }
  320. #undef FUNC_NAME
  321. /* The default prefix for `gensym'd symbols. */
  322. static SCM default_gensym_prefix;
  323. #define MAX_PREFIX_LENGTH 30
  324. SCM_DEFINE (scm_gensym, "gensym", 0, 1, 0,
  325. (SCM prefix),
  326. "Create a new symbol with a name constructed from a prefix and\n"
  327. "a counter value. The string @var{prefix} can be specified as\n"
  328. "an optional argument. Default prefix is @code{ g}. The counter\n"
  329. "is increased by 1 at each call. There is no provision for\n"
  330. "resetting the counter.")
  331. #define FUNC_NAME s_scm_gensym
  332. {
  333. static int gensym_counter = 0;
  334. SCM suffix, name;
  335. int n, n_digits;
  336. char buf[SCM_INTBUFLEN];
  337. if (SCM_UNBNDP (prefix))
  338. prefix = default_gensym_prefix;
  339. /* mutex in case another thread looks and incs at the exact same moment */
  340. scm_i_scm_pthread_mutex_lock (&scm_i_misc_mutex);
  341. n = gensym_counter++;
  342. scm_i_pthread_mutex_unlock (&scm_i_misc_mutex);
  343. n_digits = scm_iint2str (n, 10, buf);
  344. suffix = scm_from_latin1_stringn (buf, n_digits);
  345. name = scm_string_append (scm_list_2 (prefix, suffix));
  346. return scm_string_to_symbol (name);
  347. }
  348. #undef FUNC_NAME
  349. SCM_DEFINE (scm_symbol_hash, "symbol-hash", 1, 0, 0,
  350. (SCM symbol),
  351. "Return a hash value for @var{symbol}.")
  352. #define FUNC_NAME s_scm_symbol_hash
  353. {
  354. SCM_VALIDATE_SYMBOL (1, symbol);
  355. return scm_from_ulong (scm_i_symbol_hash (symbol));
  356. }
  357. #undef FUNC_NAME
  358. SCM_DEFINE (scm_symbol_fref, "symbol-fref", 1, 0, 0,
  359. (SCM s),
  360. "Return the contents of the symbol @var{s}'s @dfn{function slot}.")
  361. #define FUNC_NAME s_scm_symbol_fref
  362. {
  363. SCM_VALIDATE_SYMBOL (1, s);
  364. return SCM_CAR (SCM_CELL_OBJECT_3 (s));
  365. }
  366. #undef FUNC_NAME
  367. SCM_DEFINE (scm_symbol_pref, "symbol-pref", 1, 0, 0,
  368. (SCM s),
  369. "Return the @dfn{property list} currently associated with the\n"
  370. "symbol @var{s}.")
  371. #define FUNC_NAME s_scm_symbol_pref
  372. {
  373. SCM_VALIDATE_SYMBOL (1, s);
  374. return SCM_CDR (SCM_CELL_OBJECT_3 (s));
  375. }
  376. #undef FUNC_NAME
  377. SCM_DEFINE (scm_symbol_fset_x, "symbol-fset!", 2, 0, 0,
  378. (SCM s, SCM val),
  379. "Change the binding of the symbol @var{s}'s function slot.")
  380. #define FUNC_NAME s_scm_symbol_fset_x
  381. {
  382. SCM_VALIDATE_SYMBOL (1, s);
  383. SCM_SETCAR (SCM_CELL_OBJECT_3 (s), val);
  384. return SCM_UNSPECIFIED;
  385. }
  386. #undef FUNC_NAME
  387. SCM_DEFINE (scm_symbol_pset_x, "symbol-pset!", 2, 0, 0,
  388. (SCM s, SCM val),
  389. "Change the binding of the symbol @var{s}'s property slot.")
  390. #define FUNC_NAME s_scm_symbol_pset_x
  391. {
  392. SCM_VALIDATE_SYMBOL (1, s);
  393. SCM_SETCDR (SCM_CELL_OBJECT_3 (s), val);
  394. return SCM_UNSPECIFIED;
  395. }
  396. #undef FUNC_NAME
  397. SCM
  398. scm_from_locale_symbol (const char *sym)
  399. {
  400. return scm_from_locale_symboln (sym, -1);
  401. }
  402. SCM
  403. scm_from_locale_symboln (const char *sym, size_t len)
  404. {
  405. SCM str = scm_from_locale_stringn (sym, len);
  406. return scm_i_str2symbol (str);
  407. }
  408. SCM
  409. scm_take_locale_symboln (char *sym, size_t len)
  410. {
  411. SCM str;
  412. str = scm_take_locale_stringn (sym, len);
  413. return scm_i_str2symbol (str);
  414. }
  415. SCM
  416. scm_take_locale_symbol (char *sym)
  417. {
  418. return scm_take_locale_symboln (sym, (size_t)-1);
  419. }
  420. SCM
  421. scm_from_latin1_symbol (const char *sym)
  422. {
  423. return scm_from_latin1_symboln (sym, -1);
  424. }
  425. SCM
  426. scm_from_latin1_symboln (const char *sym, size_t len)
  427. {
  428. unsigned long hash;
  429. SCM ret;
  430. if (len == (size_t) -1)
  431. len = strlen (sym);
  432. hash = scm_i_latin1_string_hash (sym, len);
  433. ret = lookup_interned_latin1_symbol (sym, len, hash);
  434. if (scm_is_false (ret))
  435. {
  436. SCM str = scm_from_latin1_stringn (sym, len);
  437. ret = scm_i_str2symbol (str);
  438. }
  439. return ret;
  440. }
  441. SCM
  442. scm_from_utf8_symbol (const char *sym)
  443. {
  444. return scm_from_utf8_symboln (sym, -1);
  445. }
  446. SCM
  447. scm_from_utf8_symboln (const char *sym, size_t len)
  448. {
  449. unsigned long hash;
  450. SCM ret;
  451. if (len == (size_t) -1)
  452. len = strlen (sym);
  453. hash = scm_i_utf8_string_hash (sym, len);
  454. ret = lookup_interned_utf8_symbol (sym, len, hash);
  455. if (scm_is_false (ret))
  456. {
  457. SCM str = scm_from_utf8_stringn (sym, len);
  458. ret = scm_i_str2symbol (str);
  459. }
  460. return ret;
  461. }
  462. void
  463. scm_symbols_prehistory ()
  464. {
  465. symbols = scm_c_make_weak_set (5000);
  466. }
  467. void
  468. scm_init_symbols ()
  469. {
  470. #include "libguile/symbols.x"
  471. default_gensym_prefix = scm_from_latin1_string (" g");
  472. }
  473. /*
  474. Local Variables:
  475. c-file-style: "gnu"
  476. End:
  477. */