symbols.c 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634
  1. /* Copyright (C) 1995-1998, 2000, 2001, 2003, 2004, 2006, 2009, 2011,
  2. * 2013, 2015 Free Software Foundation, Inc.
  3. *
  4. * This library is free software; you can redistribute it and/or
  5. * modify it under the terms of the GNU Lesser General Public License
  6. * as published by the Free Software Foundation; either version 3 of
  7. * the License, or (at your option) any later version.
  8. *
  9. * This library is distributed in the hope that it will be useful, but
  10. * WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. * Lesser General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU Lesser General Public
  15. * License along with this library; if not, write to the Free Software
  16. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  17. * 02110-1301 USA
  18. */
  19. #ifdef HAVE_CONFIG_H
  20. # include <config.h>
  21. #endif
  22. #include <unistr.h>
  23. #include "libguile/_scm.h"
  24. #include "libguile/chars.h"
  25. #include "libguile/eval.h"
  26. #include "libguile/hash.h"
  27. #include "libguile/smob.h"
  28. #include "libguile/variable.h"
  29. #include "libguile/alist.h"
  30. #include "libguile/fluids.h"
  31. #include "libguile/strings.h"
  32. #include "libguile/vectors.h"
  33. #include "libguile/weak-set.h"
  34. #include "libguile/modules.h"
  35. #include "libguile/read.h"
  36. #include "libguile/srfi-13.h"
  37. #include "libguile/validate.h"
  38. #include "libguile/symbols.h"
  39. #include "libguile/private-options.h"
  40. #ifdef HAVE_STRING_H
  41. #include <string.h>
  42. #endif
  43. static SCM symbols;
  44. #ifdef GUILE_DEBUG
  45. SCM_DEFINE (scm_sys_symbols, "%symbols", 0, 0, 0,
  46. (),
  47. "Return the system symbol obarray.")
  48. #define FUNC_NAME s_scm_sys_symbols
  49. {
  50. return symbols;
  51. }
  52. #undef FUNC_NAME
  53. #endif
  54. /* {Symbols}
  55. */
  56. unsigned long
  57. scm_i_hash_symbol (SCM obj, unsigned long n, void *closure)
  58. {
  59. return scm_i_symbol_hash (obj) % n;
  60. }
  61. struct string_lookup_data
  62. {
  63. SCM string;
  64. unsigned long string_hash;
  65. };
  66. static int
  67. string_lookup_predicate_fn (SCM sym, void *closure)
  68. {
  69. struct string_lookup_data *data = closure;
  70. if (scm_i_symbol_hash (sym) == data->string_hash
  71. && scm_i_symbol_length (sym) == scm_i_string_length (data->string))
  72. {
  73. size_t n = scm_i_symbol_length (sym);
  74. while (n--)
  75. if (scm_i_symbol_ref (sym, n) != scm_i_string_ref (data->string, n))
  76. return 0;
  77. return 1;
  78. }
  79. else
  80. return 0;
  81. }
  82. static SCM
  83. lookup_interned_symbol (SCM name, unsigned long raw_hash, SCM obarray)
  84. {
  85. struct string_lookup_data data;
  86. data.string = name;
  87. data.string_hash = raw_hash;
  88. return scm_c_weak_set_lookup (obarray, raw_hash,
  89. string_lookup_predicate_fn,
  90. &data, SCM_BOOL_F);
  91. }
  /* Closure handed to latin1_lookup_predicate_fn when the obarray is
     searched for a symbol named by a Latin-1 C buffer.  */
  struct latin1_lookup_data
  {
    /* Start of the name's bytes.  */
    const char *str;
    /* Number of bytes at STR that make up the name.  */
    size_t len;
    /* Hash of the name; compared against each candidate's hash.  */
    unsigned long string_hash;
  };
  98. static int
  99. latin1_lookup_predicate_fn (SCM sym, void *closure)
  100. {
  101. struct latin1_lookup_data *data = closure;
  102. return scm_i_symbol_hash (sym) == data->string_hash
  103. && scm_i_is_narrow_symbol (sym)
  104. && scm_i_symbol_length (sym) == data->len
  105. && strncmp (scm_i_symbol_chars (sym), data->str, data->len) == 0;
  106. }
  107. static SCM
  108. lookup_interned_latin1_symbol (const char *str, size_t len,
  109. unsigned long raw_hash,
  110. SCM obarray)
  111. {
  112. struct latin1_lookup_data data;
  113. data.str = str;
  114. data.len = len;
  115. data.string_hash = raw_hash;
  116. return scm_c_weak_set_lookup (obarray, raw_hash,
  117. latin1_lookup_predicate_fn,
  118. &data, SCM_BOOL_F);
  119. }
  /* Closure handed to utf8_lookup_predicate_fn when the obarray is
     searched for a symbol named by a UTF-8 C buffer.  */
  struct utf8_lookup_data
  {
    /* Start of the UTF-8 encoded name.  */
    const char *str;
    /* Length of the name at STR, in bytes.  */
    size_t len;
    /* Hash of the name; compared against each candidate's hash.  */
    unsigned long string_hash;
  };
  126. static int
  127. utf8_string_equals_wide_string (const scm_t_uint8 *narrow, size_t nlen,
  128. const scm_t_wchar *wide, size_t wlen)
  129. {
  130. size_t byte_idx = 0, char_idx = 0;
  131. while (byte_idx < nlen && char_idx < wlen)
  132. {
  133. ucs4_t c;
  134. int nbytes;
  135. nbytes = u8_mbtoucr (&c, narrow + byte_idx, nlen - byte_idx);
  136. if (nbytes == 0)
  137. break;
  138. else if (nbytes < 0)
  139. /* Bad UTF-8. */
  140. return 0;
  141. else if (c != wide[char_idx])
  142. return 0;
  143. byte_idx += nbytes;
  144. char_idx++;
  145. }
  146. return byte_idx == nlen && char_idx == wlen;
  147. }
  148. static int
  149. utf8_lookup_predicate_fn (SCM sym, void *closure)
  150. {
  151. struct utf8_lookup_data *data = closure;
  152. if (scm_i_symbol_hash (sym) != data->string_hash)
  153. return 0;
  154. if (scm_i_is_narrow_symbol (sym))
  155. return (scm_i_symbol_length (sym) == data->len
  156. && strncmp (scm_i_symbol_chars (sym), data->str, data->len) == 0);
  157. else
  158. return utf8_string_equals_wide_string ((const scm_t_uint8 *) data->str,
  159. data->len,
  160. scm_i_symbol_wide_chars (sym),
  161. scm_i_symbol_length (sym));
  162. }
  163. static SCM
  164. lookup_interned_utf8_symbol (const char *str, size_t len,
  165. unsigned long raw_hash,
  166. SCM obarray)
  167. {
  168. struct utf8_lookup_data data;
  169. data.str = str;
  170. data.len = len;
  171. data.string_hash = raw_hash;
  172. return scm_c_weak_set_lookup (obarray, raw_hash,
  173. utf8_lookup_predicate_fn,
  174. &data, SCM_BOOL_F);
  175. }
  176. static int
  177. symbol_lookup_predicate_fn (SCM sym, void *closure)
  178. {
  179. SCM other = SCM_PACK_POINTER (closure);
  180. if (scm_i_symbol_hash (sym) == scm_i_symbol_hash (other)
  181. && scm_i_symbol_length (sym) == scm_i_symbol_length (other))
  182. {
  183. if (scm_i_is_narrow_symbol (sym))
  184. return scm_i_is_narrow_symbol (other)
  185. && (strncmp (scm_i_symbol_chars (sym),
  186. scm_i_symbol_chars (other),
  187. scm_i_symbol_length (other)) == 0);
  188. else
  189. return scm_is_true
  190. (scm_string_equal_p (scm_symbol_to_string (sym),
  191. scm_symbol_to_string (other)));
  192. }
  193. return 0;
  194. }
  195. static SCM
  196. scm_i_str2symbol (SCM str, SCM obarray)
  197. {
  198. SCM symbol;
  199. size_t raw_hash = scm_i_string_hash (str);
  200. symbol = lookup_interned_symbol (str, raw_hash, obarray);
  201. if (scm_is_true (symbol))
  202. return symbol;
  203. else
  204. {
  205. /* The symbol was not found, create it. */
  206. symbol = scm_i_make_symbol (str, 0, raw_hash,
  207. scm_cons (SCM_BOOL_F, SCM_EOL));
  208. /* Might return a different symbol, if another one was interned at
  209. the same time. */
  210. return scm_c_weak_set_add_x (obarray, raw_hash,
  211. symbol_lookup_predicate_fn,
  212. SCM_UNPACK_POINTER (symbol), symbol);
  213. }
  214. }
  215. static SCM
  216. scm_i_str2uninterned_symbol (SCM str)
  217. {
  218. size_t raw_hash = scm_i_string_hash (str);
  219. return scm_i_make_symbol (str, SCM_I_F_SYMBOL_UNINTERNED,
  220. raw_hash, scm_cons (SCM_BOOL_F, SCM_EOL));
  221. }
  222. SCM_DEFINE (scm_symbol_p, "symbol?", 1, 0, 0,
  223. (SCM obj),
  224. "Return @code{#t} if @var{obj} is a symbol, otherwise return\n"
  225. "@code{#f}.")
  226. #define FUNC_NAME s_scm_symbol_p
  227. {
  228. return scm_from_bool (scm_is_symbol (obj));
  229. }
  230. #undef FUNC_NAME
  231. SCM_DEFINE (scm_symbol_interned_p, "symbol-interned?", 1, 0, 0,
  232. (SCM symbol),
  233. "Return @code{#t} if @var{symbol} is interned, otherwise return\n"
  234. "@code{#f}.")
  235. #define FUNC_NAME s_scm_symbol_interned_p
  236. {
  237. SCM_VALIDATE_SYMBOL (1, symbol);
  238. return scm_from_bool (scm_i_symbol_is_interned (symbol));
  239. }
  240. #undef FUNC_NAME
  241. SCM_DEFINE (scm_make_symbol, "make-symbol", 1, 0, 0,
  242. (SCM name),
  243. "Return a new uninterned symbol with the name @var{name}. "
  244. "The returned symbol is guaranteed to be unique and future "
  245. "calls to @code{string->symbol} will not return it.")
  246. #define FUNC_NAME s_scm_make_symbol
  247. {
  248. SCM_VALIDATE_STRING (1, name);
  249. return scm_i_str2uninterned_symbol (name);
  250. }
  251. #undef FUNC_NAME
  252. SCM_DEFINE (scm_symbol_to_string, "symbol->string", 1, 0, 0,
  253. (SCM s),
  254. "Return the name of @var{symbol} as a string. If the symbol was\n"
  255. "part of an object returned as the value of a literal expression\n"
  256. "(section @pxref{Literal expressions,,,r5rs, The Revised^5\n"
  257. "Report on Scheme}) or by a call to the @code{read} procedure,\n"
  258. "and its name contains alphabetic characters, then the string\n"
  259. "returned will contain characters in the implementation's\n"
  260. "preferred standard case---some implementations will prefer\n"
  261. "upper case, others lower case. If the symbol was returned by\n"
  262. "@code{string->symbol}, the case of characters in the string\n"
  263. "returned will be the same as the case in the string that was\n"
  264. "passed to @code{string->symbol}. It is an error to apply\n"
  265. "mutation procedures like @code{string-set!} to strings returned\n"
  266. "by this procedure.\n"
  267. "\n"
  268. "The following examples assume that the implementation's\n"
  269. "standard case is lower case:\n"
  270. "\n"
  271. "@lisp\n"
  272. "(symbol->string 'flying-fish) @result{} \"flying-fish\"\n"
  273. "(symbol->string 'Martin) @result{} \"martin\"\n"
  274. "(symbol->string\n"
  275. " (string->symbol \"Malvina\")) @result{} \"Malvina\"\n"
  276. "@end lisp")
  277. #define FUNC_NAME s_scm_symbol_to_string
  278. {
  279. SCM_VALIDATE_SYMBOL (1, s);
  280. return scm_i_symbol_substring (s, 0, scm_i_symbol_length (s));
  281. }
  282. #undef FUNC_NAME
  283. SCM_DEFINE (scm_string_to_symbol, "string->symbol", 1, 0, 0,
  284. (SCM string),
  285. "Return the symbol whose name is @var{string}. This procedure\n"
  286. "can create symbols with names containing special characters or\n"
  287. "letters in the non-standard case, but it is usually a bad idea\n"
  288. "to create such symbols because in some implementations of\n"
  289. "Scheme they cannot be read as themselves. See\n"
  290. "@code{symbol->string}.\n"
  291. "\n"
  292. "The following examples assume that the implementation's\n"
  293. "standard case is lower case:\n"
  294. "\n"
  295. "@lisp\n"
  296. "(eq? 'mISSISSIppi 'mississippi) @result{} #t\n"
  297. "(string->symbol \"mISSISSIppi\") @result{} @r{the symbol with name \"mISSISSIppi\"}\n"
  298. "(eq? 'bitBlt (string->symbol \"bitBlt\")) @result{} #f\n"
  299. "(eq? 'JollyWog\n"
  300. " (string->symbol (symbol->string 'JollyWog))) @result{} #t\n"
  301. "(string=? \"K. Harper, M.D.\"\n"
  302. " (symbol->string\n"
  303. " (string->symbol \"K. Harper, M.D.\"))) @result{}#t\n"
  304. "@end lisp")
  305. #define FUNC_NAME s_scm_string_to_symbol
  306. {
  307. SCM_VALIDATE_STRING (1, string);
  308. return scm_i_str2symbol (string, symbols);
  309. }
  310. #undef FUNC_NAME
  311. SCM_DEFINE (scm_string_ci_to_symbol, "string-ci->symbol", 1, 0, 0,
  312. (SCM str),
  313. "Return the symbol whose name is @var{str}. @var{str} is\n"
  314. "converted to lowercase before the conversion is done, if Guile\n"
  315. "is currently reading symbols case-insensitively.")
  316. #define FUNC_NAME s_scm_string_ci_to_symbol
  317. {
  318. return scm_string_to_symbol (SCM_CASE_INSENSITIVE_P
  319. ? scm_string_downcase(str)
  320. : str);
  321. }
  322. #undef FUNC_NAME
  323. SCM_DEFINE (scm_make_obarray, "make-obarray", 0, 0, 0,
  324. (void),
  325. "Return a fresh obarray.")
  326. #define FUNC_NAME s_scm_make_obarray
  327. {
  328. return scm_c_make_weak_set (0);
  329. }
  330. #undef FUNC_NAME
  331. SCM_DEFINE (scm_find_symbol, "find-symbol", 1, 1, 0,
  332. (SCM string, SCM obarray),
  333. "Return the symbol named @var{string} if it is present in\n"
  334. "@var{obarray}. Return false otherwise.")
  335. #define FUNC_NAME s_scm_find_symbol
  336. {
  337. if (SCM_UNBNDP (obarray))
  338. obarray = symbols;
  339. return lookup_interned_symbol (string,
  340. scm_i_string_hash (string),
  341. obarray);
  342. }
  343. #undef FUNC_NAME
  344. SCM_DEFINE (scm_intern, "intern", 1, 1, 0,
  345. (SCM string, SCM obarray),
  346. "Intern @var{string} in @var{obarray}.")
  347. #define FUNC_NAME s_scm_intern
  348. {
  349. if (SCM_UNBNDP (obarray))
  350. obarray = symbols;
  351. SCM_VALIDATE_STRING (1, string);
  352. return scm_i_str2symbol (string, obarray);
  353. }
  354. #undef FUNC_NAME
  355. SCM_DEFINE (scm_unintern, "unintern", 1, 1, 0,
  356. (SCM symbol, SCM obarray),
  357. "Unintern @var{symbol} from @var{obarray}.")
  358. #define FUNC_NAME s_scm_unintern
  359. {
  360. if (SCM_UNBNDP (obarray))
  361. obarray = symbols;
  362. scm_weak_set_remove_x (obarray, symbol);
  363. return SCM_UNSPECIFIED;
  364. }
  365. #undef FUNC_NAME
  366. SCM_DEFINE (scm_obarray_for_each, "obarray-for-each", 1, 1, 0,
  367. (SCM proc, SCM obarray),
  368. "")
  369. #define FUNC_NAME s_scm_obarray_for_each
  370. {
  371. if (SCM_UNBNDP (obarray))
  372. obarray = symbols;
  373. scm_weak_set_for_each (proc, obarray);
  374. return SCM_UNSPECIFIED;
  375. }
  376. #undef FUNC_NAME
  377. /* The default prefix for `gensym'd symbols. */
  378. static SCM default_gensym_prefix;
  379. #define MAX_PREFIX_LENGTH 30
  380. SCM_DEFINE (scm_gensym, "gensym", 0, 1, 0,
  381. (SCM prefix),
  382. "Create a new symbol with a name constructed from a prefix and\n"
  383. "a counter value. The string @var{prefix} can be specified as\n"
  384. "an optional argument. Default prefix is @code{ g}. The counter\n"
  385. "is increased by 1 at each call. There is no provision for\n"
  386. "resetting the counter.")
  387. #define FUNC_NAME s_scm_gensym
  388. {
  389. static int gensym_counter = 0;
  390. SCM suffix, name;
  391. int n, n_digits;
  392. char buf[SCM_INTBUFLEN];
  393. if (SCM_UNBNDP (prefix))
  394. prefix = default_gensym_prefix;
  395. /* mutex in case another thread looks and incs at the exact same moment */
  396. scm_i_scm_pthread_mutex_lock (&scm_i_misc_mutex);
  397. n = gensym_counter++;
  398. scm_i_pthread_mutex_unlock (&scm_i_misc_mutex);
  399. n_digits = scm_iint2str (n, 10, buf);
  400. suffix = scm_from_latin1_stringn (buf, n_digits);
  401. name = scm_string_append (scm_list_2 (prefix, suffix));
  402. return scm_string_to_symbol (name);
  403. }
  404. #undef FUNC_NAME
  405. SCM_DEFINE (scm_symbol_hash, "symbol-hash", 1, 0, 0,
  406. (SCM symbol),
  407. "Return a hash value for @var{symbol}.")
  408. #define FUNC_NAME s_scm_symbol_hash
  409. {
  410. SCM_VALIDATE_SYMBOL (1, symbol);
  411. return scm_from_ulong (scm_i_symbol_hash (symbol));
  412. }
  413. #undef FUNC_NAME
  414. SCM_DEFINE (scm_symbol_fref, "symbol-fref", 1, 0, 0,
  415. (SCM s),
  416. "Return the contents of the symbol @var{s}'s @dfn{function slot}.")
  417. #define FUNC_NAME s_scm_symbol_fref
  418. {
  419. SCM_VALIDATE_SYMBOL (1, s);
  420. return SCM_CAR (SCM_CELL_OBJECT_3 (s));
  421. }
  422. #undef FUNC_NAME
  423. SCM_DEFINE (scm_symbol_pref, "symbol-pref", 1, 0, 0,
  424. (SCM s),
  425. "Return the @dfn{property list} currently associated with the\n"
  426. "symbol @var{s}.")
  427. #define FUNC_NAME s_scm_symbol_pref
  428. {
  429. SCM_VALIDATE_SYMBOL (1, s);
  430. return SCM_CDR (SCM_CELL_OBJECT_3 (s));
  431. }
  432. #undef FUNC_NAME
  433. SCM_DEFINE (scm_symbol_fset_x, "symbol-fset!", 2, 0, 0,
  434. (SCM s, SCM val),
  435. "Change the binding of the symbol @var{s}'s function slot.")
  436. #define FUNC_NAME s_scm_symbol_fset_x
  437. {
  438. SCM_VALIDATE_SYMBOL (1, s);
  439. SCM_SETCAR (SCM_CELL_OBJECT_3 (s), val);
  440. return SCM_UNSPECIFIED;
  441. }
  442. #undef FUNC_NAME
  443. SCM_DEFINE (scm_symbol_pset_x, "symbol-pset!", 2, 0, 0,
  444. (SCM s, SCM val),
  445. "Change the binding of the symbol @var{s}'s property slot.")
  446. #define FUNC_NAME s_scm_symbol_pset_x
  447. {
  448. SCM_VALIDATE_SYMBOL (1, s);
  449. SCM_SETCDR (SCM_CELL_OBJECT_3 (s), val);
  450. return SCM_UNSPECIFIED;
  451. }
  452. #undef FUNC_NAME
  453. SCM
  454. scm_from_locale_symbol (const char *sym)
  455. {
  456. return scm_from_locale_symboln (sym, -1);
  457. }
  458. SCM
  459. scm_from_locale_symboln (const char *sym, size_t len)
  460. {
  461. SCM str = scm_from_locale_stringn (sym, len);
  462. return scm_i_str2symbol (str, symbols);
  463. }
  464. SCM
  465. scm_take_locale_symboln (char *sym, size_t len)
  466. {
  467. SCM str;
  468. str = scm_take_locale_stringn (sym, len);
  469. return scm_i_str2symbol (str, symbols);
  470. }
  471. SCM
  472. scm_take_locale_symbol (char *sym)
  473. {
  474. return scm_take_locale_symboln (sym, (size_t)-1);
  475. }
  476. SCM
  477. scm_from_latin1_symbol (const char *sym)
  478. {
  479. return scm_from_latin1_symboln (sym, -1);
  480. }
  481. SCM
  482. scm_from_latin1_symboln (const char *sym, size_t len)
  483. {
  484. unsigned long hash;
  485. SCM ret;
  486. if (len == (size_t) -1)
  487. len = strlen (sym);
  488. hash = scm_i_latin1_string_hash (sym, len);
  489. ret = lookup_interned_latin1_symbol (sym, len, hash, symbols);
  490. if (scm_is_false (ret))
  491. {
  492. SCM str = scm_from_latin1_stringn (sym, len);
  493. ret = scm_i_str2symbol (str, symbols);
  494. }
  495. return ret;
  496. }
  497. SCM
  498. scm_from_utf8_symbol (const char *sym)
  499. {
  500. return scm_from_utf8_symboln (sym, -1);
  501. }
  502. SCM
  503. scm_from_utf8_symboln (const char *sym, size_t len)
  504. {
  505. unsigned long hash;
  506. SCM ret;
  507. if (len == (size_t) -1)
  508. len = strlen (sym);
  509. hash = scm_i_utf8_string_hash (sym, len);
  510. ret = lookup_interned_utf8_symbol (sym, len, hash, symbols);
  511. if (scm_is_false (ret))
  512. {
  513. SCM str = scm_from_utf8_stringn (sym, len);
  514. ret = scm_i_str2symbol (str, symbols);
  515. }
  516. return ret;
  517. }
  518. void
  519. scm_symbols_prehistory ()
  520. {
  521. symbols = scm_c_make_weak_set (5000);
  522. }
  523. void
  524. scm_init_symbols ()
  525. {
  526. #include "libguile/symbols.x"
  527. default_gensym_prefix = scm_from_latin1_string (" g");
  528. }
  529. /*
  530. Local Variables:
  531. c-file-style: "gnu"
  532. End:
  533. */