123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476 |
- /* Copyright 1995-1998,2000-2001,2003-2004,2006,2009,2011,2013,2015,2018,2022,2023
- Free Software Foundation, Inc.
- This file is part of Guile.
- Guile is free software: you can redistribute it and/or modify it
- under the terms of the GNU Lesser General Public License as published
- by the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
- Guile is distributed in the hope that it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
- License for more details.
- You should have received a copy of the GNU Lesser General Public
- License along with Guile. If not, see
- <https://www.gnu.org/licenses/>. */
- #ifdef HAVE_CONFIG_H
- # include <config.h>
- #endif
- #include <string.h>
- #include <unistr.h>
- #include "alist.h"
- #include "boolean.h"
- #include "chars.h"
- #include "eval.h"
- #include "fluids.h"
- #include "gsubr.h"
- #include "hash.h"
- #include "list.h"
- #include "modules.h"
- #include "numbers.h"
- #include "pairs.h"
- #include "private-options.h"
- #include "read.h"
- #include "smob.h"
- #include "srfi-13.h"
- #include "strings.h"
- #include "strorder.h"
- #include "threads.h"
- #include "variable.h"
- #include "vectors.h"
- #include "weak-set.h"
- #include "symbols.h"
- static SCM symbols;
- #ifdef GUILE_DEBUG
- SCM_DEFINE (scm_sys_symbols, "%symbols", 0, 0, 0,
- (),
- "Return the system symbol obarray.")
- #define FUNC_NAME s_scm_sys_symbols
- {
- return symbols;
- }
- #undef FUNC_NAME
- #endif
- /* {Symbols}
- */
- unsigned long
- scm_i_hash_symbol (SCM obj, unsigned long n, void *closure)
- {
- return scm_i_symbol_hash (obj) % n;
- }
- struct string_lookup_data
- {
- SCM string;
- unsigned long string_hash;
- };
- static int
- string_lookup_predicate_fn (SCM sym, void *closure)
- {
- struct string_lookup_data *data = closure;
- if (scm_i_symbol_hash (sym) == data->string_hash
- && scm_i_symbol_length (sym) == scm_i_string_length (data->string))
- {
- size_t n = scm_i_symbol_length (sym);
- while (n--)
- if (scm_i_symbol_ref (sym, n) != scm_i_string_ref (data->string, n))
- return 0;
- return 1;
- }
- else
- return 0;
- }
- static SCM
- lookup_interned_symbol (SCM name, unsigned long raw_hash)
- {
- struct string_lookup_data data;
- data.string = name;
- data.string_hash = raw_hash;
-
- return scm_c_weak_set_lookup (symbols, raw_hash,
- string_lookup_predicate_fn,
- &data, SCM_BOOL_F);
- }
- struct latin1_lookup_data
- {
- const char *str;
- size_t len;
- unsigned long string_hash;
- };
- static int
- latin1_lookup_predicate_fn (SCM sym, void *closure)
- {
- struct latin1_lookup_data *data = closure;
- return scm_i_symbol_hash (sym) == data->string_hash
- && scm_i_is_narrow_symbol (sym)
- && scm_i_symbol_length (sym) == data->len
- && strncmp (scm_i_symbol_chars (sym), data->str, data->len) == 0;
- }
- static SCM
- lookup_interned_latin1_symbol (const char *str, size_t len,
- unsigned long raw_hash)
- {
- struct latin1_lookup_data data;
- data.str = str;
- data.len = len;
- data.string_hash = raw_hash;
-
- return scm_c_weak_set_lookup (symbols, raw_hash,
- latin1_lookup_predicate_fn,
- &data, SCM_BOOL_F);
- }
- struct utf8_lookup_data
- {
- const char *str;
- size_t len;
- unsigned long string_hash;
- };
- static int
- utf8_string_equals_wide_string (const uint8_t *narrow, size_t nlen,
- const scm_t_wchar *wide, size_t wlen)
- {
- size_t byte_idx = 0, char_idx = 0;
-
- while (byte_idx < nlen && char_idx < wlen)
- {
- ucs4_t c;
- int nbytes;
- nbytes = u8_mbtoucr (&c, narrow + byte_idx, nlen - byte_idx);
- if (nbytes == 0)
- break;
- else if (nbytes < 0)
- /* Bad UTF-8. */
- return 0;
- else if (c != wide[char_idx])
- return 0;
- byte_idx += nbytes;
- char_idx++;
- }
- return byte_idx == nlen && char_idx == wlen;
- }
- static int
- utf8_lookup_predicate_fn (SCM sym, void *closure)
- {
- struct utf8_lookup_data *data = closure;
- if (scm_i_symbol_hash (sym) != data->string_hash)
- return 0;
-
- if (scm_i_is_narrow_symbol (sym))
- return (scm_i_symbol_length (sym) == data->len
- && strncmp (scm_i_symbol_chars (sym), data->str, data->len) == 0);
- else
- return utf8_string_equals_wide_string ((const uint8_t *) data->str,
- data->len,
- scm_i_symbol_wide_chars (sym),
- scm_i_symbol_length (sym));
- }
- static SCM
- lookup_interned_utf8_symbol (const char *str, size_t len,
- unsigned long raw_hash)
- {
- struct utf8_lookup_data data;
- data.str = str;
- data.len = len;
- data.string_hash = raw_hash;
-
- return scm_c_weak_set_lookup (symbols, raw_hash,
- utf8_lookup_predicate_fn,
- &data, SCM_BOOL_F);
- }
- static int
- symbol_lookup_predicate_fn (SCM sym, void *closure)
- {
- SCM other = SCM_PACK_POINTER (closure);
- if (scm_i_symbol_hash (sym) == scm_i_symbol_hash (other)
- && scm_i_symbol_length (sym) == scm_i_symbol_length (other))
- {
- if (scm_i_is_narrow_symbol (sym))
- return scm_i_is_narrow_symbol (other)
- && (strncmp (scm_i_symbol_chars (sym),
- scm_i_symbol_chars (other),
- scm_i_symbol_length (other)) == 0);
- else
- return scm_is_true
- (scm_string_equal_p (scm_symbol_to_string (sym),
- scm_symbol_to_string (other)));
- }
- return 0;
- }
-
- static SCM
- scm_i_str2symbol (SCM str)
- {
- SCM symbol;
- unsigned long raw_hash = scm_i_string_hash (str);
- symbol = lookup_interned_symbol (str, raw_hash);
- if (scm_is_true (symbol))
- return symbol;
- else
- {
- /* The symbol was not found, create it. */
- symbol = scm_i_make_symbol (str, 0, raw_hash);
- /* Might return a different symbol, if another one was interned at
- the same time. */
- return scm_c_weak_set_add_x (symbols, raw_hash,
- symbol_lookup_predicate_fn,
- SCM_UNPACK_POINTER (symbol), symbol);
- }
- }
- static SCM
- scm_i_str2uninterned_symbol (SCM str)
- {
- unsigned long raw_hash = scm_i_string_hash (str);
- return scm_i_make_symbol (str, SCM_I_F_SYMBOL_UNINTERNED, raw_hash);
- }
- SCM_DEFINE (scm_symbol_p, "symbol?", 1, 0, 0,
- (SCM obj),
- "Return @code{#t} if @var{obj} is a symbol, otherwise return\n"
- "@code{#f}.")
- #define FUNC_NAME s_scm_symbol_p
- {
- return scm_from_bool (scm_is_symbol (obj));
- }
- #undef FUNC_NAME
- SCM_DEFINE (scm_symbol_interned_p, "symbol-interned?", 1, 0, 0,
- (SCM symbol),
- "Return @code{#t} if @var{symbol} is interned, otherwise return\n"
- "@code{#f}.")
- #define FUNC_NAME s_scm_symbol_interned_p
- {
- SCM_VALIDATE_SYMBOL (1, symbol);
- return scm_from_bool (scm_i_symbol_is_interned (symbol));
- }
- #undef FUNC_NAME
- SCM_DEFINE (scm_make_symbol, "make-symbol", 1, 0, 0,
- (SCM name),
- "Return a new uninterned symbol with the name @var{name}. "
- "The returned symbol is guaranteed to be unique and future "
- "calls to @code{string->symbol} will not return it.")
- #define FUNC_NAME s_scm_make_symbol
- {
- SCM_VALIDATE_STRING (1, name);
- return scm_i_str2uninterned_symbol (name);
- }
- #undef FUNC_NAME
- SCM_DEFINE (scm_symbol_to_string, "symbol->string", 1, 0, 0,
- (SCM s),
- "Return the name of @var{symbol} as a string. The resulting\n"
- "string is immutable.")
- #define FUNC_NAME s_scm_symbol_to_string
- {
- SCM_VALIDATE_SYMBOL (1, s);
- return scm_i_symbol_substring (s, 0, scm_i_symbol_length (s));
- }
- #undef FUNC_NAME
- SCM_DEFINE (scm_string_to_symbol, "string->symbol", 1, 0, 0,
- (SCM string),
- "Return the symbol whose name is @var{string}.")
- #define FUNC_NAME s_scm_string_to_symbol
- {
- SCM_VALIDATE_STRING (1, string);
- return scm_i_str2symbol (string);
- }
- #undef FUNC_NAME
- SCM_DEFINE (scm_string_ci_to_symbol, "string-ci->symbol", 1, 0, 0,
- (SCM str),
- "Return the symbol whose name is @var{str}. @var{str} is\n"
- "converted to lowercase before the conversion is done, if Guile\n"
- "is currently reading symbols case-insensitively.")
- #define FUNC_NAME s_scm_string_ci_to_symbol
- {
- return scm_string_to_symbol (SCM_CASE_INSENSITIVE_P
- ? scm_string_downcase(str)
- : str);
- }
- #undef FUNC_NAME
- /* The default prefix for `gensym'd symbols. */
- static SCM default_gensym_prefix;
- #define MAX_PREFIX_LENGTH 30
- SCM_DEFINE (scm_gensym, "gensym", 0, 1, 0,
- (SCM prefix),
- "Create a new symbol with a name constructed from a prefix and\n"
- "a counter value. The string @var{prefix} can be specified as\n"
- "an optional argument. Default prefix is @code{ g}. The counter\n"
- "is increased by 1 at each call. There is no provision for\n"
- "resetting the counter.")
- #define FUNC_NAME s_scm_gensym
- {
- static int gensym_counter = 0;
-
- SCM suffix, name;
- int n, n_digits;
- char buf[SCM_INTBUFLEN];
- if (SCM_UNBNDP (prefix))
- prefix = default_gensym_prefix;
- /* mutex in case another thread looks and incs at the exact same moment */
- scm_i_scm_pthread_mutex_lock (&scm_i_misc_mutex);
- n = gensym_counter++;
- scm_i_pthread_mutex_unlock (&scm_i_misc_mutex);
- n_digits = scm_iint2str (n, 10, buf);
- suffix = scm_from_latin1_stringn (buf, n_digits);
- name = scm_string_append (scm_list_2 (prefix, suffix));
- return scm_string_to_symbol (name);
- }
- #undef FUNC_NAME
- SCM_DEFINE (scm_symbol_hash, "symbol-hash", 1, 0, 0,
- (SCM symbol),
- "Return a hash value for @var{symbol}.")
- #define FUNC_NAME s_scm_symbol_hash
- {
- SCM_VALIDATE_SYMBOL (1, symbol);
- return scm_from_ulong (scm_i_symbol_hash (symbol));
- }
- #undef FUNC_NAME
- SCM
- scm_from_locale_symbol (const char *sym)
- {
- return scm_from_locale_symboln (sym, -1);
- }
- SCM
- scm_from_locale_symboln (const char *sym, size_t len)
- {
- SCM str = scm_from_locale_stringn (sym, len);
- return scm_i_str2symbol (str);
- }
- SCM
- scm_take_locale_symboln (char *sym, size_t len)
- {
- SCM str;
- str = scm_take_locale_stringn (sym, len);
- return scm_i_str2symbol (str);
- }
- SCM
- scm_take_locale_symbol (char *sym)
- {
- return scm_take_locale_symboln (sym, (size_t)-1);
- }
- SCM
- scm_from_latin1_symbol (const char *sym)
- {
- return scm_from_latin1_symboln (sym, -1);
- }
- SCM
- scm_from_latin1_symboln (const char *sym, size_t len)
- {
- unsigned long hash;
- SCM ret;
- if (len == (size_t) -1)
- len = strlen (sym);
- hash = scm_i_latin1_string_hash (sym, len);
- ret = lookup_interned_latin1_symbol (sym, len, hash);
- if (scm_is_false (ret))
- {
- SCM str = scm_from_latin1_stringn (sym, len);
- ret = scm_i_str2symbol (str);
- }
- return ret;
- }
- SCM
- scm_from_utf8_symbol (const char *sym)
- {
- return scm_from_utf8_symboln (sym, -1);
- }
- SCM
- scm_from_utf8_symboln (const char *sym, size_t len)
- {
- unsigned long hash;
- SCM ret;
- if (len == (size_t) -1)
- len = strlen (sym);
- hash = scm_i_utf8_string_hash (sym, len);
- ret = lookup_interned_utf8_symbol (sym, len, hash);
- if (scm_is_false (ret))
- {
- SCM str = scm_from_utf8_stringn (sym, len);
- ret = scm_i_str2symbol (str);
- }
- return ret;
- }
- void
- scm_symbols_prehistory ()
- {
- symbols = scm_c_make_weak_set (5000);
- }
- void
- scm_init_symbols ()
- {
- #include "symbols.x"
- default_gensym_prefix = scm_from_latin1_string (" g");
- }
|