localcharset.c.diff 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129
  1. Add a variant of `locale_charset' that returns its result based solely on
  2. information from the environment. See
  3. http://lists.gnu.org/archive/html/guile-devel/2011-11/msg00040.html for the
  4. rationale.
  5. --- a/lib/localcharset.c
  6. +++ b/lib/localcharset.c
  7. @@ -544,3 +544,120 @@ locale_charset (void)
  8. return codeset;
  9. }
  10. +
  /* A variant of `locale_charset' that makes no calls to `setlocale',
     `nl_langinfo', etc.: the result is derived solely from the LC_ALL,
     LC_CTYPE and LANG environment variables, consulted in that order (the
     first one that is set and non-empty wins).

     Return value:
       - the text following the `.' in the locale name, without any
         `@modifier' trailer, when that text is non-empty (and, if a
         modifier had to be stripped, short enough for the internal
         buffer);
       - "ASCII" when the locale name is exactly "C";
       - otherwise the alias-table / platform default computed below.

     The result is never NULL and never the empty string.  It may point
     into the environment block, into a static buffer that the next call
     overwrites, or at a string literal; the caller must neither modify
     nor free it.  */
  const char *
  environ_locale_charset (void)
  {
    static char buf[2 + 10 + 1];
    /* Start from "no codeset known" so that every path which does not
       return early reaches the alias resolution with a defined value.  */
    const char *codeset = "";
    const char *locale;

    locale = getenv ("LC_ALL");
    if (locale == NULL || locale[0] == '\0')
      {
        locale = getenv ("LC_CTYPE");
        if (locale == NULL || locale[0] == '\0')
          locale = getenv ("LANG");
      }

    if (locale != NULL && locale[0] != '\0')
      {
        /* If the locale name contains an encoding after the dot, return it.  */
        const char *dot = strchr (locale, '.');

        if (dot != NULL)
          {
            const char *modifier;

            dot++;
            /* Look for the possible @... trailer and remove it, if any.  */
            modifier = strchr (dot, '@');
            if (modifier == NULL)
              {
                /* "xx_YY." carries no encoding; fall through to the
                   defaults instead of returning an empty string.  */
                if (dot[0] != '\0')
                  return dot;
              }
            else
              {
                /* MODIFIER was found by searching forward from DOT, so the
                   difference is non-negative.  */
                size_t len = (size_t) (modifier - dot);

                if (len > 0 && len < sizeof (buf))
                  {
                    memcpy (buf, dot, len);
                    buf[len] = '\0';
                    return buf;
                  }
                /* Empty or over-long encoding: treat it as unknown and let
                   the code below pick a default.  */
              }
          }
        else if (strcmp (locale, "C") == 0)
          return "ASCII";
      }

    /* Resolve alias.  */
    {
# ifdef alias_table_defined
      /* On some platforms, UTF-8 locales are the most frequently used ones.
         Speed up the common case and slow down the less common cases by
         testing for this case first.  */
#  if defined __OpenBSD__ || (defined __APPLE__ && defined __MACH__) || defined __sun || defined __CYGWIN__
      if (strcmp (codeset, "UTF-8") == 0)
        goto done_table_lookup;
      else
#  endif
        {
          const struct table_entry * const table = alias_table;
          size_t const table_size =
            sizeof (alias_table) / sizeof (struct table_entry);
          /* The table is sorted.  Perform a binary search.  */
          size_t hi = table_size;
          size_t lo = 0;
          while (lo < hi)
            {
              /* Invariant:
                 for i < lo, strcmp (table[i].alias, codeset) < 0,
                 for i >= hi, strcmp (table[i].alias, codeset) > 0.  */
              size_t mid = (hi + lo) >> 1; /* >= lo, < hi */
              int cmp = strcmp (table[mid].alias, codeset);
              if (cmp < 0)
                lo = mid + 1;
              else if (cmp > 0)
                hi = mid;
              else
                {
                  /* Found an i with
                     strcmp (table[i].alias, codeset) == 0.  */
                  codeset = table[mid].canonical;
                  goto done_table_lookup;
                }
            }
        }
      /* The label sits in the never-taken `if (0)' arm so that a successful
         lookup jumps past the "not found" block that follows.  */
      if (0)
        done_table_lookup: ;
      else
# endif
        {
          /* Did not find it in the table.  */
          /* On Mac OS X, all modern locales use the UTF-8 encoding.
             BeOS and Haiku have a single locale, and it has UTF-8 encoding.  */
# if (defined __APPLE__ && defined __MACH__) || defined __BEOS__ || defined __HAIKU__
          codeset = "UTF-8";
# else
          /* Don't return an empty string.  GNU libc and GNU libiconv interpret
             the empty string as denoting "the locale's character encoding",
             thus GNU libiconv would call this function a second time.  */
          if (codeset[0] == '\0')
            codeset = "ASCII";
# endif
        }
    }

    /* Last line of defence against returning an empty string (see above for
       why that must not happen).  */
    if (codeset[0] == '\0')
      /* Default to Latin-1, for backward compatibility with Guile 1.8.  */
      codeset = "ISO-8859-1";

    return codeset;
  }