123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229 |
- /*
- * Copyright (c) 2010 Ævar Arnfjörð Bjarmason
- */
- #include "cache.h"
- #include "exec-cmd.h"
- #include "gettext.h"
- #include "strbuf.h"
- #include "utf8.h"
- #include "config.h"
- #ifndef NO_GETTEXT
- # include <locale.h>
- # include <libintl.h>
- # ifdef GIT_WINDOWS_NATIVE
/*
 * Derive a character set name from the locale environment variables.
 *
 * LC_ALL, LC_CTYPE and LANG are consulted in that order; an unset or
 * empty LC_ALL or LC_CTYPE is skipped. If the selected value contains a
 * '.', the text after the first '.' is returned, otherwise the value
 * itself is returned. "UTF-8" is returned when nothing was selected and
 * LANG is unset.
 *
 * The returned pointer is either a string literal or points into the
 * string returned by getenv(); the caller must not free it.
 */
static const char *locale_charset(void)
{
	static const char *const candidates[] = { "LC_ALL", "LC_CTYPE", "LANG" };
	const char *value = NULL;
	const char *sep;
	size_t i;

	/*
	 * Stop at the first non-empty setting. If none is non-empty,
	 * 'value' ends up as whatever LANG yielded (NULL or "").
	 */
	for (i = 0; i < sizeof(candidates) / sizeof(candidates[0]); i++) {
		value = getenv(candidates[i]);
		if (value && *value)
			break;
	}

	if (!value)
		return "UTF-8";

	sep = strchr(value, '.');
	if (sep)
		return sep + 1;
	return value;
}
- # elif defined HAVE_LIBCHARSET_H
- # include <libcharset.h>
- # else
- # include <langinfo.h>
- # define locale_charset() nl_langinfo(CODESET)
- # endif
- #endif
/*
 * Name of the character set associated with the user's locale.
 *
 * Starts out NULL. init_gettext_charset() stores the result of
 * locale_charset() here, and in NO_GETTEXT builds is_utf8_locale()
 * fills it in lazily from the environment.
 */
static const char *charset;
/*
 * Work out which languages the user prefers.
 *
 * A non-empty LANGUAGE environment variable wins and is returned as
 * is; it may be a colon-separated list such as "ko:ja:en". Otherwise,
 * unless NO_GETTEXT is defined, the current LC_MESSAGES locale name is
 * returned, provided it is non-empty and is neither "C" nor "POSIX".
 *
 * Returns NULL when no preference could be determined.
 */
const char *get_preferred_languages(void)
{
	const char *langs = getenv("LANGUAGE");

	if (langs && *langs)
		return langs;

#ifndef NO_GETTEXT
	/* A NULL locale argument only queries the current setting. */
	langs = setlocale(LC_MESSAGES, NULL);
	if (!langs || !*langs)
		return NULL;
	if (!strcmp(langs, "C") || !strcmp(langs, "POSIX"))
		return NULL;
	return langs;
#else
	return NULL;
#endif
}
/*
 * Return the boolean value of the GIT_TEST_GETTEXT_POISON environment
 * variable as parsed by git_env_bool(), defaulting to 0.
 *
 * The answer is computed on the first call and remembered in a static,
 * so later calls do not consult the environment again (unless the
 * stored answer happens to be -1, the "not yet computed" marker).
 */
int use_gettext_poison(void)
{
	static int cached = -1;

	if (cached != -1)
		return cached;

	cached = git_env_bool("GIT_TEST_GETTEXT_POISON", 0);
	return cached;
}
- #ifndef NO_GETTEXT
/*
 * Run vsnprintf() on the given format and arguments into a small
 * throwaway buffer and hand back vsnprintf()'s return value. The
 * formatted text itself is discarded; only the status matters to the
 * caller.
 */
static int test_vsnprintf(const char *fmt, ...)
{
	char scratch[26];
	va_list args;
	int status;

	va_start(args, fmt);
	status = vsnprintf(scratch, sizeof(scratch), fmt, args);
	va_end(args);

	return status;
}
- static void init_gettext_charset(const char *domain)
- {
- /*
- This trick arranges for messages to be emitted in the user's
- requested encoding, but avoids setting LC_CTYPE from the
- environment for the whole program.
- This primarily done to avoid a bug in vsnprintf in the GNU C
- Library [1]. which triggered a "your vsnprintf is broken" error
- on Git's own repository when inspecting v0.99.6~1 under a UTF-8
- locale.
- That commit contains a ISO-8859-1 encoded author name, which
- the locale aware vsnprintf(3) won't interpolate in the format
- argument, due to mismatch between the data encoding and the
- locale.
- Even if it wasn't for that bug we wouldn't want to use LC_CTYPE at
- this point, because it'd require auditing all the code that uses C
- functions whose semantics are modified by LC_CTYPE.
- But only setting LC_MESSAGES as we do creates a problem, since
- we declare the encoding of our PO files[2] the gettext
- implementation will try to recode it to the user's locale, but
- without LC_CTYPE it'll emit something like this on 'git init'
- under the Icelandic locale:
- Bj? til t?ma Git lind ? /hlagh/.git/
- Gettext knows about the encoding of our PO file, but we haven't
- told it about the user's encoding, so all the non-US-ASCII
- characters get encoded to question marks.
- But we're in luck! We can set LC_CTYPE from the environment
- only while we call nl_langinfo and
- bind_textdomain_codeset. That suffices to tell gettext what
- encoding it should emit in, so it'll now say:
- Bjó til tóma Git lind í /hlagh/.git/
- And the equivalent ISO-8859-1 string will be emitted under a
- ISO-8859-1 locale.
- With this change way we get the advantages of setting LC_CTYPE
- (talk to the user in his language/encoding), without the major
- drawbacks (changed semantics for C functions we rely on).
- However foreign functions using other message catalogs that
- aren't using our neat trick will still have a problem, e.g. if
- we have to call perror(3):
- #include <stdio.h>
- #include <locale.h>
- #include <errno.h>
- int main(void)
- {
- setlocale(LC_MESSAGES, "");
- setlocale(LC_CTYPE, "C");
- errno = ENODEV;
- perror("test");
- return 0;
- }
- Running that will give you a message with question marks:
- $ LANGUAGE= LANG=de_DE.utf8 ./test
- test: Kein passendes Ger?t gefunden
- The vsnprintf bug has been fixed since glibc 2.17.
- Then we could simply set LC_CTYPE from the environment, which would
- make things like the external perror(3) messages work.
- See t/t0203-gettext-setlocale-sanity.sh's "gettext.c" tests for
- regression tests.
- 1. http://sourceware.org/bugzilla/show_bug.cgi?id=6530
- 2. E.g. "Content-Type: text/plain; charset=UTF-8\n" in po/is.po
- */
- setlocale(LC_CTYPE, "");
- charset = locale_charset();
- bind_textdomain_codeset(domain, charset);
- /* the string is taken from v0.99.6~1 */
- if (test_vsnprintf("%.*s", 13, "David_K\345gedal") < 0)
- setlocale(LC_CTYPE, "C");
- }
- void git_setup_gettext(void)
- {
- const char *podir = getenv(GIT_TEXT_DOMAIN_DIR_ENVIRONMENT);
- char *p = NULL;
- if (!podir)
- podir = p = system_path(GIT_LOCALE_PATH);
- use_gettext_poison(); /* getenv() reentrancy paranoia */
- if (!is_directory(podir)) {
- free(p);
- return;
- }
- bindtextdomain("git", podir);
- setlocale(LC_MESSAGES, "");
- setlocale(LC_TIME, "");
- init_gettext_charset("git");
- textdomain("git");
- free(p);
- }
/*
 * Return the number of columns string 's' takes up in the current
 * locale: utf8_strwidth(s) when is_utf8_locale() reports a UTF-8
 * locale, strlen(s) otherwise. The locale check is done once and its
 * result is remembered for later calls.
 */
int gettext_width(const char *s)
{
	static int utf8_locale = -1;

	if (utf8_locale == -1)
		utf8_locale = is_utf8_locale();

	if (!utf8_locale)
		return strlen(s);
	return utf8_strwidth(s);
}
- #endif
- int is_utf8_locale(void)
- {
- #ifdef NO_GETTEXT
- if (!charset) {
- const char *env = getenv("LC_ALL");
- if (!env || !*env)
- env = getenv("LC_CTYPE");
- if (!env || !*env)
- env = getenv("LANG");
- if (!env)
- env = "";
- if (strchr(env, '.'))
- env = strchr(env, '.') + 1;
- charset = xstrdup(env);
- }
- #endif
- return is_encoding_utf8(charset);
- }
|