123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382 |
- From ca0212ba19b64488b9e8459a762c11ecd6e7d0bd Mon Sep 17 00:00:00 2001
- From: Petr Stodulka <pstodulk@redhat.com>
- Date: Tue, 24 Nov 2015 17:56:11 +0100
- Subject: [PATCH] print correctly non-ascii filenames
- ---
- extract.c | 289 ++++++++++++++++++++++++++++++++++++++++++++++++--------------
- unzpriv.h | 7 ++
- 2 files changed, 233 insertions(+), 63 deletions(-)
- diff --git a/extract.c b/extract.c
- index 0ee4e93..741b7e0 100644
- --- a/extract.c
- +++ b/extract.c
- @@ -2648,8 +2648,21 @@ static void set_deferred_symlink(__G__ slnk_entry)
- } /* end function set_deferred_symlink() */
- #endif /* SYMLINKS */
-
- +/*
- + * If Unicode is supported, assume we have what we need to do this
- + * check using wide characters, avoiding MBCS issues.
- + */
-
- -
- +#ifndef UZ_FNFILTER_REPLACECHAR
- + /* A convenient choice for the replacement of unprintable char codes is
- + * the "single char wildcard", as this character is quite unlikely to
- + * appear in filenames by itself. The following default definition
- + * sets the replacement char to a question mark as the most common
- + * "single char wildcard"; this setting should be overridden in the
- + * appropriate system-specific configuration header when needed.
- + */
- +# define UZ_FNFILTER_REPLACECHAR '?'
- +#endif
-
- /*************************/
- /* Function fnfilter() */ /* here instead of in list.c for SFX */
- @@ -2661,48 +2674,168 @@ char *fnfilter(raw, space, size) /* convert name to safely printable form */
- extent size;
- {
- #ifndef NATIVE /* ASCII: filter ANSI escape codes, etc. */
- - ZCONST uch *r=(ZCONST uch *)raw;
- + ZCONST uch *r; // =(ZCONST uch *)raw;
- uch *s=space;
- uch *slim=NULL;
- uch *se=NULL;
- int have_overflow = FALSE;
-
- - if (size > 0) {
- - slim = space + size
- -#ifdef _MBCS
- - - (MB_CUR_MAX - 1)
- -#endif
- - - 4;
- +# if defined( UNICODE_SUPPORT) && defined( _MBCS)
- +/* If Unicode support is enabled, and we have multi-byte characters,
- + * then do the isprint() checks by first converting to wide characters
- + * and checking those. This avoids our having to parse multi-byte
- + * characters for ourselves. After the wide-char replacements have been
- + * made, the wide string is converted back to the local character set.
- + */
- + wchar_t *wstring; /* wchar_t version of raw */
- + size_t wslen; /* length of wstring */
- + wchar_t *wostring; /* wchar_t version of output string */
- + size_t woslen; /* length of wostring */
- + char *newraw; /* new raw */
- +
- + /* 2012-11-06 SMS.
- + * Changed to check the value returned by mbstowcs(), and bypass the
- + * Unicode processing if it fails. This seems to fix a problem
- + * reported in the SourceForge forum, but it's not clear that we
- + * should be doing any Unicode processing without some evidence that
- + * the name actually is Unicode. (Check bit 11 in the flags before
- + * coming here?)
- + * http://sourceforge.net/p/infozip/bugs/40/
- + */
- +
- + if (MB_CUR_MAX <= 1)
- + {
- + /* There's no point to converting multi-byte chars if there are
- + * no multi-byte chars.
- + */
- + wslen = (size_t)-1;
- }
- - while (*r) {
- - if (size > 0 && s >= slim && se == NULL) {
- - se = s;
- + else
- + {
- + /* Get Unicode wide character count (for storage allocation). */
- + wslen = mbstowcs( NULL, raw, 0);
- + }
- +
- + if (wslen != (size_t)-1)
- + {
- + /* Apparently valid Unicode. Allocate wide-char storage. */
- + wstring = (wchar_t *)malloc((wslen + 1) * sizeof(wchar_t));
- + if (wstring == NULL) {
- + strcpy( (char *)space, raw);
- + return (char *)space;
- }
- -#ifdef QDOS
- - if (qlflag & 2) {
- - if (*r == '/' || *r == '.') {
- + wostring = (wchar_t *)malloc(2 * (wslen + 1) * sizeof(wchar_t));
- + if (wostring == NULL) {
- + free(wstring);
- + strcpy( (char *)space, raw);
- + return (char *)space;
- + }
- +
- + /* Convert the multi-byte Unicode to wide chars. */
- + wslen = mbstowcs(wstring, raw, wslen + 1);
- +
- + /* Filter the wide-character string. */
- + fnfilterw( wstring, wostring, (2 * (wslen + 1) * sizeof(wchar_t)));
- +
- + /* Convert filtered wide chars back to multi-byte. */
- + woslen = wcstombs( NULL, wostring, 0);
- + if ((newraw = malloc(woslen + 1)) == NULL) {
- + free(wstring);
- + free(wostring);
- + strcpy( (char *)space, raw);
- + return (char *)space;
- + }
- + woslen = wcstombs( newraw, wostring, (woslen * MB_CUR_MAX) + 1);
- +
- + if (size > 0) {
- + slim = space + size - 4;
- + }
- + r = (ZCONST uch *)newraw;
- + while (*r) {
- + if (size > 0 && s >= slim && se == NULL) {
- + se = s;
- + }
- +# ifdef QDOS
- + if (qlflag & 2) {
- + if (*r == '/' || *r == '.') {
- + if (se != NULL && (s > (space + (size-3)))) {
- + have_overflow = TRUE;
- + break;
- + }
- + ++r;
- + *s++ = '_';
- + continue;
- + }
- + } else
- +# endif
- + {
- if (se != NULL && (s > (space + (size-3)))) {
- have_overflow = TRUE;
- break;
- }
- - ++r;
- - *s++ = '_';
- - continue;
- + *s++ = *r++;
- }
- - } else
- + }
- + if (have_overflow) {
- + strcpy((char *)se, "...");
- + } else {
- + *s = '\0';
- + }
- +
- + free(wstring);
- + free(wostring);
- + free(newraw);
- + }
- + else
- +# endif /* defined( UNICODE_SUPPORT) && defined( _MBCS) */
- + {
- + /* No Unicode support, or apparently invalid Unicode. */
- + r = (ZCONST uch *)raw;
- +
- + if (size > 0) {
- + slim = space + size
- +#ifdef _MBCS
- + - (MB_CUR_MAX - 1)
- +#endif
- + - 4;
- + }
- + while (*r) {
- + if (size > 0 && s >= slim && se == NULL) {
- + se = s;
- + }
- +#ifdef QDOS
- + if (qlflag & 2) {
- + if (*r == '/' || *r == '.') {
- + if (se != NULL && (s > (space + (size-3)))) {
- + have_overflow = TRUE;
- + break;
- + }
- + ++r;
- + *s++ = '_';
- + continue;
- + }
- + } else
- #endif
- #ifdef HAVE_WORKING_ISPRINT
- -# ifndef UZ_FNFILTER_REPLACECHAR
- - /* A convenient choice for the replacement of unprintable char codes is
- - * the "single char wildcard", as this character is quite unlikely to
- - * appear in filenames by itself. The following default definition
- - * sets the replacement char to a question mark as the most common
- - * "single char wildcard"; this setting should be overridden in the
- - * appropiate system-specific configuration header when needed.
- - */
- -# define UZ_FNFILTER_REPLACECHAR '?'
- -# endif
- - if (!isprint(*r)) {
- + if (!isprint(*r)) {
- + if (*r < 32) {
- + /* ASCII control codes are escaped as "^{letter}". */
- + if (se != NULL && (s > (space + (size-4)))) {
- + have_overflow = TRUE;
- + break;
- + }
- + *s++ = '^', *s++ = (uch)(64 + *r++);
- + } else {
- + /* Other unprintable codes are replaced by the
- + * placeholder character. */
- + if (se != NULL && (s > (space + (size-3)))) {
- + have_overflow = TRUE;
- + break;
- + }
- + *s++ = UZ_FNFILTER_REPLACECHAR;
- + INCSTR(r);
- + }
- +#else /* !HAVE_WORKING_ISPRINT */
- if (*r < 32) {
- /* ASCII control codes are escaped as "^{letter}". */
- if (se != NULL && (s > (space + (size-4)))) {
- @@ -2710,47 +2843,30 @@ char *fnfilter(raw, space, size) /* convert name to safely printable form */
- break;
- }
- *s++ = '^', *s++ = (uch)(64 + *r++);
- +#endif /* ?HAVE_WORKING_ISPRINT */
- } else {
- - /* Other unprintable codes are replaced by the
- - * placeholder character. */
- +#ifdef _MBCS
- + unsigned i = CLEN(r);
- + if (se != NULL && (s > (space + (size-i-2)))) {
- + have_overflow = TRUE;
- + break;
- + }
- + for (; i > 0; i--)
- + *s++ = *r++;
- +#else
- if (se != NULL && (s > (space + (size-3)))) {
- have_overflow = TRUE;
- break;
- }
- - *s++ = UZ_FNFILTER_REPLACECHAR;
- - INCSTR(r);
- - }
- -#else /* !HAVE_WORKING_ISPRINT */
- - if (*r < 32) {
- - /* ASCII control codes are escaped as "^{letter}". */
- - if (se != NULL && (s > (space + (size-4)))) {
- - have_overflow = TRUE;
- - break;
- - }
- - *s++ = '^', *s++ = (uch)(64 + *r++);
- -#endif /* ?HAVE_WORKING_ISPRINT */
- - } else {
- -#ifdef _MBCS
- - unsigned i = CLEN(r);
- - if (se != NULL && (s > (space + (size-i-2)))) {
- - have_overflow = TRUE;
- - break;
- - }
- - for (; i > 0; i--)
- *s++ = *r++;
- -#else
- - if (se != NULL && (s > (space + (size-3)))) {
- - have_overflow = TRUE;
- - break;
- - }
- - *s++ = *r++;
- #endif
- - }
- - }
- - if (have_overflow) {
- - strcpy((char *)se, "...");
- - } else {
- - *s = '\0';
- + }
- + }
- + if (have_overflow) {
- + strcpy((char *)se, "...");
- + } else {
- + *s = '\0';
- + }
- }
-
- #ifdef WINDLL
- @@ -2772,6 +2888,53 @@ char *fnfilter(raw, space, size) /* convert name to safely printable form */
- } /* end function fnfilter() */
-
-
- +#if defined( UNICODE_SUPPORT) && defined( _MBCS)
- +
- +/****************************/
- +/* Function fnfilter[w]() */ /* (Here instead of in list.c for SFX.) */
- +/****************************/
- +
- +/* dst must hold at least 2 * (wcslen(src) + 1) wide chars; siz is not checked. */
- +
- +/* fnfilterw() - Convert wide-character name to safely printable form. */
- +
- +wchar_t *fnfilterw( src, dst, siz)
- + ZCONST wchar_t *src; /* Pointer to source char (string). */
- + wchar_t *dst; /* Pointer to destination char (string). */
- + extent siz; /* Not used (!). */
- +{
- + wchar_t *dsx = dst;
- +
- + /* Filter the wide chars. */
- + while (*src)
- + {
- + if (iswprint( *src))
- + {
- + /* Printable code. Copy it. */
- + *dst++ = *src;
- + }
- + else
- + {
- + /* Unprintable code. Substitute something printable for it. */
- + if (*src < 32)
- + {
- + /* Replace ASCII control code with "^{letter}". */
- + *dst++ = (wchar_t)'^';
- + *dst++ = (wchar_t)(64 + *src);
- + }
- + else
- + {
- + /* Replace other unprintable code with the placeholder. */
- + *dst++ = (wchar_t)UZ_FNFILTER_REPLACECHAR;
- + }
- + }
- + src++;
- + }
- + *dst = (wchar_t)0; /* NUL-terminate the destination string. */
- + return dsx;
- +} /* fnfilterw(). */
- +
- +#endif /* defined( UNICODE_SUPPORT) && defined( _MBCS) */
-
-
- #ifdef SET_DIR_ATTRIB
- diff --git a/unzpriv.h b/unzpriv.h
- index 22d3923..e48a652 100644
- --- a/unzpriv.h
- +++ b/unzpriv.h
- @@ -1212,6 +1212,7 @@
- # ifdef UNICODE_WCHAR
- # if !(defined(_WIN32_WCE) || defined(POCKET_UNZIP))
- # include <wchar.h>
- +# include <wctype.h>
- # endif
- # endif
- # ifndef _MBCS /* no need to include <locale.h> twice, see below */
- @@ -2410,6 +2411,12 @@ int memflush OF((__GPRO__ ZCONST uch *rawbuf, ulg size));
- char *fnfilter OF((ZCONST char *raw, uch *space,
- extent size));
-
- +# if defined( UNICODE_SUPPORT) && defined( _MBCS)
- +wchar_t *fnfilterw OF((ZCONST wchar_t *src, wchar_t *dst,
- + extent siz));
- +#endif
- +
- +
- /*---------------------------------------------------------------------------
- Decompression functions:
- ---------------------------------------------------------------------------*/
- --
- 2.4.3
|