123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399 |
- From: Giovanni Scafora <giovanni.archlinux.org>
- Subject: unzip files encoded with non-latin, non-unicode file names
- Last-Update: 2015-02-11
- Updated 2015-02-11 by Marc Deslauriers <marc.deslauriers@canonical.com>
- to fix buffer overflow in charset_to_intern()
- Index: unzip-6.0/unix/unix.c
- ===================================================================
- --- unzip-6.0.orig/unix/unix.c 2015-02-11 08:46:43.675324290 -0500
- +++ unzip-6.0/unix/unix.c 2015-02-11 09:18:04.902081319 -0500
- @@ -30,6 +30,9 @@
- #define UNZIP_INTERNAL
- #include "unzip.h"
-
- +#include <iconv.h>
- +#include <langinfo.h>
- +
- #ifdef SCO_XENIX
- # define SYSNDIR
- #else /* SCO Unix, AIX, DNIX, TI SysV, Coherent 4.x, ... */
- @@ -1874,3 +1877,102 @@
- }
- }
- #endif /* QLZIP */
- +
- +
- +typedef struct { /* one local <-> archive charset pairing */
- + char *local_charset; /* compared (case-insensitively) with nl_langinfo(CODESET) */
- + char *archive_charset; /* copied into OEM_CP when local_charset matches */
- +} CHARSET_MAP;
- +
- +/* Read-only mapping of local <-> archive charsets used by default to convert
- + * filenames of DOS/Windows Zip archives. Currently very basic. */
- +static const CHARSET_MAP dos_charset_map[] = {
- +    { "ANSI_X3.4-1968", "CP850" },
- +    { "ISO-8859-1", "CP850" },
- +    { "CP1252", "CP850" },
- +    { "UTF-8", "CP866" },
- +    { "KOI8-R", "CP866" },
- +    { "KOI8-U", "CP866" },
- +    { "ISO-8859-5", "CP866" }
- +};
- +
- +char OEM_CP[MAX_CP_NAME] = "";
- +char ISO_CP[MAX_CP_NAME] = "";
- +
- +/* Guess a default for OEM_CP by looking up the locale codeset in
- + * dos_charset_map.  Does nothing if OEM_CP is already set; ISO_CP is untouched. */
- +void init_conversion_charsets(void)
- +{
- +    const char *local_charset;
- +    size_t i;
- +
- +    if (*OEM_CP != '\0')
- +        return;    /* a charset was already chosen: do not override it */
- +    local_charset = nl_langinfo(CODESET);
- +    for (i = 0; i < sizeof(dos_charset_map) / sizeof(dos_charset_map[0]); i++) {
- +        if (strcasecmp(local_charset, dos_charset_map[i].local_charset) == 0) {
- +            strncpy(OEM_CP, dos_charset_map[i].archive_charset, sizeof(OEM_CP) - 1);
- +            OEM_CP[sizeof(OEM_CP) - 1] = '\0';    /* strncpy() may not terminate */
- +            break;
- +        }
- +    }
- +}
- +
- +/* Convert `string` in place from `from_charset` to the current locale's
- + * charset using iconv().  Be as non-intrusive as possible: if the charset is
- + * empty or any step of the conversion fails, the string is left intact. */
- +static void charset_to_intern(char *string, char *from_charset)
- +{
- +    iconv_t cd;
- +    char *s, *d, *buf;
- +    size_t slen, dlen, buflen;
- +    const char *local_charset;
- +
- +    if (*from_charset == '\0')
- +        return;
- +
- +    local_charset = nl_langinfo(CODESET);
- +    cd = iconv_open(local_charset, from_charset);
- +    if (cd == (iconv_t)-1)
- +        return;
- +
- +    slen = strlen(string);
- +    s = string;
- +
- +    /* This function also gets called with G.outbuf in fileio.c, so cap the
- +     * scratch buffer at the smaller of FILNAMSIZ and OUTBUFSIZ + 1; the
- +     * converted text copied back can then never exceed that bound. */
- +    buflen = FILNAMSIZ;
- +    if (OUTBUFSIZ + 1 < FILNAMSIZ)
- +        buflen = OUTBUFSIZ + 1;
- +
- +    /* calloc() zero-fills, so buf stays NUL-terminated after iconv(). */
- +    d = buf = calloc(1, buflen);
- +    if (buf == NULL)
- +        goto cleanup;
- +
- +    dlen = buflen - 1;    /* keep the last byte for the terminator */
- +
- +    /* Any iconv() failure (bad input, output too small) keeps the input. */
- +    if (iconv(cd, &s, &slen, &d, &dlen) == (size_t)-1)
- +        goto cleanup;
- +
- +    /* Copy back only the converted bytes plus the terminating NUL. */
- +    memcpy(string, buf, (size_t)(d - buf) + 1);
- +
- +cleanup:
- +    free(buf);
- +    iconv_close(cd);
- +}
- +
- +/* Convert a string in place from OEM_CP to the current locale charset. */
- +void oem_intern(char *string)    /* external linkage: used via _OEM_INTERN() */
- +{
- +    charset_to_intern(string, OEM_CP);
- +}
- +
- +/* Convert a string in place from ISO_CP to the current locale charset. */
- +void iso_intern(char *string)    /* external linkage: used via _ISO_INTERN() */
- +{
- +    charset_to_intern(string, ISO_CP);
- +}
- Index: unzip-6.0/unix/unxcfg.h
- ===================================================================
- --- unzip-6.0.orig/unix/unxcfg.h 2015-02-11 08:46:43.675324290 -0500
- +++ unzip-6.0/unix/unxcfg.h 2015-02-11 08:46:43.671324260 -0500
- @@ -228,4 +228,30 @@
- /* wild_dir, dirname, wildname, matchname[], dirnamelen, have_dirname, */
- /* and notfirstcall are used by do_wild(). */
-
- +
- +#define MAX_CP_NAME 25 /* size of the OEM_CP and ISO_CP charset name buffers */
- +/* Map SETLOCALE() onto setlocale(), replacing any earlier definition. */
- +#ifdef SETLOCALE
- +# undef SETLOCALE
- +#endif
- +#define SETLOCALE(category, locale) setlocale(category, locale)
- +#include <locale.h>
- +/* _ISO_INTERN() goes through iso_intern(), replacing any earlier definition. */
- +#ifdef _ISO_INTERN
- +# undef _ISO_INTERN
- +#endif
- +#define _ISO_INTERN(str1) iso_intern(str1)
- +/* _OEM_INTERN() goes through oem_intern(), replacing any earlier definition. */
- +#ifdef _OEM_INTERN
- +# undef _OEM_INTERN
- +#endif
- +#ifndef IZ_OEM2ISO_ARRAY /* NOTE(review): consumer of this macro is not visible here - confirm why it is forced on */
- +# define IZ_OEM2ISO_ARRAY
- +#endif
- +#define _OEM_INTERN(str1) oem_intern(str1)
- +/* Charset conversion helpers implemented in unix/unix.c. */
- +void iso_intern(char *);
- +void oem_intern(char *);
- +void init_conversion_charsets(void);
- +
- #endif /* !__unxcfg_h */
- Index: unzip-6.0/unzip.c
- ===================================================================
- --- unzip-6.0.orig/unzip.c 2015-02-11 08:46:43.675324290 -0500
- +++ unzip-6.0/unzip.c 2015-02-11 08:46:43.675324290 -0500
- @@ -327,11 +327,21 @@
- -2 just filenames but allow -h/-t/-z -l long Unix \"ls -l\" format\n\
- -v verbose, multi-page format\n";
-
- +#ifndef UNIX
- static ZCONST char Far ZipInfoUsageLine3[] = "miscellaneous options:\n\
- -h print header line -t print totals for listed files or for all\n\
- -z print zipfile comment -T print file times in sortable decimal format\
- \n -C be case-insensitive %s\
- -x exclude filenames that follow from listing\n";
- +#else /* UNIX: same single %s conversion as the text above, plus -O and -I */
- +static ZCONST char Far ZipInfoUsageLine3[] = "miscellaneous options:\n\
- + -h print header line -t print totals for listed files or for all\n\
- + -z print zipfile comment -T print file times in sortable decimal format\
- +\n -C be case-insensitive %s\
- + -x exclude filenames that follow from listing\n\
- + -O CHARSET specify a character encoding for DOS, Windows and OS/2 archives\n\
- + -I CHARSET specify a character encoding for UNIX and other archives\n";
- +#endif /* !UNIX */
- #ifdef MORE
- static ZCONST char Far ZipInfoUsageLine4[] =
- " -M page output through built-in \"more\"\n";
- @@ -664,6 +674,17 @@
- -U use escapes for all non-ASCII Unicode -UU ignore any Unicode fields\n\
- -C match filenames case-insensitively -L make (some) names \
- lowercase\n %-42s -V retain VMS version numbers\n%s";
- +#elif (defined UNIX) /* Unix variant of the modifiers help; also lists the -O and -I charset options */
- +static ZCONST char Far UnzipUsageLine4[] = "\
- +modifiers:\n\
- + -n never overwrite existing files -q quiet mode (-qq => quieter)\n\
- + -o overwrite files WITHOUT prompting -a auto-convert any text files\n\
- + -j junk paths (do not make directories) -aa treat ALL files as text\n\
- + -U use escapes for all non-ASCII Unicode -UU ignore any Unicode fields\n\
- + -C match filenames case-insensitively -L make (some) names \
- +lowercase\n %-42s -V retain VMS version numbers\n%s\
- + -O CHARSET specify a character encoding for DOS, Windows and OS/2 archives\n\
- + -I CHARSET specify a character encoding for UNIX and other archives\n\n";
- #else /* !VMS */
- static ZCONST char Far UnzipUsageLine4[] = "\
- modifiers:\n\
- @@ -802,6 +823,10 @@
- #endif /* UNICODE_SUPPORT */
-
-
- +#ifdef UNIX
- + init_conversion_charsets();
- +#endif
- +
- #if (defined(__IBMC__) && defined(__DEBUG_ALLOC__))
- extern void DebugMalloc(void);
-
- @@ -1335,6 +1360,11 @@
- argc = *pargc;
- argv = *pargv;
-
- +#ifdef UNIX
- + extern char OEM_CP[MAX_CP_NAME];
- + extern char ISO_CP[MAX_CP_NAME];
- +#endif
- +
- while (++argv, (--argc > 0 && *argv != NULL && **argv == '-')) {
- s = *argv + 1;
- while ((c = *s++) != 0) { /* "!= 0": prevent Turbo C warning */
- @@ -1516,6 +1546,35 @@
- }
- break;
- #endif /* MACOS */
- +#ifdef UNIX
- +            case ('I'):    /* -I CHARSET: store the charset name in ISO_CP */
- +                if (negative) {
- +                    Info(slide, 0x401, ((char *)slide,
- +                      "error: encodings can't be negated"));
- +                    return(PK_PARAM);
- +                }
- +                if (*s == '\0') {    /* "-I charset": name is the next argument */
- +                    ++argv;
- +                    if (--argc <= 0 || *argv == NULL) {
- +                        Info(slide, 0x401, ((char *)slide,
- +                          "error: a valid character encoding should follow the -I argument"));
- +                        return(PK_PARAM);
- +                    }
- +                    s = *argv;
- +                }
- +                /* Assume that charsets can't start with a dash to spot arguments misuse */
- +                if (*s == '-') {
- +                    Info(slide, 0x401, ((char *)slide,
- +                      "error: a valid character encoding should follow the -I argument"));
- +                    return(PK_PARAM);
- +                }
- +                /* strncpy() may leave the destination unterminated: terminate explicitly. */
- +                strncpy(ISO_CP, s, sizeof(ISO_CP) - 1);
- +                ISO_CP[sizeof(ISO_CP) - 1] = '\0';
- +                while (*s != '\0')    /* No params straight after charset name; */
- +                    ++s;              /* test before advancing so "" stays in bounds */
- +                break;
- +#endif /* ?UNIX */
- case ('j'): /* junk pathnames/directory structure */
- if (negative)
- uO.jflag = FALSE, negative = 0;
- @@ -1591,6 +1650,35 @@
- } else
- ++uO.overwrite_all;
- break;
- +#ifdef UNIX
- +            case ('O'):    /* -O CHARSET: store the charset name in OEM_CP */
- +                if (negative) {
- +                    Info(slide, 0x401, ((char *)slide,
- +                      "error: encodings can't be negated"));
- +                    return(PK_PARAM);
- +                }
- +                if (*s == '\0') {    /* "-O charset": name is the next argument */
- +                    ++argv;
- +                    if (--argc <= 0 || *argv == NULL) {
- +                        Info(slide, 0x401, ((char *)slide,
- +                          "error: a valid character encoding should follow the -O argument"));
- +                        return(PK_PARAM);
- +                    }
- +                    s = *argv;
- +                }
- +                /* Assume that charsets can't start with a dash to spot arguments misuse */
- +                if (*s == '-') {
- +                    Info(slide, 0x401, ((char *)slide,
- +                      "error: a valid character encoding should follow the -O argument"));
- +                    return(PK_PARAM);
- +                }
- +                /* strncpy() may leave the destination unterminated: terminate explicitly. */
- +                strncpy(OEM_CP, s, sizeof(OEM_CP) - 1);
- +                OEM_CP[sizeof(OEM_CP) - 1] = '\0';
- +                while (*s != '\0')    /* No params straight after charset name; */
- +                    ++s;              /* test before advancing so "" stays in bounds */
- +                break;
- +#endif /* ?UNIX */
- case ('p'): /* pipes: extract to stdout, no messages */
- if (negative) {
- uO.cflag = FALSE;
- Index: unzip-6.0/unzpriv.h
- ===================================================================
- --- unzip-6.0.orig/unzpriv.h 2015-02-11 08:46:43.675324290 -0500
- +++ unzip-6.0/unzpriv.h 2015-02-11 08:46:43.675324290 -0500
- @@ -3008,7 +3008,7 @@
- !(((islochdr) || (isuxatt)) && \
- ((hostver) == 25 || (hostver) == 26 || (hostver) == 40))) || \
- (hostnum) == FS_HPFS_ || \
- - ((hostnum) == FS_NTFS_ && (hostver) == 50)) { \
- + ((hostnum) == FS_NTFS_ /* && (hostver) == 50 */ )) { \
- _OEM_INTERN((string)); \
- } else { \
- _ISO_INTERN((string)); \
- Index: unzip-6.0/zipinfo.c
- ===================================================================
- --- unzip-6.0.orig/zipinfo.c 2015-02-11 08:46:43.675324290 -0500
- +++ unzip-6.0/zipinfo.c 2015-02-11 08:46:43.675324290 -0500
- @@ -457,6 +457,10 @@
- int tflag_slm=TRUE, tflag_2v=FALSE;
- int explicit_h=FALSE, explicit_t=FALSE;
-
- +#ifdef UNIX
- + extern char OEM_CP[MAX_CP_NAME];
- + extern char ISO_CP[MAX_CP_NAME];
- +#endif
-
- #ifdef MACOS
- uO.lflag = LFLAG; /* reset default on each call */
- @@ -501,6 +505,35 @@
- uO.lflag = 0;
- }
- break;
- +#ifdef UNIX
- +            case ('I'):    /* -I CHARSET: store the charset name in ISO_CP */
- +                if (negative) {
- +                    Info(slide, 0x401, ((char *)slide,
- +                      "error: encodings can't be negated"));
- +                    return(PK_PARAM);
- +                }
- +                if (*s == '\0') {    /* "-I charset": name is the next argument */
- +                    ++argv;
- +                    if (--argc <= 0 || *argv == NULL) {
- +                        Info(slide, 0x401, ((char *)slide,
- +                          "error: a valid character encoding should follow the -I argument"));
- +                        return(PK_PARAM);
- +                    }
- +                    s = *argv;
- +                }
- +                /* Assume that charsets can't start with a dash to spot arguments misuse */
- +                if (*s == '-') {
- +                    Info(slide, 0x401, ((char *)slide,
- +                      "error: a valid character encoding should follow the -I argument"));
- +                    return(PK_PARAM);
- +                }
- +                /* strncpy() may leave the destination unterminated: terminate explicitly. */
- +                strncpy(ISO_CP, s, sizeof(ISO_CP) - 1);
- +                ISO_CP[sizeof(ISO_CP) - 1] = '\0';
- +                while (*s != '\0')    /* No params straight after charset name; */
- +                    ++s;              /* test before advancing so "" stays in bounds */
- +                break;
- +#endif /* ?UNIX */
- case 'l': /* longer form of "ls -l" type listing */
- if (negative)
- uO.lflag = -2, negative = 0;
- @@ -521,6 +554,35 @@
- G.M_flag = TRUE;
- break;
- #endif
- +#ifdef UNIX
- +            case ('O'):    /* -O CHARSET: store the charset name in OEM_CP */
- +                if (negative) {
- +                    Info(slide, 0x401, ((char *)slide,
- +                      "error: encodings can't be negated"));
- +                    return(PK_PARAM);
- +                }
- +                if (*s == '\0') {    /* "-O charset": name is the next argument */
- +                    ++argv;
- +                    if (--argc <= 0 || *argv == NULL) {
- +                        Info(slide, 0x401, ((char *)slide,
- +                          "error: a valid character encoding should follow the -O argument"));
- +                        return(PK_PARAM);
- +                    }
- +                    s = *argv;
- +                }
- +                /* Assume that charsets can't start with a dash to spot arguments misuse */
- +                if (*s == '-') {
- +                    Info(slide, 0x401, ((char *)slide,
- +                      "error: a valid character encoding should follow the -O argument"));
- +                    return(PK_PARAM);
- +                }
- +                /* strncpy() may leave the destination unterminated: terminate explicitly. */
- +                strncpy(OEM_CP, s, sizeof(OEM_CP) - 1);
- +                OEM_CP[sizeof(OEM_CP) - 1] = '\0';
- +                while (*s != '\0')    /* No params straight after charset name; */
- +                    ++s;              /* test before advancing so "" stays in bounds */
- +                break;
- +#endif /* ?UNIX */
- case 's': /* default: shorter "ls -l" type listing */
- if (negative)
- uO.lflag = -2, negative = 0;
|