unzip-6.0-alt-iconv-utf8.patch 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399
  1. From: Giovanni Scafora <giovanni.archlinux.org>
  2. Subject: unzip files encoded with non-latin, non-unicode file names
  3. Last-Update: 2015-02-11
  4. Updated 2015-02-11 by Marc Deslauriers <marc.deslauriers@canonical.com>
  5. to fix buffer overflow in charset_to_intern()
  6. Index: unzip-6.0/unix/unix.c
  7. ===================================================================
  8. --- unzip-6.0.orig/unix/unix.c 2015-02-11 08:46:43.675324290 -0500
  9. +++ unzip-6.0/unix/unix.c 2015-02-11 09:18:04.902081319 -0500
  10. @@ -30,6 +30,9 @@
  11. #define UNZIP_INTERNAL
  12. #include "unzip.h"
  13. +#include <iconv.h>
  14. +#include <langinfo.h>
  15. +
  16. #ifdef SCO_XENIX
  17. # define SYSNDIR
  18. #else /* SCO Unix, AIX, DNIX, TI SysV, Coherent 4.x, ... */
  19. @@ -1874,3 +1877,102 @@
  20. }
  21. }
  22. #endif /* QLZIP */
  23. +
  24. +
  25. +typedef struct {
  26. + char *local_charset;   /* codeset name compared against nl_langinfo(CODESET) */
  27. + char *archive_charset; /* charset copied into OEM_CP when local_charset matches */
  28. +} CHARSET_MAP;
  29. +
  30. +/* A mapping of local <-> archive charsets used by default to convert filenames
  31. + * of DOS/Windows Zip archives; searched by init_conversion_charsets(). Currently very basic. */
  32. +static CHARSET_MAP dos_charset_map[] = {
  33. + { "ANSI_X3.4-1968", "CP850" },
  34. + { "ISO-8859-1", "CP850" },
  35. + { "CP1252", "CP850" },
  36. + { "UTF-8", "CP866" },
  37. + { "KOI8-R", "CP866" },
  38. + { "KOI8-U", "CP866" },
  39. + { "ISO-8859-5", "CP866" }
  40. +};
  41. +
  42. +char OEM_CP[MAX_CP_NAME] = ""; /* source charset passed to charset_to_intern() by oem_intern(); "" means no conversion */
  43. +char ISO_CP[MAX_CP_NAME] = ""; /* source charset passed to charset_to_intern() by iso_intern(); "" means no conversion */
  44. +
  45. +/* Try to guess the default value of OEM_CP by looking up the current locale's
  46. + * codeset (nl_langinfo(CODESET)) in dos_charset_map. ISO_CP is left alone for now. */
  47. +void init_conversion_charsets(void)
  48. +{
  49. + const char *local_charset;
  50. + size_t i;
  51. +
  52. + /* Make a guess only if OEM_CP not already set. */
  53. + if(*OEM_CP == '\0') {
  54. + local_charset = nl_langinfo(CODESET);
  55. + for(i = 0; i < sizeof(dos_charset_map)/sizeof(dos_charset_map[0]); i++)
  56. + if(!strcasecmp(local_charset, dos_charset_map[i].local_charset)) {
  57. + strncpy(OEM_CP, dos_charset_map[i].archive_charset,
  58. + sizeof(OEM_CP) - 1); /* last byte is never written, so it keeps its initial '\0' */
  59. + break;
  60. + }
  61. + }
  62. +}
  63. +
  64. +/* Convert string in place from from_charset to the current locale charset using
  65. + * iconv(). Be as non-intrusive as possible: if from_charset is empty or any step
  66. + * of the conversion fails, just leave the string intact. */
  67. +static void charset_to_intern(char *string, char *from_charset)
  68. +{
  69. + iconv_t cd;
  70. + char *s,*d, *buf;
  71. + size_t slen, dlen, buflen;
  72. + const char *local_charset;
  73. +
  74. + if(*from_charset == '\0')
  75. + return;
  76. +
  77. + buf = NULL;
  78. + local_charset = nl_langinfo(CODESET);
  79. +
  80. + if((cd = iconv_open(local_charset, from_charset)) == (iconv_t)-1)
  81. + return;
  82. +
  83. + slen = strlen(string);
  84. + s = string;
  85. +
  86. + /* Cap the scratch buffer at the smaller of FILNAMSIZ and OUTBUFSIZ + 1,
  87. + * as this function also gets called with G.outbuf in fileio.c
  88. + */
  89. + buflen = FILNAMSIZ;
  90. + if (OUTBUFSIZ + 1 < FILNAMSIZ)
  91. + {
  92. + buflen = OUTBUFSIZ + 1;
  93. + }
  94. +
  95. + d = buf = malloc(buflen);
  96. + if(!d)
  97. + goto cleanup;
  98. +
  99. + memset(buf, 0, buflen); /* zero fill so the converted text is always NUL-terminated */
  100. + dlen = buflen - 1; /* keep the last byte free for the terminator */
  101. +
  102. + if(iconv(cd, &s, &slen, &d, &dlen) == (size_t)-1)
  103. + goto cleanup;
  104. + memcpy(string, buf, strlen(buf) + 1); /* write only the result; strncpy would pad out to buflen bytes */
  105. +
  106. + cleanup:
  107. + free(buf);
  108. + iconv_close(cd);
  109. +}
  110. +
  111. +/* Convert a string in place from OEM_CP to the current locale charset (nothing happens while OEM_CP is empty). */
  112. +inline void oem_intern(char *string)
  113. +{
  114. + charset_to_intern(string, OEM_CP);
  115. +}
  116. +
  117. +/* Convert a string in place from ISO_CP to the current locale charset (nothing happens while ISO_CP is empty). */
  118. +inline void iso_intern(char *string)
  119. +{
  120. + charset_to_intern(string, ISO_CP);
  121. +}
  122. Index: unzip-6.0/unix/unxcfg.h
  123. ===================================================================
  124. --- unzip-6.0.orig/unix/unxcfg.h 2015-02-11 08:46:43.675324290 -0500
  125. +++ unzip-6.0/unix/unxcfg.h 2015-02-11 08:46:43.671324260 -0500
  126. @@ -228,4 +228,30 @@
  127. /* wild_dir, dirname, wildname, matchname[], dirnamelen, have_dirname, */
  128. /* and notfirstcall are used by do_wild(). */
  129. +
  130. +#define MAX_CP_NAME 25 /* size in bytes of the OEM_CP and ISO_CP charset-name buffers */
  131. +
  132. +#ifdef SETLOCALE
  133. +# undef SETLOCALE
  134. +#endif
  135. +#define SETLOCALE(category, locale) setlocale(category, locale)
  136. +#include <locale.h>
  137. +
  138. +#ifdef _ISO_INTERN
  139. +# undef _ISO_INTERN
  140. +#endif
  141. +#define _ISO_INTERN(str1) iso_intern(str1)
  142. +
  143. +#ifdef _OEM_INTERN
  144. +# undef _OEM_INTERN
  145. +#endif
  146. +#ifndef IZ_OEM2ISO_ARRAY
  147. +# define IZ_OEM2ISO_ARRAY
  148. +#endif
  149. +#define _OEM_INTERN(str1) oem_intern(str1)
  150. +
  151. +void iso_intern(char *);
  152. +void oem_intern(char *);
  153. +void init_conversion_charsets(void);
  154. +
  155. #endif /* !__unxcfg_h */
  156. Index: unzip-6.0/unzip.c
  157. ===================================================================
  158. --- unzip-6.0.orig/unzip.c 2015-02-11 08:46:43.675324290 -0500
  159. +++ unzip-6.0/unzip.c 2015-02-11 08:46:43.675324290 -0500
  160. @@ -327,11 +327,21 @@
  161. -2 just filenames but allow -h/-t/-z -l long Unix \"ls -l\" format\n\
  162. -v verbose, multi-page format\n";
  163. +#ifndef UNIX
  164. static ZCONST char Far ZipInfoUsageLine3[] = "miscellaneous options:\n\
  165. -h print header line -t print totals for listed files or for all\n\
  166. -z print zipfile comment -T print file times in sortable decimal format\
  167. \n -C be case-insensitive %s\
  168. -x exclude filenames that follow from listing\n";
  169. +#else /* UNIX */
  170. +static ZCONST char Far ZipInfoUsageLine3[] = "miscellaneous options:\n\
  171. + -h print header line -t print totals for listed files or for all\n\
  172. + -z print zipfile comment -T print file times in sortable decimal format\
  173. +\n -C be case-insensitive %s\
  174. + -x exclude filenames that follow from listing\n\
  175. + -O CHARSET specify a character encoding for DOS, Windows and OS/2 archives\n\
  176. + -I CHARSET specify a character encoding for UNIX and other archives\n"; /* one %s only, like the !UNIX text, so both variants take the same argument list */
  177. +#endif /* !UNIX */
  178. #ifdef MORE
  179. static ZCONST char Far ZipInfoUsageLine4[] =
  180. " -M page output through built-in \"more\"\n";
  181. @@ -664,6 +674,17 @@
  182. -U use escapes for all non-ASCII Unicode -UU ignore any Unicode fields\n\
  183. -C match filenames case-insensitively -L make (some) names \
  184. lowercase\n %-42s -V retain VMS version numbers\n%s";
  185. +#elif (defined UNIX)
  186. +static ZCONST char Far UnzipUsageLine4[] = "\
  187. +modifiers:\n\
  188. + -n never overwrite existing files -q quiet mode (-qq => quieter)\n\
  189. + -o overwrite files WITHOUT prompting -a auto-convert any text files\n\
  190. + -j junk paths (do not make directories) -aa treat ALL files as text\n\
  191. + -U use escapes for all non-ASCII Unicode -UU ignore any Unicode fields\n\
  192. + -C match filenames case-insensitively -L make (some) names \
  193. +lowercase\n %-42s -V retain VMS version numbers\n%s\
  194. + -O CHARSET specify a character encoding for DOS, Windows and OS/2 archives\n\
  195. + -I CHARSET specify a character encoding for UNIX and other archives\n\n"; /* UNIX build: modifier help extended with the -O and -I charset options */
  196. #else /* !VMS */
  197. static ZCONST char Far UnzipUsageLine4[] = "\
  198. modifiers:\n\
  199. @@ -802,6 +823,10 @@
  200. #endif /* UNICODE_SUPPORT */
  201. +#ifdef UNIX
  202. + init_conversion_charsets();
  203. +#endif
  204. +
  205. #if (defined(__IBMC__) && defined(__DEBUG_ALLOC__))
  206. extern void DebugMalloc(void);
  207. @@ -1335,6 +1360,11 @@
  208. argc = *pargc;
  209. argv = *pargv;
  210. +#ifdef UNIX
  211. + extern char OEM_CP[MAX_CP_NAME];
  212. + extern char ISO_CP[MAX_CP_NAME];
  213. +#endif
  214. +
  215. while (++argv, (--argc > 0 && *argv != NULL && **argv == '-')) {
  216. s = *argv + 1;
  217. while ((c = *s++) != 0) { /* "!= 0": prevent Turbo C warning */
  218. @@ -1516,6 +1546,35 @@
  219. }
  220. break;
  221. #endif /* MACOS */
  222. +#ifdef UNIX
  223. + case ('I'): /* -I CHARSET: encoding for UNIX and other archives, stored in ISO_CP */
  224. + if (negative) {
  225. + Info(slide, 0x401, ((char *)slide,
  226. + "error: encodings can't be negated"));
  227. + return(PK_PARAM);
  228. + } else {
  229. + if(*s) { /* Handle the -Icharset case */
  230. + /* Assume that charsets can't start with a dash to spot arguments misuse */
  231. + if(*s == '-') {
  232. + Info(slide, 0x401, ((char *)slide,
  233. + "error: a valid character encoding should follow the -I argument"));
  234. + return(PK_PARAM);
  235. + }
  236. + strncpy(ISO_CP, s, sizeof(ISO_CP) - 1); /* never write the last byte, so a long name cannot remove the terminator */
  237. + } else { /* -I charset */
  238. + ++argv;
  239. + if(!(--argc > 0 && *argv != NULL && **argv != '-')) {
  240. + Info(slide, 0x401, ((char *)slide,
  241. + "error: a valid character encoding should follow the -I argument"));
  242. + return(PK_PARAM);
  243. + }
  244. + s = *argv;
  245. + strncpy(ISO_CP, s, sizeof(ISO_CP) - 1); /* never write the last byte, so a long name cannot remove the terminator */
  246. + }
  247. + while(*s) ++s; /* No params straight after charset name; testing *s first keeps an empty "" argument in bounds */
  248. + }
  249. + break;
  250. +#endif /* ?UNIX */
  251. case ('j'): /* junk pathnames/directory structure */
  252. if (negative)
  253. uO.jflag = FALSE, negative = 0;
  254. @@ -1591,6 +1650,35 @@
  255. } else
  256. ++uO.overwrite_all;
  257. break;
  258. +#ifdef UNIX
  259. + case ('O'): /* -O CHARSET: encoding for DOS, Windows and OS/2 archives, stored in OEM_CP */
  260. + if (negative) {
  261. + Info(slide, 0x401, ((char *)slide,
  262. + "error: encodings can't be negated"));
  263. + return(PK_PARAM);
  264. + } else {
  265. + if(*s) { /* Handle the -Ocharset case */
  266. + /* Assume that charsets can't start with a dash to spot arguments misuse */
  267. + if(*s == '-') {
  268. + Info(slide, 0x401, ((char *)slide,
  269. + "error: a valid character encoding should follow the -O argument"));
  270. + return(PK_PARAM);
  271. + }
  272. + strncpy(OEM_CP, s, sizeof(OEM_CP) - 1); /* never write the last byte, so a long name cannot remove the terminator */
  273. + } else { /* -O charset */
  274. + ++argv;
  275. + if(!(--argc > 0 && *argv != NULL && **argv != '-')) {
  276. + Info(slide, 0x401, ((char *)slide,
  277. + "error: a valid character encoding should follow the -O argument"));
  278. + return(PK_PARAM);
  279. + }
  280. + s = *argv;
  281. + strncpy(OEM_CP, s, sizeof(OEM_CP) - 1); /* never write the last byte, so a long name cannot remove the terminator */
  282. + }
  283. + while(*s) ++s; /* No params straight after charset name; testing *s first keeps an empty "" argument in bounds */
  284. + }
  285. + break;
  286. +#endif /* ?UNIX */
  287. case ('p'): /* pipes: extract to stdout, no messages */
  288. if (negative) {
  289. uO.cflag = FALSE;
  290. Index: unzip-6.0/unzpriv.h
  291. ===================================================================
  292. --- unzip-6.0.orig/unzpriv.h 2015-02-11 08:46:43.675324290 -0500
  293. +++ unzip-6.0/unzpriv.h 2015-02-11 08:46:43.675324290 -0500
  294. @@ -3008,7 +3008,7 @@
  295. !(((islochdr) || (isuxatt)) && \
  296. ((hostver) == 25 || (hostver) == 26 || (hostver) == 40))) || \
  297. (hostnum) == FS_HPFS_ || \
  298. - ((hostnum) == FS_NTFS_ && (hostver) == 50)) { \
  299. + ((hostnum) == FS_NTFS_ /* && (hostver) == 50 */ )) { \
  300. _OEM_INTERN((string)); \
  301. } else { \
  302. _ISO_INTERN((string)); \
  303. Index: unzip-6.0/zipinfo.c
  304. ===================================================================
  305. --- unzip-6.0.orig/zipinfo.c 2015-02-11 08:46:43.675324290 -0500
  306. +++ unzip-6.0/zipinfo.c 2015-02-11 08:46:43.675324290 -0500
  307. @@ -457,6 +457,10 @@
  308. int tflag_slm=TRUE, tflag_2v=FALSE;
  309. int explicit_h=FALSE, explicit_t=FALSE;
  310. +#ifdef UNIX
  311. + extern char OEM_CP[MAX_CP_NAME];
  312. + extern char ISO_CP[MAX_CP_NAME];
  313. +#endif
  314. #ifdef MACOS
  315. uO.lflag = LFLAG; /* reset default on each call */
  316. @@ -501,6 +505,35 @@
  317. uO.lflag = 0;
  318. }
  319. break;
  320. +#ifdef UNIX
  321. + case ('I'): /* -I CHARSET: encoding for UNIX and other archives, stored in ISO_CP */
  322. + if (negative) {
  323. + Info(slide, 0x401, ((char *)slide,
  324. + "error: encodings can't be negated"));
  325. + return(PK_PARAM);
  326. + } else {
  327. + if(*s) { /* Handle the -Icharset case */
  328. + /* Assume that charsets can't start with a dash to spot arguments misuse */
  329. + if(*s == '-') {
  330. + Info(slide, 0x401, ((char *)slide,
  331. + "error: a valid character encoding should follow the -I argument"));
  332. + return(PK_PARAM);
  333. + }
  334. + strncpy(ISO_CP, s, sizeof(ISO_CP) - 1); /* never write the last byte, so a long name cannot remove the terminator */
  335. + } else { /* -I charset */
  336. + ++argv;
  337. + if(!(--argc > 0 && *argv != NULL && **argv != '-')) {
  338. + Info(slide, 0x401, ((char *)slide,
  339. + "error: a valid character encoding should follow the -I argument"));
  340. + return(PK_PARAM);
  341. + }
  342. + s = *argv;
  343. + strncpy(ISO_CP, s, sizeof(ISO_CP) - 1); /* never write the last byte, so a long name cannot remove the terminator */
  344. + }
  345. + while(*s) ++s; /* No params straight after charset name; testing *s first keeps an empty "" argument in bounds */
  346. + }
  347. + break;
  348. +#endif /* ?UNIX */
  349. case 'l': /* longer form of "ls -l" type listing */
  350. if (negative)
  351. uO.lflag = -2, negative = 0;
  352. @@ -521,6 +554,35 @@
  353. G.M_flag = TRUE;
  354. break;
  355. #endif
  356. +#ifdef UNIX
  357. + case ('O'): /* -O CHARSET: encoding for DOS, Windows and OS/2 archives, stored in OEM_CP */
  358. + if (negative) {
  359. + Info(slide, 0x401, ((char *)slide,
  360. + "error: encodings can't be negated"));
  361. + return(PK_PARAM);
  362. + } else {
  363. + if(*s) { /* Handle the -Ocharset case */
  364. + /* Assume that charsets can't start with a dash to spot arguments misuse */
  365. + if(*s == '-') {
  366. + Info(slide, 0x401, ((char *)slide,
  367. + "error: a valid character encoding should follow the -O argument"));
  368. + return(PK_PARAM);
  369. + }
  370. + strncpy(OEM_CP, s, sizeof(OEM_CP) - 1); /* never write the last byte, so a long name cannot remove the terminator */
  371. + } else { /* -O charset */
  372. + ++argv;
  373. + if(!(--argc > 0 && *argv != NULL && **argv != '-')) {
  374. + Info(slide, 0x401, ((char *)slide,
  375. + "error: a valid character encoding should follow the -O argument"));
  376. + return(PK_PARAM);
  377. + }
  378. + s = *argv;
  379. + strncpy(OEM_CP, s, sizeof(OEM_CP) - 1); /* never write the last byte, so a long name cannot remove the terminator */
  380. + }
  381. + while(*s) ++s; /* No params straight after charset name; testing *s first keeps an empty "" argument in bounds */
  382. + }
  383. + break;
  384. +#endif /* ?UNIX */
  385. case 's': /* default: shorter "ls -l" type listing */
  386. if (negative)
  387. uO.lflag = -2, negative = 0;