unzip-6.0-alt-iconv-utf8-print.patch 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382
  1. From ca0212ba19b64488b9e8459a762c11ecd6e7d0bd Mon Sep 17 00:00:00 2001
  2. From: Petr Stodulka <pstodulk@redhat.com>
  3. Date: Tue, 24 Nov 2015 17:56:11 +0100
  4. Subject: [PATCH] print correctly non-ascii filenames
  5. ---
  6. extract.c | 289 ++++++++++++++++++++++++++++++++++++++++++++++++--------------
  7. unzpriv.h | 7 ++
  8. 2 files changed, 233 insertions(+), 63 deletions(-)
  9. diff --git a/extract.c b/extract.c
  10. index 0ee4e93..741b7e0 100644
  11. --- a/extract.c
  12. +++ b/extract.c
  13. @@ -2648,8 +2648,21 @@ static void set_deferred_symlink(__G__ slnk_entry)
  14. } /* end function set_deferred_symlink() */
  15. #endif /* SYMLINKS */
  16. +/*
  17. + * If Unicode is supported, assume we have what we need to do the
  18. + * fnfilter() printability check with wide chars, avoiding MBCS issues.
  19. + */
  20. -
  21. +#ifndef UZ_FNFILTER_REPLACECHAR
  22. + /* A convenient choice for the replacement of unprintable char codes is
  23. + * the "single char wildcard", as this character is quite unlikely to
  24. + * appear in filenames by itself. The following default definition
  25. + * sets the replacement char to a question mark as the most common
  26. + * "single char wildcard"; this setting should be overridden in the
  27. + * appropriate system-specific configuration header when needed.
  28. + */
  29. +# define UZ_FNFILTER_REPLACECHAR '?'
  30. +#endif
  31. /*************************/
  32. /* Function fnfilter() */ /* here instead of in list.c for SFX */
  33. @@ -2661,48 +2674,168 @@ char *fnfilter(raw, space, size) /* convert name to safely printable form */
  34. extent size;
  35. {
  36. #ifndef NATIVE /* ASCII: filter ANSI escape codes, etc. */
  37. - ZCONST uch *r=(ZCONST uch *)raw;
  38. + ZCONST uch *r; // =(ZCONST uch *)raw;
  39. uch *s=space;
  40. uch *slim=NULL;
  41. uch *se=NULL;
  42. int have_overflow = FALSE;
  43. - if (size > 0) {
  44. - slim = space + size
  45. -#ifdef _MBCS
  46. - - (MB_CUR_MAX - 1)
  47. -#endif
  48. - - 4;
  49. +# if defined( UNICODE_SUPPORT) && defined( _MBCS)
  50. +/* If Unicode support is enabled, and we have multi-byte characters,
  51. + * then do the isprint() checks by first converting to wide characters
  52. + * and checking those. This avoids our having to parse multi-byte
  53. + * characters for ourselves. After the wide-char replacements have been
  54. + * made, the wide string is converted back to the local character set.
  55. + */
  56. + wchar_t *wstring; /* wchar_t version of raw */
  57. + size_t wslen; /* length of wstring */
  58. + wchar_t *wostring; /* wchar_t version of output string */
  59. + size_t woslen; /* length of wostring */
  60. + char *newraw; /* new raw */
  61. +
  62. + /* 2012-11-06 SMS.
  63. + * Changed to check the value returned by mbstowcs(), and bypass the
  64. + * Unicode processing if it fails. This seems to fix a problem
  65. + * reported in the SourceForge forum, but it's not clear that we
  66. + * should be doing any Unicode processing without some evidence that
  67. + * the name actually is Unicode. (Check bit 11 in the flags before
  68. + * coming here?)
  69. + * http://sourceforge.net/p/infozip/bugs/40/
  70. + */
  71. +
  72. + if (MB_CUR_MAX <= 1)
  73. + {
  74. + /* There's no point to converting multi-byte chars if there are
  75. + * no multi-byte chars.
  76. + */
  77. + wslen = (size_t)-1;
  78. }
  79. - while (*r) {
  80. - if (size > 0 && s >= slim && se == NULL) {
  81. - se = s;
  82. + else
  83. + {
  84. + /* Get Unicode wide character count (for storage allocation). */
  85. + wslen = mbstowcs( NULL, raw, 0);
  86. + }
  87. +
  88. + if (wslen != (size_t)-1)
  89. + {
  90. + /* Apparently valid Unicode. Allocate wide-char storage. */
  91. + wstring = (wchar_t *)malloc((wslen + 1) * sizeof(wchar_t));
  92. + if (wstring == NULL) {
  93. + strcpy( (char *)space, raw);
  94. + return (char *)space;
  95. }
  96. -#ifdef QDOS
  97. - if (qlflag & 2) {
  98. - if (*r == '/' || *r == '.') {
  99. + wostring = (wchar_t *)malloc(2 * (wslen + 1) * sizeof(wchar_t));
  100. + if (wostring == NULL) {
  101. + free(wstring);
  102. + strcpy( (char *)space, raw);
  103. + return (char *)space;
  104. + }
  105. +
  106. + /* Convert the multi-byte Unicode to wide chars. */
  107. + wslen = mbstowcs(wstring, raw, wslen + 1);
  108. +
  109. + /* Filter the wide-character string. */
  110. + fnfilterw( wstring, wostring, (2 * (wslen + 1) * sizeof(wchar_t)));
  111. +
  112. + /* Convert filtered wide chars back to multi-byte. */
  113. + woslen = wcstombs( NULL, wostring, 0);
  114. + if ((newraw = malloc(woslen + 1)) == NULL) {
  115. + free(wstring);
  116. + free(wostring);
  117. + strcpy( (char *)space, raw);
  118. + return (char *)space;
  119. + }
  120. + woslen = wcstombs( newraw, wostring, (woslen * MB_CUR_MAX) + 1);
  121. +
  122. + if (size > 0) {
  123. + slim = space + size - 4;
  124. + }
  125. + r = (ZCONST uch *)newraw;
  126. + while (*r) {
  127. + if (size > 0 && s >= slim && se == NULL) {
  128. + se = s;
  129. + }
  130. +# ifdef QDOS
  131. + if (qlflag & 2) {
  132. + if (*r == '/' || *r == '.') {
  133. + if (se != NULL && (s > (space + (size-3)))) {
  134. + have_overflow = TRUE;
  135. + break;
  136. + }
  137. + ++r;
  138. + *s++ = '_';
  139. + continue;
  140. + }
  141. + } else
  142. +# endif
  143. + {
  144. if (se != NULL && (s > (space + (size-3)))) {
  145. have_overflow = TRUE;
  146. break;
  147. }
  148. - ++r;
  149. - *s++ = '_';
  150. - continue;
  151. + *s++ = *r++;
  152. }
  153. - } else
  154. + }
  155. + if (have_overflow) {
  156. + strcpy((char *)se, "...");
  157. + } else {
  158. + *s = '\0';
  159. + }
  160. +
  161. + free(wstring);
  162. + free(wostring);
  163. + free(newraw);
  164. + }
  165. + else
  166. +# endif /* defined( UNICODE_SUPPORT) && defined( _MBCS) */
  167. + {
  168. + /* No Unicode support, or apparently invalid Unicode. */
  169. + r = (ZCONST uch *)raw;
  170. +
  171. + if (size > 0) {
  172. + slim = space + size
  173. +#ifdef _MBCS
  174. + - (MB_CUR_MAX - 1)
  175. +#endif
  176. + - 4;
  177. + }
  178. + while (*r) {
  179. + if (size > 0 && s >= slim && se == NULL) {
  180. + se = s;
  181. + }
  182. +#ifdef QDOS
  183. + if (qlflag & 2) {
  184. + if (*r == '/' || *r == '.') {
  185. + if (se != NULL && (s > (space + (size-3)))) {
  186. + have_overflow = TRUE;
  187. + break;
  188. + }
  189. + ++r;
  190. + *s++ = '_';
  191. + continue;
  192. + }
  193. + } else
  194. #endif
  195. #ifdef HAVE_WORKING_ISPRINT
  196. -# ifndef UZ_FNFILTER_REPLACECHAR
  197. - /* A convenient choice for the replacement of unprintable char codes is
  198. - * the "single char wildcard", as this character is quite unlikely to
  199. - * appear in filenames by itself. The following default definition
  200. - * sets the replacement char to a question mark as the most common
  201. - * "single char wildcard"; this setting should be overridden in the
  202. - * appropiate system-specific configuration header when needed.
  203. - */
  204. -# define UZ_FNFILTER_REPLACECHAR '?'
  205. -# endif
  206. - if (!isprint(*r)) {
  207. + if (!isprint(*r)) {
  208. + if (*r < 32) {
  209. + /* ASCII control codes are escaped as "^{letter}". */
  210. + if (se != NULL && (s > (space + (size-4)))) {
  211. + have_overflow = TRUE;
  212. + break;
  213. + }
  214. + *s++ = '^', *s++ = (uch)(64 + *r++);
  215. + } else {
  216. + /* Other unprintable codes are replaced by the
  217. + * placeholder character. */
  218. + if (se != NULL && (s > (space + (size-3)))) {
  219. + have_overflow = TRUE;
  220. + break;
  221. + }
  222. + *s++ = UZ_FNFILTER_REPLACECHAR;
  223. + INCSTR(r);
  224. + }
  225. +#else /* !HAVE_WORKING_ISPRINT */
  226. if (*r < 32) {
  227. /* ASCII control codes are escaped as "^{letter}". */
  228. if (se != NULL && (s > (space + (size-4)))) {
  229. @@ -2710,47 +2843,30 @@ char *fnfilter(raw, space, size) /* convert name to safely printable form */
  230. break;
  231. }
  232. *s++ = '^', *s++ = (uch)(64 + *r++);
  233. +#endif /* ?HAVE_WORKING_ISPRINT */
  234. } else {
  235. - /* Other unprintable codes are replaced by the
  236. - * placeholder character. */
  237. +#ifdef _MBCS
  238. + unsigned i = CLEN(r);
  239. + if (se != NULL && (s > (space + (size-i-2)))) {
  240. + have_overflow = TRUE;
  241. + break;
  242. + }
  243. + for (; i > 0; i--)
  244. + *s++ = *r++;
  245. +#else
  246. if (se != NULL && (s > (space + (size-3)))) {
  247. have_overflow = TRUE;
  248. break;
  249. }
  250. - *s++ = UZ_FNFILTER_REPLACECHAR;
  251. - INCSTR(r);
  252. - }
  253. -#else /* !HAVE_WORKING_ISPRINT */
  254. - if (*r < 32) {
  255. - /* ASCII control codes are escaped as "^{letter}". */
  256. - if (se != NULL && (s > (space + (size-4)))) {
  257. - have_overflow = TRUE;
  258. - break;
  259. - }
  260. - *s++ = '^', *s++ = (uch)(64 + *r++);
  261. -#endif /* ?HAVE_WORKING_ISPRINT */
  262. - } else {
  263. -#ifdef _MBCS
  264. - unsigned i = CLEN(r);
  265. - if (se != NULL && (s > (space + (size-i-2)))) {
  266. - have_overflow = TRUE;
  267. - break;
  268. - }
  269. - for (; i > 0; i--)
  270. *s++ = *r++;
  271. -#else
  272. - if (se != NULL && (s > (space + (size-3)))) {
  273. - have_overflow = TRUE;
  274. - break;
  275. - }
  276. - *s++ = *r++;
  277. #endif
  278. - }
  279. - }
  280. - if (have_overflow) {
  281. - strcpy((char *)se, "...");
  282. - } else {
  283. - *s = '\0';
  284. + }
  285. + }
  286. + if (have_overflow) {
  287. + strcpy((char *)se, "...");
  288. + } else {
  289. + *s = '\0';
  290. + }
  291. }
  292. #ifdef WINDLL
  293. @@ -2772,6 +2888,53 @@ char *fnfilter(raw, space, size) /* convert name to safely printable form */
  294. } /* end function fnfilter() */
  295. +#if defined( UNICODE_SUPPORT) && defined( _MBCS)
  296. +
  297. +/****************************/
  298. +/* Function fnfilter[w]() */ /* (Here instead of in list.c for SFX.) */
  299. +/****************************/
  300. +
  301. +/* fnfilterw() - Convert wide-character name to safely printable form.
  302. + * A control code expands to two wide chars ("^{letter}"), and siz is not
  303. + * checked, so dst must hold twice the source length plus the NUL. */
  304. +
  305. +wchar_t *fnfilterw( src, dst, siz)
  306. + ZCONST wchar_t *src; /* Pointer to source char (string). */
  307. + wchar_t *dst; /* Pointer to destination char (string). */
  308. + extent siz; /* Not used (!). */
  309. +{
  310. + wchar_t *dsx = dst;
  311. +
  312. + /* Filter the wide chars. */
  313. + while (*src)
  314. + {
  315. + if (iswprint( *src))
  316. + {
  317. + /* Printable code. Copy it. */
  318. + *dst++ = *src;
  319. + }
  320. + else
  321. + {
  322. + /* Unprintable code. Substitute something printable for it. */
  323. + if (*src < 32)
  324. + {
  325. + /* Replace ASCII control code with "^{letter}". */
  326. + *dst++ = (wchar_t)'^';
  327. + *dst++ = (wchar_t)(64 + *src);
  328. + }
  329. + else
  330. + {
  331. + /* Replace other unprintable code with the placeholder. */
  332. + *dst++ = (wchar_t)UZ_FNFILTER_REPLACECHAR;
  333. + }
  334. + }
  335. + src++;
  336. + }
  337. + *dst = (wchar_t)0; /* NUL-terminate the destination string. */
  338. + return dsx;
  339. +} /* fnfilterw(). */
  340. +
  341. +#endif /* defined( UNICODE_SUPPORT) && defined( _MBCS) */
  342. #ifdef SET_DIR_ATTRIB
  343. diff --git a/unzpriv.h b/unzpriv.h
  344. index 22d3923..e48a652 100644
  345. --- a/unzpriv.h
  346. +++ b/unzpriv.h
  347. @@ -1212,6 +1212,7 @@
  348. # ifdef UNICODE_WCHAR
  349. # if !(defined(_WIN32_WCE) || defined(POCKET_UNZIP))
  350. # include <wchar.h>
  351. +# include <wctype.h>
  352. # endif
  353. # endif
  354. # ifndef _MBCS /* no need to include <locale.h> twice, see below */
  355. @@ -2410,6 +2411,12 @@ int memflush OF((__GPRO__ ZCONST uch *rawbuf, ulg size));
  356. char *fnfilter OF((ZCONST char *raw, uch *space,
  357. extent size));
  358. +# if defined( UNICODE_SUPPORT) && defined( _MBCS)
  359. +wchar_t *fnfilterw OF((ZCONST wchar_t *src, wchar_t *dst,
  360. + extent siz));
  361. +#endif
  362. +
  363. +
  364. /*---------------------------------------------------------------------------
  365. Decompression functions:
  366. ---------------------------------------------------------------------------*/
  367. --
  368. 2.4.3