archive_write_set_format_pax.c 42 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434
  1. /*-
  2. * Copyright (c) 2003-2007 Tim Kientzle
  3. * All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions
  7. * are met:
  8. * 1. Redistributions of source code must retain the above copyright
  9. * notice, this list of conditions and the following disclaimer.
  10. * 2. Redistributions in binary form must reproduce the above copyright
  11. * notice, this list of conditions and the following disclaimer in the
  12. * documentation and/or other materials provided with the distribution.
  13. *
  14. * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
  15. * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  16. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  17. * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
  18. * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  19. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  20. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  21. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  22. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  23. * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  24. */
  25. #include "archive_platform.h"
  26. __FBSDID("$FreeBSD: src/lib/libarchive/archive_write_set_format_pax.c,v 1.49 2008/09/30 03:57:07 kientzle Exp $");
  27. #ifdef HAVE_ERRNO_H
  28. #include <errno.h>
  29. #endif
  30. #ifdef HAVE_STDLIB_H
  31. #include <stdlib.h>
  32. #endif
  33. #ifdef HAVE_STRING_H
  34. #include <string.h>
  35. #endif
  36. #include "archive.h"
  37. #include "archive_entry.h"
  38. #include "archive_private.h"
  39. #include "archive_write_private.h"
  40. struct pax {
  41. uint64_t entry_bytes_remaining;
  42. uint64_t entry_padding;
  43. struct archive_string pax_header;
  44. };
  45. static void add_pax_attr(struct archive_string *, const char *key,
  46. const char *value);
  47. static void add_pax_attr_int(struct archive_string *,
  48. const char *key, int64_t value);
  49. static void add_pax_attr_time(struct archive_string *,
  50. const char *key, int64_t sec,
  51. unsigned long nanos);
  52. static void add_pax_attr_w(struct archive_string *,
  53. const char *key, const wchar_t *wvalue);
  54. static ssize_t archive_write_pax_data(struct archive_write *,
  55. const void *, size_t);
  56. static int archive_write_pax_data_skip(struct archive_write *,
  57. off_t);
  58. static int archive_write_pax_finish(struct archive_write *);
  59. static int archive_write_pax_destroy(struct archive_write *);
  60. static int archive_write_pax_finish_entry(struct archive_write *);
  61. static int archive_write_pax_header(struct archive_write *,
  62. struct archive_entry *);
  63. static char *base64_encode(const char *src, size_t len);
  64. static char *build_pax_attribute_name(char *dest, const char *src);
  65. static char *build_ustar_entry_name(char *dest, const char *src,
  66. size_t src_length, const char *insert);
  67. static char *format_int(char *dest, int64_t);
  68. static int has_non_ASCII(const wchar_t *);
  69. static char *url_encode(const char *in);
  70. static int write_nulls(struct archive_write *, size_t);
  /*
   * Emit a message on file descriptor 2, retrying after short writes.
   * Stops silently as soon as write() fails or makes no progress.
   */
  static void
  errmsg(const char *m)
  {
      size_t left = strlen(m);

      while (left > 0) {
          /* 'left' always equals strlen(m) because both advance together. */
          ssize_t n = write(2, m, left);

          if (n <= 0)
              return;
          left -= n;
          m += n;
      }
  }
  84. /*
  85. * Set output format to 'restricted pax' format.
  86. *
  87. * This is the same as normal 'pax', but tries to suppress
  88. * the pax header whenever possible. This is the default for
  89. * bsdtar, for instance.
  90. */
  91. int
  92. archive_write_set_format_pax_restricted(struct archive *_a)
  93. {
  94. struct archive_write *a = (struct archive_write *)_a;
  95. int r;
  96. r = archive_write_set_format_pax(&a->archive);
  97. a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_RESTRICTED;
  98. a->archive.archive_format_name = "restricted POSIX pax interchange";
  99. return (r);
  100. }
  101. /*
  102. * Set output format to 'pax' format.
  103. */
  104. int
  105. archive_write_set_format_pax(struct archive *_a)
  106. {
  107. struct archive_write *a = (struct archive_write *)_a;
  108. struct pax *pax;
  109. if (a->format_destroy != NULL)
  110. (a->format_destroy)(a);
  111. pax = (struct pax *)malloc(sizeof(*pax));
  112. if (pax == NULL) {
  113. archive_set_error(&a->archive, ENOMEM, "Can't allocate pax data");
  114. return (ARCHIVE_FATAL);
  115. }
  116. memset(pax, 0, sizeof(*pax));
  117. a->format_data = pax;
  118. a->pad_uncompressed = 1;
  119. a->format_name = "pax";
  120. a->format_write_header = archive_write_pax_header;
  121. a->format_write_data = archive_write_pax_data;
  122. a->format_finish = archive_write_pax_finish;
  123. a->format_destroy = archive_write_pax_destroy;
  124. a->format_finish_entry = archive_write_pax_finish_entry;
  125. a->format_skip_data = archive_write_pax_data_skip;
  126. a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE;
  127. a->archive.archive_format_name = "POSIX pax interchange";
  128. return (ARCHIVE_OK);
  129. }
  /*
   * Append a time-valued attribute "<sec>[.<fraction>]" to the pax header.
   *
   * The fraction is taken from the low nine decimal digits of 'nanos',
   * with trailing zeros removed; it is omitted entirely when those
   * digits are all zero.
   *
   * Note: 'nanos' is assumed to carry the same sign as 'sec', so
   * sec=-1, nanos=200000000 means -1.2 seconds rather than -0.8 seconds.
   * This is a pedantic point: files dated before Jan 1, 1970 are rare,
   * and ones with sub-second timestamps rarer still.
   */
  static void
  add_pax_attr_time(struct archive_string *as, const char *key,
      int64_t sec, unsigned long nanos)
  {
      /*
       * Each byte contributes fewer than 3 base-10 digits, so this
       * is always large enough.
       */
      char buf[1 + 3*sizeof(sec) + 1 + 3*sizeof(nanos)];
      char *cursor = buf + sizeof(buf) - 1;
      int remaining = 10;
      int digit = 0;

      *cursor = 0;	/* The string is built backwards from its terminator. */

      /* Peel off digits until the first non-zero one (trailing zeros). */
      while (remaining > 0 && digit == 0) {
          digit = nanos % 10;
          nanos /= 10;
          remaining--;
      }

      /* Emit a fractional part only if a non-zero digit was found. */
      if (remaining > 0) {
          for (; remaining > 0; remaining--) {
              *--cursor = "0123456789"[digit];
              digit = nanos % 10;
              nanos /= 10;
          }
          *--cursor = '.';
      }

      /* Prepend the whole seconds and hand the text to the header. */
      cursor = format_int(cursor, sec);
      add_pax_attr(as, key, cursor);
  }
  /*
   * Write the decimal text of 'i' backwards, ending just before 't'.
   *
   * 't' points one past where the last digit belongs (normally at the
   * terminating NUL of a work buffer).  The return value points at the
   * first character written: the leading '-' or the first digit.
   */
  static char *
  format_int(char *t, int64_t i)
  {
      const int negative = (i < 0);
      uint64_t magnitude;

      if (!negative)
          magnitude = i;
      else if (i == INT64_MIN)
          /* -INT64_MIN does not fit in int64_t; build it unsigned. */
          magnitude = (uint64_t)(INT64_MAX) + 1;
      else
          magnitude = (uint64_t)(-i);

      /* Always produce at least one digit, so zero prints as "0". */
      for (;;) {
          *--t = "0123456789"[magnitude % 10];
          magnitude /= 10;
          if (magnitude == 0)
              break;
      }
      if (negative)
          *--t = '-';
      return (t);
  }
  /*
   * Append an integer-valued attribute to the pax header, formatting
   * 'value' as signed decimal text.
   */
  static void
  add_pax_attr_int(struct archive_string *as, const char *key, int64_t value)
  {
      /* Fewer than 3 base-10 digits per byte, plus room for the NUL. */
      char digits[1 + 3 * sizeof(value)];
      char *end = digits + sizeof(digits) - 1;
      const char *text;

      *end = 0;
      text = format_int(end, value);
      add_pax_attr(as, key, text);
  }
  /*
   * Convert a NUL-terminated wide-character string to UTF-8.
   *
   * A high surrogate (0xd800-0xdbff) immediately followed by a low
   * surrogate (0xdc00-0xdfff) is combined into one code point before
   * encoding.  Values up to 0x7fffffff are encoded using the original
   * 1- to 6-byte UTF-8 forms; larger values are silently dropped.
   *
   * Returns a malloc()ed, NUL-terminated string that the caller must
   * free().  On allocation failure __archive_errx() is called and, should
   * that return, NULL is returned.
   */
  static char *
  utf8_encode(const wchar_t *wval)
  {
      int utf8len;
      const wchar_t *wp;
      unsigned long wc;
      char *utf8_value, *p;

      /* Pass 1: compute the size of the encoded string. */
      utf8len = 0;
      for (wp = wval; *wp != L'\0'; ) {
          wc = *wp++;

          if (wc >= 0xd800 && wc <= 0xdbff
              && *wp >= 0xdc00 && *wp <= 0xdfff) {
              /* This is a surrogate pair.  Combine into a
               * full Unicode value before encoding into
               * UTF-8. */
              wc = (wc - 0xd800) << 10; /* High 10 bits */
              wc += (*wp++ - 0xdc00); /* Low 10 bits */
              wc += 0x10000; /* Skip BMP */
          }
          if (wc <= 0x7f)
              utf8len++;
          else if (wc <= 0x7ff)
              utf8len += 2;
          else if (wc <= 0xffff)
              utf8len += 3;
          else if (wc <= 0x1fffff)
              utf8len += 4;
          else if (wc <= 0x3ffffff)
              utf8len += 5;
          else if (wc <= 0x7fffffff)
              utf8len += 6;
          /* Ignore larger values; UTF-8 can't encode them. */
      }

      utf8_value = (char *)malloc(utf8len + 1);
      if (utf8_value == NULL) {
          __archive_errx(1, "Not enough memory for attributes");
          return (NULL);
      }

      /* Pass 2: encode.  The branches must mirror pass 1 exactly so
       * that the bytes written match the size computed above. */
      for (wp = wval, p = utf8_value; *wp != L'\0'; ) {
          wc = *wp++;
          if (wc >= 0xd800 && wc <= 0xdbff
              && *wp >= 0xdc00 && *wp <= 0xdfff) {
              /* Combine surrogate pair. */
              wc = (wc - 0xd800) << 10;
              wc += *wp++ - 0xdc00 + 0x10000;
          }
          if (wc <= 0x7f) {
              *p++ = (char)wc;
          } else if (wc <= 0x7ff) {
              p[0] = 0xc0 | ((wc >> 6) & 0x1f);
              p[1] = 0x80 | (wc & 0x3f);
              p += 2;
          } else if (wc <= 0xffff) {
              p[0] = 0xe0 | ((wc >> 12) & 0x0f);
              p[1] = 0x80 | ((wc >> 6) & 0x3f);
              p[2] = 0x80 | (wc & 0x3f);
              p += 3;
          } else if (wc <= 0x1fffff) {
              p[0] = 0xf0 | ((wc >> 18) & 0x07);
              p[1] = 0x80 | ((wc >> 12) & 0x3f);
              p[2] = 0x80 | ((wc >> 6) & 0x3f);
              p[3] = 0x80 | (wc & 0x3f);
              p += 4;
          } else if (wc <= 0x3ffffff) {
              p[0] = 0xf8 | ((wc >> 24) & 0x03);
              p[1] = 0x80 | ((wc >> 18) & 0x3f);
              p[2] = 0x80 | ((wc >> 12) & 0x3f);
              p[3] = 0x80 | ((wc >> 6) & 0x3f);
              p[4] = 0x80 | (wc & 0x3f);
              p += 5;
          } else if (wc <= 0x7fffffff) {
              /* Six bytes: one lead byte plus five continuation
               * bytes, each carrying six payload bits. */
              p[0] = 0xfc | ((wc >> 30) & 0x01);
              p[1] = 0x80 | ((wc >> 24) & 0x3f);
              p[2] = 0x80 | ((wc >> 18) & 0x3f);
              p[3] = 0x80 | ((wc >> 12) & 0x3f);
              p[4] = 0x80 | ((wc >> 6) & 0x3f);
              p[5] = 0x80 | (wc & 0x3f);
              p += 6;
          }
          /* Ignore larger values; UTF-8 can't encode them. */
      }
      *p = '\0';

      return (utf8_value);
  }
  /*
   * Append an attribute whose value is a wide-character string: the
   * value is converted to UTF-8 first.  Nothing is added when the
   * conversion yields NULL.
   */
  static void
  add_pax_attr_w(struct archive_string *as, const char *key, const wchar_t *wval)
  {
      char *encoded = utf8_encode(wval);

      if (encoded != NULL) {
          add_pax_attr(as, key, encoded);
          free(encoded);	/* utf8_encode() hands over ownership. */
      }
  }
  /*
   * Append one key/value record to the pax header, taking care of the
   * self-referential length prefix.
   *
   * A pax record is laid out as:
   *     <len> <space> <key> <=> <value> <nl>
   * where <len> is the decimal size of the whole record, including the
   * digits of <len> itself.
   */
  static void
  add_pax_attr(struct archive_string *as, const char *key, const char *value)
  {
      char numbuf[1 + 3 * sizeof(int)]; /* < 3 base-10 digits per byte */
      int body, width, bound, rest;

      /* Everything except the length digits: ' ' key '=' value '\n'. */
      body = 1 + strlen(key) + 1 + strlen(value) + 1;

      /*
       * Count the decimal digits of 'body' and, alongside, find the
       * smallest power of ten that is larger than it.
       */
      width = 0;
      bound = 1;
      for (rest = body; rest > 0; rest = rest / 10) {
          width++;
          bound = bound * 10;
      }

      /*
       * Adding the digits may itself push the total to the next power
       * of ten (a 99-char body plus "99" exceeds 100), in which case
       * one more digit is required.
       */
      if (body + width >= bound)
          width++;

      /* The total is now known; assemble the record. */
      numbuf[sizeof(numbuf) - 1] = 0; /* Null-terminate the work area. */
      archive_strcat(as,
          format_int(numbuf + sizeof(numbuf) - 1, body + width));
      archive_strappend_char(as, ' ');
      archive_strcat(as, key);
      archive_strappend_char(as, '=');
      archive_strcat(as, value);
      archive_strappend_char(as, '\n');
  }
  328. static void
  329. archive_write_pax_header_xattrs(struct pax *pax, struct archive_entry *entry)
  330. {
  331. struct archive_string s;
  332. int i = archive_entry_xattr_reset(entry);
  333. while (i--) {
  334. const char *name;
  335. const void *value;
  336. char *encoded_value;
  337. char *url_encoded_name = NULL, *encoded_name = NULL;
  338. wchar_t *wcs_name = NULL;
  339. size_t size;
  340. archive_entry_xattr_next(entry, &name, &value, &size);
  341. /* Name is URL-encoded, then converted to wchar_t,
  342. * then UTF-8 encoded. */
  343. url_encoded_name = url_encode(name);
  344. if (url_encoded_name != NULL) {
  345. /* Convert narrow-character to wide-character. */
  346. int wcs_length = strlen(url_encoded_name);
  347. wcs_name = (wchar_t *)malloc((wcs_length + 1) * sizeof(wchar_t));
  348. if (wcs_name == NULL)
  349. __archive_errx(1, "No memory for xattr conversion");
  350. mbstowcs(wcs_name, url_encoded_name, wcs_length);
  351. wcs_name[wcs_length] = 0;
  352. free(url_encoded_name); /* Done with this. */
  353. }
  354. if (wcs_name != NULL) {
  355. encoded_name = utf8_encode(wcs_name);
  356. free(wcs_name); /* Done with wchar_t name. */
  357. }
  358. encoded_value = base64_encode((const char *)value, size);
  359. if (encoded_name != NULL && encoded_value != NULL) {
  360. archive_string_init(&s);
  361. archive_strcpy(&s, "LIBARCHIVE.xattr.");
  362. archive_strcat(&s, encoded_name);
  363. add_pax_attr(&(pax->pax_header), s.s, encoded_value);
  364. archive_string_free(&s);
  365. }
  366. free(encoded_name);
  367. free(encoded_value);
  368. }
  369. }
  370. /*
  371. * TODO: Consider adding 'comment' and 'charset' fields to
  372. * archive_entry so that clients can specify them. Also, consider
  373. * adding generic key/value tags so clients can add arbitrary
  374. * key/value data.
  375. */
  376. static int
  377. archive_write_pax_header(struct archive_write *a,
  378. struct archive_entry *entry_original)
  379. {
  380. struct archive_entry *entry_main;
  381. const char *p;
  382. char *t;
  383. const wchar_t *wp;
  384. const char *suffix;
  385. int need_extension, r, ret;
  386. struct pax *pax;
  387. const char *hdrcharset = NULL;
  388. const char *hardlink;
  389. const char *path = NULL, *linkpath = NULL;
  390. const char *uname = NULL, *gname = NULL;
  391. const wchar_t *path_w = NULL, *linkpath_w = NULL;
  392. const wchar_t *uname_w = NULL, *gname_w = NULL;
  393. char paxbuff[512];
  394. char ustarbuff[512];
  395. char ustar_entry_name[256];
  396. char pax_entry_name[256];
  397. ret = ARCHIVE_OK;
  398. need_extension = 0;
  399. pax = (struct pax *)a->format_data;
  400. hardlink = archive_entry_hardlink(entry_original);
  401. /* Make sure this is a type of entry that we can handle here */
  402. if (hardlink == NULL) {
  403. switch (archive_entry_filetype(entry_original)) {
  404. case AE_IFBLK:
  405. case AE_IFCHR:
  406. case AE_IFIFO:
  407. case AE_IFLNK:
  408. case AE_IFREG:
  409. break;
  410. case AE_IFDIR:
  411. /*
  412. * Ensure a trailing '/'. Modify the original
  413. * entry so the client sees the change.
  414. */
  415. p = archive_entry_pathname(entry_original);
  416. if (p[strlen(p) - 1] != '/') {
  417. t = (char *)malloc(strlen(p) + 2);
  418. if (t == NULL) {
  419. archive_set_error(&a->archive, ENOMEM,
  420. "Can't allocate pax data");
  421. return(ARCHIVE_FATAL);
  422. }
  423. strcpy(t, p);
  424. strcat(t, "/");
  425. archive_entry_copy_pathname(entry_original, t);
  426. free(t);
  427. }
  428. break;
  429. default:
  430. archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
  431. "tar format cannot archive this (type=0%lo)",
  432. (unsigned long)archive_entry_filetype(entry_original));
  433. return (ARCHIVE_WARN);
  434. }
  435. }
  436. /* Copy entry so we can modify it as needed. */
  437. entry_main = archive_entry_clone(entry_original);
  438. archive_string_empty(&(pax->pax_header)); /* Blank our work area. */
  439. /*
  440. * First, check the name fields and see if any of them
  441. * require binary coding. If any of them does, then all of
  442. * them do.
  443. */
  444. hdrcharset = NULL;
  445. path = archive_entry_pathname(entry_main);
  446. path_w = archive_entry_pathname_w(entry_main);
  447. if (path != NULL && path_w == NULL) {
  448. archive_set_error(&a->archive, EILSEQ,
  449. "Can't translate pathname '%s' to UTF-8", path);
  450. ret = ARCHIVE_WARN;
  451. hdrcharset = "BINARY";
  452. }
  453. uname = archive_entry_uname(entry_main);
  454. uname_w = archive_entry_uname_w(entry_main);
  455. if (uname != NULL && uname_w == NULL) {
  456. archive_set_error(&a->archive, EILSEQ,
  457. "Can't translate uname '%s' to UTF-8", uname);
  458. ret = ARCHIVE_WARN;
  459. hdrcharset = "BINARY";
  460. }
  461. gname = archive_entry_gname(entry_main);
  462. gname_w = archive_entry_gname_w(entry_main);
  463. if (gname != NULL && gname_w == NULL) {
  464. archive_set_error(&a->archive, EILSEQ,
  465. "Can't translate gname '%s' to UTF-8", gname);
  466. ret = ARCHIVE_WARN;
  467. hdrcharset = "BINARY";
  468. }
  469. linkpath = hardlink;
  470. if (linkpath != NULL) {
  471. linkpath_w = archive_entry_hardlink_w(entry_main);
  472. } else {
  473. linkpath = archive_entry_symlink(entry_main);
  474. if (linkpath != NULL)
  475. linkpath_w = archive_entry_symlink_w(entry_main);
  476. }
  477. if (linkpath != NULL && linkpath_w == NULL) {
  478. archive_set_error(&a->archive, EILSEQ,
  479. "Can't translate linkpath '%s' to UTF-8", linkpath);
  480. ret = ARCHIVE_WARN;
  481. hdrcharset = "BINARY";
  482. }
  483. /* Store the header encoding first, to be nice to readers. */
  484. if (hdrcharset != NULL)
  485. add_pax_attr(&(pax->pax_header), "hdrcharset", hdrcharset);
  486. /*
  487. * If name is too long, or has non-ASCII characters, add
  488. * 'path' to pax extended attrs. (Note that an unconvertible
  489. * name must have non-ASCII characters.)
  490. */
  491. if (path == NULL) {
  492. /* We don't have a narrow version, so we have to store
  493. * the wide version. */
  494. add_pax_attr_w(&(pax->pax_header), "path", path_w);
  495. archive_entry_set_pathname(entry_main, "@WidePath");
  496. need_extension = 1;
  497. } else if (has_non_ASCII(path_w)) {
  498. /* We have non-ASCII characters. */
  499. if (path_w == NULL || hdrcharset != NULL) {
  500. /* Can't do UTF-8, so store it raw. */
  501. add_pax_attr(&(pax->pax_header), "path", path);
  502. } else {
  503. /* Store UTF-8 */
  504. add_pax_attr_w(&(pax->pax_header),
  505. "path", path_w);
  506. }
  507. archive_entry_set_pathname(entry_main,
  508. build_ustar_entry_name(ustar_entry_name,
  509. path, strlen(path), NULL));
  510. need_extension = 1;
  511. } else {
  512. /* We have an all-ASCII path; we'd like to just store
  513. * it in the ustar header if it will fit. Yes, this
  514. * duplicates some of the logic in
  515. * archive_write_set_format_ustar.c
  516. */
  517. if (strlen(path) <= 100) {
  518. /* Fits in the old 100-char tar name field. */
  519. } else {
  520. /* Find largest suffix that will fit. */
  521. /* Note: strlen() > 100, so strlen() - 100 - 1 >= 0 */
  522. suffix = strchr(path + strlen(path) - 100 - 1, '/');
  523. /* Don't attempt an empty prefix. */
  524. if (suffix == path)
  525. suffix = strchr(suffix + 1, '/');
  526. /* We can put it in the ustar header if it's
  527. * all ASCII and it's either <= 100 characters
  528. * or can be split at a '/' into a prefix <=
  529. * 155 chars and a suffix <= 100 chars. (Note
  530. * the strchr() above will return NULL exactly
  531. * when the path can't be split.)
  532. */
  533. if (suffix == NULL /* Suffix > 100 chars. */
  534. || suffix[1] == '\0' /* empty suffix */
  535. || suffix - path > 155) /* Prefix > 155 chars */
  536. {
  537. if (path_w == NULL || hdrcharset != NULL) {
  538. /* Can't do UTF-8, so store it raw. */
  539. add_pax_attr(&(pax->pax_header),
  540. "path", path);
  541. } else {
  542. /* Store UTF-8 */
  543. add_pax_attr_w(&(pax->pax_header),
  544. "path", path_w);
  545. }
  546. archive_entry_set_pathname(entry_main,
  547. build_ustar_entry_name(ustar_entry_name,
  548. path, strlen(path), NULL));
  549. need_extension = 1;
  550. }
  551. }
  552. }
  553. if (linkpath != NULL) {
  554. /* If link name is too long or has non-ASCII characters, add
  555. * 'linkpath' to pax extended attrs. */
  556. if (strlen(linkpath) > 100 || linkpath_w == NULL
  557. || linkpath_w == NULL || has_non_ASCII(linkpath_w)) {
  558. if (linkpath_w == NULL || hdrcharset != NULL)
  559. /* If the linkpath is not convertible
  560. * to wide, or we're encoding in
  561. * binary anyway, store it raw. */
  562. add_pax_attr(&(pax->pax_header),
  563. "linkpath", linkpath);
  564. else
  565. /* If the link is long or has a
  566. * non-ASCII character, store it as a
  567. * pax extended attribute. */
  568. add_pax_attr_w(&(pax->pax_header),
  569. "linkpath", linkpath_w);
  570. if (strlen(linkpath) > 100) {
  571. if (hardlink != NULL)
  572. archive_entry_set_hardlink(entry_main,
  573. "././@LongHardLink");
  574. else
  575. archive_entry_set_symlink(entry_main,
  576. "././@LongSymLink");
  577. }
  578. need_extension = 1;
  579. }
  580. }
  581. /* If file size is too large, add 'size' to pax extended attrs. */
  582. if (archive_entry_size(entry_main) >= (((int64_t)1) << 33)) {
  583. add_pax_attr_int(&(pax->pax_header), "size",
  584. archive_entry_size(entry_main));
  585. need_extension = 1;
  586. }
  587. /* If numeric GID is too large, add 'gid' to pax extended attrs. */
  588. if (archive_entry_gid(entry_main) >= (1 << 18)) {
  589. add_pax_attr_int(&(pax->pax_header), "gid",
  590. archive_entry_gid(entry_main));
  591. need_extension = 1;
  592. }
  593. /* If group name is too large or has non-ASCII characters, add
  594. * 'gname' to pax extended attrs. */
  595. if (gname != NULL) {
  596. if (strlen(gname) > 31
  597. || gname_w == NULL
  598. || has_non_ASCII(gname_w))
  599. {
  600. if (gname_w == NULL || hdrcharset != NULL) {
  601. add_pax_attr(&(pax->pax_header),
  602. "gname", gname);
  603. } else {
  604. add_pax_attr_w(&(pax->pax_header),
  605. "gname", gname_w);
  606. }
  607. need_extension = 1;
  608. }
  609. }
  610. /* If numeric UID is too large, add 'uid' to pax extended attrs. */
  611. if (archive_entry_uid(entry_main) >= (1 << 18)) {
  612. add_pax_attr_int(&(pax->pax_header), "uid",
  613. archive_entry_uid(entry_main));
  614. need_extension = 1;
  615. }
  616. /* Add 'uname' to pax extended attrs if necessary. */
  617. if (uname != NULL) {
  618. if (strlen(uname) > 31
  619. || uname_w == NULL
  620. || has_non_ASCII(uname_w))
  621. {
  622. if (uname_w == NULL || hdrcharset != NULL) {
  623. add_pax_attr(&(pax->pax_header),
  624. "uname", uname);
  625. } else {
  626. add_pax_attr_w(&(pax->pax_header),
  627. "uname", uname_w);
  628. }
  629. need_extension = 1;
  630. }
  631. }
  632. /*
  633. * POSIX/SUSv3 doesn't provide a standard key for large device
  634. * numbers. I use the same keys here that Joerg Schilling
  635. * used for 'star.' (Which, somewhat confusingly, are called
  636. * "devXXX" even though they code "rdev" values.) No doubt,
  637. * other implementations use other keys. Note that there's no
  638. * reason we can't write the same information into a number of
  639. * different keys.
  640. *
  641. * Of course, this is only needed for block or char device entries.
  642. */
  643. if (archive_entry_filetype(entry_main) == AE_IFBLK
  644. || archive_entry_filetype(entry_main) == AE_IFCHR) {
  645. /*
  646. * If rdevmajor is too large, add 'SCHILY.devmajor' to
  647. * extended attributes.
  648. */
  649. dev_t rdevmajor, rdevminor;
  650. rdevmajor = archive_entry_rdevmajor(entry_main);
  651. rdevminor = archive_entry_rdevminor(entry_main);
  652. if (rdevmajor >= (1 << 18)) {
  653. add_pax_attr_int(&(pax->pax_header), "SCHILY.devmajor",
  654. rdevmajor);
  655. /*
  656. * Non-strict formatting below means we don't
  657. * have to truncate here. Not truncating improves
  658. * the chance that some more modern tar archivers
  659. * (such as GNU tar 1.13) can restore the full
  660. * value even if they don't understand the pax
  661. * extended attributes. See my rant below about
  662. * file size fields for additional details.
  663. */
  664. /* archive_entry_set_rdevmajor(entry_main,
  665. rdevmajor & ((1 << 18) - 1)); */
  666. need_extension = 1;
  667. }
  668. /*
  669. * If devminor is too large, add 'SCHILY.devminor' to
  670. * extended attributes.
  671. */
  672. if (rdevminor >= (1 << 18)) {
  673. add_pax_attr_int(&(pax->pax_header), "SCHILY.devminor",
  674. rdevminor);
  675. /* Truncation is not necessary here, either. */
  676. /* archive_entry_set_rdevminor(entry_main,
  677. rdevminor & ((1 << 18) - 1)); */
  678. need_extension = 1;
  679. }
  680. }
  681. /*
  682. * Technically, the mtime field in the ustar header can
  683. * support 33 bits, but many platforms use signed 32-bit time
  684. * values. The cutoff of 0x7fffffff here is a compromise.
  685. * Yes, this check is duplicated just below; this helps to
  686. * avoid writing an mtime attribute just to handle a
  687. * high-resolution timestamp in "restricted pax" mode.
  688. */
  689. if (!need_extension &&
  690. ((archive_entry_mtime(entry_main) < 0)
  691. || (archive_entry_mtime(entry_main) >= 0x7fffffff)))
  692. need_extension = 1;
  693. /* I use a star-compatible file flag attribute. */
  694. p = archive_entry_fflags_text(entry_main);
  695. if (!need_extension && p != NULL && *p != '\0')
  696. need_extension = 1;
  697. /* If there are non-trivial ACL entries, we need an extension. */
  698. if (!need_extension && archive_entry_acl_count(entry_original,
  699. ARCHIVE_ENTRY_ACL_TYPE_ACCESS) > 0)
  700. need_extension = 1;
  701. /* If there are non-trivial ACL entries, we need an extension. */
  702. if (!need_extension && archive_entry_acl_count(entry_original,
  703. ARCHIVE_ENTRY_ACL_TYPE_DEFAULT) > 0)
  704. need_extension = 1;
  705. /* If there are extended attributes, we need an extension */
  706. if (!need_extension && archive_entry_xattr_count(entry_original) > 0)
  707. need_extension = 1;
  708. /*
  709. * The following items are handled differently in "pax
  710. * restricted" format. In particular, in "pax restricted"
  711. * format they won't be added unless need_extension is
  712. * already set (we're already generating an extended header, so
  713. * may as well include these).
  714. */
  715. if (a->archive.archive_format != ARCHIVE_FORMAT_TAR_PAX_RESTRICTED ||
  716. need_extension) {
  717. if (archive_entry_mtime(entry_main) < 0 ||
  718. archive_entry_mtime(entry_main) >= 0x7fffffff ||
  719. archive_entry_mtime_nsec(entry_main) != 0)
  720. add_pax_attr_time(&(pax->pax_header), "mtime",
  721. archive_entry_mtime(entry_main),
  722. archive_entry_mtime_nsec(entry_main));
  723. if (archive_entry_ctime(entry_main) != 0 ||
  724. archive_entry_ctime_nsec(entry_main) != 0)
  725. add_pax_attr_time(&(pax->pax_header), "ctime",
  726. archive_entry_ctime(entry_main),
  727. archive_entry_ctime_nsec(entry_main));
  728. if (archive_entry_atime(entry_main) != 0 ||
  729. archive_entry_atime_nsec(entry_main) != 0)
  730. add_pax_attr_time(&(pax->pax_header), "atime",
  731. archive_entry_atime(entry_main),
  732. archive_entry_atime_nsec(entry_main));
  733. /* Store birth/creationtime only if it's earlier than mtime */
  734. if (archive_entry_birthtime_is_set(entry_main) &&
  735. archive_entry_birthtime(entry_main)
  736. < archive_entry_mtime(entry_main))
  737. add_pax_attr_time(&(pax->pax_header),
  738. "LIBARCHIVE.creationtime",
  739. archive_entry_birthtime(entry_main),
  740. archive_entry_birthtime_nsec(entry_main));
  741. /* I use a star-compatible file flag attribute. */
  742. p = archive_entry_fflags_text(entry_main);
  743. if (p != NULL && *p != '\0')
  744. add_pax_attr(&(pax->pax_header), "SCHILY.fflags", p);
  745. /* I use star-compatible ACL attributes. */
  746. wp = archive_entry_acl_text_w(entry_original,
  747. ARCHIVE_ENTRY_ACL_TYPE_ACCESS |
  748. ARCHIVE_ENTRY_ACL_STYLE_EXTRA_ID);
  749. if (wp != NULL && *wp != L'\0')
  750. add_pax_attr_w(&(pax->pax_header),
  751. "SCHILY.acl.access", wp);
  752. wp = archive_entry_acl_text_w(entry_original,
  753. ARCHIVE_ENTRY_ACL_TYPE_DEFAULT |
  754. ARCHIVE_ENTRY_ACL_STYLE_EXTRA_ID);
  755. if (wp != NULL && *wp != L'\0')
  756. add_pax_attr_w(&(pax->pax_header),
  757. "SCHILY.acl.default", wp);
  758. /* Include star-compatible metadata info. */
  759. /* Note: "SCHILY.dev{major,minor}" are NOT the
  760. * major/minor portions of "SCHILY.dev". */
  761. add_pax_attr_int(&(pax->pax_header), "SCHILY.dev",
  762. archive_entry_dev(entry_main));
  763. add_pax_attr_int(&(pax->pax_header), "SCHILY.ino",
  764. archive_entry_ino(entry_main));
  765. add_pax_attr_int(&(pax->pax_header), "SCHILY.nlink",
  766. archive_entry_nlink(entry_main));
  767. /* Store extended attributes */
  768. archive_write_pax_header_xattrs(pax, entry_original);
  769. }
  770. /* Only regular files have data. */
  771. if (archive_entry_filetype(entry_main) != AE_IFREG)
  772. archive_entry_set_size(entry_main, 0);
  773. /*
  774. * Pax-restricted does not store data for hardlinks, in order
  775. * to improve compatibility with ustar.
  776. */
  777. if (a->archive.archive_format != ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE &&
  778. hardlink != NULL)
  779. archive_entry_set_size(entry_main, 0);
  780. /*
  781. * XXX Full pax interchange format does permit a hardlink
  782. * entry to have data associated with it. I'm not supporting
  783. * that here because the client expects me to tell them whether
  784. * or not this format expects data for hardlinks. If I
  785. * don't check here, then every pax archive will end up with
  786. * duplicated data for hardlinks. Someday, there may be
  787. * need to select this behavior, in which case the following
  788. * will need to be revisited. XXX
  789. */
  790. if (hardlink != NULL)
  791. archive_entry_set_size(entry_main, 0);
  792. /* Format 'ustar' header for main entry.
  793. *
  794. * The trouble with file size: If the reader can't understand
  795. * the file size, they may not be able to locate the next
  796. * entry and the rest of the archive is toast. Pax-compliant
  797. * readers are supposed to ignore the file size in the main
  798. * header, so the question becomes how to maximize portability
  799. * for readers that don't support pax attribute extensions.
  800. * For maximum compatibility, I permit numeric extensions in
  801. * the main header so that the file size stored will always be
  802. * correct, even if it's in a format that only some
  803. * implementations understand. The technique used here is:
  804. *
  805. * a) If possible, follow the standard exactly. This handles
  806. * files up to 8 gigabytes minus 1.
  807. *
  808. * b) If that fails, try octal but omit the field terminator.
  809. * That handles files up to 64 gigabytes minus 1.
  810. *
  811. * c) Otherwise, use base-256 extensions. That handles files
  812. * up to 2^63 in this implementation, with the potential to
  813. * go up to 2^94. That should hold us for a while. ;-)
  814. *
  815. * The non-strict formatter uses similar logic for other
  816. * numeric fields, though they're less critical.
  817. */
  818. __archive_write_format_header_ustar(a, ustarbuff, entry_main, -1, 0);
  819. /* If we built any extended attributes, write that entry first. */
  820. if (archive_strlen(&(pax->pax_header)) > 0) {
  821. struct archive_entry *pax_attr_entry;
  822. time_t s;
  823. uid_t uid;
  824. gid_t gid;
  825. mode_t mode;
  826. long ns;
  827. pax_attr_entry = archive_entry_new();
  828. p = archive_entry_pathname(entry_main);
  829. archive_entry_set_pathname(pax_attr_entry,
  830. build_pax_attribute_name(pax_entry_name, p));
  831. archive_entry_set_size(pax_attr_entry,
  832. archive_strlen(&(pax->pax_header)));
  833. /* Copy uid/gid (but clip to ustar limits). */
  834. uid = archive_entry_uid(entry_main);
  835. if (uid >= 1 << 18)
  836. uid = (1 << 18) - 1;
  837. archive_entry_set_uid(pax_attr_entry, uid);
  838. gid = archive_entry_gid(entry_main);
  839. if (gid >= 1 << 18)
  840. gid = (1 << 18) - 1;
  841. archive_entry_set_gid(pax_attr_entry, gid);
  842. /* Copy mode over (but not setuid/setgid bits) */
  843. mode = archive_entry_mode(entry_main);
  844. #ifdef S_ISUID
  845. mode &= ~S_ISUID;
  846. #endif
  847. #ifdef S_ISGID
  848. mode &= ~S_ISGID;
  849. #endif
  850. #ifdef S_ISVTX
  851. mode &= ~S_ISVTX;
  852. #endif
  853. archive_entry_set_mode(pax_attr_entry, mode);
  854. /* Copy uname/gname. */
  855. archive_entry_set_uname(pax_attr_entry,
  856. archive_entry_uname(entry_main));
  857. archive_entry_set_gname(pax_attr_entry,
  858. archive_entry_gname(entry_main));
  859. /* Copy mtime, but clip to ustar limits. */
  860. s = archive_entry_mtime(entry_main);
  861. ns = archive_entry_mtime_nsec(entry_main);
  862. if (s < 0) { s = 0; ns = 0; }
  863. if (s > 0x7fffffff) { s = 0x7fffffff; ns = 0; }
  864. archive_entry_set_mtime(pax_attr_entry, s, ns);
  865. /* Ditto for atime. */
  866. s = archive_entry_atime(entry_main);
  867. ns = archive_entry_atime_nsec(entry_main);
  868. if (s < 0) { s = 0; ns = 0; }
  869. if (s > 0x7fffffff) { s = 0x7fffffff; ns = 0; }
  870. archive_entry_set_atime(pax_attr_entry, s, ns);
  871. /* Standard ustar doesn't support ctime. */
  872. archive_entry_set_ctime(pax_attr_entry, 0, 0);
  873. r = __archive_write_format_header_ustar(a, paxbuff,
  874. pax_attr_entry, 'x', 1);
  875. archive_entry_free(pax_attr_entry);
  876. /* Note that the 'x' header shouldn't ever fail to format */
  877. if (r != 0) {
  878. const char *msg = "archive_write_pax_header: "
  879. "'x' header failed?! This can't happen.\n";
  880. errmsg(msg);
  881. exit(1);
  882. }
  883. r = (a->compressor.write)(a, paxbuff, 512);
  884. if (r != ARCHIVE_OK) {
  885. pax->entry_bytes_remaining = 0;
  886. pax->entry_padding = 0;
  887. return (ARCHIVE_FATAL);
  888. }
  889. pax->entry_bytes_remaining = archive_strlen(&(pax->pax_header));
  890. pax->entry_padding = 0x1ff & (-(int64_t)pax->entry_bytes_remaining);
  891. r = (a->compressor.write)(a, pax->pax_header.s,
  892. archive_strlen(&(pax->pax_header)));
  893. if (r != ARCHIVE_OK) {
  894. /* If a write fails, we're pretty much toast. */
  895. return (ARCHIVE_FATAL);
  896. }
  897. /* Pad out the end of the entry. */
  898. r = write_nulls(a, pax->entry_padding);
  899. if (r != ARCHIVE_OK) {
  900. /* If a write fails, we're pretty much toast. */
  901. return (ARCHIVE_FATAL);
  902. }
  903. pax->entry_bytes_remaining = pax->entry_padding = 0;
  904. }
  905. /* Write the header for main entry. */
  906. r = (a->compressor.write)(a, ustarbuff, 512);
  907. if (r != ARCHIVE_OK)
  908. return (r);
  909. /*
  910. * Inform the client of the on-disk size we're using, so
  911. * they can avoid unnecessarily writing a body for something
  912. * that we're just going to ignore.
  913. */
  914. archive_entry_set_size(entry_original, archive_entry_size(entry_main));
  915. pax->entry_bytes_remaining = archive_entry_size(entry_main);
  916. pax->entry_padding = 0x1ff & (-(int64_t)pax->entry_bytes_remaining);
  917. archive_entry_free(entry_main);
  918. return (ret);
  919. }
  /*
   * Derive a name that fits the regular 'ustar' header from an arbitrary
   * pathname, optionally splicing one extra directory element in front
   * of the final path component.
   *
   * The result is assembled from four consecutive sections:
   *   1) "prefix" directory names,
   *   2) "suffix" directory names,
   *   3) the inserted directory name (only when 'insert' is non-NULL),
   *   4) the filename.
   *
   * Constraints on those sections:
   *   * Sections 1 and 2 together are an initial portion of the
   *     original directory name.
   *   * Section 3 comes from the caller and should not contain a leading
   *     or trailing '/'.
   *   * Section 4 is an initial portion of the base filename.
   *   * Sections 2, 3 and 4 together must be <= 99 chars so they fit
   *     the ustar 'name' field.
   *   * Section 1 must be <= 155 chars so it fits the ustar 'prefix'
   *     field.
   *   * If the original name ended in '/', so does the new one.
   *   * Trailing "/." sequences may be stripped.
   *
   * Note: the ustar format does not store the '/' separating sections
   * 1 and 2, but does store the '/' separating sections 2 and 3.
   *
   * Parameters:
   *   dest       - output buffer; no bound is checked here, so the caller
   *                must supply one large enough for the result.
   *   src        - original pathname.
   *   src_length - number of bytes of 'src' to consider.
   *   insert     - extra directory element, or NULL for none.
   *
   * Returns 'dest', which always holds a NUL-terminated string.
   */
  static char *
  build_ustar_entry_name(char *dest, const char *src, size_t src_length,
      const char *insert)
  {
      const char *prefix, *prefix_end;
      const char *suffix, *suffix_end;
      const char *filename, *filename_end;
      char *p;
      int need_slash = 0; /* Was there a trailing slash? */
      size_t suffix_length = 99;
      size_t insert_length;

      /* Length of additional dir element to be added. */
      if (insert == NULL)
          insert_length = 0;
      else
          /* +2 here allows for '/' before and after the insert. */
          insert_length = strlen(insert) + 2;

      /* Step 0: Quick bailout in a common case. */
      if (src_length < 100 && insert == NULL) {
          strncpy(dest, src, src_length);
          dest[src_length] = '\0';
          return (dest);
      }

      /* Step 1: Locate filename and enforce the length restriction. */
      filename_end = src + src_length;
      /* Remove trailing '/' chars and '/.' pairs. */
      for (;;) {
          if (filename_end > src && filename_end[-1] == '/') {
              filename_end--;
              need_slash = 1; /* Remember to restore trailing '/'. */
              continue;
          }
          if (filename_end > src + 1 && filename_end[-1] == '.'
              && filename_end[-2] == '/') {
              filename_end -= 2;
              need_slash = 1; /* "foo/." will become "foo/" */
              continue;
          }
          break;
      }
      if (need_slash)
          suffix_length--;

      /*
       * Find start of filename: walk back to just after the last '/'.
       * Looking at filename[-1] (rather than starting at
       * filename_end - 1) keeps the scan inside the string even when
       * nothing is left after the stripping above.
       */
      filename = filename_end;
      while (filename > src && filename[-1] != '/')
          filename--;

      /*
       * Adjust filename_end so that filename + insert fits in 99 chars.
       * An insert too long for the field leaves no room at all; clamp
       * instead of letting the unsigned budget wrap around.
       */
      if (insert_length > suffix_length)
          suffix_length = 0;
      else
          suffix_length -= insert_length;
      if ((size_t)(filename_end - filename) > suffix_length)
          filename_end = filename + suffix_length;
      /* Calculate max size for "suffix" section (#2 above). */
      suffix_length -= (size_t)(filename_end - filename);

      /* Step 2: Locate the "prefix" section of the dirname, including
       * trailing '/'.  Lengths are compared so that no pointer beyond
       * the end of the string is ever formed. */
      prefix = src;
      if (filename - prefix > 155)
          prefix_end = prefix + 155;
      else
          prefix_end = filename;
      while (prefix_end > prefix && *prefix_end != '/')
          prefix_end--;
      if ((prefix_end < filename) && (*prefix_end == '/'))
          prefix_end++;

      /* Step 3: Locate the "suffix" section of the dirname,
       * including trailing '/'. */
      suffix = prefix_end;
      if ((size_t)(filename - suffix) > suffix_length)
          suffix_end = suffix + suffix_length; /* Enforce limit. */
      else
          suffix_end = filename;
      while (suffix_end > suffix && *suffix_end != '/')
          suffix_end--;
      if ((suffix_end < filename) && (*suffix_end == '/'))
          suffix_end++;

      /* Step 4: Build the new name. */
      /* strncpy is used purely as a bounded copy; the terminating NUL
       * is written explicitly at the end. */
      p = dest;
      if (prefix_end > prefix) {
          strncpy(p, prefix, prefix_end - prefix);
          p += prefix_end - prefix;
      }
      if (suffix_end > suffix) {
          strncpy(p, suffix, suffix_end - suffix);
          p += suffix_end - suffix;
      }
      if (insert != NULL) {
          /* Note: assume insert does not have leading or trailing '/' */
          strcpy(p, insert);
          p += strlen(insert);
          *p++ = '/';
      }
      strncpy(p, filename, filename_end - filename);
      p += filename_end - filename;
      if (need_slash)
          *p++ = '/';
      *p = '\0';

      return (dest);
  }
  /*
   * Choose the name stored in the ustar header that carries the pax
   * extended attributes.
   *
   * SUSv3 asks for 'dirname'/PaxHeader.'pid'/'filename', where 'pid' is
   * the PID of the archiving process.  Because that makes the output
   * differ on every run (painful for testing), this code currently
   * emits the simpler 'dirname'/PaxHeader/'filename'; the pid variant
   * is compiled out below.  (Someday this may become settable so the
   * SUS form can be the default while test harnesses override it.)
   *
   * Joerg Schilling has argued that none of this matters, since nobody
   * will hand-restore attributes from headers that got extracted as
   * plain files; on that basis 'star' uses /tmp/PaxHeader/'basename'.
   * That is simpler than the SUSv3 recommendation, but not entirely
   * convincing, and "/tmp" is a Unix-ism.
   *
   * Most of the work is delegated to build_ustar_entry_name(); this
   * routine only supplies the extra path element and deals with a few
   * pathological inputs.
   *
   * Writes the result into 'dest' and returns 'dest'.
   */
  static char *
  build_pax_attribute_name(char *dest, const char *src)
  {
      char buff[64];
      const char *end;

      /* A missing or empty filename gets a fixed placeholder. */
      if (src == NULL || src[0] == '\0')
          return (strcpy(dest, "PaxHeader/blank"));

      /*
       * Trim the tail one character at a time: drop a final '/', or
       * drop the '.' of a final "/.".
       */
      end = src + strlen(src);
      while (end > src) {
          if (end[-1] == '/')
              --end;
          else if (end > src + 1 && end[-1] == '.' && end[-2] == '/')
              --end;
          else
              break;
      }

      /* Nothing survived the trim: "/", "/.", "/./.", "/.//./." etc. */
      if (end == src)
          return (strcpy(dest, "/PaxHeader/rootdir"));

      /* A bare "." is turned into a usable filename. */
      if (end == src + 1 && src[0] == '.')
          return (strcpy(dest, "PaxHeader/currentdir"));

      /*
       * TODO: Keep this string in the 'pax' structure so it is not
       * rebuilt for every entry; that would also let clients
       * override it.
       */
  #if HAVE_GETPID && 0 /* Disable this for now; see above comment. */
      sprintf(buff, "PaxHeader.%d", getpid());
  #else
      /* If the platform can't fetch the pid, don't include it. */
      strcpy(buff, "PaxHeader");
  #endif

      /* General case: build a ustar-compatible name adding "/PaxHeader/". */
      build_ustar_entry_name(dest, src, end - src, buff);
      return (dest);
  }
  1121. /* Write two null blocks for the end of archive */
  1122. static int
  1123. archive_write_pax_finish(struct archive_write *a)
  1124. {
  1125. if (a->compressor.write == NULL)
  1126. return (ARCHIVE_OK);
  1127. return (write_nulls(a, 512 * 2));
  1128. }
  1129. static int
  1130. archive_write_pax_destroy(struct archive_write *a)
  1131. {
  1132. struct pax *pax;
  1133. pax = (struct pax *)a->format_data;
  1134. if (pax == NULL)
  1135. return (ARCHIVE_OK);
  1136. archive_string_free(&pax->pax_header);
  1137. free(pax);
  1138. a->format_data = NULL;
  1139. return (ARCHIVE_OK);
  1140. }
  1141. static int
  1142. archive_write_pax_finish_entry(struct archive_write *a)
  1143. {
  1144. struct pax *pax;
  1145. int ret;
  1146. pax = (struct pax *)a->format_data;
  1147. ret = write_nulls(a, pax->entry_bytes_remaining + pax->entry_padding);
  1148. pax->entry_bytes_remaining = pax->entry_padding = 0;
  1149. return (ret);
  1150. }
  1151. static int
  1152. write_nulls(struct archive_write *a, size_t padding)
  1153. {
  1154. int ret, to_write;
  1155. while (padding > 0) {
  1156. to_write = padding < a->null_length ? padding : a->null_length;
  1157. ret = (a->compressor.write)(a, a->nulls, to_write);
  1158. if (ret != ARCHIVE_OK)
  1159. return (ret);
  1160. padding -= to_write;
  1161. }
  1162. return (ARCHIVE_OK);
  1163. }
  1164. static ssize_t
  1165. archive_write_pax_data(struct archive_write *a, const void *buff, size_t s)
  1166. {
  1167. struct pax *pax;
  1168. int ret;
  1169. pax = (struct pax *)a->format_data;
  1170. if (s > pax->entry_bytes_remaining)
  1171. s = pax->entry_bytes_remaining;
  1172. ret = (a->compressor.write)(a, buff, s);
  1173. pax->entry_bytes_remaining -= s;
  1174. if (ret == ARCHIVE_OK)
  1175. return (s);
  1176. else
  1177. return (ret);
  1178. }
  1179. static int
  1180. archive_write_pax_data_skip(struct archive_write *a, off_t s)
  1181. {
  1182. struct pax *pax;
  1183. off_t skiplen;
  1184. pax = (struct pax *)a->format_data;
  1185. /* Skip entry bytes. */
  1186. if (s > (off_t)(pax->entry_bytes_remaining))
  1187. skiplen = pax->entry_bytes_remaining;
  1188. else
  1189. skiplen = s;
  1190. pax->entry_bytes_remaining -= skiplen;
  1191. s -= skiplen;
  1192. /* Skip padding bytes. */
  1193. if (s > (off_t)(pax->entry_padding))
  1194. skiplen = pax->entry_padding;
  1195. else
  1196. skiplen = s;
  1197. pax->entry_padding -= skiplen;
  1198. s -= skiplen;
  1199. /* Anything left is an error. */
  1200. if (s > 0) {
  1201. archive_set_error(&a->archive, E2BIG,
  1202. "Skip length too long");
  1203. return (ARCHIVE_FATAL);
  1204. }
  1205. return (ARCHIVE_OK);
  1206. }
  /*
   * Report whether a wide string contains anything other than 7-bit
   * ASCII characters.
   *
   * Returns 1 if 'wp' is NULL or contains a code unit outside 1..127,
   * and 0 if every character up to the terminating L'\0' is ASCII.
   */
  static int
  has_non_ASCII(const wchar_t *wp)
  {
      if (wp == NULL)
          return (1);
      /*
       * Where wchar_t is a signed type, a negative code unit would pass
       * a bare "< 128" test and be mistaken for ASCII, so the lower
       * bound is checked too.  "> 0" also stops at the terminator.
       */
      while (*wp > 0 && *wp < 128)
          wp++;
      return (*wp != L'\0');
  }
  /*
   * Percent-encode a string for use as an extended attribute name, so
   * that the result contains no '=' characters.
   *
   * Every byte outside the range 33..126, plus '%' and '=', is replaced
   * by '%' followed by two uppercase hex digits; all other bytes are
   * copied unchanged.
   *
   * Returns a NUL-terminated string obtained from malloc() (the caller
   * frees it), or NULL if the allocation fails.
   */
  static char *
  url_encode(const char *in)
  {
      static const char hex[] = "0123456789ABCDEF";
      const char *src;
      char *out, *dst;
      int out_len = 0;

      /* First pass: work out how long the encoded form will be. */
      for (src = in; *src != '\0'; src++)
          out_len += (*src < 33 || *src > 126
              || *src == '%' || *src == '=') ? 3 : 1;

      out = (char *)malloc(out_len + 1);
      if (out == NULL)
          return (NULL);

      /* Second pass: emit the encoded bytes. */
      dst = out;
      for (src = in; *src != '\0'; src++) {
          char c = *src;

          if (c >= 33 && c <= 126 && c != '%' && c != '=') {
              /* Printable and harmless: copy through. */
              *dst++ = c;
              continue;
          }
          /* URL encoding is '%' followed by two hex digits */
          *dst++ = '%';
          *dst++ = hex[(c >> 4) & 0x0f];
          *dst++ = hex[c & 0x0f];
      }
      *dst = '\0';
      return (out);
  }
  /*
   * Encode a sequence of bytes into a C string using base-64 encoding.
   *
   * The output uses the alphabet A-Z, a-z, 0-9, '+', '/' and carries no
   * '=' padding: a final group of one byte yields two characters and a
   * final group of two bytes yields three.
   *
   * Parameters:
   *   s   - bytes to encode.
   *   len - number of bytes at 's'.
   *
   * Returns a null-terminated C string allocated with malloc(); caller
   * is responsible for freeing the result.  Returns NULL if the
   * allocation fails or if 'len' is so large that the output size
   * cannot be represented.
   */
  static char *
  base64_encode(const char *s, size_t len)
  {
      static const char digits[64] =
          { 'A','B','C','D','E','F','G','H','I','J','K','L','M','N','O',
            'P','Q','R','S','T','U','V','W','X','Y','Z','a','b','c','d',
            'e','f','g','h','i','j','k','l','m','n','o','p','q','r','s',
            't','u','v','w','x','y','z','0','1','2','3','4','5','6','7',
            '8','9','+','/' };
      /*
       * Read the input as unsigned bytes: where plain char is signed,
       * bytes >= 0x80 are negative and left-shifting a negative value
       * is undefined behavior.
       */
      const unsigned char *in = (const unsigned char *)s;
      unsigned long v;
      char *d, *out;

      /* Refuse lengths for which the size computation would overflow. */
      if (len > ((size_t)-1 - 3) / 4)
          return (NULL);

      /* 3 bytes becomes 4 chars, but round up and allow for trailing NUL */
      out = (char *)malloc((len * 4 + 2) / 3 + 1);
      if (out == NULL)
          return (NULL);
      d = out;

      /* Convert each group of 3 bytes into 4 characters. */
      while (len >= 3) {
          v = ((unsigned long)in[0] << 16)
              | ((unsigned long)in[1] << 8)
              | (unsigned long)in[2];
          in += 3;
          len -= 3;
          *d++ = digits[(v >> 18) & 0x3f];
          *d++ = digits[(v >> 12) & 0x3f];
          *d++ = digits[(v >> 6) & 0x3f];
          *d++ = digits[(v) & 0x3f];
      }

      /* Handle final group of 1 byte (2 chars) or 2 bytes (3 chars). */
      switch (len) {
      case 1:
          v = (unsigned long)in[0] << 16;
          *d++ = digits[(v >> 18) & 0x3f];
          *d++ = digits[(v >> 12) & 0x3f];
          break;
      case 2:
          v = ((unsigned long)in[0] << 16)
              | ((unsigned long)in[1] << 8);
          *d++ = digits[(v >> 18) & 0x3f];
          *d++ = digits[(v >> 12) & 0x3f];
          *d++ = digits[(v >> 6) & 0x3f];
          break;
      default: /* len == 0: nothing left over. */
          break;
      }

      /* Add trailing NUL character so output is a valid C string. */
      *d = '\0';
      return (out);
  }