archive_read_support_format_ar.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591
  1. /*-
  2. * Copyright (c) 2007 Kai Wang
  3. * Copyright (c) 2007 Tim Kientzle
  4. * All rights reserved.
  5. *
  6. * Redistribution and use in source and binary forms, with or without
  7. * modification, are permitted provided that the following conditions
  8. * are met:
  9. * 1. Redistributions of source code must retain the above copyright
  10. * notice, this list of conditions and the following disclaimer
  11. * in this position and unchanged.
  12. * 2. Redistributions in binary form must reproduce the above copyright
  13. * notice, this list of conditions and the following disclaimer in the
  14. * documentation and/or other materials provided with the distribution.
  15. *
  16. * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
  17. * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  18. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  19. * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
  20. * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  21. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  22. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  23. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  24. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  25. * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  26. */
  27. #include "archive_platform.h"
  28. __FBSDID("$FreeBSD: src/lib/libarchive/archive_read_support_format_ar.c,v 1.12 2008/12/17 19:02:42 kientzle Exp $");
  29. #ifdef HAVE_SYS_STAT_H
  30. #include <sys/stat.h>
  31. #endif
  32. #ifdef HAVE_ERRNO_H
  33. #include <errno.h>
  34. #endif
  35. #ifdef HAVE_STDLIB_H
  36. #include <stdlib.h>
  37. #endif
  38. #ifdef HAVE_STRING_H
  39. #include <string.h>
  40. #endif
  41. #include "archive.h"
  42. #include "archive_entry.h"
  43. #include "archive_private.h"
  44. #include "archive_read_private.h"
  /*
   * Per-archive reader state for the "ar" format.
   */
  struct ar {
      /* Bytes of the current entry's body that have not been consumed yet. */
      off_t   entry_bytes_remaining;
      /* Offset within the current entry reported with the next data block. */
      off_t   entry_offset;
      /* Pad bytes following the entry body (size % 2), still to be skipped. */
      off_t   entry_padding;
      /* Heap copy of the GNU "//" filename table, or NULL if none was read. */
      char    *strtab;
      /* Size in bytes of the buffer that strtab points to. */
      size_t  strtab_size;
  };
  /*
   * Layout of the fixed-size "ar" member header.
   *
   * Each field is described by its byte offset and its width.  The fields
   * are packed back to back, so every offset is the end of the previous
   * field; the header ends at AR_fmag_offset + AR_fmag_size (60 bytes).
   */
  #define AR_name_offset  0
  #define AR_name_size    16
  #define AR_date_offset  (AR_name_offset + AR_name_size)
  #define AR_date_size    12
  #define AR_uid_offset   (AR_date_offset + AR_date_size)
  #define AR_uid_size     6
  #define AR_gid_offset   (AR_uid_offset + AR_uid_size)
  #define AR_gid_size     6
  #define AR_mode_offset  (AR_gid_offset + AR_gid_size)
  #define AR_mode_size    8
  #define AR_size_offset  (AR_mode_offset + AR_mode_size)
  #define AR_size_size    10
  #define AR_fmag_offset  (AR_size_offset + AR_size_size)
  #define AR_fmag_size    2
  /* Format callbacks handed to __archive_read_register_format(). */
  static int  archive_read_format_ar_bid(struct archive_read *a);
  static int  archive_read_format_ar_cleanup(struct archive_read *a);
  static int  archive_read_format_ar_read_data(struct archive_read *a,
                  const void **buff, size_t *size, off_t *offset);
  static int  archive_read_format_ar_skip(struct archive_read *a);
  static int  archive_read_format_ar_read_header(struct archive_read *a,
                  struct archive_entry *e);

  /* Internal helpers: bounded number parsing and header/table decoding. */
  static uint64_t ar_atol8(const char *p, unsigned char_cnt);
  static uint64_t ar_atol10(const char *p, unsigned char_cnt);
  static int  ar_parse_gnu_filename_table(struct archive_read *a);
  static int  ar_parse_common_header(struct ar *ar,
                  struct archive_entry *entry, const char *h);
  81. int
  82. archive_read_support_format_ar(struct archive *_a)
  83. {
  84. struct archive_read *a = (struct archive_read *)_a;
  85. struct ar *ar;
  86. int r;
  87. ar = (struct ar *)malloc(sizeof(*ar));
  88. if (ar == NULL) {
  89. archive_set_error(&a->archive, ENOMEM,
  90. "Can't allocate ar data");
  91. return (ARCHIVE_FATAL);
  92. }
  93. memset(ar, 0, sizeof(*ar));
  94. ar->strtab = NULL;
  95. r = __archive_read_register_format(a,
  96. ar,
  97. "ar",
  98. archive_read_format_ar_bid,
  99. NULL,
  100. archive_read_format_ar_read_header,
  101. archive_read_format_ar_read_data,
  102. NULL,
  103. NULL,
  104. archive_read_format_ar_skip,
  105. archive_read_format_ar_cleanup);
  106. if (r != ARCHIVE_OK) {
  107. free(ar);
  108. return (r);
  109. }
  110. return (ARCHIVE_OK);
  111. }
  112. static int
  113. archive_read_format_ar_cleanup(struct archive_read *a)
  114. {
  115. struct ar *ar;
  116. ar = (struct ar *)(a->format->data);
  117. if (ar->strtab)
  118. free(ar->strtab);
  119. free(ar);
  120. (a->format->data) = NULL;
  121. return (ARCHIVE_OK);
  122. }
  123. static int
  124. archive_read_format_ar_bid(struct archive_read *a)
  125. {
  126. const void *h;
  127. if (a->archive.archive_format != 0 &&
  128. (a->archive.archive_format & ARCHIVE_FORMAT_BASE_MASK) !=
  129. ARCHIVE_FORMAT_AR)
  130. return(0);
  131. /*
  132. * Verify the 8-byte file signature.
  133. * TODO: Do we need to check more than this?
  134. */
  135. if ((h = __archive_read_ahead(a, 8, NULL)) == NULL)
  136. return (-1);
  137. if (strncmp((const char*)h, "!<arch>\n", 8) == 0) {
  138. return (64);
  139. }
  140. return (-1);
  141. }
  142. static int
  143. archive_read_format_ar_read_header(struct archive_read *a,
  144. struct archive_entry *entry)
  145. {
  146. char filename[AR_name_size + 1];
  147. struct ar *ar;
  148. uint64_t number; /* Used to hold parsed numbers before validation. */
  149. ssize_t bytes_read;
  150. size_t bsd_name_length, entry_size;
  151. char *p, *st;
  152. const void *b;
  153. const char *h;
  154. int r;
  155. ar = (struct ar*)(a->format->data);
  156. if (a->archive.file_position == 0) {
  157. /*
  158. * We are now at the beginning of the archive,
  159. * so we need first consume the ar global header.
  160. */
  161. __archive_read_consume(a, 8);
  162. /* Set a default format code for now. */
  163. a->archive.archive_format = ARCHIVE_FORMAT_AR;
  164. }
  165. /* Read the header for the next file entry. */
  166. if ((b = __archive_read_ahead(a, 60, &bytes_read)) == NULL)
  167. /* Broken header. */
  168. return (ARCHIVE_EOF);
  169. __archive_read_consume(a, 60);
  170. h = (const char *)b;
  171. /* Verify the magic signature on the file header. */
  172. if (strncmp(h + AR_fmag_offset, "`\n", 2) != 0) {
  173. archive_set_error(&a->archive, EINVAL,
  174. "Incorrect file header signature");
  175. return (ARCHIVE_WARN);
  176. }
  177. /* Copy filename into work buffer. */
  178. strncpy(filename, h + AR_name_offset, AR_name_size);
  179. filename[AR_name_size] = '\0';
  180. /*
  181. * Guess the format variant based on the filename.
  182. */
  183. if (a->archive.archive_format == ARCHIVE_FORMAT_AR) {
  184. /* We don't already know the variant, so let's guess. */
  185. /*
  186. * Biggest clue is presence of '/': GNU starts special
  187. * filenames with '/', appends '/' as terminator to
  188. * non-special names, so anything with '/' should be
  189. * GNU except for BSD long filenames.
  190. */
  191. if (strncmp(filename, "#1/", 3) == 0)
  192. a->archive.archive_format = ARCHIVE_FORMAT_AR_BSD;
  193. else if (strchr(filename, '/') != NULL)
  194. a->archive.archive_format = ARCHIVE_FORMAT_AR_GNU;
  195. else if (strncmp(filename, "__.SYMDEF", 9) == 0)
  196. a->archive.archive_format = ARCHIVE_FORMAT_AR_BSD;
  197. /*
  198. * XXX Do GNU/SVR4 'ar' programs ever omit trailing '/'
  199. * if name exactly fills 16-byte field? If so, we
  200. * can't assume entries without '/' are BSD. XXX
  201. */
  202. }
  203. /* Update format name from the code. */
  204. if (a->archive.archive_format == ARCHIVE_FORMAT_AR_GNU)
  205. a->archive.archive_format_name = "ar (GNU/SVR4)";
  206. else if (a->archive.archive_format == ARCHIVE_FORMAT_AR_BSD)
  207. a->archive.archive_format_name = "ar (BSD)";
  208. else
  209. a->archive.archive_format_name = "ar";
  210. /*
  211. * Remove trailing spaces from the filename. GNU and BSD
  212. * variants both pad filename area out with spaces.
  213. * This will only be wrong if GNU/SVR4 'ar' implementations
  214. * omit trailing '/' for 16-char filenames and we have
  215. * a 16-char filename that ends in ' '.
  216. */
  217. p = filename + AR_name_size - 1;
  218. while (p >= filename && *p == ' ') {
  219. *p = '\0';
  220. p--;
  221. }
  222. /*
  223. * Remove trailing slash unless first character is '/'.
  224. * (BSD entries never end in '/', so this will only trim
  225. * GNU-format entries. GNU special entries start with '/'
  226. * and are not terminated in '/', so we don't trim anything
  227. * that starts with '/'.)
  228. */
  229. if (filename[0] != '/' && p > filename && *p == '/') {
  230. *p = '\0';
  231. }
  232. if (p < filename) {
  233. archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
  234. "Found entry with empty filename");
  235. return (ARCHIVE_FATAL);
  236. }
  237. /*
  238. * '//' is the GNU filename table.
  239. * Later entries can refer to names in this table.
  240. */
  241. if (strcmp(filename, "//") == 0) {
  242. /* This must come before any call to _read_ahead. */
  243. ar_parse_common_header(ar, entry, h);
  244. archive_entry_copy_pathname(entry, filename);
  245. archive_entry_set_filetype(entry, AE_IFREG);
  246. /* Get the size of the filename table. */
  247. number = ar_atol10(h + AR_size_offset, AR_size_size);
  248. if (number > SIZE_MAX) {
  249. archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
  250. "Filename table too large");
  251. return (ARCHIVE_FATAL);
  252. }
  253. entry_size = (size_t)number;
  254. if (entry_size == 0) {
  255. archive_set_error(&a->archive, EINVAL,
  256. "Invalid string table");
  257. return (ARCHIVE_WARN);
  258. }
  259. if (ar->strtab != NULL) {
  260. archive_set_error(&a->archive, EINVAL,
  261. "More than one string tables exist");
  262. return (ARCHIVE_WARN);
  263. }
  264. /* Read the filename table into memory. */
  265. st = malloc(entry_size);
  266. if (st == NULL) {
  267. archive_set_error(&a->archive, ENOMEM,
  268. "Can't allocate filename table buffer");
  269. return (ARCHIVE_FATAL);
  270. }
  271. ar->strtab = st;
  272. ar->strtab_size = entry_size;
  273. if ((b = __archive_read_ahead(a, entry_size, NULL)) == NULL)
  274. return (ARCHIVE_FATAL);
  275. memcpy(st, b, entry_size);
  276. __archive_read_consume(a, entry_size);
  277. /* All contents are consumed. */
  278. ar->entry_bytes_remaining = 0;
  279. archive_entry_set_size(entry, ar->entry_bytes_remaining);
  280. /* Parse the filename table. */
  281. return (ar_parse_gnu_filename_table(a));
  282. }
  283. /*
  284. * GNU variant handles long filenames by storing /<number>
  285. * to indicate a name stored in the filename table.
  286. * XXX TODO: Verify that it's all digits... Don't be fooled
  287. * by "/9xyz" XXX
  288. */
  289. if (filename[0] == '/' && filename[1] >= '0' && filename[1] <= '9') {
  290. number = ar_atol10(h + AR_name_offset + 1, AR_name_size - 1);
  291. /*
  292. * If we can't look up the real name, warn and return
  293. * the entry with the wrong name.
  294. */
  295. if (ar->strtab == NULL || number > ar->strtab_size) {
  296. archive_set_error(&a->archive, EINVAL,
  297. "Can't find long filename for entry");
  298. archive_entry_copy_pathname(entry, filename);
  299. /* Parse the time, owner, mode, size fields. */
  300. ar_parse_common_header(ar, entry, h);
  301. return (ARCHIVE_WARN);
  302. }
  303. archive_entry_copy_pathname(entry, &ar->strtab[(size_t)number]);
  304. /* Parse the time, owner, mode, size fields. */
  305. return (ar_parse_common_header(ar, entry, h));
  306. }
  307. /*
  308. * BSD handles long filenames by storing "#1/" followed by the
  309. * length of filename as a decimal number, then prepends the
  310. * the filename to the file contents.
  311. */
  312. if (strncmp(filename, "#1/", 3) == 0) {
  313. /* Parse the time, owner, mode, size fields. */
  314. /* This must occur before _read_ahead is called again. */
  315. ar_parse_common_header(ar, entry, h);
  316. /* Parse the size of the name, adjust the file size. */
  317. number = ar_atol10(h + AR_name_offset + 3, AR_name_size - 3);
  318. bsd_name_length = (size_t)number;
  319. /* Guard against the filename + trailing NUL
  320. * overflowing a size_t and against the filename size
  321. * being larger than the entire entry. */
  322. if (number > (uint64_t)(bsd_name_length + 1)
  323. || (off_t)bsd_name_length > ar->entry_bytes_remaining) {
  324. archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
  325. "Bad input file size");
  326. return (ARCHIVE_FATAL);
  327. }
  328. ar->entry_bytes_remaining -= bsd_name_length;
  329. /* Adjust file size reported to client. */
  330. archive_entry_set_size(entry, ar->entry_bytes_remaining);
  331. /* Read the long name into memory. */
  332. if ((b = __archive_read_ahead(a, bsd_name_length, NULL)) == NULL) {
  333. archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
  334. "Truncated input file");
  335. return (ARCHIVE_FATAL);
  336. }
  337. __archive_read_consume(a, bsd_name_length);
  338. /* Store it in the entry. */
  339. p = (char *)malloc(bsd_name_length + 1);
  340. if (p == NULL) {
  341. archive_set_error(&a->archive, ENOMEM,
  342. "Can't allocate fname buffer");
  343. return (ARCHIVE_FATAL);
  344. }
  345. strncpy(p, b, bsd_name_length);
  346. p[bsd_name_length] = '\0';
  347. archive_entry_copy_pathname(entry, p);
  348. free(p);
  349. return (ARCHIVE_OK);
  350. }
  351. /*
  352. * "/" is the SVR4/GNU archive symbol table.
  353. */
  354. if (strcmp(filename, "/") == 0) {
  355. archive_entry_copy_pathname(entry, "/");
  356. /* Parse the time, owner, mode, size fields. */
  357. r = ar_parse_common_header(ar, entry, h);
  358. /* Force the file type to a regular file. */
  359. archive_entry_set_filetype(entry, AE_IFREG);
  360. return (r);
  361. }
  362. /*
  363. * "__.SYMDEF" is a BSD archive symbol table.
  364. */
  365. if (strcmp(filename, "__.SYMDEF") == 0) {
  366. archive_entry_copy_pathname(entry, filename);
  367. /* Parse the time, owner, mode, size fields. */
  368. return (ar_parse_common_header(ar, entry, h));
  369. }
  370. /*
  371. * Otherwise, this is a standard entry. The filename
  372. * has already been trimmed as much as possible, based
  373. * on our current knowledge of the format.
  374. */
  375. archive_entry_copy_pathname(entry, filename);
  376. return (ar_parse_common_header(ar, entry, h));
  377. }
  378. static int
  379. ar_parse_common_header(struct ar *ar, struct archive_entry *entry,
  380. const char *h)
  381. {
  382. uint64_t n;
  383. /* Copy remaining header */
  384. archive_entry_set_mtime(entry,
  385. (time_t)ar_atol10(h + AR_date_offset, AR_date_size), 0L);
  386. archive_entry_set_uid(entry,
  387. (uid_t)ar_atol10(h + AR_uid_offset, AR_uid_size));
  388. archive_entry_set_gid(entry,
  389. (gid_t)ar_atol10(h + AR_gid_offset, AR_gid_size));
  390. archive_entry_set_mode(entry,
  391. (mode_t)ar_atol8(h + AR_mode_offset, AR_mode_size));
  392. n = ar_atol10(h + AR_size_offset, AR_size_size);
  393. ar->entry_offset = 0;
  394. ar->entry_padding = n % 2;
  395. archive_entry_set_size(entry, n);
  396. ar->entry_bytes_remaining = n;
  397. return (ARCHIVE_OK);
  398. }
  399. static int
  400. archive_read_format_ar_read_data(struct archive_read *a,
  401. const void **buff, size_t *size, off_t *offset)
  402. {
  403. ssize_t bytes_read;
  404. struct ar *ar;
  405. ar = (struct ar *)(a->format->data);
  406. if (ar->entry_bytes_remaining > 0) {
  407. *buff = __archive_read_ahead(a, 1, &bytes_read);
  408. if (bytes_read == 0) {
  409. archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
  410. "Truncated ar archive");
  411. return (ARCHIVE_FATAL);
  412. }
  413. if (bytes_read < 0)
  414. return (ARCHIVE_FATAL);
  415. if (bytes_read > ar->entry_bytes_remaining)
  416. bytes_read = (ssize_t)ar->entry_bytes_remaining;
  417. *size = bytes_read;
  418. *offset = ar->entry_offset;
  419. ar->entry_offset += bytes_read;
  420. ar->entry_bytes_remaining -= bytes_read;
  421. __archive_read_consume(a, (size_t)bytes_read);
  422. return (ARCHIVE_OK);
  423. } else {
  424. while (ar->entry_padding > 0) {
  425. *buff = __archive_read_ahead(a, 1, &bytes_read);
  426. if (bytes_read <= 0)
  427. return (ARCHIVE_FATAL);
  428. if (bytes_read > ar->entry_padding)
  429. bytes_read = (ssize_t)ar->entry_padding;
  430. __archive_read_consume(a, (size_t)bytes_read);
  431. ar->entry_padding -= bytes_read;
  432. }
  433. *buff = NULL;
  434. *size = 0;
  435. *offset = ar->entry_offset;
  436. return (ARCHIVE_EOF);
  437. }
  438. }
  439. static int
  440. archive_read_format_ar_skip(struct archive_read *a)
  441. {
  442. off_t bytes_skipped;
  443. struct ar* ar;
  444. ar = (struct ar *)(a->format->data);
  445. bytes_skipped = __archive_read_skip(a,
  446. ar->entry_bytes_remaining + ar->entry_padding);
  447. if (bytes_skipped < 0)
  448. return (ARCHIVE_FATAL);
  449. ar->entry_bytes_remaining = 0;
  450. ar->entry_padding = 0;
  451. return (ARCHIVE_OK);
  452. }
  453. static int
  454. ar_parse_gnu_filename_table(struct archive_read *a)
  455. {
  456. struct ar *ar;
  457. char *p;
  458. size_t size;
  459. ar = (struct ar*)(a->format->data);
  460. size = ar->strtab_size;
  461. for (p = ar->strtab; p < ar->strtab + size - 1; ++p) {
  462. if (*p == '/') {
  463. *p++ = '\0';
  464. if (*p != '\n')
  465. goto bad_string_table;
  466. *p = '\0';
  467. }
  468. }
  469. /*
  470. * GNU ar always pads the table to an even size.
  471. * The pad character is either '\n' or '`'.
  472. */
  473. if (p != ar->strtab + size && *p != '\n' && *p != '`')
  474. goto bad_string_table;
  475. /* Enforce zero termination. */
  476. ar->strtab[size - 1] = '\0';
  477. return (ARCHIVE_OK);
  478. bad_string_table:
  479. archive_set_error(&a->archive, EINVAL,
  480. "Invalid string table");
  481. free(ar->strtab);
  482. ar->strtab = NULL;
  483. return (ARCHIVE_WARN);
  484. }
  /*
   * Parse an unsigned octal number from a fixed-width text field.
   *
   * p        - start of the field; it need not be NUL-terminated.
   * char_cnt - width of the field; no byte at or beyond p + char_cnt
   *            is ever read.
   *
   * Leading spaces and tabs are skipped, then octal digits are
   * accumulated until the first non-octal character or the end of the
   * field.  Returns the parsed value, 0 if the field holds no digits,
   * or UINT64_MAX if the value does not fit in 64 bits.
   */
  static uint64_t
  ar_atol8(const char *p, unsigned char_cnt)
  {
      const unsigned int base = 8;
      const uint64_t limit = UINT64_MAX / base;
      const uint64_t last_digit_limit = UINT64_MAX % base;
      uint64_t l = 0;

      /*
       * The remaining width is tested before each byte is read, so
       * the byte following the field is never touched and the
       * unsigned counter can never wrap below zero.
       */
      while (char_cnt > 0 && (*p == ' ' || *p == '\t')) {
          p++;
          char_cnt--;
      }

      while (char_cnt > 0 && *p >= '0' && *p <= '7') {
          const unsigned int digit = (unsigned int)(*p - '0');

          if (l > limit || (l == limit && digit > last_digit_limit))
              return (UINT64_MAX); /* Truncate on overflow. */
          l = (l * base) + digit;
          p++;
          char_cnt--;
      }
      return (l);
  }
  /*
   * Parse an unsigned decimal number from a fixed-width text field.
   *
   * p        - start of the field; it need not be NUL-terminated.
   * char_cnt - width of the field; no byte at or beyond p + char_cnt
   *            is ever read.
   *
   * Leading spaces and tabs are skipped, then decimal digits are
   * accumulated until the first non-digit character or the end of the
   * field.  Returns the parsed value, 0 if the field holds no digits,
   * or UINT64_MAX if the value does not fit in 64 bits.
   */
  static uint64_t
  ar_atol10(const char *p, unsigned char_cnt)
  {
      const unsigned int base = 10;
      const uint64_t limit = UINT64_MAX / base;
      const uint64_t last_digit_limit = UINT64_MAX % base;
      uint64_t l = 0;

      /*
       * The remaining width is tested before each byte is read, so
       * the byte following the field is never touched and the
       * unsigned counter can never wrap below zero.
       */
      while (char_cnt > 0 && (*p == ' ' || *p == '\t')) {
          p++;
          char_cnt--;
      }

      while (char_cnt > 0 && *p >= '0' && *p <= '9') {
          const unsigned int digit = (unsigned int)(*p - '0');

          if (l > limit || (l == limit && digit > last_digit_limit))
              return (UINT64_MAX); /* Truncate on overflow. */
          l = (l * base) + digit;
          p++;
          char_cnt--;
      }
      return (l);
  }
  528. }