123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591 |
- /*-
- * Copyright (c) 2007 Kai Wang
- * Copyright (c) 2007 Tim Kientzle
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer
- * in this position and unchanged.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
- #include "archive_platform.h"
- __FBSDID("$FreeBSD: src/lib/libarchive/archive_read_support_format_ar.c,v 1.12 2008/12/17 19:02:42 kientzle Exp $");
- #ifdef HAVE_SYS_STAT_H
- #include <sys/stat.h>
- #endif
- #ifdef HAVE_ERRNO_H
- #include <errno.h>
- #endif
- #ifdef HAVE_STDLIB_H
- #include <stdlib.h>
- #endif
- #ifdef HAVE_STRING_H
- #include <string.h>
- #endif
- #include "archive.h"
- #include "archive_entry.h"
- #include "archive_private.h"
- #include "archive_read_private.h"
/*
 * Per-archive state of the "ar" reader.
 */
struct ar {
	/* Bytes of the current entry's body not yet handed to the client. */
	off_t	 entry_bytes_remaining;
	/* Offset within the entry reported with the next block of data. */
	off_t	 entry_offset;
	/* Pad bytes after the body (entry size modulo 2) still to discard. */
	off_t	 entry_padding;
	/* Contents of the GNU "//" filename table, or NULL if none is loaded. */
	char	*strtab;
	/* Size in bytes of the buffer that strtab points to. */
	size_t	 strtab_size;
};
/*
 * Layout of the 60-byte "ar" member header.
 *
 * Every field is described by its byte offset and its width.  The
 * fields are laid out back to back, so each offset is written as the
 * end of the field before it.
 */
#define	AR_name_offset	0
#define	AR_name_size	16
#define	AR_date_offset	(AR_name_offset + AR_name_size)	/* 16 */
#define	AR_date_size	12
#define	AR_uid_offset	(AR_date_offset + AR_date_size)	/* 28 */
#define	AR_uid_size	6
#define	AR_gid_offset	(AR_uid_offset + AR_uid_size)	/* 34 */
#define	AR_gid_size	6
#define	AR_mode_offset	(AR_gid_offset + AR_gid_size)	/* 40 */
#define	AR_mode_size	8
#define	AR_size_offset	(AR_mode_offset + AR_mode_size)	/* 48 */
#define	AR_size_size	10
#define	AR_fmag_offset	(AR_size_offset + AR_size_size)	/* 58 */
#define	AR_fmag_size	2
/* Callbacks registered with the archive_read core. */
static int	archive_read_format_ar_bid(struct archive_read *a);
static int	archive_read_format_ar_read_header(struct archive_read *a,
		    struct archive_entry *entry);
static int	archive_read_format_ar_read_data(struct archive_read *a,
		    const void **buff, size_t *size, off_t *offset);
static int	archive_read_format_ar_skip(struct archive_read *a);
static int	archive_read_format_ar_cleanup(struct archive_read *a);

/* Header-parsing helpers private to this file. */
static int	ar_parse_common_header(struct ar *ar,
		    struct archive_entry *entry, const char *h);
static int	ar_parse_gnu_filename_table(struct archive_read *a);
static uint64_t	ar_atol8(const char *p, unsigned char_cnt);
static uint64_t	ar_atol10(const char *p, unsigned char_cnt);
- int
- archive_read_support_format_ar(struct archive *_a)
- {
- struct archive_read *a = (struct archive_read *)_a;
- struct ar *ar;
- int r;
- ar = (struct ar *)malloc(sizeof(*ar));
- if (ar == NULL) {
- archive_set_error(&a->archive, ENOMEM,
- "Can't allocate ar data");
- return (ARCHIVE_FATAL);
- }
- memset(ar, 0, sizeof(*ar));
- ar->strtab = NULL;
- r = __archive_read_register_format(a,
- ar,
- "ar",
- archive_read_format_ar_bid,
- NULL,
- archive_read_format_ar_read_header,
- archive_read_format_ar_read_data,
- NULL,
- NULL,
- archive_read_format_ar_skip,
- archive_read_format_ar_cleanup);
- if (r != ARCHIVE_OK) {
- free(ar);
- return (r);
- }
- return (ARCHIVE_OK);
- }
- static int
- archive_read_format_ar_cleanup(struct archive_read *a)
- {
- struct ar *ar;
- ar = (struct ar *)(a->format->data);
- if (ar->strtab)
- free(ar->strtab);
- free(ar);
- (a->format->data) = NULL;
- return (ARCHIVE_OK);
- }
- static int
- archive_read_format_ar_bid(struct archive_read *a)
- {
- const void *h;
- if (a->archive.archive_format != 0 &&
- (a->archive.archive_format & ARCHIVE_FORMAT_BASE_MASK) !=
- ARCHIVE_FORMAT_AR)
- return(0);
- /*
- * Verify the 8-byte file signature.
- * TODO: Do we need to check more than this?
- */
- if ((h = __archive_read_ahead(a, 8, NULL)) == NULL)
- return (-1);
- if (strncmp((const char*)h, "!<arch>\n", 8) == 0) {
- return (64);
- }
- return (-1);
- }
- static int
- archive_read_format_ar_read_header(struct archive_read *a,
- struct archive_entry *entry)
- {
- char filename[AR_name_size + 1];
- struct ar *ar;
- uint64_t number; /* Used to hold parsed numbers before validation. */
- ssize_t bytes_read;
- size_t bsd_name_length, entry_size;
- char *p, *st;
- const void *b;
- const char *h;
- int r;
- ar = (struct ar*)(a->format->data);
- if (a->archive.file_position == 0) {
- /*
- * We are now at the beginning of the archive,
- * so we need first consume the ar global header.
- */
- __archive_read_consume(a, 8);
- /* Set a default format code for now. */
- a->archive.archive_format = ARCHIVE_FORMAT_AR;
- }
- /* Read the header for the next file entry. */
- if ((b = __archive_read_ahead(a, 60, &bytes_read)) == NULL)
- /* Broken header. */
- return (ARCHIVE_EOF);
- __archive_read_consume(a, 60);
- h = (const char *)b;
- /* Verify the magic signature on the file header. */
- if (strncmp(h + AR_fmag_offset, "`\n", 2) != 0) {
- archive_set_error(&a->archive, EINVAL,
- "Incorrect file header signature");
- return (ARCHIVE_WARN);
- }
- /* Copy filename into work buffer. */
- strncpy(filename, h + AR_name_offset, AR_name_size);
- filename[AR_name_size] = '\0';
- /*
- * Guess the format variant based on the filename.
- */
- if (a->archive.archive_format == ARCHIVE_FORMAT_AR) {
- /* We don't already know the variant, so let's guess. */
- /*
- * Biggest clue is presence of '/': GNU starts special
- * filenames with '/', appends '/' as terminator to
- * non-special names, so anything with '/' should be
- * GNU except for BSD long filenames.
- */
- if (strncmp(filename, "#1/", 3) == 0)
- a->archive.archive_format = ARCHIVE_FORMAT_AR_BSD;
- else if (strchr(filename, '/') != NULL)
- a->archive.archive_format = ARCHIVE_FORMAT_AR_GNU;
- else if (strncmp(filename, "__.SYMDEF", 9) == 0)
- a->archive.archive_format = ARCHIVE_FORMAT_AR_BSD;
- /*
- * XXX Do GNU/SVR4 'ar' programs ever omit trailing '/'
- * if name exactly fills 16-byte field? If so, we
- * can't assume entries without '/' are BSD. XXX
- */
- }
- /* Update format name from the code. */
- if (a->archive.archive_format == ARCHIVE_FORMAT_AR_GNU)
- a->archive.archive_format_name = "ar (GNU/SVR4)";
- else if (a->archive.archive_format == ARCHIVE_FORMAT_AR_BSD)
- a->archive.archive_format_name = "ar (BSD)";
- else
- a->archive.archive_format_name = "ar";
- /*
- * Remove trailing spaces from the filename. GNU and BSD
- * variants both pad filename area out with spaces.
- * This will only be wrong if GNU/SVR4 'ar' implementations
- * omit trailing '/' for 16-char filenames and we have
- * a 16-char filename that ends in ' '.
- */
- p = filename + AR_name_size - 1;
- while (p >= filename && *p == ' ') {
- *p = '\0';
- p--;
- }
- /*
- * Remove trailing slash unless first character is '/'.
- * (BSD entries never end in '/', so this will only trim
- * GNU-format entries. GNU special entries start with '/'
- * and are not terminated in '/', so we don't trim anything
- * that starts with '/'.)
- */
- if (filename[0] != '/' && p > filename && *p == '/') {
- *p = '\0';
- }
- if (p < filename) {
- archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
- "Found entry with empty filename");
- return (ARCHIVE_FATAL);
- }
- /*
- * '//' is the GNU filename table.
- * Later entries can refer to names in this table.
- */
- if (strcmp(filename, "//") == 0) {
- /* This must come before any call to _read_ahead. */
- ar_parse_common_header(ar, entry, h);
- archive_entry_copy_pathname(entry, filename);
- archive_entry_set_filetype(entry, AE_IFREG);
- /* Get the size of the filename table. */
- number = ar_atol10(h + AR_size_offset, AR_size_size);
- if (number > SIZE_MAX) {
- archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
- "Filename table too large");
- return (ARCHIVE_FATAL);
- }
- entry_size = (size_t)number;
- if (entry_size == 0) {
- archive_set_error(&a->archive, EINVAL,
- "Invalid string table");
- return (ARCHIVE_WARN);
- }
- if (ar->strtab != NULL) {
- archive_set_error(&a->archive, EINVAL,
- "More than one string tables exist");
- return (ARCHIVE_WARN);
- }
- /* Read the filename table into memory. */
- st = malloc(entry_size);
- if (st == NULL) {
- archive_set_error(&a->archive, ENOMEM,
- "Can't allocate filename table buffer");
- return (ARCHIVE_FATAL);
- }
- ar->strtab = st;
- ar->strtab_size = entry_size;
- if ((b = __archive_read_ahead(a, entry_size, NULL)) == NULL)
- return (ARCHIVE_FATAL);
- memcpy(st, b, entry_size);
- __archive_read_consume(a, entry_size);
- /* All contents are consumed. */
- ar->entry_bytes_remaining = 0;
- archive_entry_set_size(entry, ar->entry_bytes_remaining);
- /* Parse the filename table. */
- return (ar_parse_gnu_filename_table(a));
- }
- /*
- * GNU variant handles long filenames by storing /<number>
- * to indicate a name stored in the filename table.
- * XXX TODO: Verify that it's all digits... Don't be fooled
- * by "/9xyz" XXX
- */
- if (filename[0] == '/' && filename[1] >= '0' && filename[1] <= '9') {
- number = ar_atol10(h + AR_name_offset + 1, AR_name_size - 1);
- /*
- * If we can't look up the real name, warn and return
- * the entry with the wrong name.
- */
- if (ar->strtab == NULL || number > ar->strtab_size) {
- archive_set_error(&a->archive, EINVAL,
- "Can't find long filename for entry");
- archive_entry_copy_pathname(entry, filename);
- /* Parse the time, owner, mode, size fields. */
- ar_parse_common_header(ar, entry, h);
- return (ARCHIVE_WARN);
- }
- archive_entry_copy_pathname(entry, &ar->strtab[(size_t)number]);
- /* Parse the time, owner, mode, size fields. */
- return (ar_parse_common_header(ar, entry, h));
- }
- /*
- * BSD handles long filenames by storing "#1/" followed by the
- * length of filename as a decimal number, then prepends the
- * the filename to the file contents.
- */
- if (strncmp(filename, "#1/", 3) == 0) {
- /* Parse the time, owner, mode, size fields. */
- /* This must occur before _read_ahead is called again. */
- ar_parse_common_header(ar, entry, h);
- /* Parse the size of the name, adjust the file size. */
- number = ar_atol10(h + AR_name_offset + 3, AR_name_size - 3);
- bsd_name_length = (size_t)number;
- /* Guard against the filename + trailing NUL
- * overflowing a size_t and against the filename size
- * being larger than the entire entry. */
- if (number > (uint64_t)(bsd_name_length + 1)
- || (off_t)bsd_name_length > ar->entry_bytes_remaining) {
- archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
- "Bad input file size");
- return (ARCHIVE_FATAL);
- }
- ar->entry_bytes_remaining -= bsd_name_length;
- /* Adjust file size reported to client. */
- archive_entry_set_size(entry, ar->entry_bytes_remaining);
- /* Read the long name into memory. */
- if ((b = __archive_read_ahead(a, bsd_name_length, NULL)) == NULL) {
- archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
- "Truncated input file");
- return (ARCHIVE_FATAL);
- }
- __archive_read_consume(a, bsd_name_length);
- /* Store it in the entry. */
- p = (char *)malloc(bsd_name_length + 1);
- if (p == NULL) {
- archive_set_error(&a->archive, ENOMEM,
- "Can't allocate fname buffer");
- return (ARCHIVE_FATAL);
- }
- strncpy(p, b, bsd_name_length);
- p[bsd_name_length] = '\0';
- archive_entry_copy_pathname(entry, p);
- free(p);
- return (ARCHIVE_OK);
- }
- /*
- * "/" is the SVR4/GNU archive symbol table.
- */
- if (strcmp(filename, "/") == 0) {
- archive_entry_copy_pathname(entry, "/");
- /* Parse the time, owner, mode, size fields. */
- r = ar_parse_common_header(ar, entry, h);
- /* Force the file type to a regular file. */
- archive_entry_set_filetype(entry, AE_IFREG);
- return (r);
- }
- /*
- * "__.SYMDEF" is a BSD archive symbol table.
- */
- if (strcmp(filename, "__.SYMDEF") == 0) {
- archive_entry_copy_pathname(entry, filename);
- /* Parse the time, owner, mode, size fields. */
- return (ar_parse_common_header(ar, entry, h));
- }
- /*
- * Otherwise, this is a standard entry. The filename
- * has already been trimmed as much as possible, based
- * on our current knowledge of the format.
- */
- archive_entry_copy_pathname(entry, filename);
- return (ar_parse_common_header(ar, entry, h));
- }
- static int
- ar_parse_common_header(struct ar *ar, struct archive_entry *entry,
- const char *h)
- {
- uint64_t n;
- /* Copy remaining header */
- archive_entry_set_mtime(entry,
- (time_t)ar_atol10(h + AR_date_offset, AR_date_size), 0L);
- archive_entry_set_uid(entry,
- (uid_t)ar_atol10(h + AR_uid_offset, AR_uid_size));
- archive_entry_set_gid(entry,
- (gid_t)ar_atol10(h + AR_gid_offset, AR_gid_size));
- archive_entry_set_mode(entry,
- (mode_t)ar_atol8(h + AR_mode_offset, AR_mode_size));
- n = ar_atol10(h + AR_size_offset, AR_size_size);
- ar->entry_offset = 0;
- ar->entry_padding = n % 2;
- archive_entry_set_size(entry, n);
- ar->entry_bytes_remaining = n;
- return (ARCHIVE_OK);
- }
- static int
- archive_read_format_ar_read_data(struct archive_read *a,
- const void **buff, size_t *size, off_t *offset)
- {
- ssize_t bytes_read;
- struct ar *ar;
- ar = (struct ar *)(a->format->data);
- if (ar->entry_bytes_remaining > 0) {
- *buff = __archive_read_ahead(a, 1, &bytes_read);
- if (bytes_read == 0) {
- archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
- "Truncated ar archive");
- return (ARCHIVE_FATAL);
- }
- if (bytes_read < 0)
- return (ARCHIVE_FATAL);
- if (bytes_read > ar->entry_bytes_remaining)
- bytes_read = (ssize_t)ar->entry_bytes_remaining;
- *size = bytes_read;
- *offset = ar->entry_offset;
- ar->entry_offset += bytes_read;
- ar->entry_bytes_remaining -= bytes_read;
- __archive_read_consume(a, (size_t)bytes_read);
- return (ARCHIVE_OK);
- } else {
- while (ar->entry_padding > 0) {
- *buff = __archive_read_ahead(a, 1, &bytes_read);
- if (bytes_read <= 0)
- return (ARCHIVE_FATAL);
- if (bytes_read > ar->entry_padding)
- bytes_read = (ssize_t)ar->entry_padding;
- __archive_read_consume(a, (size_t)bytes_read);
- ar->entry_padding -= bytes_read;
- }
- *buff = NULL;
- *size = 0;
- *offset = ar->entry_offset;
- return (ARCHIVE_EOF);
- }
- }
- static int
- archive_read_format_ar_skip(struct archive_read *a)
- {
- off_t bytes_skipped;
- struct ar* ar;
- ar = (struct ar *)(a->format->data);
- bytes_skipped = __archive_read_skip(a,
- ar->entry_bytes_remaining + ar->entry_padding);
- if (bytes_skipped < 0)
- return (ARCHIVE_FATAL);
- ar->entry_bytes_remaining = 0;
- ar->entry_padding = 0;
- return (ARCHIVE_OK);
- }
- static int
- ar_parse_gnu_filename_table(struct archive_read *a)
- {
- struct ar *ar;
- char *p;
- size_t size;
- ar = (struct ar*)(a->format->data);
- size = ar->strtab_size;
- for (p = ar->strtab; p < ar->strtab + size - 1; ++p) {
- if (*p == '/') {
- *p++ = '\0';
- if (*p != '\n')
- goto bad_string_table;
- *p = '\0';
- }
- }
- /*
- * GNU ar always pads the table to an even size.
- * The pad character is either '\n' or '`'.
- */
- if (p != ar->strtab + size && *p != '\n' && *p != '`')
- goto bad_string_table;
- /* Enforce zero termination. */
- ar->strtab[size - 1] = '\0';
- return (ARCHIVE_OK);
- bad_string_table:
- archive_set_error(&a->archive, EINVAL,
- "Invalid string table");
- free(ar->strtab);
- ar->strtab = NULL;
- return (ARCHIVE_WARN);
- }
/*
 * Parse an unsigned octal number from a fixed-width header field.
 *
 * p        - start of the field; it need not be NUL-terminated.
 * char_cnt - width of the field in bytes.
 *
 * Leading spaces and tabs are skipped, then octal digits are
 * accumulated until a non-octal character or the end of the field.
 * Returns 0 if no digit is found and UINT64_MAX if the value does not
 * fit in 64 bits.
 *
 * The remaining width is tested before every dereference, so no byte
 * outside p[0 .. char_cnt-1] is ever read.
 */
static uint64_t
ar_atol8(const char *p, unsigned char_cnt)
{
	uint64_t l, limit, last_digit_limit;
	unsigned int digit, base;

	base = 8;
	limit = UINT64_MAX / base;
	last_digit_limit = UINT64_MAX % base;

	while (char_cnt > 0 && (*p == ' ' || *p == '\t')) {
		p++;
		char_cnt--;
	}

	l = 0;
	while (char_cnt > 0 && *p >= '0') {
		digit = (unsigned int)(*p - '0');
		if (digit >= base)
			break;
		if (l > limit || (l == limit && digit > last_digit_limit)) {
			l = UINT64_MAX; /* Truncate on overflow. */
			break;
		}
		l = (l * base) + digit;
		p++;
		char_cnt--;
	}
	return (l);
}
/*
 * Parse an unsigned decimal number from a fixed-width header field.
 *
 * p        - start of the field; it need not be NUL-terminated.
 * char_cnt - width of the field in bytes.
 *
 * Leading spaces and tabs are skipped, then decimal digits are
 * accumulated until a non-digit character or the end of the field.
 * Returns 0 if no digit is found and UINT64_MAX if the value does not
 * fit in 64 bits.
 *
 * The remaining width is tested before every dereference, so no byte
 * outside p[0 .. char_cnt-1] is ever read.
 */
static uint64_t
ar_atol10(const char *p, unsigned char_cnt)
{
	uint64_t l, limit, last_digit_limit;
	unsigned int base, digit;

	base = 10;
	limit = UINT64_MAX / base;
	last_digit_limit = UINT64_MAX % base;

	while (char_cnt > 0 && (*p == ' ' || *p == '\t')) {
		p++;
		char_cnt--;
	}

	l = 0;
	while (char_cnt > 0 && *p >= '0') {
		digit = (unsigned int)(*p - '0');
		if (digit >= base)
			break;
		if (l > limit || (l == limit && digit > last_digit_limit)) {
			l = UINT64_MAX; /* Truncate on overflow. */
			break;
		}
		l = (l * base) + digit;
		p++;
		char_cnt--;
	}
	return (l);
}
|