pack-write.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375
  1. #include "cache.h"
  2. #include "pack.h"
  3. #include "csum-file.h"
  4. void reset_pack_idx_option(struct pack_idx_option *opts)
  5. {
  6. memset(opts, 0, sizeof(*opts));
  7. opts->version = 2;
  8. opts->off32_limit = 0x7fffffff;
  9. }
  10. static int sha1_compare(const void *_a, const void *_b)
  11. {
  12. struct pack_idx_entry *a = *(struct pack_idx_entry **)_a;
  13. struct pack_idx_entry *b = *(struct pack_idx_entry **)_b;
  14. return oidcmp(&a->oid, &b->oid);
  15. }
  /*
   * Three-way comparison of two uint32_t values passed by address.
   * Returns -1, 0 or 1 when *a_ is less than, equal to or greater
   * than *b_ respectively.
   */
  static int cmp_uint32(const void *a_, const void *b_)
  {
      const uint32_t lhs = *(const uint32_t *)a_;
      const uint32_t rhs = *(const uint32_t *)b_;

      return (lhs > rhs) - (lhs < rhs);
  }
  22. static int need_large_offset(off_t offset, const struct pack_idx_option *opts)
  23. {
  24. uint32_t ofsval;
  25. if ((offset >> 31) || (opts->off32_limit < offset))
  26. return 1;
  27. if (!opts->anomaly_nr)
  28. return 0;
  29. ofsval = offset;
  30. return !!bsearch(&ofsval, opts->anomaly, opts->anomaly_nr,
  31. sizeof(ofsval), cmp_uint32);
  32. }
  33. /*
  34. * The *sha1 contains the pack content SHA1 hash.
  35. * The objects array passed in will be sorted by SHA1 on exit.
  36. */
  37. const char *write_idx_file(const char *index_name, struct pack_idx_entry **objects,
  38. int nr_objects, const struct pack_idx_option *opts,
  39. const unsigned char *sha1)
  40. {
  41. struct hashfile *f;
  42. struct pack_idx_entry **sorted_by_sha, **list, **last;
  43. off_t last_obj_offset = 0;
  44. uint32_t array[256];
  45. int i, fd;
  46. uint32_t index_version;
  47. if (nr_objects) {
  48. sorted_by_sha = objects;
  49. list = sorted_by_sha;
  50. last = sorted_by_sha + nr_objects;
  51. for (i = 0; i < nr_objects; ++i) {
  52. if (objects[i]->offset > last_obj_offset)
  53. last_obj_offset = objects[i]->offset;
  54. }
  55. QSORT(sorted_by_sha, nr_objects, sha1_compare);
  56. }
  57. else
  58. sorted_by_sha = list = last = NULL;
  59. if (opts->flags & WRITE_IDX_VERIFY) {
  60. assert(index_name);
  61. f = hashfd_check(index_name);
  62. } else {
  63. if (!index_name) {
  64. struct strbuf tmp_file = STRBUF_INIT;
  65. fd = odb_mkstemp(&tmp_file, "pack/tmp_idx_XXXXXX");
  66. index_name = strbuf_detach(&tmp_file, NULL);
  67. } else {
  68. unlink(index_name);
  69. fd = open(index_name, O_CREAT|O_EXCL|O_WRONLY, 0600);
  70. if (fd < 0)
  71. die_errno("unable to create '%s'", index_name);
  72. }
  73. f = hashfd(fd, index_name);
  74. }
  75. /* if last object's offset is >= 2^31 we should use index V2 */
  76. index_version = need_large_offset(last_obj_offset, opts) ? 2 : opts->version;
  77. /* index versions 2 and above need a header */
  78. if (index_version >= 2) {
  79. struct pack_idx_header hdr;
  80. hdr.idx_signature = htonl(PACK_IDX_SIGNATURE);
  81. hdr.idx_version = htonl(index_version);
  82. hashwrite(f, &hdr, sizeof(hdr));
  83. }
  84. /*
  85. * Write the first-level table (the list is sorted,
  86. * but we use a 256-entry lookup to be able to avoid
  87. * having to do eight extra binary search iterations).
  88. */
  89. for (i = 0; i < 256; i++) {
  90. struct pack_idx_entry **next = list;
  91. while (next < last) {
  92. struct pack_idx_entry *obj = *next;
  93. if (obj->oid.hash[0] != i)
  94. break;
  95. next++;
  96. }
  97. array[i] = htonl(next - sorted_by_sha);
  98. list = next;
  99. }
  100. hashwrite(f, array, 256 * 4);
  101. /*
  102. * Write the actual SHA1 entries..
  103. */
  104. list = sorted_by_sha;
  105. for (i = 0; i < nr_objects; i++) {
  106. struct pack_idx_entry *obj = *list++;
  107. if (index_version < 2)
  108. hashwrite_be32(f, obj->offset);
  109. hashwrite(f, obj->oid.hash, the_hash_algo->rawsz);
  110. if ((opts->flags & WRITE_IDX_STRICT) &&
  111. (i && oideq(&list[-2]->oid, &obj->oid)))
  112. die("The same object %s appears twice in the pack",
  113. oid_to_hex(&obj->oid));
  114. }
  115. if (index_version >= 2) {
  116. unsigned int nr_large_offset = 0;
  117. /* write the crc32 table */
  118. list = sorted_by_sha;
  119. for (i = 0; i < nr_objects; i++) {
  120. struct pack_idx_entry *obj = *list++;
  121. hashwrite_be32(f, obj->crc32);
  122. }
  123. /* write the 32-bit offset table */
  124. list = sorted_by_sha;
  125. for (i = 0; i < nr_objects; i++) {
  126. struct pack_idx_entry *obj = *list++;
  127. uint32_t offset;
  128. offset = (need_large_offset(obj->offset, opts)
  129. ? (0x80000000 | nr_large_offset++)
  130. : obj->offset);
  131. hashwrite_be32(f, offset);
  132. }
  133. /* write the large offset table */
  134. list = sorted_by_sha;
  135. while (nr_large_offset) {
  136. struct pack_idx_entry *obj = *list++;
  137. uint64_t offset = obj->offset;
  138. uint32_t split[2];
  139. if (!need_large_offset(offset, opts))
  140. continue;
  141. split[0] = htonl(offset >> 32);
  142. split[1] = htonl(offset & 0xffffffff);
  143. hashwrite(f, split, 8);
  144. nr_large_offset--;
  145. }
  146. }
  147. hashwrite(f, sha1, the_hash_algo->rawsz);
  148. finalize_hashfile(f, NULL, CSUM_HASH_IN_STREAM | CSUM_CLOSE |
  149. ((opts->flags & WRITE_IDX_VERIFY)
  150. ? 0 : CSUM_FSYNC));
  151. return index_name;
  152. }
  153. off_t write_pack_header(struct hashfile *f, uint32_t nr_entries)
  154. {
  155. struct pack_header hdr;
  156. hdr.hdr_signature = htonl(PACK_SIGNATURE);
  157. hdr.hdr_version = htonl(PACK_VERSION);
  158. hdr.hdr_entries = htonl(nr_entries);
  159. hashwrite(f, &hdr, sizeof(hdr));
  160. return sizeof(hdr);
  161. }
  162. /*
  163. * Update pack header with object_count and compute new SHA1 for pack data
  164. * associated to pack_fd, and write that SHA1 at the end. That new SHA1
  165. * is also returned in new_pack_sha1.
  166. *
  167. * If partial_pack_sha1 is non null, then the SHA1 of the existing pack
  168. * (without the header update) is computed and validated against the
  169. * one provided in partial_pack_sha1. The validation is performed at
  170. * partial_pack_offset bytes in the pack file. The SHA1 of the remaining
  171. * data (i.e. from partial_pack_offset to the end) is then computed and
  172. * returned in partial_pack_sha1.
  173. *
  174. * Note that new_pack_sha1 is updated last, so both new_pack_sha1 and
  175. * partial_pack_sha1 can refer to the same buffer if the caller is not
  176. * interested in the resulting SHA1 of pack data above partial_pack_offset.
  177. */
  178. void fixup_pack_header_footer(int pack_fd,
  179. unsigned char *new_pack_hash,
  180. const char *pack_name,
  181. uint32_t object_count,
  182. unsigned char *partial_pack_hash,
  183. off_t partial_pack_offset)
  184. {
  185. int aligned_sz, buf_sz = 8 * 1024;
  186. git_hash_ctx old_hash_ctx, new_hash_ctx;
  187. struct pack_header hdr;
  188. char *buf;
  189. ssize_t read_result;
  190. the_hash_algo->init_fn(&old_hash_ctx);
  191. the_hash_algo->init_fn(&new_hash_ctx);
  192. if (lseek(pack_fd, 0, SEEK_SET) != 0)
  193. die_errno("Failed seeking to start of '%s'", pack_name);
  194. read_result = read_in_full(pack_fd, &hdr, sizeof(hdr));
  195. if (read_result < 0)
  196. die_errno("Unable to reread header of '%s'", pack_name);
  197. else if (read_result != sizeof(hdr))
  198. die_errno("Unexpected short read for header of '%s'",
  199. pack_name);
  200. if (lseek(pack_fd, 0, SEEK_SET) != 0)
  201. die_errno("Failed seeking to start of '%s'", pack_name);
  202. the_hash_algo->update_fn(&old_hash_ctx, &hdr, sizeof(hdr));
  203. hdr.hdr_entries = htonl(object_count);
  204. the_hash_algo->update_fn(&new_hash_ctx, &hdr, sizeof(hdr));
  205. write_or_die(pack_fd, &hdr, sizeof(hdr));
  206. partial_pack_offset -= sizeof(hdr);
  207. buf = xmalloc(buf_sz);
  208. aligned_sz = buf_sz - sizeof(hdr);
  209. for (;;) {
  210. ssize_t m, n;
  211. m = (partial_pack_hash && partial_pack_offset < aligned_sz) ?
  212. partial_pack_offset : aligned_sz;
  213. n = xread(pack_fd, buf, m);
  214. if (!n)
  215. break;
  216. if (n < 0)
  217. die_errno("Failed to checksum '%s'", pack_name);
  218. the_hash_algo->update_fn(&new_hash_ctx, buf, n);
  219. aligned_sz -= n;
  220. if (!aligned_sz)
  221. aligned_sz = buf_sz;
  222. if (!partial_pack_hash)
  223. continue;
  224. the_hash_algo->update_fn(&old_hash_ctx, buf, n);
  225. partial_pack_offset -= n;
  226. if (partial_pack_offset == 0) {
  227. unsigned char hash[GIT_MAX_RAWSZ];
  228. the_hash_algo->final_fn(hash, &old_hash_ctx);
  229. if (!hasheq(hash, partial_pack_hash))
  230. die("Unexpected checksum for %s "
  231. "(disk corruption?)", pack_name);
  232. /*
  233. * Now let's compute the SHA1 of the remainder of the
  234. * pack, which also means making partial_pack_offset
  235. * big enough not to matter anymore.
  236. */
  237. the_hash_algo->init_fn(&old_hash_ctx);
  238. partial_pack_offset = ~partial_pack_offset;
  239. partial_pack_offset -= MSB(partial_pack_offset, 1);
  240. }
  241. }
  242. free(buf);
  243. if (partial_pack_hash)
  244. the_hash_algo->final_fn(partial_pack_hash, &old_hash_ctx);
  245. the_hash_algo->final_fn(new_pack_hash, &new_hash_ctx);
  246. write_or_die(pack_fd, new_pack_hash, the_hash_algo->rawsz);
  247. fsync_or_die(pack_fd, pack_name);
  248. }
  249. char *index_pack_lockfile(int ip_out)
  250. {
  251. char packname[GIT_MAX_HEXSZ + 6];
  252. const int len = the_hash_algo->hexsz + 6;
  253. /*
  254. * The first thing we expect from index-pack's output
  255. * is "pack\t%40s\n" or "keep\t%40s\n" (46 bytes) where
  256. * %40s is the newly created pack SHA1 name. In the "keep"
  257. * case, we need it to remove the corresponding .keep file
  258. * later on. If we don't get that then tough luck with it.
  259. */
  260. if (read_in_full(ip_out, packname, len) == len && packname[len-1] == '\n') {
  261. const char *name;
  262. packname[len-1] = 0;
  263. if (skip_prefix(packname, "keep\t", &name))
  264. return xstrfmt("%s/pack/pack-%s.keep",
  265. get_object_directory(), name);
  266. }
  267. return NULL;
  268. }
  269. /*
  270. * The per-object header is a pretty dense thing, which is
  271. * - first byte: low four bits are "size", then three bits of "type",
  272. * and the high bit is "size continues".
  273. * - each byte afterwards: low seven bits are size continuation,
  274. * with the high bit being "size continues"
  275. */
  276. int encode_in_pack_object_header(unsigned char *hdr, int hdr_len,
  277. enum object_type type, uintmax_t size)
  278. {
  279. int n = 1;
  280. unsigned char c;
  281. if (type < OBJ_COMMIT || type > OBJ_REF_DELTA)
  282. die("bad type %d", type);
  283. c = (type << 4) | (size & 15);
  284. size >>= 4;
  285. while (size) {
  286. if (n == hdr_len)
  287. die("object size is too enormous to format");
  288. *hdr++ = c | 0x80;
  289. c = size & 0x7f;
  290. size >>= 7;
  291. n++;
  292. }
  293. *hdr = c;
  294. return n;
  295. }
  296. struct hashfile *create_tmp_packfile(char **pack_tmp_name)
  297. {
  298. struct strbuf tmpname = STRBUF_INIT;
  299. int fd;
  300. fd = odb_mkstemp(&tmpname, "pack/tmp_pack_XXXXXX");
  301. *pack_tmp_name = strbuf_detach(&tmpname, NULL);
  302. return hashfd(fd, *pack_tmp_name);
  303. }
  304. void finish_tmp_packfile(struct strbuf *name_buffer,
  305. const char *pack_tmp_name,
  306. struct pack_idx_entry **written_list,
  307. uint32_t nr_written,
  308. struct pack_idx_option *pack_idx_opts,
  309. unsigned char hash[])
  310. {
  311. const char *idx_tmp_name;
  312. int basename_len = name_buffer->len;
  313. if (adjust_shared_perm(pack_tmp_name))
  314. die_errno("unable to make temporary pack file readable");
  315. idx_tmp_name = write_idx_file(NULL, written_list, nr_written,
  316. pack_idx_opts, hash);
  317. if (adjust_shared_perm(idx_tmp_name))
  318. die_errno("unable to make temporary index file readable");
  319. strbuf_addf(name_buffer, "%s.pack", hash_to_hex(hash));
  320. if (rename(pack_tmp_name, name_buffer->buf))
  321. die_errno("unable to rename temporary pack file");
  322. strbuf_setlen(name_buffer, basename_len);
  323. strbuf_addf(name_buffer, "%s.idx", hash_to_hex(hash));
  324. if (rename(idx_tmp_name, name_buffer->buf))
  325. die_errno("unable to rename temporary index file");
  326. strbuf_setlen(name_buffer, basename_len);
  327. free((void *)idx_tmp_name);
  328. }