fast-export.c 34 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313
  1. /*
  2. * "git fast-export" builtin command
  3. *
  4. * Copyright (C) 2007 Johannes E. Schindelin
  5. */
  6. #include "builtin.h"
  7. #include "cache.h"
  8. #include "config.h"
  9. #include "refs.h"
  10. #include "refspec.h"
  11. #include "object-store.h"
  12. #include "commit.h"
  13. #include "object.h"
  14. #include "tag.h"
  15. #include "diff.h"
  16. #include "diffcore.h"
  17. #include "log-tree.h"
  18. #include "revision.h"
  19. #include "decorate.h"
  20. #include "string-list.h"
  21. #include "utf8.h"
  22. #include "parse-options.h"
  23. #include "quote.h"
  24. #include "remote.h"
  25. #include "blob.h"
  26. #include "commit-slab.h"
  27. static const char *fast_export_usage[] = {
  28. N_("git fast-export [rev-list-opts]"),
  29. NULL
  30. };
  31. static int progress;
  32. static enum { SIGNED_TAG_ABORT, VERBATIM, WARN, WARN_STRIP, STRIP } signed_tag_mode = SIGNED_TAG_ABORT;
  33. static enum { TAG_FILTERING_ABORT, DROP, REWRITE } tag_of_filtered_mode = TAG_FILTERING_ABORT;
  34. static enum { REENCODE_ABORT, REENCODE_YES, REENCODE_NO } reencode_mode = REENCODE_ABORT;
  35. static int fake_missing_tagger;
  36. static int use_done_feature;
  37. static int no_data;
  38. static int full_tree;
  39. static int reference_excluded_commits;
  40. static int show_original_ids;
  41. static int mark_tags;
  42. static struct string_list extra_refs = STRING_LIST_INIT_NODUP;
  43. static struct string_list tag_refs = STRING_LIST_INIT_NODUP;
  44. static struct refspec refspecs = REFSPEC_INIT_FETCH;
  45. static int anonymize;
  46. static struct hashmap anonymized_seeds;
  47. static struct revision_sources revision_sources;
  48. static int parse_opt_signed_tag_mode(const struct option *opt,
  49. const char *arg, int unset)
  50. {
  51. if (unset || !strcmp(arg, "abort"))
  52. signed_tag_mode = SIGNED_TAG_ABORT;
  53. else if (!strcmp(arg, "verbatim") || !strcmp(arg, "ignore"))
  54. signed_tag_mode = VERBATIM;
  55. else if (!strcmp(arg, "warn"))
  56. signed_tag_mode = WARN;
  57. else if (!strcmp(arg, "warn-strip"))
  58. signed_tag_mode = WARN_STRIP;
  59. else if (!strcmp(arg, "strip"))
  60. signed_tag_mode = STRIP;
  61. else
  62. return error("Unknown signed-tags mode: %s", arg);
  63. return 0;
  64. }
  65. static int parse_opt_tag_of_filtered_mode(const struct option *opt,
  66. const char *arg, int unset)
  67. {
  68. if (unset || !strcmp(arg, "abort"))
  69. tag_of_filtered_mode = TAG_FILTERING_ABORT;
  70. else if (!strcmp(arg, "drop"))
  71. tag_of_filtered_mode = DROP;
  72. else if (!strcmp(arg, "rewrite"))
  73. tag_of_filtered_mode = REWRITE;
  74. else
  75. return error("Unknown tag-of-filtered mode: %s", arg);
  76. return 0;
  77. }
  78. static int parse_opt_reencode_mode(const struct option *opt,
  79. const char *arg, int unset)
  80. {
  81. if (unset) {
  82. reencode_mode = REENCODE_ABORT;
  83. return 0;
  84. }
  85. switch (git_parse_maybe_bool(arg)) {
  86. case 0:
  87. reencode_mode = REENCODE_NO;
  88. break;
  89. case 1:
  90. reencode_mode = REENCODE_YES;
  91. break;
  92. default:
  93. if (!strcasecmp(arg, "abort"))
  94. reencode_mode = REENCODE_ABORT;
  95. else
  96. return error("Unknown reencoding mode: %s", arg);
  97. }
  98. return 0;
  99. }
  100. static struct decoration idnums;
  101. static uint32_t last_idnum;
  102. static int has_unshown_parent(struct commit *commit)
  103. {
  104. struct commit_list *parent;
  105. for (parent = commit->parents; parent; parent = parent->next)
  106. if (!(parent->item->object.flags & SHOWN) &&
  107. !(parent->item->object.flags & UNINTERESTING))
  108. return 1;
  109. return 0;
  110. }
  111. struct anonymized_entry {
  112. struct hashmap_entry hash;
  113. const char *anon;
  114. const char orig[FLEX_ARRAY];
  115. };
  116. struct anonymized_entry_key {
  117. struct hashmap_entry hash;
  118. const char *orig;
  119. size_t orig_len;
  120. };
  121. static int anonymized_entry_cmp(const void *unused_cmp_data,
  122. const struct hashmap_entry *eptr,
  123. const struct hashmap_entry *entry_or_key,
  124. const void *keydata)
  125. {
  126. const struct anonymized_entry *a, *b;
  127. a = container_of(eptr, const struct anonymized_entry, hash);
  128. if (keydata) {
  129. const struct anonymized_entry_key *key = keydata;
  130. int equal = !strncmp(a->orig, key->orig, key->orig_len) &&
  131. !a->orig[key->orig_len];
  132. return !equal;
  133. }
  134. b = container_of(entry_or_key, const struct anonymized_entry, hash);
  135. return strcmp(a->orig, b->orig);
  136. }
  137. /*
  138. * Basically keep a cache of X->Y so that we can repeatedly replace
  139. * the same anonymized string with another. The actual generation
  140. * is farmed out to the generate function.
  141. */
  142. static const char *anonymize_str(struct hashmap *map,
  143. char *(*generate)(void *),
  144. const char *orig, size_t len,
  145. void *data)
  146. {
  147. struct anonymized_entry_key key;
  148. struct anonymized_entry *ret;
  149. if (!map->cmpfn)
  150. hashmap_init(map, anonymized_entry_cmp, NULL, 0);
  151. hashmap_entry_init(&key.hash, memhash(orig, len));
  152. key.orig = orig;
  153. key.orig_len = len;
  154. /* First check if it's a token the user configured manually... */
  155. if (anonymized_seeds.cmpfn)
  156. ret = hashmap_get_entry(&anonymized_seeds, &key, hash, &key);
  157. else
  158. ret = NULL;
  159. /* ...otherwise check if we've already seen it in this context... */
  160. if (!ret)
  161. ret = hashmap_get_entry(map, &key, hash, &key);
  162. /* ...and finally generate a new mapping if necessary */
  163. if (!ret) {
  164. FLEX_ALLOC_MEM(ret, orig, orig, len);
  165. hashmap_entry_init(&ret->hash, key.hash.hash);
  166. ret->anon = generate(data);
  167. hashmap_put(map, &ret->hash);
  168. }
  169. return ret->anon;
  170. }
  171. /*
  172. * We anonymize each component of a path individually,
  173. * so that paths a/b and a/c will share a common root.
  174. * The paths are cached via anonymize_mem so that repeated
  175. * lookups for "a" will yield the same value.
  176. */
  177. static void anonymize_path(struct strbuf *out, const char *path,
  178. struct hashmap *map,
  179. char *(*generate)(void *))
  180. {
  181. while (*path) {
  182. const char *end_of_component = strchrnul(path, '/');
  183. size_t len = end_of_component - path;
  184. const char *c = anonymize_str(map, generate, path, len, NULL);
  185. strbuf_addstr(out, c);
  186. path = end_of_component;
  187. if (*path)
  188. strbuf_addch(out, *path++);
  189. }
  190. }
  191. static inline void *mark_to_ptr(uint32_t mark)
  192. {
  193. return (void *)(uintptr_t)mark;
  194. }
  195. static inline uint32_t ptr_to_mark(void * mark)
  196. {
  197. return (uint32_t)(uintptr_t)mark;
  198. }
  199. static inline void mark_object(struct object *object, uint32_t mark)
  200. {
  201. add_decoration(&idnums, object, mark_to_ptr(mark));
  202. }
  203. static inline void mark_next_object(struct object *object)
  204. {
  205. mark_object(object, ++last_idnum);
  206. }
  207. static int get_object_mark(struct object *object)
  208. {
  209. void *decoration = lookup_decoration(&idnums, object);
  210. if (!decoration)
  211. return 0;
  212. return ptr_to_mark(decoration);
  213. }
  214. static struct commit *rewrite_commit(struct commit *p)
  215. {
  216. for (;;) {
  217. if (p->parents && p->parents->next)
  218. break;
  219. if (p->object.flags & UNINTERESTING)
  220. break;
  221. if (!(p->object.flags & TREESAME))
  222. break;
  223. if (!p->parents)
  224. return NULL;
  225. p = p->parents->item;
  226. }
  227. return p;
  228. }
  229. static void show_progress(void)
  230. {
  231. static int counter = 0;
  232. if (!progress)
  233. return;
  234. if ((++counter % progress) == 0)
  235. printf("progress %d objects\n", counter);
  236. }
  237. /*
  238. * Ideally we would want some transformation of the blob data here
  239. * that is unreversible, but would still be the same size and have
  240. * the same data relationship to other blobs (so that we get the same
  241. * delta and packing behavior as the original). But the first and last
  242. * requirements there are probably mutually exclusive, so let's take
  243. * the easy way out for now, and just generate arbitrary content.
  244. *
  245. * There's no need to cache this result with anonymize_mem, since
  246. * we already handle blob content caching with marks.
  247. */
  248. static char *anonymize_blob(unsigned long *size)
  249. {
  250. static int counter;
  251. struct strbuf out = STRBUF_INIT;
  252. strbuf_addf(&out, "anonymous blob %d", counter++);
  253. *size = out.len;
  254. return strbuf_detach(&out, NULL);
  255. }
  256. static void export_blob(const struct object_id *oid)
  257. {
  258. unsigned long size;
  259. enum object_type type;
  260. char *buf;
  261. struct object *object;
  262. int eaten;
  263. if (no_data)
  264. return;
  265. if (is_null_oid(oid))
  266. return;
  267. object = lookup_object(the_repository, oid);
  268. if (object && object->flags & SHOWN)
  269. return;
  270. if (anonymize) {
  271. buf = anonymize_blob(&size);
  272. object = (struct object *)lookup_blob(the_repository, oid);
  273. eaten = 0;
  274. } else {
  275. buf = read_object_file(oid, &type, &size);
  276. if (!buf)
  277. die("could not read blob %s", oid_to_hex(oid));
  278. if (check_object_signature(the_repository, oid, buf, size,
  279. type_name(type)) < 0)
  280. die("oid mismatch in blob %s", oid_to_hex(oid));
  281. object = parse_object_buffer(the_repository, oid, type,
  282. size, buf, &eaten);
  283. }
  284. if (!object)
  285. die("Could not read blob %s", oid_to_hex(oid));
  286. mark_next_object(object);
  287. printf("blob\nmark :%"PRIu32"\n", last_idnum);
  288. if (show_original_ids)
  289. printf("original-oid %s\n", oid_to_hex(oid));
  290. printf("data %"PRIuMAX"\n", (uintmax_t)size);
  291. if (size && fwrite(buf, size, 1, stdout) != 1)
  292. die_errno("could not write blob '%s'", oid_to_hex(oid));
  293. printf("\n");
  294. show_progress();
  295. object->flags |= SHOWN;
  296. if (!eaten)
  297. free(buf);
  298. }
  299. static int depth_first(const void *a_, const void *b_)
  300. {
  301. const struct diff_filepair *a = *((const struct diff_filepair **)a_);
  302. const struct diff_filepair *b = *((const struct diff_filepair **)b_);
  303. const char *name_a, *name_b;
  304. int len_a, len_b, len;
  305. int cmp;
  306. name_a = a->one ? a->one->path : a->two->path;
  307. name_b = b->one ? b->one->path : b->two->path;
  308. len_a = strlen(name_a);
  309. len_b = strlen(name_b);
  310. len = (len_a < len_b) ? len_a : len_b;
  311. /* strcmp will sort 'd' before 'd/e', we want 'd/e' before 'd' */
  312. cmp = memcmp(name_a, name_b, len);
  313. if (cmp)
  314. return cmp;
  315. cmp = len_b - len_a;
  316. if (cmp)
  317. return cmp;
  318. /*
  319. * Move 'R'ename entries last so that all references of the file
  320. * appear in the output before it is renamed (e.g., when a file
  321. * was copied and renamed in the same commit).
  322. */
  323. return (a->status == 'R') - (b->status == 'R');
  324. }
  325. static void print_path_1(const char *path)
  326. {
  327. int need_quote = quote_c_style(path, NULL, NULL, 0);
  328. if (need_quote)
  329. quote_c_style(path, NULL, stdout, 0);
  330. else if (strchr(path, ' '))
  331. printf("\"%s\"", path);
  332. else
  333. printf("%s", path);
  334. }
  335. static char *anonymize_path_component(void *data)
  336. {
  337. static int counter;
  338. struct strbuf out = STRBUF_INIT;
  339. strbuf_addf(&out, "path%d", counter++);
  340. return strbuf_detach(&out, NULL);
  341. }
  342. static void print_path(const char *path)
  343. {
  344. if (!anonymize)
  345. print_path_1(path);
  346. else {
  347. static struct hashmap paths;
  348. static struct strbuf anon = STRBUF_INIT;
  349. anonymize_path(&anon, path, &paths, anonymize_path_component);
  350. print_path_1(anon.buf);
  351. strbuf_reset(&anon);
  352. }
  353. }
  354. static char *generate_fake_oid(void *data)
  355. {
  356. static uint32_t counter = 1; /* avoid null oid */
  357. const unsigned hashsz = the_hash_algo->rawsz;
  358. struct object_id oid;
  359. char *hex = xmallocz(GIT_MAX_HEXSZ);
  360. oidclr(&oid);
  361. put_be32(oid.hash + hashsz - 4, counter++);
  362. return oid_to_hex_r(hex, &oid);
  363. }
  364. static const char *anonymize_oid(const char *oid_hex)
  365. {
  366. static struct hashmap objs;
  367. size_t len = strlen(oid_hex);
  368. return anonymize_str(&objs, generate_fake_oid, oid_hex, len, NULL);
  369. }
  370. static void show_filemodify(struct diff_queue_struct *q,
  371. struct diff_options *options, void *data)
  372. {
  373. int i;
  374. struct string_list *changed = data;
  375. /*
  376. * Handle files below a directory first, in case they are all deleted
  377. * and the directory changes to a file or symlink.
  378. */
  379. QSORT(q->queue, q->nr, depth_first);
  380. for (i = 0; i < q->nr; i++) {
  381. struct diff_filespec *ospec = q->queue[i]->one;
  382. struct diff_filespec *spec = q->queue[i]->two;
  383. switch (q->queue[i]->status) {
  384. case DIFF_STATUS_DELETED:
  385. printf("D ");
  386. print_path(spec->path);
  387. string_list_insert(changed, spec->path);
  388. putchar('\n');
  389. break;
  390. case DIFF_STATUS_COPIED:
  391. case DIFF_STATUS_RENAMED:
  392. /*
  393. * If a change in the file corresponding to ospec->path
  394. * has been observed, we cannot trust its contents
  395. * because the diff is calculated based on the prior
  396. * contents, not the current contents. So, declare a
  397. * copy or rename only if there was no change observed.
  398. */
  399. if (!string_list_has_string(changed, ospec->path)) {
  400. printf("%c ", q->queue[i]->status);
  401. print_path(ospec->path);
  402. putchar(' ');
  403. print_path(spec->path);
  404. string_list_insert(changed, spec->path);
  405. putchar('\n');
  406. if (oideq(&ospec->oid, &spec->oid) &&
  407. ospec->mode == spec->mode)
  408. break;
  409. }
  410. /* fallthrough */
  411. case DIFF_STATUS_TYPE_CHANGED:
  412. case DIFF_STATUS_MODIFIED:
  413. case DIFF_STATUS_ADDED:
  414. /*
  415. * Links refer to objects in another repositories;
  416. * output the SHA-1 verbatim.
  417. */
  418. if (no_data || S_ISGITLINK(spec->mode))
  419. printf("M %06o %s ", spec->mode,
  420. anonymize ?
  421. anonymize_oid(oid_to_hex(&spec->oid)) :
  422. oid_to_hex(&spec->oid));
  423. else {
  424. struct object *object = lookup_object(the_repository,
  425. &spec->oid);
  426. printf("M %06o :%d ", spec->mode,
  427. get_object_mark(object));
  428. }
  429. print_path(spec->path);
  430. string_list_insert(changed, spec->path);
  431. putchar('\n');
  432. break;
  433. default:
  434. die("Unexpected comparison status '%c' for %s, %s",
  435. q->queue[i]->status,
  436. ospec->path ? ospec->path : "none",
  437. spec->path ? spec->path : "none");
  438. }
  439. }
  440. }
  441. static const char *find_encoding(const char *begin, const char *end)
  442. {
  443. const char *needle = "\nencoding ";
  444. char *bol, *eol;
  445. bol = memmem(begin, end ? end - begin : strlen(begin),
  446. needle, strlen(needle));
  447. if (!bol)
  448. return NULL;
  449. bol += strlen(needle);
  450. eol = strchrnul(bol, '\n');
  451. *eol = '\0';
  452. return bol;
  453. }
  454. static char *anonymize_ref_component(void *data)
  455. {
  456. static int counter;
  457. struct strbuf out = STRBUF_INIT;
  458. strbuf_addf(&out, "ref%d", counter++);
  459. return strbuf_detach(&out, NULL);
  460. }
  461. static const char *anonymize_refname(const char *refname)
  462. {
  463. /*
  464. * If any of these prefixes is found, we will leave it intact
  465. * so that tags remain tags and so forth.
  466. */
  467. static const char *prefixes[] = {
  468. "refs/heads/",
  469. "refs/tags/",
  470. "refs/remotes/",
  471. "refs/"
  472. };
  473. static struct hashmap refs;
  474. static struct strbuf anon = STRBUF_INIT;
  475. int i;
  476. strbuf_reset(&anon);
  477. for (i = 0; i < ARRAY_SIZE(prefixes); i++) {
  478. if (skip_prefix(refname, prefixes[i], &refname)) {
  479. strbuf_addstr(&anon, prefixes[i]);
  480. break;
  481. }
  482. }
  483. anonymize_path(&anon, refname, &refs, anonymize_ref_component);
  484. return anon.buf;
  485. }
  486. /*
  487. * We do not even bother to cache commit messages, as they are unlikely
  488. * to be repeated verbatim, and it is not that interesting when they are.
  489. */
  490. static char *anonymize_commit_message(const char *old)
  491. {
  492. static int counter;
  493. return xstrfmt("subject %d\n\nbody\n", counter++);
  494. }
  495. static char *anonymize_ident(void *data)
  496. {
  497. static int counter;
  498. struct strbuf out = STRBUF_INIT;
  499. strbuf_addf(&out, "User %d <user%d@example.com>", counter, counter);
  500. counter++;
  501. return strbuf_detach(&out, NULL);
  502. }
  503. /*
  504. * Our strategy here is to anonymize the names and email addresses,
  505. * but keep timestamps intact, as they influence things like traversal
  506. * order (and by themselves should not be too revealing).
  507. */
  508. static void anonymize_ident_line(const char **beg, const char **end)
  509. {
  510. static struct hashmap idents;
  511. static struct strbuf buffers[] = { STRBUF_INIT, STRBUF_INIT };
  512. static unsigned which_buffer;
  513. struct strbuf *out;
  514. struct ident_split split;
  515. const char *end_of_header;
  516. out = &buffers[which_buffer++];
  517. which_buffer %= ARRAY_SIZE(buffers);
  518. strbuf_reset(out);
  519. /* skip "committer", "author", "tagger", etc */
  520. end_of_header = strchr(*beg, ' ');
  521. if (!end_of_header)
  522. BUG("malformed line fed to anonymize_ident_line: %.*s",
  523. (int)(*end - *beg), *beg);
  524. end_of_header++;
  525. strbuf_add(out, *beg, end_of_header - *beg);
  526. if (!split_ident_line(&split, end_of_header, *end - end_of_header) &&
  527. split.date_begin) {
  528. const char *ident;
  529. size_t len;
  530. len = split.mail_end - split.name_begin;
  531. ident = anonymize_str(&idents, anonymize_ident,
  532. split.name_begin, len, NULL);
  533. strbuf_addstr(out, ident);
  534. strbuf_addch(out, ' ');
  535. strbuf_add(out, split.date_begin, split.tz_end - split.date_begin);
  536. } else {
  537. strbuf_addstr(out, "Malformed Ident <malformed@example.com> 0 -0000");
  538. }
  539. *beg = out->buf;
  540. *end = out->buf + out->len;
  541. }
  542. static void handle_commit(struct commit *commit, struct rev_info *rev,
  543. struct string_list *paths_of_changed_objects)
  544. {
  545. int saved_output_format = rev->diffopt.output_format;
  546. const char *commit_buffer;
  547. const char *author, *author_end, *committer, *committer_end;
  548. const char *encoding, *message;
  549. char *reencoded = NULL;
  550. struct commit_list *p;
  551. const char *refname;
  552. int i;
  553. rev->diffopt.output_format = DIFF_FORMAT_CALLBACK;
  554. parse_commit_or_die(commit);
  555. commit_buffer = get_commit_buffer(commit, NULL);
  556. author = strstr(commit_buffer, "\nauthor ");
  557. if (!author)
  558. die("could not find author in commit %s",
  559. oid_to_hex(&commit->object.oid));
  560. author++;
  561. author_end = strchrnul(author, '\n');
  562. committer = strstr(author_end, "\ncommitter ");
  563. if (!committer)
  564. die("could not find committer in commit %s",
  565. oid_to_hex(&commit->object.oid));
  566. committer++;
  567. committer_end = strchrnul(committer, '\n');
  568. message = strstr(committer_end, "\n\n");
  569. encoding = find_encoding(committer_end, message);
  570. if (message)
  571. message += 2;
  572. if (commit->parents &&
  573. (get_object_mark(&commit->parents->item->object) != 0 ||
  574. reference_excluded_commits) &&
  575. !full_tree) {
  576. parse_commit_or_die(commit->parents->item);
  577. diff_tree_oid(get_commit_tree_oid(commit->parents->item),
  578. get_commit_tree_oid(commit), "", &rev->diffopt);
  579. }
  580. else
  581. diff_root_tree_oid(get_commit_tree_oid(commit),
  582. "", &rev->diffopt);
  583. /* Export the referenced blobs, and remember the marks. */
  584. for (i = 0; i < diff_queued_diff.nr; i++)
  585. if (!S_ISGITLINK(diff_queued_diff.queue[i]->two->mode))
  586. export_blob(&diff_queued_diff.queue[i]->two->oid);
  587. refname = *revision_sources_at(&revision_sources, commit);
  588. /*
  589. * FIXME: string_list_remove() below for each ref is overall
  590. * O(N^2). Compared to a history walk and diffing trees, this is
  591. * just lost in the noise in practice. However, theoretically a
  592. * repo may have enough refs for this to become slow.
  593. */
  594. string_list_remove(&extra_refs, refname, 0);
  595. if (anonymize) {
  596. refname = anonymize_refname(refname);
  597. anonymize_ident_line(&committer, &committer_end);
  598. anonymize_ident_line(&author, &author_end);
  599. }
  600. mark_next_object(&commit->object);
  601. if (anonymize) {
  602. reencoded = anonymize_commit_message(message);
  603. } else if (encoding) {
  604. switch(reencode_mode) {
  605. case REENCODE_YES:
  606. reencoded = reencode_string(message, "UTF-8", encoding);
  607. break;
  608. case REENCODE_NO:
  609. break;
  610. case REENCODE_ABORT:
  611. die("Encountered commit-specific encoding %s in commit "
  612. "%s; use --reencode=[yes|no] to handle it",
  613. encoding, oid_to_hex(&commit->object.oid));
  614. }
  615. }
  616. if (!commit->parents)
  617. printf("reset %s\n", refname);
  618. printf("commit %s\nmark :%"PRIu32"\n", refname, last_idnum);
  619. if (show_original_ids)
  620. printf("original-oid %s\n", oid_to_hex(&commit->object.oid));
  621. printf("%.*s\n%.*s\n",
  622. (int)(author_end - author), author,
  623. (int)(committer_end - committer), committer);
  624. if (!reencoded && encoding)
  625. printf("encoding %s\n", encoding);
  626. printf("data %u\n%s",
  627. (unsigned)(reencoded
  628. ? strlen(reencoded) : message
  629. ? strlen(message) : 0),
  630. reencoded ? reencoded : message ? message : "");
  631. free(reencoded);
  632. unuse_commit_buffer(commit, commit_buffer);
  633. for (i = 0, p = commit->parents; p; p = p->next) {
  634. struct object *obj = &p->item->object;
  635. int mark = get_object_mark(obj);
  636. if (!mark && !reference_excluded_commits)
  637. continue;
  638. if (i == 0)
  639. printf("from ");
  640. else
  641. printf("merge ");
  642. if (mark)
  643. printf(":%d\n", mark);
  644. else
  645. printf("%s\n",
  646. anonymize ?
  647. anonymize_oid(oid_to_hex(&obj->oid)) :
  648. oid_to_hex(&obj->oid));
  649. i++;
  650. }
  651. if (full_tree)
  652. printf("deleteall\n");
  653. log_tree_diff_flush(rev);
  654. string_list_clear(paths_of_changed_objects, 0);
  655. rev->diffopt.output_format = saved_output_format;
  656. printf("\n");
  657. show_progress();
  658. }
  659. static char *anonymize_tag(void *data)
  660. {
  661. static int counter;
  662. struct strbuf out = STRBUF_INIT;
  663. strbuf_addf(&out, "tag message %d", counter++);
  664. return strbuf_detach(&out, NULL);
  665. }
  666. static void handle_tail(struct object_array *commits, struct rev_info *revs,
  667. struct string_list *paths_of_changed_objects)
  668. {
  669. struct commit *commit;
  670. while (commits->nr) {
  671. commit = (struct commit *)object_array_pop(commits);
  672. if (has_unshown_parent(commit)) {
  673. /* Queue again, to be handled later */
  674. add_object_array(&commit->object, NULL, commits);
  675. return;
  676. }
  677. handle_commit(commit, revs, paths_of_changed_objects);
  678. }
  679. }
  680. static void handle_tag(const char *name, struct tag *tag)
  681. {
  682. unsigned long size;
  683. enum object_type type;
  684. char *buf;
  685. const char *tagger, *tagger_end, *message;
  686. size_t message_size = 0;
  687. struct object *tagged;
  688. int tagged_mark;
  689. struct commit *p;
  690. /* Trees have no identifier in fast-export output, thus we have no way
  691. * to output tags of trees, tags of tags of trees, etc. Simply omit
  692. * such tags.
  693. */
  694. tagged = tag->tagged;
  695. while (tagged->type == OBJ_TAG) {
  696. tagged = ((struct tag *)tagged)->tagged;
  697. }
  698. if (tagged->type == OBJ_TREE) {
  699. warning("Omitting tag %s,\nsince tags of trees (or tags of tags of trees, etc.) are not supported.",
  700. oid_to_hex(&tag->object.oid));
  701. return;
  702. }
  703. buf = read_object_file(&tag->object.oid, &type, &size);
  704. if (!buf)
  705. die("could not read tag %s", oid_to_hex(&tag->object.oid));
  706. message = memmem(buf, size, "\n\n", 2);
  707. if (message) {
  708. message += 2;
  709. message_size = strlen(message);
  710. }
  711. tagger = memmem(buf, message ? message - buf : size, "\ntagger ", 8);
  712. if (!tagger) {
  713. if (fake_missing_tagger)
  714. tagger = "tagger Unspecified Tagger "
  715. "<unspecified-tagger> 0 +0000";
  716. else
  717. tagger = "";
  718. tagger_end = tagger + strlen(tagger);
  719. } else {
  720. tagger++;
  721. tagger_end = strchrnul(tagger, '\n');
  722. if (anonymize)
  723. anonymize_ident_line(&tagger, &tagger_end);
  724. }
  725. if (anonymize) {
  726. name = anonymize_refname(name);
  727. if (message) {
  728. static struct hashmap tags;
  729. message = anonymize_str(&tags, anonymize_tag,
  730. message, message_size, NULL);
  731. }
  732. }
  733. /* handle signed tags */
  734. if (message) {
  735. const char *signature = strstr(message,
  736. "\n-----BEGIN PGP SIGNATURE-----\n");
  737. if (signature)
  738. switch(signed_tag_mode) {
  739. case SIGNED_TAG_ABORT:
  740. die("encountered signed tag %s; use "
  741. "--signed-tags=<mode> to handle it",
  742. oid_to_hex(&tag->object.oid));
  743. case WARN:
  744. warning("exporting signed tag %s",
  745. oid_to_hex(&tag->object.oid));
  746. /* fallthru */
  747. case VERBATIM:
  748. break;
  749. case WARN_STRIP:
  750. warning("stripping signature from tag %s",
  751. oid_to_hex(&tag->object.oid));
  752. /* fallthru */
  753. case STRIP:
  754. message_size = signature + 1 - message;
  755. break;
  756. }
  757. }
  758. /* handle tag->tagged having been filtered out due to paths specified */
  759. tagged = tag->tagged;
  760. tagged_mark = get_object_mark(tagged);
  761. if (!tagged_mark) {
  762. switch(tag_of_filtered_mode) {
  763. case TAG_FILTERING_ABORT:
  764. die("tag %s tags unexported object; use "
  765. "--tag-of-filtered-object=<mode> to handle it",
  766. oid_to_hex(&tag->object.oid));
  767. case DROP:
  768. /* Ignore this tag altogether */
  769. free(buf);
  770. return;
  771. case REWRITE:
  772. if (tagged->type == OBJ_TAG && !mark_tags) {
  773. die(_("Error: Cannot export nested tags unless --mark-tags is specified."));
  774. } else if (tagged->type == OBJ_COMMIT) {
  775. p = rewrite_commit((struct commit *)tagged);
  776. if (!p) {
  777. printf("reset %s\nfrom %s\n\n",
  778. name, oid_to_hex(&null_oid));
  779. free(buf);
  780. return;
  781. }
  782. tagged_mark = get_object_mark(&p->object);
  783. } else {
  784. /* tagged->type is either OBJ_BLOB or OBJ_TAG */
  785. tagged_mark = get_object_mark(tagged);
  786. }
  787. }
  788. }
  789. if (tagged->type == OBJ_TAG) {
  790. printf("reset %s\nfrom %s\n\n",
  791. name, oid_to_hex(&null_oid));
  792. }
  793. skip_prefix(name, "refs/tags/", &name);
  794. printf("tag %s\n", name);
  795. if (mark_tags) {
  796. mark_next_object(&tag->object);
  797. printf("mark :%"PRIu32"\n", last_idnum);
  798. }
  799. if (tagged_mark)
  800. printf("from :%d\n", tagged_mark);
  801. else
  802. printf("from %s\n", oid_to_hex(&tagged->oid));
  803. if (show_original_ids)
  804. printf("original-oid %s\n", oid_to_hex(&tag->object.oid));
  805. printf("%.*s%sdata %d\n%.*s\n",
  806. (int)(tagger_end - tagger), tagger,
  807. tagger == tagger_end ? "" : "\n",
  808. (int)message_size, (int)message_size, message ? message : "");
  809. free(buf);
  810. }
  811. static struct commit *get_commit(struct rev_cmdline_entry *e, char *full_name)
  812. {
  813. switch (e->item->type) {
  814. case OBJ_COMMIT:
  815. return (struct commit *)e->item;
  816. case OBJ_TAG: {
  817. struct tag *tag = (struct tag *)e->item;
  818. /* handle nested tags */
  819. while (tag && tag->object.type == OBJ_TAG) {
  820. parse_object(the_repository, &tag->object.oid);
  821. string_list_append(&tag_refs, full_name)->util = tag;
  822. tag = (struct tag *)tag->tagged;
  823. }
  824. if (!tag)
  825. die("Tag %s points nowhere?", e->name);
  826. return (struct commit *)tag;
  827. break;
  828. }
  829. default:
  830. return NULL;
  831. }
  832. }
  833. static void get_tags_and_duplicates(struct rev_cmdline_info *info)
  834. {
  835. int i;
  836. for (i = 0; i < info->nr; i++) {
  837. struct rev_cmdline_entry *e = info->rev + i;
  838. struct object_id oid;
  839. struct commit *commit;
  840. char *full_name;
  841. if (e->flags & UNINTERESTING)
  842. continue;
  843. if (dwim_ref(e->name, strlen(e->name), &oid, &full_name, 0) != 1)
  844. continue;
  845. if (refspecs.nr) {
  846. char *private;
  847. private = apply_refspecs(&refspecs, full_name);
  848. if (private) {
  849. free(full_name);
  850. full_name = private;
  851. }
  852. }
  853. commit = get_commit(e, full_name);
  854. if (!commit) {
  855. warning("%s: Unexpected object of type %s, skipping.",
  856. e->name,
  857. type_name(e->item->type));
  858. continue;
  859. }
  860. switch(commit->object.type) {
  861. case OBJ_COMMIT:
  862. break;
  863. case OBJ_BLOB:
  864. export_blob(&commit->object.oid);
  865. continue;
  866. default: /* OBJ_TAG (nested tags) is already handled */
  867. warning("Tag points to object of unexpected type %s, skipping.",
  868. type_name(commit->object.type));
  869. continue;
  870. }
  871. /*
  872. * Make sure this ref gets properly updated eventually, whether
  873. * through a commit or manually at the end.
  874. */
  875. if (e->item->type != OBJ_TAG)
  876. string_list_append(&extra_refs, full_name)->util = commit;
  877. if (!*revision_sources_at(&revision_sources, commit))
  878. *revision_sources_at(&revision_sources, commit) = full_name;
  879. }
  880. string_list_sort(&extra_refs);
  881. string_list_remove_duplicates(&extra_refs, 0);
  882. }
  883. static void handle_tags_and_duplicates(struct string_list *extras)
  884. {
  885. struct commit *commit;
  886. int i;
  887. for (i = extras->nr - 1; i >= 0; i--) {
  888. const char *name = extras->items[i].string;
  889. struct object *object = extras->items[i].util;
  890. int mark;
  891. switch (object->type) {
  892. case OBJ_TAG:
  893. handle_tag(name, (struct tag *)object);
  894. break;
  895. case OBJ_COMMIT:
  896. if (anonymize)
  897. name = anonymize_refname(name);
  898. /* create refs pointing to already seen commits */
  899. commit = rewrite_commit((struct commit *)object);
  900. if (!commit) {
  901. /*
  902. * Neither this object nor any of its
  903. * ancestors touch any relevant paths, so
  904. * it has been filtered to nothing. Delete
  905. * it.
  906. */
  907. printf("reset %s\nfrom %s\n\n",
  908. name, oid_to_hex(&null_oid));
  909. continue;
  910. }
  911. mark = get_object_mark(&commit->object);
  912. if (!mark) {
  913. /*
  914. * Getting here means we have a commit which
  915. * was excluded by a negative refspec (e.g.
  916. * fast-export ^HEAD HEAD). If we are
  917. * referencing excluded commits, set the ref
  918. * to the exact commit. Otherwise, the user
  919. * wants the branch exported but every commit
  920. * in its history to be deleted, which basically
  921. * just means deletion of the ref.
  922. */
  923. if (!reference_excluded_commits) {
  924. /* delete the ref */
  925. printf("reset %s\nfrom %s\n\n",
  926. name, oid_to_hex(&null_oid));
  927. continue;
  928. }
  929. /* set ref to commit using oid, not mark */
  930. printf("reset %s\nfrom %s\n\n", name,
  931. oid_to_hex(&commit->object.oid));
  932. continue;
  933. }
  934. printf("reset %s\nfrom :%d\n\n", name, mark
  935. );
  936. show_progress();
  937. break;
  938. }
  939. }
  940. }
  941. static void export_marks(char *file)
  942. {
  943. unsigned int i;
  944. uint32_t mark;
  945. struct decoration_entry *deco = idnums.entries;
  946. FILE *f;
  947. int e = 0;
  948. f = fopen_for_writing(file);
  949. if (!f)
  950. die_errno("Unable to open marks file %s for writing.", file);
  951. for (i = 0; i < idnums.size; i++) {
  952. if (deco->base && deco->base->type == 1) {
  953. mark = ptr_to_mark(deco->decoration);
  954. if (fprintf(f, ":%"PRIu32" %s\n", mark,
  955. oid_to_hex(&deco->base->oid)) < 0) {
  956. e = 1;
  957. break;
  958. }
  959. }
  960. deco++;
  961. }
  962. e |= ferror(f);
  963. e |= fclose(f);
  964. if (e)
  965. error("Unable to write marks file %s.", file);
  966. }
  967. static void import_marks(char *input_file, int check_exists)
  968. {
  969. char line[512];
  970. FILE *f;
  971. struct stat sb;
  972. if (check_exists && stat(input_file, &sb))
  973. return;
  974. f = xfopen(input_file, "r");
  975. while (fgets(line, sizeof(line), f)) {
  976. uint32_t mark;
  977. char *line_end, *mark_end;
  978. struct object_id oid;
  979. struct object *object;
  980. struct commit *commit;
  981. enum object_type type;
  982. line_end = strchr(line, '\n');
  983. if (line[0] != ':' || !line_end)
  984. die("corrupt mark line: %s", line);
  985. *line_end = '\0';
  986. mark = strtoumax(line + 1, &mark_end, 10);
  987. if (!mark || mark_end == line + 1
  988. || *mark_end != ' ' || get_oid_hex(mark_end + 1, &oid))
  989. die("corrupt mark line: %s", line);
  990. if (last_idnum < mark)
  991. last_idnum = mark;
  992. type = oid_object_info(the_repository, &oid, NULL);
  993. if (type < 0)
  994. die("object not found: %s", oid_to_hex(&oid));
  995. if (type != OBJ_COMMIT)
  996. /* only commits */
  997. continue;
  998. commit = lookup_commit(the_repository, &oid);
  999. if (!commit)
  1000. die("not a commit? can't happen: %s", oid_to_hex(&oid));
  1001. object = &commit->object;
  1002. if (object->flags & SHOWN)
  1003. error("Object %s already has a mark", oid_to_hex(&oid));
  1004. mark_object(object, mark);
  1005. object->flags |= SHOWN;
  1006. }
  1007. fclose(f);
  1008. }
  1009. static void handle_deletes(void)
  1010. {
  1011. int i;
  1012. for (i = 0; i < refspecs.nr; i++) {
  1013. struct refspec_item *refspec = &refspecs.items[i];
  1014. if (*refspec->src)
  1015. continue;
  1016. printf("reset %s\nfrom %s\n\n",
  1017. refspec->dst, oid_to_hex(&null_oid));
  1018. }
  1019. }
  1020. static char *anonymize_seed(void *data)
  1021. {
  1022. return xstrdup(data);
  1023. }
  1024. static int parse_opt_anonymize_map(const struct option *opt,
  1025. const char *arg, int unset)
  1026. {
  1027. struct hashmap *map = opt->value;
  1028. const char *delim, *value;
  1029. size_t keylen;
  1030. BUG_ON_OPT_NEG(unset);
  1031. delim = strchr(arg, ':');
  1032. if (delim) {
  1033. keylen = delim - arg;
  1034. value = delim + 1;
  1035. } else {
  1036. keylen = strlen(arg);
  1037. value = arg;
  1038. }
  1039. if (!keylen || !*value)
  1040. return error(_("--anonymize-map token cannot be empty"));
  1041. anonymize_str(map, anonymize_seed, arg, keylen, (void *)value);
  1042. return 0;
  1043. }
  1044. int cmd_fast_export(int argc, const char **argv, const char *prefix)
  1045. {
  1046. struct rev_info revs;
  1047. struct object_array commits = OBJECT_ARRAY_INIT;
  1048. struct commit *commit;
  1049. char *export_filename = NULL,
  1050. *import_filename = NULL,
  1051. *import_filename_if_exists = NULL;
  1052. uint32_t lastimportid;
  1053. struct string_list refspecs_list = STRING_LIST_INIT_NODUP;
  1054. struct string_list paths_of_changed_objects = STRING_LIST_INIT_DUP;
  1055. struct option options[] = {
  1056. OPT_INTEGER(0, "progress", &progress,
  1057. N_("show progress after <n> objects")),
  1058. OPT_CALLBACK(0, "signed-tags", &signed_tag_mode, N_("mode"),
  1059. N_("select handling of signed tags"),
  1060. parse_opt_signed_tag_mode),
  1061. OPT_CALLBACK(0, "tag-of-filtered-object", &tag_of_filtered_mode, N_("mode"),
  1062. N_("select handling of tags that tag filtered objects"),
  1063. parse_opt_tag_of_filtered_mode),
  1064. OPT_CALLBACK(0, "reencode", &reencode_mode, N_("mode"),
  1065. N_("select handling of commit messages in an alternate encoding"),
  1066. parse_opt_reencode_mode),
  1067. OPT_STRING(0, "export-marks", &export_filename, N_("file"),
  1068. N_("Dump marks to this file")),
  1069. OPT_STRING(0, "import-marks", &import_filename, N_("file"),
  1070. N_("Import marks from this file")),
  1071. OPT_STRING(0, "import-marks-if-exists",
  1072. &import_filename_if_exists,
  1073. N_("file"),
  1074. N_("Import marks from this file if it exists")),
  1075. OPT_BOOL(0, "fake-missing-tagger", &fake_missing_tagger,
  1076. N_("Fake a tagger when tags lack one")),
  1077. OPT_BOOL(0, "full-tree", &full_tree,
  1078. N_("Output full tree for each commit")),
  1079. OPT_BOOL(0, "use-done-feature", &use_done_feature,
  1080. N_("Use the done feature to terminate the stream")),
  1081. OPT_BOOL(0, "no-data", &no_data, N_("Skip output of blob data")),
  1082. OPT_STRING_LIST(0, "refspec", &refspecs_list, N_("refspec"),
  1083. N_("Apply refspec to exported refs")),
  1084. OPT_BOOL(0, "anonymize", &anonymize, N_("anonymize output")),
  1085. OPT_CALLBACK_F(0, "anonymize-map", &anonymized_seeds, N_("from:to"),
  1086. N_("convert <from> to <to> in anonymized output"),
  1087. PARSE_OPT_NONEG, parse_opt_anonymize_map),
  1088. OPT_BOOL(0, "reference-excluded-parents",
  1089. &reference_excluded_commits, N_("Reference parents which are not in fast-export stream by object id")),
  1090. OPT_BOOL(0, "show-original-ids", &show_original_ids,
  1091. N_("Show original object ids of blobs/commits")),
  1092. OPT_BOOL(0, "mark-tags", &mark_tags,
  1093. N_("Label tags with mark ids")),
  1094. OPT_END()
  1095. };
  1096. if (argc == 1)
  1097. usage_with_options (fast_export_usage, options);
  1098. /* we handle encodings */
  1099. git_config(git_default_config, NULL);
  1100. repo_init_revisions(the_repository, &revs, prefix);
  1101. init_revision_sources(&revision_sources);
  1102. revs.topo_order = 1;
  1103. revs.sources = &revision_sources;
  1104. revs.rewrite_parents = 1;
  1105. argc = parse_options(argc, argv, prefix, options, fast_export_usage,
  1106. PARSE_OPT_KEEP_ARGV0 | PARSE_OPT_KEEP_UNKNOWN);
  1107. argc = setup_revisions(argc, argv, &revs, NULL);
  1108. if (argc > 1)
  1109. usage_with_options (fast_export_usage, options);
  1110. if (anonymized_seeds.cmpfn && !anonymize)
  1111. die(_("--anonymize-map without --anonymize does not make sense"));
  1112. if (refspecs_list.nr) {
  1113. int i;
  1114. for (i = 0; i < refspecs_list.nr; i++)
  1115. refspec_append(&refspecs, refspecs_list.items[i].string);
  1116. string_list_clear(&refspecs_list, 1);
  1117. }
  1118. if (use_done_feature)
  1119. printf("feature done\n");
  1120. if (import_filename && import_filename_if_exists)
  1121. die(_("Cannot pass both --import-marks and --import-marks-if-exists"));
  1122. if (import_filename)
  1123. import_marks(import_filename, 0);
  1124. else if (import_filename_if_exists)
  1125. import_marks(import_filename_if_exists, 1);
  1126. lastimportid = last_idnum;
  1127. if (import_filename && revs.prune_data.nr)
  1128. full_tree = 1;
  1129. get_tags_and_duplicates(&revs.cmdline);
  1130. if (prepare_revision_walk(&revs))
  1131. die("revision walk setup failed");
  1132. revs.diffopt.format_callback = show_filemodify;
  1133. revs.diffopt.format_callback_data = &paths_of_changed_objects;
  1134. revs.diffopt.flags.recursive = 1;
  1135. while ((commit = get_revision(&revs))) {
  1136. if (has_unshown_parent(commit)) {
  1137. add_object_array(&commit->object, NULL, &commits);
  1138. }
  1139. else {
  1140. handle_commit(commit, &revs, &paths_of_changed_objects);
  1141. handle_tail(&commits, &revs, &paths_of_changed_objects);
  1142. }
  1143. }
  1144. handle_tags_and_duplicates(&extra_refs);
  1145. handle_tags_and_duplicates(&tag_refs);
  1146. handle_deletes();
  1147. if (export_filename && lastimportid != last_idnum)
  1148. export_marks(export_filename);
  1149. if (use_done_feature)
  1150. printf("done\n");
  1151. refspec_clear(&refspecs);
  1152. return 0;
  1153. }