git-cvsexportcommit.perl 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468
  1. #!/usr/bin/perl
  2. use 5.008;
  3. use strict;
  4. use warnings;
  5. use Getopt::Std;
  6. use File::Temp qw(tempdir);
  7. use Data::Dumper;
  8. use File::Basename qw(basename dirname);
  9. use File::Spec;
  10. use Git;
  11. our ($opt_h, $opt_P, $opt_p, $opt_v, $opt_c, $opt_f, $opt_a, $opt_m, $opt_d, $opt_u, $opt_w, $opt_W, $opt_k);
  12. getopts('uhPpvcfkam:d:w:W');
  13. $opt_h && usage();
  14. die "Need at least one commit identifier!" unless @ARGV;
  15. # Get git-config settings
  16. my $repo = Git->repository();
  17. $opt_w = $repo->config('cvsexportcommit.cvsdir') unless defined $opt_w;
  18. my $tmpdir = File::Temp::tempdir(CLEANUP => 1);
  19. my $hash_algo = $repo->config('extensions.objectformat') || 'sha1';
  20. my $hexsz = $hash_algo eq 'sha256' ? 64 : 40;
  21. if ($opt_w || $opt_W) {
  22. # Remember where GIT_DIR is before changing to CVS checkout
  23. unless ($ENV{GIT_DIR}) {
  24. # No GIT_DIR set. Figure it out for ourselves
  25. my $gd =`git rev-parse --git-dir`;
  26. chomp($gd);
  27. $ENV{GIT_DIR} = $gd;
  28. }
  29. # On MSYS, convert a Windows-style path to an MSYS-style path
  30. # so that rel2abs() below works correctly.
  31. if ($^O eq 'msys') {
  32. $ENV{GIT_DIR} =~ s#^([[:alpha:]]):/#/$1/#;
  33. }
  34. # Make sure GIT_DIR is absolute
  35. $ENV{GIT_DIR} = File::Spec->rel2abs($ENV{GIT_DIR});
  36. }
  37. if ($opt_w) {
  38. if (! -d $opt_w."/CVS" ) {
  39. die "$opt_w is not a CVS checkout";
  40. }
  41. chdir $opt_w or die "Cannot change to CVS checkout at $opt_w";
  42. }
  43. unless ($ENV{GIT_DIR} && -r $ENV{GIT_DIR}){
  44. die "GIT_DIR is not defined or is unreadable";
  45. }
  46. my @cvs;
  47. if ($opt_d) {
  48. @cvs = ('cvs', '-d', $opt_d);
  49. } else {
  50. @cvs = ('cvs');
  51. }
  52. # resolve target commit
  53. my $commit;
  54. $commit = pop @ARGV;
  55. $commit = safe_pipe_capture('git', 'rev-parse', '--verify', "$commit^0");
  56. chomp $commit;
  57. if ($?) {
  58. die "The commit reference $commit did not resolve!";
  59. }
  60. # resolve what parent we want
  61. my $parent;
  62. if (@ARGV) {
  63. $parent = pop @ARGV;
  64. $parent = safe_pipe_capture('git', 'rev-parse', '--verify', "$parent^0");
  65. chomp $parent;
  66. if ($?) {
  67. die "The parent reference did not resolve!";
  68. }
  69. }
  70. # find parents from the commit itself
  71. my @commit = safe_pipe_capture('git', 'cat-file', 'commit', $commit);
  72. my @parents;
  73. my $committer;
  74. my $author;
  75. my $stage = 'headers'; # headers, msg
  76. my $title;
  77. my $msg = '';
  78. foreach my $line (@commit) {
  79. chomp $line;
  80. if ($stage eq 'headers' && $line eq '') {
  81. $stage = 'msg';
  82. next;
  83. }
  84. if ($stage eq 'headers') {
  85. if ($line =~ m/^parent ([0-9a-f]{$hexsz})$/) { # found a parent
  86. push @parents, $1;
  87. } elsif ($line =~ m/^author (.+) \d+ [-+]\d+$/) {
  88. $author = $1;
  89. } elsif ($line =~ m/^committer (.+) \d+ [-+]\d+$/) {
  90. $committer = $1;
  91. }
  92. } else {
  93. $msg .= $line . "\n";
  94. unless ($title) {
  95. $title = $line;
  96. }
  97. }
  98. }
  99. my $noparent = "0" x $hexsz;
  100. if ($parent) {
  101. my $found;
  102. # double check that it's a valid parent
  103. foreach my $p (@parents) {
  104. if ($p eq $parent) {
  105. $found = 1;
  106. last;
  107. }; # found it
  108. }
  109. die "Did not find $parent in the parents for this commit!" if !$found and !$opt_P;
  110. } else { # we don't have a parent from the cmdline...
  111. if (@parents == 1) { # it's safe to get it from the commit
  112. $parent = $parents[0];
  113. } elsif (@parents == 0) { # there is no parent
  114. $parent = $noparent;
  115. } else { # cannot choose automatically from multiple parents
  116. die "This commit has more than one parent -- please name the parent you want to use explicitly";
  117. }
  118. }
  119. my $go_back_to = 0;
  120. if ($opt_W) {
  121. $opt_v && print "Resetting to $parent\n";
  122. $go_back_to = `git symbolic-ref HEAD 2> /dev/null ||
  123. git rev-parse HEAD` || die "Could not determine current branch";
  124. system("git checkout -q $parent^0") && die "Could not check out $parent^0";
  125. }
  126. $opt_v && print "Applying to CVS commit $commit from parent $parent\n";
  127. # grab the commit message
  128. open(MSG, ">.msg") or die "Cannot open .msg for writing";
  129. if ($opt_m) {
  130. print MSG $opt_m;
  131. }
  132. print MSG $msg;
  133. if ($opt_a) {
  134. print MSG "\n\nAuthor: $author\n";
  135. if ($author ne $committer) {
  136. print MSG "Committer: $committer\n";
  137. }
  138. }
  139. close MSG;
  140. if ($parent eq $noparent) {
  141. `git diff-tree --binary -p --root $commit >.cvsexportcommit.diff`;# || die "Cannot diff";
  142. } else {
  143. `git diff-tree --binary -p $parent $commit >.cvsexportcommit.diff`;# || die "Cannot diff";
  144. }
  145. ## apply non-binary changes
  146. # In pedantic mode require all lines of context to match. In normal
  147. # mode, be compatible with diff/patch: assume 3 lines of context and
  148. # require at least one line match, i.e. ignore at most 2 lines of
  149. # context, like diff/patch do by default.
  150. my $context = $opt_p ? '' : '-C1';
  151. print "Checking if patch will apply\n";
  152. my @stat;
  153. open APPLY, "GIT_INDEX_FILE=$tmpdir/index git apply $context --summary --numstat<.cvsexportcommit.diff|" || die "cannot patch";
  154. @stat=<APPLY>;
  155. close APPLY || die "Cannot patch";
  156. my (@bfiles,@files,@afiles,@dfiles);
  157. chomp @stat;
  158. foreach (@stat) {
  159. push (@bfiles,$1) if m/^-\t-\t(.*)$/;
  160. push (@files, $1) if m/^-\t-\t(.*)$/;
  161. push (@files, $1) if m/^\d+\t\d+\t(.*)$/;
  162. push (@afiles,$1) if m/^ create mode [0-7]+ (.*)$/;
  163. push (@dfiles,$1) if m/^ delete mode [0-7]+ (.*)$/;
  164. }
  165. map { s/^"(.*)"$/$1/g } @bfiles,@files;
  166. map { s/\\([0-7]{3})/sprintf('%c',oct $1)/eg } @bfiles,@files;
  167. # check that the files are clean and up to date according to cvs
  168. my $dirty;
  169. my @dirs;
  170. foreach my $p (@afiles) {
  171. my $path = dirname $p;
  172. while (!-d $path and ! grep { $_ eq $path } @dirs) {
  173. unshift @dirs, $path;
  174. $path = dirname $path;
  175. }
  176. }
  177. # ... check dirs,
  178. foreach my $d (@dirs) {
  179. if (-e $d) {
  180. $dirty = 1;
  181. warn "$d exists and is not a directory!\n";
  182. }
  183. }
  184. # ... query status of all files that we have a directory for and parse output of 'cvs status' to %cvsstat.
  185. my @canstatusfiles;
  186. foreach my $f (@files) {
  187. my $path = dirname $f;
  188. next if (grep { $_ eq $path } @dirs);
  189. push @canstatusfiles, $f;
  190. }
  191. my %cvsstat;
  192. if (@canstatusfiles) {
  193. if ($opt_u) {
  194. my @updated = xargs_safe_pipe_capture([@cvs, 'update'], @canstatusfiles);
  195. print @updated;
  196. }
  197. # "cvs status" reorders the parameters, notably when there are multiple
  198. # arguments with the same basename. So be precise here.
  199. my %added = map { $_ => 1 } @afiles;
  200. my %todo = map { $_ => 1 } @canstatusfiles;
  201. while (%todo) {
  202. my @canstatusfiles2 = ();
  203. my %fullname = ();
  204. foreach my $name (keys %todo) {
  205. my $basename = basename($name);
  206. # CVS reports files that don't exist in the current revision as
  207. # "no file $basename" in its "status" output, so we should
  208. # anticipate that. Totally unknown files will have a status
  209. # "Unknown". However, if they exist in the Attic, their status
  210. # will be "Up-to-date" (this means they were added once but have
  211. # been removed).
  212. $basename = "no file $basename" if $added{$basename};
  213. $basename =~ s/^\s+//;
  214. $basename =~ s/\s+$//;
  215. if (!exists($fullname{$basename})) {
  216. $fullname{$basename} = $name;
  217. push (@canstatusfiles2, $name);
  218. delete($todo{$name});
  219. }
  220. }
  221. my @cvsoutput;
  222. @cvsoutput = xargs_safe_pipe_capture([@cvs, 'status'], @canstatusfiles2);
  223. foreach my $l (@cvsoutput) {
  224. chomp $l;
  225. next unless
  226. my ($file, $status) = $l =~ /^File:\s+(.*\S)\s+Status: (.*)$/;
  227. my $fullname = $fullname{$file};
  228. print STDERR "Huh? Status '$status' reported for unexpected file '$file'\n"
  229. unless defined $fullname;
  230. # This response means the file does not exist except in
  231. # CVS's attic, so set the status accordingly
  232. $status = "In-attic"
  233. if $file =~ /^no file /
  234. && $status eq 'Up-to-date';
  235. $cvsstat{$fullname{$file}} = $status
  236. if defined $fullname{$file};
  237. }
  238. }
  239. }
  240. # ... Validate that new files have the correct status
  241. foreach my $f (@afiles) {
  242. next unless defined(my $stat = $cvsstat{$f});
  243. # This means the file has never been seen before
  244. next if $stat eq 'Unknown';
  245. # This means the file has been seen before but was removed
  246. next if $stat eq 'In-attic';
  247. $dirty = 1;
  248. warn "File $f is already known in your CVS checkout -- perhaps it has been added by another user. Or this may indicate that it exists on a different branch. If this is the case, use -f to force the merge.\n";
  249. warn "Status was: $cvsstat{$f}\n";
  250. }
  251. # ... validate known files.
  252. foreach my $f (@files) {
  253. next if grep { $_ eq $f } @afiles;
  254. # TODO:we need to handle removed in cvs
  255. unless (defined ($cvsstat{$f}) and $cvsstat{$f} eq "Up-to-date") {
  256. $dirty = 1;
  257. warn "File $f not up to date but has status '$cvsstat{$f}' in your CVS checkout!\n";
  258. }
  259. # Depending on how your GIT tree got imported from CVS you may
  260. # have a conflict between expanded keywords in your CVS tree and
  261. # unexpanded keywords in the patch about to be applied.
  262. if ($opt_k) {
  263. my $orig_file ="$f.orig";
  264. rename $f, $orig_file;
  265. open(FILTER_IN, "<$orig_file") or die "Cannot open $orig_file\n";
  266. open(FILTER_OUT, ">$f") or die "Cannot open $f\n";
  267. while (<FILTER_IN>)
  268. {
  269. my $line = $_;
  270. $line =~ s/\$([A-Z][a-z]+):[^\$]+\$/\$$1\$/g;
  271. print FILTER_OUT $line;
  272. }
  273. close FILTER_IN;
  274. close FILTER_OUT;
  275. }
  276. }
  277. if ($dirty) {
  278. if ($opt_f) { warn "The tree is not clean -- forced merge\n";
  279. $dirty = 0;
  280. } else {
  281. die "Exiting: your CVS tree is not clean for this merge.";
  282. }
  283. }
  284. print "Applying\n";
  285. if ($opt_W) {
  286. system("git checkout -q $commit^0") && die "cannot patch";
  287. } else {
  288. `GIT_INDEX_FILE=$tmpdir/index git apply $context --summary --numstat --apply <.cvsexportcommit.diff` || die "cannot patch";
  289. }
  290. print "Patch applied successfully. Adding new files and directories to CVS\n";
  291. my $dirtypatch = 0;
  292. #
  293. # We have to add the directories in order otherwise we will have
  294. # problems when we try and add the sub-directory of a directory we
  295. # have not added yet.
  296. #
  297. # Luckily this is easy to deal with by sorting the directories and
  298. # dealing with the shortest ones first.
  299. #
  300. @dirs = sort { length $a <=> length $b} @dirs;
  301. foreach my $d (@dirs) {
  302. if (system(@cvs,'add',$d)) {
  303. $dirtypatch = 1;
  304. warn "Failed to cvs add directory $d -- you may need to do it manually";
  305. }
  306. }
  307. foreach my $f (@afiles) {
  308. if (grep { $_ eq $f } @bfiles) {
  309. system(@cvs, 'add','-kb',$f);
  310. } else {
  311. system(@cvs, 'add', $f);
  312. }
  313. if ($?) {
  314. $dirtypatch = 1;
  315. warn "Failed to cvs add $f -- you may need to do it manually";
  316. }
  317. }
  318. foreach my $f (@dfiles) {
  319. system(@cvs, 'rm', '-f', $f);
  320. if ($?) {
  321. $dirtypatch = 1;
  322. warn "Failed to cvs rm -f $f -- you may need to do it manually";
  323. }
  324. }
  325. print "Commit to CVS\n";
  326. print "Patch title (first comment line): $title\n";
  327. my @commitfiles = map { unless (m/\s/) { '\''.$_.'\''; } else { $_; }; } (@files);
  328. my $cmd = join(' ', @cvs)." commit -F .msg @commitfiles";
  329. if ($dirtypatch) {
  330. print "NOTE: One or more hunks failed to apply cleanly.\n";
  331. print "You'll need to apply the patch in .cvsexportcommit.diff manually\n";
  332. print "using a patch program. After applying the patch and resolving the\n";
  333. print "problems you may commit using:";
  334. print "\n cd \"$opt_w\"" if $opt_w;
  335. print "\n $cmd\n";
  336. print "\n git checkout $go_back_to\n" if $go_back_to;
  337. print "\n";
  338. exit(1);
  339. }
  340. if ($opt_c) {
  341. print "Autocommit\n $cmd\n";
  342. print xargs_safe_pipe_capture([@cvs, 'commit', '-F', '.msg'], @files);
  343. if ($?) {
  344. die "Exiting: The commit did not succeed";
  345. }
  346. print "Committed successfully to CVS\n";
  347. # clean up
  348. unlink(".msg");
  349. } else {
  350. print "Ready for you to commit, just run:\n\n $cmd\n";
  351. }
  352. # clean up
  353. unlink(".cvsexportcommit.diff");
  354. if ($opt_W) {
  355. system("git checkout $go_back_to") && die "cannot move back to $go_back_to";
  356. if (!($go_back_to =~ /^[0-9a-fA-F]{$hexsz}$/)) {
  357. system("git symbolic-ref HEAD $go_back_to") &&
  358. die "cannot move back to $go_back_to";
  359. }
  360. }
  361. # CVS version 1.11.x and 1.12.x sleeps the wrong way to ensure the timestamp
  362. # used by CVS and the one set by subsequence file modifications are different.
  363. # If they are not different CVS will not detect changes.
  364. sleep(1);
  365. sub usage {
  366. print STDERR <<END;
  367. usage: GIT_DIR=/path/to/.git git cvsexportcommit [-h] [-p] [-v] [-c] [-f] [-u] [-k] [-w cvsworkdir] [-m msgprefix] [ parent ] commit
  368. END
  369. exit(1);
  370. }
  371. # An alternative to `command` that allows input to be passed as an array
  372. # to work around shell problems with weird characters in arguments
  373. # if the exec returns non-zero we die
  374. sub safe_pipe_capture {
  375. my @output;
  376. if (my $pid = open my $child, '-|') {
  377. binmode($child, ":crlf");
  378. @output = (<$child>);
  379. close $child or die join(' ',@_).": $! $?";
  380. } else {
  381. exec(@_) or die "$! $?"; # exec() can fail the executable can't be found
  382. }
  383. return wantarray ? @output : join('',@output);
  384. }
  385. sub xargs_safe_pipe_capture {
  386. my $MAX_ARG_LENGTH = 65536;
  387. my $cmd = shift;
  388. my @output;
  389. my $output;
  390. while(@_) {
  391. my @args;
  392. my $length = 0;
  393. while(@_ && $length < $MAX_ARG_LENGTH) {
  394. push @args, shift;
  395. $length += length($args[$#args]);
  396. }
  397. if (wantarray) {
  398. push @output, safe_pipe_capture(@$cmd, @args);
  399. }
  400. else {
  401. $output .= safe_pipe_capture(@$cmd, @args);
  402. }
  403. }
  404. return wantarray ? @output : $output;
  405. }