1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135 |
- #!/usr/bin/perl
- #
- # This tool is copyright (c) 2005, Martin Langhoff.
- # It is released under the Gnu Public License, version 2.
- #
- # The basic idea is to walk the output of tla abrowse,
- # fetch the changesets and apply them.
- #
- =head1 Invocation
- git archimport [ -h ] [ -v ] [ -o ] [ -a ] [ -f ] [ -T ]
- [ -D depth] [ -t tempdir ] <archive>/<branch> [ <archive>/<branch> ]
- Imports a project from one or more Arch repositories. It will follow branches
- and repositories within the namespaces defined by the <archive/branch>
- parameters supplied. If it cannot find the remote branch a merge comes from
- it will just import it as a regular commit. If it can find it, it will mark it
- as a merge whenever possible.
- See man (1) git-archimport for more details.
- =head1 TODO
- - create tag objects instead of ref tags
- - audit shell-escaping of filenames
- - hide our private tags somewhere smarter
- - find a way to make "cat *patches | patch" safe even when patchfiles are missing newlines
- - sort and apply patches by graphing ancestry relations instead of just
- relying in dates supplied in the changeset itself.
- tla ancestry-graph -m could be helpful here...
- =head1 Devel tricks
- Add print in front of the shell commands invoked via backticks.
- =head1 Devel Notes
- There are several places where Arch and git terminology are intermixed
- and potentially confused.
- The notion of a "branch" in git is approximately equivalent to
- a "archive/category--branch--version" in Arch. Also, it should be noted
- that the "--branch" portion of "archive/category--branch--version" is really
- optional in Arch although not many people (nor tools!) seem to know this.
- This means that "archive/category--version" is also a valid "branch"
- in git terms.
- We always refer to Arch names by their fully qualified variant (which
- means the "archive" name is prefixed.
- For people unfamiliar with Arch, an "archive" is the term for "repository",
- and can contain multiple, unrelated branches.
- =cut
- use 5.008;
- use strict;
- use warnings;
- use Getopt::Std;
- use File::Temp qw(tempdir);
- use File::Path qw(mkpath rmtree);
- use File::Basename qw(basename dirname);
- use Data::Dumper qw/ Dumper /;
- use IPC::Open2;
- $SIG{'PIPE'}="IGNORE";
- $ENV{'TZ'}="UTC";
- my $git_dir = $ENV{"GIT_DIR"} || ".git";
- $ENV{"GIT_DIR"} = $git_dir;
- my $ptag_dir = "$git_dir/archimport/tags";
- our($opt_h,$opt_f,$opt_v,$opt_T,$opt_t,$opt_D,$opt_a,$opt_o);
- sub usage() {
- print STDERR <<END;
- usage: git archimport # fetch/update GIT from Arch
- [ -h ] [ -v ] [ -o ] [ -a ] [ -f ] [ -T ] [ -D depth ] [ -t tempdir ]
- repository/arch-branch [ repository/arch-branch] ...
- END
- exit(1);
- }
- getopts("fThvat:D:") or usage();
- usage if $opt_h;
- @ARGV >= 1 or usage();
- # $arch_branches:
- # values associated with keys:
- # =1 - Arch version / git 'branch' detected via abrowse on a limit
- # >1 - Arch version / git 'branch' of an auxiliary branch we've merged
- my %arch_branches = map { my $branch = $_; $branch =~ s/:[^:]*$//; $branch => 1 } @ARGV;
- # $branch_name_map:
- # maps arch branches to git branch names
- my %branch_name_map = map { m/^(.*):([^:]*)$/; $1 => $2 } grep { m/:/ } @ARGV;
- $ENV{'TMPDIR'} = $opt_t if $opt_t; # $ENV{TMPDIR} will affect tempdir() calls:
- my $tmp = tempdir('git-archimport-XXXXXX', TMPDIR => 1, CLEANUP => 1);
- $opt_v && print "+ Using $tmp as temporary directory\n";
- unless (-d $git_dir) { # initial import needs empty directory
- opendir DIR, '.' or die "Unable to open current directory: $!\n";
- while (my $entry = readdir DIR) {
- $entry =~ /^\.\.?$/ or
- die "Initial import needs an empty current working directory.\n"
- }
- closedir DIR
- }
- my $default_archive; # default Arch archive
- my %reachable = (); # Arch repositories we can access
- my %unreachable = (); # Arch repositories we can't access :<
- my @psets = (); # the collection
- my %psets = (); # the collection, by name
- my %stats = ( # Track which strategy we used to import:
- get_tag => 0, replay => 0, get_new => 0, get_delta => 0,
- simple_changeset => 0, import_or_tag => 0
- );
- my %rptags = (); # my reverse private tags
- # to map a SHA1 to a commitid
- my $TLA = $ENV{'ARCH_CLIENT'} || 'tla';
- sub do_abrowse {
- my $stage = shift;
- while (my ($limit, $level) = each %arch_branches) {
- next unless $level == $stage;
- open ABROWSE, "$TLA abrowse -fkD --merges $limit |"
- or die "Problems with tla abrowse: $!";
- my %ps = (); # the current one
- my $lastseen = '';
- while (<ABROWSE>) {
- chomp;
- # first record padded w 8 spaces
- if (s/^\s{8}\b//) {
- my ($id, $type) = split(m/\s+/, $_, 2);
- my %last_ps;
- # store the record we just captured
- if (%ps && !exists $psets{ $ps{id} }) {
- %last_ps = %ps; # break references
- push (@psets, \%last_ps);
- $psets{ $last_ps{id} } = \%last_ps;
- }
- my $branch = extract_versionname($id);
- %ps = ( id => $id, branch => $branch );
- if (%last_ps && ($last_ps{branch} eq $branch)) {
- $ps{parent_id} = $last_ps{id};
- }
- $arch_branches{$branch} = 1;
- $lastseen = 'id';
- # deal with types (should work with baz or tla):
- if ($type =~ m/\(.*changeset\)/) {
- $ps{type} = 's';
- } elsif ($type =~ /\(.*import\)/) {
- $ps{type} = 'i';
- } elsif ($type =~ m/\(tag.*?(\S+\@\S+).*?\)/) {
- $ps{type} = 't';
- # read which revision we've tagged when we parse the log
- $ps{tag} = $1;
- } else {
- warn "Unknown type $type";
- }
- $arch_branches{$branch} = 1;
- $lastseen = 'id';
- } elsif (s/^\s{10}//) {
- # 10 leading spaces or more
- # indicate commit metadata
- # date
- if ($lastseen eq 'id' && m/^(\d{4}-\d\d-\d\d \d\d:\d\d:\d\d)/){
- $ps{date} = $1;
- $lastseen = 'date';
- } elsif ($_ eq 'merges in:') {
- $ps{merges} = [];
- $lastseen = 'merges';
- } elsif ($lastseen eq 'merges' && s/^\s{2}//) {
- my $id = $_;
- push (@{$ps{merges}}, $id);
- # aggressive branch finding:
- if ($opt_D) {
- my $branch = extract_versionname($id);
- my $repo = extract_reponame($branch);
- if (archive_reachable($repo) &&
- !defined $arch_branches{$branch}) {
- $arch_branches{$branch} = $stage + 1;
- }
- }
- } else {
- warn "more metadata after merges!?: $_\n" unless /^\s*$/;
- }
- }
- }
- if (%ps && !exists $psets{ $ps{id} }) {
- my %temp = %ps; # break references
- if (@psets && $psets[$#psets]{branch} eq $ps{branch}) {
- $temp{parent_id} = $psets[$#psets]{id};
- }
- push (@psets, \%temp);
- $psets{ $temp{id} } = \%temp;
- }
- close ABROWSE or die "$TLA abrowse failed on $limit\n";
- }
- } # end foreach $root
- do_abrowse(1);
- my $depth = 2;
- $opt_D ||= 0;
- while ($depth <= $opt_D) {
- do_abrowse($depth);
- $depth++;
- }
- ## Order patches by time
- # FIXME see if we can find a more optimal way to do this by graphing
- # the ancestry data and walking it, that way we won't have to rely on
- # client-supplied dates
- @psets = sort {$a->{date}.$b->{id} cmp $b->{date}.$b->{id}} @psets;
- #print Dumper \@psets;
- ##
- ## TODO cleanup irrelevant patches
- ## and put an initial import
- ## or a full tag
- my $import = 0;
- unless (-d $git_dir) { # initial import
- if ($psets[0]{type} eq 'i' || $psets[0]{type} eq 't') {
- print "Starting import from $psets[0]{id}\n";
- `git-init`;
- die $! if $?;
- $import = 1;
- } else {
- die "Need to start from an import or a tag -- cannot use $psets[0]{id}";
- }
- } else { # progressing an import
- # load the rptags
- opendir(DIR, $ptag_dir)
- || die "can't opendir: $!";
- while (my $file = readdir(DIR)) {
- # skip non-interesting-files
- next unless -f "$ptag_dir/$file";
- # convert first '--' to '/' from old git-archimport to use
- # as an archivename/c--b--v private tag
- if ($file !~ m!,!) {
- my $oldfile = $file;
- $file =~ s!--!,!;
- print STDERR "converting old tag $oldfile to $file\n";
- rename("$ptag_dir/$oldfile", "$ptag_dir/$file") or die $!;
- }
- my $sha = ptag($file);
- chomp $sha;
- $rptags{$sha} = $file;
- }
- closedir DIR;
- }
- # process patchsets
- # extract the Arch repository name (Arch "archive" in Arch-speak)
- sub extract_reponame {
- my $fq_cvbr = shift; # archivename/[[[[category]branch]version]revision]
- return (split(/\//, $fq_cvbr))[0];
- }
- sub extract_versionname {
- my $name = shift;
- $name =~ s/--(?:patch|version(?:fix)?|base)-\d+$//;
- return $name;
- }
- # convert a fully-qualified revision or version to a unique dirname:
- # normalperson@yhbt.net-05/mpd--uclinux--1--patch-2
- # becomes: normalperson@yhbt.net-05,mpd--uclinux--1
- #
- # the git notion of a branch is closer to
- # archive/category--branch--version than archive/category--branch, so we
- # use this to convert to git branch names.
- # Also, keep archive names but replace '/' with ',' since it won't require
- # subdirectories, and is safer than swapping '--' which could confuse
- # reverse-mapping when dealing with bastard branches that
- # are just archive/category--version (no --branch)
- sub tree_dirname {
- my $revision = shift;
- my $name = extract_versionname($revision);
- $name =~ s#/#,#;
- return $name;
- }
- # old versions of git-archimport just use the <category--branch> part:
- sub old_style_branchname {
- my $id = shift;
- my $ret = safe_pipe_capture($TLA,'parse-package-name','-p',$id);
- chomp $ret;
- return $ret;
- }
- *git_default_branchname = $opt_o ? *old_style_branchname : *tree_dirname;
- # retrieve default archive, since $branch_name_map keys might not include it
- sub get_default_archive {
- if (!defined $default_archive) {
- $default_archive = safe_pipe_capture($TLA,'my-default-archive');
- chomp $default_archive;
- }
- return $default_archive;
- }
- sub git_branchname {
- my $revision = shift;
- my $name = extract_versionname($revision);
- if (exists $branch_name_map{$name}) {
- return $branch_name_map{$name};
- } elsif ($name =~ m#^([^/]*)/(.*)$#
- && $1 eq get_default_archive()
- && exists $branch_name_map{$2}) {
- # the names given in the command-line lacked the archive.
- return $branch_name_map{$2};
- } else {
- return git_default_branchname($revision);
- }
- }
- sub process_patchset_accurate {
- my $ps = shift;
- # switch to that branch if we're not already in that branch:
- if (-e "$git_dir/refs/heads/$ps->{branch}") {
- system('git-checkout','-f',$ps->{branch}) == 0 or die "$! $?\n";
- # remove any old stuff that got leftover:
- my $rm = safe_pipe_capture('git-ls-files','--others','-z');
- rmtree(split(/\0/,$rm)) if $rm;
- }
- # Apply the import/changeset/merge into the working tree
- my $dir = sync_to_ps($ps);
- # read the new log entry:
- my @commitlog = safe_pipe_capture($TLA,'cat-log','-d',$dir,$ps->{id});
- die "Error in cat-log: $!" if $?;
- chomp @commitlog;
- # grab variables we want from the log, new fields get added to $ps:
- # (author, date, email, summary, message body ...)
- parselog($ps, \@commitlog);
- if ($ps->{id} =~ /--base-0$/ && $ps->{id} ne $psets[0]{id}) {
- # this should work when importing continuations
- if ($ps->{tag} && (my $branchpoint = eval { ptag($ps->{tag}) })) {
- # find where we are supposed to branch from
- if (! -e "$git_dir/refs/heads/$ps->{branch}") {
- system('git-branch',$ps->{branch},$branchpoint) == 0 or die "$! $?\n";
- # We trust Arch with the fact that this is just a tag,
- # and it does not affect the state of the tree, so
- # we just tag and move on. If the user really wants us
- # to consolidate more branches into one, don't tag because
- # the tag name would be already taken.
- tag($ps->{id}, $branchpoint);
- ptag($ps->{id}, $branchpoint);
- print " * Tagged $ps->{id} at $branchpoint\n";
- }
- system('git-checkout','-f',$ps->{branch}) == 0 or die "$! $?\n";
- # remove any old stuff that got leftover:
- my $rm = safe_pipe_capture('git-ls-files','--others','-z');
- rmtree(split(/\0/,$rm)) if $rm;
- return 0;
- } else {
- warn "Tagging from unknown id unsupported\n" if $ps->{tag};
- }
- # allow multiple bases/imports here since Arch supports cherry-picks
- # from unrelated trees
- }
- # update the index with all the changes we got
- system('git-diff-files --name-only -z | '.
- 'git-update-index --remove -z --stdin') == 0 or die "$! $?\n";
- system('git-ls-files --others -z | '.
- 'git-update-index --add -z --stdin') == 0 or die "$! $?\n";
- return 1;
- }
- # the native changeset processing strategy. This is very fast, but
- # does not handle permissions or any renames involving directories
- sub process_patchset_fast {
- my $ps = shift;
- #
- # create the branch if needed
- #
- if ($ps->{type} eq 'i' && !$import) {
- die "Should not have more than one 'Initial import' per GIT import: $ps->{id}";
- }
- unless ($import) { # skip for import
- if ( -e "$git_dir/refs/heads/$ps->{branch}") {
- # we know about this branch
- system('git-checkout',$ps->{branch});
- } else {
- # new branch! we need to verify a few things
- die "Branch on a non-tag!" unless $ps->{type} eq 't';
- my $branchpoint = ptag($ps->{tag});
- die "Tagging from unknown id unsupported: $ps->{tag}"
- unless $branchpoint;
- # find where we are supposed to branch from
- if (! -e "$git_dir/refs/heads/$ps->{branch}") {
- system('git-branch',$ps->{branch},$branchpoint) == 0 or die "$! $?\n";
- # We trust Arch with the fact that this is just a tag,
- # and it does not affect the state of the tree, so
- # we just tag and move on. If the user really wants us
- # to consolidate more branches into one, don't tag because
- # the tag name would be already taken.
- tag($ps->{id}, $branchpoint);
- ptag($ps->{id}, $branchpoint);
- print " * Tagged $ps->{id} at $branchpoint\n";
- }
- system('git-checkout',$ps->{branch}) == 0 or die "$! $?\n";
- return 0;
- }
- die $! if $?;
- }
- #
- # Apply the import/changeset/merge into the working tree
- #
- if ($ps->{type} eq 'i' || $ps->{type} eq 't') {
- apply_import($ps) or die $!;
- $stats{import_or_tag}++;
- $import=0;
- } elsif ($ps->{type} eq 's') {
- apply_cset($ps);
- $stats{simple_changeset}++;
- }
- #
- # prepare update git's index, based on what arch knows
- # about the pset, resolve parents, etc
- #
- my @commitlog = safe_pipe_capture($TLA,'cat-archive-log',$ps->{id});
- die "Error in cat-archive-log: $!" if $?;
- parselog($ps,\@commitlog);
- # imports don't give us good info
- # on added files. Shame on them
- if ($ps->{type} eq 'i' || $ps->{type} eq 't') {
- system('git-ls-files --deleted -z | '.
- 'git-update-index --remove -z --stdin') == 0 or die "$! $?\n";
- system('git-ls-files --others -z | '.
- 'git-update-index --add -z --stdin') == 0 or die "$! $?\n";
- }
- # TODO: handle removed_directories and renamed_directories:
- if (my $del = $ps->{removed_files}) {
- unlink @$del;
- while (@$del) {
- my @slice = splice(@$del, 0, 100);
- system('git-update-index','--remove','--',@slice) == 0 or
- die "Error in git-update-index --remove: $! $?\n";
- }
- }
- if (my $ren = $ps->{renamed_files}) { # renamed
- if (@$ren % 2) {
- die "Odd number of entries in rename!?";
- }
- while (@$ren) {
- my $from = shift @$ren;
- my $to = shift @$ren;
- unless (-d dirname($to)) {
- mkpath(dirname($to)); # will die on err
- }
- # print "moving $from $to";
- rename($from, $to) or die "Error renaming '$from' '$to': $!\n";
- system('git-update-index','--remove','--',$from) == 0 or
- die "Error in git-update-index --remove: $! $?\n";
- system('git-update-index','--add','--',$to) == 0 or
- die "Error in git-update-index --add: $! $?\n";
- }
- }
- if (my $add = $ps->{new_files}) {
- while (@$add) {
- my @slice = splice(@$add, 0, 100);
- system('git-update-index','--add','--',@slice) == 0 or
- die "Error in git-update-index --add: $! $?\n";
- }
- }
- if (my $mod = $ps->{modified_files}) {
- while (@$mod) {
- my @slice = splice(@$mod, 0, 100);
- system('git-update-index','--',@slice) == 0 or
- die "Error in git-update-index: $! $?\n";
- }
- }
- return 1; # we successfully applied the changeset
- }
- if ($opt_f) {
- print "Will import patchsets using the fast strategy\n",
- "Renamed directories and permission changes will be missed\n";
- *process_patchset = *process_patchset_fast;
- } else {
- print "Using the default (accurate) import strategy.\n",
- "Things may be a bit slow\n";
- *process_patchset = *process_patchset_accurate;
- }
- foreach my $ps (@psets) {
- # process patchsets
- $ps->{branch} = git_branchname($ps->{id});
- #
- # ensure we have a clean state
- #
- if (my $dirty = `git-diff-files`) {
- die "Unclean tree when about to process $ps->{id} " .
- " - did we fail to commit cleanly before?\n$dirty";
- }
- die $! if $?;
- #
- # skip commits already in repo
- #
- if (ptag($ps->{id})) {
- $opt_v && print " * Skipping already imported: $ps->{id}\n";
- next;
- }
- print " * Starting to work on $ps->{id}\n";
- process_patchset($ps) or next;
- # warn "errors when running git-update-index! $!";
- my $tree = `git-write-tree`;
- die "cannot write tree $!" if $?;
- chomp $tree;
- #
- # Who's your daddy?
- #
- my @par;
- if ( -e "$git_dir/refs/heads/$ps->{branch}") {
- if (open HEAD, "<","$git_dir/refs/heads/$ps->{branch}") {
- my $p = <HEAD>;
- close HEAD;
- chomp $p;
- push @par, '-p', $p;
- } else {
- if ($ps->{type} eq 's') {
- warn "Could not find the right head for the branch $ps->{branch}";
- }
- }
- }
- if ($ps->{merges}) {
- push @par, find_parents($ps);
- }
- #
- # Commit, tag and clean state
- #
- $ENV{TZ} = 'GMT';
- $ENV{GIT_AUTHOR_NAME} = $ps->{author};
- $ENV{GIT_AUTHOR_EMAIL} = $ps->{email};
- $ENV{GIT_AUTHOR_DATE} = $ps->{date};
- $ENV{GIT_COMMITTER_NAME} = $ps->{author};
- $ENV{GIT_COMMITTER_EMAIL} = $ps->{email};
- $ENV{GIT_COMMITTER_DATE} = $ps->{date};
- my $pid = open2(*READER, *WRITER,'git-commit-tree',$tree,@par)
- or die $!;
- print WRITER $ps->{summary},"\n\n";
- # only print message if it's not empty, to avoid a spurious blank line;
- # also append an extra newline, so there's a blank line before the
- # following "git-archimport-id:" line.
- print WRITER $ps->{message},"\n\n" if ($ps->{message} ne "");
- # make it easy to backtrack and figure out which Arch revision this was:
- print WRITER 'git-archimport-id: ',$ps->{id},"\n";
- close WRITER;
- my $commitid = <READER>; # read
- chomp $commitid;
- close READER;
- waitpid $pid,0; # close;
- if (length $commitid != 40) {
- die "Something went wrong with the commit! $! $commitid";
- }
- #
- # Update the branch
- #
- open HEAD, ">","$git_dir/refs/heads/$ps->{branch}";
- print HEAD $commitid;
- close HEAD;
- system('git-update-ref', 'HEAD', "$ps->{branch}");
- # tag accordingly
- ptag($ps->{id}, $commitid); # private tag
- if ($opt_T || $ps->{type} eq 't' || $ps->{type} eq 'i') {
- tag($ps->{id}, $commitid);
- }
- print " * Committed $ps->{id}\n";
- print " + tree $tree\n";
- print " + commit $commitid\n";
- $opt_v && print " + commit date is $ps->{date} \n";
- $opt_v && print " + parents: ",join(' ',@par),"\n";
- }
- if ($opt_v) {
- foreach (sort keys %stats) {
- print" $_: $stats{$_}\n";
- }
- }
- exit 0;
- # used by the accurate strategy:
- sub sync_to_ps {
- my $ps = shift;
- my $tree_dir = $tmp.'/'.tree_dirname($ps->{id});
- $opt_v && print "sync_to_ps($ps->{id}) method: ";
- if (-d $tree_dir) {
- if ($ps->{type} eq 't') {
- $opt_v && print "get (tag)\n";
- # looks like a tag-only or (worse,) a mixed tags/changeset branch,
- # can't rely on replay to work correctly on these
- rmtree($tree_dir);
- safe_pipe_capture($TLA,'get','--no-pristine',$ps->{id},$tree_dir);
- $stats{get_tag}++;
- } else {
- my $tree_id = arch_tree_id($tree_dir);
- if ($ps->{parent_id} && ($ps->{parent_id} eq $tree_id)) {
- # the common case (hopefully)
- $opt_v && print "replay\n";
- safe_pipe_capture($TLA,'replay','-d',$tree_dir,$ps->{id});
- $stats{replay}++;
- } else {
- # getting one tree is usually faster than getting two trees
- # and applying the delta ...
- rmtree($tree_dir);
- $opt_v && print "apply-delta\n";
- safe_pipe_capture($TLA,'get','--no-pristine',
- $ps->{id},$tree_dir);
- $stats{get_delta}++;
- }
- }
- } else {
- # new branch work
- $opt_v && print "get (new tree)\n";
- safe_pipe_capture($TLA,'get','--no-pristine',$ps->{id},$tree_dir);
- $stats{get_new}++;
- }
- # added -I flag to rsync since we're going to fast! AIEEEEE!!!!
- system('rsync','-aI','--delete','--exclude',$git_dir,
- # '--exclude','.arch-inventory',
- '--exclude','.arch-ids','--exclude','{arch}',
- '--exclude','+*','--exclude',',*',
- "$tree_dir/",'./') == 0 or die "Cannot rsync $tree_dir: $! $?";
- return $tree_dir;
- }
- sub apply_import {
- my $ps = shift;
- my $bname = git_branchname($ps->{id});
- mkpath($tmp);
- safe_pipe_capture($TLA,'get','-s','--no-pristine',$ps->{id},"$tmp/import");
- die "Cannot get import: $!" if $?;
- system('rsync','-aI','--delete', '--exclude',$git_dir,
- '--exclude','.arch-ids','--exclude','{arch}',
- "$tmp/import/", './');
- die "Cannot rsync import:$!" if $?;
- rmtree("$tmp/import");
- die "Cannot remove tempdir: $!" if $?;
- return 1;
- }
- sub apply_cset {
- my $ps = shift;
- mkpath($tmp);
- # get the changeset
- safe_pipe_capture($TLA,'get-changeset',$ps->{id},"$tmp/changeset");
- die "Cannot get changeset: $!" if $?;
- # apply patches
- if (`find $tmp/changeset/patches -type f -name '*.patch'`) {
- # this can be sped up considerably by doing
- # (find | xargs cat) | patch
- # but that can get mucked up by patches
- # with missing trailing newlines or the standard
- # 'missing newline' flag in the patch - possibly
- # produced with an old/buggy diff.
- # slow and safe, we invoke patch once per patchfile
- `find $tmp/changeset/patches -type f -name '*.patch' -print0 | grep -zv '{arch}' | xargs -iFILE -0 --no-run-if-empty patch -p1 --forward -iFILE`;
- die "Problem applying patches! $!" if $?;
- }
- # apply changed binary files
- if (my @modified = `find $tmp/changeset/patches -type f -name '*.modified'`) {
- foreach my $mod (@modified) {
- chomp $mod;
- my $orig = $mod;
- $orig =~ s/\.modified$//; # lazy
- $orig =~ s!^\Q$tmp\E/changeset/patches/!!;
- #print "rsync -p '$mod' '$orig'";
- system('rsync','-p',$mod,"./$orig");
- die "Problem applying binary changes! $!" if $?;
- }
- }
- # bring in new files
- system('rsync','-aI','--exclude',$git_dir,
- '--exclude','.arch-ids',
- '--exclude', '{arch}',
- "$tmp/changeset/new-files-archive/",'./');
- # deleted files are hinted from the commitlog processing
- rmtree("$tmp/changeset");
- }
- # =for reference
- # notes: *-files/-directories keys cannot have spaces, they're always
- # pika-escaped. Everything after the first newline
- # A log entry looks like:
- # Revision: moodle-org--moodle--1.3.3--patch-15
- # Archive: arch-eduforge@catalyst.net.nz--2004
- # Creator: Penny Leach <penny@catalyst.net.nz>
- # Date: Wed May 25 14:15:34 NZST 2005
- # Standard-date: 2005-05-25 02:15:34 GMT
- # New-files: lang/de/.arch-ids/block_glossary_random.php.id
- # lang/de/.arch-ids/block_html.php.id
- # New-directories: lang/de/help/questionnaire
- # lang/de/help/questionnaire/.arch-ids
- # Renamed-files: .arch-ids/db_sears.sql.id db/.arch-ids/db_sears.sql.id
- # db_sears.sql db/db_sears.sql
- # Removed-files: lang/be/docs/.arch-ids/release.html.id
- # lang/be/docs/.arch-ids/releaseold.html.id
- # Modified-files: admin/cron.php admin/delete.php
- # admin/editor.html backup/lib.php backup/restore.php
- # New-patches: arch-eduforge@catalyst.net.nz--2004/moodle-org--moodle--1.3.3--patch-15
- # Summary: Updating to latest from MOODLE_14_STABLE (1.4.5+)
- # summary can be multiline with a leading space just like the above fields
- # Keywords:
- #
- # Updating yadda tadda tadda madda
- sub parselog {
- my ($ps, $log) = @_;
- my $key = undef;
- # headers we want that contain filenames:
- my %want_headers = (
- new_files => 1,
- modified_files => 1,
- renamed_files => 1,
- renamed_directories => 1,
- removed_files => 1,
- removed_directories => 1,
- );
- chomp (@$log);
- while ($_ = shift @$log) {
- if (/^Continuation-of:\s*(.*)/) {
- $ps->{tag} = $1;
- $key = undef;
- } elsif (/^Summary:\s*(.*)$/ ) {
- # summary can be multiline as long as it has a leading space.
- # we squeeze it onto a single line, though.
- $ps->{summary} = [ $1 ];
- $key = 'summary';
- } elsif (/^Creator: (.*)\s*<([^\>]+)>/) {
- $ps->{author} = $1;
- $ps->{email} = $2;
- $key = undef;
- # any *-files or *-directories can be read here:
- } elsif (/^([A-Z][a-z\-]+):\s*(.*)$/) {
- my $val = $2;
- $key = lc $1;
- $key =~ tr/-/_/; # too lazy to quote :P
- if ($want_headers{$key}) {
- push @{$ps->{$key}}, split(/\s+/, $val);
- } else {
- $key = undef;
- }
- } elsif (/^$/) {
- last; # remainder of @$log that didn't get shifted off is message
- } elsif ($key) {
- if (/^\s+(.*)$/) {
- if ($key eq 'summary') {
- push @{$ps->{$key}}, $1;
- } else { # files/directories:
- push @{$ps->{$key}}, split(/\s+/, $1);
- }
- } else {
- $key = undef;
- }
- }
- }
- # drop leading empty lines from the log message
- while (@$log && $log->[0] eq '') {
- shift @$log;
- }
- if (exists $ps->{summary} && @{$ps->{summary}}) {
- $ps->{summary} = join(' ', @{$ps->{summary}});
- }
- elsif (@$log == 0) {
- $ps->{summary} = 'empty commit message';
- } else {
- $ps->{summary} = $log->[0] . '...';
- }
- $ps->{message} = join("\n",@$log);
- # skip Arch control files, unescape pika-escaped files
- foreach my $k (keys %want_headers) {
- next unless (defined $ps->{$k});
- my @tmp = ();
- foreach my $t (@{$ps->{$k}}) {
- next unless length ($t);
- next if $t =~ m!\{arch\}/!;
- next if $t =~ m!\.arch-ids/!;
- # should we skip this?
- next if $t =~ m!\.arch-inventory$!;
- # tla cat-archive-log will give us filenames with spaces as file\(sp)name - why?
- # we can assume that any filename with \ indicates some pika escaping that we want to get rid of.
- if ($t =~ /\\/ ){
- $t = (safe_pipe_capture($TLA,'escape','--unescaped',$t))[0];
- }
- push @tmp, $t;
- }
- $ps->{$k} = \@tmp;
- }
- }
- # write/read a tag
- sub tag {
- my ($tag, $commit) = @_;
- if ($opt_o) {
- $tag =~ s|/|--|g;
- } else {
- my $patchname = $tag;
- $patchname =~ s/.*--//;
- $tag = git_branchname ($tag) . '--' . $patchname;
- }
- if ($commit) {
- open(C,">","$git_dir/refs/tags/$tag")
- or die "Cannot create tag $tag: $!\n";
- print C "$commit\n"
- or die "Cannot write tag $tag: $!\n";
- close(C)
- or die "Cannot write tag $tag: $!\n";
- print " * Created tag '$tag' on '$commit'\n" if $opt_v;
- } else { # read
- open(C,"<","$git_dir/refs/tags/$tag")
- or die "Cannot read tag $tag: $!\n";
- $commit = <C>;
- chomp $commit;
- die "Error reading tag $tag: $!\n" unless length $commit == 40;
- close(C)
- or die "Cannot read tag $tag: $!\n";
- return $commit;
- }
- }
- # write/read a private tag
- # reads fail softly if the tag isn't there
- sub ptag {
- my ($tag, $commit) = @_;
- # don't use subdirs for tags yet, it could screw up other porcelains
- $tag =~ s|/|,|g;
- my $tag_file = "$ptag_dir/$tag";
- my $tag_branch_dir = dirname($tag_file);
- mkpath($tag_branch_dir) unless (-d $tag_branch_dir);
- if ($commit) { # write
- open(C,">",$tag_file)
- or die "Cannot create tag $tag: $!\n";
- print C "$commit\n"
- or die "Cannot write tag $tag: $!\n";
- close(C)
- or die "Cannot write tag $tag: $!\n";
- $rptags{$commit} = $tag
- unless $tag =~ m/--base-0$/;
- } else { # read
- # if the tag isn't there, return 0
- unless ( -s $tag_file) {
- return 0;
- }
- open(C,"<",$tag_file)
- or die "Cannot read tag $tag: $!\n";
- $commit = <C>;
- chomp $commit;
- die "Error reading tag $tag: $!\n" unless length $commit == 40;
- close(C)
- or die "Cannot read tag $tag: $!\n";
- unless (defined $rptags{$commit}) {
- $rptags{$commit} = $tag;
- }
- return $commit;
- }
- }
- sub find_parents {
- #
- # Identify what branches are merging into me
- # and whether we are fully merged
- # git-merge-base <headsha> <headsha> should tell
- # me what the base of the merge should be
- #
- my $ps = shift;
- my %branches; # holds an arrayref per branch
- # the arrayref contains a list of
- # merged patches between the base
- # of the merge and the current head
- my @parents; # parents found for this commit
- # simple loop to split the merges
- # per branch
- foreach my $merge (@{$ps->{merges}}) {
- my $branch = git_branchname($merge);
- unless (defined $branches{$branch} ){
- $branches{$branch} = [];
- }
- push @{$branches{$branch}}, $merge;
- }
- #
- # foreach branch find a merge base and walk it to the
- # head where we are, collecting the merged patchsets that
- # Arch has recorded. Keep that in @have
- # Compare that with the commits on the other branch
- # between merge-base and the tip of the branch (@need)
- # and see if we have a series of consecutive patches
- # starting from the merge base. The tip of the series
- # of consecutive patches merged is our new parent for
- # that branch.
- #
- foreach my $branch (keys %branches) {
- # check that we actually know about the branch
- next unless -e "$git_dir/refs/heads/$branch";
- my $mergebase = safe_pipe_capture(qw(git-merge-base), $branch, $ps->{branch});
- if ($?) {
- # Don't die here, Arch supports one-way cherry-picking
- # between branches with no common base (or any relationship
- # at all beforehand)
- warn "Cannot find merge base for $branch and $ps->{branch}";
- next;
- }
- chomp $mergebase;
- # now walk up to the mergepoint collecting what patches we have
- my $branchtip = git_rev_parse($ps->{branch});
- my @ancestors = `git-rev-list --topo-order $branchtip ^$mergebase`;
- my %have; # collected merges this branch has
- foreach my $merge (@{$ps->{merges}}) {
- $have{$merge} = 1;
- }
- my %ancestorshave;
- foreach my $par (@ancestors) {
- $par = commitid2pset($par);
- if (defined $par->{merges}) {
- foreach my $merge (@{$par->{merges}}) {
- $ancestorshave{$merge}=1;
- }
- }
- }
- # print "++++ Merges in $ps->{id} are....\n";
- # my @have = sort keys %have; print Dumper(\@have);
- # merge what we have with what ancestors have
- %have = (%have, %ancestorshave);
- # see what the remote branch has - these are the merges we
- # will want to have in a consecutive series from the mergebase
- my $otherbranchtip = git_rev_parse($branch);
- my @needraw = `git-rev-list --topo-order $otherbranchtip ^$mergebase`;
- my @need;
- foreach my $needps (@needraw) { # get the psets
- $needps = commitid2pset($needps);
- # git-rev-list will also
- # list commits merged in via earlier
- # merges. we are only interested in commits
- # from the branch we're looking at
- if ($branch eq $needps->{branch}) {
- push @need, $needps->{id};
- }
- }
- # print "++++ Merges from $branch we want are....\n";
- # print Dumper(\@need);
- my $newparent;
- while (my $needed_commit = pop @need) {
- if ($have{$needed_commit}) {
- $newparent = $needed_commit;
- } else {
- last; # break out of the while
- }
- }
- if ($newparent) {
- push @parents, $newparent;
- }
- } # end foreach branch
- # prune redundant parents
- my %parents;
- foreach my $p (@parents) {
- $parents{$p} = 1;
- }
- foreach my $p (@parents) {
- next unless exists $psets{$p}{merges};
- next unless ref $psets{$p}{merges};
- my @merges = @{$psets{$p}{merges}};
- foreach my $merge (@merges) {
- if ($parents{$merge}) {
- delete $parents{$merge};
- }
- }
- }
- @parents = ();
- foreach (keys %parents) {
- push @parents, '-p', ptag($_);
- }
- return @parents;
- }
- sub git_rev_parse {
- my $name = shift;
- my $val = safe_pipe_capture(qw(git-rev-parse), $name);
- die "Error: git-rev-parse $name" if $?;
- chomp $val;
- return $val;
- }
- # resolve a SHA1 to a known patchset
- sub commitid2pset {
- my $commitid = shift;
- chomp $commitid;
- my $name = $rptags{$commitid}
- || die "Cannot find reverse tag mapping for $commitid";
- $name =~ s|,|/|;
- my $ps = $psets{$name}
- || (print Dumper(sort keys %psets)) && die "Cannot find patchset for $name";
- return $ps;
- }
- # an alternative to `command` that allows input to be passed as an array
- # to work around shell problems with weird characters in arguments
- sub safe_pipe_capture {
- my @output;
- if (my $pid = open my $child, '-|') {
- @output = (<$child>);
- close $child or die join(' ',@_).": $! $?";
- } else {
- exec(@_) or die "$! $?"; # exec() can fail the executable can't be found
- }
- return wantarray ? @output : join('',@output);
- }
- # `tla logs -rf -d <dir> | head -n1` or `baz tree-id <dir>`
- sub arch_tree_id {
- my $dir = shift;
- chomp( my $ret = (safe_pipe_capture($TLA,'logs','-rf','-d',$dir))[0] );
- return $ret;
- }
- sub archive_reachable {
- my $archive = shift;
- return 1 if $reachable{$archive};
- return 0 if $unreachable{$archive};
- if (system "$TLA whereis-archive $archive >/dev/null") {
- if ($opt_a && (system($TLA,'register-archive',
- "http://mirrors.sourcecontrol.net/$archive") == 0)) {
- $reachable{$archive} = 1;
- return 1;
- }
- print STDERR "Archive is unreachable: $archive\n";
- $unreachable{$archive} = 1;
- return 0;
- } else {
- $reachable{$archive} = 1;
- return 1;
- }
- }
|