1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586 |
- #! /usr/bin/perl -w
- # Copyright (C) 2005, 2007 Alex Schroeder <alex@emacswiki.org>
- #
- # This program is free software; you can redistribute it and/or modify
- # it under the terms of the GNU General Public License as published by
- # the Free Software Foundation; either version 3 of the License, or
- # (at your option) any later version.
- #
- # This program is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- # GNU General Public License for more details.
- #
- # You should have received a copy of the GNU General Public License
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
- sub ParseData {
- my $data = shift;
- my %result;
- while ($data =~ /(\S+?): (.*?)(?=\n[^ \t]|\Z)/sg) {
- my ($key, $value) = ($1, $2);
- $value =~ s/\n\t/\n/g;
- $result{$key} = $value;
- }
- return %result;
- }
- sub main {
- my ($regexp, $PageDir, $RawDir) = @_;
- # include dotfiles!
- local $/ = undef; # Read complete files
- foreach my $file (glob("$PageDir/*.pg $PageDir/.*.pg")) {
- next unless $file =~ m|.*/(.+)\.pg$|;
- my $page = $1;
- next if $regexp && $page !~ m|$regexp|o;
- mkdir($RawDir) or die "Cannot create $RawDir directory: $!"
- unless -d $RawDir;
- open(F, $file) or die "Cannot read $page file: $!";
- my $data = <F>;
- close(F);
- my $ts = (stat("$RawDir/$page"))[9];
- my %result = ParseData($data);
- if ($ts && $ts == $result{ts}) {
- print "skipping $page because it is up to date\n" if $verbose;
- } else {
- print "writing $page because $ts != $result{ts}\n" if $verbose;
- open(F,"> $RawDir/$page") or die "Cannot write $page raw file: $!";
- print F $result{text};
- close(F);
- utime $result{ts}, $result{ts}, "$RawDir/$page"; # touch file
- }
- }
- }
- use Getopt::Long;
- my $regexp = undef;
- my $page = 'page';
- my $dir = 'raw';
- GetOptions ("regexp=s" => \$regexp,
- "page=s" => \$page,
- "dir=s" => \$dir,
- "help" => \$help);
- if ($help) {
- print qq{
- Usage: $0 [--regexp REGEXP] [--page DIR] [--dir DIR]
- Writes the raw wiki text into plain text files.
- --regexp selects a subsets of pages whose names match the regular
- expression. Note that spaces have been translated to underscores.
- --page designates the page directory. By default this is 'page' in the
- current directory. If you run this script in your data directory,
- the default should be fine.
- --dir designates an output directory. By default this is 'raw' in the
- current directory.
- Example: $0 --regexp '\\.el\$' --dir elisp
- }
- } else {
- main ($regexp, $page, $dir);
- }
|