raw.pl 2.6 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586
  1. #! /usr/bin/perl -w
  2. # Copyright (C) 2005, 2007 Alex Schroeder <alex@emacswiki.org>
  3. #
  4. # This program is free software; you can redistribute it and/or modify
  5. # it under the terms of the GNU General Public License as published by
  6. # the Free Software Foundation; either version 3 of the License, or
  7. # (at your option) any later version.
  8. #
  9. # This program is distributed in the hope that it will be useful,
  10. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. # GNU General Public License for more details.
  13. #
  14. # You should have received a copy of the GNU General Public License
  15. # along with this program. If not, see <http://www.gnu.org/licenses/>.
  # Parse a serialized page record ("key: value" fields) into a hash.
  #
  # A field starts with a run of non-whitespace characters followed by ": ".
  # Its value runs up to the next newline that is NOT followed by a space or
  # a tab, or to the end of the data (a single trailing newline is not part
  # of the value).  Inside a value, every newline-plus-tab sequence is
  # collapsed to a plain newline.
  #
  # Takes the record as one string and returns the fields as a flat
  # key/value list.  When a key occurs more than once, the last one wins.
  sub ParseData {
    my ($data) = @_;

    # In list context a /g match returns every ($1, $2) pair in order,
    # which is exactly the key/value list needed to populate the hash.
    my %fields = $data =~ /(\S+?): (.*?)(?=\n[^ \t]|\Z)/sg;

    # values() yields aliases to the stored values, so this edits in place.
    s/\n\t/\n/g for values %fields;

    return %fields;
  }
  # Export the text of wiki pages to plain files.
  #
  # Arguments:
  #   $regexp  - optional regular expression; when defined and non-empty,
  #              only pages whose name matches it are exported
  #   $PageDir - directory containing the "<name>.pg" page files (dotfiles
  #              are included)
  #   $RawDir  - output directory; created when the first selected page is
  #              about to be processed and it does not exist yet
  #
  # Every selected page file is slurped and parsed with ParseData().  Its
  # "text" field is written to "$RawDir/<name>", and the access and
  # modification times of that file are set to the page's "ts" field.  A
  # page is skipped when the existing raw file's mtime already equals "ts".
  # Progress messages are printed when the package variable $verbose is true.
  #
  # Dies when the page directory cannot be read, the output directory cannot
  # be created, or a page or raw file cannot be opened or written.
  sub main {
    my ($regexp, $PageDir, $RawDir) = @_;
    our $verbose;    # package variable; this sub only reads it

    # Compile the filter once.  Testing definedness and length (instead of
    # truth) keeps a pattern such as "0" from being silently ignored.
    my $filter = (defined $regexp && length $regexp) ? qr/$regexp/ : undef;

    # readdir instead of glob: glob splits its pattern on whitespace and
    # expands metacharacters, which breaks for such directory names.
    opendir(my $dh, $PageDir) or die "Cannot read $PageDir directory: $!";
    my @names = sort grep { /\.pg$/ } readdir($dh);
    closedir($dh);

    local $/ = undef; # Read complete files
    foreach my $name (@names) {
      next unless $name =~ /^(.+)\.pg$/;
      my $page = $1;
      next if defined $filter && $page !~ $filter;

      unless (-d $RawDir) {
        mkdir($RawDir) or die "Cannot create $RawDir directory: $!";
      }

      # Three-argument open with lexical handles: the file name is never
      # interpreted as an open mode, and handles close when they go out
      # of scope.
      open(my $in, '<', "$PageDir/$name") or die "Cannot read $page file: $!";
      my $data = <$in>;
      close($in);

      my $raw = "$RawDir/$page";
      my $ts = (stat($raw))[9];    # undef when the raw file does not exist
      my %result = ParseData($data);
      my $page_ts = $result{ts};

      if (defined $ts && defined $page_ts && $ts == $page_ts) {
        print "skipping $page because it is up to date\n" if $verbose;
        next;
      }

      if ($verbose) {
        my $have = defined $ts      ? $ts      : '(none)';
        my $want = defined $page_ts ? $page_ts : '(none)';
        print "writing $page because $have != $want\n";
      }

      open(my $out, '>', $raw) or die "Cannot write $page raw file: $!";
      print {$out} (defined $result{text} ? $result{text} : '');
      # Buffered write errors only surface at close, so check it.
      close($out) or die "Cannot write $page raw file: $!";

      # touch file; without a timestamp the fresh mtime is left alone
      if (defined $page_ts) {
        utime($page_ts, $page_ts, $raw)
          or warn "Cannot set timestamp of $raw: $!\n";
      }
    }
  }
  52. use Getopt::Long;
  # ---- Command-line entry point ----
  # Option defaults: no name filter, pages read from ./page, output in ./raw.
  my $regexp = undef;
  my $page = 'page';
  my $dir = 'raw';
  my $help;
  # main() reads $verbose as a package variable, so it has to be declared
  # with "our"; a "my" variable declared here would not be visible to it.
  our $verbose;

  # Built from single lines so the text does not depend on source indentation.
  my $usage = join("\n",
    '',
    "Usage: $0 [--regexp REGEXP] [--page DIR] [--dir DIR] [--verbose]",
    'Writes the raw wiki text into plain text files.',
    '--regexp selects a subsets of pages whose names match the regular',
    'expression. Note that spaces have been translated to underscores.',
    "--page designates the page directory. By default this is 'page' in the",
    'current directory. If you run this script in your data directory,',
    'the default should be fine.',
    "--dir designates an output directory. By default this is 'raw' in the",
    'current directory.',
    '--verbose reports for every page whether it is written or skipped.',
    "Example: $0 --regexp '\\.el\$' --dir elisp",
  ) . "\n";

  # GetOptions returns false after reporting an unknown or malformed option;
  # stop there instead of exporting with settings the user did not intend.
  GetOptions ("regexp=s" => \$regexp,
              "page=s" => \$page,
              "dir=s" => \$dir,
              "verbose" => \$verbose,
              "help" => \$help)
    or die $usage;

  if ($help) {
    print $usage;
  } else {
    main ($regexp, $page, $dir);
  }
  75. }