stats.pl 2.4 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192
  1. #! /usr/bin/perl -w
  2. # Copyright (C) 2005, 2007, 2021 Alex Schroeder <alex@gnu.org>
  3. #
  4. # This program is free software; you can redistribute it and/or modify
  5. # it under the terms of the GNU General Public License as published by
  6. # the Free Software Foundation; either version 3 of the License, or
  7. # (at your option) any later version.
  8. #
  9. # This program is distributed in the hope that it will be useful,
  10. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. # GNU General Public License for more details.
  13. #
  14. # You should have received a copy of the GNU General Public License
  15. # along with this program. If not, see <http://www.gnu.org/licenses/>.
  16. use Modern::Perl;
  17. sub ParseData {
  18. my $data = shift;
  19. my %result;
  20. while ($data =~ /(\S+?): (.*?)(?=\n[^ \t]|\Z)/sg) {
  21. my ($key, $value) = ($1, $2);
  22. $value =~ s/\n\t/\n/g;
  23. $result{$key} = $value;
  24. }
  25. return %result;
  26. }
  27. sub main {
  28. my ($PageDir) = @_;
  29. my $pages = 0;
  30. my $texts = 0;
  31. my $redirects = 0;
  32. my $files = 0;
  33. my $big = 0;
  34. # include dotfiles!
  35. local $/ = undef; # Read complete files
  36. say "Reading files...";
  37. my @files = glob("$PageDir/*.pg $PageDir/.*.pg");
  38. my $n = @files;
  39. local $| = 1; # flush!
  40. foreach my $file (@files) {
  41. if (not --$n % 10) {
  42. printf("\r%06d files to go", $n);
  43. }
  44. next unless $file =~ m|.*/(.+)\.pg$|;
  45. my $page = $1;
  46. open(F, $file) or die "Cannot read $page file: $!";
  47. my $data = <F>;
  48. close(F);
  49. my %result = ParseData($data);
  50. $pages++;
  51. if ($result{text} =~ /^#FILE /) {
  52. $files++;
  53. } elsif ($result{text} =~ /^#REDIRECT /) {
  54. $redirects++;
  55. } else {
  56. $texts++;
  57. $big++ if length($result{text}) > 15000;
  58. }
  59. }
  60. printf("\r%06d files to go\n", 0);
  61. printf("Pages: %7d\n", $pages);
  62. printf("Files: %7d\n", $files);
  63. printf("Redirects: %6d\n", $redirects);
  64. printf("Texts: %7d\n", $texts);
  65. printf("Big: %7d\n", $big);
  66. }
  67. use Getopt::Long;
  68. my $regexp = undef;
  69. my $page = 'page';
  70. my $help;
  71. GetOptions ("page=s" => \$page,
  72. "help" => \$help);
  73. if ($help) {
  74. print qq{
  75. Usage: $0 [--page DIR]
  76. Prints some stats about the pages in DIR.
  77. --page designates the page directory. By default this is 'page' in the
  78. current directory. If you run this script in your data directory,
  79. the default should be fine.
  80. }
  81. } else {
  82. main ($page);
  83. }