despam.pl 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155
  1. # Copyright (C) 2004, 2005, 2006, 2007 Alex Schroeder <alex@emacswiki.org>
  2. #
  3. # This program is free software; you can redistribute it and/or modify
  4. # it under the terms of the GNU General Public License as published by
  5. # the Free Software Foundation; either version 2 of the License, or
  6. # (at your option) any later version.
  7. #
  8. # This program is distributed in the hope that it will be useful,
  9. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. # GNU General Public License for more details.
  12. #
  13. # You should have received a copy of the GNU General Public License
  14. # along with this program; if not, write to the
  15. # Free Software Foundation, Inc.
  16. # 59 Temple Place, Suite 330
  17. # Boston, MA 02111-1307 USA
  18. use strict;
  19. use v5.10;
  20. our ($q, $Now, %IndexHash, %Action, %Page, $OpenPageName, $FS, $BannedContent, $RcFile, $RcDefault, @MyAdminCode, $FullUrlPattern, $DeletedPage, $StrangeBannedContent);
  21. AddModuleDescription('despam.pl', 'Despam Extension');
  22. push(@MyAdminCode, \&DespamMenu);
  23. sub DespamMenu {
  24. my ($id, $menuref, $restref) = @_;
  25. push(@$menuref, ScriptLink('action=spam', T('List spammed pages'), 'spam'));
  26. push(@$menuref, ScriptLink('action=despam', T('Despamming pages'), 'despam'));
  27. }
  28. my @DespamRules = ();
  29. my @DespamStrangeRules = ();
  30. sub DespamRule {
  31. $_ = shift;
  32. s/#.*//; # trim comments
  33. s/^\s+//; # trim leading whitespace
  34. s/\s+$//; # trim trailing whitespace
  35. return $_;
  36. }
  37. sub InitDespamRules {
  38. # read them only once
  39. @DespamRules = grep /./, map { DespamRule($_) }
  40. split(/\n/, GetPageContent($BannedContent));
  41. @DespamStrangeRules = grep /./, map { DespamRule($_) }
  42. split(/\n/, GetPageContent($StrangeBannedContent))
  43. if $IndexHash{$StrangeBannedContent};
  44. }
  45. $Action{despam} = \&DoDespam;
  46. sub DoDespam {
  47. RequestLockOrError();
  48. my $list = GetParam('list', 0);
  49. print GetHeader('', T('Despamming pages'), '') . '<div class="despam content"><p>';
  50. InitDespamRules();
  51. foreach my $id (DespamPages()) {
  52. next if $id eq $BannedContent or $id eq $StrangeBannedContent;
  53. OpenPage($id);
  54. my $rule = $list || DespamBannedContent($Page{text});
  55. print GetPageLink($id, NormalToFree($id));
  56. DespamPage($rule) if $rule and not $list;
  57. print $q->br();
  58. }
  59. print '</p></div>';
  60. PrintFooter();
  61. ReleaseLock();
  62. }
  63. $Action{spam} = \&DoSpam;
  64. sub DoSpam {
  65. print GetHeader('', T('Spammed pages'), '') . '<div class="spam content"><p>';
  66. InitDespamRules();
  67. foreach my $id (AllPagesList()) {
  68. next if $id eq $BannedContent or $id eq $StrangeBannedContent;
  69. OpenPage($id);
  70. my $rule = DespamBannedContent($Page{text});
  71. next unless $rule;
  72. print GetPageLink($id, NormalToFree($id)), ' ', $rule, $q->br();
  73. }
  74. print '</p></div>';
  75. PrintFooter();
  76. }
  77. # Based on BannedContent(), but with caching
  78. sub DespamBannedContent {
  79. my $str = shift;
  80. my @urls = $str =~ /$FullUrlPattern/g;
  81. foreach (@DespamRules) {
  82. my $regexp = $_;
  83. foreach my $url (@urls) {
  84. if ($url =~ /($regexp)/i) {
  85. return Tss('Rule "%1" matched "%2" on this page.',
  86. QuoteHtml($regexp), QuoteHtml($url));
  87. }
  88. }
  89. }
  90. # depends on strange-spam.pl!
  91. foreach (@DespamStrangeRules) {
  92. my $regexp = $_;
  93. if ($str =~ /($regexp)/i) {
  94. my $match = $1;
  95. $match =~ s/\n/ /g;
  96. return Tss('Rule "%1" matched "%2" on this page.',
  97. QuoteHtml($regexp), QuoteHtml($match));
  98. }
  99. }
  100. return 0;
  101. }
  102. sub DespamPages {
  103. # Assume that regular maintenance is happening and just read rc.log.
  104. # This is not optimized like DoRc().
  105. my $starttime = 0;
  106. $starttime = $Now - GetParam('days', $RcDefault) * 86400; # 24*60*60
  107. my $data = ReadFileOrDie($RcFile);
  108. my %files = (); # use a hash map to make it unique
  109. foreach my $line (split(/\n/, $data)) {
  110. my ($ts, $id) = split(/$FS/, $line);
  111. next if $ts < $starttime;
  112. $files{$id} = 1;
  113. }
  114. return keys %files;
  115. }
  116. sub DespamPage {
  117. my $rule = shift;
  118. # from DoHistory()
  119. my @revisions = sort {$b <=> $a} map { m|/([0-9]+).kp$|; $1; } GetKeepFiles($OpenPageName);
  120. foreach my $revision (@revisions) {
  121. my ($revisionPage, $rev) = GetTextRevision($revision, 1); # quiet
  122. if (not $rev) {
  123. print ': ' . Ts('Cannot find revision %s.', $revision);
  124. return;
  125. } elsif (not DespamBannedContent($revisionPage->{text})) {
  126. my $summary = Tss('Revert to revision %1: %2', $revision, $rule);
  127. print ': ' . $summary;
  128. Save($OpenPageName, $revisionPage->{text}, $summary) unless GetParam('debug', 0);
  129. return;
  130. }
  131. }
  132. if (grep(/^1$/, @revisions) or not @revisions) { # if there is no kept revision, yet
  133. my $summary = Ts($rule). ' ' . Ts('Marked as %s.', $DeletedPage);
  134. print ': ' . $summary;
  135. Save($OpenPageName, $DeletedPage, $summary) unless GetParam('debug', 0);
  136. } else {
  137. print ': ' . T('Cannot find unspammed revision.');
  138. }
  139. }