multi-url-spam-block.pl 2.3 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071
  1. # Copyright (C) 2007–2015 Alex Schroeder <alex@gnu.org>
  2. #
  3. # This program is free software; you can redistribute it and/or modify
  4. # it under the terms of the GNU General Public License as published by
  5. # the Free Software Foundation; either version 3 of the License, or
  6. # (at your option) any later version.
  7. #
  8. # This program is distributed in the hope that it will be useful,
  9. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. # GNU General Public License for more details.
  12. #
  13. # You should have received a copy of the GNU General Public License
  14. # along with this program. If not, see <http://www.gnu.org/licenses/>.
  15. use strict;
  16. use v5.10;
  17. AddModuleDescription('multi-url-spam-block.pl', 'Multiple Link Ban Extension');
  18. our ($BannedContent, @MyInitVariables, %AdminPages, %PlainTextPages, $FullUrlPattern, $LocalNamesPage);
  19. *OldMultiUrlBannedContent = \&BannedContent;
  20. *BannedContent = \&NewMultiUrlBannedContent;
  21. our ($MultiUrlWhiteList, $MultiUrlLimit);
  22. $MultiUrlLimit = 10;
  23. $MultiUrlWhiteList = 'UrlWhitelist';
  24. push(@MyInitVariables, sub {
  25. $MultiUrlWhiteList = FreeToNormal($MultiUrlWhiteList);
  26. $AdminPages{$MultiUrlWhiteList} = 1;
  27. $PlainTextPages{$MultiUrlWhiteList} = 1;
  28. });
  29. sub NewMultiUrlBannedContent {
  30. my $str = shift;
  31. if (not $LocalNamesPage
  32. or GetParam('title', '') ne $LocalNamesPage) {
  33. my $rule = MultiUrlBannedContent($str);
  34. return $rule if $rule;
  35. }
  36. return OldMultiUrlBannedContent($str);
  37. }
  38. sub MultiUrlBannedContent {
  39. my $str = shift;
  40. my @urls = $str =~ /$FullUrlPattern/g;
  41. my %domains;
  42. my %whitelist;
  43. my $max = 0;
  44. my $label = '[a-z]([a-z0-9-]*[a-z0-9])?'; # RFC 1034
  45. foreach (split(/\n/, GetPageContent($MultiUrlWhiteList))) {
  46. next unless m/^\s*($label\.$label)/i;
  47. $whitelist{$1} = 1;
  48. }
  49. foreach my $url (@urls) {
  50. my @urlparts = split('/', $url, 4);
  51. my $domain = $urlparts[2];
  52. my @domainparts = split('\.', $domain);
  53. splice(@domainparts, 0, -2); # no subdomains
  54. $domain = join('.', @domainparts);
  55. next if $whitelist{$domain};
  56. $domains{$domain}++;
  57. $max = $domains{$domain} if $domains{$domain} > $max;
  58. }
  59. return Ts('You linked more than %s times to the same domain. It would seem that only a spammer would do this. Your edit is refused.', $MultiUrlLimit)
  60. if $max > $MultiUrlLimit;
  61. }