despam.pl 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152
  1. # Copyright (C) 2004, 2005, 2006, 2007 Alex Schroeder <alex@emacswiki.org>
  2. #
  3. # This program is free software; you can redistribute it and/or modify
  4. # it under the terms of the GNU General Public License as published by
  5. # the Free Software Foundation; either version 3 of the License, or
  6. # (at your option) any later version.
  7. #
  8. # This program is distributed in the hope that it will be useful,
  9. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. # GNU General Public License for more details.
  12. #
  13. # You should have received a copy of the GNU General Public License
  14. # along with this program. If not, see <http://www.gnu.org/licenses/>.
  15. use strict;
  16. use v5.10;
  17. our ($q, $Now, %IndexHash, %Action, %Page, $OpenPageName, $FS, $BannedContent, $RcFile, $RcDefault, @MyAdminCode, $FullUrlPattern, $DeletedPage, $StrangeBannedContent);
  18. AddModuleDescription('despam.pl', 'Despam Extension');
  19. push(@MyAdminCode, \&DespamMenu);
  20. sub DespamMenu {
  21. my ($id, $menuref, $restref) = @_;
  22. push(@$menuref, ScriptLink('action=spam', T('List spammed pages'), 'spam'));
  23. push(@$menuref, ScriptLink('action=despam', T('Despamming pages'), 'despam'));
  24. }
  25. my @DespamRules = ();
  26. my @DespamStrangeRules = ();
  27. sub DespamRule {
  28. $_ = shift;
  29. s/#.*//; # trim comments
  30. s/^\s+//; # trim leading whitespace
  31. s/\s+$//; # trim trailing whitespace
  32. return $_;
  33. }
  34. sub InitDespamRules {
  35. # read them only once
  36. @DespamRules = grep /./, map { DespamRule($_) }
  37. split(/\n/, GetPageContent($BannedContent));
  38. @DespamStrangeRules = grep /./, map { DespamRule($_) }
  39. split(/\n/, GetPageContent($StrangeBannedContent))
  40. if $IndexHash{$StrangeBannedContent};
  41. }
  42. $Action{despam} = \&DoDespam;
  43. sub DoDespam {
  44. RequestLockOrError();
  45. my $list = GetParam('list', 0);
  46. print GetHeader('', T('Despamming pages'), '') . '<div class="despam content"><p>';
  47. InitDespamRules();
  48. foreach my $id (DespamPages()) {
  49. next if $id eq $BannedContent or $id eq $StrangeBannedContent;
  50. OpenPage($id);
  51. my $rule = $list || DespamBannedContent($Page{text});
  52. print GetPageLink($id, NormalToFree($id));
  53. DespamPage($rule) if $rule and not $list;
  54. print $q->br();
  55. }
  56. print '</p></div>';
  57. PrintFooter();
  58. ReleaseLock();
  59. }
  60. $Action{spam} = \&DoSpam;
  61. sub DoSpam {
  62. print GetHeader('', T('Spammed pages'), '') . '<div class="spam content"><p>';
  63. InitDespamRules();
  64. foreach my $id (AllPagesList()) {
  65. next if $id eq $BannedContent or $id eq $StrangeBannedContent;
  66. OpenPage($id);
  67. my $rule = DespamBannedContent($Page{text});
  68. next unless $rule;
  69. print GetPageLink($id, NormalToFree($id)), ' ', $rule, $q->br();
  70. }
  71. print '</p></div>';
  72. PrintFooter();
  73. }
  74. # Based on BannedContent(), but with caching
  75. sub DespamBannedContent {
  76. my $str = shift;
  77. my @urls = $str =~ /$FullUrlPattern/g;
  78. foreach (@DespamRules) {
  79. my $regexp = $_;
  80. foreach my $url (@urls) {
  81. if ($url =~ /($regexp)/i) {
  82. return Tss('Rule "%1" matched "%2" on this page.',
  83. QuoteHtml($regexp), QuoteHtml($url));
  84. }
  85. }
  86. }
  87. # depends on strange-spam.pl!
  88. foreach (@DespamStrangeRules) {
  89. my $regexp = $_;
  90. if ($str =~ /($regexp)/i) {
  91. my $match = $1;
  92. $match =~ s/\n/ /g;
  93. return Tss('Rule "%1" matched "%2" on this page.',
  94. QuoteHtml($regexp), QuoteHtml($match));
  95. }
  96. }
  97. return 0;
  98. }
  99. sub DespamPages {
  100. # Assume that regular maintenance is happening and just read rc.log.
  101. # This is not optimized like DoRc().
  102. my $starttime = 0;
  103. $starttime = $Now - GetParam('days', $RcDefault) * 86400; # 24*60*60
  104. my $data = ReadFileOrDie($RcFile);
  105. my %files = (); # use a hash map to make it unique
  106. foreach my $line (split(/\n/, $data)) {
  107. my ($ts, $id) = split(/$FS/, $line);
  108. next if $ts < $starttime;
  109. $files{$id} = 1;
  110. }
  111. return keys %files;
  112. }
  113. sub DespamPage {
  114. my $rule = shift;
  115. # from DoHistory()
  116. my @revisions = sort {$b <=> $a} map { m|/([0-9]+).kp$|; $1; } GetKeepFiles($OpenPageName);
  117. foreach my $revision (@revisions) {
  118. my ($revisionPage, $rev) = GetTextRevision($revision, 1); # quiet
  119. if (not $rev) {
  120. print ': ' . Ts('Cannot find revision %s.', $revision);
  121. return;
  122. } elsif (not DespamBannedContent($revisionPage->{text})) {
  123. my $summary = Tss('Revert to revision %1: %2', $revision, $rule);
  124. print ': ' . $summary;
  125. Save($OpenPageName, $revisionPage->{text}, $summary) unless GetParam('debug', 0);
  126. return;
  127. }
  128. }
  129. if (grep(/^1$/, @revisions) or not @revisions) { # if there is no kept revision, yet
  130. my $summary = Ts($rule). ' ' . Ts('Marked as %s.', $DeletedPage);
  131. print ': ' . $summary;
  132. Save($OpenPageName, $DeletedPage, $summary) unless GetParam('debug', 0);
  133. } else {
  134. print ': ' . T('Cannot find unspammed revision.');
  135. }
  136. }