MediaWiki  master
cleanupSpam.php
Go to the documentation of this file.
<?php

use MediaWiki\Revision\RevisionRecord;

require_once __DIR__ . '/Maintenance.php';

/**
 * Maintenance script to cleanup all spam from a given hostname.
 *
 * Pages whose current text links to the hostname are reverted to their most
 * recent revision that does not contain the link; pages with no such revision
 * are blanked, or deleted when --delete is given.
 */
class CleanupSpam extends Maintenance {

	public function __construct() {
		parent::__construct();
		$this->addDescription( 'Cleanup all spam from a given hostname' );
		$this->addOption( 'all', 'Check all wikis in $wgLocalDatabases' );
		$this->addOption( 'delete', 'Delete pages containing only spam instead of blanking them' );
		$this->addArg(
			'hostname',
			'Hostname that was spamming, single * wildcard in the beginning allowed'
		);
	}

	/**
	 * Entry point: look up external links matching the hostname argument
	 * (for both http:// and https://) and clean up every page that has one.
	 *
	 * With --all, each wiki in $wgLocalDatabases is probed and a child
	 * cleanupSpam.php process is spawned for every wiki that has matches.
	 */
	public function execute() {
		global $IP, $wgLocalDatabases, $wgUser;

		$username = wfMessage( 'spambot_username' )->text();
		$wgUser = User::newSystemUser( $username );
		if ( !$wgUser ) {
			$this->fatalError( "Invalid username specified in 'spambot_username' message: $username" );
		}
		// Hack: Grant bot rights so we don't flood RecentChanges
		$wgUser->addGroup( 'bot' );

		$spec = $this->getArg( 0 );

		// Build one set of externallinks query conditions per protocol
		$protConds = [];
		foreach ( [ 'http://', 'https://' ] as $prot ) {
			$conds = LinkFilter::getQueryConditions( $spec, [ 'protocol' => $prot ] );
			if ( !$conds ) {
				$this->fatalError( "Not a valid hostname specification: $spec" );
			}
			$protConds[$prot] = $conds;
		}

		if ( $this->hasOption( 'all' ) ) {
			// Clean up spam on all wikis
			$this->output( "Finding spam on " . count( $wgLocalDatabases ) . " wikis\n" );

			// Parameters shared by every child invocation. --delete has to be
			// forwarded explicitly, otherwise the children would always blank.
			$childOptions = $this->hasOption( 'delete' ) ? [ '--delete' ] : [];

			$found = false;
			foreach ( $wgLocalDatabases as $wikiId ) {
				/** @var Database $dbr */
				$dbr = $this->getDB( DB_REPLICA, [], $wikiId );

				foreach ( $protConds as $conds ) {
					$count = $dbr->selectField(
						'externallinks',
						'COUNT(*)',
						$conds,
						__METHOD__
					);
					if ( $count ) {
						$found = true;
						$cmd = wfShellWikiCmd(
							"$IP/maintenance/cleanupSpam.php",
							array_merge( [ '--wiki', $wikiId ], $childOptions, [ $spec ] )
						);
						passthru( "$cmd | sed 's/^/$wikiId: /'" );
						// The child run handles both protocols for this wiki,
						// so do not spawn it a second time.
						break;
					}
				}
			}
			if ( $found ) {
				$this->output( "All done\n" );
			} else {
				$this->output( "None found\n" );
			}
		} else {
			// Clean up spam on this wiki

			// Total over all protocols, so that "Done" does not depend only
			// on whether the last protocol checked had any matches.
			$total = 0;
			/** @var Database $dbr */
			$dbr = $this->getDB( DB_REPLICA );
			foreach ( $protConds as $prot => $conds ) {
				$res = $dbr->select(
					'externallinks',
					[ 'DISTINCT el_from' ],
					$conds,
					__METHOD__
				);
				$count = $dbr->numRows( $res );
				$total += $count;
				$this->output( "Found $count articles containing $spec\n" );
				foreach ( $res as $row ) {
					$this->cleanupArticle( $row->el_from, $spec, $prot );
				}
			}
			if ( $total ) {
				$this->output( "Done\n" );
			}
		}
	}

	/**
	 * Clean a single page: revert it to the latest revision that does not
	 * link to $domain, or blank/delete it if every revision does.
	 *
	 * @param int $id Page ID (externallinks.el_from)
	 * @param string $domain Hostname specification the links are matched against
	 * @param string $protocol Protocol prefix to match, 'http://' or 'https://'
	 */
	private function cleanupArticle( $id, $domain, $protocol ) {
		$title = Title::newFromID( $id );
		if ( !$title ) {
			$this->error( "Internal error: no page for ID $id" );

			return;
		}

		$rev = Revision::newFromTitle( $title );
		if ( !$rev ) {
			// Nothing to inspect or revert to; avoid calling methods on null
			$this->error( "Internal error: no current revision for page ID $id" );

			return;
		}

		$this->output( $title->getPrefixedDBkey() . " ..." );
		$currentRevId = $rev->getId();

		// Walk back through history until a visible revision without the link is found
		while ( $rev && ( $rev->isDeleted( RevisionRecord::DELETED_TEXT )
			|| LinkFilter::matchEntry( $rev->getContent( RevisionRecord::RAW ), $domain, $protocol ) )
		) {
			$rev = $rev->getPrevious();
		}

		if ( $rev && $rev->getId() == $currentRevId ) {
			// The regex didn't match the current article text
			// This happens e.g. when a link comes from a template rather than the page itself
			$this->output( "False match\n" );
		} else {
			$dbw = $this->getDB( DB_MASTER );
			$this->beginTransaction( $dbw, __METHOD__ );
			$page = WikiPage::factory( $title );
			if ( $rev ) {
				// Revert to this revision
				$content = $rev->getContent( RevisionRecord::RAW );

				$this->output( "reverting\n" );
				$page->doEditContent(
					$content,
					wfMessage( 'spam_reverting', $domain )->inContentLanguage()->text(),
					EDIT_UPDATE | EDIT_FORCE_BOT,
					$rev->getId()
				);
			} elseif ( $this->hasOption( 'delete' ) ) {
				// Didn't find a non-spammy revision, delete the page
				$this->output( "deleting\n" );
				$page->doDeleteArticle(
					wfMessage( 'spam_deleting', $domain )->inContentLanguage()->text()
				);
			} else {
				// Didn't find a non-spammy revision, blank the page
				$handler = ContentHandler::getForTitle( $title );
				$content = $handler->makeEmptyContent();

				$this->output( "blanking\n" );
				$page->doEditContent(
					$content,
					wfMessage( 'spam_blanking', $domain )->inContentLanguage()->text(),
					EDIT_UPDATE | EDIT_FORCE_BOT
				);
			}
			$this->commitTransaction( $dbw, __METHOD__ );
		}
	}
}
188 
// Register this script's class, then include the runner referenced by
// RUN_MAINTENANCE_IF_MAIN (defined in Maintenance.php).
$maintClass = CleanupSpam::class;
require_once RUN_MAINTENANCE_IF_MAIN;
static factory(Title $title)
Create a WikiPage object of the appropriate class for the given title.
Definition: WikiPage.php:142
commitTransaction(IDatabase $dbw, $fname)
Commit the transaction on a DB handle and wait for replica DBs to catch up.
getArg( $argId=0, $default=null)
Get an argument.
const RUN_MAINTENANCE_IF_MAIN
Definition: Maintenance.php:39
static newFromID( $id, $flags=0)
Create a new Title from an article ID.
Definition: Title.php:467
error( $err, $die=0)
Throw an error to the user.
$IP
Definition: WebStart.php:41
wfShellWikiCmd( $script, array $parameters=[], array $options=[])
Generate a shell-escaped command line string to run a MediaWiki cli script.
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effo...
Definition: Maintenance.php:86
cleanupArticle( $id, $domain, $protocol)
hasOption( $name)
Checks to see if a particular option exists.
const EDIT_UPDATE
Definition: Defines.php:133
Maintenance script to cleanup all spam from a given hostname.
Definition: cleanupSpam.php:33
static newFromTitle(LinkTarget $linkTarget, $id=0, $flags=0)
Load either the current, or a specified, revision that's attached to a given link target...
Definition: Revision.php:138
const DB_MASTER
Definition: defines.php:26
$maintClass
const EDIT_FORCE_BOT
Definition: Defines.php:136
addDescription( $text)
Set the description text.
addArg( $arg, $description, $required=true)
Add some args that are needed.
static matchEntry(Content $content, $filterEntry, $protocol='http://')
Check whether $content contains a link to $filterEntry.
Definition: LinkFilter.php:49
output( $out, $channel=null)
Throw some output to the user.
static getForTitle(Title $title)
Returns the appropriate ContentHandler singleton for the given title.
static getQueryConditions( $filterEntry, array $options=[])
Return query conditions which will match the specified string.
Definition: LinkFilter.php:254
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
const DB_REPLICA
Definition: defines.php:25
$content
Definition: router.php:78
wfMessage( $key,... $params)
This is the function for getting translated interface messages.
string [] $wgLocalDatabases
Other wikis on this site, can be administered from a single developer account.
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.
getDB( $db, $groups=[], $dbDomain=false)
Returns a database to be used by current maintenance script.
static newSystemUser( $name, $options=[])
Static factory method for creation of a "system" user from username.
Definition: User.php:737
beginTransaction(IDatabase $dbw, $fname)
Begin a transaction on a DB.