MediaWiki master
cleanupSpam.php
Go to the documentation of this file.
1<?php
32
33// @codeCoverageIgnoreStart
34require_once __DIR__ . '/Maintenance.php';
35// @codeCoverageIgnoreEnd
36
/**
 * Maintenance script to cleanup all spam from a given hostname.
 *
 * For every page whose external links match the given hostname, the page is
 * reverted to the newest revision that does not contain the link. If no such
 * revision exists, the page is blanked (or deleted when --delete is given).
 * With --all, the script re-invokes itself once for every wiki listed in
 * $wgLocalDatabases that has matching external links.
 */
class CleanupSpam extends Maintenance {

	/**
	 * Declare the script description, its options and its single argument.
	 */
	public function __construct() {
		parent::__construct();
		$this->addDescription( 'Cleanup all spam from a given hostname' );
		$this->addOption( 'all', 'Check all wikis in $wgLocalDatabases' );
		$this->addOption( 'delete', 'Delete pages containing only spam instead of blanking them' );
		$this->addArg(
			'hostname',
			'Hostname that was spamming, single * wildcard in the beginning allowed'
		);
	}

	/**
	 * Find pages linking to the given hostname and clean each one up.
	 *
	 * Edits are attributed to the system user named by the
	 * 'spambot_username' interface message.
	 */
	public function execute() {
		global $IP, $wgLocalDatabases;

		$username = wfMessage( 'spambot_username' )->text();
		$user = User::newSystemUser( $username );
		if ( !$user ) {
			$this->fatalError( "Invalid username specified in 'spambot_username' message: $username" );
		}
		// Hack: Grant bot rights so we don't flood RecentChanges
		$this->getServiceContainer()->getUserGroupManager()->addUserToGroup( $user, 'bot' );
		StubGlobalUser::setUser( $user );

		$spec = $this->getArg( 0 );

		// Build one set of externallinks query conditions per protocol, and
		// reject the hostname specification up front if either cannot be built.
		$protConds = [];
		foreach ( [ 'http://', 'https://' ] as $prot ) {
			$conds = LinkFilter::getQueryConditions( $spec, [ 'protocol' => $prot ] );
			if ( !$conds ) {
				$this->fatalError( "Not a valid hostname specification: $spec" );
			}
			$protConds[$prot] = $conds;
		}

		if ( $this->hasOption( 'all' ) ) {
			// Clean up spam on all wikis
			$this->output( "Finding spam on " . count( $wgLocalDatabases ) . " wikis\n" );

			// Forward --delete so the per-wiki child runs honour it too.
			$childOptions = $this->hasOption( 'delete' ) ? [ '--delete' ] : [];

			$found = false;
			foreach ( $wgLocalDatabases as $wikiId ) {
				$dbr = $this->getDB( DB_REPLICA, [], $wikiId );

				foreach ( $protConds as $conds ) {
					$count = $dbr->newSelectQueryBuilder()
						->select( 'COUNT(*)' )
						->from( 'externallinks' )
						->where( $conds )
						->caller( __METHOD__ )
						->fetchField();
					if ( $count ) {
						$found = true;
						$cmd = wfShellWikiCmd(
							"$IP/maintenance/cleanupSpam.php",
							array_merge( [ '--wiki', $wikiId ], $childOptions, [ $spec ] )
						);
						// phpcs:ignore MediaWiki.Usage.ForbiddenFunctions.passthru
						passthru( "$cmd | sed 's/^/$wikiId: /'" );

						// The child run processes both protocols itself, so one
						// invocation per wiki is enough.
						break;
					}
				}
			}
			if ( $found ) {
				$this->output( "All done\n" );
			} else {
				$this->output( "None found\n" );
			}
		} else {
			// Clean up spam on this wiki

			$count = 0;
			$dbr = $this->getReplicaDB();
			foreach ( $protConds as $prot => $conds ) {
				$res = $dbr->newSelectQueryBuilder()
					->select( 'el_from' )
					->distinct()
					->from( 'externallinks' )
					->where( $conds )
					->caller( __METHOD__ )
					->fetchResultSet();
				// Running total across both protocols, hence "so far".
				$count += $res->numRows();
				$this->output( "Found $count articles containing $spec so far...\n" );
				foreach ( $res as $row ) {
					$this->cleanupArticle(
						$row->el_from,
						$spec,
						$prot,
						$user
					);
				}
			}
			if ( $count ) {
				$this->output( "Done\n" );
			}
		}
	}

	/**
	 * Remove spam from a single page.
	 *
	 * Walks back through the page history until it finds a revision whose
	 * main-slot content does not match the spam link, then reverts to it.
	 * When every revision matches, the page is deleted (with --delete) or
	 * blanked.
	 *
	 * @param int $id Page ID, as read from externallinks.el_from
	 * @param string $domain Hostname specification given on the command line
	 * @param string $protocol Protocol prefix being matched ('http://' or 'https://')
	 * @param Authority $performer Authority the edits/deletions are attributed to
	 */
	private function cleanupArticle( $id, $domain, $protocol, Authority $performer ) {
		$title = Title::newFromID( $id );
		if ( !$title ) {
			$this->error( "Internal error: no page for ID $id" );

			return;
		}

		$this->output( $title->getPrefixedDBkey() . " ..." );

		$services = $this->getServiceContainer();
		$revLookup = $services->getRevisionLookup();
		$rev = $revLookup->getRevisionByTitle( $title );
		if ( !$rev ) {
			// No current revision could be loaded (e.g. the page went away
			// after the externallinks query); there is nothing to revert.
			$this->output( "no current revision, skipping\n" );

			return;
		}
		$currentRevId = $rev->getId();

		// Step back while the revision text is suppressed or still contains the link.
		while ( $rev && ( $rev->isDeleted( RevisionRecord::DELETED_TEXT ) ||
			LinkFilter::matchEntry(
				// @phan-suppress-next-line PhanTypeMismatchArgumentNullable RAW never returns null
				$rev->getContent( SlotRecord::MAIN, RevisionRecord::RAW ),
				$domain,
				$protocol
			) )
		) {
			$rev = $revLookup->getPreviousRevision( $rev );
		}

		if ( $rev && $rev->getId() == $currentRevId ) {
			// The regex didn't match the current article text
			// This happens e.g. when a link comes from a template rather than the page itself
			$this->output( "False match\n" );
		} else {
			$dbw = $this->getPrimaryDB();
			$this->beginTransaction( $dbw, __METHOD__ );
			$page = $services->getWikiPageFactory()->newFromTitle( $title );
			if ( $rev ) {
				// Revert to this revision
				$content = $rev->getContent( SlotRecord::MAIN, RevisionRecord::RAW );

				$this->output( "reverting\n" );
				$page->doUserEditContent(
					// @phan-suppress-next-line PhanTypeMismatchArgumentNullable RAW never returns null
					$content,
					$performer,
					wfMessage( 'spam_reverting', $domain )->inContentLanguage()->text(),
					// Flags must come before the original revision ID; without
					// them the revision ID would be interpreted as edit flags.
					EDIT_UPDATE | EDIT_FORCE_BOT,
					$rev->getId()
				);
			} elseif ( $this->hasOption( 'delete' ) ) {
				// Didn't find a non-spammy revision, delete the page
				$this->output( "deleting\n" );
				$deletePage = $services->getDeletePageFactory()->newDeletePage( $page, $performer );
				$deletePage->deleteUnsafe( wfMessage( 'spam_deleting', $domain )->inContentLanguage()->text() );
			} else {
				// Didn't find a non-spammy revision, blank the page
				$handler = $services->getContentHandlerFactory()
					->getContentHandler( $title->getContentModel() );
				$content = $handler->makeEmptyContent();

				$this->output( "blanking\n" );
				$page->doUserEditContent(
					$content,
					$performer,
					wfMessage( 'spam_blanking', $domain )->inContentLanguage()->text(),
					EDIT_UPDATE | EDIT_FORCE_BOT
				);
			}
			$this->commitTransaction( $dbw, __METHOD__ );
		}
	}
}
216
217// @codeCoverageIgnoreStart
// Name CleanupSpam as the maintenance class, then include the file named by
// RUN_MAINTENANCE_IF_MAIN (presumably the runner that executes $maintClass when
// this file is the entry point - confirm against Maintenance.php).
218$maintClass = CleanupSpam::class;
219require_once RUN_MAINTENANCE_IF_MAIN;
220// @codeCoverageIgnoreEnd
getDB()
const EDIT_FORCE_BOT
Definition Defines.php:131
const EDIT_UPDATE
Definition Defines.php:128
wfShellWikiCmd( $script, array $parameters=[], array $options=[])
Generate a shell-escaped command line string to run a MediaWiki cli script.
wfMessage( $key,... $params)
This is the function for getting translated interface messages.
if(!defined( 'MEDIAWIKI')) if(ini_get('mbstring.func_overload')) if(!defined( 'MW_ENTRY_POINT')) global $IP
Environment checks.
Definition Setup.php:102
Maintenance script to cleanup all spam from a given hostname.
__construct()
Default constructor.
execute()
Do the actual work.
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
error( $err, $die=0)
Throw an error to the user.
addArg( $arg, $description, $required=true, $multi=false)
Add some args that are needed.
beginTransaction(IDatabase $dbw, $fname)
Begin a transaction on a DB.
commitTransaction(IDatabase $dbw, $fname)
Commit the transaction on a DB handle and wait for replica DBs to catch up.
output( $out, $channel=null)
Throw some output to the user.
hasOption( $name)
Checks to see if a particular option was set.
getServiceContainer()
Returns the main service container.
getArg( $argId=0, $default=null)
Get an argument.
addDescription( $text)
Set the description text.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.
Page revision base class.
Value object representing a content slot associated with a page revision.
Stub object for the global user ($wgUser) that makes it possible to change the relevant underlying object.
Represents a title within MediaWiki.
Definition Title.php:78
internal since 1.36
Definition User.php:93
$maintClass
$wgLocalDatabases
Config variable stub for the LocalDatabases setting, for use by phpdoc and IDEs.
This interface represents the authority associated with the current execution context,...
Definition Authority.php:37
const DB_REPLICA
Definition defines.php:26