Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 110 |
|
0.00% |
0 / 3 |
CRAP | |
0.00% |
0 / 1 |
CleanupSpam | |
0.00% |
0 / 110 |
|
0.00% |
0 / 3 |
506 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
2 | |||
execute | |
0.00% |
0 / 59 |
|
0.00% |
0 / 1 |
156 | |||
cleanupArticle | |
0.00% |
0 / 43 |
|
0.00% |
0 / 1 |
90 |
1 | <?php |
2 | /** |
3 | * Cleanup all spam from a given hostname. |
4 | * |
5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by |
7 | * the Free Software Foundation; either version 2 of the License, or |
8 | * (at your option) any later version. |
9 | * |
10 | * This program is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | * GNU General Public License for more details. |
14 | * |
15 | * You should have received a copy of the GNU General Public License along |
16 | * with this program; if not, write to the Free Software Foundation, Inc., |
17 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
18 | * http://www.gnu.org/copyleft/gpl.html |
19 | * |
20 | * @file |
21 | * @ingroup Maintenance |
22 | */ |
23 | |
24 | use MediaWiki\ExternalLinks\LinkFilter; |
25 | use MediaWiki\Maintenance\Maintenance; |
26 | use MediaWiki\Permissions\Authority; |
27 | use MediaWiki\Revision\RevisionRecord; |
28 | use MediaWiki\Revision\SlotRecord; |
29 | use MediaWiki\StubObject\StubGlobalUser; |
30 | use MediaWiki\Title\Title; |
31 | use MediaWiki\User\User; |
32 | use Wikimedia\Rdbms\Database; |
33 | |
34 | // @codeCoverageIgnoreStart |
35 | require_once __DIR__ . '/Maintenance.php'; |
36 | // @codeCoverageIgnoreEnd |
37 | |
/**
 * Maintenance script to cleanup all spam from a given hostname.
 *
 * Pages are located through the externallinks table. For every page found,
 * the script walks back through the page history until it reaches a revision
 * whose main-slot content does not link to the hostname, and reverts to it.
 * If no such revision exists, the page is blanked, or deleted when --delete
 * is given.
 *
 * With --all, the script re-invokes itself once for every wiki listed in
 * $wgLocalDatabases that has at least one matching external link.
 *
 * @ingroup Maintenance
 */
class CleanupSpam extends Maintenance {

	/**
	 * Declare the description, options and the positional hostname argument.
	 */
	public function __construct() {
		parent::__construct();
		$this->addDescription( 'Cleanup all spam from a given hostname' );
		$this->addOption( 'all', 'Check all wikis in $wgLocalDatabases' );
		$this->addOption( 'delete', 'Delete pages containing only spam instead of blanking them' );
		$this->addArg(
			'hostname',
			'Hostname that was spamming, single * wildcard in the beginning allowed'
		);
	}

	/**
	 * Entry point: resolve the system user that performs the edits, validate
	 * the hostname specification, then clean up either all wikis (--all) or
	 * only the current one.
	 */
	public function execute() {
		global $IP, $wgLocalDatabases;

		$username = wfMessage( 'spambot_username' )->text();
		$user = User::newSystemUser( $username );
		if ( !$user ) {
			$this->fatalError( "Invalid username specified in 'spambot_username' message: $username" );
		}
		// Hack: Grant bot rights so we don't flood RecentChanges
		$this->getServiceContainer()->getUserGroupManager()->addUserToGroup( $user, 'bot' );
		StubGlobalUser::setUser( $user );

		$spec = $this->getArg( 0 );

		// Build one set of externallinks query conditions per protocol; an
		// empty result means the hostname specification could not be parsed.
		$protConds = [];
		foreach ( [ 'http://', 'https://' ] as $prot ) {
			$conds = LinkFilter::getQueryConditions( $spec, [ 'protocol' => $prot ] );
			if ( !$conds ) {
				$this->fatalError( "Not a valid hostname specification: $spec" );
			}
			$protConds[$prot] = $conds;
		}

		if ( $this->hasOption( 'all' ) ) {
			// Clean up spam on all wikis
			$this->output( "Finding spam on " . count( $wgLocalDatabases ) . " wikis\n" );
			$forwardDelete = $this->hasOption( 'delete' );
			$found = false;
			foreach ( $wgLocalDatabases as $wikiId ) {
				/** @var Database $dbr */
				$dbr = $this->getDB( DB_REPLICA, [], $wikiId );

				foreach ( $protConds as $conds ) {
					$count = $dbr->newSelectQueryBuilder()
						->select( 'COUNT(*)' )
						->from( 'externallinks' )
						->where( $conds )
						->caller( __METHOD__ )
						->fetchField();
					if ( !$count ) {
						continue;
					}

					$found = true;
					// Forward --delete so the per-wiki run behaves the same
					// way as a direct invocation would.
					$childArgs = [ '--wiki', $wikiId ];
					if ( $forwardDelete ) {
						$childArgs[] = '--delete';
					}
					$childArgs[] = $spec;
					$cmd = wfShellWikiCmd( "$IP/maintenance/cleanupSpam.php", $childArgs );
					// Prefix every line of the child's output with the wiki ID
					// phpcs:ignore MediaWiki.Usage.ForbiddenFunctions.passthru
					passthru( "$cmd | sed 's/^/$wikiId: /'" );

					// The child run processes both protocols itself, so one
					// invocation per wiki is enough.
					break;
				}
			}
			if ( $found ) {
				$this->output( "All done\n" );
			} else {
				$this->output( "None found\n" );
			}
		} else {
			// Clean up spam on this wiki

			$count = 0;
			/** @var Database $dbr */
			$dbr = $this->getReplicaDB();
			foreach ( $protConds as $prot => $conds ) {
				$res = $dbr->newSelectQueryBuilder()
					->select( 'el_from' )
					->distinct()
					->from( 'externallinks' )
					->where( $conds )
					->caller( __METHOD__ )
					->fetchResultSet();
				// Running total across the protocols handled so far
				$count += $res->numRows();
				$this->output( "Found $count articles containing $spec so far...\n" );
				foreach ( $res as $row ) {
					// Each page is cleaned up in its own transaction round
					$this->beginTransactionRound( __METHOD__ );
					$this->cleanupArticle(
						$row->el_from,
						$spec,
						$prot,
						$user
					);
					$this->commitTransactionRound( __METHOD__ );
				}
			}
			if ( $count ) {
				$this->output( "Done\n" );
			}
		}
	}

	/**
	 * Remove links to the given domain from a single page.
	 *
	 * Walks back from the current revision until a revision is found whose
	 * text is visible and does not match the domain. The page is reverted to
	 * that revision; if there is none, the page is deleted (with --delete)
	 * or blanked.
	 *
	 * @param int $id Page ID, as read from externallinks.el_from
	 * @param string $domain Hostname specification being cleaned up
	 * @param string $protocol Protocol prefix, 'http://' or 'https://'
	 * @param Authority $performer User performing the edit or deletion
	 */
	private function cleanupArticle( $id, $domain, $protocol, Authority $performer ): void {
		$title = Title::newFromID( $id );
		if ( !$title ) {
			$this->error( "Internal error: no page for ID $id" );

			return;
		}

		$this->output( $title->getPrefixedDBkey() . " ..." );

		$services = $this->getServiceContainer();
		$revLookup = $services->getRevisionLookup();
		$rev = $revLookup->getRevisionByTitle( $title );
		if ( !$rev ) {
			// The lookup found no current revision (e.g. the page went away
			// after the externallinks row was read); nothing to clean up.
			$this->output( "no current revision, skipping\n" );

			return;
		}
		$currentRevId = $rev->getId();

		// Step back through the history while the revision text is either
		// hidden or still links to the spam domain.
		while ( $rev && ( $rev->isDeleted( RevisionRecord::DELETED_TEXT ) ||
			LinkFilter::matchEntry(
				// @phan-suppress-next-line PhanTypeMismatchArgumentNullable RAW never returns null
				$rev->getContent( SlotRecord::MAIN, RevisionRecord::RAW ),
				$domain,
				$protocol
			) )
		) {
			$rev = $revLookup->getPreviousRevision( $rev );
		}

		if ( $rev && $rev->getId() === $currentRevId ) {
			// The regex didn't match the current article text
			// This happens e.g. when a link comes from a template rather than the page itself
			$this->output( "False match\n" );

			return;
		}

		$page = $services->getWikiPageFactory()->newFromTitle( $title );
		if ( $rev ) {
			// Revert to this revision
			$content = $rev->getContent( SlotRecord::MAIN, RevisionRecord::RAW );

			$this->output( "reverting\n" );
			$page->doUserEditContent(
				// @phan-suppress-next-line PhanTypeMismatchArgumentNullable RAW never returns null
				$content,
				$performer,
				wfMessage( 'spam_reverting', $domain )->inContentLanguage()->text(),
				EDIT_UPDATE | EDIT_FORCE_BOT,
				$rev->getId()
			);
		} elseif ( $this->hasOption( 'delete' ) ) {
			// Didn't find a non-spammy revision, delete the page
			$this->output( "deleting\n" );
			$deletePage = $services->getDeletePageFactory()->newDeletePage( $page, $performer );
			$deletePage->deleteUnsafe( wfMessage( 'spam_deleting', $domain )->inContentLanguage()->text() );
		} else {
			// Didn't find a non-spammy revision, blank the page
			$handler = $services->getContentHandlerFactory()
				->getContentHandler( $title->getContentModel() );
			$content = $handler->makeEmptyContent();

			$this->output( "blanking\n" );
			$page->doUserEditContent(
				$content,
				$performer,
				wfMessage( 'spam_blanking', $domain )->inContentLanguage()->text(),
				EDIT_UPDATE | EDIT_FORCE_BOT
			);
		}
	}
}
216 | |
217 | // @codeCoverageIgnoreStart |
218 | $maintClass = CleanupSpam::class; |
219 | require_once RUN_MAINTENANCE_IF_MAIN; |
220 | // @codeCoverageIgnoreEnd |