Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
0.00% covered (danger)
0.00%
0 / 110
0.00% covered (danger)
0.00%
0 / 3
CRAP
0.00% covered (danger)
0.00%
0 / 1
CleanupSpam
0.00% covered (danger)
0.00%
0 / 110
0.00% covered (danger)
0.00%
0 / 3
506
0.00% covered (danger)
0.00%
0 / 1
 __construct
0.00% covered (danger)
0.00%
0 / 8
0.00% covered (danger)
0.00%
0 / 1
2
 execute
0.00% covered (danger)
0.00%
0 / 59
0.00% covered (danger)
0.00%
0 / 1
156
 cleanupArticle
0.00% covered (danger)
0.00%
0 / 43
0.00% covered (danger)
0.00%
0 / 1
90
1<?php
2/**
3 * Cleanup all spam from a given hostname.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @file
21 * @ingroup Maintenance
22 */
23
24use MediaWiki\ExternalLinks\LinkFilter;
25use MediaWiki\Maintenance\Maintenance;
26use MediaWiki\Permissions\Authority;
27use MediaWiki\Revision\RevisionRecord;
28use MediaWiki\Revision\SlotRecord;
29use MediaWiki\StubObject\StubGlobalUser;
30use MediaWiki\Title\Title;
31use MediaWiki\User\User;
32use Wikimedia\Rdbms\Database;
33
34// @codeCoverageIgnoreStart
35require_once __DIR__ . '/Maintenance.php';
36// @codeCoverageIgnoreEnd
37
/**
 * Maintenance script to cleanup all spam from a given hostname.
 *
 * Every page with an external link matching the hostname is rolled back to
 * its newest revision that neither matches the hostname nor has its text
 * hidden. When no such revision exists the page is blanked, or deleted if
 * the --delete option was given.
 *
 * @ingroup Maintenance
 */
class CleanupSpam extends Maintenance {

    public function __construct() {
        parent::__construct();
        $this->addDescription( 'Cleanup all spam from a given hostname' );
        $this->addOption( 'all', 'Check all wikis in $wgLocalDatabases' );
        $this->addOption( 'delete', 'Delete pages containing only spam instead of blanking them' );
        $this->addArg(
            'hostname',
            'Hostname that was spamming, single * wildcard in the beginning allowed'
        );
    }

    /**
     * Entry point: set up the system user that performs the edits, validate
     * the hostname specification, then clean either every wiki (--all) or
     * only the current one.
     */
    public function execute() {
        $username = wfMessage( 'spambot_username' )->text();
        $user = User::newSystemUser( $username );
        if ( !$user ) {
            $this->fatalError( "Invalid username specified in 'spambot_username' message: $username" );
        }
        // Hack: Grant bot rights so we don't flood RecentChanges
        $this->getServiceContainer()->getUserGroupManager()->addUserToGroup( $user, 'bot' );
        StubGlobalUser::setUser( $user );

        $spec = $this->getArg( 0 );

        // Build one set of query conditions per protocol up front, so that an
        // invalid hostname specification aborts before any wiki is touched.
        $protConds = [];
        foreach ( [ 'http://', 'https://' ] as $prot ) {
            $conds = LinkFilter::getQueryConditions( $spec, [ 'protocol' => $prot ] );
            if ( !$conds ) {
                $this->fatalError( "Not a valid hostname specification: $spec" );
            }
            $protConds[$prot] = $conds;
        }

        if ( $this->hasOption( 'all' ) ) {
            $this->cleanupAllWikis( $spec, $protConds );
        } else {
            $this->cleanupLocalWiki( $spec, $protConds, $user );
        }
    }

    /**
     * Clean up spam on all wikis listed in $wgLocalDatabases by running this
     * script once, in a child process, for each wiki that has matching links.
     *
     * @param string $spec Hostname specification given on the command line
     * @param array[] $protConds Query conditions on externallinks, keyed by protocol
     */
    private function cleanupAllWikis( string $spec, array $protConds ) {
        global $IP, $wgLocalDatabases;

        $this->output( "Finding spam on " . count( $wgLocalDatabases ) . " wikis\n" );
        $found = false;
        foreach ( $wgLocalDatabases as $wikiId ) {
            /** @var Database $dbr */
            $dbr = $this->getDB( DB_REPLICA, [], $wikiId );

            foreach ( $protConds as $conds ) {
                $count = $dbr->newSelectQueryBuilder()
                    ->select( 'COUNT(*)' )
                    ->from( 'externallinks' )
                    ->where( $conds )
                    ->caller( __METHOD__ )
                    ->fetchField();
                if ( !$count ) {
                    continue;
                }

                $found = true;
                $params = [ '--wiki', $wikiId ];
                if ( $this->hasOption( 'delete' ) ) {
                    // Forward --delete so the child deletes rather than blanks
                    $params[] = '--delete';
                }
                $params[] = $spec;
                $cmd = wfShellWikiCmd( "$IP/maintenance/cleanupSpam.php", $params );
                // phpcs:ignore MediaWiki.Usage.ForbiddenFunctions.passthru
                passthru( "$cmd | sed 's/^/$wikiId:  /'" );

                // The child process handles every protocol itself, so one run
                // per wiki is enough; don't launch it again for the next protocol.
                break;
            }
        }
        if ( $found ) {
            $this->output( "All done\n" );
        } else {
            $this->output( "None found\n" );
        }
    }

    /**
     * Clean up spam on the current wiki, one transaction round per page.
     *
     * @param string $spec Hostname specification given on the command line
     * @param array[] $protConds Query conditions on externallinks, keyed by protocol
     * @param Authority $performer User the cleanup edits are attributed to
     */
    private function cleanupLocalWiki( string $spec, array $protConds, Authority $performer ) {
        $count = 0;
        /** @var Database $dbr */
        $dbr = $this->getReplicaDB();
        foreach ( $protConds as $prot => $conds ) {
            $res = $dbr->newSelectQueryBuilder()
                ->select( 'el_from' )
                ->distinct()
                ->from( 'externallinks' )
                ->where( $conds )
                ->caller( __METHOD__ )
                ->fetchResultSet();
            // Running total across protocols, hence "so far"
            $count += $res->numRows();
            $this->output( "Found $count articles containing $spec so far...\n" );
            foreach ( $res as $row ) {
                $this->beginTransactionRound( __METHOD__ );
                $this->cleanupArticle( $row->el_from, $spec, $prot, $performer );
                $this->commitTransactionRound( __METHOD__ );
            }
        }
        if ( $count ) {
            $this->output( "Done\n" );
        }
    }

    /**
     * Remove the spam link from a single page by reverting, blanking or
     * deleting it.
     *
     * @param int $id Page ID taken from externallinks.el_from
     * @param string $domain Hostname specification to match
     * @param string $protocol Protocol prefix ('http://' or 'https://')
     * @param Authority $performer User the edit or deletion is attributed to
     */
    private function cleanupArticle( $id, $domain, $protocol, Authority $performer ) {
        $title = Title::newFromID( $id );
        if ( !$title ) {
            $this->error( "Internal error: no page for ID $id" );

            return;
        }

        $this->output( $title->getPrefixedDBkey() . " ..." );

        $services = $this->getServiceContainer();
        $revLookup = $services->getRevisionLookup();
        $currentRev = $revLookup->getRevisionByTitle( $title );
        if ( !$currentRev ) {
            // Nothing to inspect or revert to; skip instead of dereferencing null
            $this->output( "no revision found, skipping\n" );

            return;
        }
        $currentRevId = $currentRev->getId();

        // Walk back through the history until reaching a revision whose text is
        // visible and does not match the spam domain, or running out of history.
        $rev = $currentRev;
        while ( $rev && ( $rev->isDeleted( RevisionRecord::DELETED_TEXT ) ||
            LinkFilter::matchEntry(
                // @phan-suppress-next-line PhanTypeMismatchArgumentNullable RAW never returns null
                $rev->getContent( SlotRecord::MAIN, RevisionRecord::RAW ),
                $domain,
                $protocol
            ) )
        ) {
            $rev = $revLookup->getPreviousRevision( $rev );
        }

        if ( $rev && $rev->getId() === $currentRevId ) {
            // The regex didn't match the current article text
            // This happens e.g. when a link comes from a template rather than the page itself
            $this->output( "False match\n" );

            return;
        }

        $page = $services->getWikiPageFactory()->newFromTitle( $title );
        if ( $rev ) {
            // Revert to this revision
            $content = $rev->getContent( SlotRecord::MAIN, RevisionRecord::RAW );

            $this->output( "reverting\n" );
            $page->doUserEditContent(
                // @phan-suppress-next-line PhanTypeMismatchArgumentNullable RAW never returns null
                $content,
                $performer,
                wfMessage( 'spam_reverting', $domain )->inContentLanguage()->text(),
                EDIT_UPDATE | EDIT_FORCE_BOT,
                $rev->getId()
            );
        } elseif ( $this->hasOption( 'delete' ) ) {
            // Didn't find a non-spammy revision, delete the page
            $this->output( "deleting\n" );
            $deletePage = $services->getDeletePageFactory()->newDeletePage( $page, $performer );
            $deletePage->deleteUnsafe( wfMessage( 'spam_deleting', $domain )->inContentLanguage()->text() );
        } else {
            // Didn't find a non-spammy revision, blank the page
            $handler = $services->getContentHandlerFactory()
                ->getContentHandler( $title->getContentModel() );
            $content = $handler->makeEmptyContent();

            $this->output( "blanking\n" );
            $page->doUserEditContent(
                $content,
                $performer,
                wfMessage( 'spam_blanking', $domain )->inContentLanguage()->text(),
                EDIT_UPDATE | EDIT_FORCE_BOT
            );
        }
    }
}
216
217// @codeCoverageIgnoreStart
// Publish this script's class name in $maintClass, then include the file named
// by RUN_MAINTENANCE_IF_MAIN. That constant is defined outside this file;
// presumably it runs $maintClass when this script is the entry point (TODO confirm).
218$maintClass = CleanupSpam::class;
219require_once RUN_MAINTENANCE_IF_MAIN;
220// @codeCoverageIgnoreEnd