MediaWiki REL1_33
cleanup.php
Go to the documentation of this file.
<?php
/**
 * An aggressive spam cleanup script.
 */

// Locate the MediaWiki installation: prefer the MW_INSTALL_PATH environment
// variable, and fall back to the directory three levels above this file
// when the variable is not set (getenv() returns false in that case).
$IP = getenv( 'MW_INSTALL_PATH' );
if ( $IP === false ) {
	$IP = __DIR__ . '/../../..';
}
require_once "$IP/maintenance/Maintenance.php";
14
/**
 * Maintenance script that scans pages for text matching the spam blacklist
 * and reverts each affected page to its latest revision that does not match.
 *
 * Pass --dry-run to only report the affected pages without editing them.
 */
class Cleanup extends Maintenance {
	/**
	 * Declare the script's requirements and command-line options.
	 */
	public function __construct() {
		parent::__construct();
		$this->requireExtension( 'SpamBlacklist' );
		$this->addOption( 'dry-run', 'Only do a dry run' );
	}

	/**
	 * Walk every page ID from 1 to MAX(page_id), test the text of the loaded
	 * revision against the blacklist regexes, and either report the match
	 * (dry run) or clean the page up.
	 */
	public function execute() {
		$user = User::newSystemUser( 'Spam cleanup script', [ 'steal' => true ] );

		$sb = BaseBlacklist::getSpamBlacklist();
		$regexes = $sb->getBlacklists();
		if ( !$regexes ) {
			$this->fatalError( "Invalid regex, can't clean up spam" );
		}
		$dryRun = $this->hasOption( 'dry-run' );

		$dbr = wfGetDB( DB_REPLICA );
		$maxID = (int)$dbr->selectField( 'page', 'MAX(page_id)' );
		$reportingInterval = 100;

		// Each regex is a string, so its size in bytes is strlen(), not count().
		$this->output( "Regexes are " . implode( ', ', array_map( 'strlen', $regexes ) ) . " bytes\n" );
		$this->output( "Searching for spam in $maxID pages...\n" );
		if ( $dryRun ) {
			$this->output( "Dry run only\n" );
		}

		for ( $id = 1; $id <= $maxID; $id++ ) {
			if ( $id % $reportingInterval == 0 ) {
				// "\r" keeps the progress counter on a single terminal line.
				printf( "%-8d %-5.2f%%\r", $id, $id / $maxID * 100 );
			}

			$revision = Revision::loadFromPageId( $dbr, $id );
			if ( !$revision ) {
				// No revision could be loaded for this page ID.
				continue;
			}

			$text = ContentHandler::getContentText( $revision->getContent() );
			if ( !$text ) {
				continue;
			}

			// Stop at the first matching regex: cleanupArticle() re-checks
			// every regex itself, so handling further matches against this
			// same (now stale) revision would only repeat the work.
			$found = $this->findSpam( $text, $regexes );
			if ( $found === null ) {
				continue;
			}

			$titleText = $revision->getTitle()->getPrefixedText();
			if ( $dryRun ) {
				$this->output( "Found spam in [[$titleText]]\n" );
			} else {
				$this->output( "Cleaning up links to {$found} in [[$titleText]]\n" );
				$match = str_replace( 'http://', '', $found );
				$this->cleanupArticle( $revision, $regexes, $match, $user );
			}
		}

		// Just for satisfaction: print the final progress line. Guard the
		// percentage against an empty page table ($maxID === 0).
		$scanned = $id - 1;
		$percent = $maxID > 0 ? $scanned / $maxID * 100 : 100;
		printf( "%-8d %-5.2f%%\n", $scanned, $percent );
	}

	/**
	 * Return the first piece of text matched by any of the given regexes.
	 *
	 * @param string|false|null $text Text to test; non-strings are treated as ''
	 * @param string[] $regexes PCRE patterns to try, in order
	 * @return string|null The matched substring, or null when nothing matches
	 */
	private function findSpam( $text, $regexes ) {
		foreach ( $regexes as $regex ) {
			if ( preg_match( $regex, (string)$text, $hit ) ) {
				return $hit[0];
			}
		}
		return null;
	}

	/**
	 * Find the latest revision of the article that does not contain spam and
	 * revert to it. If every revision matches, the page is blanked instead.
	 *
	 * @param Revision $rev Revision to start from; older ones are reached via getPrevious()
	 * @param string[] $regexes Blacklist regexes to test each revision against
	 * @param string $match The offending text, used only in the edit summary
	 * @param User $user User the edit is attributed to
	 */
	private function cleanupArticle( Revision $rev, $regexes, $match, User $user ) {
		$title = $rev->getTitle();

		// Walk backwards through the history until a clean revision is found
		// or the history is exhausted ($rev becomes falsy).
		while ( $rev ) {
			$revText = ContentHandler::getContentText( $rev->getContent() );
			if ( $this->findSpam( $revText, $regexes ) === null ) {
				// Didn't find any spam
				break;
			}
			$rev = $rev->getPrevious();
		}

		if ( !$rev ) {
			// Didn't find a non-spammy revision, blank the page
			$this->output( "All revisions are spam, blanking...\n" );
			$text = '';
			$comment = "All revisions matched the spam blacklist ($match), blanking";
		} else {
			// Revert to this revision
			$text = ContentHandler::getContentText( $rev->getContent() );
			$comment = "Cleaning up links to $match";
		}

		$wikiPage = new WikiPage( $title );
		$wikiPage->doEditContent(
			ContentHandler::makeContent( $text, $title ), $comment,
			0, false, $user
		);
	}
}
112
// Name the maintenance class defined in this file, then include the runner.
// NOTE(review): RUN_MAINTENANCE_IF_MAIN is defined outside this file
// (presumably by Maintenance.php) — confirm.
$maintClass = Cleanup::class;
require_once RUN_MAINTENANCE_IF_MAIN;
and that you know you can do these things. To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it. For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy
wfGetDB( $db, $groups=[], $wiki=false)
Get a Database object.
static getSpamBlacklist()
__construct()
Default constructor.
Definition cleanup.php:16
cleanupArticle(Revision $rev, $regexes, $match, User $user)
Find the latest revision of the article that does not contain spam and revert to it.
Definition cleanup.php:77
execute()
Do the actual work.
Definition cleanup.php:22
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
requireExtension( $name)
Indicate that the specified extension must be loaded before the script can run.
output( $out, $channel=null)
Throw some output to the user.
hasOption( $name)
Checks to see if a particular option exists.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.
static loadFromPageId( $db, $pageid, $id=0)
Load either the current, or a specified, revision that's attached to a given page.
Definition Revision.php:260
The User object encapsulates all of the user-specific settings (user_id, name, rights,...
Definition User.php:48
static newSystemUser( $name, $options=[])
Static factory method for creation of a "system" user from username.
Definition User.php:813
Class representing a MediaWiki article and history.
Definition WikiPage.php:45
$IP
An aggressive spam cleanup script.
Definition cleanup.php:9
$maintClass
Definition cleanup.php:113
presenting them properly to the user as errors is done by the caller return true use this to change the list i e etc $rev
Definition hooks.txt:1779
require_once RUN_MAINTENANCE_IF_MAIN
const DB_REPLICA
Definition defines.php:25