9$IP = getenv(
'MW_INSTALL_PATH' );
11 $IP = __DIR__ .
'/../../..';
13require_once
"$IP/maintenance/Maintenance.php";
17 parent::__construct();
19 $this->
addOption(
'dry-run',
'Only do a dry run' );
26 $regexes = $sb->getBlacklists();
28 $this->
fatalError(
"Invalid regex, can't clean up spam" );
33 $maxID = (int)
$dbr->selectField(
'page',
'MAX(page_id)' );
34 $reportingInterval = 100;
36 $this->
output(
"Regexes are " . implode(
', ', array_map(
'count', $regexes ) ) .
" bytes\n" );
37 $this->
output(
"Searching for spam in $maxID pages...\n" );
39 $this->
output(
"Dry run only\n" );
42 for ( $id = 1; $id <= $maxID; $id++ ) {
43 if ( $id % $reportingInterval == 0 ) {
44 printf(
"%-8d %-5.2f%%\r", $id, $id / $maxID * 100 );
48 $text = ContentHandler::getContentText( $revision->getContent() );
50 foreach ( $regexes as $regex ) {
51 if ( preg_match( $regex, $text,
$matches ) ) {
52 $title = $revision->getTitle();
53 $titleText = $title->getPrefixedText();
55 $this->
output(
"Found spam in [[$titleText]]\n" );
57 $this->
output(
"Cleaning up links to {$matches[0]} in [[$titleText]]\n" );
58 $match = str_replace(
'http://',
'',
$matches[0] );
67 printf(
"%-8d %-5.2f%%\n", $id - 1, ( $id - 1 ) / $maxID * 100 );
78 $title =
$rev->getTitle();
81 foreach ( $regexes as $regex ) {
85 ContentHandler::getContentText(
$rev->getContent() )
97 $this->
output(
"All revisions are spam, blanking...\n" );
99 $comment =
"All revisions matched the spam blacklist ($match), blanking";
102 $text = ContentHandler::getContentText(
$rev->getContent() );
103 $comment =
"Cleaning up links to $match";
106 $wikiPage->doEditContent(
107 ContentHandler::makeContent( $text, $title ), $comment,
wfGetDB( $db, $groups=[], $wiki=false)
Get a Database object.
static getSpamBlacklist()
__construct()
Default constructor.
cleanupArticle(Revision $rev, $regexes, $match, User $user)
Find the latest revision of the article that does not contain spam and revert to it.
execute()
Do the actual work.
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
requireExtension( $name)
Indicate that the specified extension must be loaded before the script can run.
output( $out, $channel=null)
Throw some output to the user.
hasOption( $name)
Checks to see if a particular option exists.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.
static loadFromPageId( $db, $pageid, $id=0)
Load either the current, or a specified, revision that's attached to a given page.
The User object encapsulates all of the user-specific settings (user_id, name, rights, email address, options, last login time).
static newSystemUser( $name, $options=[])
Static factory method for creation of a "system" user from username.
Class representing a MediaWiki article and history.
$IP
An aggressive spam cleanup script.
presenting them properly to the user as errors is done by the caller return true use this to change the list i e etc $rev
require_once RUN_MAINTENANCE_IF_MAIN