Go to the documentation of this file.
9 $IP = getenv(
'MW_INSTALL_PATH' );
10 if (
$IP ===
false ) {
11 $IP = __DIR__ .
'/../../..';
13 require_once
"$IP/maintenance/Maintenance.php";
17 parent::__construct();
19 $this->
addOption(
'dry-run',
'Only do a dry run' );
26 $regexes = $sb->getBlacklists();
28 $this->
fatalError(
"Invalid regex, can't clean up spam" );
33 $maxID = (int)
$dbr->selectField(
'page',
'MAX(page_id)' );
34 $reportingInterval = 100;
36 $this->
output(
"Regexes are " . implode(
', ', array_map(
'count', $regexes ) ) .
" bytes\n" );
37 $this->
output(
"Searching for spam in $maxID pages...\n" );
39 $this->
output(
"Dry run only\n" );
42 for ( $id = 1; $id <= $maxID; $id++ ) {
43 if ( $id % $reportingInterval == 0 ) {
44 printf(
"%-8d %-5.2f%%\r", $id, $id / $maxID * 100 );
50 foreach ( $regexes as $regex ) {
51 if ( preg_match( $regex, $text,
$matches ) ) {
52 $title = $revision->getTitle();
53 $titleText =
$title->getPrefixedText();
55 $this->
output(
"Found spam in [[$titleText]]\n" );
57 $this->
output(
"Cleaning up links to {$matches[0]} in [[$titleText]]\n" );
58 $match = str_replace(
'http://',
'',
$matches[0] );
67 printf(
"%-8d %-5.2f%%\n", $id - 1, ( $id - 1 ) / $maxID * 100 );
81 foreach ( $regexes as $regex ) {
97 $this->
output(
"All revisions are spam, blanking...\n" );
99 $comment =
"All revisions matched the spam blacklist ($match), blanking";
103 $comment =
"Cleaning up links to $match";
106 $wikiPage->doEditContent(
static getSpamBlacklist()
const RUN_MAINTENANCE_IF_MAIN
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.
getContent( $audience=self::FOR_PUBLIC, User $user=null)
Fetch revision content if it's available to the specified audience.
Class representing a MediaWiki article and history.
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
static newSystemUser( $name, $options=[])
Static factory method for creation of a "system" user from username.
wfGetDB( $db, $groups=[], $wiki=false)
Get a Database object.
static loadFromPageId( $db, $pageid, $id=0)
Load either the current, or a specified, revision that's attached to a given page.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
requireExtension( $name)
Indicate that the specified extension must be loaded before the script can run.
cleanupArticle(Revision $rev, $regexes, $match, User $user)
Find the latest revision of the article that does not contain spam and revert to it.
getPrevious()
Get previous revision for this title.
static makeContent( $text, Title $title=null, $modelId=null, $format=null)
Convenience function for creating a Content object from a given textual representation.
getTitle()
Returns the title of the page associated with this entry.
$IP
An aggressive spam cleanup script.
execute()
Do the actual work.
static getContentText(Content $content=null)
Convenience function for getting flat text from a Content object.
output( $out, $channel=null)
Throw some output to the user.
__construct()
Default constructor.
hasOption( $name)
Checks to see if a particular option exists.
The User object encapsulates all of the user-specific settings (user_id, name, rights, email address, options, last login time).