MediaWiki REL1_37
dumpIterator.php
Go to the documentation of this file.
1<?php
31
32require_once __DIR__ . '/Maintenance.php';
33
/**
 * Base class for iterating over a dump.
 *
 * The script either processes one text file (--file) or streams an XML dump
 * from stdin (--dump -), handing every revision to processRevision().
 *
 * @ingroup Maintenance
 */
abstract class DumpIterator extends Maintenance {
	/** @var int Revisions seen so far; restarted at 1 once the --from page is reached */
	private $count = 0;

	/** @var float microtime( true ) value taken just before the dump import starts */
	private $startTime;

	/** @var string|bool|null Title to start from; null or false means "process everything" */
	private $from;

	public function __construct() {
		parent::__construct();
		$this->addDescription( 'Does something with a dump' );
		$this->addOption( 'file', 'File with text to run.', false, true );
		$this->addOption( 'dump', 'XML dump to execute all revisions.', false, true );
		$this->addOption( 'from', 'Article from XML dump to start from.', false, true );
	}

	/**
	 * Do the actual work: run over a single file or over a dump read from stdin.
	 */
	public function execute() {
		// Exactly one of --file / --dump must be given.
		if ( !( $this->hasOption( 'file' ) xor $this->hasOption( 'dump' ) ) ) {
			$this->fatalError( "You must provide a file or dump" );
		}

		$this->checkOptions();

		if ( $this->hasOption( 'file' ) ) {
			$this->executeOnFile( $this->getOption( 'file' ) );

			return;
		}

		$this->startTime = microtime( true );

		// Only stdin is supported as a dump source; fatalError() terminates the script.
		if ( $this->getOption( 'dump' ) !== '-' ) {
			$this->fatalError( "Sorry, I don't support dump filenames yet. "
				. "Use - and provide it on stdin in the meantime." );
		}
		$source = new ImportStreamSource( $this->getStdin() );

		$importer = MediaWikiServices::getInstance()
			->getWikiImporterFactory()
			->getWikiImporter( $source );

		$importer->setRevisionCallback( [ $this, 'handleRevision' ] );
		$importer->setNoticeCallback( static function ( $msg, $params ) {
			echo wfMessage( $msg, $params )->text() . "\n";
		} );

		$this->from = $this->getOption( 'from', null );
		$this->count = 0;
		$importer->doImport();

		$this->conclusions();

		$delta = microtime( true ) - $this->startTime;
		$this->error( "Done {$this->count} revisions in " . round( $delta, 2 ) . " seconds " );
		if ( $delta > 0 ) {
			$this->error( round( $this->count / $delta, 2 ) . " pages/sec" );
		}

		# Report memory_get_peak_usage() only after all the other data has been
		# output, so nothing is lost if this last step dies.
		$this->error( "Memory peak usage of " . memory_get_peak_usage() . " bytes\n" );
	}

	/**
	 * Build a single revision from a text file and process it.
	 *
	 * The page title is derived from the file name: the ".txt" suffix is
	 * dropped and the remainder is URL-decoded.
	 *
	 * @param string $file Path of the file holding the revision text
	 */
	private function executeOnFile( $file ) {
		$text = file_get_contents( $file );
		if ( $text === false ) {
			$this->fatalError( "Unable to read file '$file'" );
		}

		$title = Title::newFromText( rawurldecode( basename( $file, '.txt' ) ) );
		if ( !$title ) {
			$this->fatalError( "Unable to derive a valid title from file '$file'" );
		}

		$revision = new WikiRevision( $this->getConfig() );
		$revision->setTitle( $title );
		$revision->setContent( SlotRecord::MAIN, ContentHandler::makeContent( $text, $title ) );

		// A single file is always processed; no start-title filtering applies.
		$this->from = false;
		$this->handleRevision( $revision );
	}

	/**
	 * Handle some last-minute setup here.
	 */
	public function finalSetup() {
		parent::finalSetup();

		if ( $this->getDbType() == Maintenance::DB_NONE ) {
			// These are configuration globals; without the declaration the
			// assignments below would only create unused local variables.
			global $wgUseDatabaseMessages, $wgLocalisationCacheConf, $wgHooks;

			// No database access: do not look up messages in the MediaWiki: namespace.
			$wgUseDatabaseMessages = false;
			$wgLocalisationCacheConf['storeClass'] = LCStoreNull::class;
			$wgHooks['InterwikiLoadPrefix'][] = 'DumpIterator::disableInterwikis';
		}
	}

	/**
	 * InterwikiLoadPrefix hook handler that always returns false.
	 *
	 * @param string $prefix
	 * @param array &$data
	 * @return bool Always false
	 */
	public static function disableInterwikis( $prefix, &$data ) {
		# Title::newFromText will check on each namespaced article if it's an interwiki.
		# We always answer that it is not.

		return false;
	}

	/**
	 * Callback function for each revision, child classes should override
	 * processRevision instead.
	 *
	 * @param WikiRevision $rev
	 */
	public function handleRevision( $rev ) {
		$title = $rev->getTitle();
		if ( !$title ) {
			$this->error( "Got bogus revision with null title!" );

			return;
		}

		$this->count++;

		// Both null (option not given / start page already reached) and false
		// (single-file mode) mean that no start-title filter is active.
		if ( $this->from !== null && $this->from !== false ) {
			// Compare as strings so numeric-looking titles are not loosely equated.
			if ( (string)$this->from !== (string)$title ) {
				return;
			}
			$this->output( "Skipped " . ( $this->count - 1 ) . " pages\n" );

			$this->count = 1;
			$this->from = null;
		}

		$this->processRevision( $rev );
	}

	/**
	 * Stub function for processing additional options.
	 */
	public function checkOptions() {
	}

	/**
	 * Stub function for giving data about what was computed.
	 */
	public function conclusions() {
	}

	/**
	 * Core function which does whatever the maintenance script is designed to do.
	 *
	 * @param WikiRevision $rev
	 */
	abstract public function processRevision( WikiRevision $rev );
}
176
/**
 * Maintenance script that runs a regex in the revisions from a dump.
 *
 * @ingroup Maintenance
 */
class SearchDump extends DumpIterator {

	public function __construct() {
		parent::__construct();
		$this->addDescription( 'Runs a regex in the revisions from a dump' );
		$this->addOption( 'regex', 'Searching regex', true, true );
	}

	/**
	 * This script only reads dump or file input, so it asks for no database access.
	 *
	 * @return int Maintenance::DB_NONE
	 */
	public function getDbType() {
		return Maintenance::DB_NONE;
	}

	/**
	 * Print the title and timestamp of the revision if its text matches
	 * the --regex option.
	 *
	 * @param WikiRevision $rev
	 */
	public function processRevision( WikiRevision $rev ) {
		$matched = preg_match(
			$this->getOption( 'regex' ),
			$rev->getContent()->getTextForSearchIndex()
		);

		// preg_match() returns false on failure (broken pattern, PCRE limits);
		// report that instead of treating it as "no match".
		if ( $matched === false ) {
			$this->error( "Regex failed on " . $rev->getTitle()
				. " (PCRE error code " . preg_last_error() . ")" );

			return;
		}

		if ( $matched ) {
			$this->output( $rev->getTitle() . " matches at edit from " . $rev->getTimestamp() . "\n" );
		}
	}
}
203
// Name the script class in $maintClass, then include the file referenced by
// RUN_MAINTENANCE_IF_MAIN (presumably runs the class when this file is the
// entry point; confirm against Maintenance.php).
$maintClass = SearchDump::class;
require_once RUN_MAINTENANCE_IF_MAIN;
$wgUseDatabaseMessages
Translation using MediaWiki: namespace.
$wgHooks
Global list of hooks.
$wgLocalisationCacheConf
Localisation cache configuration.
wfMessage( $key,... $params)
This is the function for getting translated interface messages.
Base class for iterating over a dump.
static disableInterwikis( $prefix, &$data)
__construct()
Default constructor.
execute()
Do the actual work.
string bool null $from
processRevision(WikiRevision $rev)
Core function which does whatever the maintenance script is designed to do.
conclusions()
Stub function for giving data about what was computed.
checkOptions()
Stub function for processing additional options.
finalSetup()
Handle some last-minute setup here.
handleRevision( $rev)
Callback function for each revision, child classes should override processRevision instead.
Imports an XML dump from a file (either from file upload, files on disk, or HTTP)
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effo...
error( $err, $die=0)
Throw an error to the user.
const DB_NONE
Constants for DB access type.
output( $out, $channel=null)
Throw some output to the user.
getStdin( $len=null)
Return input from stdin.
hasOption( $name)
Checks to see if a particular option was set.
getDbType()
Does the script need different DB access? By default, we give Maintenance scripts normal rights to th...
addDescription( $text)
Set the description text.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.
MediaWikiServices is the service locator for the application scope of MediaWiki.
Value object representing a content slot associated with a page revision.
Maintenance script that runs a regex in the revisions from a dump.
__construct()
Default constructor.
processRevision(WikiRevision $rev)
getDbType()
Does the script need different DB access? By default, we give Maintenance scripts normal rights to th...
Represents a revision, log entry or upload during the import process.
getContent( $role=SlotRecord::MAIN)
$maintClass
$source
$content
Definition router.php:76
if(PHP_SAPI !='cli-server') if(!isset( $_SERVER['SCRIPT_FILENAME'])) $file
Item class for a filearchive table row.
Definition router.php:42