MediaWiki master
dumpIterator.php
Go to the documentation of this file.
1<?php
26
27// @codeCoverageIgnoreStart
28require_once __DIR__ . '/Maintenance.php';
29// @codeCoverageIgnoreEnd
30
/**
 * Base class for iterating over a dump.
 *
 * The script either handles a single text file (--file) or streams an XML
 * dump from stdin (--dump -) and hands every revision to processRevision().
 * Subclasses implement processRevision() and may override checkOptions()
 * and conclusions().
 */
abstract class DumpIterator extends Maintenance {
	/** @var int Number of revisions counted so far (restarted at 1 once the --from title is reached) */
	private $count = 0;

	/** @var float|null Value of microtime( true ) taken just before the dump import is set up */
	private $startTime;

	/**
	 * @var string|null Title to start processing from; null when no start
	 *  title is pending (none was requested, or it has already been reached).
	 */
	private $from;

	/**
	 * Register the description and the options shared by all dump iterators.
	 */
	public function __construct() {
		parent::__construct();
		$this->addDescription( 'Does something with a dump' );
		$this->addOption( 'file', 'File with text to run.', false, true );
		$this->addOption( 'dump', 'XML dump to execute all revisions.', false, true );
		$this->addOption( 'from', 'Article from XML dump to start from.', false, true );
	}

	/**
	 * Do the actual work: process either a single file or a dump read from stdin.
	 *
	 * Exactly one of --file and --dump must be given; otherwise the script
	 * terminates through fatalError().
	 */
	public function execute() {
		if ( !( $this->hasOption( 'file' ) xor $this->hasOption( 'dump' ) ) ) {
			$this->fatalError( "You must provide a file or dump" );
		}

		$this->checkOptions();

		if ( $this->hasOption( 'file' ) ) {
			$file = $this->getOption( 'file' );

			// file_get_contents() returns false on failure; stop here instead of
			// building a revision out of a boolean.
			$text = file_get_contents( $file );
			if ( $text === false ) {
				$this->fatalError( "Unable to read file '$file'" );
			}

			// The page title is taken from the URL-decoded file name without its .txt suffix.
			$title = Title::newFromText( rawurldecode( basename( $file, '.txt' ) ) );
			if ( !$title ) {
				$this->fatalError( "Unable to build a valid title from file name '$file'" );
			}

			$revision = new WikiRevision();
			$revision->setTitle( $title );
			$content = ContentHandler::makeContent( $text, $title );
			$revision->setContent( SlotRecord::MAIN, $content );

			// A single file is always processed; there is nothing to skip.
			$this->from = null;
			$this->handleRevision( $revision );

			return;
		}

		$this->startTime = microtime( true );

		if ( $this->getOption( 'dump' ) !== '-' ) {
			$this->fatalError( "Sorry, I don't support dump filenames yet. "
				. "Use - and provide it on stdin on the meantime." );
		}
		$source = new ImportStreamSource( $this->getStdin() );

		$user = User::newSystemUser( User::MAINTENANCE_SCRIPT_USER, [ 'steal' => true ] );

		$importer = $this->getServiceContainer()
			->getWikiImporterFactory()
			->getWikiImporter( $source, new UltimateAuthority( $user ) );

		$importer->setRevisionCallback(
			$this->handleRevision( ... ) );
		$importer->setNoticeCallback( static function ( $msg, $params ) {
			echo wfMessage( $msg, $params )->text() . "\n";
		} );

		// null (option not given) means: start with the very first revision.
		$this->from = $this->getOption( 'from', null );
		$this->count = 0;
		$importer->doImport();

		$this->conclusions();

		$delta = microtime( true ) - $this->startTime;
		$this->error( "Done {$this->count} revisions in " . round( $delta, 2 ) . " seconds " );
		if ( $delta > 0 ) {
			$this->error( round( $this->count / $delta, 2 ) . " pages/sec" );
		}

		# Perform the memory_get_peak_usage() when all the other data has been
		# output so there's no damage if it dies.
		$this->error( "Memory peak usage of " . memory_get_peak_usage() . " bytes\n" );
	}

	/**
	 * Handle some last-minute setup here.
	 *
	 * When the script runs without a database (getDbType() is DB_NONE), an
	 * InterwikiLoadPrefix handler is registered and database messages and the
	 * localisation cache store are switched off through the settings builder.
	 *
	 * @param SettingsBuilder $settingsBuilder
	 */
	public function finalSetup( SettingsBuilder $settingsBuilder ) {
		parent::finalSetup( $settingsBuilder );

		if ( $this->getDbType() == Maintenance::DB_NONE ) {
			// TODO: Allow hooks to be registered via SettingsBuilder as well!
			// This matches the idea of unifying SettingsBuilder with ExtensionRegistry.
			// phpcs:disable MediaWiki.Usage.DeprecatedGlobalVariables.Deprecated$wgHooks
			global $wgHooks;
			$wgHooks['InterwikiLoadPrefix'][] = 'DumpIterator::disableInterwikis';

			$settingsBuilder->putConfigValues( [
				MainConfigNames::UseDatabaseMessages => false,
				MainConfigNames::LocalisationCacheConf => [ 'storeClass' => LCStoreNull::class ],
			] );
		}
	}

	/**
	 * InterwikiLoadPrefix hook handler registered by finalSetup().
	 *
	 * @param string $prefix Unused
	 * @param array &$data Unused, left untouched
	 * @return bool Always false
	 */
	public static function disableInterwikis( string $prefix, array &$data ): bool {
		# Title::newFromText will check on each namespaced article if it's an interwiki.
		# We always answer that it is not.

		return false;
	}

	/**
	 * Callback function for each revision, child classes should override
	 * processRevision instead.
	 *
	 * Revisions without a title are reported and dropped. While a start title
	 * is pending, revisions are counted but not processed until a revision
	 * with that title shows up.
	 *
	 * @param WikiRevision $rev
	 */
	public function handleRevision( $rev ) {
		$title = $rev->getTitle();
		if ( !$title ) {
			$this->error( "Got bogus revision with null title!" );

			return;
		}

		$this->count++;

		// null is the only "nothing to skip" marker: it is what getOption()
		// yields when --from is absent and what is stored below once the start
		// title has been reached, so the check has to be against null.
		if ( $this->from !== null ) {
			// Loose comparison on purpose: $title is an object that is compared
			// with the string passed to --from.
			if ( $this->from != $title ) {
				return;
			}
			$this->output( "Skipped " . ( $this->count - 1 ) . " pages\n" );

			// Restart the counter so the final statistics only cover processed revisions.
			$this->count = 1;
			$this->from = null;
		}

		$this->processRevision( $rev );
	}

	/**
	 * Stub function for processing additional options.
	 *
	 * Called by execute() right after the --file/--dump sanity check.
	 */
	public function checkOptions() {
	}

	/**
	 * Stub function for giving data about what was computed.
	 *
	 * Called by execute() once the dump import has finished.
	 */
	public function conclusions() {
	}

	/**
	 * Core function which does whatever the maintenance script is designed to do.
	 *
	 * @param WikiRevision $rev Revision handed over by handleRevision()
	 */
	abstract public function processRevision( WikiRevision $rev );
}
181
/**
 * Maintenance script that runs a regex in the revisions from a dump.
 */
class SearchDump extends DumpIterator {

	/**
	 * Add the description and the mandatory --regex option on top of the
	 * options inherited from DumpIterator.
	 */
	public function __construct() {
		parent::__construct();
		$this->addDescription( 'Runs a regex in the revisions from a dump' );
		$this->addOption( 'regex', 'Searching regex', true, true );
	}

	/**
	 * This script declares that it runs without database access.
	 *
	 * @return int Maintenance::DB_NONE
	 */
	public function getDbType() {
		return Maintenance::DB_NONE;
	}

	/**
	 * Print the title and timestamp of the revision when its search-index
	 * text matches the regex given with --regex.
	 *
	 * @param WikiRevision $rev
	 */
	public function processRevision( WikiRevision $rev ) {
		$pattern = $this->getOption( 'regex' );
		$searchText = $rev->getContent()->getTextForSearchIndex();

		// Nothing to report unless the pattern matches.
		if ( !preg_match( $pattern, $searchText ) ) {
			return;
		}

		$this->output( $rev->getTitle() . " matches at edit from " . $rev->getTimestamp() . "\n" );
	}
}
206
207// @codeCoverageIgnoreStart
208$maintClass = SearchDump::class;
209require_once RUN_MAINTENANCE_IF_MAIN;
210// @codeCoverageIgnoreEnd
wfMessage( $key,... $params)
This is the function for getting translated interface messages.
if(!defined('MW_SETUP_CALLBACK'))
Definition WebStart.php:69
Base class for iterating over a dump.
__construct()
Default constructor.
execute()
Do the actual work.
conclusions()
Stub function for giving data about what was computed.
static disableInterwikis(string $prefix, array &$data)
checkOptions()
Stub function for processing additional options.
handleRevision( $rev)
Callback function for each revision, child classes should override processRevision instead.
finalSetup(SettingsBuilder $settingsBuilder)
Handle some last-minute setup here.
Base class for content handling.
Imports an XML dump from a file (either from file upload, files on disk, or HTTP)
Represents a revision, log entry or upload during the import process.
Null store backend, used to avoid DB errors during MediaWiki installation.
A class containing constants representing the names of configuration variables.
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
getDbType()
Does the script need different DB access? By default, we give Maintenance scripts normal rights to the DB.
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
hasOption( $name)
Checks to see if a particular option was set.
getOption( $name, $default=null)
Get an option, or return the default.
error( $err, $die=0)
Throw an error to the user.
getServiceContainer()
Returns the main service container.
getStdin( $len=null)
Return input from stdin.
addDescription( $text)
Set the description text.
Represents an authority that has all permissions.
Value object representing a content slot associated with a page revision.
Builder class for constructing a Config object from a set of sources during bootstrap.
putConfigValues(array $values)
Sets the value of multiple config variables.
Represents a title within MediaWiki.
Definition Title.php:69
User class for the MediaWiki software.
Definition User.php:130
$wgHooks
Config variable stub for the Hooks setting, for use by phpdoc and IDEs.
$source