MediaWiki master
dumpIterator.php
Go to the documentation of this file.
1<?php
23
24// @codeCoverageIgnoreStart
25require_once __DIR__ . '/Maintenance.php';
26// @codeCoverageIgnoreEnd
27
/**
 * Base class for iterating over a dump.
 *
 * Feeds every revision of an XML dump read from stdin (--dump=-), or the
 * contents of a single text file (--file), to processRevision(). Subclasses
 * implement processRevision() and may override checkOptions() and
 * conclusions().
 */
abstract class DumpIterator extends Maintenance {
	/** @var int Number of revisions counted so far */
	private $count = 0;

	/** @var float|null Timestamp (microtime) at which dump processing started */
	private $startTime;

	/**
	 * @var string|null Title to fast-forward to before processing starts,
	 *  or null when no revisions are (or are no longer) being skipped.
	 */
	private $from = null;

	public function __construct() {
		parent::__construct();
		$this->addDescription( 'Does something with a dump' );
		$this->addOption( 'file', 'File with text to run.', false, true );
		$this->addOption( 'dump', 'XML dump to execute all revisions.', false, true );
		$this->addOption( 'from', 'Article from XML dump to start from.', false, true );
	}

	/**
	 * Run the script: exactly one of --file or --dump must be given.
	 */
	public function execute() {
		if ( !( $this->hasOption( 'file' ) xor $this->hasOption( 'dump' ) ) ) {
			$this->fatalError( "You must provide a file or dump" );
		}

		$this->checkOptions();

		if ( $this->hasOption( 'file' ) ) {
			$this->handleFile( $this->getOption( 'file' ) );

			return;
		}

		$this->startTime = microtime( true );

		if ( $this->getOption( 'dump' ) === '-' ) {
			$source = new ImportStreamSource( $this->getStdin() );
		} else {
			$this->fatalError( "Sorry, I don't support dump filenames yet. "
				. "Use - and provide it on stdin on the meantime." );
		}

		$user = User::newSystemUser( User::MAINTENANCE_SCRIPT_USER, [ 'steal' => true ] );

		$importer = $this->getServiceContainer()
			->getWikiImporterFactory()
			->getWikiImporter( $source, new UltimateAuthority( $user ) );

		$importer->setRevisionCallback(
			$this->handleRevision( ... ) );
		$importer->setNoticeCallback( static function ( $msg, $params ) {
			echo wfMessage( $msg, $params )->text() . "\n";
		} );

		// null means "process everything"; handleRevision() resets the value
		// to null once the requested starting title has been reached.
		$from = $this->getOption( 'from', null );
		$this->from = $from === null ? null : (string)$from;
		$this->count = 0;
		$importer->doImport();

		$this->conclusions();

		$delta = microtime( true ) - $this->startTime;
		$this->error( "Done {$this->count} revisions in " . round( $delta, 2 ) . " seconds " );
		if ( $delta > 0 ) {
			$this->error( round( $this->count / $delta, 2 ) . " pages/sec" );
		}

		// Report peak memory last, after all other results have been output.
		$this->error( "Memory peak usage of " . memory_get_peak_usage() . " bytes\n" );
	}

	/**
	 * Process a single text file as if it were one revision.
	 *
	 * The title is derived from the URL-decoded base name of the file,
	 * without its ".txt" suffix.
	 *
	 * @param string $file Path of the file to read
	 */
	private function handleFile( $file ) {
		$text = file_get_contents( $file );
		if ( $text === false ) {
			$this->fatalError( "Unable to read file '$file'" );
		}

		$title = Title::newFromText( rawurldecode( basename( $file, '.txt' ) ) );
		if ( !$title ) {
			$this->fatalError( "Unable to derive a valid title from file name '$file'" );
		}

		$revision = new WikiRevision();
		$revision->setTitle( $title );
		$revision->setContent( SlotRecord::MAIN, ContentHandler::makeContent( $text, $title ) );

		// Never skip anything in single-file mode.
		$this->from = null;
		$this->handleRevision( $revision );
	}

	/**
	 * Handle some last-minute setup here.
	 *
	 * When the script declares that it needs no database (DB_NONE), register
	 * the interwiki-disabling hook handler and switch off database messages
	 * and the localisation cache store.
	 *
	 * @param SettingsBuilder $settingsBuilder
	 */
	public function finalSetup( SettingsBuilder $settingsBuilder ) {
		parent::finalSetup( $settingsBuilder );

		if ( $this->getDbType() == Maintenance::DB_NONE ) {
			// TODO: Allow hooks to be registered via SettingsBuilder as well!
			// This matches the idea of unifying SettingsBuilder with ExtensionRegistry.
			// phpcs:disable MediaWiki.Usage.DeprecatedGlobalVariables.Deprecated$wgHooks
			global $wgHooks;
			$wgHooks['InterwikiLoadPrefix'][] = 'DumpIterator::disableInterwikis';

			$settingsBuilder->putConfigValues( [
				MainConfigNames::UseDatabaseMessages => false,
				MainConfigNames::LocalisationCacheConf => [ 'storeClass' => LCStoreNull::class ],
			] );
		}
	}

	/**
	 * Handler registered for the InterwikiLoadPrefix hook by finalSetup().
	 *
	 * Title::newFromText will check on each namespaced article if it's an
	 * interwiki. We always answer that it is not.
	 *
	 * @param string $prefix
	 * @param array &$data
	 * @return bool Always false
	 */
	public static function disableInterwikis( string $prefix, array &$data ): bool {
		return false;
	}

	/**
	 * Callback function for each revision, child classes should override
	 * processRevision instead.
	 *
	 * Counts the revision and, while a --from title is pending, skips
	 * revisions until that title is reached. From that revision onwards every
	 * revision is handed to processRevision().
	 *
	 * @param WikiRevision $rev
	 */
	public function handleRevision( $rev ) {
		$title = $rev->getTitle();
		if ( !$title ) {
			$this->error( "Got bogus revision with null title!" );

			return;
		}

		$this->count++;
		if ( $this->from !== null ) {
			// Compare as strings so the result does not depend on PHP's
			// loose comparison rules between a string and an object.
			if ( $this->from !== (string)$title ) {
				return;
			}
			$this->output( "Skipped " . ( $this->count - 1 ) . " pages\n" );

			// Restart the count at the first processed revision and stop skipping.
			$this->count = 1;
			$this->from = null;
		}

		$this->processRevision( $rev );
	}

	/**
	 * Stub function for processing additional options.
	 */
	public function checkOptions() {
	}

	/**
	 * Stub function for giving data about what was computed.
	 */
	public function conclusions() {
	}

	/**
	 * Core function which does whatever the maintenance script is designed to do.
	 *
	 * @param WikiRevision $rev
	 */
	abstract public function processRevision( WikiRevision $rev );
}
178
/**
 * Maintenance script that runs a regex in the revisions from a dump.
 */
class SearchDump extends DumpIterator {

	public function __construct() {
		parent::__construct();
		$this->addDescription( 'Runs a regex in the revisions from a dump' );
		$this->addOption( 'regex', 'Searching regex', true, true );
	}

	/**
	 * This script declares that it needs no database access.
	 *
	 * @return int Maintenance::DB_NONE
	 */
	public function getDbType() {
		return Maintenance::DB_NONE;
	}

	/**
	 * Reject an unusable --regex before any revision is read.
	 *
	 * preg_match() returns false (rather than 0) when the pattern cannot be
	 * used; without this check such a pattern would silently look like
	 * "no revision matches".
	 */
	public function checkOptions() {
		$regex = $this->getOption( 'regex' );
		if ( preg_match( $regex, '' ) === false ) {
			$this->fatalError( "Invalid regex: $regex" );
		}
	}

	/**
	 * Print the title and timestamp of a revision whose search-index text
	 * matches the --regex option.
	 *
	 * @param WikiRevision $rev
	 */
	public function processRevision( WikiRevision $rev ) {
		$text = $rev->getContent()->getTextForSearchIndex();
		$matched = preg_match( $this->getOption( 'regex' ), $text );

		if ( $matched === false ) {
			// A PCRE failure on this text is not the same as "no match":
			// report it and carry on with the next revision.
			$this->error( "Regex failed on " . $rev->getTitle() . ": " . preg_last_error_msg() );

			return;
		}

		if ( $matched ) {
			$this->output( $rev->getTitle() . " matches at edit from " . $rev->getTimestamp() . "\n" );
		}
	}
}
203
204// @codeCoverageIgnoreStart
205$maintClass = SearchDump::class;
206require_once RUN_MAINTENANCE_IF_MAIN;
207// @codeCoverageIgnoreEnd
wfMessage( $key,... $params)
This is the function for getting translated interface messages.
if(!defined('MW_SETUP_CALLBACK'))
Definition WebStart.php:69
Base class for iterating over a dump.
__construct()
Default constructor.
execute()
Do the actual work.
conclusions()
Stub function for giving data about what was computed.
static disableInterwikis(string $prefix, array &$data)
checkOptions()
Stub function for processing additional options.
handleRevision( $rev)
Callback function for each revision, child classes should override processRevision instead.
finalSetup(SettingsBuilder $settingsBuilder)
Handle some last-minute setup here.
Base class for content handling.
A class containing constants representing the names of configuration variables.
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
getDbType()
Does the script need different DB access? By default, we give Maintenance scripts normal rights to the DB.
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
hasOption( $name)
Checks to see if a particular option was set.
getOption( $name, $default=null)
Get an option, or return the default.
error( $err, $die=0)
Throw an error to the user.
getServiceContainer()
Returns the main service container.
getStdin( $len=null)
Return input from stdin.
addDescription( $text)
Set the description text.
Represents an authority that has all permissions.
Value object representing a content slot associated with a page revision.
Builder class for constructing a Config object from a set of sources during bootstrap.
putConfigValues(array $values)
Sets the value of multiple config variables.
Represents a title within MediaWiki.
Definition Title.php:69
User class for the MediaWiki software.
Definition User.php:130
$wgHooks
Config variable stub for the Hooks setting, for use by phpdoc and IDEs.
$source