MediaWiki REL1_35
dumpIterator.php
Go to the documentation of this file.
1<?php
29require_once __DIR__ . '/Maintenance.php';
30
/**
 * Base class for iterating over a dump.
 *
 * Feeds every revision of either a single text file (--file) or an XML dump
 * read from stdin (--dump -) to processRevision(), which subclasses implement.
 */
abstract class DumpIterator extends Maintenance {
	/** @var int Revisions seen so far; restarted at 1 once the --from page is reached */
	private $count = 0;

	/** @var float|null microtime( true ) taken just before the dump import starts */
	private $startTime;

	/**
	 * @var string|bool|null Page to start processing from. false (or null)
	 *  means there is no pending start page and every revision is processed.
	 */
	private $from;

	/**
	 * Register the description and the --file, --dump and --from options.
	 */
	public function __construct() {
		parent::__construct();
		$this->addDescription( 'Does something with a dump' );
		$this->addOption( 'file', 'File with text to run.', false, true );
		$this->addOption( 'dump', 'XML dump to execute all revisions.', false, true );
		$this->addOption( 'from', 'Article from XML dump to start from.', false, true );
	}

	/**
	 * Do the actual work: process one file, or import a dump from stdin and
	 * report timing and memory statistics on the error channel.
	 */
	public function execute() {
		// Exactly one of --file / --dump must be given.
		if ( !( $this->hasOption( 'file' ) xor $this->hasOption( 'dump' ) ) ) {
			$this->fatalError( "You must provide a file or dump" );
		}

		$this->checkOptions();

		if ( $this->hasOption( 'file' ) ) {
			$this->executeOnFile( $this->getOption( 'file' ) );

			return;
		}

		$this->startTime = microtime( true );

		if ( $this->getOption( 'dump' ) !== '-' ) {
			$this->fatalError( "Sorry, I don't support dump filenames yet. "
				. "Use - and provide it on stdin on the meantime." );
		}
		$source = new ImportStreamSource( $this->getStdin() );
		$importer = new WikiImporter( $source, $this->getConfig() );

		$importer->setRevisionCallback( [ $this, 'handleRevision' ] );
		$importer->setNoticeCallback( static function ( $msg, $params ) {
			echo wfMessage( $msg, $params )->text() . "\n";
		} );

		// false is the "no start page" sentinel that handleRevision() tests for;
		// defaulting to null here made it treat every run as filtered.
		$this->from = $this->getOption( 'from', false );
		$this->count = 0;
		$importer->doImport();

		$this->conclusions();

		$delta = microtime( true ) - $this->startTime;
		$this->error( "Done {$this->count} revisions in " . round( $delta, 2 ) . " seconds " );
		if ( $delta > 0 ) {
			$this->error( round( $this->count / $delta, 2 ) . " pages/sec" );
		}

		# Perform the memory_get_peak_usage() when all the other data has been
		# output so there's no damage if it dies. It is only available since
		# 5.2.0 (since 5.2.1 if you haven't compiled with --enable-memory-limit)
		$this->error( "Memory peak usage of " . memory_get_peak_usage() . " bytes\n" );
	}

	/**
	 * Build a single revision from a text file and hand it to handleRevision().
	 *
	 * The page title is taken from the URL-decoded file name with a trailing
	 * ".txt" removed.
	 *
	 * @param string $path Path of the file holding the revision text
	 */
	private function executeOnFile( $path ) {
		$text = file_get_contents( $path );
		if ( $text === false ) {
			// Don't silently process a revision whose text is boolean false.
			$this->fatalError( "Unable to read file: $path" );
		}

		$revision = new WikiRevision( $this->getConfig() );
		$revision->setText( $text );
		$revision->setTitle( Title::newFromText(
			rawurldecode( basename( $path, '.txt' ) )
		) );

		// A single file is always processed; there is nothing to skip.
		$this->from = false;
		$this->handleRevision( $revision );
	}

	/**
	 * Handle some last-minute setup here.
	 *
	 * When the script declares it needs no database, switch off the features
	 * configured below so that no lookups are attempted through them.
	 */
	public function finalSetup() {
		parent::finalSetup();

		if ( $this->getDbType() == Maintenance::DB_NONE ) {
			// Without this declaration the assignments below only touch
			// function-local variables and have no effect.
			global $wgUseDatabaseMessages, $wgLocalisationCacheConf, $wgHooks;
			$wgUseDatabaseMessages = false;
			$wgLocalisationCacheConf['storeClass'] = LCStoreNull::class;
			$wgHooks['InterwikiLoadPrefix'][] = 'DumpIterator::disableInterwikis';
		}
	}

	/**
	 * InterwikiLoadPrefix hook handler that rejects every prefix.
	 *
	 * @param string $prefix Unused
	 * @param mixed &$data Unused
	 * @return bool Always false
	 */
	public static function disableInterwikis( $prefix, &$data ) {
		# Title::newFromText will check on each namespaced article if it's an interwiki.
		# We always answer that it is not.

		return false;
	}

	/**
	 * Callback function for each revision, child classes should override
	 * processRevision instead.
	 *
	 * Counts the revision, skips it while the --from page has not been
	 * reached yet, and otherwise forwards it to processRevision().
	 *
	 * @param WikiRevision $rev
	 */
	public function handleRevision( $rev ) {
		$title = $rev->getTitle();
		if ( !$title ) {
			$this->error( "Got bogus revision with null title!" );

			return;
		}

		$this->count++;
		// Both false and null mean "no start page pending". Testing only for
		// false made null look like an active filter that never matches.
		if ( $this->from !== false && $this->from !== null ) {
			// Compare as strings: a loose comparison would treat numeric-looking
			// titles such as "1e3" and "1000" as equal.
			if ( (string)$this->from !== (string)$title ) {
				return;
			}
			$this->output( "Skipped " . ( $this->count - 1 ) . " pages\n" );

			$this->count = 1;
			$this->from = false;
		}

		$this->processRevision( $rev );
	}

	/**
	 * Stub function for processing additional options.
	 */
	public function checkOptions() {
	}

	/**
	 * Stub function for giving data about what was computed.
	 */
	public function conclusions() {
	}

	/**
	 * Core function which does whatever the maintenance script is designed to do.
	 *
	 * @param WikiRevision $rev
	 */
	abstract public function processRevision( WikiRevision $rev );
}
168
175
176 public function __construct() {
177 parent::__construct();
178 $this->addDescription( 'Runs a regex in the revisions from a dump' );
179 $this->addOption( 'regex', 'Searching regex', true, true );
180 }
181
182 public function getDbType() {
184 }
185
189 public function processRevision( WikiRevision $rev ) {
190 if ( preg_match( $this->getOption( 'regex' ), $rev->getContent()->getTextForSearchIndex() ) ) {
191 $this->output( $rev->getTitle() . " matches at edit from " . $rev->getTimestamp() . "\n" );
192 }
193 }
194}
195
// Name the maintenance class defined by this file, then include the runner.
// NOTE(review): presumably the runner only executes the class when this file
// is the script being invoked - confirm against RUN_MAINTENANCE_IF_MAIN.
$maintClass = SearchDump::class;
require_once RUN_MAINTENANCE_IF_MAIN;
$wgUseDatabaseMessages
Translation using MediaWiki: namespace.
$wgHooks
Global list of hooks.
$wgLocalisationCacheConf
Localisation cache configuration.
wfMessage( $key,... $params)
This is the function for getting translated interface messages.
const RUN_MAINTENANCE_IF_MAIN
Base class for iterating over a dump.
static disableInterwikis( $prefix, &$data)
__construct()
Default constructor.
execute()
Do the actual work.
string bool null $from
processRevision(WikiRevision $rev)
Core function which does whatever the maintenance script is designed to do.
conclusions()
Stub function for giving data about what was computed.
checkOptions()
Stub function for processing additional options.
finalSetup()
Handle some last-minute setup here.
handleRevision( $rev)
Callback function for each revision, child classes should override processRevision instead.
Imports an XML dump from a file (either from file upload, files on disk, or HTTP)
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effo...
error( $err, $die=0)
Throw an error to the user.
const DB_NONE
Constants for DB access type.
output( $out, $channel=null)
Throw some output to the user.
getStdin( $len=null)
Return input from stdin.
hasOption( $name)
Checks to see if a particular option was set.
getDbType()
Does the script need different DB access? By default, we give Maintenance scripts normal rights to th...
addDescription( $text)
Set the description text.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.
Maintenance script that runs a regex in the revisions from a dump.
__construct()
Default constructor.
processRevision(WikiRevision $rev)
getDbType()
Does the script need different DB access? By default, we give Maintenance scripts normal rights to th...
XML file reader for the page data importer.
Represents a revision, log entry or upload during the import process.
getContent( $role=SlotRecord::MAIN)
$maintClass
$source