MediaWiki  1.23.0
dumpIterator.php
Go to the documentation of this file.
1 <?php
29 require_once __DIR__ . '/Maintenance.php';
30 
/**
 * Base class for iterating over a dump.
 *
 * Feeds either a single text file (--file) or every revision of an XML dump
 * read from stdin (--dump=-) to processRevision(), which child classes must
 * implement. Child classes may also override checkOptions() and conclusions().
 */
abstract class DumpIterator extends Maintenance {

	/** @var int Revisions handled so far; restarted at 1 once the --from title is reached */
	private $count = 0;

	/** @var float Value of microtime( true ) taken just before dump processing starts */
	private $startTime;

	/**
	 * @var string|null Title to start processing from, as given by --from.
	 * Null when nothing has to be skipped (or skipping is already over).
	 */
	protected $from = null;

	/**
	 * Registers the options shared by every dump iterating script.
	 */
	public function __construct() {
		parent::__construct();
		$this->mDescription = "Does something with a dump";
		$this->addOption( 'file', 'File with text to run.', false, true );
		$this->addOption( 'dump', 'XML dump to execute all revisions.', false, true );
		$this->addOption( 'from', 'Article from XML dump to start from.', false, true );
	}

	/**
	 * Do the actual work: process one file, or every revision of a dump
	 * supplied on stdin, and then print timing and memory statistics.
	 */
	public function execute() {
		// Exactly one of --file / --dump has to be present.
		if ( (bool)$this->hasOption( 'file' ) === (bool)$this->hasOption( 'dump' ) ) {
			$this->error( "You must provide a file or dump", true );
		}

		$this->checkOptions();

		if ( $this->hasOption( 'file' ) ) {
			$path = $this->getOption( 'file' );
			$text = file_get_contents( $path );
			if ( $text === false ) {
				// Don't hand a boolean to the revision as if it were page text.
				$this->error( "Unable to read file '$path'", true );
			}

			// The page title is derived from the file name: strip a ".txt"
			// suffix and undo URL-encoding.
			$revision = new WikiRevision;
			$revision->setText( $text );
			$revision->setTitle( Title::newFromText( rawurldecode( basename( $path, '.txt' ) ) ) );
			$this->handleRevision( $revision );
			return;
		}

		$this->startTime = microtime( true );

		if ( $this->getOption( 'dump' ) == '-' ) {
			$source = new ImportStreamSource( $this->getStdin() );
		} else {
			// The second argument asks error() to abort, so $source is always
			// set when execution continues past this point.
			$this->error( "Sorry, I don't support dump filenames yet. Use - and provide it on stdin on the meantime.", true );
		}
		$importer = new WikiImporter( $source );

		// Objects are passed by handle; no reference to $this is needed.
		$importer->setRevisionCallback( array( $this, 'handleRevision' ) );

		$this->from = $this->getOption( 'from', null );
		$this->count = 0;
		$importer->doImport();

		$this->conclusions();

		$delta = microtime( true ) - $this->startTime;
		$this->error( "Done {$this->count} revisions in " . round( $delta, 2 ) . " seconds " );
		if ( $delta > 0 ) {
			// Guarded to avoid a division by zero on an instantaneous run.
			$this->error( round( $this->count / $delta, 2 ) . " pages/sec" );
		}

		# Perform the memory_get_peak_usage() when all the other data has been output so there's no damage if it dies.
		# It is only available since 5.2.0 (since 5.2.1 if you haven't compiled with --enable-memory-limit)
		$this->error( "Memory peak usage of " . memory_get_peak_usage() . " bytes\n" );
	}

	/**
	 * Handle some last-minute setup here.
	 *
	 * When the script declares that it needs no database (DB_NONE), the
	 * database message lookup is switched off, the localisation cache store
	 * class is set to 'LCStoreNull' and interwiki prefix loading is
	 * short-circuited through disableInterwikis().
	 */
	public function finalSetup() {
		parent::finalSetup();

		if ( $this->getDbType() == Maintenance::DB_NONE ) {
			global $wgUseDatabaseMessages, $wgLocalisationCacheConf, $wgHooks;
			$wgUseDatabaseMessages = false;
			$wgLocalisationCacheConf['storeClass'] = 'LCStoreNull';
			$wgHooks['InterwikiLoadPrefix'][] = 'DumpIterator::disableInterwikis';
		}
	}

	/**
	 * Handler registered for the 'InterwikiLoadPrefix' hook by finalSetup().
	 *
	 * @param string $prefix Unused
	 * @param mixed &$data Unused, left untouched
	 * @return bool Always false
	 */
	public static function disableInterwikis( $prefix, &$data ) {
		# Title::newFromText will check on each namespaced article if it's an interwiki.
		# We always answer that it is not.

		return false;
	}

	/**
	 * Callback function for each revision, child classes should override
	 * processRevision instead.
	 *
	 * Revisions without a title are reported and dropped. While a --from
	 * title is pending, revisions are counted but not processed until a
	 * revision with that title shows up.
	 *
	 * @param WikiRevision $rev
	 */
	public function handleRevision( $rev ) {
		$title = $rev->getTitle();
		if ( !$title ) {
			$this->error( "Got bogus revision with null title!" );
			return;
		}

		$this->count++;
		if ( isset( $this->from ) ) {
			// Compare as strings, strictly: a loose comparison would treat
			// numeric-looking titles such as "1e3" and "1000" as equal.
			if ( (string)$this->from !== (string)$title ) {
				return;
			}
			$this->output( "Skipped " . ( $this->count - 1 ) . " pages\n" );

			// Start counting afresh from the first processed revision and
			// stop skipping from now on.
			$this->count = 1;
			$this->from = null;
		}

		$this->processRevision( $rev );
	}

	/* Stub function for processing additional options */
	public function checkOptions() {
		return;
	}

	/* Stub function for giving data about what was computed */
	public function conclusions() {
		return;
	}

	/* Core function which does whatever the maintenance script is designed to do */
	abstract public function processRevision( $rev );
}
151 
/**
 * Maintenance script that runs a regex in the revisions from a dump.
 */
class SearchDump extends DumpIterator {

	/**
	 * Adds the mandatory --regex option on top of the DumpIterator options.
	 */
	public function __construct() {
		parent::__construct();
		$this->mDescription = "Runs a regex in the revisions from a dump";
		$this->addOption( 'regex', 'Searching regex', true, true );
	}

	/**
	 * @return int Maintenance::DB_NONE
	 */
	public function getDbType() {
		return Maintenance::DB_NONE;
	}

	/**
	 * Prints the title and timestamp of the revision when its search-index
	 * text matches the --regex pattern.
	 *
	 * @param WikiRevision $rev
	 */
	public function processRevision( $rev ) {
		$matched = preg_match(
			$this->getOption( 'regex' ),
			$rev->getContent()->getTextForSearchIndex()
		);

		if ( $matched === false ) {
			// preg_match() returns false on failure (e.g. a malformed pattern
			// or a PCRE limit being hit); don't silently report "no match".
			$this->error( "Regex failed on " . $rev->getTitle() .
				" (preg_last_error: " . preg_last_error() . ")" );
			return;
		}

		if ( $matched ) {
			$this->output( $rev->getTitle() . " matches at edit from " . $rev->getTimestamp() . "\n" );
		}
	}
}
178 
// Name the class implemented by this script, then pull in the file behind
// RUN_MAINTENANCE_IF_MAIN (presumably the runner that instantiates
// $maintClass when this file is the entry point — confirm in Maintenance.php).
$maintClass = 'SearchDump';
require_once RUN_MAINTENANCE_IF_MAIN;
WikiImporter
XML file reader for the page data importer.
Definition: Import.php:33
Title\newFromText
static newFromText( $text, $defaultNamespace=NS_MAIN)
Create a new Title from text, such as what one would find in a link.
Definition: Title.php:189
DumpIterator\__construct
__construct()
Default constructor.
Definition: dumpIterator.php:41
php
skin txt MediaWiki includes four core it has been set as the default in MediaWiki since the replacing Monobook it had been been the default skin since before being replaced by Vector largely rewritten in while keeping its appearance Several legacy skins were removed in the as the burden of supporting them became too heavy to bear Those in etc for skin dependent CSS etc for skin dependent JavaScript These can also be customised on a per user by etc This feature has led to a wide variety of user styles becoming that gallery is a good place to ending in php
Definition: skin.txt:62
Maintenance\getStdin
getStdin( $len=null)
Return input from stdin.
Definition: Maintenance.php:287
Maintenance\getDbType
getDbType()
Does the script need different DB access? By default, we give Maintenance scripts normal rights to th...
Definition: Maintenance.php:399
DumpIterator\handleRevision
handleRevision( $rev)
Callback function for each revision, child classes should override processRevision instead.
Definition: dumpIterator.php:117
SearchDump\getDbType
getDbType()
Does the script need different DB access? By default, we give Maintenance scripts normal rights to th...
Definition: dumpIterator.php:165
SearchDump\processRevision
processRevision( $rev)
Definition: dumpIterator.php:172
$maintClass
$maintClass
Definition: dumpIterator.php:179
Maintenance\addOption
addOption( $name, $description, $required=false, $withArg=false, $shortName=false)
Add a parameter to the script.
Definition: Maintenance.php:169
WikiRevision\setText
setText( $text)
Definition: Import.php:1123
RUN_MAINTENANCE_IF_MAIN
require_once RUN_MAINTENANCE_IF_MAIN
Definition: maintenance.txt:50
Maintenance
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effo...
Definition: maintenance.txt:39
$wgHooks
$wgHooks['ArticleShow'][]
Definition: hooks.txt:110
ImportStreamSource
Definition: Import.php:1630
DumpIterator\execute
execute()
Do the actual work.
Definition: dumpIterator.php:49
DumpIterator\$startTime
$startTime
Definition: dumpIterator.php:39
DumpIterator\processRevision
processRevision( $rev)
SearchDump
Maintenance script that runs a regex in the revisions from a dump.
Definition: dumpIterator.php:157
DumpIterator\$count
$count
Definition: dumpIterator.php:38
SearchDump\__construct
__construct()
Default constructor.
Definition: dumpIterator.php:159
array
the array() calling protocol came about after MediaWiki 1.4rc1.
List of Api Query prop modules.
global
when a variable name is used in a it is silently declared as a new masking the global
Definition: design.txt:93
DumpIterator\checkOptions
checkOptions()
Definition: dumpIterator.php:139
$title
presenting them properly to the user as errors is done by the caller $title
Definition: hooks.txt:1324
DumpIterator\disableInterwikis
static disableInterwikis( $prefix, &$data)
Definition: dumpIterator.php:105
Maintenance\DB_NONE
const DB_NONE
Constants for DB access type.
Definition: Maintenance.php:57
DumpIterator
Base class for iterating over a dump.
Definition: dumpIterator.php:36
$rev
presenting them properly to the user as errors is done by the caller return true use this to change the list i e etc $rev
Definition: hooks.txt:1337
Maintenance\getOption
getOption( $name, $default=null)
Get an option, or return the default.
Definition: Maintenance.php:191
WikiRevision
Definition: Import.php:1037
$source
if(PHP_SAPI !='cli') $source
Definition: mwdoc-filter.php:18
Maintenance\error
error( $err, $die=0)
Throw an error to the user.
Definition: Maintenance.php:333
Maintenance\output
output( $out, $channel=null)
Throw some output to the user.
Definition: Maintenance.php:314
DumpIterator\finalSetup
finalSetup()
Handle some last-minute setup here.
Definition: dumpIterator.php:94
from
Please log in again after you receive it</td >< td > s a saved copy from
Definition: All_system_messages.txt:3297
Maintenance\hasOption
hasOption( $name)
Checks to see if a particular param exists.
Definition: Maintenance.php:181
DumpIterator\conclusions
conclusions()
Definition: dumpIterator.php:144