MediaWiki  master
populateContentModel.php
Go to the documentation of this file.
1 <?php
24 require_once __DIR__ . '/Maintenance.php';
25 
28 
/** Database domain ID read from the DB handle in execute(); used by clearCache() to build the global revision cache key. */
34  protected $wikiId;
/** @var WANObjectCache Main WAN object cache, fetched from MediaWikiServices in execute() and used by clearCache(). */
36  protected $wanCache;
37 
/**
 * Register the script description, its two options and the default batch size.
 */
public function __construct() {
	parent::__construct();

	$this->addDescription( 'Populate the various content_* fields' );

	// Both options are mandatory and both take a value.
	$required = true;
	$withArg = true;
	$this->addOption(
		'ns',
		'Namespace to run in, or "all" for all namespaces',
		$required,
		$withArg
	);
	$this->addOption( 'table', 'Table to run in', $required, $withArg );

	$this->setBatchSize( 100 );
}
45 
/**
 * Validate the 'ns' and 'table' options and run the matching populate method.
 *
 * The namespace must be 'all' or a string of digits; the table must be one of
 * 'revision', 'archive' or 'page'. Anything else ends the script through
 * fatalError().
 */
public function execute() {
	$dbw = $this->getDB( DB_MASTER );

	// Kept on the instance because clearCache() needs both to build cache keys.
	$this->wikiId = $dbw->getDomainID();
	$this->wanCache = MediaWikiServices::getInstance()->getMainWANObjectCache();

	$nsOption = $this->getOption( 'ns' );
	if ( $nsOption !== 'all' && !ctype_digit( $nsOption ) ) {
		$this->fatalError( 'Invalid namespace' );
	}
	if ( $nsOption === 'all' ) {
		$ns = 'all';
	} else {
		$ns = (int)$nsOption;
	}

	$table = $this->getOption( 'table' );
	if ( $table === 'revision' || $table === 'archive' ) {
		$this->populateRevisionOrArchive( $dbw, $table, $ns );
	} elseif ( $table === 'page' ) {
		$this->populatePage( $dbw, $ns );
	} else {
		$this->fatalError( "Invalid table name: $table" );
	}
}
70 
/**
 * Delete the two WAN cache entries that can hold a content model for a revision.
 *
 * @param int $page_id Page the revision belongs to
 * @param int $rev_id Revision whose cached data should be dropped
 */
protected function clearCache( $page_id, $rev_id ) {
	$cache = $this->wanCache;

	// WikiPage content model cache
	$cache->delete(
		$cache->makeKey( 'page-content-model', $rev_id )
	);

	// Revision object cache, which contains a content model
	$cache->delete(
		$cache->makeGlobalKey( 'revision', $this->wikiId, $page_id, $rev_id )
	);
}
82 
/**
 * Write one content model to a batch of page rows.
 *
 * @param IDatabase $dbw Database handle to write through
 * @param int[] $pageIds page_id values of the rows to update
 * @param string $model Content model ID to store in page_content_model
 */
private function updatePageRows( IDatabase $dbw, $pageIds, $model ) {
	$count = count( $pageIds );
	$this->output( "Setting $count rows to $model..." );
	$dbw->update(
		'page',
		[ 'page_content_model' => $model ],
		[ 'page_id' => $pageIds ],
		__METHOD__
	);
	// Give the replica DBs time to catch up before the next batch is written,
	// the same way the single-row update path in this script does.
	wfWaitForSlaves();
	$this->output( "done.\n" );
}
95 
/**
 * Fill in page_content_model for every page row where it is NULL.
 *
 * Rows are read in page_id order, one batch at a time. Each page is assigned
 * the default content model for its title; page IDs are grouped per model and
 * written out whenever a group reaches the batch size, with the remainder
 * flushed once no more rows are found.
 *
 * @param IDatabase $dbw Database handle used for both reading and writing
 * @param int|string $ns Namespace index to restrict to, or 'all'
 */
protected function populatePage( IDatabase $dbw, $ns ) {
	// Pending page IDs, keyed by the content model they will be set to.
	$toSave = [];
	$lastId = 0;
	$nsCondition = $ns === 'all' ? [] : [ 'page_namespace' => $ns ];
	$batchSize = $this->getBatchSize();
	do {
		$rows = $dbw->select(
			'page',
			[ 'page_namespace', 'page_title', 'page_id' ],
			[
				'page_content_model' => null,
				'page_id > ' . $dbw->addQuotes( $lastId ),
			] + $nsCondition,
			__METHOD__,
			[ 'LIMIT' => $batchSize, 'ORDER BY' => 'page_id ASC' ]
		);
		$this->output( "Fetched {$rows->numRows()} rows.\n" );
		foreach ( $rows as $row ) {
			$title = Title::newFromRow( $row );
			// Without this assignment $model is undefined and every page ID
			// would be filed under an empty key.
			$model = ContentHandler::getDefaultModelFor( $title );
			$toSave[$model][] = $row->page_id;
			if ( count( $toSave[$model] ) >= $batchSize ) {
				$this->updatePageRows( $dbw, $toSave[$model], $model );
				unset( $toSave[$model] );
			}
			// Resume point for the next SELECT.
			$lastId = $row->page_id;
		}
	} while ( $rows->numRows() >= $batchSize );

	// Write whatever is left in the partially filled groups.
	foreach ( $toSave as $model => $pages ) {
		$this->updatePageRows( $dbw, $pages, $model );
	}
}
128 
/**
 * Set the content model and that model's default format on a batch of
 * revision or archive rows.
 *
 * @param IDatabase $dbw Database handle to write through
 * @param int[] $ids Primary key values (rev_id or ar_id) of the rows to update
 * @param string $model Content model ID to store
 * @param string $table 'archive' selects the ar_* columns; any other value the rev_* columns
 */
private function updateRevisionOrArchiveRows( IDatabase $dbw, $ids, $model, $table ) {
	if ( $table === 'archive' ) {
		$modelField = 'ar_content_model';
		$formatField = 'ar_content_format';
		$idField = 'ar_id';
	} else {
		$modelField = 'rev_content_model';
		$formatField = 'rev_content_format';
		$idField = 'rev_id';
	}

	$count = count( $ids );

	// The format written alongside the model is always the handler's default.
	$handler = MediaWikiServices::getInstance()
		->getContentHandlerFactory()
		->getContentHandler( $model );
	$format = $handler->getDefaultFormat();

	$this->output( "Setting $count rows to $model / $format..." );

	$newValues = [ $modelField => $model, $formatField => $format ];
	$conditions = [ $idField => $ids ];
	$dbw->update( $table, $newValues, $conditions, __METHOD__ );

	$this->output( "done.\n" );
}
150 
/**
 * Fill in the content model for revision or archive rows where it is NULL.
 *
 * Rows are read in primary-key order, one batch at a time. For each row the
 * content handler for the title's content model is looked up:
 *  - if the row has no format, or its format equals the handler's default
 *    format, the row is queued and later written in bulk (model + default format);
 *  - otherwise only the model column is updated immediately for that row.
 *
 * Cache entries for queued rows are purged right after the batch containing
 * them has been written, so the list of pending purges stays bounded by the
 * batch size per model instead of growing for the whole run.
 *
 * @param IDatabase $dbw Database handle used for both reading and writing
 * @param string $table 'archive' for the archive table; any other value is treated as 'revision'
 * @param int|string $ns Namespace index to restrict to, or 'all'
 */
protected function populateRevisionOrArchive( IDatabase $dbw, $table, $ns ) {
	$prefix = $table === 'archive' ? 'ar' : 'rev';
	$model_column = "{$prefix}_content_model";
	$format_column = "{$prefix}_content_format";
	$key = "{$prefix}_id";
	if ( $table === 'archive' ) {
		$selectTables = 'archive';
		$fields = [ 'ar_namespace', 'ar_title' ];
		$join_conds = [];
		$where = $ns === 'all' ? [] : [ 'ar_namespace' => $ns ];
		$page_id_column = 'ar_page_id';
		$rev_id_column = 'ar_rev_id';
	} else { // revision
		$selectTables = [ 'revision', 'page' ];
		$fields = [ 'page_title', 'page_namespace' ];
		$join_conds = [ 'page' => [ 'JOIN', 'rev_page=page_id' ] ];
		$where = $ns === 'all' ? [] : [ 'page_namespace' => $ns ];
		$page_id_column = 'rev_page';
		$rev_id_column = 'rev_id';
	}

	// Loop-invariant, so look it up once instead of once per row.
	$contentHandlerFactory = MediaWikiServices::getInstance()->getContentHandlerFactory();

	// Both arrays are keyed by content model ID and always hold the same
	// number of entries per key: row IDs to write, and the page/revision ID
	// pairs whose cache entries must be purged once those rows are written.
	$toSave = [];
	$idsToClear = [];
	$lastId = 0;
	$batchSize = $this->getBatchSize();
	do {
		$rows = $dbw->select(
			$selectTables,
			array_merge(
				$fields,
				[ $model_column, $format_column, $key, $page_id_column, $rev_id_column ]
			),
			// @todo support populating format if model is already set
			[
				$model_column => null,
				"$key > " . $dbw->addQuotes( $lastId ),
			] + $where,
			__METHOD__,
			[ 'LIMIT' => $batchSize, 'ORDER BY' => "$key ASC" ],
			$join_conds
		);
		$this->output( "Fetched {$rows->numRows()} rows.\n" );
		foreach ( $rows as $row ) {
			if ( $table === 'archive' ) {
				$title = Title::makeTitle( $row->ar_namespace, $row->ar_title );
			} else {
				$title = Title::newFromRow( $row );
			}
			// Advance the resume point before any "continue" below so a
			// skipped row is never fetched again.
			$lastId = $row->{$key};
			try {
				$handler = $contentHandlerFactory->getContentHandler( $title->getContentModel() );
			} catch ( MWException $e ) {
				$this->error( "Invalid content model for $title" );
				continue;
			}
			$defaultModel = $handler->getModelID();
			$defaultFormat = $handler->getDefaultFormat();
			$dbModel = $row->{$model_column};
			$dbFormat = $row->{$format_column};
			$id = $row->{$key};

			$nothingStored = $dbModel === null && $dbFormat === null;
			if ( !$nothingStored && $dbFormat !== $defaultFormat ) {
				// non-default format, just update now
				$this->output( "Updating model to match format for $table $id of $title... " );
				$dbw->update(
					$table,
					[ $model_column => $defaultModel ],
					[ $key => $id ],
					__METHOD__
				);
				wfWaitForSlaves();
				$this->clearCache( $row->{$page_id_column}, $row->{$rev_id_column} );
				$this->output( "done.\n" );
				continue;
			}

			// Nothing stored yet, or the stored format is already the default:
			// queue the row so model and default format are written in bulk.
			$toSave[$defaultModel][] = $id;
			$idsToClear[$defaultModel][] = [
				'page_id' => $row->{$page_id_column},
				'rev_id' => $row->{$rev_id_column},
			];

			if ( count( $toSave[$defaultModel] ) >= $batchSize ) {
				$this->flushRevisionOrArchiveBatch(
					$dbw,
					$table,
					$defaultModel,
					$toSave[$defaultModel],
					$idsToClear[$defaultModel]
				);
				unset( $toSave[$defaultModel], $idsToClear[$defaultModel] );
			}
		}
	} while ( $rows->numRows() >= $batchSize );

	// Write and purge whatever is left in the partially filled groups.
	foreach ( $toSave as $model => $ids ) {
		$this->flushRevisionOrArchiveBatch( $dbw, $table, $model, $ids, $idsToClear[$model] );
	}
}

/**
 * Write one queued batch of rows for a single content model, then purge the
 * cache entries of the revisions that were just updated.
 *
 * @param IDatabase $dbw Database handle to write through
 * @param string $table 'revision' or 'archive'
 * @param string $model Content model ID being written
 * @param int[] $ids Primary key values of the rows to update
 * @param array[] $cacheIds List of [ 'page_id' => ..., 'rev_id' => ... ] pairs to purge
 */
private function flushRevisionOrArchiveBatch( IDatabase $dbw, $table, $model, array $ids, array $cacheIds ) {
	$this->updateRevisionOrArchiveRows( $dbw, $ids, $model, $table );
	foreach ( $cacheIds as $idPair ) {
		$this->clearCache( $idPair['page_id'], $idPair['rev_id'] );
	}
}
256 }
257 
// Name of the class in this file that implements the maintenance script.
258 $maintClass = PopulateContentModel::class;
// NOTE(review): RUN_MAINTENANCE_IF_MAIN is a constant from Maintenance.php holding a file path;
// presumably the included file runs $maintClass when this script is the entry point — confirm.
259 require_once RUN_MAINTENANCE_IF_MAIN;
RUN_MAINTENANCE_IF_MAIN
const RUN_MAINTENANCE_IF_MAIN
Definition: Maintenance.php:41
PopulateContentModel\updatePageRows
updatePageRows(IDatabase $dbw, $pageIds, $model)
Definition: populateContentModel.php:83
PopulateContentModel\updateRevisionOrArchiveRows
updateRevisionOrArchiveRows(IDatabase $dbw, $ids, $model, $table)
Definition: populateContentModel.php:129
MediaWiki\MediaWikiServices
MediaWikiServices is the service locator for the application scope of MediaWiki.
Definition: MediaWikiServices.php:134
Maintenance\fatalError
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.
Definition: Maintenance.php:510
Maintenance\addDescription
addDescription( $text)
Set the description text.
Definition: Maintenance.php:354
PopulateContentModel\populatePage
populatePage(IDatabase $dbw, $ns)
Definition: populateContentModel.php:96
Maintenance
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
Definition: Maintenance.php:88
wfWaitForSlaves
wfWaitForSlaves( $ifWritesSince=null, $wiki=false, $cluster=false, $timeout=null)
Waits for the replica DBs to catch up to the master position.
Definition: GlobalFunctions.php:2707
Wikimedia\Rdbms\IDatabase
Basic database interface for live and lazy-loaded relation database handles.
Definition: IDatabase.php:38
PopulateContentModel\__construct
__construct()
Default constructor.
Definition: populateContentModel.php:38
PopulateContentModel\populateRevisionOrArchive
populateRevisionOrArchive(IDatabase $dbw, $table, $ns)
Definition: populateContentModel.php:151
ContentHandler\getDefaultModelFor
static getDefaultModelFor(Title $title)
Returns the name of the default content model to be used for the page with the given title.
Definition: ContentHandler.php:188
MWException
MediaWiki exception.
Definition: MWException.php:26
$maintClass
$maintClass
Definition: populateContentModel.php:258
Title\newFromRow
static newFromRow( $row)
Make a Title object from a DB row.
Definition: Title.php:534
Maintenance\addOption
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
Definition: Maintenance.php:273
$title
$title
Definition: testCompression.php:36
Title\makeTitle
static makeTitle( $ns, $title, $fragment='', $interwiki='')
Create a new Title from a namespace index and a DB key.
Definition: Title.php:602
DB_MASTER
const DB_MASTER
Definition: defines.php:26
PopulateContentModel\clearCache
clearCache( $page_id, $rev_id)
Definition: populateContentModel.php:71
PopulateContentModel
Usage: populateContentModel.php --ns=1 --table=page.
Definition: populateContentModel.php:33
WANObjectCache
Multi-datacenter aware caching interface.
Definition: WANObjectCache.php:116
Maintenance\getDB
getDB( $db, $groups=[], $dbDomain=false)
Returns a database to be used by current maintenance script.
Definition: Maintenance.php:1409
Wikimedia\Rdbms\IDatabase\update
update( $table, $values, $conds, $fname=__METHOD__, $options=[])
UPDATE wrapper.
Maintenance\getOption
getOption( $name, $default=null)
Get an option, or return the default.
Definition: Maintenance.php:308
Wikimedia\Rdbms\IDatabase\addQuotes
addQuotes( $s)
Escape and quote a raw value string for use in a SQL query.
PopulateContentModel\execute
execute()
Do the actual work.
Definition: populateContentModel.php:46
Maintenance\getBatchSize
getBatchSize()
Returns batch size.
Definition: Maintenance.php:392
PopulateContentModel\$wanCache
WANObjectCache $wanCache
Definition: populateContentModel.php:36
Wikimedia\Rdbms\IDatabase\select
select( $table, $vars, $conds='', $fname=__METHOD__, $options=[], $join_conds=[])
Execute a SELECT query constructed using the various parameters provided.
PopulateContentModel\$wikiId
$wikiId
Definition: populateContentModel.php:34
Maintenance\error
error( $err, $die=0)
Throw an error to the user.
Definition: Maintenance.php:487
Maintenance\output
output( $out, $channel=null)
Throw some output to the user.
Definition: Maintenance.php:459
Maintenance\setBatchSize
setBatchSize( $s=0)
Set the batch size.
Definition: Maintenance.php:400