MediaWiki  1.34.0
populateContentModel.php
Go to the documentation of this file.
1 <?php
24 require_once __DIR__ . '/Maintenance.php';
25 
28 
/** Domain ID of the master DB connection; assigned in execute() and used to build the revision cache key in clearCache(). */
34  protected $wikiId;
/** @var WANObjectCache Main WAN object cache; assigned in execute() and used by clearCache(). */
36  protected $wanCache;
37 
/**
 * Declare the script description, its command-line options and the
 * default batch size.
 */
public function __construct() {
	parent::__construct();

	$this->addDescription( 'Populate the various content_* fields' );

	// Both options are registered as required and as taking a value.
	$this->addOption( 'ns', 'Namespace to run in, or "all" for all namespaces', true, true );
	$this->addOption( 'table', 'Table to run in', true, true );

	$this->setBatchSize( 100 );
}
45 
/**
 * Validate the 'ns' and 'table' options and dispatch to the populate
 * routine for the requested table.
 *
 * 'ns' must be either the literal string "all" or a string of decimal
 * digits; 'table' must be one of 'revision', 'archive' or 'page'.
 * Anything else ends the script through fatalError().
 */
public function execute() {
	$dbw = $this->getDB( DB_MASTER );

	// Kept on the instance because clearCache() builds its keys from them.
	$this->wikiId = $dbw->getDomainID();
	$this->wanCache = MediaWikiServices::getInstance()->getMainWANObjectCache();

	$ns = $this->getOption( 'ns' );
	if ( $ns !== 'all' ) {
		if ( !ctype_digit( $ns ) ) {
			$this->fatalError( 'Invalid namespace' );
		}
		// Digits only at this point, so the cast yields the namespace number.
		$ns = (int)$ns;
	}

	$table = $this->getOption( 'table' );
	switch ( $table ) {
		case 'revision':
		case 'archive':
			// Both tables share one routine; it derives column names from $table.
			$this->populateRevisionOrArchive( $dbw, $table, $ns );
			break;
		case 'page':
			$this->populatePage( $dbw, $ns );
			break;
		default:
			$this->fatalError( "Invalid table name: $table" );
	}
}
70 
/**
 * Delete the WAN cache entries that carry a content model for one revision.
 *
 * @param int|string $page_id Page ID, used in the revision cache key
 * @param int|string $rev_id Revision ID, used in both cache keys
 */
protected function clearCache( $page_id, $rev_id ) {
	$cache = $this->wanCache;

	// WikiPage content model cache
	$cache->delete( $cache->makeKey( 'page-content-model', $rev_id ) );

	// Revision object cache, which contains a content model
	$cache->delete(
		$cache->makeGlobalKey( 'revision', $this->wikiId, $page_id, $rev_id )
	);
}
82 
/**
 * Set page_content_model to the given model for a batch of page rows.
 *
 * @param IDatabase $dbw Database handle the UPDATE is issued on
 * @param array $pageIds page_id values of the rows to update
 * @param string $model Content model ID to store
 */
private function updatePageRows( IDatabase $dbw, $pageIds, $model ) {
	$count = count( $pageIds );
	$this->output( "Setting $count rows to $model..." );
	$dbw->update(
		'page',
		[ 'page_content_model' => $model ],
		[ 'page_id' => $pageIds ],
		__METHOD__
	);
	// Let the replicas catch up before the caller moves on to the next
	// batch, the same way the single-row update path of this script does.
	wfWaitForSlaves();
	$this->output( "done.\n" );
}
95 
/**
 * Fill in page_content_model for page rows where it is NULL, using the
 * default content model of each page's title.
 *
 * Rows are read in page_id order, one batch at a time, and grouped by
 * model so that each UPDATE covers up to one batch of IDs.
 *
 * @param IDatabase $dbw Database handle used for both reads and writes
 * @param int|string $ns Namespace number, or the string 'all' for no namespace filter
 */
protected function populatePage( IDatabase $dbw, $ns ) {
	$toSave = [];
	$lastId = 0;
	$nsCondition = $ns === 'all' ? [] : [ 'page_namespace' => $ns ];
	$batchSize = $this->getBatchSize();
	do {
		$rows = $dbw->select(
			'page',
			[ 'page_namespace', 'page_title', 'page_id' ],
			[
				'page_content_model' => null,
				// Resume after the last row seen in the previous batch.
				'page_id > ' . $dbw->addQuotes( $lastId ),
			] + $nsCondition,
			__METHOD__,
			[ 'LIMIT' => $batchSize, 'ORDER BY' => 'page_id ASC' ]
		);
		$this->output( "Fetched {$rows->numRows()} rows.\n" );
		foreach ( $rows as $row ) {
			$title = Title::newFromRow( $row );
			// The model to store is whatever the title defaults to; without
			// this assignment $model would be undefined below.
			$model = ContentHandler::getDefaultModelFor( $title );
			$toSave[$model][] = $row->page_id;
			if ( count( $toSave[$model] ) >= $batchSize ) {
				// A full batch for this model: write it out and start over.
				$this->updatePageRows( $dbw, $toSave[$model], $model );
				unset( $toSave[$model] );
			}
			$lastId = $row->page_id;
		}
	} while ( $rows->numRows() >= $batchSize );
	// Flush the partially filled groups that never reached a full batch.
	foreach ( $toSave as $model => $pages ) {
		$this->updatePageRows( $dbw, $pages, $model );
	}
}
128 
/**
 * Write a content model, and that model's default format, to a batch of
 * rows in the revision or archive table.
 *
 * @param IDatabase $dbw Database handle the UPDATE is issued on
 * @param array $ids Primary key values (ar_id or rev_id) of the rows to update
 * @param string $model Content model ID to store
 * @param string $table 'archive' selects the ar_ column prefix; anything else uses rev_
 */
private function updateRevisionOrArchiveRows( IDatabase $dbw, $ids, $model, $table ) {
	if ( $table === 'archive' ) {
		$prefix = 'ar';
	} else {
		$prefix = 'rev';
	}

	$count = count( $ids );
	// The format written is always the default format of the model's handler.
	$format = ContentHandler::getForModelID( $model )->getDefaultFormat();

	$this->output( "Setting $count rows to $model / $format..." );
	$dbw->update(
		$table,
		[
			"{$prefix}_content_model" => $model,
			"{$prefix}_content_format" => $format,
		],
		[ "{$prefix}_id" => $ids ],
		__METHOD__
	);

	$this->output( "done.\n" );
}
147 
/**
 * Fill in the content model for revision or archive rows where it is NULL.
 *
 * Rows whose stored format is NULL or equals the default format of the
 * title's content handler are queued per model and written in batches
 * together with the default format. Rows with any other stored format
 * get the title's default model written immediately, one row at a time.
 * Cache entries of the batched rows are cleared once all batches have
 * been written.
 *
 * @param IDatabase $dbw Database handle used for both reads and writes
 * @param string $table 'archive' or 'revision'
 * @param int|string $ns Namespace number, or the string 'all' for no namespace filter
 */
protected function populateRevisionOrArchive( IDatabase $dbw, $table, $ns ) {
	$isArchive = $table === 'archive';
	$prefix = $isArchive ? 'ar' : 'rev';
	$modelCol = "{$prefix}_content_model";
	$formatCol = "{$prefix}_content_format";
	$key = "{$prefix}_id";

	if ( $isArchive ) {
		// Archive rows carry their own namespace and title.
		$tables = 'archive';
		$titleFields = [ 'ar_namespace', 'ar_title' ];
		$joins = [];
		$nsField = 'ar_namespace';
		$pageIdCol = 'ar_page_id';
		$revIdCol = 'ar_rev_id';
	} else {
		// Revision rows get namespace and title from the joined page row.
		$tables = [ 'revision', 'page' ];
		$titleFields = [ 'page_title', 'page_namespace' ];
		$joins = [ 'page' => [ 'JOIN', 'rev_page=page_id' ] ];
		$nsField = 'page_namespace';
		$pageIdCol = 'rev_page';
		$revIdCol = 'rev_id';
	}
	$nsFilter = $ns === 'all' ? [] : [ $nsField => $ns ];

	$pending = [];
	$staleIds = [];
	$lastId = 0;
	$batchSize = $this->getBatchSize();
	do {
		$rows = $dbw->select(
			$tables,
			array_merge(
				$titleFields,
				[ $modelCol, $formatCol, $key, $pageIdCol, $revIdCol ]
			),
			// @todo support populating format if model is already set
			[
				$modelCol => null,
				// Resume after the last row seen in the previous batch.
				"$key > " . $dbw->addQuotes( $lastId ),
			] + $nsFilter,
			__METHOD__,
			[ 'LIMIT' => $batchSize, 'ORDER BY' => "$key ASC" ],
			$joins
		);
		$this->output( "Fetched {$rows->numRows()} rows.\n" );

		foreach ( $rows as $row ) {
			$title = $isArchive
				? Title::makeTitle( $row->ar_namespace, $row->ar_title )
				: Title::newFromRow( $row );
			// Advance the cursor before any "continue" so skipped rows are not re-read.
			$lastId = $row->{$key};

			try {
				$handler = ContentHandler::getForTitle( $title );
			} catch ( MWException $e ) {
				$this->error( "Invalid content model for $title" );
				continue;
			}

			$defaultModel = $handler->getModelID();
			$defaultFormat = $handler->getDefaultFormat();
			$dbModel = $row->{$modelCol};
			$dbFormat = $row->{$formatCol};
			$id = $row->{$key};
			$cacheIds = [
				'page_id' => $row->{$pageIdCol},
				'rev_id' => $row->{$revIdCol},
			];

			// Queue for a batched write when nothing is stored yet, or when
			// the stored format already is the handler's default format.
			$useDefaults = ( $dbModel === null && $dbFormat === null )
				|| $dbFormat === $defaultFormat;

			if ( !$useDefaults ) {
				// Non-default format: write only the model, right away, and
				// leave the stored format untouched.
				$this->output( "Updating model to match format for $table $id of $title... " );
				$dbw->update(
					$table,
					[ $modelCol => $defaultModel ],
					[ $key => $id ],
					__METHOD__
				);
				wfWaitForSlaves();
				$this->clearCache( $cacheIds['page_id'], $cacheIds['rev_id'] );
				$this->output( "done.\n" );
				continue;
			}

			$pending[$defaultModel][] = $id;
			$staleIds[] = $cacheIds;

			if ( count( $pending[$defaultModel] ) >= $batchSize ) {
				// A full batch for this model: write it out and start over.
				$this->updateRevisionOrArchiveRows( $dbw, $pending[$defaultModel], $defaultModel, $table );
				unset( $pending[$defaultModel] );
			}
		}
	} while ( $rows->numRows() >= $batchSize );

	// Flush the partially filled groups that never reached a full batch.
	foreach ( $pending as $model => $ids ) {
		$this->updateRevisionOrArchiveRows( $dbw, $ids, $model, $table );
	}

	// Cache entries of batched rows are cleared only after every write is done.
	foreach ( $staleIds as $idPair ) {
		$this->clearCache( $idPair['page_id'], $idPair['rev_id'] );
	}
}
251 }
252 
// Register this script's class name, then load the file named by the
// RUN_MAINTENANCE_IF_MAIN constant (defined in Maintenance.php).
// NOTE(review): presumably that file runs $maintClass when this script is the entry point — confirm.
253 $maintClass = PopulateContentModel::class;
254 require_once RUN_MAINTENANCE_IF_MAIN;
RUN_MAINTENANCE_IF_MAIN
const RUN_MAINTENANCE_IF_MAIN
Definition: Maintenance.php:39
ContentHandler\getForModelID
static getForModelID( $modelId)
Returns the ContentHandler singleton for the given model ID.
Definition: ContentHandler.php:254
PopulateContentModel\updatePageRows
updatePageRows(IDatabase $dbw, $pageIds, $model)
Definition: populateContentModel.php:83
PopulateContentModel\updateRevisionOrArchiveRows
updateRevisionOrArchiveRows(IDatabase $dbw, $ids, $model, $table)
Definition: populateContentModel.php:129
MediaWiki\MediaWikiServices
MediaWikiServices is the service locator for the application scope of MediaWiki.
Definition: MediaWikiServices.php:117
Maintenance\fatalError
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.
Definition: Maintenance.php:504
Maintenance\addDescription
addDescription( $text)
Set the description text.
Definition: Maintenance.php:348
PopulateContentModel\populatePage
populatePage(IDatabase $dbw, $ns)
Definition: populateContentModel.php:96
ContentHandler\getForTitle
static getForTitle(Title $title)
Returns the appropriate ContentHandler singleton for the given title.
Definition: ContentHandler.php:201
Maintenance
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
Definition: Maintenance.php:82
wfWaitForSlaves
wfWaitForSlaves( $ifWritesSince=null, $wiki=false, $cluster=false, $timeout=null)
Waits for the replica DBs to catch up to the master position.
Definition: GlobalFunctions.php:2718
Wikimedia\Rdbms\IDatabase
Basic database interface for live and lazy-loaded relational database handles.
Definition: IDatabase.php:38
PopulateContentModel\__construct
__construct()
Default constructor.
Definition: populateContentModel.php:38
PopulateContentModel\populateRevisionOrArchive
populateRevisionOrArchive(IDatabase $dbw, $table, $ns)
Definition: populateContentModel.php:148
ContentHandler\getDefaultModelFor
static getDefaultModelFor(Title $title)
Returns the name of the default content model to be used for the page with the given title.
Definition: ContentHandler.php:186
MWException
MediaWiki exception.
Definition: MWException.php:26
$maintClass
$maintClass
Definition: populateContentModel.php:253
Title\newFromRow
static newFromRow( $row)
Make a Title object from a DB row.
Definition: Title.php:518
Maintenance\addOption
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
Definition: Maintenance.php:267
$title
$title
Definition: testCompression.php:34
Title\makeTitle
static makeTitle( $ns, $title, $fragment='', $interwiki='')
Create a new Title from a namespace index and a DB key.
Definition: Title.php:586
DB_MASTER
const DB_MASTER
Definition: defines.php:26
PopulateContentModel\clearCache
clearCache( $page_id, $rev_id)
Definition: populateContentModel.php:71
PopulateContentModel
Usage: populateContentModel.php --ns=1 --table=page.
Definition: populateContentModel.php:33
WANObjectCache
Multi-datacenter aware caching interface.
Definition: WANObjectCache.php:116
Maintenance\getDB
getDB( $db, $groups=[], $dbDomain=false)
Returns a database to be used by current maintenance script.
Definition: Maintenance.php:1396
Wikimedia\Rdbms\IDatabase\update
update( $table, $values, $conds, $fname=__METHOD__, $options=[])
UPDATE wrapper.
Maintenance\getOption
getOption( $name, $default=null)
Get an option, or return the default.
Definition: Maintenance.php:302
Wikimedia\Rdbms\IDatabase\addQuotes
addQuotes( $s)
Escape and quote a raw value string for use in a SQL query.
PopulateContentModel\execute
execute()
Do the actual work.
Definition: populateContentModel.php:46
Maintenance\getBatchSize
getBatchSize()
Returns batch size.
Definition: Maintenance.php:386
PopulateContentModel\$wanCache
WANObjectCache $wanCache
Definition: populateContentModel.php:36
Wikimedia\Rdbms\IDatabase\select
select( $table, $vars, $conds='', $fname=__METHOD__, $options=[], $join_conds=[])
Execute a SELECT query constructed using the various parameters provided.
PopulateContentModel\$wikiId
$wikiId
Definition: populateContentModel.php:34
Maintenance\error
error( $err, $die=0)
Throw an error to the user.
Definition: Maintenance.php:481
Maintenance\output
output( $out, $channel=null)
Throw some output to the user.
Definition: Maintenance.php:453
Maintenance\setBatchSize
setBatchSize( $s=0)
Set the batch size.
Definition: Maintenance.php:394