MediaWiki  master
populateContentModel.php
Go to the documentation of this file.
1 <?php
24 require_once __DIR__ . '/Maintenance.php';
25 
28 
/** Database domain ID read from the DB_MASTER handle in execute(); used by clearCache() to build the global revision cache key. */
34  protected $wikiId;
/** @var WANObjectCache Main WAN object cache, fetched from MediaWikiServices in execute(). */
36  protected $wanCache;
37 
/**
 * Set the script description, declare its two options and choose the
 * default batch size.
 */
public function __construct() {
	parent::__construct();
	$this->addDescription( 'Populate the various content_* fields' );

	// Both options are required and each one takes a value.
	$requiredOptions = [
		'ns' => 'Namespace to run in, or "all" for all namespaces',
		'table' => 'Table to run in',
	];
	foreach ( $requiredOptions as $optionName => $helpText ) {
		$this->addOption( $optionName, $helpText, true, true );
	}

	$this->setBatchSize( 100 );
}
45 
/**
 * Validate the "ns" and "table" options and run the matching populate routine.
 *
 * Calls fatalError() when the namespace is neither "all" nor a string of
 * digits, or when the table is not one of revision, archive or page.
 */
public function execute() {
	$dbw = $this->getDB( DB_MASTER );

	// Kept on the instance because clearCache() needs both to build and delete keys.
	$this->wikiId = $dbw->getDomainID();
	$this->wanCache = MediaWikiServices::getInstance()->getMainWANObjectCache();

	$ns = $this->getOption( 'ns' );
	if ( $ns !== 'all' ) {
		if ( !ctype_digit( $ns ) ) {
			$this->fatalError( 'Invalid namespace' );
		}
		// Numeric namespaces are handed on as integers; 'all' stays a string.
		$ns = (int)$ns;
	}

	$table = $this->getOption( 'table' );
	switch ( $table ) {
		case 'revision':
		case 'archive':
			// Both tables share one routine, parameterised by the table name.
			$this->populateRevisionOrArchive( $dbw, $table, $ns );
			break;
		case 'page':
			$this->populatePage( $dbw, $ns );
			break;
		default:
			$this->fatalError( "Invalid table name: $table" );
	}
}
70 
/**
 * Delete the WAN cache entries that hold a revision's content model.
 *
 * @param int|string $page_id Page ID as read from the database row
 * @param int|string $rev_id Revision ID as read from the database row
 */
protected function clearCache( $page_id, $rev_id ) {
	$cache = $this->wanCache;

	// Build both keys first, then delete them in the same order.
	$staleKeys = [
		// WikiPage content model cache
		$cache->makeKey( 'page-content-model', $rev_id ),
		// Revision object cache, which contains a content model
		$cache->makeGlobalKey( 'revision', $this->wikiId, $page_id, $rev_id ),
	];

	foreach ( $staleKeys as $staleKey ) {
		$cache->delete( $staleKey );
	}
}
82 
/**
 * Store one content model on a set of page rows, then wait for replication.
 *
 * @param IDatabase $dbw Database handle used for the write
 * @param array $pageIds page_id values of the rows to update
 * @param string $model Value to write to page_content_model
 */
private function updatePageRows( IDatabase $dbw, $pageIds, $model ) {
	$this->output( 'Setting ' . count( $pageIds ) . " rows to $model..." );

	$newValues = [ 'page_content_model' => $model ];
	$rowFilter = [ 'page_id' => $pageIds ];
	$dbw->update( 'page', $newValues, $rowFilter, __METHOD__ );

	// Wait for replication before reporting the batch as finished.
	$lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
	$lbFactory->waitForReplication();

	$this->output( "done.\n" );
}
95 
/**
 * Fill in page_content_model for page rows where it is NULL.
 *
 * Pages are read in batches ordered by page_id. Each page gets the default
 * content model for its title; page IDs are grouped per model and a group is
 * written as soon as it reaches the batch size. Groups still pending when no
 * more rows are found are written at the end.
 *
 * @param IDatabase $dbw Database handle used for both reads and writes
 * @param int|string $ns Namespace number to restrict to, or 'all'
 */
protected function populatePage( IDatabase $dbw, $ns ) {
	$toSave = [];
	$lastId = 0;
	$nsCondition = $ns === 'all' ? [] : [ 'page_namespace' => $ns ];
	$batchSize = $this->getBatchSize();
	do {
		$rows = $dbw->select(
			'page',
			[ 'page_namespace', 'page_title', 'page_id' ],
			[
				'page_content_model' => null,
				// Resume after the last page seen in the previous batch.
				'page_id > ' . $dbw->addQuotes( $lastId ),
			] + $nsCondition,
			__METHOD__,
			[ 'LIMIT' => $batchSize, 'ORDER BY' => 'page_id ASC' ]
		);
		$this->output( "Fetched {$rows->numRows()} rows.\n" );
		foreach ( $rows as $row ) {
			$title = Title::newFromRow( $row );
			// The model to store is the default one for this title; without this
			// assignment $model would be undefined in the lines below.
			$model = ContentHandler::getDefaultModelFor( $title );
			$toSave[$model][] = $row->page_id;
			if ( count( $toSave[$model] ) >= $batchSize ) {
				$this->updatePageRows( $dbw, $toSave[$model], $model );
				// Drop the flushed group so the final loop never sees an empty list.
				unset( $toSave[$model] );
			}
			$lastId = $row->page_id;
		}
	} while ( $rows->numRows() >= $batchSize );
	// Write whatever groups never reached a full batch.
	foreach ( $toSave as $model => $pages ) {
		$this->updatePageRows( $dbw, $pages, $model );
	}
}
128 
/**
 * Store one content model and its default format on a set of revision or
 * archive rows, then wait for replication.
 *
 * @param IDatabase $dbw Database handle used for the write
 * @param array $ids rev_id or ar_id values of the rows to update
 * @param string $model Content model to write; its handler supplies the format
 * @param string $table 'archive' selects the ar_ column prefix, anything else rev_
 */
private function updateRevisionOrArchiveRows( IDatabase $dbw, $ids, $model, $table ) {
	$prefix = $table === 'archive' ? 'ar' : 'rev';
	$model_column = "{$prefix}_content_model";
	$format_column = "{$prefix}_content_format";
	$key = "{$prefix}_id";

	$services = MediaWikiServices::getInstance();
	$count = count( $ids );
	$format = $services
		->getContentHandlerFactory()
		->getContentHandler( $model )
		->getDefaultFormat();
	$this->output( "Setting $count rows to $model / $format..." );
	$dbw->update(
		$table,
		[ $model_column => $model, $format_column => $format ],
		[ $key => $ids ],
		__METHOD__
	);
	// Wait for replication after the batched write, as updatePageRows() and the
	// single-row update path in populateRevisionOrArchive() already do.
	$services->getDBLoadBalancerFactory()->waitForReplication();

	$this->output( "done.\n" );
}
150 
/**
 * Fill in the content model column of revision or archive rows where it is NULL.
 *
 * Rows are read in batches ordered by their ID column. A row whose stored
 * format is NULL, or equal to the default format of its title's content
 * handler, is queued under that handler's model and written in bulk once the
 * queue reaches the batch size. Any other row is updated on its own straight
 * away. Cache entries for the queued rows are cleared after all writes.
 *
 * @param IDatabase $dbw Database handle used for both reads and writes
 * @param string $table 'archive' or 'revision'
 * @param int|string $ns Namespace number to restrict to, or 'all'
 */
protected function populateRevisionOrArchive( IDatabase $dbw, $table, $ns ) {
	$isArchive = $table === 'archive';
	$prefix = $isArchive ? 'ar' : 'rev';
	$modelField = "{$prefix}_content_model";
	$formatField = "{$prefix}_content_format";
	$idField = "{$prefix}_id";

	if ( $isArchive ) {
		$tables = 'archive';
		$titleFields = [ 'ar_namespace', 'ar_title' ];
		$joins = [];
		$nsField = 'ar_namespace';
		$pageIdField = 'ar_page_id';
		$revIdField = 'ar_rev_id';
	} else {
		// Revisions carry no title of their own, so the page table is joined in.
		$tables = [ 'revision', 'page' ];
		$titleFields = [ 'page_title', 'page_namespace' ];
		$joins = [ 'page' => [ 'JOIN', 'rev_page=page_id' ] ];
		$nsField = 'page_namespace';
		$pageIdField = 'rev_page';
		$revIdField = 'rev_id';
	}
	$nsFilter = $ns === 'all' ? [] : [ $nsField => $ns ];

	$batchSize = $this->getBatchSize();
	$selectFields = array_merge(
		$titleFields,
		[ $modelField, $formatField, $idField, $pageIdField, $revIdField ]
	);
	$selectOptions = [ 'LIMIT' => $batchSize, 'ORDER BY' => "$idField ASC" ];

	$toSave = [];
	$idsToClear = [];
	$lastId = 0;
	$lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();

	do {
		// @todo support populating format if model is already set
		$conds = [
			$modelField => null,
			"$idField > " . $dbw->addQuotes( $lastId ),
		] + $nsFilter;
		$rows = $dbw->select( $tables, $selectFields, $conds, __METHOD__, $selectOptions, $joins );
		$fetched = $rows->numRows();
		$this->output( "Fetched $fetched rows.\n" );

		foreach ( $rows as $row ) {
			$title = $isArchive
				? Title::makeTitle( $row->ar_namespace, $row->ar_title )
				: Title::newFromRow( $row );
			$id = $row->{$idField};
			// Advance the cursor before any "continue" so skipped rows are not refetched.
			$lastId = $id;

			try {
				$handlerFactory = MediaWikiServices::getInstance()->getContentHandlerFactory();
				$handler = $handlerFactory->getContentHandler( $title->getContentModel() );
			} catch ( MWException $e ) {
				$this->error( "Invalid content model for $title" );
				continue;
			}

			$defaultModel = $handler->getModelID();
			$defaultFormat = $handler->getDefaultFormat();
			$dbModel = $row->{$modelField};
			$dbFormat = $row->{$formatField};
			$pageId = $row->{$pageIdField};
			$revId = $row->{$revIdField};

			// Either nothing is stored yet, or the stored format already is the default one.
			$canUseDefaults = ( $dbModel === null && $dbFormat === null )
				|| $dbFormat === $defaultFormat;

			if ( !$canUseDefaults ) {
				// non-default format, just update now
				$this->output( "Updating model to match format for $table $id of $title... " );
				$dbw->update(
					$table,
					[ $modelField => $defaultModel ],
					[ $idField => $id ],
					__METHOD__
				);
				$lbFactory->waitForReplication();
				$this->clearCache( $pageId, $revId );
				$this->output( "done.\n" );
				continue;
			}

			// Set the defaults
			$toSave[$defaultModel][] = $id;
			$idsToClear[] = [
				'page_id' => $pageId,
				'rev_id' => $revId,
			];

			if ( count( $toSave[$defaultModel] ) >= $batchSize ) {
				$this->updateRevisionOrArchiveRows( $dbw, $toSave[$defaultModel], $defaultModel, $table );
				unset( $toSave[$defaultModel] );
			}
		}
	} while ( $fetched >= $batchSize );

	// Write the groups that never filled a whole batch.
	foreach ( $toSave as $pendingModel => $pendingIds ) {
		$this->updateRevisionOrArchiveRows( $dbw, $pendingIds, $pendingModel, $table );
	}

	// Cache entries of bulk-updated rows are only cleared once every write is done.
	foreach ( $idsToClear as $pair ) {
		$this->clearCache( $pair['page_id'], $pair['rev_id'] );
	}
}
257 }
258 
// Name of the maintenance class this file provides.
259 $maintClass = PopulateContentModel::class;
// NOTE(review): presumably runs the class named in $maintClass when this file is the entry script — confirm in Maintenance.php.
260 require_once RUN_MAINTENANCE_IF_MAIN;
RUN_MAINTENANCE_IF_MAIN
const RUN_MAINTENANCE_IF_MAIN
Definition: Maintenance.php:38
PopulateContentModel\updatePageRows
updatePageRows(IDatabase $dbw, $pageIds, $model)
Definition: populateContentModel.php:83
PopulateContentModel\updateRevisionOrArchiveRows
updateRevisionOrArchiveRows(IDatabase $dbw, $ids, $model, $table)
Definition: populateContentModel.php:129
MediaWiki\MediaWikiServices
MediaWikiServices is the service locator for the application scope of MediaWiki.
Definition: MediaWikiServices.php:144
Maintenance\fatalError
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.
Definition: Maintenance.php:472
Maintenance\addDescription
addDescription( $text)
Set the description text.
Definition: Maintenance.php:315
PopulateContentModel\populatePage
populatePage(IDatabase $dbw, $ns)
Definition: populateContentModel.php:96
Maintenance
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
Definition: Maintenance.php:51
Wikimedia\Rdbms\IDatabase
Basic database interface for live and lazy-loaded relation database handles.
Definition: IDatabase.php:38
Wikimedia\Rdbms\IDatabase\update
update( $table, $set, $conds, $fname=__METHOD__, $options=[])
Update all rows in a table that match a given condition.
PopulateContentModel\__construct
__construct()
Default constructor.
Definition: populateContentModel.php:38
PopulateContentModel\populateRevisionOrArchive
populateRevisionOrArchive(IDatabase $dbw, $table, $ns)
Definition: populateContentModel.php:151
ContentHandler\getDefaultModelFor
static getDefaultModelFor(Title $title)
Returns the name of the default content model to be used for the page with the given title.
Definition: ContentHandler.php:189
MWException
MediaWiki exception.
Definition: MWException.php:26
$maintClass
$maintClass
Definition: populateContentModel.php:259
Title\newFromRow
static newFromRow( $row)
Make a Title object from a DB row.
Definition: Title.php:527
Maintenance\addOption
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
Definition: Maintenance.php:233
$title
$title
Definition: testCompression.php:38
Title\makeTitle
static makeTitle( $ns, $title, $fragment='', $interwiki='')
Create a new Title from a namespace index and a DB key.
Definition: Title.php:595
DB_MASTER
const DB_MASTER
Definition: defines.php:26
PopulateContentModel\clearCache
clearCache( $page_id, $rev_id)
Definition: populateContentModel.php:71
PopulateContentModel
Usage: populateContentModel.php --ns=1 --table=page.
Definition: populateContentModel.php:33
WANObjectCache
Multi-datacenter aware caching interface.
Definition: WANObjectCache.php:120
Maintenance\getDB
getDB( $db, $groups=[], $dbDomain=false)
Returns a database to be used by current maintenance script.
Definition: Maintenance.php:1342
Maintenance\getOption
getOption( $name, $default=null)
Get an option, or return the default.
Definition: Maintenance.php:269
Wikimedia\Rdbms\IDatabase\addQuotes
addQuotes( $s)
Escape and quote a raw value string for use in a SQL query.
PopulateContentModel\execute
execute()
Do the actual work.
Definition: populateContentModel.php:46
Maintenance\getBatchSize
getBatchSize()
Returns batch size.
Definition: Maintenance.php:354
PopulateContentModel\$wanCache
WANObjectCache $wanCache
Definition: populateContentModel.php:36
Wikimedia\Rdbms\IDatabase\select
select( $table, $vars, $conds='', $fname=__METHOD__, $options=[], $join_conds=[])
Execute a SELECT query constructed using the various parameters provided.
PopulateContentModel\$wikiId
$wikiId
Definition: populateContentModel.php:34
Maintenance\error
error( $err, $die=0)
Throw an error to the user.
Definition: Maintenance.php:449
Maintenance\output
output( $out, $channel=null)
Throw some output to the user.
Definition: Maintenance.php:421
Maintenance\setBatchSize
setBatchSize( $s=0)
Set the batch size.
Definition: Maintenance.php:362