// Store used to turn content model names into IDs via acquireId(); assigned in initServices().
44 private $contentModelStore;
// Store used to turn slot role names into IDs via acquireId(); assigned in initServices().
47 private $slotRoleStore;
// Map of "content_model:content_address" keys to content_id values.
// Stays null unless loadContentMap() initializes it.
56 private $contentRowMap =
null;
// Both counters grow by the number of slot rows inserted per batch and are
// reported in the progress and completion messages.
58 private $count = 0, $totalCount = 0;
61 parent::__construct();
64 $this->
addOption(
'table',
'revision or archive table, or `all` to populate both',
false,
67 'Reuse content table rows when the address and model are the same. '
68 .
'This will increase the script\'s time and memory usage, perhaps significantly.',
70 $this->
addOption(
'start-revision',
'The rev_id to start at',
false,
true );
71 $this->
addOption(
'start-archive',
'The ar_rev_id to start at',
false,
true );
/**
 * Fetches the content model store, slot role store and blob store from
 * MediaWikiServices and keeps them on this object, together with the ID
 * of the main slot role.
 */
75 private function initServices() {
77 $services = MediaWikiServices::getInstance();
78 $this->contentModelStore = $services->getContentModelStore();
79 $this->slotRoleStore = $services->getSlotRoleStore();
80 $this->blobStore = $services->getBlobStore();
// Call reloadMap() on both stores, then acquire and cache the ID for SlotRecord::MAIN.
84 $this->contentModelStore->reloadMap();
85 $this->slotRoleStore->reloadMap();
86 $this->mainRoleId = $this->slotRoleStore->acquireId( SlotRecord::MAIN );
90 $t0 = microtime(
true );
92 $this->initServices();
94 if ( $this->
getOption(
'reuse-content',
false ) ) {
95 $this->loadContentMap();
98 foreach ( $this->getTables() as $table ) {
99 $this->populateTable( $table );
102 $elapsed = microtime(
true ) - $t0;
103 $this->writeln(
"Done. Processed $this->totalCount rows in $elapsed seconds" );
/**
 * Works out which tables to process from the `table` option, which
 * defaults to `all` when the option is not given.
 */
110 private function getTables() {
111 $table = $this->
getOption(
'table',
'all' );
112 $validTableOptions = [
'all',
'revision',
'archive' ];
// Anything outside the three accepted values ends the script through fatalError().
// NOTE(review): in_array() is called without the strict flag, so the comparison is loose.
114 if ( !in_array( $table, $validTableOptions ) ) {
115 $this->
fatalError(
'Invalid table. Must be either `revision` or `archive` or `all`' );
// `all` expands to both tables.
118 if ( $table ===
'all' ) {
119 $tables = [
'revision',
'archive' ];
// NOTE(review): presumably the alternative branch (a single requested table); the
// branch keyword and the remainder of the function are not visible in this listing.
121 $tables = [ $table ];
/**
 * Fills $this->contentRowMap with existing content rows, keyed by
 * "content_model:content_address" with the content_id as value, and
 * reports how many entries were loaded and how long it took.
 */
127 private function loadContentMap() {
128 $t0 = microtime(
true );
129 $this->writeln(
"Loading existing content table rows..." );
130 $this->contentRowMap = [];
// NOTE(review): the query call and the loop around it are not visible in this listing;
// the visible arguments are the selected fields, the condition and the options.
136 [
'content_id',
'content_address',
'content_model' ],
// Continue after the last content_id seen so far; no condition while $from is falsy.
137 $from ?
"content_id > $from" :
'',
// Rows are ordered by content_id and limited to one batch per query.
139 [
'ORDER BY' =>
'content_id',
'LIMIT' => $this->
getBatchSize() ]
// Remember the last content_id for the next query and record each row in the map.
144 foreach (
$res as $row ) {
145 $from = $row->content_id;
146 $this->contentRowMap[
"{$row->content_model}:{$row->content_address}"] = $row->content_id;
149 $elapsed = microtime(
true ) - $t0;
150 $this->writeln(
"Loaded " . count( $this->contentRowMap ) .
" rows in $elapsed seconds" );
/**
 * Processes the rows of one table that have no matching slots row yet,
 * walking the table's ID range in batches and handing each non-empty
 * batch to populateContentTablesForRowBatch().
 *
 * @param string $table Compared against 'revision' below; getTables()
 *  supplies 'revision' or 'archive'.
 */
156 private function populateTable( $table ) {
157 $t0 = microtime(
true );
159 $this->writeln(
"Populating $table..." );
// Pick the tables, field aliases, join conditions and start option for the requested table.
161 if ( $table ===
'revision' ) {
163 $tables = [
'revision',
'slots',
'page' ];
167 'sha1' =>
'rev_sha1',
168 'text_id' =>
'rev_text_id',
169 'content_model' =>
'rev_content_model',
170 'namespace' =>
'page_namespace',
171 'title' =>
'page_title',
174 'slots' => [
'LEFT JOIN',
'rev_id=slot_revision_id' ],
175 'page' => [
'LEFT JOIN',
'rev_page=page_id' ],
177 $startOption =
'start-revision';
// NOTE(review): the following lines configure the archive case (ar_* fields); the branch
// keyword separating them from the revision case is not visible in this listing.
179 $idField =
'ar_rev_id';
180 $tables = [
'archive',
'slots' ];
182 'rev_id' =>
'ar_rev_id',
185 'text_id' =>
'ar_text_id',
186 'content_model' =>
'ar_content_model',
187 'namespace' =>
'ar_namespace',
188 'title' =>
'ar_title',
191 'slots' => [
'LEFT JOIN',
'ar_rev_id=slot_revision_id' ],
193 $startOption =
'start-archive';
// If the table does not have the text_id field, report that there is nothing to populate.
196 if ( !$this->dbw->fieldExists( $table, $fields[
'text_id'], __METHOD__ ) ) {
197 $this->writeln(
"No need to populate, $table.{$fields['text_id']} field does not exist" );
// Look up the smallest and largest ID present in the table.
201 $minmax = $this->dbw->selectRow(
203 [
'min' =>
"MIN( $idField )",
'max' =>
"MAX( $idField )" ],
// An explicit start option replaces the lower bound.
// NOTE(review): this writes to $minmax before the !$minmax check below has run.
207 if ( $this->
hasOption( $startOption ) ) {
208 $minmax->min = (int)$this->
getOption( $startOption );
// Without a usable numeric range, use min 1 / max 0 so the loop condition below is false from the start.
210 if ( !$minmax || !is_numeric( $minmax->min ) || !is_numeric( $minmax->max ) ) {
212 $minmax = (object)[
'min' => 1,
'max' => 0 ];
// Step through the ID range $batchSize IDs at a time; $endId is capped at the maximum ID.
217 for ( $startId = (
int)$minmax->min; $startId <= $minmax->max; $startId += $batchSize ) {
218 $endId = (int)min( $startId + $batchSize - 1, $minmax->max );
// Select the rows inside this ID window that have no slots row (slot_revision_id IS NULL).
219 $rows = $this->dbw->select(
223 "$idField >= $startId",
224 "$idField <= $endId",
225 'slot_revision_id IS NULL',
228 [
'ORDER BY' =>
'rev_id' ],
// Only non-empty batches are handed to populateContentTablesForRowBatch().
231 if ( $rows->
numRows() !== 0 ) {
232 $this->populateContentTablesForRowBatch( $rows, $startId, $table );
// Progress message: last ID of this window, overall maximum, row count and elapsed time.
235 $elapsed = microtime(
true ) - $t0;
237 "... $table processed up to revision id $endId of {$minmax->max}"
238 .
" ($this->count rows in $elapsed seconds)"
// Final summary for this table.
242 $elapsed = microtime(
true ) - $t0;
243 $this->writeln(
"Done populating $table table. Processed $this->count rows in $elapsed seconds" );
/**
 * Inserts content rows and main-role slot rows for one batch of rows.
 *
 * Content row data is only built for keys that are not already present in
 * the content map. An exception caught by the catch block at the end is
 * reported through fatalError().
 *
 * @param IResultWrapper $rows Rows to process; each must have a non-null rev_id.
 * @param int $startId First ID of the batch; its visible use is the error message.
 * @param string $table Table name; its visible use is the error message.
 */
252 private function populateContentTablesForRowBatch(
IResultWrapper $rows, $startId, $table ) {
// $map is bound by reference to $this->contentRowMap below; how the null case is
// handled is not visible in this listing.
255 if ( $this->contentRowMap ===
null ) {
258 $map = &$this->contentRowMap;
// First pass: build the "modelId:address" key for every row and record it per revision ID.
265 foreach ( $rows as $row ) {
266 $revisionId = $row->rev_id;
268 Assert::invariant( $revisionId !==
null,
'rev_id must not be null' );
270 $model = $this->getContentModel( $row );
271 $modelId = $this->contentModelStore->acquireId( $model );
272 $address = SqlBlobStore::makeAddressFromTextId( $row->text_id );
274 $key =
"{$modelId}:{$address}";
275 $contentKeys[$revisionId] = $key;
// Keys missing from the map get their missing row fields filled in before the
// content row values below are assembled.
277 if ( !isset( $map[$key] ) ) {
278 $this->fillMissingFields( $row, $model, $address );
281 'content_size' => (int)$row->len,
282 'content_sha1' => $row->sha1,
283 'content_model' => $modelId,
284 'content_address' => $address,
// Record the current MAX(content_id), insert the new rows, then read back the rows
// with a higher content_id to learn the IDs they were given.
290 if ( $contentRows ) {
291 $id = $this->dbw->selectField(
'content',
'MAX(content_id)',
'', __METHOD__ );
292 $this->dbw->insert(
'content', $contentRows, __METHOD__ );
293 $res = $this->dbw->select(
295 [
'content_id',
'content_model',
'content_address' ],
296 'content_id > ' . (
int)$id,
299 foreach (
$res as $row ) {
300 $address = $row->content_address;
// The key recorded in the first pass was built before fillMissingFields() could
// prepend 'bad:' to the address, so strip that prefix to get a matching key.
301 if ( substr( $address, 0, 4 ) ===
'bad:' ) {
302 $address = substr( $address, 4 );
304 $key = $row->content_model .
':' . $address;
305 $map[$key] = $row->content_id;
// Second pass: look up the content ID for each row; a missing entry is an error.
311 foreach ( $rows as $row ) {
312 $revisionId = $row->rev_id;
313 $contentId = $map[$contentKeys[$revisionId]] ??
false;
314 if ( $contentId ===
false ) {
315 throw new \RuntimeException(
"Content row for $revisionId not found after content insert" );
318 'slot_revision_id' => $revisionId,
319 'slot_role_id' => $this->mainRoleId,
320 'slot_content_id' => $contentId,
324 'slot_origin' => $revisionId,
// Insert the slot rows and add their number to both counters.
327 $this->dbw->insert(
'slots', $slotRows, __METHOD__ );
328 $this->count += count( $slotRows );
329 $this->totalCount += count( $slotRows );
330 }
catch ( \Exception $e ) {
// Report the failing batch, including the full exception text, and stop the script.
332 $this->
fatalError(
"Failed to populate content table $table row batch starting at $startId "
333 .
"due to exception: " . $e->__toString() );
/**
 * Returns the content model for a row.
 *
 * @param object $row Row whose content_model property is checked.
 */
343 private function getContentModel( $row ) {
// Use the model stored on the row when there is one; the fallback path is not
// visible in this listing.
344 if ( isset( $row->content_model ) ) {
345 return $row->content_model;
/**
 * Passes a message to output() with a trailing newline appended.
 *
 * @param string $msg Message text, without the trailing newline.
 */
356 private function writeln( $msg ) {
357 $this->
output(
"$msg\n" );
/**
 * Fills in fields that are missing on a row: content_model, and a sha1
 * computed from the blob fetched for the given address.
 *
 * @param object $row Row whose content_model, len and sha1 properties are checked.
 * @param string $model Value assigned to $row->content_model when it is not set
 *  (presumably a content model name; confirm against getContentModel()).
 * @param string &$address Blob address, passed by reference; it can be rewritten
 *  with a 'bad:' prefix.
 */
368 private function fillMissingFields( $row, $model, &$address ) {
369 if ( !isset( $row->content_model ) ) {
371 $row->content_model = $model;
// Checks whether len and a non-empty sha1 are already present; the body of this
// check is not visible in this listing.
374 if ( isset( $row->len ) && isset( $row->sha1 ) && $row->sha1 !==
'' ) {
// Fetch the blob for the address.
380 $blob = $this->blobStore->getBlob( $address );
// NOTE(review): presumably the failure path of the blob fetch; the surrounding
// lines are not visible. The address is marked with a 'bad:' prefix.
382 $address =
'bad:' . $address;
// NOTE(review): the handling of a missing len is not visible in this listing.
386 if ( !isset( $row->len ) ) {
// Compute the sha1 from the blob when the row has none or an empty one.
391 if ( !isset( $row->sha1 ) || $row->sha1 ===
'' ) {
392 $row->sha1 = SlotRecord::base36Sha1(
$blob );
static makeContent( $text, Title $title=null, $modelId=null, $format=null)
Convenience function for creating a Content object from a given textual representation.
static getDefaultModelFor(Title $title)
Returns the name of the default content model to be used for the page with the given title.
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
beginTransaction(IDatabase $dbw, $fname)
Begin a transaction on a DB.
commitTransaction(IDatabase $dbw, $fname)
Commit the transaction on a DB handle and wait for replica DBs to catch up.
output( $out, $channel=null)
Throw some output to the user.
hasOption( $name)
Checks to see if a particular option was set.
addDescription( $text)
Set the description text.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
rollbackTransaction(IDatabase $dbw, $fname)
Rollback the transaction on a DB handle.
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.