28use Wikimedia\Assert\Assert;
32require_once __DIR__ .
'/Maintenance.php';
61 parent::__construct();
64 $this->
addOption(
'table',
'revision or archive table, or `all` to populate both',
false,
67 'Reuse content table rows when the address and model are the same. '
68 .
'This will increase the script\'s time and memory usage, perhaps significantly.',
70 $this->
addOption(
'start-revision',
'The rev_id to start at',
false,
true );
71 $this->
addOption(
'start-archive',
'The ar_rev_id to start at',
false,
true );
77 $services = MediaWikiServices::getInstance();
78 $this->contentModelStore = $services->getContentModelStore();
79 $this->slotRoleStore = $services->getSlotRoleStore();
80 $this->blobStore = $services->getBlobStore();
84 $this->contentModelStore->reloadMap();
85 $this->slotRoleStore->reloadMap();
86 $this->mainRoleId = $this->slotRoleStore->acquireId( SlotRecord::MAIN );
90 $t0 = microtime(
true );
94 if ( $this->
getOption(
'reuse-content',
false ) ) {
98 foreach ( $this->
getTables() as $table ) {
102 $elapsed = microtime(
true ) - $t0;
103 $this->
writeln(
"Done. Processed $this->totalCount rows in $elapsed seconds" );
111 $table = $this->
getOption(
'table',
'all' );
112 $validTableOptions = [
'all',
'revision',
'archive' ];
114 if ( !in_array( $table, $validTableOptions ) ) {
115 $this->
fatalError(
'Invalid table. Must be either `revision` or `archive` or `all`' );
118 if ( $table ===
'all' ) {
119 $tables = [
'revision',
'archive' ];
121 $tables = [ $table ];
128 $t0 = microtime(
true );
129 $this->
writeln(
"Loading existing content table rows..." );
130 $this->contentRowMap = [];
136 [
'content_id',
'content_address',
'content_model' ],
137 $from ?
"content_id > $from" :
'',
139 [
'ORDER BY' =>
'content_id',
'LIMIT' => $this->
getBatchSize() ]
144 foreach (
$res as $row ) {
145 $from = $row->content_id;
146 $this->contentRowMap[
"{$row->content_model}:{$row->content_address}"] = $row->content_id;
149 $elapsed = microtime(
true ) - $t0;
150 $this->
writeln(
"Loaded " . count( $this->contentRowMap ) .
" rows in $elapsed seconds" );
157 $t0 = microtime(
true );
159 $this->
writeln(
"Populating $table..." );
161 if ( $table ===
'revision' ) {
163 $tables = [
'revision',
'slots',
'page' ];
167 'sha1' =>
'rev_sha1',
168 'text_id' =>
'rev_text_id',
169 'content_model' =>
'rev_content_model',
170 'namespace' =>
'page_namespace',
171 'title' =>
'page_title',
174 'slots' => [
'LEFT JOIN',
'rev_id=slot_revision_id' ],
175 'page' => [
'LEFT JOIN',
'rev_page=page_id' ],
177 $startOption =
'start-revision';
179 $idField =
'ar_rev_id';
180 $tables = [
'archive',
'slots' ];
182 'rev_id' =>
'ar_rev_id',
185 'text_id' =>
'ar_text_id',
186 'content_model' =>
'ar_content_model',
187 'namespace' =>
'ar_namespace',
188 'title' =>
'ar_title',
191 'slots' => [
'LEFT JOIN',
'ar_rev_id=slot_revision_id' ],
193 $startOption =
'start-archive';
196 if ( !$this->dbw->fieldExists( $table, $fields[
'text_id'], __METHOD__ ) ) {
197 $this->
writeln(
"No need to populate, $table.{$fields['text_id']} field does not exist" );
201 $minmax = $this->dbw->selectRow(
203 [
'min' =>
"MIN( $idField )",
'max' =>
"MAX( $idField )" ],
207 if ( $this->
hasOption( $startOption ) ) {
208 $minmax->min = (int)$this->
getOption( $startOption );
210 if ( !$minmax || !is_numeric( $minmax->min ) || !is_numeric( $minmax->max ) ) {
212 $minmax = (object)[
'min' => 1,
'max' => 0 ];
217 for ( $startId = (
int)$minmax->min; $startId <= $minmax->max; $startId += $batchSize ) {
218 $endId = (int)min( $startId + $batchSize - 1, $minmax->max );
219 $rows = $this->dbw->select(
223 "$idField >= $startId",
224 "$idField <= $endId",
225 'slot_revision_id IS NULL',
228 [
'ORDER BY' =>
'rev_id' ],
231 if ( $rows->numRows() !== 0 ) {
235 $elapsed = microtime(
true ) - $t0;
237 "... $table processed up to revision id $endId of {$minmax->max}"
238 .
" ($this->count rows in $elapsed seconds)"
242 $elapsed = microtime(
true ) - $t0;
243 $this->
writeln(
"Done populating $table table. Processed $this->count rows in $elapsed seconds" );
255 if ( $this->contentRowMap ===
null ) {
265 foreach ( $rows as $row ) {
266 $revisionId = $row->rev_id;
268 Assert::invariant( $revisionId !==
null,
'rev_id must not be null' );
271 $modelId = $this->contentModelStore->acquireId( $model );
272 $address = SqlBlobStore::makeAddressFromTextId( $row->text_id );
274 $key =
"{$modelId}:{$address}";
275 $contentKeys[$revisionId] = $key;
277 if ( !isset( $map[$key] ) ) {
281 'content_size' => (int)$row->len,
282 'content_sha1' => $row->sha1,
283 'content_model' => $modelId,
284 'content_address' => $address,
290 if ( $contentRows ) {
291 $id = $this->dbw->selectField(
'content',
'MAX(content_id)',
'', __METHOD__ );
292 $this->dbw->insert(
'content', $contentRows, __METHOD__ );
293 $res = $this->dbw->select(
295 [
'content_id',
'content_model',
'content_address' ],
296 'content_id > ' . (
int)$id,
299 foreach (
$res as $row ) {
300 $address = $row->content_address;
301 if ( substr( $address, 0, 4 ) ===
'bad:' ) {
302 $address = substr( $address, 4 );
304 $key = $row->content_model .
':' . $address;
305 $map[$key] = $row->content_id;
311 foreach ( $rows as $row ) {
312 $revisionId = $row->rev_id;
313 $contentId = $map[$contentKeys[$revisionId]] ??
false;
314 if ( $contentId ===
false ) {
315 throw new \RuntimeException(
"Content row for $revisionId not found after content insert" );
318 'slot_revision_id' => $revisionId,
320 'slot_content_id' => $contentId,
324 'slot_origin' => $revisionId,
327 $this->dbw->insert(
'slots', $slotRows, __METHOD__ );
328 $this->count += count( $slotRows );
329 $this->totalCount += count( $slotRows );
330 }
catch ( \Exception $e ) {
332 $this->
fatalError(
"Failed to populate content table $table row batch starting at $startId "
333 .
"due to exception: " . $e->__toString() );
344 if ( isset( $row->content_model ) ) {
345 return $row->content_model;
348 $title = Title::makeTitle( $row->namespace, $row->title );
350 return ContentHandler::getDefaultModelFor(
$title );
357 $this->
output(
"$msg\n" );
369 if ( !isset( $row->content_model ) ) {
371 $row->content_model = $model;
374 if ( isset( $row->len ) && isset( $row->sha1 ) && $row->sha1 !==
'' ) {
380 $blob = $this->blobStore->getBlob( $address );
382 $address =
'bad:' . $address;
386 if ( !isset( $row->len ) ) {
388 $row->len = ContentHandler::makeContent(
$blob,
null, $model )->getSize();
391 if ( !isset( $row->sha1 ) || $row->sha1 ===
'' ) {
392 $row->sha1 = SlotRecord::base36Sha1(
$blob );
398require_once RUN_MAINTENANCE_IF_MAIN;
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
beginTransaction(IDatabase $dbw, $fname)
Begin a transaction on a DB.
commitTransaction(IDatabase $dbw, $fname)
Commit the transaction on a DB handle and wait for replica DBs to catch up.
output( $out, $channel=null)
Throw some output to the user.
hasOption( $name)
Checks to see if a particular option was set.
getBatchSize()
Returns batch size.
addDescription( $text)
Set the description text.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
rollbackTransaction(IDatabase $dbw, $fname)
Rollback the transaction on a DB handle.
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.
Populate the content and slot tables.
array|null $contentRowMap
Map "{$modelId}:{$address}" to content_id.
NameTableStore $slotRoleStore
populateContentTablesForRowBatch(IResultWrapper $rows, $startId, $table)
__construct()
Default constructor.
execute()
Do the actual work.
fillMissingFields( $row, $model, &$address)
Compute any missing fields in $row.
NameTableStore $contentModelStore