27 use Wikimedia\Assert\Assert;
31 require_once __DIR__ .
'/Maintenance.php';
60 parent::__construct();
63 $this->
addOption(
'table',
'revision or archive table, or `all` to populate both',
false,
66 'Reuse content table rows when the address and model are the same. '
67 .
'This will increase the script\'s time and memory usage, perhaps significantly.',
69 $this->
addOption(
'start-revision',
'The rev_id to start at',
false,
true );
70 $this->
addOption(
'start-archive',
'The ar_rev_id to start at',
false,
true );
76 $this->contentModelStore = MediaWikiServices::getInstance()->getContentModelStore();
77 $this->slotRoleStore = MediaWikiServices::getInstance()->getSlotRoleStore();
78 $this->blobStore = MediaWikiServices::getInstance()->getBlobStore();
82 $this->contentModelStore->reloadMap();
83 $this->slotRoleStore->reloadMap();
84 $this->mainRoleId = $this->slotRoleStore->acquireId( SlotRecord::MAIN );
88 $multiContentRevisionSchemaMigrationStage =
89 $this->
getConfig()->get(
'MultiContentRevisionSchemaMigrationStage' );
91 $t0 = microtime(
true );
95 '...cannot update while \$wgMultiContentRevisionSchemaMigrationStage '
96 .
'does not have the SCHEMA_COMPAT_WRITE_NEW bit set.'
103 if ( $this->
getOption(
'reuse-content',
false ) ) {
107 foreach ( $this->
getTables() as $table ) {
111 $elapsed = microtime(
true ) - $t0;
112 $this->
writeln(
"Done. Processed $this->totalCount rows in $elapsed seconds" );
120 $table = $this->
getOption(
'table',
'all' );
121 $validTableOptions = [
'all',
'revision',
'archive' ];
123 if ( !in_array( $table, $validTableOptions ) ) {
124 $this->
fatalError(
'Invalid table. Must be either `revision` or `archive` or `all`' );
127 if ( $table ===
'all' ) {
128 $tables = [
'revision',
'archive' ];
130 $tables = [ $table ];
137 $t0 = microtime(
true );
138 $this->
writeln(
"Loading existing content table rows..." );
139 $this->contentRowMap = [];
145 [
'content_id',
'content_address',
'content_model' ],
146 $from ?
"content_id > $from" :
'',
148 [
'ORDER BY' =>
'content_id',
'LIMIT' => $this->
getBatchSize() ]
153 foreach (
$res as $row ) {
154 $from = $row->content_id;
155 $this->contentRowMap[
"{$row->content_model}:{$row->content_address}"] = $row->content_id;
158 $elapsed = microtime(
true ) - $t0;
159 $this->
writeln(
"Loaded " . count( $this->contentRowMap ) .
" rows in $elapsed seconds" );
166 $t0 = microtime(
true );
168 $this->
writeln(
"Populating $table..." );
170 if ( $table ===
'revision' ) {
172 $tables = [
'revision',
'slots',
'page' ];
176 'sha1' =>
'rev_sha1',
177 'text_id' =>
'rev_text_id',
178 'content_model' =>
'rev_content_model',
179 'namespace' =>
'page_namespace',
180 'title' =>
'page_title',
183 'slots' => [
'LEFT JOIN',
'rev_id=slot_revision_id' ],
184 'page' => [
'LEFT JOIN',
'rev_page=page_id' ],
186 $startOption =
'start-revision';
188 $idField =
'ar_rev_id';
189 $tables = [
'archive',
'slots' ];
191 'rev_id' =>
'ar_rev_id',
194 'text_id' =>
'ar_text_id',
195 'content_model' =>
'ar_content_model',
196 'namespace' =>
'ar_namespace',
197 'title' =>
'ar_title',
200 'slots' => [
'LEFT JOIN',
'ar_rev_id=slot_revision_id' ],
202 $startOption =
'start-archive';
205 if ( !$this->dbw->fieldExists( $table, $fields[
'text_id'], __METHOD__ ) ) {
206 $this->
writeln(
"No need to populate, $table.{$fields['text_id']} field does not exist" );
210 $minmax = $this->dbw->selectRow(
212 [
'min' =>
"MIN( $idField )",
'max' =>
"MAX( $idField )" ],
216 if ( $this->
hasOption( $startOption ) ) {
217 $minmax->min = (int)$this->
getOption( $startOption );
219 if ( !$minmax || !is_numeric( $minmax->min ) || !is_numeric( $minmax->max ) ) {
221 $minmax = (object)[
'min' => 1,
'max' => 0 ];
226 for ( $startId = $minmax->min; $startId <= $minmax->max; $startId += $batchSize ) {
227 $endId = min( $startId + $batchSize - 1, $minmax->max );
228 $rows = $this->dbw->select(
232 "$idField >= $startId",
233 "$idField <= $endId",
234 'slot_revision_id IS NULL',
237 [
'ORDER BY' =>
'rev_id' ],
240 if ( $rows->numRows() !== 0 ) {
244 $elapsed = microtime(
true ) - $t0;
246 "... $table processed up to revision id $endId of {$minmax->max}"
247 .
" ($this->count rows in $elapsed seconds)"
251 $elapsed = microtime(
true ) - $t0;
252 $this->
writeln(
"Done populating $table table. Processed $this->count rows in $elapsed seconds" );
264 if ( $this->contentRowMap ===
null ) {
274 foreach ( $rows as $row ) {
275 $revisionId = $row->rev_id;
277 Assert::invariant( $revisionId !==
null,
'rev_id must not be null' );
280 $modelId = $this->contentModelStore->acquireId( $model );
281 $address = SqlBlobStore::makeAddressFromTextId( $row->text_id );
283 $key =
"{$modelId}:{$address}";
284 $contentKeys[$revisionId] = $key;
286 if ( !isset( $map[$key] ) ) {
291 'content_size' => (int)$row->len,
292 'content_sha1' => $row->sha1,
293 'content_model' => $modelId,
294 'content_address' => $address,
300 if ( $contentRows ) {
301 $id = $this->dbw->selectField(
'content',
'MAX(content_id)',
'', __METHOD__ );
302 $this->dbw->insert(
'content', $contentRows, __METHOD__ );
303 $res = $this->dbw->select(
305 [
'content_id',
'content_model',
'content_address' ],
306 'content_id > ' . (
int)$id,
309 foreach (
$res as $row ) {
310 $key = $row->content_model .
':' . $row->content_address;
311 $map[$key] = $row->content_id;
317 foreach ( $rows as $row ) {
318 $revisionId = $row->rev_id;
319 $contentId = $map[$contentKeys[$revisionId]] ??
false;
320 if ( $contentId ===
false ) {
321 throw new \RuntimeException(
"Content row for $revisionId not found after content insert" );
324 'slot_revision_id' => $revisionId,
326 'slot_content_id' => $contentId,
330 'slot_origin' => $revisionId,
333 $this->dbw->insert(
'slots', $slotRows, __METHOD__ );
334 $this->count += count( $slotRows );
335 $this->totalCount += count( $slotRows );
336 }
catch ( \Exception $e ) {
338 $this->
fatalError(
"Failed to populate content table $table row batch starting at $startId "
339 .
"due to exception: " . $e->__toString() );
350 if ( isset( $row->content_model ) ) {
351 return $row->content_model;
363 $this->
output(
"$msg\n" );
375 if ( !isset( $row->content_model ) ) {
377 $row->content_model = $model;
380 if ( isset( $row->len ) && isset( $row->sha1 ) && $row->sha1 !==
'' ) {
385 $blob = $this->blobStore->getBlob( $address );
387 if ( !isset( $row->len ) ) {
395 if ( !isset( $row->sha1 ) || $row->sha1 ===
'' ) {
396 $row->sha1 = SlotRecord::base36Sha1(
$blob );