MediaWiki master
migrateLinksTable.php
Go to the documentation of this file.
1<?php
2
3// @codeCoverageIgnoreStart
4require_once __DIR__ . '/Maintenance.php';
5// @codeCoverageIgnoreEnd
6
9
/** @var int Running count of rows updated so far; incremented by updateProgress(). */
private int $totalUpdated = 0;

/** @var int Value of $totalUpdated when the last progress line was printed. */
private int $lastProgress = 0;
21
/**
 * Register the script description, its command-line options and the
 * default batch size.
 */
public function __construct() {
	parent::__construct();

	$this->addDescription( 'Populates normalization column in links tables.' );

	// --table: required, takes a value.
	$this->addOption( 'table', 'Table name. Like pagelinks.', true, true );

	// --sleep: optional, takes a value.
	$this->addOption(
		'sleep',
		'Sleep time (in seconds) between every batch. Default: 0',
		false,
		true
	);

	$this->setBatchSize( 1000 );
}
41
/**
 * Build the update-log key for this run: the class name followed by the
 * value of the --table option (empty string when the option is absent).
 *
 * @return string
 */
protected function getUpdateKey() {
	$tableName = $this->getOption( 'table', '' );

	return __CLASS__ . $tableName;
}
46
/**
 * Populate the normalized target-id column of the links table given by --table.
 *
 * Walks the page id range from 0 up to the highest page_id in the page table,
 * one batch-size window at a time, delegating each window to handlePageBatch().
 *
 * @return bool True when the update should be logged as done (migration finished,
 *  or there is nothing to migrate); false when a prerequisite is missing and the
 *  script has to be run again later.
 */
protected function doDBUpdates() {
	$dbw = $this->getDB( DB_PRIMARY );
	$mapping = \MediaWiki\Linker\LinksMigration::$mapping;
	$table = $this->getOption( 'table', '' );
	if ( !isset( $mapping[$table] ) ) {
		$this->output( "Mapping for this table doesn't exist yet.\n" );
		return false;
	}
	$targetColumn = $mapping[$table]['target_id'];
	if ( !$dbw->fieldExists( $table, $mapping[$table]['title'], __METHOD__ ) ) {
		// The old namespace/title columns are gone, so there is nothing left to migrate.
		$this->output( "Old fields don't exist. There is no need to run this script\n" );
		return true;
	}
	if ( !$dbw->fieldExists( $table, $targetColumn, __METHOD__ ) ) {
		$this->output( "Run update.php to create the $targetColumn column.\n" );
		return false;
	}
	if ( !$dbw->tableExists( 'linktarget', __METHOD__ ) ) {
		$this->output( "Run update.php to create the linktarget table.\n" );
		// Nothing was migrated: do not let the update be logged as completed,
		// otherwise the next run (after update.php) would be skipped.
		return false;
	}

	$this->output( "Populating the $targetColumn column\n" );

	$highestPageId = $dbw->newSelectQueryBuilder()
		->select( 'page_id' )
		->from( 'page' )
		->caller( __METHOD__ )
		->orderBy( 'page_id', 'DESC' )
		->fetchField();
	if ( !$highestPageId ) {
		$this->output( "Page table is empty.\n" );
		return true;
	}
	// Compare page ids as integers rather than relying on numeric-string comparison.
	$highestPageId = (int)$highestPageId;

	$pageId = 0;
	while ( $pageId <= $highestPageId ) {
		// Given the indexes and the structure of links tables,
		// we need to split the update into batches of pages.
		// Otherwise the queries will take a really long time in production and cause read-only.
		$this->handlePageBatch( $pageId, $mapping, $table );
		$pageId += $this->getBatchSize();
	}

	$this->output( "Completed normalization of $table, {$this->totalUpdated} rows updated.\n" );

	return true;
}
96
/**
 * Fill in the target-id column for every not-yet-migrated row of $table whose
 * page id lies in [$lowPageId, $lowPageId + batch size - 1].
 *
 * Each iteration picks one unmigrated row, acquires the link target id for its
 * namespace/title pair, and then updates all unmigrated rows in the page range
 * that share that pair. The loop ends once no unmigrated row is left in the range.
 *
 * @param int $lowPageId Lowest page id (inclusive) of the window to process.
 * @param array $mapping Column mapping, indexed by table name; the entry for
 *  $table must provide the 'target_id', 'page_id', 'ns' and 'title' keys.
 * @param string $table Name of the links table being migrated.
 */
private function handlePageBatch( int $lowPageId, array $mapping, string $table ) {
	$columns = $mapping[$table];
	$targetColumn = $columns['target_id'];
	$pageIdColumn = $columns['page_id'];
	$nsColumn = $columns['ns'];
	$titleColumn = $columns['title'];

	// Both ends of the range are inclusive, hence the "- 1".
	$highPageId = $lowPageId + $this->getBatchSize() - 1;
	$dbw = $this->getPrimaryDB();

	for ( ; ; ) {
		// Pick any single row in the window that has no target id yet.
		$pending = $dbw->newSelectQueryBuilder()
			->select( [ $nsColumn, $titleColumn ] )
			->from( $table )
			->where( [
				$targetColumn => [ null, 0 ],
				$dbw->expr( $pageIdColumn, '>=', $lowPageId ),
				$dbw->expr( $pageIdColumn, '<=', $highPageId ),
			] )
			->limit( 1 )
			->caller( __METHOD__ )
			->fetchResultSet();
		if ( !$pending->numRows() ) {
			// Nothing left to migrate in this window.
			return;
		}

		$row = $pending->fetchRow();
		$ns = $row[$nsColumn];
		$titleString = $row[$titleColumn];

		$linkTarget = new TitleValue( (int)$ns, $titleString );
		$linkTargetLookup = $this->getServiceContainer()->getLinkTargetLookup();
		$id = $linkTargetLookup->acquireLinkTargetId( $linkTarget, $dbw );

		// Update every unmigrated row in the window pointing at the same namespace/title.
		$dbw->newUpdateQueryBuilder()
			->update( $table )
			->set( [ $targetColumn => $id ] )
			->where( [
				$targetColumn => [ null, 0 ],
				$nsColumn => $ns,
				$titleColumn => $titleString,
				$dbw->expr( $pageIdColumn, '>=', $lowPageId ),
				$dbw->expr( $pageIdColumn, '<=', $highPageId ),
			] )
			->caller( __METHOD__ )
			->execute();

		$this->updateProgress( $dbw->affectedRows(), $lowPageId, $highPageId, $ns, $titleString );
	}
}
139
/**
 * Add the rows updated by the latest query to the running total and, once at
 * least one batch size worth of rows has accumulated since the last report,
 * print a progress line, wait for replication and optionally sleep.
 *
 * @param int $updatedInThisBatch Number of rows changed by the latest update query.
 * @param int $lowPageId Lower bound of the page id window, used in the message.
 * @param int $highPageId Upper bound of the page id window, used in the message.
 * @param int|string $ns Namespace of the title just processed, used in the message.
 * @param string $titleString Title just processed, used in the message.
 */
private function updateProgress( $updatedInThisBatch, $lowPageId, $highPageId, $ns, $titleString ) {
	$this->totalUpdated += $updatedInThisBatch;

	$nextReportAt = $this->lastProgress + $this->getBatchSize();
	if ( $this->totalUpdated < $nextReportAt ) {
		// Not enough rows since the last report yet.
		return;
	}

	$this->lastProgress = $this->totalUpdated;
	$this->output(
		"Updated {$this->totalUpdated} rows, " .
		"at page_id $lowPageId-$highPageId title $ns:$titleString\n"
	);
	$this->waitForReplication();

	// Optional extra pause (--sleep, in seconds) so replication can catch up.
	$pauseSeconds = (int)$this->getOption( 'sleep', 0 );
	if ( $pauseSeconds > 0 ) {
		sleep( $pauseSeconds );
	}
}
164
165}
166
167// @codeCoverageIgnoreStart
168$maintClass = MigrateLinksTable::class;
169require_once RUN_MAINTENANCE_IF_MAIN;
170// @codeCoverageIgnoreEnd
const DB_PRIMARY
Definition defines.php:28
Class for scripts that perform database maintenance and want to log the update in updatelog so we can...
getBatchSize()
Returns batch size.
output( $out, $channel=null)
Throw some output to the user.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getDB( $db, $groups=[], $dbDomain=false)
Returns a database to be used by current maintenance script.
waitForReplication()
Wait for replica DB servers to catch up.
getOption( $name, $default=null)
Get an option, or return the default.
getServiceContainer()
Returns the main service container.
addDescription( $text)
Set the description text.
Represents the target of a wiki link.
Maintenance script that populates normalization column in links tables.
doDBUpdates()
Do the actual work. All child classes will need to implement this. Return true to log the update as done...
getUpdateKey()
Get the update key name to go in the update log table. Returns: string
__construct()
Default constructor.