MediaWiki master
migrateLinksTable.php
Go to the documentation of this file.
1<?php
2
3// @codeCoverageIgnoreStart
4require_once __DIR__ . '/Maintenance.php';
5// @codeCoverageIgnoreEnd
6
9
/** @var int Running total of rows updated so far; incremented by updateProgress(). */
18 private $totalUpdated = 0;
/** @var int Value of $totalUpdated at the last progress report; used to space out reports. */
20 private $lastProgress = 0;
21
/**
 * Register the script description, its command-line options and the
 * default batch size.
 *
 * Options:
 *  - table: required, takes a value; name of the links table to migrate.
 *  - sleep: optional, takes a value; seconds to pause between batches.
 */
public function __construct() {
	parent::__construct();

	$this->addDescription( 'Populates normalization column in links tables.' );

	// Third argument: is the option required? Fourth: does it take a value?
	$this->addOption( 'table', 'Table name. Like pagelinks.', true, true );
	$this->addOption( 'sleep', 'Sleep time (in seconds) between every batch. Default: 0', false, true );

	$this->setBatchSize( 1000 );
}
41
/**
 * Build the key under which this run is recorded in the update log.
 *
 * The table name is appended to the class name so that each links table
 * gets its own key.
 *
 * @return string
 */
protected function getUpdateKey() {
	$tableName = $this->getOption( 'table', '' );

	return __CLASS__ . $tableName;
}
45
/**
 * Populate the normalized target column of the links table given by --table.
 *
 * The schema prerequisites are checked first; the table is then processed in
 * page_id ranges of one batch size each, from 0 up to the highest page_id.
 *
 * @return bool True when the work is finished or not needed (old columns
 *  already gone, or the page table is empty). False when a prerequisite is
 *  missing (unknown table mapping, missing target column, missing linktarget
 *  table), so that the run is not treated as a completed update.
 */
protected function doDBUpdates() {
	$dbw = $this->getDB( DB_PRIMARY );
	$mapping = \MediaWiki\Linker\LinksMigration::$mapping;
	$table = $this->getOption( 'table', '' );
	if ( !isset( $mapping[$table] ) ) {
		$this->output( "Mapping for this table doesn't exist yet.\n" );
		return false;
	}
	$targetColumn = $mapping[$table]['target_id'];
	if ( !$dbw->fieldExists( $table, $mapping[$table]['title'], __METHOD__ ) ) {
		// The legacy title column is gone, so there is nothing left to migrate.
		$this->output( "Old fields don't exist. There is no need to run this script\n" );
		return true;
	}
	if ( !$dbw->fieldExists( $table, $targetColumn, __METHOD__ ) ) {
		$this->output( "Run update.php to create the $targetColumn column.\n" );
		return false;
	}
	if ( !$dbw->tableExists( 'linktarget', __METHOD__ ) ) {
		$this->output( "Run update.php to create the linktarget table.\n" );
		// Nothing was migrated: report failure, consistent with the missing
		// column case above, instead of claiming the update succeeded.
		return false;
	}

	$this->output( "Populating the $targetColumn column\n" );

	// fetchField() returns the single selected value of the first row,
	// or false when the query yields no rows.
	$highestPageId = $dbw->newSelectQueryBuilder()
		->select( 'page_id' )
		->from( 'page' )
		->orderBy( 'page_id', 'DESC' )
		->caller( __METHOD__ )
		->fetchField();
	if ( $highestPageId === false ) {
		$this->output( "Page table is empty.\n" );
		return true;
	}
	$highestPageId = (int)$highestPageId;

	$pageId = 0;
	while ( $pageId <= $highestPageId ) {
		// Given the indexes and the structure of links tables,
		// we need to split the update into batches of pages.
		// Otherwise the queries will take a really long time in production and cause read-only.
		$this->handlePageBatch( $pageId, $mapping, $table );
		$pageId += $this->getBatchSize();
	}

	$this->output( "Completed normalization of $table, {$this->totalUpdated} rows updated.\n" );

	return true;
}
96
/**
 * Fill in the target column for every row of $table whose page id lies in
 * the range starting at $lowPageId and spanning one batch size.
 *
 * Each pass picks one row that still has a NULL or 0 target id, acquires the
 * link target id for that row's namespace and title, and then updates all
 * rows in the page range that share the same namespace and title. The loop
 * ends when no unmigrated row is left in the range.
 *
 * @param int $lowPageId Lowest page id of the range (inclusive)
 * @param array $mapping Per-table column map; the keys 'ns', 'title',
 *  'target_id' and 'page_id' of $mapping[$table] are read here
 * @param string $table Name of the links table being migrated
 */
private function handlePageBatch( $lowPageId, $mapping, $table ) {
	$columns = $mapping[$table];
	$nsColumn = $columns['ns'];
	$titleColumn = $columns['title'];
	$targetColumn = $columns['target_id'];
	$pageIdColumn = $columns['page_id'];

	// Both ends of the range are inclusive, hence the "- 1".
	$highPageId = $lowPageId + $this->getBatchSize() - 1;
	$dbw = $this->getPrimaryDB();

	for ( ; ; ) {
		// Look for a single row in the range that has not been migrated yet.
		$pending = $dbw->newSelectQueryBuilder()
			->select( [ $nsColumn, $titleColumn ] )
			->from( $table )
			->where( [
				$targetColumn => [ null, 0 ],
				$dbw->expr( $pageIdColumn, '>=', $lowPageId ),
				$dbw->expr( $pageIdColumn, '<=', $highPageId ),
			] )
			->limit( 1 )
			->caller( __METHOD__ )
			->fetchResultSet();
		if ( $pending->numRows() === 0 ) {
			// The whole page range is done.
			return;
		}

		$row = $pending->fetchRow();
		$ns = $row[$nsColumn];
		$titleString = $row[$titleColumn];

		$linkTargetLookup = $this->getServiceContainer()->getLinkTargetLookup();
		$id = $linkTargetLookup->acquireLinkTargetId(
			new TitleValue( (int)$ns, $titleString ),
			$dbw
		);

		// Update every unmigrated row in the range pointing at the same target.
		$dbw->newUpdateQueryBuilder()
			->update( $table )
			->set( [ $targetColumn => $id ] )
			->where( [
				$targetColumn => [ null, 0 ],
				$nsColumn => $ns,
				$titleColumn => $titleString,
				$dbw->expr( $pageIdColumn, '>=', $lowPageId ),
				$dbw->expr( $pageIdColumn, '<=', $highPageId ),
			] )
			->caller( __METHOD__ )
			->execute();

		$this->updateProgress( $dbw->affectedRows(), $lowPageId, $highPageId, $ns, $titleString );
	}
}
139
/**
 * Add the rows of the latest update to the running total and, once at least
 * one batch size worth of rows has accumulated since the last report, print
 * a progress line, wait for replication and optionally sleep.
 *
 * @param int $updatedInThisBatch Number of rows affected by the latest update
 * @param int $lowPageId Lowest page id of the range being processed
 * @param int $highPageId Highest page id of the range being processed
 * @param int|string $ns Namespace value of the link target just processed
 * @param string $titleString Title of the link target just processed
 */
private function updateProgress( $updatedInThisBatch, $lowPageId, $highPageId, $ns, $titleString ) {
	$this->totalUpdated += $updatedInThisBatch;

	$nextReportAt = $this->lastProgress + $this->getBatchSize();
	if ( $this->totalUpdated < $nextReportAt ) {
		// Not enough rows since the last report yet.
		return;
	}

	$this->lastProgress = $this->totalUpdated;
	$message = "Updated {$this->totalUpdated} rows, " .
		"at page_id $lowPageId-$highPageId title $ns:$titleString\n";
	$this->output( $message );
	$this->waitForReplication();

	// Optional extra pause (--sleep) giving replication time to catch up.
	$pauseSeconds = (int)$this->getOption( 'sleep', 0 );
	if ( $pauseSeconds > 0 ) {
		sleep( $pauseSeconds );
	}
}
164
165}
166
167// @codeCoverageIgnoreStart
// Publish this script's class name in $maintClass.
// NOTE(review): presumably read by the file required below to decide which class to run — confirm.
168$maintClass = MigrateLinksTable::class;
// NOTE(review): presumably runs the script only when this file is the entry point — confirm.
169require_once RUN_MAINTENANCE_IF_MAIN;
170// @codeCoverageIgnoreEnd
Class for scripts that perform database maintenance and want to log the update in updatelog so we can later skip it.
getBatchSize()
Returns batch size.
output( $out, $channel=null)
Throw some output to the user.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getDB( $db, $groups=[], $dbDomain=false)
Returns a database to be used by current maintenance script.
waitForReplication()
Wait for replica DB servers to catch up.
getOption( $name, $default=null)
Get an option, or return the default.
getServiceContainer()
Returns the main service container.
addDescription( $text)
Set the description text.
Represents the target of a wiki link.
Maintenance script that populates normalization column in links tables.
doDBUpdates()
Do the actual work.
getUpdateKey()
Get the update key name to go in the update log table.
__construct()
Default constructor.
const DB_PRIMARY
Definition defines.php:28