MediaWiki master
migrateLinksTable.php
Go to the documentation of this file.
1<?php
2
3// @codeCoverageIgnoreStart
4require_once __DIR__ . '/Maintenance.php';
5// @codeCoverageIgnoreEnd
6
9
/** @var int Running total of rows updated by this run, across all batches */
private $totalUpdated = 0;

/** @var int Value of $totalUpdated at the time progress was last reported */
private $lastProgress = 0;
21
/**
 * Register the script description, its command-line options and the
 * default batch size.
 */
public function __construct() {
	parent::__construct();

	$this->addDescription( 'Populates normalization column in links tables.' );

	// Required, takes a value: the links table to migrate.
	$this->addOption( 'table', 'Table name. Like pagelinks.', true, true );

	// Optional, takes a value: pause applied after each progress report.
	$this->addOption( 'sleep', 'Sleep time (in seconds) between every batch. Default: 0', false, true );

	$this->setBatchSize( 1000 );
}
41
/**
 * Build the update-log key for this run: the class name followed by the
 * value of the --table option (empty string when the option is absent).
 *
 * @return string
 */
protected function getUpdateKey() {
	$table = $this->getOption( 'table', '' );

	return __CLASS__ . $table;
}
46
/**
 * Populate the normalized target-id column of the table named by --table.
 *
 * Pre-flight checks run first. The table is then processed in windows of
 * getBatchSize() page ids, starting at 0 and ending once the window start
 * passes the highest page_id found in the page table.
 *
 * @return bool True when the update may be logged as done (the work was
 *  completed, or there is nothing to do); false when a prerequisite is
 *  missing, so that the update is not logged as done.
 */
protected function doDBUpdates() {
	$dbw = $this->getDB( DB_PRIMARY );
	$mapping = \MediaWiki\Linker\LinksMigration::$mapping;
	$table = $this->getOption( 'table', '' );
	if ( !isset( $mapping[$table] ) ) {
		$this->output( "Mapping for this table doesn't exist yet.\n" );
		return false;
	}

	$targetColumn = $mapping[$table]['target_id'];
	if ( !$dbw->fieldExists( $table, $mapping[$table]['title'], __METHOD__ ) ) {
		// The old title column is gone, so there is nothing left to migrate.
		$this->output( "Old fields don't exist. There is no need to run this script\n" );
		return true;
	}
	if ( !$dbw->fieldExists( $table, $targetColumn, __METHOD__ ) ) {
		$this->output( "Run update.php to create the $targetColumn column.\n" );
		return false;
	}
	if ( !$dbw->tableExists( 'linktarget', __METHOD__ ) ) {
		$this->output( "Run update.php to create the linktarget table.\n" );
		// Nothing was migrated: report failure, like the missing-column case
		// above, so the update is not logged as done.
		return false;
	}

	$this->output( "Populating the $targetColumn column\n" );

	$highestPageRow = $dbw->newSelectQueryBuilder()
		->select( 'page_id' )
		->from( 'page' )
		->orderBy( 'page_id', 'DESC' )
		->limit( 1 )
		->caller( __METHOD__ )
		->fetchResultSet()->fetchRow();
	if ( !$highestPageRow ) {
		$this->output( "Page table is empty.\n" );
		return true;
	}
	$highestPageId = (int)$highestPageRow[0];

	// Given the indexes and the structure of links tables,
	// we need to split the update into batches of pages.
	// Otherwise the queries will take a really long time in production and cause read-only.
	$batchSize = $this->getBatchSize();
	for ( $pageId = 0; $pageId <= $highestPageId; $pageId += $batchSize ) {
		$this->handlePageBatch( $pageId, $mapping, $table );
	}

	$this->output( "Completed normalization of $table, {$this->totalUpdated} rows updated.\n" );

	return true;
}
98
/**
 * Fill the target-id column for every not-yet-migrated row of $table whose
 * page id lies in one window of page ids.
 *
 * Each pass selects one row in the window whose target-id column is NULL
 * or 0, acquires the link target id for its (namespace, title) pair, and
 * writes that id to all such rows in the window sharing the pair. The loop
 * ends when the select finds no remaining row.
 *
 * @param int $lowPageId First page id of the window (inclusive); the window
 *  spans getBatchSize() page ids
 * @param array $mapping Column-name mapping keyed by table name; the entry
 *  for $table must provide 'target_id', 'page_id', 'ns' and 'title'
 * @param string $table Links table to update
 */
private function handlePageBatch( int $lowPageId, array $mapping, string $table ) {
	$columns = $mapping[$table];
	$targetColumn = $columns['target_id'];
	$pageIdColumn = $columns['page_id'];
	$nsColumn = $columns['ns'];
	$titleColumn = $columns['title'];

	// range is inclusive, let's subtract one.
	$highPageId = $lowPageId + $this->getBatchSize() - 1;

	$dbw = $this->getPrimaryDB();
	// Loop-invariant, so resolve the lookup service once per window.
	$linkTargetLookup = $this->getServiceContainer()->getLinkTargetLookup();

	while ( true ) {
		// Pick any one row in the window that still lacks a target id.
		$res = $dbw->newSelectQueryBuilder()
			->select( [ $nsColumn, $titleColumn ] )
			->from( $table )
			->where( [
				$targetColumn => [ null, 0 ],
				$dbw->expr( $pageIdColumn, '>=', $lowPageId ),
				$dbw->expr( $pageIdColumn, '<=', $highPageId ),
			] )
			->limit( 1 )
			->caller( __METHOD__ )
			->fetchResultSet();
		if ( !$res->numRows() ) {
			break;
		}

		$row = $res->fetchRow();
		$ns = $row[$nsColumn];
		$titleString = $row[$titleColumn];
		$title = new TitleValue( (int)$ns, $titleString );
		$id = $linkTargetLookup->acquireLinkTargetId( $title, $dbw );

		// Stamp the id on every unmigrated row in the window with this pair.
		$dbw->newUpdateQueryBuilder()
			->update( $table )
			->set( [ $targetColumn => $id ] )
			->where( [
				$targetColumn => [ null, 0 ],
				$nsColumn => $ns,
				$titleColumn => $titleString,
				$dbw->expr( $pageIdColumn, '>=', $lowPageId ),
				$dbw->expr( $pageIdColumn, '<=', $highPageId ),
			] )
			->caller( __METHOD__ )->execute();

		$this->updateProgress( $dbw->affectedRows(), $lowPageId, $highPageId, $ns, $titleString );
	}
}
141
/**
 * Add one update's row count to the running total and, once at least
 * getBatchSize() rows have been updated since the previous report, print a
 * progress line, wait for replication and optionally sleep.
 *
 * @param int $updatedInThisBatch Number of rows affected by the last update
 * @param int $lowPageId First page id of the current window
 * @param int $highPageId Last page id of the current window
 * @param int|string $ns Namespace value of the pair just processed
 * @param string $titleString Title value of the pair just processed
 */
private function updateProgress( $updatedInThisBatch, $lowPageId, $highPageId, $ns, $titleString ) {
	$this->totalUpdated += $updatedInThisBatch;

	$nextReportAt = $this->lastProgress + $this->getBatchSize();
	if ( $this->totalUpdated < $nextReportAt ) {
		// Not enough rows since the last report yet.
		return;
	}

	$this->lastProgress = $this->totalUpdated;
	$this->output( "Updated {$this->totalUpdated} rows, " .
		"at page_id $lowPageId-$highPageId title $ns:$titleString\n" );
	$this->waitForReplication();

	// Sleep between batches for replication to catch up
	$pause = (int)$this->getOption( 'sleep', 0 );
	if ( $pause > 0 ) {
		sleep( $pause );
	}
}
166
167}
168
// @codeCoverageIgnoreStart
// Name this script's class, then include the maintenance runner.
// NOTE(review): $maintClass is presumably read by the file included via
// RUN_MAINTENANCE_IF_MAIN; that file is not visible here - confirm.
$maintClass = MigrateLinksTable::class;
require_once RUN_MAINTENANCE_IF_MAIN;
// @codeCoverageIgnoreEnd
Class for scripts that perform database maintenance and want to log the update in updatelog so we can...
getBatchSize()
Returns batch size.
output( $out, $channel=null)
Throw some output to the user.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getDB( $db, $groups=[], $dbDomain=false)
Returns a database to be used by current maintenance script.
waitForReplication()
Wait for replica DB servers to catch up.
getOption( $name, $default=null)
Get an option, or return the default.
getServiceContainer()
Returns the main service container.
addDescription( $text)
Set the description text.
Represents the target of a wiki link.
Maintenance script that populates normalization column in links tables.
doDBUpdates()
Do the actual work. All child classes will need to implement this. Return true to log the update as do...
getUpdateKey()
Get the update key name to go in the update log table. Returns: string
__construct()
Default constructor.
const DB_PRIMARY
Definition defines.php:28