MediaWiki master
migrateLinksTable.php
Go to the documentation of this file.
1<?php
2
3// @codeCoverageIgnoreStart
4require_once __DIR__ . '/Maintenance.php';
5// @codeCoverageIgnoreEnd
6
8
/**
 * Registers the script description, its command-line options and the
 * batch size used by this script.
 *
 * Options:
 *  - table: required, takes a value.
 *  - sleep: optional, takes a value.
 */
public function __construct() {
	parent::__construct();

	$this->addDescription( 'Populates normalization column in links tables.' );

	// addOption( name, description, required, withArg )
	$this->addOption( 'table', 'Table name. Like pagelinks.', true, true );
	$this->addOption( 'sleep', 'Sleep time (in seconds) between every batch. Default: 0', false, true );

	$this->setBatchSize( 1000 );
}
35
/**
 * Builds the update key for this run: the class name immediately followed
 * by the value of the 'table' option (empty string when the option is absent).
 *
 * @return string
 */
protected function getUpdateKey() {
	$tableName = $this->getOption( 'table', '' );

	return __CLASS__ . $tableName;
}
39
/**
 * Backfills the normalized target column of the links table named by the
 * 'table' option.
 *
 * After checking the prerequisites, walks the page ID space from 0 up to the
 * highest page_id in windows of getBatchSize() page IDs and delegates each
 * window to handlePageBatch().
 *
 * @return bool True when there is nothing to migrate or the backfill ran to
 *  completion; false when a prerequisite (mapping, target column, linktarget
 *  table) is missing.
 */
protected function doDBUpdates() {
	$dbw = $this->getDB( DB_PRIMARY );
	$mapping = \MediaWiki\Linker\LinksMigration::$mapping;
	$table = $this->getOption( 'table', '' );
	if ( !isset( $mapping[$table] ) ) {
		$this->output( "Mapping for this table doesn't exist yet.\n" );
		return false;
	}
	$targetColumn = $mapping[$table]['target_id'];
	if ( !$dbw->fieldExists( $table, $mapping[$table]['title'], __METHOD__ ) ) {
		// The old title column is gone, so there is nothing left to migrate from.
		$this->output( "Old fields don't exist. There is no need to run this script\n" );
		return true;
	}
	if ( !$dbw->fieldExists( $table, $targetColumn, __METHOD__ ) ) {
		$this->output( "Run update.php to create the $targetColumn column.\n" );
		return false;
	}
	if ( !$dbw->tableExists( 'linktarget', __METHOD__ ) ) {
		$this->output( "Run update.php to create the linktarget table.\n" );
		// A missing prerequisite is a failure, exactly like the missing target
		// column above. Returning true here would report success although nothing
		// was migrated (NOTE(review): presumably the parent class would then log
		// the update key as done and skip later runs - confirm).
		return false;
	}

	$this->output( "Populating the $targetColumn column\n" );
	$updated = 0;

	$highestPageRow = $dbw->newSelectQueryBuilder()
		->select( 'page_id' )
		->from( 'page' )
		->limit( 1 )
		->caller( __METHOD__ )
		->orderBy( 'page_id', 'DESC' )
		->fetchResultSet()->fetchRow();
	if ( !$highestPageRow ) {
		$this->output( "Page table is empty.\n" );
		return true;
	}
	// Normalize to int so the loop condition below compares numbers.
	$highestPageId = (int)$highestPageRow[0];
	$pageId = 0;
	while ( $pageId <= $highestPageId ) {
		// Given the indexes and the structure of links tables,
		// we need to split the update into batches of pages.
		// Otherwise the queries will take a really long time in production and cause read-only.
		$updated += $this->handlePageBatch( $pageId, $mapping, $table );
		$pageId += $this->getBatchSize();
	}

	$this->output( "Completed normalization of $table, $updated rows updated.\n" );

	return true;
}
90
/**
 * Backfills the target column for link rows whose page ID falls inside one
 * window of page IDs.
 *
 * Repeatedly picks a single row in the window whose target column is NULL or 0,
 * acquires the link target ID for that row's namespace and title, and writes it
 * to every not-yet-migrated row in the window with the same namespace and title.
 * The loop ends once no such row is left.
 *
 * @param int $lowPageId First page ID of the window (inclusive)
 * @param array $mapping Column mapping per table (keys read: target_id, page_id, ns, title)
 * @param string $table Name of the links table to update
 * @return int Total number of rows updated in this window
 */
private function handlePageBatch( $lowPageId, $mapping, $table ) {
	$columns = $mapping[$table];
	$targetColumn = $columns['target_id'];
	$pageIdColumn = $columns['page_id'];
	$nsColumn = $columns['ns'];
	$titleColumn = $columns['title'];

	// Both window bounds are inclusive, hence the "- 1".
	$highPageId = $lowPageId + $this->getBatchSize() - 1;
	$dbw = $this->getPrimaryDB();

	// Page ID window shared by the SELECT and the UPDATE below.
	$windowConds = [
		$dbw->expr( $pageIdColumn, '>=', $lowPageId ),
		$dbw->expr( $pageIdColumn, '<=', $highPageId ),
	];

	$total = 0;
	while ( true ) {
		// Find one link in the window that has not been migrated yet.
		$pending = $dbw->newSelectQueryBuilder()
			->select( [ $nsColumn, $titleColumn ] )
			->from( $table )
			->where( array_merge( [ $targetColumn => [ null, 0 ] ], $windowConds ) )
			->limit( 1 )
			->caller( __METHOD__ )
			->fetchResultSet();
		if ( !$pending->numRows() ) {
			break;
		}

		$row = $pending->fetchRow();
		$ns = $row[$nsColumn];
		$titleString = $row[$titleColumn];
		$this->output(
			"Starting backfill of {$ns}:{$titleString} " .
			"title on pages between {$lowPageId} and {$highPageId}\n"
		);

		$linkTarget = new TitleValue( (int)$ns, $titleString );
		$linkTargetId = $this->getServiceContainer()
			->getLinkTargetLookup()
			->acquireLinkTargetId( $linkTarget, $dbw );

		// Fill in the ID on every unmigrated row in the window pointing at the same target.
		$dbw->newUpdateQueryBuilder()
			->update( $table )
			->set( [ $targetColumn => $linkTargetId ] )
			->where( array_merge(
				[
					$targetColumn => [ null, 0 ],
					$nsColumn => $ns,
					$titleColumn => $titleString,
				],
				$windowConds
			) )
			->caller( __METHOD__ )
			->execute();

		$affected = $dbw->affectedRows();
		$total += $affected;
		$this->output( "Updated {$affected} rows\n" );

		// Let replicas catch up, then optionally pause as requested via --sleep.
		$this->waitForReplication();
		$pause = (int)$this->getOption( 'sleep', 0 );
		if ( $pause > 0 ) {
			sleep( $pause );
		}
	}

	return $total;
}
145
146}
147
148// @codeCoverageIgnoreStart
149$maintClass = MigrateLinksTable::class;
150require_once RUN_MAINTENANCE_IF_MAIN;
151// @codeCoverageIgnoreEnd
Class for scripts that perform database maintenance and want to log the update in updatelog so we can later skip it.
getDB( $db, $groups=[], $dbDomain=false)
Returns a database to be used by current maintenance script.
output( $out, $channel=null)
Throw some output to the user.
waitForReplication()
Wait for replica DBs to catch up.
getServiceContainer()
Returns the main service container.
getBatchSize()
Returns batch size.
addDescription( $text)
Set the description text.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
setBatchSize( $s=0)
Represents the target of a wiki link.
Maintenance script that populates normalization column in links tables.
doDBUpdates()
Do the actual work.
getUpdateKey()
Get the update key name to go in the update log table.
__construct()
Default constructor.
const DB_PRIMARY
Definition defines.php:28