MediaWiki master
migrateLinksTable.php
Go to the documentation of this file.
1<?php
2
3require_once __DIR__ . '/Maintenance.php';
4
6
/**
 * Declare the script's description, its command-line options and the
 * default batch size.
 */
public function __construct() {
	parent::__construct();

	$this->addDescription( 'Populates normalization column in links tables.' );

	// Flags passed to addOption(): is the option required, and does it take a value?
	$required = true;
	$optional = false;
	$takesValue = true;

	// Which links table to work on; must be given.
	$this->addOption( 'table', 'Table name. Like templatelinks.', $required, $takesValue );
	// Optional pause between batches.
	$this->addOption(
		'sleep',
		'Sleep time (in seconds) between every batch. Default: 0',
		$optional,
		$takesValue
	);

	$this->setBatchSize( 1000 );
}
33
/**
 * Build the update-log key for this run: the class name followed by the
 * value of the 'table' option, so each table gets its own key.
 *
 * @return string
 */
protected function getUpdateKey() {
	$tableName = $this->getOption( 'table', '' );

	return __CLASS__ . $tableName;
}
37
/**
 * Populate the normalized target column of the links table named by the
 * 'table' option.
 *
 * Checks that the table has a known mapping, that the old title column and
 * the new target column both exist, and that the linktarget table exists.
 * It then walks the page id space in steps of the batch size and delegates
 * each range to handlePageBatch().
 *
 * @return bool True when there was nothing to do or the backfill ran to
 *  completion; false when a prerequisite is missing and the script must be
 *  re-run later.
 */
protected function doDBUpdates() {
	$dbw = $this->getDB( DB_PRIMARY );
	$mapping = \MediaWiki\Linker\LinksMigration::$mapping;
	$table = $this->getOption( 'table', '' );
	if ( !isset( $mapping[$table] ) ) {
		$this->output( "Mapping for this table doesn't exist yet.\n" );
		return false;
	}
	$targetColumn = $mapping[$table]['target_id'];
	if ( !$dbw->fieldExists( $table, $mapping[$table]['title'], __METHOD__ ) ) {
		// The old columns are gone, so there is nothing left to migrate from.
		$this->output( "Old fields don't exist. There is no need to run this script\n" );
		return true;
	}
	if ( !$dbw->fieldExists( $table, $targetColumn, __METHOD__ ) ) {
		$this->output( "Run update.php to create the $targetColumn column.\n" );
		return false;
	}
	if ( !$dbw->tableExists( 'linktarget', __METHOD__ ) ) {
		$this->output( "Run update.php to create the linktarget table.\n" );
		// As with the missing column above, a prerequisite is absent and no
		// row has been migrated, so this run must not be reported as successful.
		return false;
	}

	$this->output( "Populating the $targetColumn column\n" );
	$updated = 0;

	// Find the upper bound of the page id range to walk.
	$highestPageId = $dbw->newSelectQueryBuilder()
		->select( 'page_id' )
		->from( 'page' )
		->orderBy( 'page_id', 'DESC' )
		->limit( 1 )
		->caller( __METHOD__ )
		->fetchResultSet()->fetchRow();
	if ( !$highestPageId ) {
		$this->output( "Page table is empty.\n" );
		return true;
	}
	$highestPageId = $highestPageId[0];
	$pageId = 0;
	while ( $pageId <= $highestPageId ) {
		// Given the indexes and the structure of links tables,
		// we need to split the update into batches of pages.
		// Otherwise the queries will take a really long time in production and cause read-only.
		$updated += $this->handlePageBatch( $pageId, $mapping, $table );
		$pageId += $this->getBatchSize();
	}

	$this->output( "Completed normalization of $table, $updated rows updated.\n" );

	return true;
}
88
/**
 * Backfill the target column for every not-yet-migrated row whose page id
 * lies in [$lowPageId, $lowPageId + batch size - 1].
 *
 * Works one title at a time: pick any pending row in the range, acquire the
 * link target id for its namespace/title, then update all pending rows in
 * the range that share that namespace/title. Repeats until no pending row
 * is left in the range.
 *
 * @param int $lowPageId First page id of the range (inclusive)
 * @param array $mapping Per-table column names, keyed by table name
 * @param string $table Links table being migrated
 * @return int Number of rows updated in this page range
 */
private function handlePageBatch( $lowPageId, $mapping, $table ) {
	$columns = $mapping[$table];
	$targetColumn = $columns['target_id'];
	$pageIdColumn = $columns['page_id'];
	$nsColumn = $columns['ns'];
	$titleColumn = $columns['title'];

	// Both bounds are inclusive, hence the "- 1".
	$highPageId = $lowPageId + $this->getBatchSize() - 1;
	$dbw = $this->getPrimaryDB();
	$total = 0;

	for ( ; ; ) {
		// Fetch a single row in the range that still has no target id.
		$pending = $dbw->newSelectQueryBuilder()
			->select( [ $nsColumn, $titleColumn ] )
			->from( $table )
			->where( [
				$targetColumn => [ null, 0 ],
				$dbw->expr( $pageIdColumn, '>=', $lowPageId ),
				$dbw->expr( $pageIdColumn, '<=', $highPageId ),
			] )
			->limit( 1 )
			->caller( __METHOD__ )
			->fetchResultSet();
		if ( !$pending->numRows() ) {
			// Nothing left to migrate in this page range.
			break;
		}

		$pendingRow = $pending->fetchRow();
		$namespace = $pendingRow[$nsColumn];
		$dbKey = $pendingRow[$titleColumn];
		$linkTarget = new TitleValue( (int)$namespace, $dbKey );
		$this->output( "Starting backfill of {$namespace}:{$dbKey} " .
			"title on pages between {$lowPageId} and {$highPageId}\n" );

		$lookup = $this->getServiceContainer()->getLinkTargetLookup();
		$linkTargetId = $lookup->acquireLinkTargetId( $linkTarget, $dbw );

		// Stamp every pending row in the range that points at the same title.
		$dbw->newUpdateQueryBuilder()
			->update( $table )
			->set( [ $targetColumn => $linkTargetId ] )
			->where( [
				$targetColumn => [ null, 0 ],
				$nsColumn => $namespace,
				$titleColumn => $dbKey,
				$dbw->expr( $pageIdColumn, '>=', $lowPageId ),
				$dbw->expr( $pageIdColumn, '<=', $highPageId ),
			] )
			->caller( __METHOD__ )
			->execute();

		$affected = $dbw->affectedRows();
		$total += $affected;
		$this->output( "Updated {$affected} rows\n" );

		// Let replication catch up, then honour the optional --sleep pause.
		$this->waitForReplication();
		$pause = (int)$this->getOption( 'sleep', 0 );
		if ( $pause > 0 ) {
			sleep( $pause );
		}
	}

	return $total;
}
143
144}
145
// Name of the maintenance class defined in this file.
// NOTE(review): presumably read by the file required on the next line — confirm.
146$maintClass = MigrateLinksTable::class;
// NOTE(review): RUN_MAINTENANCE_IF_MAIN is defined outside this file; it looks like
// the usual entry point that runs the script when this file is executed directly — confirm.
147require_once RUN_MAINTENANCE_IF_MAIN;
getDB()
Class for scripts that perform database maintenance and want to log the update in updatelog so we can...
output( $out, $channel=null)
Throw some output to the user.
waitForReplication()
Wait for replica DBs to catch up.
getServiceContainer()
Returns the main service container.
getBatchSize()
Returns batch size.
addDescription( $text)
Set the description text.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
setBatchSize( $s=0)
Sets the batch size.
TitleValue
Represents the target of a wiki link.
MigrateLinksTable
Maintenance script that populates normalization column in links tables.
doDBUpdates()
Do the actual work.
getUpdateKey()
Get the update key name to go in the update log table.
__construct()
Default constructor.
const DB_PRIMARY
Definition defines.php:28