MediaWiki REL1_39
migrateLinksTable.php
Go to the documentation of this file.
1<?php
2
4
5require_once __DIR__ . '/Maintenance.php';
6
/**
 * Register the script description, its command-line options and the
 * default batch size.
 *
 * Options declared here:
 *  - table: required, takes a value (name of the links table to migrate).
 *  - sleep: optional, takes a value (seconds to pause between batches).
 */
public function __construct() {
	parent::__construct();

	$this->addDescription( 'Populates normalization column in links tables.' );

	// Third argument: required flag. Fourth argument: option takes a value.
	$this->addOption( 'table', 'Table name. Like templatelinks.', true, true );
	$this->addOption( 'sleep', 'Sleep time (in seconds) between every batch. Default: 0', false, true );

	// Number of page IDs covered by one window of the migration.
	$this->setBatchSize( 1000 );
}
33
/**
 * Build the update-log key for this run.
 *
 * The key is the class name immediately followed by the value of the
 * 'table' option (empty string when the option is absent), so every
 * migrated table yields a distinct key.
 *
 * @return string
 */
protected function getUpdateKey() {
	$tableName = $this->getOption( 'table', '' );

	return __CLASS__ . $tableName;
}
37
/**
 * Backfill the normalized link-target column of the table given by --table.
 *
 * Verifies that a mapping, the old title column, the new target column and
 * the linktarget table all exist, then walks the page ID range in windows of
 * getBatchSize() IDs, handing each window to handlePageBatch().
 *
 * @return bool True when the migration finished or there was nothing to do;
 *  false when a prerequisite is missing and the script must be run again.
 */
protected function doDBUpdates() {
	$dbw = $this->getDB( DB_PRIMARY );
	$mapping = \MediaWiki\Linker\LinksMigration::$mapping;
	$table = $this->getOption( 'table', '' );
	if ( !isset( $mapping[$table] ) ) {
		$this->output( "Mapping for this table doesn't exist yet.\n" );
		return false;
	}

	$targetColumn = $mapping[$table]['target_id'];
	if ( !$dbw->fieldExists( $table, $mapping[$table]['title'], __METHOD__ ) ) {
		// The old columns are already gone, so there is nothing left to migrate.
		$this->output( "Old fields don't exist. There is no need to run this script\n" );
		return true;
	}
	if ( !$dbw->fieldExists( $table, $targetColumn, __METHOD__ ) ) {
		$this->output( "Run update.php to create the $targetColumn column.\n" );
		return false;
	}
	if ( !$dbw->tableExists( 'linktarget', __METHOD__ ) ) {
		$this->output( "Run update.php to create the linktarget table.\n" );
		// Nothing was migrated, so this must not be reported as success
		// (same outcome as the missing-column case above).
		return false;
	}

	$batchSize = (int)$this->getBatchSize();
	if ( $batchSize < 1 ) {
		// A non-positive step would keep the loop below from ever advancing.
		$this->output( "Batch size must be a positive integer.\n" );
		return false;
	}

	$this->output( "Populating the $targetColumn column\n" );

	$highestPageRow = $dbw->newSelectQueryBuilder()
		->select( 'page_id' )
		->from( 'page' )
		->orderBy( 'page_id', 'DESC' )
		->limit( 1 )
		->caller( __METHOD__ )
		->fetchResultSet()->fetchRow();
	if ( !$highestPageRow ) {
		$this->output( "Page table is empty.\n" );
		return true;
	}
	// Cast so the loop condition compares integers rather than an int
	// against whatever scalar type the database driver returned.
	$highestPageId = (int)$highestPageRow[0];

	$updated = 0;
	// Given the indexes and the structure of links tables,
	// we need to split the update into batches of pages.
	// Otherwise the queries will take a really long time in production and cause read-only.
	for ( $pageId = 0; $pageId <= $highestPageId; $pageId += $batchSize ) {
		$updated += $this->handlePageBatch( $pageId, $mapping, $table );
	}

	$this->output( "Completed normalization of $table, $updated rows updated.\n" );

	return true;
}
88
/**
 * Fill the target column for every row whose page ID lies in one window.
 *
 * Repeatedly picks one not-yet-migrated (namespace, title) pair inside the
 * window, acquires its link target ID and writes that ID to all matching
 * rows of the window, until no row with a NULL target column is left.
 *
 * @param int $lowPageId First page ID of the window (inclusive)
 * @param array $mapping Per-table column names, keyed by table name
 * @param string $table Links table being migrated
 * @return int Number of rows updated in this window
 */
private function handlePageBatch( $lowPageId, $mapping, $table ) {
	$columns = $mapping[$table];
	$nsColumn = $columns['ns'];
	$titleColumn = $columns['title'];
	$targetColumn = $columns['target_id'];
	$pageIdColumn = $columns['page_id'];

	// Both ends of BETWEEN are inclusive, hence the "- 1" on the upper bound.
	$highPageId = $lowPageId + $this->getBatchSize() - 1;
	$pageRange = "$pageIdColumn BETWEEN $lowPageId AND $highPageId";

	$pauseSeconds = (int)$this->getOption( 'sleep', 0 );
	$dbw = $this->getDB( DB_PRIMARY );
	$total = 0;

	for ( ; ; ) {
		// Look for any single row in the window that still lacks a target ID.
		$pending = $dbw->newSelectQueryBuilder()
			->select( [ $nsColumn, $titleColumn ] )
			->from( $table )
			->where( [ $targetColumn => null, $pageRange ] )
			->limit( 1 )
			->caller( __METHOD__ )
			->fetchResultSet();
		if ( !$pending->numRows() ) {
			return $total;
		}

		$candidate = $pending->fetchRow();
		$ns = $candidate[$nsColumn];
		$titleString = $candidate[$titleColumn];
		$this->output(
			"Starting backfill of {$ns}:{$titleString} title on pages between {$lowPageId} and {$highPageId}\n"
		);

		$linkTarget = new TitleValue( (int)$ns, $titleString );
		$linkTargetId = MediaWikiServices::getInstance()
			->getLinkTargetLookup()
			->acquireLinkTargetId( $linkTarget, $dbw );

		// One UPDATE covers every row in the window that points at this title.
		$dbw->update(
			$table,
			[ $targetColumn => $linkTargetId ],
			[
				$targetColumn => null,
				$nsColumn => $ns,
				$titleColumn => $titleString,
				$pageRange
			],
			__METHOD__
		);
		$changed = $dbw->affectedRows();
		$total += $changed;
		$this->output( "Updated $changed rows\n" );

		// Let replication catch up, then optionally pause as requested via --sleep.
		MediaWikiServices::getInstance()->getDBLoadBalancerFactory()->waitForReplication();
		if ( $pauseSeconds > 0 ) {
			sleep( $pauseSeconds );
		}
	}
}
138
139}
140
// Script bootstrap: stores the maintenance class name in $maintClass.
// NOTE(review): presumably read by the file included below to decide which class to run — confirm.
141$maintClass = MigrateLinksTable::class;
// Includes the file whose path is held in the RUN_MAINTENANCE_IF_MAIN constant.
142require_once RUN_MAINTENANCE_IF_MAIN;
getDB()
Class for scripts that perform database maintenance and want to log the update in updatelog so we can later skip it.
output( $out, $channel=null)
Throw some output to the user.
getBatchSize()
Returns batch size.
addDescription( $text)
Set the description text.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
setBatchSize( $s=0)
Service locator for MediaWiki core services.
Maintenance script that populates normalization column in links tables.
doDBUpdates()
Do the actual work.
getUpdateKey()
Get the update key name to go in the update log table.
__construct()
Default constructor.
Represents a page (or page fragment) title within MediaWiki.
const DB_PRIMARY
Definition defines.php:28