MediaWiki  master
migrateLinksTable.php
Go to the documentation of this file.
1 <?php
2 
4 
5 require_once __DIR__ . '/Maintenance.php';
6 
/**
 * Register the script description, its command line options and the
 * default batch size.
 */
public function __construct() {
	parent::__construct();

	$this->addDescription( 'Populates normalization column in links tables.' );

	// --table: required, takes a value.
	$this->addOption( 'table', 'Table name. Like templatelinks.', true, true );

	// --sleep: optional, takes a value.
	$this->addOption( 'sleep', 'Sleep time (in seconds) between every batch. Default: 0', false, true );

	// Default size of the page ID window handled per batch.
	$this->setBatchSize( 1000 );
}
33 
/**
 * Get the update key name to go in the update log table.
 *
 * The key is the class name followed by the value of the 'table' option
 * (empty string when the option is absent), so each table gets its own key.
 *
 * @return string
 */
protected function getUpdateKey() {
	$tableName = $this->getOption( 'table', '' );

	return __CLASS__ . $tableName;
}
37 
/**
 * Populate the link target ID column of the links table named by --table.
 *
 * Verifies that a column mapping exists for the table and that the schema
 * is ready, then walks the page ID space in windows of getBatchSize() IDs,
 * delegating each window to handlePageBatch().
 *
 * @return bool True when the migration ran to completion or is not needed;
 *  false when it could not run (unknown table or schema not ready).
 */
protected function doDBUpdates() {
	$dbw = $this->getDB( DB_PRIMARY );
	// Column names of every supported links table are kept in LinksMigration.
	// Without this lookup $mapping is undefined and every table is rejected below.
	$mapping = \MediaWiki\Linker\LinksMigration::$mapping;
	$table = $this->getOption( 'table', '' );
	if ( !isset( $mapping[$table] ) ) {
		$this->output( "Mapping for this table doesn't exist yet.\n" );
		return false;
	}

	$targetColumn = $mapping[$table]['target_id'];
	if ( !$dbw->fieldExists( $table, $mapping[$table]['title'], __METHOD__ ) ) {
		// The old title column is gone, so there is nothing left to migrate.
		$this->output( "Old fields don't exist. There is no need to run this script\n" );
		return true;
	}
	if ( !$dbw->fieldExists( $table, $targetColumn, __METHOD__ ) ) {
		$this->output( "Run update.php to create the $targetColumn column.\n" );
		return false;
	}
	if ( !$dbw->tableExists( 'linktarget', __METHOD__ ) ) {
		$this->output( "Run update.php to create the linktarget table.\n" );
		// Nothing was migrated, so report failure like the missing-column case
		// above; returning true would report this run as a completed update.
		return false;
	}

	$this->output( "Populating the $targetColumn column\n" );
	$updated = 0;

	$highestPageRow = $dbw->newSelectQueryBuilder()
		->select( 'page_id' )
		->from( 'page' )
		->orderBy( 'page_id', 'DESC' )
		->limit( 1 )
		->caller( __METHOD__ )
		->fetchResultSet()->fetchRow();
	if ( !$highestPageRow ) {
		$this->output( "Page table is empty.\n" );
		return true;
	}
	$highestPageId = (int)$highestPageRow[0];

	// Given the indexes and the structure of links tables,
	// we need to split the update into batches of pages.
	// Otherwise the queries will take a really long time in production and cause read-only.
	$batchSize = $this->getBatchSize();
	for ( $pageId = 0; $pageId <= $highestPageId; $pageId += $batchSize ) {
		$updated += $this->handlePageBatch( $pageId, $mapping, $table );
	}

	$this->output( "Completed normalization of $table, $updated rows updated.\n" );

	return true;
}
88 
/**
 * Fill in the link target ID for every row of $table whose page ID lies in
 * one window of getBatchSize() consecutive page IDs.
 *
 * Repeatedly picks one not-yet-migrated (namespace, title) pair inside the
 * window, acquires its link target ID and writes that ID to all rows in the
 * window sharing the pair, until no unmigrated row remains.
 *
 * @param int $lowPageId Lowest page ID of the window (inclusive)
 * @param array $mapping Column mapping keyed by table name; the entry for
 *  $table must provide the 'target_id', 'page_id', 'ns' and 'title' keys
 * @param string $table Name of the links table being migrated
 * @return int Number of rows updated in this window
 */
private function handlePageBatch( $lowPageId, $mapping, $table ) {
	$batchSize = $this->getBatchSize();
	$targetColumn = $mapping[$table]['target_id'];
	$pageIdColumn = $mapping[$table]['page_id'];
	$nsColumn = $mapping[$table]['ns'];
	$titleColumn = $mapping[$table]['title'];
	// BETWEEN is inclusive, let's subtract one.
	$highPageId = $lowPageId + $batchSize - 1;
	$pageRangeCond = "$pageIdColumn BETWEEN $lowPageId AND $highPageId";
	$dbw = $this->getDB( DB_PRIMARY );
	$updated = 0;

	// These do not change between iterations, so resolve them once.
	$linkTargetLookup = MediaWikiServices::getInstance()->getLinkTargetLookup();
	$sleep = (int)$this->getOption( 'sleep', 0 );

	while ( true ) {
		// Pick any single row in the window that has no target ID yet.
		$res = $dbw->newSelectQueryBuilder()
			->select( [ $nsColumn, $titleColumn ] )
			->from( $table )
			->where( [
				$targetColumn => null,
				$pageRangeCond
			] )
			->limit( 1 )
			->caller( __METHOD__ )
			->fetchResultSet();
		if ( !$res->numRows() ) {
			break;
		}
		$row = $res->fetchRow();
		$ns = $row[$nsColumn];
		$titleString = $row[$titleColumn];
		$title = new TitleValue( (int)$ns, $titleString );
		$this->output( "Starting backfill of $ns:$titleString " .
			"title on pages between $lowPageId and $highPageId\n" );
		$id = $linkTargetLookup->acquireLinkTargetId( $title, $dbw );
		// Update every unmigrated row in the window that points at this same title.
		$conds = [
			$targetColumn => null,
			$nsColumn => $ns,
			$titleColumn => $titleString,
			$pageRangeCond
		];
		$dbw->update( $table, [ $targetColumn => $id ], $conds, __METHOD__ );
		$updatedInThisBatch = $dbw->affectedRows();
		$updated += $updatedInThisBatch;
		$this->output( "Updated $updatedInThisBatch rows\n" );
		// Sleep between batches for replication to catch up
		$this->waitForReplication();
		if ( $sleep > 0 ) {
			sleep( $sleep );
		}
	}
	return $updated;
}
138 
139 }
140 
141 $maintClass = MigrateLinksTable::class;
142 require_once RUN_MAINTENANCE_IF_MAIN;
Class for scripts that perform database maintenance and want to log the update in updatelog so we can...
getDB( $db, $groups=[], $dbDomain=false)
Returns a database to be used by current maintenance script.
output( $out, $channel=null)
Throw some output to the user.
waitForReplication()
Wait for replica DBs to catch up.
getBatchSize()
Returns batch size.
addDescription( $text)
Set the description text.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
setBatchSize( $s=0)
Service locator for MediaWiki core services.
Maintenance script that populates normalization column in links tables.
doDBUpdates()
Do the actual work.
getUpdateKey()
Get the update key name to go in the update log table.
__construct()
Default constructor.
Represents a page (or page fragment) title within MediaWiki.
Definition: TitleValue.php:40
const DB_PRIMARY
Definition: defines.php:28