MediaWiki  master
migrateExternallinks.php
Go to the documentation of this file.
1 <?php
2 
4 
5 require_once __DIR__ . '/Maintenance.php';
6 
/**
 * Register the script description, its command-line options and the
 * default batch size.
 */
public function __construct() {
	parent::__construct();

	$this->addDescription( 'Migrate externallinks data' );

	// --sleep is optional (required = false) and takes a value (withArg = true).
	$sleepHelp = 'Sleep time (in seconds) between every batch. Default: 0';
	$this->addOption( 'sleep', $sleepHelp, false, true );

	// Number of el_id values covered by each batch unless overridden.
	$this->setBatchSize( 1000 );
}
27 
/**
 * Key identifying this update; the name of this class is used.
 *
 * @return string
 */
protected function getUpdateKey() {
	return self::class;
}
31 
/**
 * Populate el_to_domain_index and el_to_path for the whole externallinks
 * table, walking the el_id range in steps of the configured batch size.
 *
 * @return bool True when there is nothing to migrate (old column gone or
 *  table empty) or when every batch has been processed; false when the
 *  el_to_path column is missing or the batch size is unusable.
 */
protected function doDBUpdates() {
	$dbw = $this->getDB( DB_PRIMARY );
	$table = 'externallinks';
	if ( !$dbw->fieldExists( $table, 'el_to', __METHOD__ ) ) {
		$this->output( "Old fields don't exist. There is no need to run this script\n" );
		return true;
	}
	if ( !$dbw->fieldExists( $table, 'el_to_path', __METHOD__ ) ) {
		$this->output( "Run update.php to create the el_to_path column.\n" );
		return false;
	}

	// A batch size below 1 would never advance the loop below.
	$batchSize = (int)$this->getBatchSize();
	if ( $batchSize < 1 ) {
		$this->output( "Batch size must be a positive integer.\n" );
		return false;
	}

	$this->output( "Populating el_to_domain_index and el_to_path columns\n" );
	$updated = 0;

	// Find the largest el_id so we know where to stop.
	$highestRow = $dbw->newSelectQueryBuilder()
		->select( 'el_id' )
		->from( $table )
		->orderBy( 'el_id', 'DESC' )
		->limit( 1 )
		->caller( __METHOD__ )
		->fetchResultSet()->fetchRow();
	if ( !$highestRow ) {
		$this->output( "The $table table is empty.\n" );
		return true;
	}
	// The database hands the id back as a string; compare it as an integer.
	$highestId = (int)$highestRow[0];

	// Each call covers the ids [$id, $id + batch size - 1].
	for ( $id = 0; $id <= $highestId; $id += $batchSize ) {
		$updated += $this->handleBatch( $id );
	}

	$this->output( "Completed normalization of $table, $updated rows updated.\n" );

	return true;
}
69 
/**
 * Migrate the rows whose el_id lies in one batch-sized window and whose
 * el_to_domain_index is still the empty string.
 *
 * @param int $lowId First el_id of the window (inclusive)
 * @return int Number of rows reported as affected by the updates
 */
private function handleBatch( $lowId ) {
	$limit = $this->getBatchSize();
	// Both ends of BETWEEN are inclusive, so the upper bound is one short
	// of the next window's start.
	$highId = $lowId + $limit - 1;
	$db = $this->getDB( DB_PRIMARY );

	$rows = $db->newSelectQueryBuilder()
		->select( [ 'el_id', 'el_to' ] )
		->from( 'externallinks' )
		->where( [
			'el_to_domain_index' => '',
			"el_id BETWEEN $lowId AND $highId"
		] )
		->limit( $limit )
		->caller( __METHOD__ )
		->fetchResultSet();
	if ( !$rows->numRows() ) {
		// Nothing left to migrate in this window; skip the waiting below.
		return 0;
	}

	$count = 0;
	foreach ( $rows as $link ) {
		$indexes = LinkFilter::makeIndexes( $link->el_to );
		if ( $indexes ) {
			// Only the first index is used: no protocol-relative URLs
			// are being passed to LinkFilter here.
			$first = $indexes[0];
			$db->update(
				'externallinks',
				[
					'el_to_domain_index' => substr( $first[0], 0, 255 ),
					'el_to_path' => $first[1]
				],
				[ 'el_id' => $link->el_id ],
				__METHOD__
			);
			$count += $db->affectedRows();
		}
	}
	$this->output( "Updated $count rows\n" );

	// Let replicas catch up, then honour the optional --sleep pause.
	$this->waitForReplication();
	$pause = (int)$this->getOption( 'sleep', 0 );
	if ( $pause > 0 ) {
		sleep( $pause );
	}

	return $count;
}
112 
113 }
114 
// Name this script's class in $maintClass, then include the file named by
// RUN_MAINTENANCE_IF_MAIN (presumably the runner that executes the class
// when this file is invoked directly -- confirm against Maintenance.php).
115 $maintClass = MigrateExternallinks::class;
116 require_once RUN_MAINTENANCE_IF_MAIN;
Class for scripts that perform database maintenance and want to log the update in updatelog so we can later skip it.
getDB( $db, $groups=[], $dbDomain=false)
Returns a database to be used by current maintenance script.
output( $out, $channel=null)
Throw some output to the user.
waitForReplication()
Wait for replica DBs to catch up.
getBatchSize()
Returns batch size.
addDescription( $text)
Set the description text.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
setBatchSize( $s=0)
const DB_PRIMARY
Definition: defines.php:28