MediaWiki  master
refreshExternallinksIndex.php
Go to the documentation of this file.
1 <?php
24 require_once __DIR__ . '/Maintenance.php';
25 
/**
 * Set up the script: register its description and its batch size.
 */
public function __construct() {
	parent::__construct();

	$description = 'Refresh the externallinks table el_index and el_index_60 from el_to';
	$this->addDescription( $description );

	// Batch size handed to the maintenance framework for this script.
	$batchSize = 10000;
	$this->setBatchSize( $batchSize );
}
40 
/**
 * Build the key under which this update is recorded.
 *
 * The key is made of the concrete class name, LinkFilter::VERSION and an
 * "+IDN" / "-IDN" suffix chosen from LinkFilter::supportsIDN(), so a change
 * to any of the three produces a different key.
 *
 * @return string
 */
protected function getUpdateKey() {
	$idnMarker = LinkFilter::supportsIDN() ? '+' : '-';

	return sprintf( '%s v%s%sIDN', static::class, LinkFilter::VERSION, $idnMarker );
}
46 
/**
 * Text reported when the update is skipped.
 *
 * @return string
 */
protected function updateSkippedMessage() {
	$message = 'externallinks table indexes up to date.';

	return $message;
}
50 
/**
 * Recompute el_index and el_index_60 for the rows of the externallinks table.
 *
 * The table is walked in el_id ranges of $this->mBatchSize. For every row,
 * LinkFilter::makeIndexes() is applied to el_to and then:
 *  - the row is deleted when no index could be built;
 *  - the row is left alone when its el_index already equals one of the
 *    freshly built indexes;
 *  - otherwise el_index and el_index_60 (the first 60 bytes of el_index)
 *    are rewritten.
 *
 * @return bool False when the externallinks table does not exist, true otherwise
 */
protected function doDBUpdates() {
	$dbw = $this->getDB( DB_PRIMARY );
	if ( !$dbw->tableExists( 'externallinks', __METHOD__ ) ) {
		$this->error( "externallinks table does not exist" );
		return false;
	}
	$this->output( "Updating externallinks table index fields\n" );

	$minmax = $dbw->selectRow(
		'externallinks',
		[ 'min' => 'MIN(el_id)', 'max' => 'MAX(el_id)' ],
		'',
		__METHOD__
	);

	$updated = 0;
	$deleted = 0;
	// MIN()/MAX() are NULL on an empty table. Without this guard the loop
	// would scan a meaningless "-1 - 0" range once before finishing.
	$hasRows = $minmax && $minmax->min !== null;
	$start = $hasRows ? (int)$minmax->min - 1 : 0;
	$last = $hasRows ? (int)$minmax->max : 0;
	while ( $start < $last ) {
		// Half-open range ( $start, $end ], capped at the highest known id.
		$end = min( $start + $this->mBatchSize, $last );
		$this->output( "el_id $start - $end of $last\n" );
		$res = $dbw->select( 'externallinks', [ 'el_id', 'el_to', 'el_index' ],
			[
				"el_id > $start",
				"el_id <= $end",
			],
			__METHOD__,
			[ 'ORDER BY' => 'el_id' ]
		);
		foreach ( $res as $row ) {
			$newIndexes = LinkFilter::makeIndexes( $row->el_to );
			if ( !$newIndexes ) {
				// No index can be built from el_to: drop the row.
				$dbw->delete( 'externallinks', [ 'el_id' => $row->el_id ], __METHOD__ );
				$deleted++;
				continue;
			}
			// Each entry of $newIndexes is a list of parts; join them into strings.
			$newIndexes2 = [];
			foreach ( $newIndexes as $newIndex ) {
				$newIndexes2[] = implode( '', $newIndex );
			}
			if ( in_array( $row->el_index, $newIndexes2, true ) ) {
				// Already up to date.
				continue;
			}

			$newIndex = $newIndexes2[0];
			if ( count( $newIndexes2 ) > 1 ) {
				// Assume the scheme is the only difference between the different $newIndexes.
				// Keep this row's scheme, assuming there's another row with the other scheme.
				$oldIndex = (string)$row->el_index;
				$oldColon = strpos( $oldIndex, ':' );
				$newColon = strpos( $newIndex, ':' );
				// Only splice when both strings really contain a scheme. An empty
				// or colon-less stored index would otherwise produce a scheme-less
				// value such as "://com.example./"; fall back to the first new index.
				if ( $oldColon !== false && $oldColon > 0 && $newColon !== false ) {
					$newIndex = substr( $oldIndex, 0, $oldColon ) . substr( $newIndex, $newColon );
				}
			}
			$dbw->update( 'externallinks',
				[
					'el_index' => $newIndex,
					'el_index_60' => substr( $newIndex, 0, 60 ),
				],
				[ 'el_id' => $row->el_id ],
				__METHOD__
			);
			$updated++;
		}
		$this->waitForReplication();
		$start = $end;
	}
	$this->output( "Done, $updated rows updated, $deleted deleted.\n" );

	return true;
}
121 }
122 
// Script bootstrap: store this script's class name in $maintClass, then include
// the file named by RUN_MAINTENANCE_IF_MAIN.
// NOTE(review): presumably that file reads $maintClass and runs the class when
// this script is the entry point; it is not visible here, so confirm.
123 $maintClass = RefreshExternallinksIndex::class;
124 require_once RUN_MAINTENANCE_IF_MAIN;
static makeIndexes( $url)
Converts a URL into a format for el_index.
Definition: LinkFilter.php:174
static supportsIDN()
Indicate whether LinkFilter IDN support is available.
Definition: LinkFilter.php:90
const VERSION
Increment this when makeIndexes output changes.
Definition: LinkFilter.php:41
Class for scripts that perform database maintenance and want to log the update in updatelog so we can...
getDB( $db, $groups=[], $dbDomain=false)
Returns a database to be used by current maintenance script.
error( $err, $die=0)
Throw an error to the user.
output( $out, $channel=null)
Throw some output to the user.
waitForReplication()
Wait for replica DBs to catch up.
addDescription( $text)
Set the description text.
setBatchSize( $s=0)
Maintenance script that refreshes the externallinks table el_index and el_index_60 from el_to.
updateSkippedMessage()
Message to show that the update was done already and was just skipped.
getUpdateKey()
Get the update key name to go in the update log table.
const DB_PRIMARY
Definition: defines.php:28