MediaWiki  master
refreshExternallinksIndex.php
Go to the documentation of this file.
1 <?php
24 require_once __DIR__ . '/Maintenance.php';
25 
27 
/**
 * Set up the script: call the parent constructor, register the
 * human-readable description, and set the batch size to 10000.
 */
public function __construct() {
	parent::__construct();

	$description = 'Refresh the externallinks table el_index and el_index_60 from el_to';
	$this->addDescription( $description );

	$this->setBatchSize( 10000 );
}
42 
/**
 * Build the key under which this update is recorded in the update log.
 *
 * The key combines the concrete class name, LinkFilter::VERSION and a
 * '+IDN' / '-IDN' suffix reflecting LinkFilter::supportsIDN(), so the key
 * changes whenever either of those inputs changes.
 *
 * @return string
 */
protected function getUpdateKey() {
	$versionPart = ' v' . LinkFilter::VERSION;
	$idnSign = LinkFilter::supportsIDN() ? '+' : '-';

	return static::class . $versionPart . $idnSign . 'IDN';
}
48 
/**
 * Message shown when the update was already done and is being skipped.
 *
 * @return string
 */
protected function updateSkippedMessage() {
	$message = 'externallinks table indexes up to date';

	return $message;
}
52 
/**
 * Recompute el_index and el_index_60 for every row of externallinks.
 *
 * Rows are visited in el_id order, one batch of el_id values at a time.
 * For each row, LinkFilter::makeIndexes() is run on el_to:
 *  - if it yields no index, the row is deleted;
 *  - if the stored el_index is already one of the computed indexes, the
 *    row is left alone;
 *  - otherwise el_index and el_index_60 (the first 60 bytes of the index)
 *    are rewritten.
 *
 * @return bool False if the externallinks table does not exist, true otherwise.
 */
protected function doDBUpdates() {
	$dbw = $this->getDB( DB_MASTER );
	if ( !$dbw->tableExists( 'externallinks', __METHOD__ ) ) {
		$this->error( "externallinks table does not exist" );
		return false;
	}
	$this->output( "Updating externallinks table index fields\n" );

	$minmax = $dbw->selectRow(
		'externallinks',
		[ 'min' => 'MIN(el_id)', 'max' => 'MAX(el_id)' ],
		'',
		__METHOD__
	);

	$updated = 0;
	$deleted = 0;

	// MIN()/MAX() over an empty table are NULL. Bail out explicitly instead of
	// relying on how PHP compares null with an integer in the loop condition.
	if ( !$minmax || $minmax->min === null || $minmax->max === null ) {
		$this->output( "Done, $updated rows updated, $deleted deleted.\n" );

		return true;
	}

	// A batch size of zero or less would leave $end equal to $start below,
	// so the loop would never advance. Always step by at least one id.
	$batchSize = max( 1, (int)$this->mBatchSize );

	$start = $minmax->min - 1;
	$last = $minmax->max;
	$lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
	while ( $start < $last ) {
		$end = min( $start + $batchSize, $last );
		$this->output( "el_id $start - $end of $last\n" );
		$res = $dbw->select( 'externallinks', [ 'el_id', 'el_to', 'el_index' ],
			[
				"el_id > $start",
				"el_id <= $end",
			],
			__METHOD__,
			[ 'ORDER BY' => 'el_id' ]
		);
		foreach ( $res as $row ) {
			$newIndexes = LinkFilter::makeIndexes( $row->el_to );
			if ( !$newIndexes ) {
				// No index can be computed for this URL: drop the row.
				$dbw->delete( 'externallinks', [ 'el_id' => $row->el_id ], __METHOD__ );
				$deleted++;
				continue;
			}
			if ( in_array( $row->el_index, $newIndexes, true ) ) {
				// Stored index is already current.
				continue;
			}

			$newIndex = $newIndexes[0];
			if ( count( $newIndexes ) > 1 ) {
				// Assume the scheme is the only difference between the different $newIndexes.
				// Keep this row's scheme, assuming there's another row with the other scheme.
				$oldIndex = (string)$row->el_index;
				$oldColon = strpos( $oldIndex, ':' );
				$newColon = strpos( $newIndexes[0], ':' );
				// Only splice when both strings really contain a scheme. Otherwise
				// strpos() returns false (or 0) and the result would be an index
				// with no scheme at all; fall back to the first computed index.
				if ( $oldColon !== false && $oldColon > 0 && $newColon !== false ) {
					$newIndex = substr( $oldIndex, 0, $oldColon ) .
						substr( $newIndexes[0], $newColon );
				}
			}
			$dbw->update( 'externallinks',
				[
					'el_index' => $newIndex,
					'el_index_60' => substr( $newIndex, 0, 60 ),
				],
				[ 'el_id' => $row->el_id ],
				__METHOD__
			);
			$updated++;
		}
		// Wait for replication before starting the next batch.
		$lbFactory->waitForReplication();
		$start = $end;
	}
	$this->output( "Done, $updated rows updated, $deleted deleted.\n" );

	return true;
}
120 }
121 
// Name the class implementing this script, then include the file named by the
// RUN_MAINTENANCE_IF_MAIN constant (defined in Maintenance.php).
// NOTE(review): presumably that file runs $maintClass when this script is the
// entry point — confirm against Maintenance.php.
122 $maintClass = RefreshExternallinksIndex::class;
123 require_once RUN_MAINTENANCE_IF_MAIN;
RUN_MAINTENANCE_IF_MAIN
const RUN_MAINTENANCE_IF_MAIN
Definition: Maintenance.php:38
LinkFilter\supportsIDN
static supportsIDN()
Indicate whether LinkFilter IDN support is available.
Definition: LinkFilter.php:90
MediaWiki\MediaWikiServices
MediaWikiServices is the service locator for the application scope of MediaWiki.
Definition: MediaWikiServices.php:163
Maintenance\addDescription
addDescription( $text)
Set the description text.
Definition: Maintenance.php:327
RefreshExternallinksIndex\__construct
__construct()
Default constructor.
Definition: refreshExternallinksIndex.php:36
$res
$res
Definition: testCompression.php:57
RefreshExternallinksIndex\updateSkippedMessage
updateSkippedMessage()
Message to show that the update was done already and was just skipped.
Definition: refreshExternallinksIndex.php:49
LinkFilter\makeIndexes
static makeIndexes( $url)
Converts a URL into a format for el_index.
Definition: LinkFilter.php:173
LinkFilter\VERSION
const VERSION
Increment this when makeIndexes output changes.
Definition: LinkFilter.php:41
LoggedUpdateMaintenance
Class for scripts that perform database maintenance and want to log the update in the updatelog table so we can later skip it.
Definition: LoggedUpdateMaintenance.php:26
$maintClass
$maintClass
Definition: refreshExternallinksIndex.php:122
DB_MASTER
const DB_MASTER
Definition: defines.php:26
Maintenance\getDB
getDB( $db, $groups=[], $dbDomain=false)
Returns a database to be used by current maintenance script.
Definition: Maintenance.php:1369
RefreshExternallinksIndex\doDBUpdates
doDBUpdates()
Do the actual work.
Definition: refreshExternallinksIndex.php:53
Maintenance\error
error( $err, $die=0)
Throw an error to the user.
Definition: Maintenance.php:462
Maintenance\output
output( $out, $channel=null)
Throw some output to the user.
Definition: Maintenance.php:433
RefreshExternallinksIndex\getUpdateKey
getUpdateKey()
Get the update key name to go in the update log table.
Definition: refreshExternallinksIndex.php:43
RefreshExternallinksIndex
Maintenance script that refreshes the externallinks table el_index and el_index_60 from el_to.
Definition: refreshExternallinksIndex.php:35
Maintenance\setBatchSize
setBatchSize( $s=0)
Definition: Maintenance.php:373