MediaWiki fundraising/REL1_35
refreshExternallinksIndex.php
Go to the documentation of this file.
1<?php
24require_once __DIR__ . '/Maintenance.php';
25
27
/**
 * Set up the script: register its description and its default batch size.
 */
public function __construct() {
	parent::__construct();

	$description = 'Refresh the externallinks table el_index and el_index_60 from el_to';
	$this->addDescription( $description );

	// Default batch size used by this script.
	$this->setBatchSize( 10000 );
}
42
/**
 * Build the key under which this update is logged.
 *
 * The key is made of the class name, LinkFilter::VERSION and an "+IDN" or
 * "-IDN" suffix depending on LinkFilter::supportsIDN(), so it changes
 * whenever one of those changes.
 *
 * @return string
 */
protected function getUpdateKey() {
	$idnMarker = LinkFilter::supportsIDN() ? '+' : '-';

	return static::class . ' v' . LinkFilter::VERSION . $idnMarker . 'IDN';
}
48
/**
 * Message shown when the update was already done and is being skipped.
 *
 * @return string
 */
protected function updateSkippedMessage() {
	$message = 'externallinks table indexes up to date';

	return $message;
}
52
/**
 * Recompute el_index and el_index_60 for the rows of the externallinks table.
 *
 * The table is walked in el_id ranges of the configured batch size. For each
 * row the indexes are regenerated from el_to with LinkFilter::makeIndexes():
 *  - rows for which no index can be generated are deleted;
 *  - rows whose el_index already equals one of the generated indexes are
 *    left untouched;
 *  - every other row gets el_index and el_index_60 rewritten.
 *
 * @return bool False if the externallinks table does not exist, true otherwise
 */
protected function doDBUpdates() {
	$dbw = $this->getDB( DB_MASTER );
	if ( !$dbw->tableExists( 'externallinks', __METHOD__ ) ) {
		$this->error( "externallinks table does not exist" );
		return false;
	}
	$this->output( "Updating externallinks table index fields\n" );

	$minmax = $dbw->selectRow(
		'externallinks',
		[ 'min' => 'MIN(el_id)', 'max' => 'MAX(el_id)' ],
		'',
		__METHOD__
	);

	$updated = 0;
	$deleted = 0;

	// MIN()/MAX() yield NULL on an empty table: nothing to walk in that case.
	// Handle it explicitly instead of relying on null-vs-int comparison rules.
	if ( !$minmax || $minmax->min === null || $minmax->max === null ) {
		$this->output( "Done, $updated rows updated, $deleted deleted.\n" );
		return true;
	}

	// The bounds are interpolated into SQL conditions below, so force them to
	// be plain integers.
	$start = (int)$minmax->min - 1;
	$last = (int)$minmax->max;

	// A non-positive batch size would leave $end equal to $start and the loop
	// below would never advance.
	$batchSize = max( 1, (int)$this->mBatchSize );

	$lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
	while ( $start < $last ) {
		$end = min( $start + $batchSize, $last );
		$this->output( "el_id $start - $end of $last\n" );
		$res = $dbw->select( 'externallinks', [ 'el_id', 'el_to', 'el_index' ],
			[
				"el_id > $start",
				"el_id <= $end",
			],
			__METHOD__,
			[ 'ORDER BY' => 'el_id' ]
		);
		foreach ( $res as $row ) {
			$newIndexes = LinkFilter::makeIndexes( $row->el_to );
			if ( !$newIndexes ) {
				// No index can be generated for this el_to: drop the row.
				$dbw->delete( 'externallinks', [ 'el_id' => $row->el_id ], __METHOD__ );
				$deleted++;
				continue;
			}
			if ( in_array( $row->el_index, $newIndexes, true ) ) {
				// Already up to date.
				continue;
			}

			$newIndex = $newIndexes[0];
			if ( count( $newIndexes ) > 1 ) {
				// Assume the scheme is the only difference between the different $newIndexes.
				// Keep this row's scheme, assuming there's another row with the other scheme.
				$oldSchemeEnd = strpos( $row->el_index, ':' );
				$newSchemeEnd = strpos( $newIndexes[0], ':' );
				if ( $oldSchemeEnd !== false && $newSchemeEnd !== false ) {
					$candidate = substr( $row->el_index, 0, $oldSchemeEnd ) .
						substr( $newIndexes[0], $newSchemeEnd );
					// Only keep the stitched value if it is really one of the
					// generated indexes. When the stored el_index has no usable
					// scheme (or an unexpected one), fall back to the first
					// generated index rather than writing an invalid el_index.
					if ( in_array( $candidate, $newIndexes, true ) ) {
						$newIndex = $candidate;
					}
				}
			}

			$dbw->update( 'externallinks',
				[
					'el_index' => $newIndex,
					'el_index_60' => substr( $newIndex, 0, 60 ),
				],
				[ 'el_id' => $row->el_id ],
				__METHOD__
			);
			$updated++;
		}
		// Let replicas catch up before starting on the next batch.
		$lbFactory->waitForReplication();
		$start = $end;
	}
	$this->output( "Done, $updated rows updated, $deleted deleted.\n" );

	return true;
}
120}
121
// Store this script's class name in $maintClass, then load the file named by
// RUN_MAINTENANCE_IF_MAIN.
// NOTE(review): presumably that file instantiates and runs $maintClass when
// this script is the entry point - confirm in Maintenance.php.
122$maintClass = RefreshExternallinksIndex::class;
123require_once RUN_MAINTENANCE_IF_MAIN;
getDB()
const RUN_MAINTENANCE_IF_MAIN
static makeIndexes( $url)
Converts a URL into a format for el_index.
static supportsIDN()
Indicate whether LinkFilter IDN support is available.
const VERSION
Increment this when makeIndexes output changes.
Class for scripts that perform database maintenance and want to log the update in updatelog so we can later skip it.
error( $err, $die=0)
Throw an error to the user.
output( $out, $channel=null)
Throw some output to the user.
addDescription( $text)
Set the description text.
setBatchSize( $s=0)
Set the batch size.
MediaWikiServices is the service locator for the application scope of MediaWiki.
Maintenance script that refreshes the externallinks table el_index and el_index_60 from el_to.
updateSkippedMessage()
Message to show that the update was done already and was just skipped.
getUpdateKey()
Get the update key name to go in the update log table.
const DB_MASTER
Definition defines.php:29