8require_once __DIR__ .
'/Maintenance.php';
19 parent::__construct();
21 'Migrate externallinks data'
25 'Sleep time (in seconds) between every batch. Default: 0',
41 ExternalLinksTable::VIRTUAL_DOMAIN
43 '@phan-var \Wikimedia\Rdbms\Database $dbw';
44 $table =
'externallinks';
45 if ( !$dbw->fieldExists( $table,
'el_to', __METHOD__ ) ) {
46 $this->
output(
"Old fields don't exist. There is no need to run this script\n" );
49 if ( !$dbw->fieldExists( $table,
'el_to_path', __METHOD__ ) ) {
50 $this->
output(
"Run update.php to create the el_to_path column.\n" );
54 $this->
output(
"Populating el_to_domain_index and el_to_path columns\n" );
57 $highestId = $dbw->newSelectQueryBuilder()
60 ->caller( __METHOD__ )
61 ->orderBy(
'el_id',
'DESC' )
64 $this->
output(
"Page table is empty.\n" );
68 while ( $id <= $highestId ) {
69 $updated += $this->handleBatch( $id );
73 $this->
output(
"Completed normalization of $table, $updated rows updated.\n" );
78 private function handleBatch(
int $lowId ): int {
79 $batchSize = $this->getBatchSize();
81 $highId = $lowId + $batchSize - 1;
82 $dbw = $this->getServiceContainer()->getConnectionProvider()->getPrimaryDatabase(
83 ExternalLinksTable::VIRTUAL_DOMAIN
86 $res = $dbw->newSelectQueryBuilder()
87 ->select( [
'el_id',
'el_to' ] )
88 ->from(
'externallinks' )
90 'el_to_domain_index' =>
'',
91 $dbw->expr(
'el_id',
'>=', $lowId ),
92 $dbw->expr(
'el_id',
'<=', $highId ),
95 ->caller( __METHOD__ )
97 if ( !$res->numRows() ) {
100 foreach ( $res as $row ) {
102 $paths = LinkFilter::makeIndexes(
$url );
106 $dbw->newUpdateQueryBuilder()
107 ->update(
'externallinks' )
110 'el_to_domain_index' => substr( $paths[0][0], 0, 255 ),
111 'el_to_path' => $paths[0][1]
113 ->where( [
'el_id' => $row->el_id ] )
114 ->caller( __METHOD__ )->execute();
116 $updated += $dbw->affectedRows();
118 $this->
output(
"Updated $updated rows\n" );
121 $sleep = (int)$this->
getOption(
'sleep', 0 );
132require_once RUN_MAINTENANCE_IF_MAIN;
Class for scripts that perform database maintenance and want to log the update in updatelog so we can...
getBatchSize()
Returns batch size.
output( $out, $channel=null)
Throw some output to the user.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
waitForReplication()
Wait for replica DB servers to catch up.
getOption( $name, $default=null)
Get an option, or return the default.
getServiceContainer()
Returns the main service container.
addDescription( $text)
Set the description text.
Maintenance script that migrates externallinks data.
__construct()
Default constructor.
getUpdateKey()
Get the update key name to go in the update log table. Returns a string.
doDBUpdates()
Do the actual work. All child classes will need to implement this. Return true to log the update as do...