Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
0.00% covered (danger)
0.00%
0 / 77
0.00% covered (danger)
0.00%
0 / 4
CRAP
0.00% covered (danger)
0.00%
0 / 1
MigrateExternallinks
0.00% covered (danger)
0.00%
0 / 77
0.00% covered (danger)
0.00%
0 / 4
156
0.00% covered (danger)
0.00%
0 / 1
 __construct
0.00% covered (danger)
0.00%
0 / 11
0.00% covered (danger)
0.00%
0 / 1
2
 getUpdateKey
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 doDBUpdates
0.00% covered (danger)
0.00%
0 / 27
0.00% covered (danger)
0.00%
0 / 1
30
 handleBatch
0.00% covered (danger)
0.00%
0 / 38
0.00% covered (danger)
0.00%
0 / 1
30
1<?php
2
3use MediaWiki\ExternalLinks\LinkFilter;
4use MediaWiki\Maintenance\LoggedUpdateMaintenance;
5
6// @codeCoverageIgnoreStart
7require_once __DIR__ . '/Maintenance.php';
8// @codeCoverageIgnoreEnd
9
/**
 * Maintenance script that migrates externallinks data.
 *
 * For rows of the externallinks table whose el_to_domain_index is still
 * empty, this fills el_to_domain_index and el_to_path from the old el_to
 * column using LinkFilter::makeIndexes(). The table is walked in el_id
 * ranges that are one batch size wide.
 *
 * @ingroup Maintenance
 * @since 1.40
 */
class MigrateExternallinks extends LoggedUpdateMaintenance {
    /** Name of the table this script reads from and writes to. */
    private const TABLE = 'externallinks';

    public function __construct() {
        parent::__construct();
        $this->addDescription(
            'Migrate externallinks data'
        );
        $this->addOption(
            'sleep',
            'Sleep time (in seconds) between every batch. Default: 0',
            false,
            true
        );
        $this->setBatchSize( 1000 );
    }

    /** @inheritDoc */
    protected function getUpdateKey() {
        return __CLASS__;
    }

    /**
     * Walk the whole el_id range of the table and migrate it batch by batch.
     *
     * @inheritDoc
     * @return bool True when there was nothing to do or every batch was
     *  handled; false when the el_to_path column is missing or the batch
     *  size is unusable.
     */
    protected function doDBUpdates() {
        $dbw = $this->getPrimaryDB();
        $table = self::TABLE;
        if ( !$dbw->fieldExists( $table, 'el_to', __METHOD__ ) ) {
            $this->output( "Old fields don't exist. There is no need to run this script\n" );
            return true;
        }
        if ( !$dbw->fieldExists( $table, 'el_to_path', __METHOD__ ) ) {
            $this->output( "Run update.php to create the el_to_path column.\n" );
            return false;
        }

        // The loop below advances by the batch size, so a non-positive value
        // would keep it on the same id range forever.
        $batchSize = (int)$this->getBatchSize();
        if ( $batchSize < 1 ) {
            $this->output( "Batch size must be a positive integer.\n" );
            return false;
        }

        $this->output( "Populating el_to_domain_index and el_to_path columns\n" );

        $highestId = $dbw->newSelectQueryBuilder()
            ->select( 'el_id' )
            ->from( $table )
            ->orderBy( 'el_id', 'DESC' )
            ->limit( 1 )
            ->caller( __METHOD__ )
            ->fetchField();
        if ( $highestId === false ) {
            // No row at all, so there is nothing to migrate.
            $this->output( "$table table is empty.\n" );
            return true;
        }
        $highestId = (int)$highestId;

        $updated = 0;
        // Each call covers the inclusive id range [ $id, $id + $batchSize - 1 ].
        for ( $id = 0; $id <= $highestId; $id += $batchSize ) {
            $updated += $this->handleBatch( $id );
        }

        $this->output( "Completed normalization of $table, $updated rows updated.\n" );

        return true;
    }

    /**
     * Migrate the not-yet-migrated rows of a single el_id range.
     *
     * Only rows whose el_to_domain_index is the empty string are selected.
     * Rows for which LinkFilter::makeIndexes() returns nothing are skipped
     * and left unchanged.
     *
     * @param int $lowId Lowest el_id (inclusive) of the range; the range ends
     *  at $lowId + batch size - 1 (inclusive).
     * @return int Number of rows reported as affected by the updates
     */
    private function handleBatch( int $lowId ): int {
        $batchSize = $this->getBatchSize();
        // range is inclusive, let's subtract one.
        $highId = $lowId + $batchSize - 1;
        $dbw = $this->getPrimaryDB();
        $updated = 0;
        $res = $dbw->newSelectQueryBuilder()
            ->select( [ 'el_id', 'el_to' ] )
            ->from( self::TABLE )
            ->where( [
                'el_to_domain_index' => '',
                $dbw->expr( 'el_id', '>=', $lowId ),
                $dbw->expr( 'el_id', '<=', $highId ),
            ] )
            ->limit( $batchSize )
            ->caller( __METHOD__ )
            ->fetchResultSet();
        if ( !$res->numRows() ) {
            // Nothing to migrate in this range: no output, no waiting.
            return $updated;
        }
        foreach ( $res as $row ) {
            $url = $row->el_to;
            $paths = LinkFilter::makeIndexes( $url );
            if ( !$paths ) {
                continue;
            }
            $dbw->newUpdateQueryBuilder()
                ->update( self::TABLE )
                // just take the first one, we are not sending proto-relative to LinkFilter
                ->set( [
                    // Keep at most the first 255 bytes of the domain index.
                    'el_to_domain_index' => substr( $paths[0][0], 0, 255 ),
                    'el_to_path' => $paths[0][1]
                ] )
                ->where( [ 'el_id' => $row->el_id ] )
                ->caller( __METHOD__ )->execute();

            $updated += $dbw->affectedRows();
        }
        $this->output( "Updated $updated rows\n" );
        // Sleep between batches for replication to catch up
        $this->waitForReplication();
        $sleep = (int)$this->getOption( 'sleep', 0 );
        if ( $sleep > 0 ) {
            sleep( $sleep );
        }
        return $updated;
    }

}
124
125// @codeCoverageIgnoreStart
// Record this script's class name in $maintClass, then include the file named
// by RUN_MAINTENANCE_IF_MAIN. That constant is defined outside this file;
// presumably the included file runs the class when this script is the entry
// point (confirm against Maintenance.php).
126$maintClass = MigrateExternallinks::class;
127require_once RUN_MAINTENANCE_IF_MAIN;
128// @codeCoverageIgnoreEnd