Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
0.00% covered (danger)
0.00%
0 / 77
0.00% covered (danger)
0.00%
0 / 4
CRAP
0.00% covered (danger)
0.00%
0 / 1
MigrateExternallinks
0.00% covered (danger)
0.00%
0 / 77
0.00% covered (danger)
0.00%
0 / 4
156
0.00% covered (danger)
0.00%
0 / 1
 __construct
0.00% covered (danger)
0.00%
0 / 11
0.00% covered (danger)
0.00%
0 / 1
2
 getUpdateKey
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 doDBUpdates
0.00% covered (danger)
0.00%
0 / 27
0.00% covered (danger)
0.00%
0 / 1
30
 handleBatch
0.00% covered (danger)
0.00%
0 / 38
0.00% covered (danger)
0.00%
0 / 1
30
1<?php
2
3use MediaWiki\ExternalLinks\LinkFilter;
4
5// @codeCoverageIgnoreStart
6require_once __DIR__ . '/Maintenance.php';
7// @codeCoverageIgnoreEnd
8
9/**
10 * Maintenance script that migrates externallinks data
11 *
12 * @ingroup Maintenance
13 * @since 1.40
14 */
class MigrateExternallinks extends LoggedUpdateMaintenance {
    /**
     * Register the script description, the --sleep option and the
     * default batch size of 1000 rows.
     */
    public function __construct() {
        parent::__construct();
        $this->addDescription(
            'Migrate externallinks data'
        );
        // Read in handleBatch() to pause after each batch that updated rows.
        $this->addOption(
            'sleep',
            'Sleep time (in seconds) between every batch. Default: 0',
            false,
            true
        );
        $this->setBatchSize( 1000 );
    }

    /**
     * Key identifying this update; the class name is used.
     *
     * @return string
     */
    protected function getUpdateKey() {
        return __CLASS__;
    }

    /**
     * Populate el_to_domain_index and el_to_path from el_to, walking the
     * el_id space in windows of the configured batch size.
     *
     * @return bool True when there is nothing to migrate or the migration
     *  ran to completion; false when the el_to_path column is missing and
     *  update.php has to be run first.
     */
    protected function doDBUpdates() {
        $dbw = $this->getDB( DB_PRIMARY );
        $table = 'externallinks';
        if ( !$dbw->fieldExists( $table, 'el_to', __METHOD__ ) ) {
            $this->output( "Old fields don't exist. There is no need to run this script\n" );
            return true;
        }
        if ( !$dbw->fieldExists( $table, 'el_to_path', __METHOD__ ) ) {
            $this->output( "Run update.php to create the el_to_path column.\n" );
            return false;
        }

        $this->output( "Populating el_to_domain_index and el_to_path columns\n" );
        $updated = 0;

        // Highest el_id currently in the table; it bounds the batch loop below.
        $highestRow = $dbw->newSelectQueryBuilder()
            ->select( 'el_id' )
            ->from( $table )
            ->limit( 1 )
            ->caller( __METHOD__ )
            ->orderBy( 'el_id', 'DESC' )
            ->fetchResultSet()->fetchRow();
        if ( !$highestRow ) {
            // The query above is against externallinks, so name that table.
            $this->output( "The $table table is empty.\n" );
            return true;
        }
        // Cast so the loop condition is a plain integer comparison rather
        // than an int vs. database-value comparison.
        $highestId = (int)$highestRow[0];
        $id = 0;
        while ( $id <= $highestId ) {
            $updated += $this->handleBatch( $id );
            $id += $this->getBatchSize();
        }

        $this->output( "Completed normalization of $table, $updated rows updated.\n" );

        return true;
    }

    /**
     * Migrate the not-yet-migrated rows whose el_id lies in
     * [$lowId, $lowId + batch size - 1].
     *
     * A row is selected when its el_to_domain_index is the empty string.
     * Rows for which LinkFilter::makeIndexes() yields nothing are skipped
     * and left unchanged.
     *
     * @param int $lowId First el_id of the window (inclusive)
     * @return int Number of rows updated in this window
     */
    private function handleBatch( int $lowId ): int {
        $batchSize = $this->getBatchSize();
        // range is inclusive, let's subtract one.
        $highId = $lowId + $batchSize - 1;
        $dbw = $this->getPrimaryDB();
        $updated = 0;
        $res = $dbw->newSelectQueryBuilder()
            ->select( [ 'el_id', 'el_to' ] )
            ->from( 'externallinks' )
            ->where( [
                'el_to_domain_index' => '',
                $dbw->expr( 'el_id', '>=', $lowId ),
                $dbw->expr( 'el_id', '<=', $highId ),
            ] )
            ->limit( $batchSize )
            ->caller( __METHOD__ )
            ->fetchResultSet();
        if ( !$res->numRows() ) {
            // Nothing to do in this window: skip the replication wait and sleep.
            return $updated;
        }
        foreach ( $res as $row ) {
            $url = $row->el_to;
            $paths = LinkFilter::makeIndexes( $url );
            if ( !$paths ) {
                continue;
            }
            $dbw->newUpdateQueryBuilder()
                ->update( 'externallinks' )
                // just take the first one, we are not sending proto-relative to LinkFilter
                ->set( [
                    // Only the first 255 bytes of the domain index are stored.
                    'el_to_domain_index' => substr( $paths[0][0], 0, 255 ),
                    'el_to_path' => $paths[0][1]
                ] )
                ->where( [ 'el_id' => $row->el_id ] )
                ->caller( __METHOD__ )->execute();

            $updated += $dbw->affectedRows();
        }
        $this->output( "Updated $updated rows\n" );
        // Sleep between batches for replication to catch up
        $this->waitForReplication();
        $sleep = (int)$this->getOption( 'sleep', 0 );
        if ( $sleep > 0 ) {
            sleep( $sleep );
        }
        return $updated;
    }

}
121
122// @codeCoverageIgnoreStart
123$maintClass = MigrateExternallinks::class;
124require_once RUN_MAINTENANCE_IF_MAIN;
125// @codeCoverageIgnoreEnd