Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
0.00% covered (danger)
0.00%
0 / 100
0.00% covered (danger)
0.00%
0 / 4
CRAP
0.00% covered (danger)
0.00%
0 / 1
MigrateLinksTable
0.00% covered (danger)
0.00%
0 / 97
0.00% covered (danger)
0.00%
0 / 4
182
0.00% covered (danger)
0.00%
0 / 1
 __construct
0.00% covered (danger)
0.00%
0 / 17
0.00% covered (danger)
0.00%
0 / 1
2
 getUpdateKey
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 doDBUpdates
0.00% covered (danger)
0.00%
0 / 35
0.00% covered (danger)
0.00%
0 / 1
56
 handlePageBatch
0.00% covered (danger)
0.00%
0 / 44
0.00% covered (danger)
0.00%
0 / 1
20
1<?php
2
3require_once __DIR__ . '/Maintenance.php';
4
5use MediaWiki\Title\TitleValue;
6
/**
 * Maintenance script that populates normalization column in links tables.
 *
 * For the links table named by the 'table' option, every row whose target-id
 * column is still NULL or 0 gets that column filled with the id acquired from
 * the link target lookup for the row's (namespace, title) pair. The work is
 * split into page-id ranges of one batch size each.
 *
 * @ingroup Maintenance
 * @since 1.39
 */
class MigrateLinksTable extends LoggedUpdateMaintenance {
    /**
     * Registers the script description, the 'table' and 'sleep' options,
     * and a default batch size of 1000.
     */
    public function __construct() {
        parent::__construct();
        $this->addDescription(
            'Populates normalization column in links tables.'
        );
        $this->addOption(
            'table',
            'Table name. Like templatelinks.',
            true,
            true
        );
        $this->addOption(
            'sleep',
            'Sleep time (in seconds) between every batch. Default: 0',
            false,
            true
        );
        $this->setBatchSize( 1000 );
    }

    /**
     * Builds the update key from the class name and the 'table' option,
     * so each table gets its own key.
     *
     * @return string
     */
    protected function getUpdateKey() {
        return __CLASS__ . $this->getOption( 'table', '' );
    }

    /**
     * Validates the schema preconditions, then walks the page-id space in
     * batch-sized ranges and backfills the target-id column for each range.
     *
     * @return bool False when the migration could not be performed (unknown
     *  table, missing target column, missing linktarget table, invalid batch
     *  size); true when there was nothing to do or the backfill completed.
     */
    protected function doDBUpdates() {
        $dbw = $this->getDB( DB_PRIMARY );
        $mapping = \MediaWiki\Linker\LinksMigration::$mapping;
        $table = $this->getOption( 'table', '' );
        if ( !isset( $mapping[$table] ) ) {
            $this->output( "Mapping for this table doesn't exist yet.\n" );
            return false;
        }
        $targetColumn = $mapping[$table]['target_id'];
        if ( !$dbw->fieldExists( $table, $mapping[$table]['title'], __METHOD__ ) ) {
            $this->output( "Old fields don't exist. There is no need to run this script\n" );
            return true;
        }
        if ( !$dbw->fieldExists( $table, $targetColumn, __METHOD__ ) ) {
            $this->output( "Run update.php to create the $targetColumn column.\n" );
            return false;
        }
        if ( !$dbw->tableExists( 'linktarget', __METHOD__ ) ) {
            $this->output( "Run update.php to create the linktarget table.\n" );
            // Nothing was migrated and the operator still has to act, exactly as
            // in the missing-column case above, so do not report success.
            return false;
        }

        // The outer loop advances by the batch size; a value below 1 would
        // never move past the first page range.
        $batchSize = (int)$this->getBatchSize();
        if ( $batchSize < 1 ) {
            $this->output( "Batch size must be a positive integer.\n" );
            return false;
        }

        $this->output( "Populating the $targetColumn column\n" );
        $updated = 0;

        $highestPageRow = $dbw->newSelectQueryBuilder()
            ->select( 'page_id' )
            ->from( 'page' )
            ->orderBy( 'page_id', 'DESC' )
            ->limit( 1 )
            ->caller( __METHOD__ )
            ->fetchResultSet()->fetchRow();
        if ( !$highestPageRow ) {
            $this->output( "Page table is empty.\n" );
            return true;
        }
        $highestPageId = (int)$highestPageRow[0];

        // Given the indexes and the structure of links tables,
        // we need to split the update into batches of pages.
        // Otherwise the queries will take a really long time in production and cause read-only.
        for ( $pageId = 0; $pageId <= $highestPageId; $pageId += $batchSize ) {
            $updated += $this->handlePageBatch( $pageId, $mapping, $table );
        }

        $this->output( "Completed normalization of $table, $updated rows updated.\n" );

        return true;
    }

    /**
     * Backfills the target-id column for all rows of one page-id range.
     *
     * Repeatedly picks one not-yet-migrated row in the range, acquires the link
     * target id for its (namespace, title), and updates every not-yet-migrated
     * row in the range that shares that namespace and title. Stops when no
     * unmigrated row is left in the range.
     *
     * @param int $lowPageId First page id of the range (inclusive)
     * @param array $mapping Per-table column mapping; the entry for $table must
     *  provide the 'target_id', 'page_id', 'ns' and 'title' keys
     * @param string $table Links table to update
     * @return int Number of rows updated in this range
     */
    private function handlePageBatch( int $lowPageId, array $mapping, string $table ): int {
        $batchSize = (int)$this->getBatchSize();
        $targetColumn = $mapping[$table]['target_id'];
        $pageIdColumn = $mapping[$table]['page_id'];
        $nsColumn = $mapping[$table]['ns'];
        $titleColumn = $mapping[$table]['title'];
        // BETWEEN is inclusive, let's subtract one.
        $highPageId = $lowPageId + $batchSize - 1;
        $pageRangeCond = "$pageIdColumn BETWEEN $lowPageId AND $highPageId";
        // The option does not change while the script runs; read it once.
        $sleep = (int)$this->getOption( 'sleep', 0 );
        $dbw = $this->getPrimaryDB();
        $updated = 0;

        while ( true ) {
            $row = $dbw->newSelectQueryBuilder()
                ->select( [ $nsColumn, $titleColumn ] )
                ->from( $table )
                ->where( [
                    $targetColumn => [ null, 0 ],
                    $pageRangeCond
                ] )
                ->limit( 1 )
                ->caller( __METHOD__ )
                ->fetchResultSet()->fetchRow();
            if ( $row === false ) {
                // No unmigrated rows remain in this page range.
                break;
            }
            $ns = $row[$nsColumn];
            $titleString = $row[$titleColumn];
            $title = new TitleValue( (int)$ns, $titleString );
            $this->output( "Starting backfill of $ns:$titleString " .
                "title on pages between $lowPageId and $highPageId\n" );
            $id = $this->getServiceContainer()->getLinkTargetLookup()->acquireLinkTargetId( $title, $dbw );
            $dbw->newUpdateQueryBuilder()
                ->update( $table )
                ->set( [ $targetColumn => $id ] )
                ->where( [
                    $targetColumn => [ null, 0 ],
                    $nsColumn => $ns,
                    $titleColumn => $titleString,
                    $pageRangeCond
                ] )
                ->caller( __METHOD__ )->execute();
            $updatedInThisBatch = $dbw->affectedRows();
            $updated += $updatedInThisBatch;
            $this->output( "Updated $updatedInThisBatch rows\n" );
            // Sleep between batches for replication to catch up
            $this->waitForReplication();
            if ( $sleep > 0 ) {
                sleep( $sleep );
            }
        }
        return $updated;
    }

}
143
// Script footer: stores this class's name in $maintClass, then loads the file
// named by the RUN_MAINTENANCE_IF_MAIN constant (defined outside this file).
// NOTE(review): presumably that file runs $maintClass only when this script is
// invoked directly rather than included — confirm against Maintenance.php.
144$maintClass = MigrateLinksTable::class;
145require_once RUN_MAINTENANCE_IF_MAIN;