Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
0.00% covered (danger)
0.00%
0 / 98
0.00% covered (danger)
0.00%
0 / 5
CRAP
0.00% covered (danger)
0.00%
0 / 1
MigrateLinksTable
0.00% covered (danger)
0.00%
0 / 98
0.00% covered (danger)
0.00%
0 / 5
240
0.00% covered (danger)
0.00%
0 / 1
 __construct
0.00% covered (danger)
0.00%
0 / 17
0.00% covered (danger)
0.00%
0 / 1
2
 getUpdateKey
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 doDBUpdates
0.00% covered (danger)
0.00%
0 / 35
0.00% covered (danger)
0.00%
0 / 1
56
 handlePageBatch
0.00% covered (danger)
0.00%
0 / 36
0.00% covered (danger)
0.00%
0 / 1
12
 updateProgress
0.00% covered (danger)
0.00%
0 / 9
0.00% covered (danger)
0.00%
0 / 1
12
1<?php
2
3// @codeCoverageIgnoreStart
4require_once __DIR__ . '/Maintenance.php';
5// @codeCoverageIgnoreEnd
6
7use MediaWiki\Title\TitleValue;
8
9/**
10 * Maintenance script that populates normalization column in links tables.
11 *
12 * @ingroup Maintenance
13 * @since 1.39
14 */
class MigrateLinksTable extends LoggedUpdateMaintenance {
    /** @var int Running total of rows updated across all page batches */
    private $totalUpdated = 0;
    /** @var int Value of $totalUpdated at the time progress was last reported */
    private $lastProgress = 0;

    /**
     * Register the script description, the required --table option,
     * the optional --sleep option and a default batch size of 1000.
     */
    public function __construct() {
        parent::__construct();
        $this->addDescription(
            'Populates normalization column in links tables.'
        );
        $this->addOption(
            'table',
            'Table name. Like pagelinks.',
            true,
            true
        );
        $this->addOption(
            'sleep',
            'Sleep time (in seconds) between every batch. Default: 0',
            false,
            true
        );
        $this->setBatchSize( 1000 );
    }

    /**
     * Build the update key from the class name and the --table option,
     * so that each links table gets its own key.
     *
     * @return string
     */
    protected function getUpdateKey() {
        return __CLASS__ . $this->getOption( 'table', '' );
    }

    /**
     * Populate the target ID column of the table given by --table.
     *
     * The work is split into page ID ranges of one batch size each, starting
     * at page ID 0 and ending once the highest existing page ID is covered.
     *
     * @return bool True when the migration finished or there is nothing to
     *  migrate; false when it could not run (unknown table, or the schema
     *  is missing the target column or the linktarget table).
     */
    protected function doDBUpdates() {
        $dbw = $this->getDB( DB_PRIMARY );
        $mapping = \MediaWiki\Linker\LinksMigration::$mapping;
        $table = $this->getOption( 'table', '' );
        if ( !isset( $mapping[$table] ) ) {
            $this->output( "Mapping for this table doesn't exist yet.\n" );
            return false;
        }
        $targetColumn = $mapping[$table]['target_id'];
        if ( !$dbw->fieldExists( $table, $mapping[$table]['title'], __METHOD__ ) ) {
            $this->output( "Old fields don't exist. There is no need to run this script\n" );
            return true;
        }
        if ( !$dbw->fieldExists( $table, $targetColumn, __METHOD__ ) ) {
            $this->output( "Run update.php to create the $targetColumn column.\n" );
            return false;
        }
        if ( !$dbw->tableExists( 'linktarget', __METHOD__ ) ) {
            $this->output( "Run update.php to create the linktarget table.\n" );
            // Nothing was migrated, so report failure: returning true would let
            // LoggedUpdateMaintenance log this update as done and skip it later.
            return false;
        }

        $this->output( "Populating the $targetColumn column\n" );

        $highestPageRow = $dbw->newSelectQueryBuilder()
            ->select( 'page_id' )
            ->from( 'page' )
            ->orderBy( 'page_id', 'DESC' )
            ->limit( 1 )
            ->caller( __METHOD__ )
            ->fetchResultSet()->fetchRow();
        if ( !$highestPageRow ) {
            $this->output( "Page table is empty.\n" );
            return true;
        }
        // Cast the database value so the loop condition is an integer comparison.
        $highestPageId = (int)$highestPageRow[0];
        $pageId = 0;
        while ( $pageId <= $highestPageId ) {
            // Given the indexes and the structure of links tables,
            // we need to split the update into batches of pages.
            // Otherwise the queries will take a really long time in production and cause read-only.
            $this->handlePageBatch( $pageId, $mapping, $table );
            $pageId += $this->getBatchSize();
        }

        $this->output( "Completed normalization of $table, {$this->totalUpdated} rows updated.\n" );

        return true;
    }

    /**
     * Fill in the target ID for every unmigrated row of $table whose page ID
     * lies in the inclusive range [$lowPageId, $lowPageId + batch size - 1].
     *
     * Each iteration picks one (namespace, title) pair in the range whose
     * target ID is still NULL or 0, acquires the link target ID for it and
     * writes that ID to all matching unmigrated rows in the range. The loop
     * ends when no such row is left.
     *
     * @param int $lowPageId First page ID of the range
     * @param array $mapping Per-table column names; the keys 'target_id',
     *  'page_id', 'ns' and 'title' of $mapping[$table] are used
     * @param string $table Name of the links table being migrated
     */
    private function handlePageBatch( $lowPageId, $mapping, $table ) {
        $batchSize = $this->getBatchSize();
        $targetColumn = $mapping[$table]['target_id'];
        $pageIdColumn = $mapping[$table]['page_id'];
        $nsColumn = $mapping[$table]['ns'];
        $titleColumn = $mapping[$table]['title'];
        // range is inclusive, let's subtract one.
        $highPageId = $lowPageId + $batchSize - 1;
        $dbw = $this->getPrimaryDB();
        // The lookup service does not change between iterations; fetch it once.
        $linkTargetLookup = $this->getServiceContainer()->getLinkTargetLookup();

        while ( true ) {
            $res = $dbw->newSelectQueryBuilder()
                ->select( [ $nsColumn, $titleColumn ] )
                ->from( $table )
                ->where( [
                    $targetColumn => [ null, 0 ],
                    $dbw->expr( $pageIdColumn, '>=', $lowPageId ),
                    $dbw->expr( $pageIdColumn, '<=', $highPageId ),
                ] )
                ->limit( 1 )
                ->caller( __METHOD__ )
                ->fetchResultSet();
            if ( !$res->numRows() ) {
                break;
            }
            $row = $res->fetchRow();
            $ns = $row[$nsColumn];
            $titleString = $row[$titleColumn];
            $title = new TitleValue( (int)$ns, $titleString );
            $id = $linkTargetLookup->acquireLinkTargetId( $title, $dbw );
            // Update every unmigrated row in the range that points at the same
            // target, not only the single row selected above.
            $dbw->newUpdateQueryBuilder()
                ->update( $table )
                ->set( [ $targetColumn => $id ] )
                ->where( [
                    $targetColumn => [ null, 0 ],
                    $nsColumn => $ns,
                    $titleColumn => $titleString,
                    $dbw->expr( $pageIdColumn, '>=', $lowPageId ),
                    $dbw->expr( $pageIdColumn, '<=', $highPageId ),
                ] )
                ->caller( __METHOD__ )->execute();
            $this->updateProgress( $dbw->affectedRows(), $lowPageId, $highPageId, $ns, $titleString );
        }
    }

    /**
     * Update the total progress metric. If enough progress has been made,
     * report to the user and do a replication wait.
     *
     * "Enough" means at least one batch size worth of rows has been updated
     * since the last report. The optional --sleep pause is applied after the
     * replication wait.
     *
     * @param int $updatedInThisBatch
     * @param int $lowPageId
     * @param int $highPageId
     * @param int $ns
     * @param string $titleString
     */
    private function updateProgress( $updatedInThisBatch, $lowPageId, $highPageId, $ns, $titleString ) {
        $this->totalUpdated += $updatedInThisBatch;
        if ( $this->totalUpdated < $this->lastProgress + $this->getBatchSize() ) {
            return;
        }

        $this->lastProgress = $this->totalUpdated;
        $this->output( "Updated {$this->totalUpdated} rows, " .
            "at page_id $lowPageId-$highPageId title $ns:$titleString\n" );
        $this->waitForReplication();
        // Sleep between batches for replication to catch up
        $sleep = (int)$this->getOption( 'sleep', 0 );
        if ( $sleep > 0 ) {
            sleep( $sleep );
        }
    }

}
165
166// @codeCoverageIgnoreStart
167$maintClass = MigrateLinksTable::class;
168require_once RUN_MAINTENANCE_IF_MAIN;
169// @codeCoverageIgnoreEnd