Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
0.00% covered (danger)
0.00%
0 / 98
0.00% covered (danger)
0.00%
0 / 5
CRAP
0.00% covered (danger)
0.00%
0 / 1
MigrateLinksTable
0.00% covered (danger)
0.00%
0 / 98
0.00% covered (danger)
0.00%
0 / 5
240
0.00% covered (danger)
0.00%
0 / 1
 __construct
0.00% covered (danger)
0.00%
0 / 17
0.00% covered (danger)
0.00%
0 / 1
2
 getUpdateKey
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 doDBUpdates
0.00% covered (danger)
0.00%
0 / 35
0.00% covered (danger)
0.00%
0 / 1
56
 handlePageBatch
0.00% covered (danger)
0.00%
0 / 36
0.00% covered (danger)
0.00%
0 / 1
12
 updateProgress
0.00% covered (danger)
0.00%
0 / 9
0.00% covered (danger)
0.00%
0 / 1
12
1<?php
2
3// @codeCoverageIgnoreStart
4require_once __DIR__ . '/Maintenance.php';
5// @codeCoverageIgnoreEnd
6
7use MediaWiki\Maintenance\LoggedUpdateMaintenance;
8use MediaWiki\Title\TitleValue;
9
/**
 * Maintenance script that populates normalization column in links tables.
 *
 * For the links table named by the --table option, every row whose target-id
 * column is still NULL or 0 gets that column filled with an id acquired from
 * the link target lookup service for the row's namespace/title pair.
 * The work is split into ranges of page ids, each range being as wide as the
 * configured batch size.
 *
 * @ingroup Maintenance
 * @since 1.39
 */
class MigrateLinksTable extends LoggedUpdateMaintenance {
    /** @var int Number of rows updated so far during this run */
    private $totalUpdated = 0;
    /** @var int Value of $totalUpdated when progress was last reported */
    private $lastProgress = 0;

    public function __construct() {
        parent::__construct();
        $this->addDescription(
            'Populates normalization column in links tables.'
        );
        $this->addOption(
            'table',
            'Table name. Like pagelinks.',
            true,
            true
        );
        $this->addOption(
            'sleep',
            'Sleep time (in seconds) between every batch. Default: 0',
            false,
            true
        );
        $this->setBatchSize( 1000 );
    }

    /**
     * Build the update key for this run: the class name followed by the
     * value of the --table option, so each table gets a distinct key.
     *
     * @return string
     */
    protected function getUpdateKey() {
        return __CLASS__ . $this->getOption( 'table', '' );
    }

    /**
     * Validate the schema preconditions, then walk the page id space in
     * batches and populate the target-id column of the requested table.
     *
     * @return bool True when the migration finished or there is nothing to
     *  migrate; false when a precondition is missing and the script has to
     *  be run again later.
     */
    protected function doDBUpdates() {
        $dbw = $this->getDB( DB_PRIMARY );
        $mapping = \MediaWiki\Linker\LinksMigration::$mapping;
        $table = $this->getOption( 'table', '' );
        if ( !isset( $mapping[$table] ) ) {
            $this->output( "Mapping for this table doesn't exist yet.\n" );
            return false;
        }
        $targetColumn = $mapping[$table]['target_id'];
        if ( !$dbw->fieldExists( $table, $mapping[$table]['title'], __METHOD__ ) ) {
            // The old title column is gone, so there is nothing left to read from.
            $this->output( "Old fields don't exist. There is no need to run this script\n" );
            return true;
        }
        if ( !$dbw->fieldExists( $table, $targetColumn, __METHOD__ ) ) {
            $this->output( "Run update.php to create the $targetColumn column.\n" );
            return false;
        }
        if ( !$dbw->tableExists( 'linktarget', __METHOD__ ) ) {
            $this->output( "Run update.php to create the linktarget table.\n" );
            // Nothing was migrated, so report failure exactly like the missing
            // column case above instead of claiming success.
            // NOTE(review): the parent class presumably records the update key
            // as done when true is returned - confirm.
            return false;
        }

        $this->output( "Populating the $targetColumn column\n" );

        $highestPageRow = $dbw->newSelectQueryBuilder()
            ->select( 'page_id' )
            ->from( 'page' )
            ->orderBy( 'page_id', 'DESC' )
            ->limit( 1 )
            ->caller( __METHOD__ )
            ->fetchResultSet()->fetchRow();
        if ( !$highestPageRow ) {
            $this->output( "Page table is empty.\n" );
            return true;
        }
        $highestPageId = (int)$highestPageRow['page_id'];
        $pageId = 0;
        while ( $pageId <= $highestPageId ) {
            // Given the indexes and the structure of links tables,
            // we need to split the update into batches of pages.
            // Otherwise the queries will take a really long time in production and cause read-only.
            $this->handlePageBatch( $pageId, $mapping, $table );
            $pageId += $this->getBatchSize();
        }

        $this->output( "Completed normalization of $table, {$this->totalUpdated} rows updated.\n" );

        return true;
    }

    /**
     * Populate the target-id column for all rows of $table whose page id lies
     * in the range starting at $lowPageId and spanning one batch size.
     *
     * Each iteration picks one not-yet-migrated row, acquires the link target
     * id for its namespace/title, and updates every not-yet-migrated row in
     * the range that shares that namespace/title. The loop ends when no such
     * row is left in the range.
     *
     * @param int $lowPageId Lowest page id of the range (inclusive)
     * @param array $mapping Per-table column name mapping; the keys read here
     *  are 'target_id', 'page_id', 'ns' and 'title'
     * @param string $table Name of the links table to update
     */
    private function handlePageBatch( $lowPageId, $mapping, $table ) {
        $batchSize = $this->getBatchSize();
        $targetColumn = $mapping[$table]['target_id'];
        $pageIdColumn = $mapping[$table]['page_id'];
        $nsColumn = $mapping[$table]['ns'];
        $titleColumn = $mapping[$table]['title'];
        // range is inclusive, let's subtract one.
        $highPageId = $lowPageId + $batchSize - 1;
        $dbw = $this->getPrimaryDB();

        // NOTE(review): termination relies on every UPDATE below taking the
        // selected row out of the "NULL or 0" set - confirm that
        // acquireLinkTargetId() can never yield 0.
        while ( true ) {
            $res = $dbw->newSelectQueryBuilder()
                ->select( [ $nsColumn, $titleColumn ] )
                ->from( $table )
                ->where( [
                    $targetColumn => [ null, 0 ],
                    $dbw->expr( $pageIdColumn, '>=', $lowPageId ),
                    $dbw->expr( $pageIdColumn, '<=', $highPageId ),
                ] )
                ->limit( 1 )
                ->caller( __METHOD__ )
                ->fetchResultSet();
            if ( !$res->numRows() ) {
                break;
            }
            $row = $res->fetchRow();
            $ns = $row[$nsColumn];
            $titleString = $row[$titleColumn];
            $title = new TitleValue( (int)$ns, $titleString );
            $id = $this->getServiceContainer()->getLinkTargetLookup()->acquireLinkTargetId( $title, $dbw );
            $dbw->newUpdateQueryBuilder()
                ->update( $table )
                ->set( [ $targetColumn => $id ] )
                ->where( [
                    $targetColumn => [ null, 0 ],
                    $nsColumn => $ns,
                    $titleColumn => $titleString,
                    $dbw->expr( $pageIdColumn, '>=', $lowPageId ),
                    $dbw->expr( $pageIdColumn, '<=', $highPageId ),
                ] )
                ->caller( __METHOD__ )->execute();
            $this->updateProgress( $dbw->affectedRows(), $lowPageId, $highPageId, $ns, $titleString );
        }
    }

    /**
     * Update the total progress metric. If enough progress has been made,
     * report to the user and do a replication wait.
     *
     * "Enough" means at least one batch size worth of rows since the last
     * report. The optional --sleep pause is applied after the replication wait.
     *
     * @param int $updatedInThisBatch
     * @param int $lowPageId
     * @param int $highPageId
     * @param int $ns
     * @param string $titleString
     */
    private function updateProgress( $updatedInThisBatch, $lowPageId, $highPageId, $ns, $titleString ) {
        $this->totalUpdated += $updatedInThisBatch;
        if ( $this->totalUpdated >= $this->lastProgress + $this->getBatchSize() ) {
            $this->lastProgress = $this->totalUpdated;
            $this->output( "Updated {$this->totalUpdated} rows, " .
                "at page_id $lowPageId-$highPageId title $ns:$titleString\n" );
            $this->waitForReplication();
            // Sleep between batches for replication to catch up
            $sleep = (int)$this->getOption( 'sleep', 0 );
            if ( $sleep > 0 ) {
                sleep( $sleep );
            }
        }
    }

}
166
// @codeCoverageIgnoreStart
// Name this class as the maintenance class, then include the file that the
// RUN_MAINTENANCE_IF_MAIN constant points to.
// NOTE(review): the constant is presumably defined by Maintenance.php, which
// is required at the top of this file - confirm.
$maintClass = MigrateLinksTable::class;
require_once RUN_MAINTENANCE_IF_MAIN;
// @codeCoverageIgnoreEnd