Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
89.74% covered (warning)
89.74%
70 / 78
80.00% covered (warning)
80.00%
4 / 5
CRAP
0.00% covered (danger)
0.00%
0 / 1
RecalculateCognateNormalizedHashes
95.89% covered (success)
95.89%
70 / 73
80.00% covered (warning)
80.00%
4 / 5
12
0.00% covered (danger)
0.00%
0 / 1
 __construct
100.00% covered (success)
100.00%
5 / 5
100.00% covered (success)
100.00%
1 / 1
1
 setupServices
100.00% covered (success)
100.00%
6 / 6
100.00% covered (success)
100.00%
1 / 1
1
 execute
94.44% covered (success)
94.44%
51 / 54
0.00% covered (danger)
0.00%
0 / 1
8.01
 getLowestRawKey
100.00% covered (success)
100.00%
5 / 5
100.00% covered (success)
100.00%
1 / 1
1
 normalizeAndHash
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
1
1<?php
2
3namespace Cognate;
4
5use Maintenance;
6use MediaWiki\MediaWikiServices;
7use Wikimedia\Rdbms\Database;
8use Wikimedia\Rdbms\DBUnexpectedError;
9use Wikimedia\Rdbms\SelectQueryBuilder;
10
// Load the Maintenance base class: use MW_INSTALL_PATH as the installation root when
// that environment variable is set, otherwise fall back to three directories above
// this file.
require_once ( getenv( 'MW_INSTALL_PATH' ) !== false
    ? getenv( 'MW_INSTALL_PATH' )
    : __DIR__ . '/../../..' ) . '/maintenance/Maintenance.php';
16
/**
 * Maintenance script for recalculating the normalized Cognate hashes.
 *
 * Walks the Cognate titles table in ascending cgti_raw_key order, one batch at a
 * time, recomputes the normalized hash of every raw title and upserts the rows whose
 * stored cgti_normalized_key differs from the recomputed value. With --dry-run the
 * differences are only counted, nothing is written.
 *
 * @license GPL-2.0-or-later
 * @author Addshore
 */
class RecalculateCognateNormalizedHashes extends Maintenance {

    /**
     * Connection used for reading batches of title rows.
     *
     * @var Database
     */
    private $dbr;

    /**
     * Connection used for writing the recalculated hashes.
     *
     * @var Database
     */
    private $dbw;

    /**
     * @var StringHasher
     */
    private $stringHasher;

    /**
     * @var StringNormalizer
     */
    private $stringNormalizer;

    /**
     * Registers the script description, the --dry-run option, a default batch size
     * of 100 and the dependency on the Cognate extension.
     */
    public function __construct() {
        parent::__construct();

        $this->addDescription( 'Recalculate the normalized Cognate hashes' );
        $this->addOption( 'dry-run', 'Perform a dry run' );
        $this->setBatchSize( 100 );
        $this->requireExtension( 'Cognate' );
    }

    /**
     * Obtains the replica and primary connections for the Cognate virtual domain and
     * creates the hasher and normalizer used by normalizeAndHash().
     */
    private function setupServices() {
        $services = MediaWikiServices::getInstance();
        $connectionProvider = $services->getConnectionProvider();
        $this->dbr = $connectionProvider->getReplicaDatabase( CognateServices::VIRTUAL_DOMAIN );
        $this->dbw = $connectionProvider->getPrimaryDatabase( CognateServices::VIRTUAL_DOMAIN );
        $this->stringHasher = new StringHasher();
        $this->stringNormalizer = new StringNormalizer();
    }

    /**
     * Recalculates the normalized hashes batch by batch.
     *
     * @return bool Always true
     */
    public function execute() {
        // The option cannot change while the script runs, so look it up once.
        $isDryRun = $this->hasOption( 'dry-run' );

        $this->output( "Started processing...\n" );
        if ( $isDryRun ) {
            $this->output( "In DRY RUN mode.\n" );
        }
        $this->setupServices();
        $start = $this->getLowestRawKey();

        // Test for the "no value" results explicitly instead of relying on truthiness:
        // a starting key of 0 (or "0") is falsy in PHP but is a perfectly valid key
        // and must not be reported as "Nothing to do."
        if ( $start === null || $start === false ) {
            $this->output( "Nothing to do.\n" );
            return true;
        }

        $loadBalancerFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
        $totalUpdates = 0;
        $batchStart = (int)$start;

        // $batchStart holds the last key that was processed; null means the previous
        // batch was empty and the walk is finished. A key of 0 must keep the loop
        // going, hence the strict null comparison.
        while ( $batchStart !== null ) {
            $this->output( "Getting batch starting from $batchStart\n" );
            $rows = $this->dbr->newSelectQueryBuilder()
                ->select( [ 'cgti_raw', 'cgti_raw_key', 'cgti_normalized_key' ] )
                ->from( CognateStore::TITLES_TABLE_NAME )
                ->where( $this->dbr->expr( 'cgti_raw_key', '>', $batchStart ) )
                ->orderBy( 'cgti_raw_key', SelectQueryBuilder::SORT_ASC )
                ->limit( $this->getBatchSize() )
                ->caller( __METHOD__ )
                ->fetchResultSet();

            $this->output( "Calculating new hashes..\n" );
            $batchStart = null;
            $rowsToUpdate = [];
            foreach ( $rows as $row ) {
                // Rows arrive in ascending key order, so after the loop this is the
                // highest key of the batch and the lower bound of the next one.
                $batchStart = $row->cgti_raw_key;

                $newNormalizedHash = $this->normalizeAndHash( $row->cgti_raw );
                // Loose comparison on purpose: the computed hash is documented as an
                // int, while the stored value may arrive from the database as a
                // numeric string.
                if ( $newNormalizedHash != $row->cgti_normalized_key ) {
                    $newRow = (array)$row;
                    $newRow['cgti_normalized_key'] = $newNormalizedHash;
                    $rowsToUpdate[] = $newRow;
                }
            }

            $numberOfUpdates = count( $rowsToUpdate );
            $totalUpdates += $numberOfUpdates;

            if ( $numberOfUpdates > 0 && !$isDryRun ) {
                $this->output( "Performing $numberOfUpdates updates\n" );
                // Upsert keyed on cgti_raw_key: on a key collision only the
                // normalized key is overwritten with the newly computed value.
                // @phan-suppress-next-line SecurityCheck-SQLInjection
                $this->dbw->newInsertQueryBuilder()
                    ->insertInto( CognateStore::TITLES_TABLE_NAME )
                    ->rows( $rowsToUpdate )
                    ->onDuplicateKeyUpdate()
                    ->uniqueIndexFields( 'cgti_raw_key' )
                    ->set( [
                        'cgti_normalized_key=' . $this->dbw->buildExcludedValue( 'cgti_normalized_key' ),
                    ] )
                    ->caller( __METHOD__ )
                    ->execute();
            }

            $this->output(
                $rows->numRows() . " rows processed, " .
                $numberOfUpdates . " rows upserted\n"
            );

            // Wait for replication before continuing with the next batch.
            $loadBalancerFactory->waitForReplication();
        }

        $this->output( "$totalUpdates hashes recalculated\n" );
        $this->output( "Done!\n" );

        return true;
    }

    /**
     * Select 1 less than the minimum so that > can be used in selects in this script.
     *
     * @return int|string|false|null The lowest cgti_raw_key minus one, exactly as
     *  returned by fetchField(). Callers must treat null and false as "no rows" and
     *  must not use a truthiness check, because 0 is a valid result.
     *  NOTE(review): which of null/false is produced for an empty table depends on
     *  fetchField() - confirm against the Rdbms documentation.
     * @throws DBUnexpectedError
     */
    private function getLowestRawKey() {
        return $this->dbr->newSelectQueryBuilder()
            ->select( 'MIN(cgti_raw_key)-1' )
            ->from( CognateStore::TITLES_TABLE_NAME )
            ->caller( __METHOD__ )
            ->fetchField();
    }

    /**
     * Normalizes the given raw string and returns the hash of the normalized form.
     *
     * @param string $string
     *
     * @return int
     */
    private function normalizeAndHash( $string ) {
        return $this->stringHasher->hash(
            $this->stringNormalizer->normalize( $string )
        );
    }

}
164
// Name the class defined in this file as the maintenance script class, then include
// the file referenced by RUN_MAINTENANCE_IF_MAIN.
// NOTE(review): presumably that include runs the script only when this file is the
// entry point - confirm against the Maintenance documentation.
$maintClass = \Cognate\RecalculateCognateNormalizedHashes::class;
require_once RUN_MAINTENANCE_IF_MAIN;