Code Coverage

| | Lines % | Lines | Functions and Methods % | Functions and Methods | CRAP | Classes and Traits % | Classes and Traits |
|---|---|---|---|---|---|---|---|
| Total | 89.74% | 70 / 78 | 80.00% | 4 / 5 | | 0.00% | 0 / 1 |
| RecalculateCognateNormalizedHashes | 95.89% | 70 / 73 | 80.00% | 4 / 5 | 12 | 0.00% | 0 / 1 |
| __construct | 100.00% | 5 / 5 | 100.00% | 1 / 1 | 1 | | |
| setupServices | 100.00% | 6 / 6 | 100.00% | 1 / 1 | 1 | | |
| execute | 94.44% | 51 / 54 | 0.00% | 0 / 1 | 8.01 | | |
| getLowestRawKey | 100.00% | 5 / 5 | 100.00% | 1 / 1 | 1 | | |
| normalizeAndHash | 100.00% | 3 / 3 | 100.00% | 1 / 1 | 1 | | |
1 | <?php |
2 | |
3 | namespace Cognate; |
4 | |
5 | use Maintenance; |
6 | use MediaWiki\MediaWikiServices; |
7 | use Wikimedia\Rdbms\Database; |
8 | use Wikimedia\Rdbms\DBUnexpectedError; |
9 | use Wikimedia\Rdbms\SelectQueryBuilder; |
10 | |
// Load the Maintenance base class: from MW_INSTALL_PATH when that environment
// variable is set, otherwise from a path three directories above this file.
require_once (
	getenv( 'MW_INSTALL_PATH' ) !== false
		? getenv( 'MW_INSTALL_PATH' ) . '/maintenance/Maintenance.php'
		: __DIR__ . '/../../../maintenance/Maintenance.php'
);
16 | |
/**
 * Maintenance script for recalculating the normalized Cognate hashes.
 *
 * Walks the Cognate titles table in batches ordered by cgti_raw_key, recomputes the
 * normalized hash of every raw title and upserts the rows whose stored hash differs
 * from the recalculated one. With --dry-run nothing is written.
 *
 * @license GPL-2.0-or-later
 * @author Addshore
 */
class RecalculateCognateNormalizedHashes extends Maintenance {

	/**
	 * Connection used for reading batches (obtained via getReplicaDatabase()).
	 *
	 * @var Database
	 */
	private $dbr;

	/**
	 * Connection used for writing the upserts (obtained via getPrimaryDatabase()).
	 *
	 * @var Database
	 */
	private $dbw;

	/**
	 * @var StringHasher
	 */
	private $stringHasher;

	/**
	 * @var StringNormalizer
	 */
	private $stringNormalizer;

	public function __construct() {
		parent::__construct();

		$this->addDescription( 'Recalculate the normalized Cognate hashes' );
		$this->addOption( 'dry-run', 'Perform a dry run' );
		$this->setBatchSize( 100 );
		$this->requireExtension( 'Cognate' );
	}

	/**
	 * Resolve the database connections and helper objects used by execute().
	 *
	 * Done here rather than in the constructor; execute() calls this before
	 * touching any of the properties.
	 */
	private function setupServices() {
		$services = MediaWikiServices::getInstance();
		$connectionProvider = $services->getConnectionProvider();
		$this->dbr = $connectionProvider->getReplicaDatabase( CognateServices::VIRTUAL_DOMAIN );
		$this->dbw = $connectionProvider->getPrimaryDatabase( CognateServices::VIRTUAL_DOMAIN );
		$this->stringHasher = new StringHasher();
		$this->stringNormalizer = new StringNormalizer();
	}

	/**
	 * Recalculate the normalized hash of every row in the titles table.
	 *
	 * Rows are read in batches of getBatchSize() ordered by cgti_raw_key, using the
	 * last key seen as the cursor for the next batch. Rows whose recalculated hash
	 * differs from the stored cgti_normalized_key are upserted (unless --dry-run is
	 * set), and replication is waited for after every batch.
	 *
	 * @return bool Always true
	 */
	public function execute() {
		$this->output( "Started processing...\n" );
		$dryRun = $this->hasOption( 'dry-run' );
		if ( $dryRun ) {
			$this->output( "In DRY RUN mode.\n" );
		}
		$this->setupServices();
		$start = $this->getLowestRawKey();

		// Compare against false explicitly: a cursor of 0 (lowest key of 1) is a
		// valid starting point and must not be mistaken for "no rows".
		if ( $start === false ) {
			$this->output( "Nothing to do.\n" );
			return true;
		}

		$loadBalancerFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
		$totalUpdates = 0;
		$batchStart = $start;

		// null marks "previous batch was empty"; any integer, including 0 and
		// negative values, is a valid cursor and keeps the loop going.
		while ( $batchStart !== null ) {
			$this->output( "Getting batch starting from $batchStart\n" );
			$rows = $this->dbr->newSelectQueryBuilder()
				->select( [ 'cgti_raw', 'cgti_raw_key', 'cgti_normalized_key' ] )
				->from( CognateStore::TITLES_TABLE_NAME )
				->where( $this->dbr->expr( 'cgti_raw_key', '>', $batchStart ) )
				->orderBy( 'cgti_raw_key', SelectQueryBuilder::SORT_ASC )
				->limit( $this->getBatchSize() )
				->caller( __METHOD__ )
				->fetchResultSet();

			$this->output( "Calculating new hashes..\n" );
			$batchStart = null;
			$rowsToUpdate = [];
			foreach ( $rows as $row ) {
				// Rows arrive in ascending key order, so after the loop this holds
				// the highest key of the batch, i.e. the next cursor.
				$batchStart = (int)$row->cgti_raw_key;

				$newNormalizedHash = $this->normalizeAndHash( $row->cgti_raw );
				// Deliberately loose comparison: the stored key comes from the
				// result row and may be a numeric string rather than an int.
				if ( $newNormalizedHash != $row->cgti_normalized_key ) {
					// The row carries exactly the three selected columns, so the
					// array can be fed to the upsert after swapping in the new hash.
					$newRow = (array)$row;
					$newRow['cgti_normalized_key'] = $newNormalizedHash;
					$rowsToUpdate[] = $newRow;
				}
			}

			$numberOfUpdates = count( $rowsToUpdate );
			$totalUpdates += $numberOfUpdates;

			if ( $numberOfUpdates > 0 && !$dryRun ) {
				$this->output( "Performing $numberOfUpdates updates\n" );
				// @phan-suppress-next-line SecurityCheck-SQLInjection
				$this->dbw->newInsertQueryBuilder()
					->insertInto( CognateStore::TITLES_TABLE_NAME )
					->rows( $rowsToUpdate )
					->onDuplicateKeyUpdate()
					->uniqueIndexFields( 'cgti_raw_key' )
					->set( [
						'cgti_normalized_key=' . $this->dbw->buildExcludedValue( 'cgti_normalized_key' ),
					] )
					->caller( __METHOD__ )
					->execute();
			}

			$this->output(
				$rows->numRows() . " rows processed, " .
				$numberOfUpdates . " rows upserted\n"
			);

			$loadBalancerFactory->waitForReplication();
		}

		$this->output( "$totalUpdates hashes recalculated\n" );
		$this->output( "Done!\n" );

		return true;
	}

	/**
	 * Select 1 less than the minimum so that > can be used in selects in this script.
	 *
	 * The raw fetchField() result is normalised here so that callers only ever see
	 * an integer cursor or false. NOTE(review): on an empty table the MIN()
	 * aggregate presumably comes back as null rather than false; both are mapped
	 * to false.
	 *
	 * @return int|false The lowest cgti_raw_key minus one, or false if no key was found
	 * @throws DBUnexpectedError
	 */
	private function getLowestRawKey() {
		$lowest = $this->dbr->newSelectQueryBuilder()
			->select( 'MIN(cgti_raw_key)-1' )
			->from( CognateStore::TITLES_TABLE_NAME )
			->caller( __METHOD__ )
			->fetchField();

		if ( $lowest === null || $lowest === false ) {
			return false;
		}

		return (int)$lowest;
	}

	/**
	 * Normalize a raw title string and hash the normalized form.
	 *
	 * @param string $string
	 *
	 * @return int
	 */
	private function normalizeAndHash( $string ) {
		return $this->stringHasher->hash(
			$this->stringNormalizer->normalize( $string )
		);
	}

}
164 | |
// Name the class to run, then include the file that RUN_MAINTENANCE_IF_MAIN points at
// (presumably defined by the Maintenance.php bootstrap required at the top of this file).
$maintClass = \Cognate\RecalculateCognateNormalizedHashes::class;
require_once RUN_MAINTENANCE_IF_MAIN;