MediaWiki master
populateChangeTagDef.php
Go to the documentation of this file.
1<?php
7
8// @codeCoverageIgnoreStart
9require_once __DIR__ . '/Maintenance.php';
10// @codeCoverageIgnoreEnd
11
/**
 * Register the script description, the default batch size and the
 * command-line options understood by this script.
 */
public function __construct() {
	parent::__construct();
	$this->addDescription( 'Populate and improve accuracy of change_tag_def statistics' );
	// Nothing in this script deletes rows (it only inserts and updates),
	// so the dry-run help text talks about updating.
	$this->addOption( 'dry-run', 'Print debug info instead of actually updating' );
	$this->setBatchSize( 1000 );
	$this->addOption(
		'sleep',
		'Sleep time (in seconds) between every batch, defaults to zero',
		// Not required
		false,
		// Takes an argument
		true
	);
	$this->addOption( 'populate-only', 'Do not update change_tag_def table' );
	$this->addOption( 'set-user-tags-only', 'Only update ctd_user_defined from valid_tag table' );
}
32
/**
 * Entry point for the actual work: decide which population steps to run.
 *
 * When change_tag has a ct_tag column the name-based steps run (count
 * refresh unless --populate-only, ct_tag_id backfill, user-defined flags),
 * or only the user-defined flags with --set-user-tags-only. Without that
 * column only the ct_tag_id based count refresh runs.
 *
 * NOTE(review): this returns true even with --dry-run; confirm that a dry
 * run is really meant to be reported as a completed update.
 *
 * TODO: Implement cleanZeroCountRows() and call it after the steps above.
 *
 * @return bool Always true
 */
protected function doDBUpdates() {
	$batchSize = $this->getOption( 'batch-size', $this->getBatchSize() );
	$this->setBatchSize( $batchSize );

	$primary = $this->getDB( DB_PRIMARY );
	$hasTagNameColumn = $primary->fieldExists( 'change_tag', 'ct_tag', __METHOD__ );

	if ( !$hasTagNameColumn ) {
		// No ct_tag column: counts can only be grouped by ct_tag_id.
		$this->updateCountTagId();
		return true;
	}

	if ( $this->hasOption( 'set-user-tags-only' ) ) {
		$this->setUserDefinedTags();
		return true;
	}

	$skipCountUpdate = $this->hasOption( 'populate-only' );
	if ( !$skipCountUpdate ) {
		$this->updateCountTag();
	}
	$this->backpopulateChangeTagId();
	$this->setUserDefinedTags();

	return true;
}
62
/**
 * Set ctd_user_defined = 1 on every change_tag_def row whose ctd_name is
 * listed in valid_tag.vt_tag.
 *
 * Does nothing (besides printing a message) when the valid_tag table does
 * not exist or yields no tags. With --dry-run the affected tags are only
 * printed.
 */
private function setUserDefinedTags() {
	$primary = $this->getDB( DB_PRIMARY );

	$definedTags = $primary->tableExists( 'valid_tag', __METHOD__ )
		? $primary->newSelectQueryBuilder()
			->select( 'vt_tag' )
			->from( 'valid_tag' )
			->caller( __METHOD__ )
			->fetchFieldValues()
		: null;

	if ( !$definedTags ) {
		$this->output( "No user defined tags to set, moving on...\n" );
		return;
	}

	if ( $this->hasOption( 'dry-run' ) ) {
		$tagList = implode( ', ', $definedTags );
		$this->output( 'These tags will have ctd_user_defined=1 : ' . $tagList . "\n" );
		return;
	}

	$flagUpdate = $primary->newUpdateQueryBuilder()
		->update( 'change_tag_def' )
		->set( [ 'ctd_user_defined' => 1 ] )
		->where( [ 'ctd_name' => $definedTags ] )
		->caller( __METHOD__ );
	$flagUpdate->execute();

	$this->waitForReplication();
	$this->output( "Finished setting user defined tags in change_tag_def table\n" );
}
95
/**
 * Refresh change_tag_def.ctd_count from the number of change_tag rows per
 * ct_tag_id.
 *
 * Rows with an empty ct_tag_id are skipped. With --dry-run the planned
 * updates are only printed.
 */
private function updateCountTagId() {
	$isDryRun = $this->hasOption( 'dry-run' );

	// The aggregate can be pretty expensive, so it is read from a replica
	// instead of the primary.
	$tagCounts = $this->getReplicaDB()->newSelectQueryBuilder()
		->select( [ 'ct_tag_id', 'hitcount' => 'count(*)' ] )
		->from( 'change_tag' )
		->groupBy( 'ct_tag_id' )
		->caller( __METHOD__ )
		->fetchResultSet();

	$primary = $this->getPrimaryDB();

	foreach ( $tagCounts as $tagCount ) {
		$tagId = $tagCount->ct_tag_id;
		$hits = $tagCount->hitcount;

		if ( !$tagId ) {
			continue;
		}

		if ( $isDryRun ) {
			$this->output( 'This row will be updated: id ' . $tagId . ', ' . $hits . " hits\n" );
			continue;
		}

		$primary->newUpdateQueryBuilder()
			->update( 'change_tag_def' )
			->set( [ 'ctd_count' => $hits ] )
			->where( [ 'ctd_id' => $tagId ] )
			->caller( __METHOD__ )
			->execute();
	}

	// Replication is only waited for once, after all rows were written.
	$this->waitForReplication();
}
127
/**
 * Create or refresh change_tag_def rows from the number of change_tag rows
 * per ct_tag (tag name).
 *
 * For each distinct ct_tag a row with ctd_user_defined = 0 and the counted
 * hits is inserted; when a row with that ctd_name already exists only its
 * ctd_count is updated. With --dry-run the planned changes are only printed.
 */
private function updateCountTag() {
	$dbr = $this->getReplicaDB();

	// This query can be pretty expensive, don't run it on the primary
	$res = $dbr->newSelectQueryBuilder()
		->select( [ 'ct_tag', 'hitcount' => 'count(*)' ] )
		->from( 'change_tag' )
		->groupBy( 'ct_tag' )
		->caller( __METHOD__ )->fetchResultSet();

	$dbw = $this->getPrimaryDB();
	$dryRun = $this->hasOption( 'dry-run' );

	foreach ( $res as $row ) {
		// Hygiene check: skip NULL/empty tag names. Compare against the empty
		// string explicitly, a plain truthiness test would also drop a tag
		// that is literally named "0".
		if ( (string)$row->ct_tag === '' ) {
			continue;
		}

		if ( $dryRun ) {
			// Separate name and count, mirroring the message of updateCountTagId()
			$this->output(
				'This row will be updated: ' . $row->ct_tag . ', ' . $row->hitcount . " hits\n"
			);
			continue;
		}

		$dbw->newInsertQueryBuilder()
			->insertInto( 'change_tag_def' )
			->row( [
				'ctd_name' => $row->ct_tag,
				'ctd_user_defined' => 0,
				'ctd_count' => $row->hitcount
			] )
			->onDuplicateKeyUpdate()
			->uniqueIndexFields( [ 'ctd_name' ] )
			->set( [ 'ctd_count' => $row->hitcount ] )
			->caller( __METHOD__ )->execute();
	}
	$this->waitForReplication();
}
164
/**
 * Walk over all change_tag_def rows (ordered by ctd_id, read from a
 * replica) and backfill change_tag.ct_tag_id for each of them.
 */
private function backpopulateChangeTagId() {
	$definitions = $this->getReplicaDB()->newSelectQueryBuilder()
		->select( [ 'ctd_name', 'ctd_id' ] )
		->from( 'change_tag_def' )
		->orderBy( 'ctd_id' )
		->caller( __METHOD__ )
		->fetchResultSet();

	foreach ( $definitions as $definition ) {
		$this->backpopulateChangeTagPerTag( $definition->ctd_name, $definition->ctd_id );
	}
}
177
/**
 * Fill in ct_tag_id for all change_tag rows of one tag that do not have it
 * yet, working in batches of ascending ct_id.
 *
 * After each written batch the script waits for replication and then
 * sleeps for the number of seconds given by --sleep (if positive). With
 * --dry-run the selected ids are printed and nothing is written.
 *
 * @param string $tagName Value of ct_tag to look for
 * @param int $tagId Value to store in ct_tag_id
 */
private function backpopulateChangeTagPerTag( string $tagName, int $tagId ) {
	$replica = $this->getReplicaDB();
	$primary = $this->getPrimaryDB();
	$pause = (int)$this->getOption( 'sleep', 0 );
	$isDryRun = $this->hasOption( 'dry-run' );
	$batchSize = $this->getBatchSize();

	$this->output( "Starting to add ct_tag_id = {$tagId} for ct_tag = {$tagName}\n" );

	$lastId = 0;
	while ( true ) {
		// Given that indexes might not be there, it's better to read from a replica
		$conditions = [
			'ct_tag' => $tagName,
			'ct_tag_id' => null,
			$replica->expr( 'ct_id', '>', $lastId ),
		];
		$batch = $replica->newSelectQueryBuilder()
			->select( 'ct_id' )
			->from( 'change_tag' )
			->where( $conditions )
			->orderBy( 'ct_id' )
			->limit( $batchSize )
			->caller( __METHOD__ )
			->fetchFieldValues();

		if ( !$batch ) {
			break;
		}
		// Remember where this batch ended so the next query starts after it.
		$lastId = end( $batch );

		if ( $isDryRun ) {
			$this->output(
				"These ids will be changed to have \"{$tagId}\" as tag id: " . implode( ', ', $batch ) . "\n"
			);
			continue;
		}

		$this->output( "Updating ct_tag_id = {$tagId} up to row ct_id = {$lastId}\n" );

		$primary->newUpdateQueryBuilder()
			->update( 'change_tag' )
			->set( [ 'ct_tag_id' => $tagId ] )
			->where( [ 'ct_id' => $batch ] )
			->caller( __METHOD__ )
			->execute();

		$this->waitForReplication();
		if ( $pause > 0 ) {
			sleep( $pause );
		}
	}

	$this->output( "Finished adding ct_tag_id = {$tagId} for ct_tag = {$tagName}\n" );
}
223
/**
 * Name under which this update is recorded in the update log table.
 *
 * @return string The name of this class
 */
protected function getUpdateKey() {
	return self::class;
}
228}
229
230// @codeCoverageIgnoreStart
231$maintClass = PopulateChangeTagDef::class;
232require_once RUN_MAINTENANCE_IF_MAIN;
233// @codeCoverageIgnoreEnd
const DB_PRIMARY
Definition defines.php:28
Class for scripts that perform database maintenance and want to log the update in updatelog so we can...
getBatchSize()
Returns batch size.
output( $out, $channel=null)
Throw some output to the user.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getDB( $db, $groups=[], $dbDomain=false)
Returns a database to be used by current maintenance script.
waitForReplication()
Wait for replica DB servers to catch up.
hasOption( $name)
Checks to see if a particular option was set.
getOption( $name, $default=null)
Get an option, or return the default.
getReplicaDB(string|false $virtualDomain=false)
getPrimaryDB(string|false $virtualDomain=false)
addDescription( $text)
Set the description text.
Populate and improve accuracy of change_tag_def statistics.
getUpdateKey()
Get the update key name to go in the update log table. Returns: string
__construct()
Default constructor.
doDBUpdates()
Do the actual work. All child classes will need to implement this. Return true to log the update as do...