MediaWiki master
populateChangeTagDef.php
Go to the documentation of this file.
1<?php
20
21// @codeCoverageIgnoreStart
22require_once __DIR__ . '/Maintenance.php';
23// @codeCoverageIgnoreEnd
24
/**
 * Register the script description, its command-line options and the
 * default batch size.
 */
public function __construct() {
	parent::__construct();
	$this->addDescription( 'Populate and improve accuracy of change_tag_def statistics' );
	// Nothing in this script deletes rows; dry-run only skips its INSERT/UPDATE queries.
	$this->addOption( 'dry-run', 'Print debug info instead of actually updating the database' );
	$this->setBatchSize( 1000 );
	// Optional option (required = false) that takes a value (withArg = true).
	$this->addOption(
		'sleep',
		'Sleep time (in seconds) between every batch, defaults to zero',
		false,
		true
	);
	$this->addOption( 'populate-only', 'Do not update change_tag_def table' );
	$this->addOption( 'set-user-tags-only', 'Only update ctd_user_defined from valid_tag table' );
}
45
/**
 * Run the update, picking the code path from the change_tag schema and the
 * command-line options.
 *
 * If change_tag still has a ct_tag column, the counts are refreshed by tag
 * name (unless --populate-only), ct_tag_id is back-populated and the
 * user-defined flag is set; --set-user-tags-only limits the run to that last
 * step. Without a ct_tag column only the counts keyed by ct_tag_id are
 * refreshed.
 *
 * @return bool Always true
 */
protected function doDBUpdates() {
	$requestedBatchSize = $this->getOption( 'batch-size', $this->getBatchSize() );
	$this->setBatchSize( $requestedBatchSize );

	$primary = $this->getDB( DB_PRIMARY );
	$hasTagNameColumn = $primary->fieldExists( 'change_tag', 'ct_tag', __METHOD__ );

	if ( !$hasTagNameColumn ) {
		// No ct_tag column: counts can only be derived from ct_tag_id.
		$this->updateCountTagId();

		return true;
	}

	if ( !$this->hasOption( 'set-user-tags-only' ) ) {
		if ( !$this->hasOption( 'populate-only' ) ) {
			$this->updateCountTag();
		}
		$this->backpopulateChangeTagId();
	}
	$this->setUserDefinedTags();

	// TODO: Implement
	// $this->cleanZeroCountRows();

	return true;
}
75
/**
 * Flag every tag listed in the valid_tag table as user defined in
 * change_tag_def.
 *
 * Does nothing (apart from printing a notice) when valid_tag does not exist
 * or is empty. With --dry-run the affected tag names are printed instead of
 * being updated.
 */
private function setUserDefinedTags() {
	$primary = $this->getDB( DB_PRIMARY );

	// Stay at null when the valid_tag table is absent.
	$tagNames = $primary->tableExists( 'valid_tag', __METHOD__ )
		? $primary->newSelectQueryBuilder()
			->select( 'vt_tag' )
			->from( 'valid_tag' )
			->caller( __METHOD__ )
			->fetchFieldValues()
		: null;

	if ( !$tagNames ) {
		$this->output( "No user defined tags to set, moving on...\n" );

		return;
	}

	if ( $this->hasOption( 'dry-run' ) ) {
		$tagList = implode( ', ', $tagNames );
		$this->output( 'These tags will have ctd_user_defined=1 : ' . $tagList . "\n" );

		return;
	}

	$update = $primary->newUpdateQueryBuilder()
		->update( 'change_tag_def' )
		->set( [ 'ctd_user_defined' => 1 ] )
		->where( [ 'ctd_name' => $tagNames ] )
		->caller( __METHOD__ );
	$update->execute();

	$this->waitForReplication();
	$this->output( "Finished setting user defined tags in change_tag_def table\n" );
}
108
/**
 * Refresh ctd_count in change_tag_def from the number of change_tag rows
 * per ct_tag_id.
 *
 * The grouped count is read from a replica and each total is written to the
 * primary. Rows with an empty ct_tag_id are skipped. With --dry-run the
 * planned updates are printed and nothing is written.
 */
private function updateCountTagId() {
	$dbr = $this->getReplicaDB();

	// This query can be pretty expensive, don't run it on master
	$res = $dbr->newSelectQueryBuilder()
		->select( [ 'ct_tag_id', 'hitcount' => 'count(*)' ] )
		->from( 'change_tag' )
		->groupBy( 'ct_tag_id' )
		->caller( __METHOD__ )->fetchResultSet();

	$dbw = $this->getPrimaryDB();
	$dryRun = $this->hasOption( 'dry-run' );

	foreach ( $res as $row ) {
		if ( !$row->ct_tag_id ) {
			continue;
		}

		if ( $dryRun ) {
			// $row is a result object, not an array: implode() would throw a
			// TypeError if it were passed directly, so list the fields explicitly.
			$this->output(
				'This row will be updated: ' .
				implode( ', ', [ $row->ct_tag_id, $row->hitcount ] ) . "\n"
			);
			continue;
		}

		$dbw->newUpdateQueryBuilder()
			->update( 'change_tag_def' )
			->set( [ 'ctd_count' => $row->hitcount ] )
			->where( [ 'ctd_id' => $row->ct_tag_id ] )
			->caller( __METHOD__ )
			->execute();
	}
	$this->waitForReplication();
}
140
/**
 * Insert or refresh one change_tag_def row per tag name found in change_tag.
 *
 * The grouped count per ct_tag is read from a replica. For each tag a row is
 * inserted with ctd_user_defined = 0 and the counted total; if a row with the
 * same ctd_name already exists only its ctd_count is updated. Rows with an
 * empty ct_tag are skipped. With --dry-run the planned changes are printed
 * and nothing is written.
 */
private function updateCountTag() {
	$dbr = $this->getReplicaDB();

	// This query can be pretty expensive, don't run it on master
	$res = $dbr->newSelectQueryBuilder()
		->select( [ 'ct_tag', 'hitcount' => 'count(*)' ] )
		->from( 'change_tag' )
		->groupBy( 'ct_tag' )
		->caller( __METHOD__ )->fetchResultSet();

	$dbw = $this->getPrimaryDB();
	$dryRun = $this->hasOption( 'dry-run' );

	foreach ( $res as $row ) {
		// Hygiene check
		if ( !$row->ct_tag ) {
			continue;
		}

		if ( $dryRun ) {
			// Separate name and count; concatenating them directly made
			// output such as "mytag42" impossible to read back.
			$this->output( "This row will be updated: {$row->ct_tag}, {$row->hitcount}\n" );
			continue;
		}

		$dbw->newInsertQueryBuilder()
			->insertInto( 'change_tag_def' )
			->row( [
				'ctd_name' => $row->ct_tag,
				'ctd_user_defined' => 0,
				'ctd_count' => $row->hitcount
			] )
			->onDuplicateKeyUpdate()
			->uniqueIndexFields( [ 'ctd_name' ] )
			->set( [ 'ctd_count' => $row->hitcount ] )
			->caller( __METHOD__ )->execute();
	}
	$this->waitForReplication();
}
177
/**
 * Back-populate change_tag.ct_tag_id for every tag defined in
 * change_tag_def, one tag at a time in ascending ctd_id order.
 */
private function backpopulateChangeTagId() {
	$replica = $this->getReplicaDB();

	$definitionQuery = $replica->newSelectQueryBuilder()
		->select( [ 'ctd_name', 'ctd_id' ] )
		->from( 'change_tag_def' )
		->orderBy( 'ctd_id' )
		->caller( __METHOD__ );

	foreach ( $definitionQuery->fetchResultSet() as $definition ) {
		$this->backpopulateChangeTagPerTag( $definition->ctd_name, $definition->ctd_id );
	}
}
190
/**
 * Fill in ct_tag_id for all change_tag rows of one tag that do not have it yet.
 *
 * Rows are processed in batches of getBatchSize() ordered by ct_id: the ids
 * are selected from a replica, then updated on the primary. After each
 * written batch the script waits for replication and, if --sleep is set,
 * pauses for that many seconds. With --dry-run the ids are only printed.
 *
 * @param string $tagName Value of ct_tag to match
 * @param int $tagId Value to store in ct_tag_id
 */
private function backpopulateChangeTagPerTag( string $tagName, int $tagId ) {
	$replica = $this->getReplicaDB();
	$primary = $this->getPrimaryDB();
	$pauseSeconds = (int)$this->getOption( 'sleep', 0 );
	$dryRun = $this->hasOption( 'dry-run' );

	// Highest ct_id handled so far; each batch resumes after it.
	$cursor = 0;
	$this->output( "Starting to add ct_tag_id = {$tagId} for ct_tag = {$tagName}\n" );

	for ( ; ; ) {
		// Given that indexes might not be there, it's better to use replica
		$conditions = [
			'ct_tag' => $tagName,
			'ct_tag_id' => null,
			$replica->expr( 'ct_id', '>', $cursor ),
		];
		$batch = $replica->newSelectQueryBuilder()
			->select( 'ct_id' )
			->from( 'change_tag' )
			->where( $conditions )
			->orderBy( 'ct_id' )
			->limit( $this->getBatchSize() )
			->caller( __METHOD__ )
			->fetchFieldValues();

		if ( !$batch ) {
			break;
		}
		$cursor = end( $batch );

		if ( $dryRun ) {
			$idList = implode( ', ', $batch );
			$this->output( "These ids will be changed to have \"{$tagId}\" as tag id: " . $idList . "\n" );
			continue;
		}

		$this->output( "Updating ct_tag_id = {$tagId} up to row ct_id = {$cursor}\n" );

		$primary->newUpdateQueryBuilder()
			->update( 'change_tag' )
			->set( [ 'ct_tag_id' => $tagId ] )
			->where( [ 'ct_id' => $batch ] )
			->caller( __METHOD__ )
			->execute();

		$this->waitForReplication();
		if ( $pauseSeconds > 0 ) {
			sleep( $pauseSeconds );
		}
	}

	$this->output( "Finished adding ct_tag_id = {$tagId} for ct_tag = {$tagName}\n" );
}
236
/**
 * Key identifying this update: the name of this class.
 *
 * @return string
 */
protected function getUpdateKey() {
	return self::class;
}
241}
242
243// @codeCoverageIgnoreStart
// NOTE(review): presumably RUN_MAINTENANCE_IF_MAIN executes the class named in
// $maintClass when this file is the entry script — confirm against Maintenance.php.
244$maintClass = PopulateChangeTagDef::class;
245require_once RUN_MAINTENANCE_IF_MAIN;
246// @codeCoverageIgnoreEnd
Class for scripts that perform database maintenance and want to log the update in updatelog so we can...
getBatchSize()
Returns batch size.
output( $out, $channel=null)
Throw some output to the user.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getDB( $db, $groups=[], $dbDomain=false)
Returns a database to be used by current maintenance script.
waitForReplication()
Wait for replica DB servers to catch up.
hasOption( $name)
Checks to see if a particular option was set.
getOption( $name, $default=null)
Get an option, or return the default.
addDescription( $text)
Set the description text.
Populate and improve accuracy of change_tag_def statistics.
getUpdateKey()
Get the update key name to go in the update log table. Returns: string
__construct()
Default constructor.
doDBUpdates()
Do the actual work. All child classes will need to implement this. Return true to log the update as do...
const DB_PRIMARY
Definition defines.php:28