MediaWiki master
populateChangeTagDef.php
Go to the documentation of this file.
1<?php
19require_once __DIR__ . '/Maintenance.php';
20
/**
 * Register the script description, the default batch size and the
 * command-line options understood by this script.
 */
public function __construct() {
	parent::__construct();
	$this->addDescription( 'Populate and improve accuracy of change_tag_def statistics' );
	// Nothing in this script deletes rows: in dry-run mode the update/insert
	// queries are skipped and the affected rows are printed instead.
	$this->addOption( 'dry-run', 'Print debug info instead of actually updating' );
	$this->setBatchSize( 1000 );
	$this->addOption(
		'sleep',
		'Sleep time (in seconds) between every batch, defaults to zero',
		false,
		true
	);
	$this->addOption( 'populate-only', 'Do not update change_tag_def table' );
	$this->addOption( 'set-user-tags-only', 'Only update ctd_user_defined from valid_tag table' );
}
41
/**
 * Entry point of the update: choose which steps to run based on the schema
 * and on the command-line options.
 *
 * - If change_tag has no ct_tag column, only the ctd_count values are
 *   refreshed from ct_tag_id.
 * - Otherwise, with --set-user-tags-only, only ctd_user_defined is updated.
 * - Otherwise the counts are refreshed from ct_tag (unless --populate-only),
 *   ct_tag_id is back-populated, and ctd_user_defined is updated.
 *
 * NOTE(review): this returns true even with --dry-run; if the parent class
 * records a successful run in the update log, a dry run may mark the update
 * as done - confirm against the base class.
 *
 * @return bool Always true
 */
protected function doDBUpdates() {
	$batchSize = $this->getOption( 'batch-size', $this->getBatchSize() );
	$this->setBatchSize( $batchSize );

	$primary = $this->getDB( DB_PRIMARY );
	$hasTagNameColumn = $primary->fieldExists( 'change_tag', 'ct_tag', __METHOD__ );

	if ( !$hasTagNameColumn ) {
		$this->updateCountTagId();

		return true;
	}

	if ( $this->hasOption( 'set-user-tags-only' ) ) {
		$this->setUserDefinedTags();

		return true;
	}

	if ( !$this->hasOption( 'populate-only' ) ) {
		$this->updateCountTag();
	}
	$this->backpopulateChangeTagId();
	$this->setUserDefinedTags();

	// TODO: Implement
	// $this->cleanZeroCountRows();

	return true;
}
70
/**
 * Set ctd_user_defined = 1 in change_tag_def for every tag name listed in
 * the valid_tag table.
 *
 * Does nothing (apart from printing a message) when valid_tag does not exist
 * or yields no tags. With --dry-run the tag names are printed and no write
 * is issued.
 */
private function setUserDefinedTags() {
	$primary = $this->getDB( DB_PRIMARY );

	// The valid_tag table may be absent; treat that like "no tags".
	$tagNames = $primary->tableExists( 'valid_tag', __METHOD__ )
		? $primary->newSelectQueryBuilder()
			->select( 'vt_tag' )
			->from( 'valid_tag' )
			->caller( __METHOD__ )
			->fetchFieldValues()
		: null;

	if ( !$tagNames ) {
		$this->output( "No user defined tags to set, moving on...\n" );

		return;
	}

	if ( $this->hasOption( 'dry-run' ) ) {
		$tagList = implode( ', ', $tagNames );
		$this->output( 'These tags will have ctd_user_defined=1 : ' . $tagList . "\n" );

		return;
	}

	$update = $primary->newUpdateQueryBuilder()
		->update( 'change_tag_def' )
		->set( [ 'ctd_user_defined' => 1 ] )
		->where( [ 'ctd_name' => $tagNames ] )
		->caller( __METHOD__ );
	$update->execute();

	$this->waitForReplication();
	$this->output( "Finished setting user defined tags in change_tag_def table\n" );
}
103
/**
 * Recompute ctd_count in change_tag_def from the number of change_tag rows
 * per ct_tag_id.
 *
 * The aggregate query is read from a replica; the per-tag updates are written
 * to the primary. With --dry-run the rows are printed and nothing is written.
 */
private function updateCountTagId() {
	$dbr = $this->getReplicaDB();

	// This query can be pretty expensive, don't run it on master
	$res = $dbr->newSelectQueryBuilder()
		->select( [ 'ct_tag_id', 'hitcount' => 'count(*)' ] )
		->from( 'change_tag' )
		->groupBy( 'ct_tag_id' )
		->caller( __METHOD__ )->fetchResultSet();

	$dbw = $this->getPrimaryDB();

	foreach ( $res as $row ) {
		// Skip rows without a usable tag id (null or 0)
		if ( !$row->ct_tag_id ) {
			continue;
		}

		if ( $this->hasOption( 'dry-run' ) ) {
			// $row is an object, not an array, so it cannot be passed to
			// implode(); print the two selected fields explicitly instead.
			$this->output(
				'This row will be updated: ' . $row->ct_tag_id . ', ' . $row->hitcount . "\n"
			);
			continue;
		}

		$dbw->newUpdateQueryBuilder()
			->update( 'change_tag_def' )
			->set( [ 'ctd_count' => $row->hitcount ] )
			->where( [ 'ctd_id' => $row->ct_tag_id ] )
			->caller( __METHOD__ )
			->execute();
	}
	$this->waitForReplication();
}
135
/**
 * Insert or refresh change_tag_def rows from the number of change_tag rows
 * per ct_tag (tag name).
 *
 * For every tag name found, a row with ctd_user_defined = 0 and the computed
 * count is inserted; if a row with that ctd_name already exists, only its
 * ctd_count is updated. With --dry-run the rows are printed and nothing is
 * written.
 */
private function updateCountTag() {
	$dbr = $this->getReplicaDB();

	// This query can be pretty expensive, don't run it on master
	$res = $dbr->newSelectQueryBuilder()
		->select( [ 'ct_tag', 'hitcount' => 'count(*)' ] )
		->from( 'change_tag' )
		->groupBy( 'ct_tag' )
		->caller( __METHOD__ )->fetchResultSet();

	$dbw = $this->getPrimaryDB();

	foreach ( $res as $row ) {
		// Hygiene check
		if ( !$row->ct_tag ) {
			continue;
		}

		if ( $this->hasOption( 'dry-run' ) ) {
			// Separate the tag name from its count; without a separator the
			// two values run together (e.g. "foo1" + 2 vs "foo" + 12).
			$this->output(
				'This row will be updated: ' . $row->ct_tag . ', ' . $row->hitcount . "\n"
			);
			continue;
		}
		$dbw->newInsertQueryBuilder()
			->insertInto( 'change_tag_def' )
			->row( [
				'ctd_name' => $row->ct_tag,
				'ctd_user_defined' => 0,
				'ctd_count' => $row->hitcount
			] )
			->onDuplicateKeyUpdate()
			->uniqueIndexFields( [ 'ctd_name' ] )
			->set( [ 'ctd_count' => $row->hitcount ] )
			->caller( __METHOD__ )->execute();
	}
	$this->waitForReplication();
}
172
/**
 * Back-populate change_tag.ct_tag_id for every tag defined in change_tag_def,
 * one tag at a time in ascending ctd_id order.
 */
private function backpopulateChangeTagId() {
	$definitions = $this->getReplicaDB()->newSelectQueryBuilder()
		->select( [ 'ctd_name', 'ctd_id' ] )
		->from( 'change_tag_def' )
		->orderBy( 'ctd_id' )
		->caller( __METHOD__ )
		->fetchResultSet();

	foreach ( $definitions as $definition ) {
		$this->backpopulateChangeTagPerTag( $definition->ctd_name, $definition->ctd_id );
	}
}
185
/**
 * Set ct_tag_id on all change_tag rows of one tag that do not have it yet.
 *
 * Rows are processed in batches of getBatchSize() ordered by ct_id; after
 * each written batch the script waits for replication and optionally sleeps
 * for the number of seconds given by --sleep. With --dry-run the affected
 * ids are printed and nothing is written.
 *
 * @param string $tagName Value of ct_tag to match
 * @param int $tagId Value to store in ct_tag_id
 */
private function backpopulateChangeTagPerTag( $tagName, $tagId ) {
	$replica = $this->getReplicaDB();
	$primary = $this->getPrimaryDB();
	$pause = (int)$this->getOption( 'sleep', 0 );
	// Highest ct_id handled so far; each batch starts right after it
	$cursor = 0;
	$this->output( "Starting to add ct_tag_id = {$tagId} for ct_tag = {$tagName}\n" );

	for ( ; ; ) {
		// Given that indexes might not be there, it's better to use replica
		$conds = [ 'ct_tag' => $tagName, 'ct_tag_id' => null, 'ct_id > ' . $cursor ];
		$batch = $replica->newSelectQueryBuilder()
			->select( 'ct_id' )
			->from( 'change_tag' )
			->where( $conds )
			->orderBy( 'ct_id' )
			->limit( $this->getBatchSize() )
			->caller( __METHOD__ )
			->fetchFieldValues();

		if ( !$batch ) {
			break;
		}
		$cursor = end( $batch );

		if ( $this->hasOption( 'dry-run' ) ) {
			$idList = implode( ', ', $batch );
			$this->output(
				"These ids will be changed to have \"{$tagId}\" as tag id: " . $idList . "\n"
			);
			continue;
		}

		$this->output( "Updating ct_tag_id = {$tagId} up to row ct_id = {$cursor}\n" );

		$primary->newUpdateQueryBuilder()
			->update( 'change_tag' )
			->set( [ 'ct_tag_id' => $tagId ] )
			->where( [ 'ct_id' => $batch ] )
			->caller( __METHOD__ )
			->execute();

		$this->waitForReplication();
		if ( $pause > 0 ) {
			sleep( $pause );
		}
	}

	$this->output( "Finished adding ct_tag_id = {$tagId} for ct_tag = {$tagName}\n" );
}
231
/**
 * Key identifying this update; the name of this class is used.
 *
 * @return string
 */
protected function getUpdateKey() {
	return self::class;
}
235}
236
// Name the maintenance class defined by this file, then include the runner.
// NOTE(review): presumably the runner executes $maintClass only when this
// file is the script being invoked directly - confirm against Maintenance.php.
$maintClass = PopulateChangeTagDef::class;
require_once RUN_MAINTENANCE_IF_MAIN;
getDB()
Class for scripts that perform database maintenance and want to log the update in updatelog so we can...
output( $out, $channel=null)
Throw some output to the user.
waitForReplication()
Wait for replica DBs to catch up.
hasOption( $name)
Checks to see if a particular option was set.
getBatchSize()
Returns batch size.
addDescription( $text)
Set the description text.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
setBatchSize( $s=0)
Sets the batch size.
getUpdateKey()
Get the update key name to go in the update log table.
__construct()
Default constructor.
doDBUpdates()
Do the actual work.
const DB_PRIMARY
Definition defines.php:28