MediaWiki 1.41.2
populateChangeTagDef.php
Go to the documentation of this file.
1<?php
19require_once __DIR__ . '/Maintenance.php';
20
/** @var \Wikimedia\Rdbms\ILBFactory Load balancer factory; assigned at the start of doDBUpdates() */
28 protected $lbFactory;
29
/**
 * Register the script description, the default batch size (1000) and the
 * command-line options understood by this script.
 */
public function __construct() {
	parent::__construct();
	$this->addDescription( 'Populate and improve accuracy of change_tag_def statistics' );
	// None of the methods in this script delete rows; dry-run suppresses inserts/updates,
	// so the help text describes it as skipping writes rather than deletions.
	$this->addOption( 'dry-run', 'Print debug info instead of actually writing to the database' );
	$this->setBatchSize( 1000 );
	$this->addOption(
		'sleep',
		'Sleep time (in seconds) between every batch, defaults to zero',
		false,
		true
	);
	$this->addOption( 'populate-only', 'Do not update change_tag_def table' );
	$this->addOption( 'set-user-tags-only', 'Only update ctd_user_defined from valid_tag table' );
}
44
/**
 * Entry point for the update: choose which population steps to run.
 *
 * When change_tag still has a ct_tag column, the tag-name based steps run
 * (optionally restricted by --set-user-tags-only / --populate-only);
 * otherwise only the ct_tag_id based count refresh runs.
 *
 * @return bool Always true
 */
protected function doDBUpdates() {
	$this->lbFactory = $this->getServiceContainer()->getDBLoadBalancerFactory();
	$requestedBatchSize = $this->getOption( 'batch-size', $this->getBatchSize() );
	$this->setBatchSize( $requestedBatchSize );

	$replica = $this->lbFactory->getMainLB()->getConnectionRef( DB_REPLICA );
	$hasTagNameColumn = $replica->fieldExists( 'change_tag', 'ct_tag', __METHOD__ );

	if ( !$hasTagNameColumn ) {
		// No ct_tag column: counts can only be derived from ct_tag_id.
		$this->updateCountTagId();
	} elseif ( $this->hasOption( 'set-user-tags-only' ) ) {
		$this->setUserDefinedTags();
		return true;
	} else {
		if ( !$this->hasOption( 'populate-only' ) ) {
			$this->updateCountTag();
		}
		$this->backpopulateChangeTagId();
		$this->setUserDefinedTags();
	}

	// TODO: Implement
	// $this->cleanZeroCountRows();

	return true;
}
74
/**
 * Set ctd_user_defined = 1 in change_tag_def for every tag listed in valid_tag.
 *
 * Prints a notice and returns when valid_tag does not exist or yields no tags.
 * With --dry-run, only prints the tags that would be flagged.
 */
private function setUserDefinedTags() {
	$replica = $this->lbFactory->getMainLB()->getConnectionRef( DB_REPLICA );

	// Stays null when the valid_tag table is not present.
	$tagNames = $replica->tableExists( 'valid_tag', __METHOD__ )
		? $replica->newSelectQueryBuilder()
			->select( 'vt_tag' )
			->from( 'valid_tag' )
			->caller( __METHOD__ )
			->fetchFieldValues()
		: null;

	if ( !$tagNames ) {
		$this->output( "No user defined tags to set, moving on...\n" );
		return;
	}

	if ( $this->hasOption( 'dry-run' ) ) {
		$listing = implode( ', ', $tagNames );
		$this->output( 'These tags will have ctd_user_defined=1 : ' . $listing . "\n" );
		return;
	}

	$primary = $this->lbFactory->getMainLB()->getConnectionRef( DB_PRIMARY );
	$newValues = [ 'ctd_user_defined' => 1 ];
	$matching = [ 'ctd_name' => $tagNames ];
	$primary->update( 'change_tag_def', $newValues, $matching, __METHOD__ );

	$this->lbFactory->waitForReplication();
	$this->output( "Finished setting user defined tags in change_tag_def table\n" );
}
109
/**
 * Refresh change_tag_def.ctd_count from the number of change_tag rows per ct_tag_id.
 *
 * Rows with an empty ct_tag_id are skipped. With --dry-run, each row that
 * would be updated is printed as "ct_tag_id, hitcount" and nothing is written.
 */
private function updateCountTagId() {
	$dbr = $this->lbFactory->getMainLB()->getConnectionRef( DB_REPLICA );

	// This query can be pretty expensive, don't run it on master
	$res = $dbr->newSelectQueryBuilder()
		->select( [ 'ct_tag_id', 'hitcount' => 'count(*)' ] )
		->from( 'change_tag' )
		->groupBy( 'ct_tag_id' )
		->caller( __METHOD__ )->fetchResultSet();

	$dbw = $this->lbFactory->getMainLB()->getConnectionRef( DB_PRIMARY );

	foreach ( $res as $row ) {
		if ( !$row->ct_tag_id ) {
			continue;
		}

		if ( $this->hasOption( 'dry-run' ) ) {
			// $row is an object (its fields are read with ->), and implode() only
			// accepts arrays, so build the list of fields explicitly.
			$this->output(
				'This row will be updated: ' .
				implode( ', ', [ $row->ct_tag_id, $row->hitcount ] ) . "\n"
			);
			continue;
		}

		$dbw->update(
			'change_tag_def',
			[ 'ctd_count' => $row->hitcount ],
			[ 'ctd_id' => $row->ct_tag_id ],
			__METHOD__
		);
	}
	$this->lbFactory->waitForReplication();
}
141
/**
 * Insert or refresh one change_tag_def row per distinct change_tag.ct_tag value,
 * storing the number of change_tag rows for that tag in ctd_count.
 *
 * New rows are inserted with ctd_user_defined = 0; for an existing ctd_name
 * only ctd_count is updated. With --dry-run, each affected tag is printed as
 * "ct_tag, hitcount" and nothing is written.
 */
private function updateCountTag() {
	$dbr = $this->lbFactory->getMainLB()->getConnectionRef( DB_REPLICA );

	// This query can be pretty expensive, don't run it on master
	$res = $dbr->newSelectQueryBuilder()
		->select( [ 'ct_tag', 'hitcount' => 'count(*)' ] )
		->from( 'change_tag' )
		->groupBy( 'ct_tag' )
		->caller( __METHOD__ )->fetchResultSet();

	$dbw = $this->lbFactory->getMainLB()->getConnectionRef( DB_PRIMARY );

	foreach ( $res as $row ) {
		// Hygiene check
		if ( !$row->ct_tag ) {
			continue;
		}

		if ( $this->hasOption( 'dry-run' ) ) {
			// Separate the tag name from its count; without a separator the two
			// values run together and the output cannot be read back unambiguously.
			$this->output(
				'This row will be updated: ' . $row->ct_tag . ', ' . $row->hitcount . "\n"
			);
			continue;
		}
		$dbw->newInsertQueryBuilder()
			->insertInto( 'change_tag_def' )
			->row( [
				'ctd_name' => $row->ct_tag,
				'ctd_user_defined' => 0,
				'ctd_count' => $row->hitcount
			] )
			->onDuplicateKeyUpdate()
			->uniqueIndexFields( [ 'ctd_name' ] )
			->set( [ 'ctd_count' => $row->hitcount ] )
			->caller( __METHOD__ )->execute();
	}
	$this->lbFactory->waitForReplication();
}
178
/**
 * Walk every change_tag_def row in ctd_id order and back-populate
 * change_tag.ct_tag_id for that tag.
 */
private function backpopulateChangeTagId() {
	$replica = $this->lbFactory->getMainLB()->getConnectionRef( DB_REPLICA );

	$query = $replica->newSelectQueryBuilder()
		->select( [ 'ctd_name', 'ctd_id' ] )
		->from( 'change_tag_def' )
		->orderBy( 'ctd_id' )
		->caller( __METHOD__ );

	foreach ( $query->fetchResultSet() as $definition ) {
		$this->backpopulateChangeTagPerTag( $definition->ctd_name, $definition->ctd_id );
	}
}
191
/**
 * Fill in ct_tag_id for all change_tag rows of one tag, one batch at a time.
 *
 * Each pass selects up to getBatchSize() ct_id values (ascending, above the
 * last one seen) whose ct_tag matches and whose ct_tag_id is NULL, updates
 * them, waits for replication and optionally sleeps. With --dry-run the ids
 * are only printed.
 *
 * @param string $tagName Tag name to match against change_tag.ct_tag
 * @param int|string $tagId Id to store in change_tag.ct_tag_id
 */
private function backpopulateChangeTagPerTag( $tagName, $tagId ) {
	$replica = $this->lbFactory->getMainLB()->getConnectionRef( DB_REPLICA );
	$primary = $this->lbFactory->getMainLB()->getConnectionRef( DB_PRIMARY );
	$pauseSeconds = (int)$this->getOption( 'sleep', 0 );
	$cursor = 0;

	$this->output( "Starting to add ct_tag_id = {$tagId} for ct_tag = {$tagName}\n" );

	for ( ; ; ) {
		// Indexes might be missing, so the lookup is done on a replica.
		$conditions = [
			'ct_tag' => $tagName,
			'ct_tag_id' => null,
			'ct_id > ' . $cursor,
		];
		$batch = $replica->newSelectQueryBuilder()
			->select( 'ct_id' )
			->from( 'change_tag' )
			->where( $conditions )
			->orderBy( 'ct_id' )
			->limit( $this->getBatchSize() )
			->caller( __METHOD__ )
			->fetchFieldValues();

		if ( !$batch ) {
			break;
		}
		// Ids are ordered ascending, so the last one is the next lower bound.
		$cursor = end( $batch );

		if ( $this->hasOption( 'dry-run' ) ) {
			$idList = implode( ', ', $batch );
			$this->output(
				"These ids will be changed to have \"{$tagId}\" as tag id: " . $idList . "\n"
			);
			continue;
		}

		$this->output( "Updating ct_tag_id = {$tagId} up to row ct_id = {$cursor}\n" );

		$primary->update(
			'change_tag',
			[ 'ct_tag_id' => $tagId ],
			[ 'ct_id' => $batch ],
			__METHOD__
		);

		$this->lbFactory->waitForReplication();
		if ( $pauseSeconds > 0 ) {
			sleep( $pauseSeconds );
		}
	}

	$this->output( "Finished adding ct_tag_id = {$tagId} for ct_tag = {$tagName}\n" );
}
237
/**
 * Key under which this update is recorded in the update log.
 *
 * @return string Name of this class
 */
protected function getUpdateKey() {
	return self::class;
}
241}
242
// Name of the maintenance class defined in this file; presumably read by the
// included runner below to decide which script to execute — confirm.
243$maintClass = PopulateChangeTagDef::class;
// Include the file that the RUN_MAINTENANCE_IF_MAIN constant points to.
244require_once RUN_MAINTENANCE_IF_MAIN;
Class for scripts that perform database maintenance and want to log the update in updatelog so we can...
output( $out, $channel=null)
Throw some output to the user.
hasOption( $name)
Checks to see if a particular option was set.
getServiceContainer()
Returns the main service container.
getBatchSize()
Returns batch size.
addDescription( $text)
Set the description text.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
setBatchSize( $s=0)
This program is free software; you can redistribute it and/or modify it under the terms of the GNU Ge...
getUpdateKey()
Get the update key name to go in the update log table.
__construct()
Default constructor.
Wikimedia Rdbms ILBFactory $lbFactory
doDBUpdates()
Do the actual work.
This program is free software; you can redistribute it and/or modify it under the terms of the GNU Ge...
const DB_REPLICA
Definition defines.php:26
const DB_PRIMARY
Definition defines.php:28