MediaWiki REL1_39
populateChangeTagDef.php
Go to the documentation of this file.
1<?php
19require_once __DIR__ . '/Maintenance.php';
20
28 protected $lbFactory;
29
/**
 * Register the script description, the default batch size and the
 * command-line options understood by this script.
 */
public function __construct() {
	parent::__construct();
	$this->addDescription( 'Populate and improve accuracy of change_tag_def statistics' );
	// Nothing in this script deletes rows (it only inserts/updates), so the
	// help text talks about updating rather than deleting.
	$this->addOption( 'dry-run', 'Print debug info instead of actually updating the database' );
	$this->setBatchSize( 1000 );
	$this->addOption(
		'sleep',
		'Sleep time (in seconds) between every batch, defaults to zero',
		false,
		true
	);
	$this->addOption( 'populate-only', 'Do not update change_tag_def table' );
	$this->addOption( 'set-user-tags-only', 'Only update ctd_user_defined from valid_tag table' );
}
44
/**
 * Entry point of the update.
 *
 * When change_tag has a ct_tag column, the counts are refreshed by tag name
 * (unless --populate-only is given), ct_tag_id is back-populated and the
 * user-defined flags are set; with --set-user-tags-only only the flags are
 * set. When the column does not exist, counts are refreshed by ct_tag_id.
 *
 * @return bool Always true
 */
protected function doDBUpdates() {
	$services = MediaWiki\MediaWikiServices::getInstance();
	$this->lbFactory = $services->getDBLoadBalancerFactory();

	$batchSize = $this->getOption( 'batch-size', $this->getBatchSize() );
	$this->setBatchSize( $batchSize );

	$replica = $this->lbFactory->getMainLB()->getConnectionRef( DB_REPLICA );
	$hasTagNameColumn = $replica->fieldExists( 'change_tag', 'ct_tag', __METHOD__ );

	if ( !$hasTagNameColumn ) {
		$this->updateCountTagId();
	} elseif ( $this->hasOption( 'set-user-tags-only' ) ) {
		$this->setUserDefinedTags();
		return true;
	} else {
		if ( !$this->hasOption( 'populate-only' ) ) {
			$this->updateCountTag();
		}
		$this->backpopulateChangeTagId();
		$this->setUserDefinedTags();
	}

	// TODO: Implement
	// $this->cleanZeroCountRows();

	return true;
}
74
/**
 * Flag every change_tag_def row whose ctd_name appears in valid_tag.vt_tag
 * with ctd_user_defined = 1.
 *
 * Does nothing when the valid_tag table is missing or empty. With --dry-run
 * the affected tag names are printed and nothing is written.
 */
private function setUserDefinedTags() {
	$replica = $this->lbFactory->getMainLB()->getConnectionRef( DB_REPLICA );

	// The valid_tag table may not exist; treat that like "no tags".
	$definedTags = $replica->tableExists( 'valid_tag', __METHOD__ )
		? $replica->selectFieldValues( 'valid_tag', 'vt_tag', [], __METHOD__ )
		: null;

	if ( !$definedTags ) {
		$this->output( "No user defined tags to set, moving on...\n" );
		return;
	}

	if ( $this->hasOption( 'dry-run' ) ) {
		$tagList = implode( ', ', $definedTags );
		$this->output( 'These tags will have ctd_user_defined=1 : ' . $tagList . "\n" );
		return;
	}

	$primary = $this->lbFactory->getMainLB()->getConnectionRef( DB_PRIMARY );
	$primary->update(
		'change_tag_def',
		[ 'ctd_user_defined' => 1 ],
		[ 'ctd_name' => $definedTags ],
		__METHOD__
	);

	$this->lbFactory->waitForReplication();
	$this->output( "Finished setting user defined tags in change_tag_def table\n" );
}
111
/**
 * Recompute change_tag_def.ctd_count from the number of change_tag rows
 * per ct_tag_id.
 *
 * Rows with an empty ct_tag_id are skipped. With --dry-run the values that
 * would be written are printed and nothing is updated.
 */
private function updateCountTagId() {
	$dbr = $this->lbFactory->getMainLB()->getConnectionRef( DB_REPLICA );

	// This query can be pretty expensive, don't run it on the primary
	$res = $dbr->select(
		'change_tag',
		[ 'ct_tag_id', 'hitcount' => 'count(*)' ],
		[],
		__METHOD__,
		[ 'GROUP BY' => 'ct_tag_id' ]
	);

	$dbw = $this->lbFactory->getMainLB()->getConnectionRef( DB_PRIMARY );
	$dryRun = $this->hasOption( 'dry-run' );

	foreach ( $res as $row ) {
		if ( !$row->ct_tag_id ) {
			continue;
		}

		if ( $dryRun ) {
			// $row is an object, which implode() cannot take (TypeError on
			// PHP 8), so print the two selected fields explicitly.
			$this->output(
				'This row will be updated: ' . $row->ct_tag_id . ', ' . $row->hitcount . "\n"
			);
			continue;
		}

		$dbw->update(
			'change_tag_def',
			[ 'ctd_count' => $row->hitcount ],
			[ 'ctd_id' => $row->ct_tag_id ],
			__METHOD__
		);
	}
	$this->lbFactory->waitForReplication();
}
145
/**
 * Recompute change_tag_def.ctd_count from the number of change_tag rows
 * per ct_tag (tag name), inserting a change_tag_def row for tag names that
 * do not have one yet.
 *
 * Newly inserted rows get ctd_user_defined = 0; for existing rows only
 * ctd_count is overwritten. With --dry-run the values that would be written
 * are printed and nothing is updated.
 */
private function updateCountTag() {
	$dbr = $this->lbFactory->getMainLB()->getConnectionRef( DB_REPLICA );

	// This query can be pretty expensive, don't run it on the primary
	$res = $dbr->select(
		'change_tag',
		[ 'ct_tag', 'hitcount' => 'count(*)' ],
		[],
		__METHOD__,
		[ 'GROUP BY' => 'ct_tag' ]
	);

	$dbw = $this->lbFactory->getMainLB()->getConnectionRef( DB_PRIMARY );
	$dryRun = $this->hasOption( 'dry-run' );

	foreach ( $res as $row ) {
		// Hygiene check: skip rows without a tag name. Compared explicitly so
		// that a tag literally named "0" is not dropped by PHP truthiness.
		if ( $row->ct_tag === null || $row->ct_tag === '' ) {
			continue;
		}

		if ( $dryRun ) {
			// Separate name and count; "tag1" . "23" would otherwise read "tag123".
			$this->output(
				'This row will be updated: ' . $row->ct_tag . ', ' . $row->hitcount . "\n"
			);
			continue;
		}

		$dbw->upsert(
			'change_tag_def',
			[
				'ctd_name' => $row->ct_tag,
				'ctd_user_defined' => 0,
				'ctd_count' => $row->hitcount
			],
			'ctd_name',
			[ 'ctd_count' => $row->hitcount ],
			__METHOD__
		);
	}
	$this->lbFactory->waitForReplication();
}
185
/**
 * Walk every change_tag_def row in ctd_id order and back-populate
 * change_tag.ct_tag_id for that tag.
 */
private function backpopulateChangeTagId() {
	$replica = $this->lbFactory->getMainLB()->getConnectionRef( DB_REPLICA );

	$fields = [ 'ctd_name', 'ctd_id' ];
	$options = [ 'ORDER BY' => 'ctd_id' ];
	$definitions = $replica->select( 'change_tag_def', $fields, [], __METHOD__, $options );

	foreach ( $definitions as $definition ) {
		$name = $definition->ctd_name;
		$id = $definition->ctd_id;
		$this->backpopulateChangeTagPerTag( $name, $id );
	}
}
200
/**
 * Set change_tag.ct_tag_id for all rows of one tag, in batches.
 *
 * Each pass reads up to one batch of ct_id values (ordered by ct_id, above
 * the last id seen) whose ct_tag matches and whose ct_tag_id is null, then
 * updates exactly those rows. With --dry-run the ids are printed instead of
 * being updated. After each written batch the script waits for replication
 * and optionally sleeps for --sleep seconds.
 *
 * @param string $tagName Value of ct_tag / ctd_name
 * @param int|string $tagId Value of ctd_id to store in ct_tag_id
 */
private function backpopulateChangeTagPerTag( $tagName, $tagId ) {
	$replica = $this->lbFactory->getMainLB()->getConnectionRef( DB_REPLICA );
	$primary = $this->lbFactory->getMainLB()->getConnectionRef( DB_PRIMARY );
	$pause = (int)$this->getOption( 'sleep', 0 );
	$isDryRun = $this->hasOption( 'dry-run' );

	$this->output( "Starting to add ct_tag_id = {$tagId} for ct_tag = {$tagName}\n" );

	// Highest ct_id handled so far; the next batch starts strictly above it.
	$cursor = 0;
	for ( ; ; ) {
		$conds = [ 'ct_tag' => $tagName, 'ct_tag_id' => null, 'ct_id > ' . $cursor ];
		$opts = [ 'LIMIT' => $this->getBatchSize(), 'ORDER BY' => 'ct_id' ];

		// Given that indexes might not be there, it's better to use replica
		$batch = $replica->selectFieldValues( 'change_tag', 'ct_id', $conds, __METHOD__, $opts );
		if ( !$batch ) {
			break;
		}
		$cursor = end( $batch );

		if ( $isDryRun ) {
			$idList = implode( ', ', $batch );
			$this->output(
				"These ids will be changed to have \"{$tagId}\" as tag id: " . $idList . "\n"
			);
			continue;
		}

		$this->output( "Updating ct_tag_id = {$tagId} up to row ct_id = {$cursor}\n" );
		$primary->update(
			'change_tag',
			[ 'ct_tag_id' => $tagId ],
			[ 'ct_id' => $batch ],
			__METHOD__
		);

		$this->lbFactory->waitForReplication();
		if ( $pause > 0 ) {
			sleep( $pause );
		}
	}

	$this->output( "Finished adding ct_tag_id = {$tagId} for ct_tag = {$tagName}\n" );
}
246
/**
 * @return string The name of this class, used as the update key
 */
protected function getUpdateKey() {
	return self::class;
}
250}
251
252$maintClass = PopulateChangeTagDef::class;
253require_once RUN_MAINTENANCE_IF_MAIN;
Class for scripts that perform database maintenance and want to log the update in updatelog so we can...
output( $out, $channel=null)
Throw some output to the user.
hasOption( $name)
Checks to see if a particular option was set.
getBatchSize()
Returns batch size.
addDescription( $text)
Set the description text.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
setBatchSize( $s=0)
This program is free software; you can redistribute it and/or modify it under the terms of the GNU Ge...
getUpdateKey()
Get the update key name to go in the update log table.
__construct()
Default constructor.
Wikimedia Rdbms ILBFactory $lbFactory
doDBUpdates()
Do the actual work.
This program is free software; you can redistribute it and/or modify it under the terms of the GNU Ge...
const DB_REPLICA
Definition defines.php:26
const DB_PRIMARY
Definition defines.php:28