MediaWiki  master
populateChangeTagDef.php
Go to the documentation of this file.
1 <?php
19 require_once __DIR__ . '/Maintenance.php';
20 
28  protected $lbFactory;
29 
30  public function __construct() {
31  parent::__construct();
32  $this->addDescription( 'Populate and improve accuracy of change_tag_def statistics' );
33  $this->addOption( 'dry-run', 'Print debug info instead of actually deleting' );
34  $this->setBatchSize( 1000 );
35  $this->addOption(
36  'sleep',
37  'Sleep time (in seconds) between every batch, defaults to zero',
38  false,
39  true
40  );
41  $this->addOption( 'populate-only', 'Do not update change_tag_def table' );
42  $this->addOption( 'set-user-tags-only', 'Only update ctd_user_defined from valid_tag table' );
43  }
44 
45  protected function doDBUpdates() {
46  $this->lbFactory = MediaWiki\MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
47  $this->setBatchSize( $this->getOption( 'batch-size', $this->getBatchSize() ) );
48 
49  $dbr = $this->lbFactory->getMainLB()->getConnectionRef( DB_REPLICA );
50  if ( $dbr->fieldExists(
51  'change_tag',
52  'ct_tag',
53  __METHOD__
54  )
55  ) {
56  if ( $this->hasOption( 'set-user-tags-only' ) ) {
57  $this->setUserDefinedTags();
58  return true;
59  }
60  if ( !$this->hasOption( 'populate-only' ) ) {
61  $this->updateCountTag();
62  }
63  $this->backpopulateChangeTagId();
64  $this->setUserDefinedTags();
65  } else {
66  $this->updateCountTagId();
67  }
68 
69  // TODO: Implement
70  // $this->cleanZeroCountRows();
71 
72  return true;
73  }
74 
75  private function setUserDefinedTags() {
76  $dbr = $this->lbFactory->getMainLB()->getConnectionRef( DB_REPLICA );
77 
78  $userTags = null;
79  if ( $dbr->tableExists( 'valid_tag', __METHOD__ ) ) {
80  $userTags = $dbr->selectFieldValues(
81  'valid_tag',
82  'vt_tag',
83  [],
84  __METHOD__
85  );
86  }
87 
88  if ( empty( $userTags ) ) {
89  $this->output( "No user defined tags to set, moving on...\n" );
90  return;
91  }
92 
93  if ( $this->hasOption( 'dry-run' ) ) {
94  $this->output(
95  'These tags will have ctd_user_defined=1 : ' . implode( ', ', $userTags ) . "\n"
96  );
97  return;
98  }
99 
100  $dbw = $this->lbFactory->getMainLB()->getConnectionRef( DB_PRIMARY );
101 
102  $dbw->update(
103  'change_tag_def',
104  [ 'ctd_user_defined' => 1 ],
105  [ 'ctd_name' => $userTags ],
106  __METHOD__
107  );
108  $this->lbFactory->waitForReplication();
109  $this->output( "Finished setting user defined tags in change_tag_def table\n" );
110  }
111 
112  private function updateCountTagId() {
113  $dbr = $this->lbFactory->getMainLB()->getConnectionRef( DB_REPLICA );
114 
115  // This query can be pretty expensive, don't run it on master
116  $res = $dbr->select(
117  'change_tag',
118  [ 'ct_tag_id', 'hitcount' => 'count(*)' ],
119  [],
120  __METHOD__,
121  [ 'GROUP BY' => 'ct_tag_id' ]
122  );
123 
124  $dbw = $this->lbFactory->getMainLB()->getConnectionRef( DB_PRIMARY );
125 
126  foreach ( $res as $row ) {
127  if ( !$row->ct_tag_id ) {
128  continue;
129  }
130 
131  if ( $this->hasOption( 'dry-run' ) ) {
132  $this->output( 'This row will be updated: ' . implode( ', ', $row ) . "\n" );
133  continue;
134  }
135 
136  $dbw->update(
137  'change_tag_def',
138  [ 'ctd_count' => $row->hitcount ],
139  [ 'ctd_id' => $row->ct_tag_id ],
140  __METHOD__
141  );
142  }
143  $this->lbFactory->waitForReplication();
144  }
145 
146  private function updateCountTag() {
147  $dbr = $this->lbFactory->getMainLB()->getConnectionRef( DB_REPLICA );
148 
149  // This query can be pretty expensive, don't run it on master
150  $res = $dbr->select(
151  'change_tag',
152  [ 'ct_tag', 'hitcount' => 'count(*)' ],
153  [],
154  __METHOD__,
155  [ 'GROUP BY' => 'ct_tag' ]
156  );
157 
158  $dbw = $this->lbFactory->getMainLB()->getConnectionRef( DB_PRIMARY );
159 
160  foreach ( $res as $row ) {
161  // Hygiene check
162  if ( !$row->ct_tag ) {
163  continue;
164  }
165 
166  if ( $this->hasOption( 'dry-run' ) ) {
167  $this->output( 'This row will be updated: ' . $row->ct_tag . $row->hitcount . "\n" );
168  continue;
169  }
170 
171  $dbw->upsert(
172  'change_tag_def',
173  [
174  'ctd_name' => $row->ct_tag,
175  'ctd_user_defined' => 0,
176  'ctd_count' => $row->hitcount
177  ],
178  'ctd_name',
179  [ 'ctd_count' => $row->hitcount ],
180  __METHOD__
181  );
182  }
183  $this->lbFactory->waitForReplication();
184  }
185 
186  private function backpopulateChangeTagId() {
187  $dbr = $this->lbFactory->getMainLB()->getConnectionRef( DB_REPLICA );
188  $changeTagDefs = $dbr->select(
189  'change_tag_def',
190  [ 'ctd_name', 'ctd_id' ],
191  [],
192  __METHOD__,
193  [ 'ORDER BY' => 'ctd_id' ]
194  );
195 
196  foreach ( $changeTagDefs as $row ) {
197  $this->backpopulateChangeTagPerTag( $row->ctd_name, $row->ctd_id );
198  }
199  }
200 
201  private function backpopulateChangeTagPerTag( $tagName, $tagId ) {
202  $dbr = $this->lbFactory->getMainLB()->getConnectionRef( DB_REPLICA );
203  $dbw = $this->lbFactory->getMainLB()->getConnectionRef( DB_PRIMARY );
204  $sleep = (int)$this->getOption( 'sleep', 0 );
205  $lastId = 0;
206  $this->output( "Starting to add ct_tag_id = {$tagId} for ct_tag = {$tagName}\n" );
207  while ( true ) {
208  // Given that indexes might not be there, it's better to use replica
209  $ids = $dbr->selectFieldValues(
210  'change_tag',
211  'ct_id',
212  [ 'ct_tag' => $tagName, 'ct_tag_id' => null, 'ct_id > ' . $lastId ],
213  __METHOD__,
214  [ 'LIMIT' => $this->getBatchSize(), 'ORDER BY' => 'ct_id' ]
215  );
216 
217  if ( !$ids ) {
218  break;
219  }
220  $lastId = end( $ids );
221 
222  if ( $this->hasOption( 'dry-run' ) ) {
223  $this->output(
224  "These ids will be changed to have \"{$tagId}\" as tag id: " . implode( ', ', $ids ) . "\n"
225  );
226  continue;
227  } else {
228  $this->output( "Updating ct_tag_id = {$tagId} up to row ct_id = {$lastId}\n" );
229  }
230 
231  $dbw->update(
232  'change_tag',
233  [ 'ct_tag_id' => $tagId ],
234  [ 'ct_id' => $ids ],
235  __METHOD__
236  );
237 
238  $this->lbFactory->waitForReplication();
239  if ( $sleep > 0 ) {
240  sleep( $sleep );
241  }
242  }
243 
244  $this->output( "Finished adding ct_tag_id = {$tagId} for ct_tag = {$tagName}\n" );
245  }
246 
247  protected function getUpdateKey() {
248  return __CLASS__;
249  }
250 }
251 
252 $maintClass = PopulateChangeTagDef::class;
253 require_once RUN_MAINTENANCE_IF_MAIN;
Class for scripts that perform database maintenance and want to log the update in updatelog so we can...
output( $out, $channel=null)
Throw some output to the user.
hasOption( $name)
Checks to see if a particular option was set.
getBatchSize()
Returns batch size.
addDescription( $text)
Set the description text.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
setBatchSize( $s=0)
static getInstance()
Returns the global default instance of the top level service locator.
This program is free software; you can redistribute it and/or modify it under the terms of the GNU Ge...
getUpdateKey()
Get the update key name to go in the update log table.
__construct()
Default constructor.
Wikimedia Rdbms ILBFactory $lbFactory
doDBUpdates()
Do the actual work.
const DB_REPLICA
Definition: defines.php:26
const DB_PRIMARY
Definition: defines.php:28