MediaWiki  master
populateChangeTagDef.php
Go to the documentation of this file.
1 <?php
19 require_once __DIR__ . '/Maintenance.php';
20 
28  protected $lbFactory;
29 
/**
 * Register the script description, the default batch size and the
 * command-line options understood by this script.
 */
public function __construct() {
	parent::__construct();
	$this->addDescription( 'Populate and improve accuracy of change_tag_def statistics' );
	// This script only inserts and updates rows; it never deletes anything,
	// so the help text must describe skipped writes rather than deletions.
	$this->addOption( 'dry-run', 'Print debug info instead of actually writing to the database' );
	$this->setBatchSize( 1000 );
	// Optional (not required) option that takes a value.
	$this->addOption(
		'sleep',
		'Sleep time (in seconds) between every batch, defaults to zero',
		false,
		true
	);
	$this->addOption( 'populate-only', 'Do not update change_tag_def table' );
	$this->addOption( 'set-user-tags-only', 'Only update ctd_user_defined from valid_tag table' );
}
44 
/**
 * Run the update steps that fit the current shape of the change_tag table.
 *
 * If change_tag still has a ct_tag column, the tag counts are (optionally)
 * refreshed by tag name, ct_tag_id is backpopulated and user defined tags
 * are flagged. Otherwise only the counts keyed by ct_tag_id are refreshed.
 *
 * @return bool Always true
 */
protected function doDBUpdates() {
	$this->lbFactory = MediaWiki\MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
	$this->setBatchSize( $this->getOption( 'batch-size', $this->getBatchSize() ) );

	$replica = $this->lbFactory->getMainLB()->getConnection( DB_REPLICA );
	$hasTagNameColumn = $replica->fieldExists( 'change_tag', 'ct_tag', __METHOD__ );

	if ( !$hasTagNameColumn ) {
		// No ct_tag column: only the per-id counts can be refreshed.
		$this->updateCountTagId();

		// TODO: Implement
		// $this->cleanZeroCountRows();

		return true;
	}

	if ( $this->hasOption( 'set-user-tags-only' ) ) {
		// Short-circuit: nothing but the ctd_user_defined flag is touched.
		$this->setUserDefinedTags();
		return true;
	}

	if ( !$this->hasOption( 'populate-only' ) ) {
		$this->updateCountTag();
	}
	$this->backpopulateChangeTagId();
	$this->setUserDefinedTags();

	// TODO: Implement
	// $this->cleanZeroCountRows();

	return true;
}
73 
/**
 * Set ctd_user_defined = 1 in change_tag_def for every tag name listed in
 * the valid_tag table.
 *
 * Does nothing (apart from printing a message) when valid_tag does not exist
 * or yields no tags. With --dry-run the affected tag names are only printed.
 */
private function setUserDefinedTags() {
	$replica = $this->lbFactory->getMainLB()->getConnection( DB_REPLICA );

	// The valid_tag table may be absent; treat that like "no user tags".
	$definedTags = $replica->tableExists( 'valid_tag' )
		? $replica->selectFieldValues( 'valid_tag', 'vt_tag', [], __METHOD__ )
		: null;

	if ( !$definedTags ) {
		$this->output( "No user defined tags to set, moving on...\n" );
		return;
	}

	if ( $this->hasOption( 'dry-run' ) ) {
		$tagList = implode( ', ', $definedTags );
		$this->output( 'These tags will have ctd_user_defined=1 : ' . $tagList . "\n" );
		return;
	}

	$primary = $this->lbFactory->getMainLB()->getConnection( DB_MASTER );
	$primary->update(
		'change_tag_def',
		[ 'ctd_user_defined' => 1 ],
		[ 'ctd_name' => $definedTags ],
		__METHOD__
	);

	$this->lbFactory->waitForReplication();
	$this->output( "Finished setting user defined tags in change_tag_def table\n" );
}
110 
/**
 * Refresh change_tag_def.ctd_count from the number of change_tag rows per
 * ct_tag_id.
 *
 * Rows with an empty ct_tag_id are skipped. With --dry-run the intended
 * updates are printed as "<ct_tag_id>, <hitcount>" and nothing is written.
 */
private function updateCountTagId() {
	$dbr = $this->lbFactory->getMainLB()->getConnection( DB_REPLICA );

	// This query can be pretty expensive, don't run it on master
	$res = $dbr->select(
		'change_tag',
		[ 'ct_tag_id', 'hitcount' => 'count(*)' ],
		[],
		__METHOD__,
		[ 'GROUP BY' => 'ct_tag_id' ]
	);

	$dbw = $this->lbFactory->getMainLB()->getConnection( DB_MASTER );
	$dryRun = $this->hasOption( 'dry-run' );

	foreach ( $res as $row ) {
		if ( !$row->ct_tag_id ) {
			continue;
		}

		if ( $dryRun ) {
			// $row is an object, so it cannot be handed to implode() directly;
			// print the two selected fields explicitly instead.
			$this->output(
				'This row will be updated: ' . $row->ct_tag_id . ', ' . $row->hitcount . "\n"
			);
			continue;
		}

		$dbw->update(
			'change_tag_def',
			[ 'ctd_count' => $row->hitcount ],
			[ 'ctd_id' => $row->ct_tag_id ],
			__METHOD__
		);
	}
	$this->lbFactory->waitForReplication();
}
144 
/**
 * Insert or refresh change_tag_def rows from the number of change_tag rows
 * per ct_tag (tag name).
 *
 * New tags are inserted with ctd_user_defined = 0; for existing ctd_name
 * values only ctd_count is updated. With --dry-run the intended changes are
 * printed as "<ct_tag>, <hitcount>" and nothing is written.
 */
private function updateCountTag() {
	$dbr = $this->lbFactory->getMainLB()->getConnection( DB_REPLICA );

	// This query can be pretty expensive, don't run it on master
	$res = $dbr->select(
		'change_tag',
		[ 'ct_tag', 'hitcount' => 'count(*)' ],
		[],
		__METHOD__,
		[ 'GROUP BY' => 'ct_tag' ]
	);

	$dbw = $this->lbFactory->getMainLB()->getConnection( DB_MASTER );
	$dryRun = $this->hasOption( 'dry-run' );

	foreach ( $res as $row ) {
		// Hygiene check: skip rows without a tag name. Compare against the
		// empty string explicitly so that a tag literally named "0" (which
		// is falsy in PHP) is still counted.
		if ( (string)$row->ct_tag === '' ) {
			continue;
		}

		if ( $dryRun ) {
			// Separate name and count; without a separator "foo" with 12
			// hits would be printed as the ambiguous "foo12".
			$this->output(
				'This row will be updated: ' . $row->ct_tag . ', ' . $row->hitcount . "\n"
			);
			continue;
		}

		$dbw->upsert(
			'change_tag_def',
			[
				'ctd_name' => $row->ct_tag,
				'ctd_user_defined' => 0,
				'ctd_count' => $row->hitcount
			],
			[ 'ctd_name' ],
			[ 'ctd_count' => $row->hitcount ],
			__METHOD__
		);
	}
	$this->lbFactory->waitForReplication();
}
184 
/**
 * Walk over every change_tag_def row (ordered by ctd_id) and backpopulate
 * change_tag.ct_tag_id for the corresponding tag name.
 */
private function backpopulateChangeTagId() {
	$replica = $this->lbFactory->getMainLB()->getConnection( DB_REPLICA );

	$fields = [ 'ctd_name', 'ctd_id' ];
	$options = [ 'ORDER BY' => 'ctd_id' ];
	$definitions = $replica->select( 'change_tag_def', $fields, [], __METHOD__, $options );

	foreach ( $definitions as $definition ) {
		$this->backpopulateChangeTagPerTag( $definition->ctd_name, $definition->ctd_id );
	}
}
199 
/**
 * Fill in change_tag.ct_tag_id for all rows of one tag, batch by batch.
 *
 * Each pass selects up to one batch of ct_id values that carry the given
 * tag name, still have a NULL ct_tag_id and lie above the last processed
 * ct_id, then updates exactly those rows. With --dry-run the ids are only
 * printed.
 *
 * @param string $tagName Value of change_tag.ct_tag to match
 * @param int $tagId Value to store in change_tag.ct_tag_id
 */
private function backpopulateChangeTagPerTag( $tagName, $tagId ) {
	$replica = $this->lbFactory->getMainLB()->getConnection( DB_REPLICA );
	$primary = $this->lbFactory->getMainLB()->getConnection( DB_MASTER );
	$pauseSeconds = (int)$this->getOption( 'sleep', 0 );
	$dryRun = $this->hasOption( 'dry-run' );
	$selectOptions = [ 'LIMIT' => $this->getBatchSize(), 'ORDER BY' => 'ct_id' ];

	$this->output( "Starting to add ct_tag_id = {$tagId} for ct_tag = {$tagName}\n" );

	// Highest ct_id handled so far; every batch resumes right after it.
	$lastId = 0;
	for ( ; ; ) {
		// Given that indexes might not be there, it's better to use replica
		$conditions = [
			'ct_tag' => $tagName,
			'ct_tag_id' => null,
			'ct_id > ' . $lastId
		];
		$ids = $replica->selectFieldValues(
			'change_tag',
			'ct_id',
			$conditions,
			__METHOD__,
			$selectOptions
		);

		if ( !$ids ) {
			// Nothing left to do for this tag.
			break;
		}
		$lastId = end( $ids );

		if ( $dryRun ) {
			$idList = implode( ', ', $ids );
			$this->output(
				"These ids will be changed to have \"{$tagId}\" as tag id: " . $idList . "\n"
			);
			continue;
		}

		$this->output( "Updating ct_tag_id = {$tagId} up to row ct_id = {$lastId}\n" );
		$primary->update(
			'change_tag',
			[ 'ct_tag_id' => $tagId ],
			[ 'ct_id' => $ids ],
			__METHOD__
		);

		$this->lbFactory->waitForReplication();
		if ( $pauseSeconds > 0 ) {
			sleep( $pauseSeconds );
		}
	}

	$this->output( "Finished adding ct_tag_id = {$tagId} for ct_tag = {$tagName}\n" );
}
245 
/**
 * Key identifying this update: the name of this class.
 *
 * @return string
 */
protected function getUpdateKey() {
	return self::class;
}
249 }
250 
// Name the maintenance class defined in this file ...
$maintClass = PopulateChangeTagDef::class;
// ... and include the file named by RUN_MAINTENANCE_IF_MAIN (a constant
// defined in Maintenance.php).
require_once RUN_MAINTENANCE_IF_MAIN;
const RUN_MAINTENANCE_IF_MAIN
Definition: Maintenance.php:39
getOption( $name, $default=null)
Get an option, or return the default.
setBatchSize( $s=0)
Set the batch size.
static getInstance()
Returns the global default instance of the top level service locator.
hasOption( $name)
Checks to see if a particular option exists.
const DB_MASTER
Definition: defines.php:26
backpopulateChangeTagPerTag( $tagName, $tagId)
addDescription( $text)
Set the description text.
Wikimedia Rdbms ILBFactory $lbFactory
output( $out, $channel=null)
Throw some output to the user.
Class for scripts that perform database maintenance and want to log the update in updatelog so we can...
getBatchSize()
Returns batch size.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
const DB_REPLICA
Definition: defines.php:25
This program is free software; you can redistribute it and/or modify it under the terms of the GNU Ge...