MediaWiki  master
populateChangeTagDef.php
Go to the documentation of this file.
1 <?php
19 require_once __DIR__ . '/Maintenance.php';
20 
/** @var \Wikimedia\Rdbms\ILBFactory DB load balancer factory; assigned at the start of doDBUpdates() */
28  protected $lbFactory;
29 
/**
 * Register the script description, the default batch size and the
 * command-line options understood by this script.
 */
public function __construct() {
	parent::__construct();
	$this->addDescription( 'Populate and improve accuracy of change_tag_def statistics' );
	// This script only inserts and updates rows; it never deletes anything,
	// so the help text must not describe dry-run in terms of deletion.
	$this->addOption( 'dry-run', 'Print debug info instead of actually updating the database' );
	$this->setBatchSize( 1000 );
	$this->addOption(
		'sleep',
		'Sleep time (in seconds) between every batch, defaults to zero',
		// Not required...
		false,
		// ...but takes an argument (the number of seconds)
		true
	);
	$this->addOption( 'populate-only', 'Do not update change_tag_def table' );
	$this->addOption( 'set-user-tags-only', 'Only update ctd_user_defined from valid_tag table' );
}
44 
/**
 * Entry point of the update: pick which steps to run based on the schema
 * state and on the command-line options.
 *
 * While change_tag still has the ct_tag column, the counts are refreshed by
 * tag name (unless --populate-only), ct_tag_id is back-populated and the
 * user-defined flags are set; --set-user-tags-only limits the run to the
 * last step. Without that column only the counts keyed by ct_tag_id are
 * refreshed.
 *
 * @return bool Always true
 */
protected function doDBUpdates() {
	$this->lbFactory = $this->getServiceContainer()->getDBLoadBalancerFactory();

	$batchSize = $this->getOption( 'batch-size', $this->getBatchSize() );
	$this->setBatchSize( $batchSize );

	$dbr = $this->lbFactory->getMainLB()->getConnectionRef( DB_REPLICA );
	$hasTagNameColumn = $dbr->fieldExists( 'change_tag', 'ct_tag', __METHOD__ );

	if ( !$hasTagNameColumn ) {
		// No ct_tag column: only the counts per tag id can be refreshed
		$this->updateCountTagId();

		return true;
	}

	if ( $this->hasOption( 'set-user-tags-only' ) ) {
		$this->setUserDefinedTags();

		return true;
	}

	if ( !$this->hasOption( 'populate-only' ) ) {
		$this->updateCountTag();
	}
	$this->backpopulateChangeTagId();
	$this->setUserDefinedTags();

	// TODO: Implement
	// $this->cleanZeroCountRows();

	return true;
}
74 
/**
 * Flag every tag listed in the valid_tag table as user defined
 * (ctd_user_defined = 1) in change_tag_def.
 *
 * Only prints a notice when valid_tag does not exist or yields no tags.
 * With --dry-run the affected tag names are printed and nothing is written.
 */
private function setUserDefinedTags() {
	$replica = $this->lbFactory->getMainLB()->getConnectionRef( DB_REPLICA );

	$tagNames = $replica->tableExists( 'valid_tag', __METHOD__ )
		? $replica->newSelectQueryBuilder()
			->select( 'vt_tag' )
			->from( 'valid_tag' )
			->caller( __METHOD__ )
			->fetchFieldValues()
		: null;

	if ( !$tagNames ) {
		$this->output( "No user defined tags to set, moving on...\n" );

		return;
	}

	if ( $this->hasOption( 'dry-run' ) ) {
		$tagList = implode( ', ', $tagNames );
		$this->output( 'These tags will have ctd_user_defined=1 : ' . $tagList . "\n" );

		return;
	}

	$primary = $this->lbFactory->getMainLB()->getConnectionRef( DB_PRIMARY );
	$primary->update(
		'change_tag_def',
		[ 'ctd_user_defined' => 1 ],
		[ 'ctd_name' => $tagNames ],
		__METHOD__
	);

	$this->lbFactory->waitForReplication();
	$this->output( "Finished setting user defined tags in change_tag_def table\n" );
}
109 
/**
 * Refresh change_tag_def.ctd_count from the number of change_tag rows
 * found for each ct_tag_id.
 *
 * Rows with an empty ct_tag_id are skipped. With --dry-run the rows that
 * would be updated are printed and nothing is written.
 */
private function updateCountTagId() {
	$dbr = $this->lbFactory->getMainLB()->getConnectionRef( DB_REPLICA );

	// This query can be pretty expensive, don't run it on the primary
	$res = $dbr->newSelectQueryBuilder()
		->select( [ 'ct_tag_id', 'hitcount' => 'count(*)' ] )
		->from( 'change_tag' )
		->groupBy( 'ct_tag_id' )
		->caller( __METHOD__ )->fetchResultSet();

	$dbw = $this->lbFactory->getMainLB()->getConnectionRef( DB_PRIMARY );
	$dryRun = $this->hasOption( 'dry-run' );

	foreach ( $res as $row ) {
		if ( !$row->ct_tag_id ) {
			continue;
		}

		if ( $dryRun ) {
			// $row is an object, not an array: passing it to implode() is a
			// TypeError on PHP 8, so print the two fields explicitly.
			$this->output(
				'This row will be updated: ' . $row->ct_tag_id . ', ' . $row->hitcount . "\n"
			);
			continue;
		}

		$dbw->update(
			'change_tag_def',
			[ 'ctd_count' => $row->hitcount ],
			[ 'ctd_id' => $row->ct_tag_id ],
			__METHOD__
		);
	}
	$this->lbFactory->waitForReplication();
}
141 
/**
 * Insert or refresh one change_tag_def row per distinct change_tag.ct_tag,
 * storing the number of change_tag rows carrying that tag in ctd_count.
 *
 * New rows are created with ctd_user_defined = 0; for an already existing
 * ctd_name only ctd_count is overwritten. With --dry-run the rows are
 * printed and nothing is written.
 */
private function updateCountTag() {
	$dbr = $this->lbFactory->getMainLB()->getConnectionRef( DB_REPLICA );

	// This query can be pretty expensive, don't run it on the primary
	$res = $dbr->newSelectQueryBuilder()
		->select( [ 'ct_tag', 'hitcount' => 'count(*)' ] )
		->from( 'change_tag' )
		->groupBy( 'ct_tag' )
		->caller( __METHOD__ )->fetchResultSet();

	$dbw = $this->lbFactory->getMainLB()->getConnectionRef( DB_PRIMARY );
	$dryRun = $this->hasOption( 'dry-run' );

	foreach ( $res as $row ) {
		// Hygiene check
		if ( !$row->ct_tag ) {
			continue;
		}

		if ( $dryRun ) {
			// Separate the tag name from its count; without a separator
			// the two values run together and the output is ambiguous.
			$this->output(
				'This row will be updated: ' . $row->ct_tag . ', ' . $row->hitcount . "\n"
			);
			continue;
		}

		$dbw->newInsertQueryBuilder()
			->insertInto( 'change_tag_def' )
			->row( [
				'ctd_name' => $row->ct_tag,
				'ctd_user_defined' => 0,
				'ctd_count' => $row->hitcount
			] )
			->onDuplicateKeyUpdate()
			->uniqueIndexFields( [ 'ctd_name' ] )
			->set( [ 'ctd_count' => $row->hitcount ] )
			->caller( __METHOD__ )->execute();
	}
	$this->lbFactory->waitForReplication();
}
178 
/**
 * Walk over every change_tag_def row, ordered by ctd_id, and back-populate
 * change_tag.ct_tag_id for each of them.
 */
private function backpopulateChangeTagId() {
	$replica = $this->lbFactory->getMainLB()->getConnectionRef( DB_REPLICA );

	$tagDefQuery = $replica->newSelectQueryBuilder()
		->select( [ 'ctd_name', 'ctd_id' ] )
		->from( 'change_tag_def' )
		->orderBy( 'ctd_id' )
		->caller( __METHOD__ );

	foreach ( $tagDefQuery->fetchResultSet() as $tagDef ) {
		$this->backpopulateChangeTagPerTag( $tagDef->ctd_name, $tagDef->ctd_id );
	}
}
191 
/**
 * Set ct_tag_id on all change_tag rows of one tag that do not have it yet,
 * working through the rows in batches ordered by ct_id.
 *
 * After every written batch the script waits for replication and then
 * sleeps for the number of seconds given by --sleep (if positive). With
 * --dry-run the selected ids are printed and nothing is written.
 *
 * @param string $tagName Value of ct_tag to match
 * @param int $tagId Value to store in ct_tag_id
 */
private function backpopulateChangeTagPerTag( $tagName, $tagId ) {
	$dbr = $this->lbFactory->getMainLB()->getConnectionRef( DB_REPLICA );
	$dbw = $this->lbFactory->getMainLB()->getConnectionRef( DB_PRIMARY );
	$pauseSeconds = (int)$this->getOption( 'sleep', 0 );

	$this->output( "Starting to add ct_tag_id = {$tagId} for ct_tag = {$tagName}\n" );

	// Highest ct_id handled so far; the next batch starts right after it
	$cursor = 0;
	for ( ; ; ) {
		// Given that indexes might not be there, it's better to use replica
		$batchIds = $dbr->newSelectQueryBuilder()
			->select( 'ct_id' )
			->from( 'change_tag' )
			->where( [ 'ct_tag' => $tagName, 'ct_tag_id' => null, 'ct_id > ' . $cursor ] )
			->orderBy( 'ct_id' )
			->limit( $this->getBatchSize() )
			->caller( __METHOD__ )->fetchFieldValues();

		if ( !$batchIds ) {
			break;
		}
		$cursor = end( $batchIds );

		if ( $this->hasOption( 'dry-run' ) ) {
			$idList = implode( ', ', $batchIds );
			$this->output(
				"These ids will be changed to have \"{$tagId}\" as tag id: " . $idList . "\n"
			);
			continue;
		}

		$this->output( "Updating ct_tag_id = {$tagId} up to row ct_id = {$cursor}\n" );
		$dbw->update(
			'change_tag',
			[ 'ct_tag_id' => $tagId ],
			[ 'ct_id' => $batchIds ],
			__METHOD__
		);

		$this->lbFactory->waitForReplication();
		if ( $pauseSeconds > 0 ) {
			sleep( $pauseSeconds );
		}
	}

	$this->output( "Finished adding ct_tag_id = {$tagId} for ct_tag = {$tagName}\n" );
}
237 
/**
 * Key under which this update is recorded in the update log: the name of
 * this class.
 *
 * @return string
 */
protected function getUpdateKey() {
	return self::class;
}
241 }
242 
// NOTE(review): presumably $maintClass tells the maintenance runner which class to
// run, and RUN_MAINTENANCE_IF_MAIN only executes it when this file is the entry
// script - confirm against Maintenance.php.
243 $maintClass = PopulateChangeTagDef::class;
244 require_once RUN_MAINTENANCE_IF_MAIN;
Class for scripts that perform database maintenance and want to log the update in updatelog so we can...
output( $out, $channel=null)
Throw some output to the user.
hasOption( $name)
Checks to see if a particular option was set.
getServiceContainer()
Returns the main service container.
getBatchSize()
Returns batch size.
addDescription( $text)
Set the description text.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
setBatchSize( $s=0)
This program is free software; you can redistribute it and/or modify it under the terms of the GNU Ge...
getUpdateKey()
Get the update key name to go in the update log table.
__construct()
Default constructor.
Wikimedia Rdbms ILBFactory $lbFactory
doDBUpdates()
Do the actual work.
const DB_REPLICA
Definition: defines.php:26
const DB_PRIMARY
Definition: defines.php:28