MediaWiki  master
cleanupEmptyCategories.php
Go to the documentation of this file.
1 <?php
24 require_once __DIR__ . '/Maintenance.php';
25 
27 
35 
/**
 * Set up the script: register the help text and the --mode, --begin and
 * --throttle command line options.
 */
public function __construct() {
	parent::__construct();

	$description = <<<TEXT
This script will clean up the category table by removing entries for empty
categories without a description page and adding entries for empty categories
with a description page. It will print out progress indicators every batch. The
script is perfectly safe to run on large, live wikis, and running it multiple
times is harmless. You may want to use the throttling options if it's causing
too much load; they will not affect correctness.

If the script is stopped and later resumed, you can use the --mode and --begin
options with the last printed progress indicator to pick up where you left off.

When the script has finished, it will make a note of this in the database, and
will not run again without the --force option.
TEXT;
	$this->addDescription( $description );

	// Option name => help text, registered in this order.
	$valueOptions = [
		'mode' => '"add" empty categories with description pages, "remove" empty categories '
			. 'without description pages, or "both"',
		'begin' => 'Only do categories whose names are alphabetically after the provided name',
		'throttle' => 'Wait this many milliseconds after each batch. Default: 0',
	];

	foreach ( $valueOptions as $optionName => $helpText ) {
		// All three options pass false/true as the third and fourth arguments
		// (presumably "not required" / "takes a value" — confirm in Maintenance::addOption).
		$this->addOption( $optionName, $helpText, false, true );
	}
}
75 
/**
 * Key identifying this script's update.
 *
 * NOTE(review): presumably the name under which completion is recorded in the
 * database (the script description mentions such a note) — confirm in the
 * parent class.
 *
 * @return string
 */
protected function getUpdateKey() {
	$updateKey = 'cleanup empty categories';

	return $updateKey;
}
79 
/**
 * Run the cleanup passes selected by --mode over the category table.
 *
 * Up to two batched passes are executed, each ordered by title and limited to
 * getBatchSize() rows per query:
 *  - "add": pages in NS_CATEGORY that have no matching category row.
 *  - "remove": category rows with cat_pages = 0 that have no page in NS_CATEGORY.
 *
 * For every title found, Category::refreshCounts() is called; the actual row
 * insertion/removal is presumably done there (confirm in Category). After each
 * batch a "--mode=... --begin=..." progress line is printed, replication is
 * waited for, and the script sleeps for --throttle milliseconds.
 *
 * --begin only applies to the first pass that runs: when the "add" pass has
 * run, the "remove" pass always starts from the beginning.
 *
 * @return bool False if --mode is not one of the accepted values, true once
 *  the selected passes have finished
 */
protected function doDBUpdates() {
	$mode = $this->getOption( 'mode', 'both' );
	$begin = $this->getOption( 'begin', '' );
	// Clamp to zero: a negative --throttle must not reach usleep(), which
	// rejects negative durations (ValueError on PHP 8).
	$throttle = max( 0, intval( $this->getOption( 'throttle', 0 ) ) );

	if ( !in_array( $mode, [ 'add', 'remove', 'both' ], true ) ) {
		$this->output( "--mode must be 'add', 'remove', or 'both'.\n" );
		return false;
	}

	$dbw = $this->getDB( DB_PRIMARY );
	$lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();

	if ( $mode === 'add' || $mode === 'both' ) {
		if ( $begin !== '' ) {
			$where = [ 'page_title > ' . $dbw->addQuotes( $begin ) ];
		} else {
			$where = [];
		}

		$this->output( "Adding empty categories with description pages...\n" );
		while ( true ) {
			# Find category pages that have no row in the category table
			$rows = $dbw->select(
				[ 'page', 'category' ],
				'page_title',
				array_merge( $where, [
					'page_namespace' => NS_CATEGORY,
					'cat_title' => null,
				] ),
				__METHOD__,
				[
					'ORDER BY' => 'page_title',
					'LIMIT' => $this->getBatchSize(),
				],
				[
					'category' => [ 'LEFT JOIN', 'page_title = cat_title' ],
				]
			);
			if ( !$rows || $rows->numRows() <= 0 ) {
				break;
			}

			foreach ( $rows as $row ) {
				$name = $row->page_title;
				# The next batch resumes strictly after the last title seen
				$where = [ 'page_title > ' . $dbw->addQuotes( $name ) ];

				# Use the row to update the category count
				$cat = Category::newFromName( $name );
				if ( !is_object( $cat ) ) {
					$this->output( "The category named $name is not valid?!\n" );
				} else {
					$cat->refreshCounts();
				}
			}
			// @phan-suppress-next-line PhanPossiblyUndeclaredVariable $rows has at least one item
			$this->output( "--mode=$mode --begin=$name\n" );

			$lbFactory->waitForReplication();
			if ( $throttle > 0 ) {
				usleep( $throttle * 1000 );
			}
		}

		# --begin was consumed by this pass; the remove pass starts from scratch
		$begin = '';
	}

	if ( $mode === 'remove' || $mode === 'both' ) {
		if ( $begin !== '' ) {
			$where = [ 'cat_title > ' . $dbw->addQuotes( $begin ) ];
		} else {
			$where = [];
		}

		$this->output( "Removing empty categories without description pages...\n" );
		while ( true ) {
			# Find empty category rows that have no page in the category namespace
			$rows = $dbw->select(
				[ 'category', 'page' ],
				'cat_title',
				array_merge( $where, [
					'page_title' => null,
					'cat_pages' => 0,
				] ),
				__METHOD__,
				[
					'ORDER BY' => 'cat_title',
					'LIMIT' => $this->getBatchSize(),
				],
				[
					'page' => [ 'LEFT JOIN', [
						'page_namespace' => NS_CATEGORY, 'page_title = cat_title'
					] ],
				]
			);
			if ( !$rows || $rows->numRows() <= 0 ) {
				break;
			}
			foreach ( $rows as $row ) {
				$name = $row->cat_title;
				# The next batch resumes strictly after the last title seen
				$where = [ 'cat_title > ' . $dbw->addQuotes( $name ) ];

				# Use the row to update the category count
				$cat = Category::newFromName( $name );
				if ( !is_object( $cat ) ) {
					$this->output( "The category named $name is not valid?!\n" );
				} else {
					$cat->refreshCounts();
				}
			}

			// @phan-suppress-next-line PhanPossiblyUndeclaredVariable $rows has at least one item
			$this->output( "--mode=remove --begin=$name\n" );

			$lbFactory->waitForReplication();
			if ( $throttle > 0 ) {
				usleep( $throttle * 1000 );
			}
		}
	}

	$this->output( "Category cleanup complete.\n" );

	return true;
}
203 }
204 
// Register CleanupEmptyCategories as $maintClass, then include the file named by
// RUN_MAINTENANCE_IF_MAIN (presumably it runs $maintClass when this file is the
// script entry point — confirm against Maintenance.php).
205 $maintClass = CleanupEmptyCategories::class;
206 require_once RUN_MAINTENANCE_IF_MAIN;
Maintenance script to clean up empty categories in the category table.
__construct()
Default constructor.
Class for scripts that perform database maintenance and want to log the update in updatelog so we can...
addDescription( $text)
Set the description text.
Service locator for MediaWiki core services.