MediaWiki  master
cleanupEmptyCategories.php
Go to the documentation of this file.
1 <?php
24 require_once __DIR__ . '/Maintenance.php';
25 
27 
35 
/**
 * Set up the script: register its help text and the three command-line
 * options it understands (--mode, --begin and --throttle).
 */
public function __construct() {
	parent::__construct();

	$helpText = <<<TEXT
This script will clean up the category table by removing entries for empty
categories without a description page and adding entries for empty categories
with a description page. It will print out progress indicators every batch. The
script is perfectly safe to run on large, live wikis, and running it multiple
times is harmless. You may want to use the throttling options if it's causing
too much load; they will not affect correctness.

If the script is stopped and later resumed, you can use the --mode and --begin
options with the last printed progress indicator to pick up where you left off.

When the script has finished, it will make a note of this in the database, and
will not run again without the --force option.
TEXT;
	$this->addDescription( $helpText );

	// Option name => help text, registered in this order.
	$optionHelp = [
		'mode' => '"add" empty categories with description pages, "remove" empty categories '
			. 'without description pages, or "both"',
		'begin' => 'Only do categories whose names are alphabetically after the provided name',
		'throttle' => 'Wait this many milliseconds after each batch. Default: 0',
	];

	foreach ( $optionHelp as $optionName => $optionDescription ) {
		// All three options share the same trailing flags (false, true);
		// presumably "not required" and "takes a value" - confirm against
		// Maintenance::addOption().
		$this->addOption( $optionName, $optionDescription, false, true );
	}
}
75 
/**
 * Identify this update by a fixed string.
 *
 * NOTE(review): presumably the base class uses this key to record that the
 * script has already completed - confirm against LoggedUpdateMaintenance.
 *
 * @return string Always 'cleanup empty categories'.
 */
protected function getUpdateKey() {
	$updateKey = 'cleanup empty categories';

	return $updateKey;
}
79 
/**
 * Walk the page and category tables in batches and refresh the counts of
 * every category that is out of step between the two.
 *
 * Two phases, selected by --mode:
 *  - "add": category description pages that have no category table row;
 *  - "remove": category table rows with zero pages and no description page.
 * With "both" (the default) the add phase runs first, then the remove phase.
 *
 * For each matching title Category::refreshCounts() is called; per the
 * script description that is what adds or removes the table entry.
 * After every batch a resumable progress line is printed, replication is
 * waited for, and the optional --throttle delay is applied.
 *
 * @return bool False when --mode is not one of the accepted values, true
 *  once every requested phase has run to the end.
 */
protected function doDBUpdates() {
	$mode = $this->getOption( 'mode', 'both' );
	$begin = $this->getOption( 'begin', '' );
	// usleep() rejects negative values (ValueError on PHP 8), so a negative
	// --throttle is treated as "no delay" instead of aborting mid-run.
	$throttle = max( 0, intval( $this->getOption( 'throttle', 0 ) ) );

	// Strict comparison: only the exact strings are acceptable, no type juggling.
	if ( !in_array( $mode, [ 'add', 'remove', 'both' ], true ) ) {
		$this->output( "--mode must be 'add', 'remove', or 'both'.\n" );

		return false;
	}

	$dbw = $this->getDB( DB_MASTER );
	$lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();

	if ( $mode === 'add' || $mode === 'both' ) {
		$where = $begin !== '' ? [ 'page_title > ' . $dbw->addQuotes( $begin ) ] : [];

		$this->output( "Adding empty categories with description pages...\n" );
		while ( true ) {
			# Category pages for which the LEFT JOIN finds no category row
			$rows = $dbw->select(
				[ 'page', 'category' ],
				'page_title',
				array_merge( $where, [
					'page_namespace' => NS_CATEGORY,
					'cat_title' => null,
				] ),
				__METHOD__,
				[
					'ORDER BY' => 'page_title',
					'LIMIT' => $this->getBatchSize(),
				],
				[
					'category' => [ 'LEFT JOIN', 'page_title = cat_title' ],
				]
			);
			if ( !$rows || $rows->numRows() <= 0 ) {
				break;
			}

			$name = $this->refreshCountsForRows( $rows, 'page_title' );
			# The next batch starts after the last title handled, so the loop
			# always advances even if a refresh did not change anything
			$where = [ 'page_title > ' . $dbw->addQuotes( $name ) ];
			$this->output( "--mode=$mode --begin=$name\n" );

			$this->waitAfterBatch( $lbFactory, $throttle );
		}

		# --begin is consumed by the add phase; the remove phase that follows
		# in "both" mode starts from the beginning
		$begin = '';
	}

	if ( $mode === 'remove' || $mode === 'both' ) {
		$where = $begin !== '' ? [ 'cat_title > ' . $dbw->addQuotes( $begin ) ] : [];

		$this->output( "Removing empty categories without description pages...\n" );
		while ( true ) {
			# Category rows with no pages for which the LEFT JOIN finds no
			# description page in the category namespace
			$rows = $dbw->select(
				[ 'category', 'page' ],
				'cat_title',
				array_merge( $where, [
					'page_title' => null,
					'cat_pages' => 0,
				] ),
				__METHOD__,
				[
					'ORDER BY' => 'cat_title',
					'LIMIT' => $this->getBatchSize(),
				],
				[
					'page' => [ 'LEFT JOIN', [
						'page_namespace' => NS_CATEGORY, 'page_title = cat_title'
					] ],
				]
			);
			if ( !$rows || $rows->numRows() <= 0 ) {
				break;
			}

			$name = $this->refreshCountsForRows( $rows, 'cat_title' );
			$where = [ 'cat_title > ' . $dbw->addQuotes( $name ) ];

			# Always reports "remove": when resuming from here the add phase
			# has either finished or was never requested
			$this->output( "--mode=remove --begin=$name\n" );

			$this->waitAfterBatch( $lbFactory, $throttle );
		}
	}

	$this->output( "Category cleanup complete.\n" );

	return true;
}

/**
 * Refresh the counts of every category named in a batch of result rows.
 *
 * @param iterable $rows Result rows, each exposing the title under $field
 * @param string $field Name of the row property holding the category title
 * @return string|null Title of the last row seen, or null if $rows was empty
 */
private function refreshCountsForRows( $rows, $field ) {
	$lastName = null;

	foreach ( $rows as $row ) {
		$lastName = $row->$field;

		$cat = Category::newFromName( $lastName );
		if ( !is_object( $cat ) ) {
			$this->output( "The category named $lastName is not valid?!\n" );
			continue;
		}

		$cat->refreshCounts();
	}

	return $lastName;
}

/**
 * Pause between batches: wait for replication, then apply the throttle.
 *
 * @param object $lbFactory Object returned by getDBLoadBalancerFactory()
 * @param int $throttle Delay in milliseconds; zero or less means no delay
 */
private function waitAfterBatch( $lbFactory, $throttle ) {
	$lbFactory->waitForReplication();

	if ( $throttle > 0 ) {
		usleep( $throttle * 1000 );
	}
}
201 }
202 
// Script bootstrap: name this file's class in $maintClass, then load the file
// named by RUN_MAINTENANCE_IF_MAIN. NOTE(review): presumably that file reads
// $maintClass to decide which script to run - confirm against Maintenance.php.
203 $maintClass = CleanupEmptyCategories::class;
204 require_once RUN_MAINTENANCE_IF_MAIN;
MediaWiki\MediaWikiServices
MediaWikiServices is the service locator for the application scope of MediaWiki.
Definition: MediaWikiServices.php:154
Maintenance\addDescription
addDescription( $text)
Set the description text.
Definition: Maintenance.php:327
CleanupEmptyCategories
Maintenance script to clean up empty categories in the category table.
Definition: cleanupEmptyCategories.php:34
LoggedUpdateMaintenance
Class for scripts that perform database maintenance and want to log the update in updatelog so we can...
Definition: LoggedUpdateMaintenance.php:26
CleanupEmptyCategories\__construct
__construct()
Default constructor.
Definition: cleanupEmptyCategories.php:36