MediaWiki REL1_30
cleanupEmptyCategories.php
Go to the documentation of this file.
1<?php
24require_once __DIR__ . '/Maintenance.php';
25
33
/**
 * Set the script description and declare its command-line options.
 *
 * Declares three options: --mode, --begin and --throttle.
 */
public function __construct() {
	parent::__construct();

	$description = <<<TEXT
This script will clean up the category table by removing entries for empty
categories without a description page and adding entries for empty categories
with a description page. It will print out progress indicators every batch. The
script is perfectly safe to run on large, live wikis, and running it multiple
times is harmless. You may want to use the throttling options if it's causing
too much load; they will not affect correctness.

If the script is stopped and later resumed, you can use the --mode and --begin
options with the last printed progress indicator to pick up where you left off.

When the script has finished, it will make a note of this in the database, and
will not run again without the --force option.
TEXT;
	$this->addDescription( $description );

	// Option name => help text, listed in registration order.
	$helpByOption = [
		'mode' => '"add" empty categories with description pages, "remove" empty categories '
			. 'without description pages, or "both"',
		'begin' => 'Only do categories whose names are alphabetically after the provided name',
		'throttle' => 'Wait this many milliseconds after each batch. Default: 0',
	];

	foreach ( $helpByOption as $optionName => $helpText ) {
		// Every option passes false, true as the 3rd and 4th arguments
		// (presumably "required" and "takes a value" -- confirm against
		// Maintenance::addOption()).
		$this->addOption( $optionName, $helpText, false, true );
	}
}
73
/**
 * Return the fixed key string that identifies this update.
 *
 * @return string Always 'cleanup empty categories'
 */
protected function getUpdateKey() {
	$updateKey = 'cleanup empty categories';

	return $updateKey;
}
77
/**
 * Walk the category/page tables in batches and refresh the counts of
 * categories that look out of sync.
 *
 * Two phases, selected by --mode ("add", "remove" or "both", default "both"):
 *  - add:    category-namespace pages that have no matching category row;
 *  - remove: category rows with cat_pages = 0 that have no matching
 *            category-namespace page.
 * Each phase pages through titles in ascending order, starting after
 * --begin when given, and prints a resumable "--mode=... --begin=..."
 * progress line after every batch. --begin only applies to the first
 * phase that runs.
 *
 * @return bool False if --mode is not one of the accepted values, true
 *  once every requested phase has run to completion
 */
protected function doDBUpdates() {
	$mode = $this->getOption( 'mode', 'both' );
	$begin = $this->getOption( 'begin', '' );
	// Clamp to zero: a negative delay makes usleep() emit a warning on
	// PHP 7 and throw ValueError on PHP 8.
	$throttle = max( 0, intval( $this->getOption( 'throttle', 0 ) ) );

	if ( !in_array( $mode, [ 'add', 'remove', 'both' ], true ) ) {
		$this->output( "--mode must be 'add', 'remove', or 'both'.\n" );
		return false;
	}

	$dbw = $this->getDB( DB_MASTER );

	if ( $mode === 'add' || $mode === 'both' ) {
		$where = $begin !== '' ? [ 'page_title > ' . $dbw->addQuotes( $begin ) ] : [];

		$this->output( "Adding empty categories with description pages...\n" );
		while ( true ) {
			# Category pages for which the LEFT JOIN found no category row
			$rows = $dbw->select(
				[ 'page', 'category' ],
				'page_title',
				array_merge( $where, [
					'page_namespace' => NS_CATEGORY,
					'cat_title' => null,
				] ),
				__METHOD__,
				[
					'ORDER BY' => 'page_title',
					'LIMIT' => $this->mBatchSize,
				],
				[
					'category' => [ 'LEFT JOIN', 'page_title = cat_title' ],
				]
			);
			if ( !$rows || $rows->numRows() <= 0 ) {
				# Done, hopefully.
				break;
			}

			foreach ( $rows as $row ) {
				$name = $row->page_title;
				# The next batch starts after the last title seen
				$where = [ 'page_title > ' . $dbw->addQuotes( $name ) ];
				$this->refreshCategoryCounts( $name );
			}
			# $name is always set here: the batch had at least one row
			$this->output( "--mode=$mode --begin=$name\n" );

			$this->pauseAfterBatch( $throttle );
		}

		# --begin applied to the add phase only; remove starts from the top
		$begin = '';
	}

	if ( $mode === 'remove' || $mode === 'both' ) {
		$where = $begin !== '' ? [ 'cat_title > ' . $dbw->addQuotes( $begin ) ] : [];

		$this->output( "Removing empty categories without description pages...\n" );
		while ( true ) {
			# Empty category rows for which the LEFT JOIN found no category page
			$rows = $dbw->select(
				[ 'category', 'page' ],
				'cat_title',
				array_merge( $where, [
					'page_title' => null,
					'cat_pages' => 0,
				] ),
				__METHOD__,
				[
					'ORDER BY' => 'cat_title',
					'LIMIT' => $this->mBatchSize,
				],
				[
					'page' => [ 'LEFT JOIN', [
						'page_namespace' => NS_CATEGORY, 'page_title = cat_title'
					] ],
				]
			);
			if ( !$rows || $rows->numRows() <= 0 ) {
				# Done, hopefully.
				break;
			}

			foreach ( $rows as $row ) {
				$name = $row->cat_title;
				# The next batch starts after the last title seen
				$where = [ 'cat_title > ' . $dbw->addQuotes( $name ) ];
				$this->refreshCategoryCounts( $name );
			}
			# Always "remove" so a resumed run skips the finished add phase
			$this->output( "--mode=remove --begin=$name\n" );

			$this->pauseAfterBatch( $throttle );
		}
	}

	$this->output( "Category cleanup complete.\n" );

	return true;
}

/**
 * Refresh the stored counts of one category, or report it as invalid.
 *
 * NOTE(review): the script relies on Category::refreshCounts() to add or
 * drop the category row as needed; that logic is not visible in this file.
 *
 * @param string $name Category title as read from the database
 */
private function refreshCategoryCounts( $name ) {
	$cat = Category::newFromName( $name );
	if ( !is_object( $cat ) ) {
		$this->output( "The category named $name is not valid?!\n" );
		return;
	}

	$cat->refreshCounts();
}

/**
 * Wait for replication, then apply the optional per-batch throttle.
 *
 * @param int $throttle Non-negative delay in milliseconds; 0 disables it
 */
private function pauseAfterBatch( $throttle ) {
	wfWaitForSlaves();
	if ( $throttle > 0 ) {
		# usleep() takes microseconds
		usleep( $throttle * 1000 );
	}
}
200}
201
203require_once RUN_MAINTENANCE_IF_MAIN;
Maintenance script to clean up empty categories in the category table.
__construct()
Default constructor.
Class for scripts that perform database maintenance and want to log the update in updatelog so we can...
addDescription( $text)
Set the description text.
print
Definition cleanup.php:99
The ContentHandler facility adds support for arbitrary content types on wiki pages
$dbw begin(__METHOD__)
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global then executing the whole list after the page is displayed We don t do anything smart like collating updates to the same table or such because the list is almost always going to have just one item on if so it s not worth the trouble Since there is a job queue in the jobs table
Definition deferred.txt:16
We ve cleaned up the code here by removing clumps of infrequently used code and moving them off somewhere else It s much easier for someone working with this code to see what s _really_ going and make changes or fix bugs In we can take all the code that deals with the little used title reversing options(say) and put it in one place. Instead of having little title-reversing if-blocks spread all over the codebase in showAnArticle
linkcache txt The LinkCache class maintains a list of article titles and the information about whether or not the article exists in the database This is used to mark up links when displaying a page If the same link appears more than once on any page then it only has to be looked up once In most cases link lookups are done in batches with the LinkBatch class or the equivalent in so the link cache is mostly useful for short snippets of parsed and for links in the navigation areas of the skin The link cache was formerly used to track links used in a document for the purposes of updating the link tables This application is now deprecated To create a batch
Definition linkcache.txt:14
skin txt MediaWiki includes four core it has been set as the default in MediaWiki since the replacing Monobook it had been the default skin since before being replaced by Vector largely rewritten in while keeping its appearance Several legacy skins were removed in the as the burden of supporting them became too heavy to bear Those in etc for skin dependent CSS etc for skin dependent JavaScript These can also be customised on a per user by similarly to how extensions are installed You can then make that skin the default by adding
Definition skin.txt:68