MediaWiki master
cleanupEmptyCategories.php
Go to the documentation of this file.
1<?php
12
13// @codeCoverageIgnoreStart
14require_once __DIR__ . '/Maintenance.php';
15// @codeCoverageIgnoreEnd
16
24
/**
 * Register the script description and its command-line options.
 *
 * Declares three optional, value-taking options: --mode, --begin and
 * --throttle.
 */
public function __construct() {
	parent::__construct();

	$description = <<<TEXT
This script will clean up the category table by removing entries for empty
categories without a description page and adding entries for empty categories
with a description page. It will print out progress indicators every batch. The
script is perfectly safe to run on large, live wikis, and running it multiple
times is harmless. You may want to use the throttling options if it's causing
too much load; they will not affect correctness.

If the script is stopped and later resumed, you can use the --mode and --begin
options with the last printed progress indicator to pick up where you left off.

When the script has finished, it will make a note of this in the database, and
will not run again without the --force option.
TEXT;
	$this->addDescription( $description );

	// Option name => help text. Registered in this order; none is required
	// and each one takes a value.
	$valueOptions = [
		'mode' => '"add" empty categories with description pages, "remove" empty categories '
			. 'without description pages, or "both"',
		'begin' => 'Only do categories whose names are alphabetically after the provided name',
		'throttle' => 'Wait this many milliseconds after each batch. Default: 0',
	];
	foreach ( $valueOptions as $optionName => $helpText ) {
		$this->addOption( $optionName, $helpText, false, true );
	}
}
64
/**
 * Identifier under which this script's completion is recorded.
 *
 * @return string
 */
protected function getUpdateKey() {
	$updateKey = 'cleanup empty categories';

	return $updateKey;
}
69
/**
 * Run the "add" and/or "remove" passes over the category table in batches.
 *
 * - "add" pass: walks pages in the category namespace that have no matching
 *   row in the category table and calls Category::refreshCounts() on each.
 * - "remove" pass: walks category rows with cat_pages = 0 that have no
 *   matching page in the category namespace and calls
 *   Category::refreshCounts() on each.
 *
 * Both passes page through titles in ascending order, resuming each batch
 * after the last title seen. After every batch a progress line is printed in
 * the form of the options needed to resume from that point, replication is
 * waited for, and the script sleeps for --throttle milliseconds.
 *
 * --begin only applies to the first pass that runs; when --mode=both the
 * remove pass always starts from the beginning.
 *
 * @return bool False if --mode is not one of "add", "remove" or "both";
 *  true once all requested passes have completed
 */
protected function doDBUpdates() {
	$mode = $this->getOption( 'mode', 'both' );
	$begin = $this->getOption( 'begin', '' );
	// Clamp to zero: usleep() rejects negative durations (ValueError on
	// PHP 8), which would otherwise abort the run after the first batch.
	$throttle = max( 0, intval( $this->getOption( 'throttle', 0 ) ) );

	if ( !in_array( $mode, [ 'add', 'remove', 'both' ], true ) ) {
		$this->output( "--mode must be 'add', 'remove', or 'both'.\n" );
		return false;
	}

	$dbw = $this->getPrimaryDB();

	if ( $mode === 'add' || $mode === 'both' ) {
		if ( $begin !== '' ) {
			$where = [ $dbw->expr( 'page_title', '>', $begin ) ];
		} else {
			$where = [];
		}

		$this->output( "Adding empty categories with description pages...\n" );
		while ( true ) {
			# Find category pages that have no row in the category table
			$rows = $dbw->newSelectQueryBuilder()
				->select( 'page_title' )
				->from( 'page' )
				->leftJoin( 'category', null, 'page_title = cat_title' )
				->where( $where )
				->andWhere( [ 'page_namespace' => NS_CATEGORY, 'cat_title' => null ] )
				->orderBy( 'page_title' )
				->limit( $this->getBatchSize() )
				->caller( __METHOD__ )->fetchResultSet();
			if ( !$rows || $rows->numRows() <= 0 ) {
				break;
			}

			foreach ( $rows as $row ) {
				$name = $row->page_title;
				# The next batch resumes after the last title handled
				$where = [ $dbw->expr( 'page_title', '>', $name ) ];

				# Use the row to update the category count
				$cat = Category::newFromName( $name );
				if ( !is_object( $cat ) ) {
					$this->output( "The category named $name is not valid?!\n" );
				} else {
					$cat->refreshCounts();
				}
			}
			// @phan-suppress-next-line PhanPossiblyUndeclaredVariable $rows has at least one item
			$this->output( "--mode=$mode --begin=$name\n" );

			$this->waitForReplication();
			usleep( $throttle * 1000 );
		}

		# --begin applied to the add pass only; the remove pass starts from the top
		$begin = '';
	}

	if ( $mode === 'remove' || $mode === 'both' ) {
		if ( $begin !== '' ) {
			$where = [ $dbw->expr( 'cat_title', '>', $begin ) ];
		} else {
			$where = [];
		}

		$this->output( "Removing empty categories without description pages...\n" );
		while ( true ) {
			# Find empty category rows that have no description page
			$rows = $dbw->newSelectQueryBuilder()
				->select( 'cat_title' )
				->from( 'category' )
				->leftJoin( 'page', null, [ 'page_namespace' => NS_CATEGORY, 'page_title = cat_title' ] )
				->where( $where )
				->andWhere( [ 'page_title' => null, 'cat_pages' => 0 ] )
				->orderBy( 'cat_title' )
				->limit( $this->getBatchSize() )
				->caller( __METHOD__ )->fetchResultSet();
			if ( !$rows || $rows->numRows() <= 0 ) {
				break;
			}

			foreach ( $rows as $row ) {
				$name = $row->cat_title;
				# The next batch resumes after the last title handled
				$where = [ $dbw->expr( 'cat_title', '>', $name ) ];

				# Use the row to update the category count
				$cat = Category::newFromName( $name );
				if ( !is_object( $cat ) ) {
					$this->output( "The category named $name is not valid?!\n" );
				} else {
					$cat->refreshCounts();
				}
			}

			# Always report "remove" here: by this point any add pass has finished,
			# so a resumed run only needs the remove pass.
			// @phan-suppress-next-line PhanPossiblyUndeclaredVariable $rows has at least one item
			$this->output( "--mode=remove --begin=$name\n" );

			$this->waitForReplication();
			usleep( $throttle * 1000 );
		}
	}

	$this->output( "Category cleanup complete.\n" );

	return true;
}
177}
178
179// @codeCoverageIgnoreStart
// Store this script's class name in $maintClass.
// NOTE(review): presumably read by the file required on the next line - confirm.
180$maintClass = CleanupEmptyCategories::class;
// NOTE(review): RUN_MAINTENANCE_IF_MAIN is defined outside this file; it looks like
// the path of a bootstrap that runs the script when executed directly - confirm.
181require_once RUN_MAINTENANCE_IF_MAIN;
182// @codeCoverageIgnoreEnd
Maintenance script to clean up empty categories in the category table.
__construct()
Default constructor.
Category objects are immutable, strictly speaking.
Definition Category.php:29
Class for scripts that perform database maintenance and want to log the update in updatelog so we can...
addDescription( $text)
Set the description text.