MediaWiki master
cleanupTitles.php
Go to the documentation of this file.
1<?php
30
31// @codeCoverageIgnoreStart
32require_once __DIR__ . '/TableCleanup.php';
33// @codeCoverageIgnoreEnd
34
41
/** @var string Prefix (including a trailing slash) prepended to relocated titles; assigned in execute() */
private string $prefix;

/**
 * Register the script description, the optional --prefix argument and the batch size.
 */
public function __construct() {
	parent::__construct();

	$prefixHelp = "Broken pages will be renamed to titles with " .
		"<prefix> prepended before the article name. Defaults to 'Broken'";

	$this->addDescription( 'Script to clean up broken, unparseable titles' );
	// Third argument: not required. Fourth argument: the option takes a value.
	$this->addOption( 'prefix', $prefixHelp, false, true );
	$this->setBatchSize( 1000 );
}
51
/**
 * Resolve and validate the rename prefix, then run the parent cleanup.
 *
 * Terminates via fatalError() when the prefix is not a usable mainspace title.
 */
public function execute() {
	// The trailing slash ensures that appending a page title to the prefix
	// can never turn the result into a namespace or interwiki prefix.
	$this->prefix = $this->getOption( 'prefix', 'Broken' ) . "/";

	// Validate the prefix once up front instead of emitting an error for every
	// page being cleaned up. The move methods assume that prefix + title
	// stays in the main namespace.
	$prefixTitle = Title::newFromText( $this->prefix );
	$prefixIsUsable = $prefixTitle
		&& $prefixTitle->canExist()
		&& !$prefixTitle->getInterwiki()
		&& $prefixTitle->getNamespace() === 0;

	if ( !$prefixIsUsable ) {
		$this->fatalError( "Invalid prefix {$this->prefix}. Must be a valid mainspace title." );
	}

	parent::execute();
}
68
/**
 * Inspect one page row and relocate the page when its stored title is broken.
 *
 * A row is considered fine when re-parsing its normalized display name yields
 * a title that can exist and maps back to the same namespace and DB key.
 *
 * @param object $row Page row; page_id, page_namespace and page_title are read
 */
protected function processRow( $row ) {
	$display = Title::makeName( $row->page_namespace, $row->page_title );
	$contentLanguage = $this->getServiceContainer()->getContentLanguage();
	$title = Title::newFromText( $contentLanguage->normalize( $display ) );

	$roundTrips = $title !== null
		&& $title->canExist()
		&& $title->getNamespace() == $row->page_namespace
		&& $title->getDBkey() === $row->page_title;
	if ( $roundTrips ) {
		// Nothing to repair for this row.
		$this->progress( 0 );

		return;
	}

	// Broken file pages that have an image record are left to cleanupImages.php.
	if ( $row->page_namespace == NS_FILE && $this->fileExists( $row->page_title ) ) {
		$this->output( "file $row->page_title needs cleanup, please run cleanupImages.php.\n" );
		$this->progress( 0 );

		return;
	}

	if ( $title === null ) {
		// The stored name cannot be parsed at all.
		$this->output( "page $row->page_id ($display) is illegal.\n" );
		$this->moveIllegalPage( $row );
	} else {
		// The stored name parses, but to a different namespace/DB key.
		$this->output( "page $row->page_id ($display) doesn't match self.\n" );
		$this->moveInconsistentPage( $row, $title );
	}
	$this->progress( 1 );
}
101
/**
 * Check whether the image table has a record for the given file name.
 *
 * @param string $name Value matched against img_name
 * @return bool True when a matching image row was found
 */
protected function fileExists( $name ) {
	// XXX: Doesn't actually check for file existence, just presence of image record.
	// This is reasonable, since cleanupImages.php only iterates over the image table.
	$dbr = $this->getReplicaDB();
	// Only existence matters, so select a constant instead of every column of
	// the matching image row.
	$row = $dbr->newSelectQueryBuilder()
		->select( '1' )
		->from( 'image' )
		->where( [ 'img_name' => $name ] )
		->caller( __METHOD__ )
		->fetchRow();

	return $row !== false;
}
119
/**
 * Rename a page whose stored title cannot be parsed into a valid Title.
 *
 * Characters outside Title::legalChars() are rewritten through the hexChar
 * callback. A page stored in namespace 1 is first tried as the talk page of
 * the legalized name ("Talk:Project:Foo" -> "Project talk:Foo"). Otherwise the
 * page is moved to "<prefix><Namespace>:<title>" in the main namespace, and
 * to "<prefix>id:<page_id>" when that form is invalid or already exists.
 * If even the fallback is unusable, an error is reported and the row is left
 * untouched.
 *
 * @param object $row Page row; page_id, page_namespace and page_title are read
 */
protected function moveIllegalPage( $row ) {
	$legalChars = Title::legalChars();
	$legalizedUnprefixed = preg_replace_callback( "/([^$legalChars])/",
		[ $this, 'hexChar' ],
		$row->page_title );
	if ( $legalizedUnprefixed === '.' ) {
		$legalizedUnprefixed = '(dot)';
	}
	if ( $legalizedUnprefixed === '_' ) {
		$legalizedUnprefixed = '(space)';
	}
	$sourceNs = (int)$row->page_namespace;

	$title = null;
	// Try to move "Talk:Project:Foo" -> "Project talk:Foo"
	if ( $sourceNs === 1 ) {
		$subjectTitle = Title::newFromText( $legalizedUnprefixed );
		if ( $subjectTitle && !$subjectTitle->isTalkPage() ) {
			$talkTitle = $subjectTitle->getTalkPageIfDefined();
			if ( $talkTitle !== null && !$talkTitle->exists() ) {
				$title = $talkTitle;
			}
		}
	}

	if ( $title === null ) {
		// Not a talk page or that didn't work
		// move any other broken pages to the main namespace so they can be found together
		if ( $sourceNs !== 0 ) {
			$namespaceInfo = $this->getServiceContainer()->getNamespaceInfo();
			$namespaceName = $namespaceInfo->getCanonicalName( $sourceNs );
			if ( $namespaceName === false ) {
				// Fallback for unknown namespaces
				$namespaceName = "NS$sourceNs";
			}
			$legalizedUnprefixed = "$namespaceName:$legalizedUnprefixed";
		}
		$title = Title::newFromText( $this->prefix . $legalizedUnprefixed );
	}

	if ( $title === null ) {
		// It's still not a valid title, try again with a much smaller
		// allowed character set. This will mangle any titles with non-ASCII
		// characters, but if we don't do this the result will be
		// falling back to the Broken/id:foo failsafe below which is worse
		$legalizedUnprefixed = preg_replace_callback( '!([^A-Za-z0-9_:\\-])!',
			[ $this, 'hexChar' ],
			$legalizedUnprefixed
		);
		$title = Title::newFromText( $this->prefix . $legalizedUnprefixed );
	}

	// Failsafe destination, used when the legalized form is invalid or taken.
	$clean = $this->prefix . 'id:' . $row->page_id;
	if ( $title === null ) {
		// Oh well, we tried
		$legalized = $this->prefix . $legalizedUnprefixed;
		$this->output( "Couldn't legalize; form '$legalized' still invalid; using '$clean'\n" );
		$title = Title::newFromText( $clean );
	} elseif ( $title->exists( IDBAccessObject::READ_LATEST ) ) {
		$conflict = $title->getDBkey();
		$this->output( "Legalized for '$conflict' exists; using '$clean'\n" );
		$title = Title::newFromText( $clean );
	}

	if ( !$title || $title->exists( IDBAccessObject::READ_LATEST ) ) {
		// This can happen in corner cases like if numbers are made not valid
		// title characters using the (deprecated) $wgLegalTitleChars or
		// a 'Broken/id:foo' title already exists
		// $title may be null here, so never dereference it unconditionally.
		$destName = $title ? $title->getText() : $clean;
		$this->error( "Destination page $destName is invalid or already exists, skipping." );
		return;
	}

	// Take the namespace from the title that was actually validated above, so
	// the written (namespace, title) pair always matches the existence check,
	// even when the talk-page candidate was replaced by the failsafe title.
	$ns = $title->getNamespace();
	$dest = $title->getDBkey();
	if ( $this->dryrun ) {
		$this->output( "DRY RUN: would rename $row->page_id ($row->page_namespace," .
			"'$row->page_title') to ($ns,'$dest')\n" );
	} else {
		$this->output( "renaming $row->page_id ($row->page_namespace," .
			"'$row->page_title') to ($ns,'$dest')\n" );
		$this->getPrimaryDB()
			->newUpdateQueryBuilder()
			->update( 'page' )
			->set( [ 'page_title' => $dest, 'page_namespace' => $ns ] )
			->where( [ 'page_id' => $row->page_id ] )
			->caller( __METHOD__ )->execute();
	}
}
212
/**
 * Rename a page whose stored title parses, but not back to the stored
 * namespace and DB key.
 *
 * When the parsed title is free and can exist, the page is moved straight to
 * it. Otherwise a prefixed mainspace destination is built, falling back to a
 * page-id based name; if that is unusable too, an error is reported and the
 * row is left untouched.
 *
 * @param object $row Page row; page_id, page_namespace and page_title are read
 * @param Title $title Title obtained by parsing the stored name
 */
protected function moveInconsistentPage( $row, Title $title ) {
	$titleImpossible = $title->getInterwiki() || !$title->canExist();
	$targetTaken = $title->exists( IDBAccessObject::READ_LATEST );

	if ( $targetTaken || $titleImpossible ) {
		$prior = $titleImpossible ? $title->getPrefixedDBkey() : $title->getDBkey();

		$ns = (int)$row->page_namespace;
		# If a page is saved in the main namespace with a namespace prefix then try to move it
		# into that namespace. If there's no conflict then it will succeed. Otherwise it ends up
		# in the "$ns !== 0" branch below and is moved to Broken/Namespace:Title, whereas
		# without this check it would just go to Broken/Title.
		if ( $ns === 0 ) {
			$ns = $title->getNamespace();
		}

		# Old cleanupTitles could move articles there. See T25147.
		# Or a page could be stored as (0, "Special:Foo"), in which case $titleImpossible
		# is true and the prefix is already part of $prior, so pretend we're in mainspace
		# and don't add another.
		if ( $ns < 0 ) {
			$ns = 0;
		}

		$namespaceInfo = $this->getServiceContainer()->getNamespaceInfo();
		if ( !$namespaceInfo->exists( $ns ) ) {
			# Namespace which no longer exists. Put the page in the main namespace
			# since we don't have any idea of the old namespace name. See T70501.
			# The new title is built by hand rather than via getDBkey() because
			# that would return Special:BadTitle.
			$clean = "{$this->prefix}NS$ns:$row->page_title";
			$ns = 0;
		} elseif ( !$titleImpossible && !$title->exists( IDBAccessObject::READ_LATEST ) ) {
			// Looks like the current title, after cleaning it up, is valid and available
			$clean = $prior;
		} elseif ( $ns !== 0 ) {
			// Put all broken pages in the main namespace so that they can be found via Special:PrefixIndex
			$nsName = $namespaceInfo->getCanonicalName( $ns );
			$clean = "{$this->prefix}$nsName:{$prior}";
			$ns = 0;
		} else {
			$clean = $this->prefix . $prior;
		}

		$candidate = Title::makeTitleSafe( $ns, $clean );
		if ( !$candidate || $candidate->exists( IDBAccessObject::READ_LATEST ) ) {
			// NOTE(review): this fallback has a space after "id:", unlike the
			// "id:" form used by moveIllegalPage() — confirm whether intended.
			$lastResort = "{$this->prefix}id: {$row->page_id}";
			$this->output( "Couldn't legalize; form '$clean' exists; using '$lastResort'\n" );
			$candidate = Title::makeTitleSafe( $ns, $lastResort );
			if ( !$candidate || $candidate->exists( IDBAccessObject::READ_LATEST ) ) {
				// This can happen in corner cases like if numbers are made not valid
				// title characters using the (deprecated) $wgLegalTitleChars or
				// a 'Broken/id:foo' title already exists
				$this->error( "Destination page $lastResort invalid or already exists." );
				return;
			}
		}
		$title = $candidate;
	}

	$ns = $title->getNamespace();
	$dest = $title->getDBkey();

	if ( $this->dryrun ) {
		$this->output( "DRY RUN: would rename $row->page_id ($row->page_namespace," .
			"'$row->page_title') to ($ns,'$dest')\n" );
		return;
	}

	$this->output( "renaming $row->page_id ($row->page_namespace," .
		"'$row->page_title') to ($ns,'$dest')\n" );
	$this->getPrimaryDB()
		->newUpdateQueryBuilder()
		->update( 'page' )
		->set( [
			'page_namespace' => $ns,
			'page_title' => $dest
		] )
		->where( [ 'page_id' => $row->page_id ] )
		->caller( __METHOD__ )->execute();
	// Drop cached title lookups now that the row has been changed directly.
	$this->getServiceContainer()->getLinkCache()->clear();
}
299}
300
301// @codeCoverageIgnoreStart
302$maintClass = TitleCleanup::class;
303require_once RUN_MAINTENANCE_IF_MAIN;
304// @codeCoverageIgnoreEnd
const NS_FILE
Definition Defines.php:71
error( $err, $die=0)
Throw an error to the user.
output( $out, $channel=null)
Throw some output to the user.
getServiceContainer()
Returns the main service container.
addDescription( $text)
Set the description text.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
setBatchSize( $s=0)
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.
Represents a title within MediaWiki.
Definition Title.php:78
canExist()
Can this title represent a page in the wiki's database?
Definition Title.php:1211
exists( $flags=0)
Check if page exists.
Definition Title.php:3138
getNamespace()
Get the namespace index, i.e. one of the NS_xxxx constants.
Definition Title.php:1043
getInterwiki()
Get the interwiki prefix.
Definition Title.php:944
getDBkey()
Get the main part with underscores.
Definition Title.php:1034
getPrefixedDBkey()
Get the prefixed database key form.
Definition Title.php:1846
Generic class to cleanup a database table.
progress( $updated)
Maintenance script to clean up broken, unparseable titles.
moveIllegalPage( $row)
moveInconsistentPage( $row, Title $title)
__construct()
Default constructor.
execute()
Do the actual work. All child classes will need to implement this. Returns bool|null|void: True for success,...
$maintClass
Interface for database access objects.