MediaWiki master
cleanupTitles.php
Go to the documentation of this file.
1<?php
29
30require_once __DIR__ . '/TableCleanup.php';
31
38
/** Title prefix, including its trailing "/", that execute() assigns and the move methods prepend to new titles. */
39 private string $prefix;
40
/**
 * Register the script description, the optional "prefix" option and the batch size.
 */
public function __construct() {
	parent::__construct();

	$prefixHelp = "Broken pages will be renamed to titles with "
		. "<prefix> prepended before the article name. Defaults to 'Broken'";

	$this->addDescription( 'Script to clean up broken, unparseable titles' );
	// Third argument: not required. Fourth argument: the option takes a value.
	$this->addOption( 'prefix', $prefixHelp, false, true );
	$this->setBatchSize( 1000 );
}
48
/**
 * Resolve and validate the rename prefix, then run the inherited cleanup.
 *
 * The prefix comes from the "prefix" option (default "Broken") with a "/"
 * appended. The script aborts via fatalError() unless the prefix parses as a
 * main-namespace title that can exist and has no interwiki part.
 */
public function execute() {
	$this->prefix = $this->getOption( 'prefix', 'Broken' ) . "/";

	// Validate the prefix once up front instead of emitting an error for every
	// page being cleaned up. The move methods assume that prepending the prefix
	// to a title keeps it in NS0; the trailing slash added above ensures the
	// concatenation cannot turn into a namespace or interwiki prefix.
	$prefixTitle = Title::newFromText( $this->prefix );
	$prefixUsable = $prefixTitle
		&& $prefixTitle->canExist()
		&& !$prefixTitle->getInterwiki()
		&& $prefixTitle->getNamespace() === 0;

	if ( !$prefixUsable ) {
		$this->fatalError( "Invalid prefix {$this->prefix}. Must be a valid mainspace title." );
	}

	parent::execute();
}
65
/**
 * Check one page row and rename the page if its stored title is broken.
 *
 * The stored namespace/title pair is rebuilt into text, normalized by the
 * content language and re-parsed. A row is left alone when the parsed title
 * maps back to exactly the stored namespace and DB key.
 *
 * @param object $row Page row; page_id, page_namespace and page_title are read
 */
protected function processRow( $row ) {
	$display = Title::makeName( $row->page_namespace, $row->page_title );
	$normalized = $this->getServiceContainer()->getContentLanguage()->normalize( $display );
	$title = Title::newFromText( $normalized );

	$roundTrips = $title !== null
		&& $title->canExist()
		&& $title->getNamespace() == $row->page_namespace
		&& $title->getDBkey() === $row->page_title;
	if ( $roundTrips ) {
		// all is fine
		$this->progress( 0 );

		return;
	}

	// Broken file pages that have an image record are left to cleanupImages.php.
	if ( $row->page_namespace == NS_FILE && $this->fileExists( $row->page_title ) ) {
		$this->output( "file $row->page_title needs cleanup, please run cleanupImages.php.\n" );
		$this->progress( 0 );

		return;
	}

	if ( $title === null ) {
		$this->output( "page $row->page_id ($display) is illegal.\n" );
		$this->moveIllegalPage( $row );
	} else {
		$this->output( "page $row->page_id ($display) doesn't match self.\n" );
		$this->moveInconsistentPage( $row, $title );
	}
	$this->progress( 1 );
}
98
/**
 * Report whether the image table has a record with the given name.
 *
 * @param string $name Value compared against img_name
 * @return bool True if a matching image row exists
 */
protected function fileExists( $name ) {
	// XXX: Doesn't actually check for file existence, just presence of image record.
	// This is reasonable, since cleanupImages.php only iterates over the image table.
	$dbr = $this->getReplicaDB();
	// Only the presence of a row matters, so select a constant instead of
	// fetching every column of the image row.
	$row = $dbr->newSelectQueryBuilder()
		->select( '1' )
		->from( 'image' )
		->where( [ 'img_name' => $name ] )
		->caller( __METHOD__ )
		->fetchRow();

	return $row !== false;
}
116
/**
 * Rename a page whose stored title could not be parsed into a Title at all.
 *
 * Characters outside Title::legalChars() are passed through the hexChar()
 * callback, the result is placed under the configured prefix in the main
 * namespace, and "<prefix>id:<page_id>" is used when that name is still
 * invalid or already taken. If even the fallback is unusable the page is
 * skipped with an error. In dry-run mode the rename is only reported.
 *
 * @param object $row Page row; page_id, page_namespace and page_title are read
 */
protected function moveIllegalPage( $row ) {
	$legalChars = Title::legalChars();
	$legalizedUnprefixed = preg_replace_callback( "/([^$legalChars])/",
		[ $this, 'hexChar' ],
		$row->page_title );
	if ( $legalizedUnprefixed === '.' ) {
		$legalizedUnprefixed = '(dot)';
	}
	if ( $legalizedUnprefixed === '_' ) {
		$legalizedUnprefixed = '(space)';
	}

	$ns = (int)$row->page_namespace;
	// Move all broken pages to the main namespace so they can be found together
	if ( $ns !== 0 ) {
		$namespaceInfo = $this->getServiceContainer()->getNamespaceInfo();
		$namespaceName = $namespaceInfo->getCanonicalName( $ns );
		if ( $namespaceName === false ) {
			// Fallback for unknown namespaces
			$namespaceName = "NS$ns";
		}
		$legalizedUnprefixed = "$namespaceName:$legalizedUnprefixed";
	}
	$legalized = $this->prefix . $legalizedUnprefixed;

	$title = Title::newFromText( $legalized );

	if ( $title === null ) {
		// It's still not a valid title, try again with a much smaller
		// allowed character set. This will mangle any titles with non-ASCII
		// characters, but if we don't do this the result will be
		// falling back to the Broken/id:foo failsafe below which is worse
		$legalizedUnprefixed = preg_replace_callback( '!([^A-Za-z0-9_\\-:])!',
			[ $this, 'hexChar' ],
			$legalizedUnprefixed
		);
		$legalized = $this->prefix . $legalizedUnprefixed;
		$title = Title::newFromText( $legalized );
	}

	// Failsafe name, used when the legalized form is invalid or already taken.
	$fallback = $this->prefix . 'id:' . $row->page_id;
	if ( $title === null ) {
		// Oh well, we tried
		$this->output( "Couldn't legalize; form '$legalized' still invalid; using '$fallback'\n" );
		$title = Title::newFromText( $fallback );
	} elseif ( $title->exists() ) {
		$this->output( "Legalized for '$legalized' exists; using '$fallback'\n" );
		$title = Title::newFromText( $fallback );
	}

	if ( !$title || $title->exists() ) {
		// This can happen in corner cases like if numbers are made not valid
		// title characters using the (deprecated) $wgLegalTitleChars or
		// a 'Broken/id:foo' title already exists.
		// $title may be null here, so never dereference it unconditionally.
		$destName = $title ? $title->getText() : $fallback;
		$this->error( "Destination page $destName is invalid or already exists, skipping." );
		return;
	}

	$dest = $title->getDBkey();
	if ( $this->dryrun ) {
		$this->output( "DRY RUN: would rename $row->page_id ($row->page_namespace," .
			"'$row->page_title') to (0,'$dest')\n" );
		return;
	}

	// The page always lands in the main namespace, so report namespace 0
	// (matching the dry-run message and the UPDATE below).
	$this->output( "renaming $row->page_id ($row->page_namespace," .
		"'$row->page_title') to (0,'$dest')\n" );
	$this->getPrimaryDB()
		->newUpdateQueryBuilder()
		->update( 'page' )
		->set( [ 'page_title' => $dest, 'page_namespace' => 0 ] )
		->where( [ 'page_id' => $row->page_id ] )
		->caller( __METHOD__ )->execute();
}
192
/**
 * Rename a page whose stored namespace/title differs from what its title parses to.
 *
 * If the parsed title is free and can exist, the page is moved straight to it.
 * Otherwise a replacement under the configured prefix is built, falling back
 * to "<prefix>id:<page_id>" (the same form moveIllegalPage() uses) when that
 * replacement is invalid or taken. In dry-run mode the rename is only reported.
 *
 * @param object $row Page row; page_id, page_namespace and page_title are read
 * @param Title $title Title obtained by parsing the stored namespace and title
 */
protected function moveInconsistentPage( $row, Title $title ) {
	$titleImpossible = $title->getInterwiki() || !$title->canExist();
	if ( $title->exists( IDBAccessObject::READ_LATEST ) || $titleImpossible ) {
		$prior = $titleImpossible ? $title->getPrefixedDBkey() : $title->getDBkey();

		$ns = (int)$row->page_namespace;
		# If a page is saved in the main namespace with a namespace prefix then try to move it into
		# that namespace. If there's no conflict then it will succeed. Otherwise it will hit the
		# "$ns !== 0" branch below and be moved to Broken/Namespace:Title
		# whereas without this check it would just go to Broken/Title
		if ( $ns === 0 ) {
			$ns = $title->getNamespace();
		}

		# Old cleanupTitles could move articles there. See T25147.
		# or a page could be stored as (0, "Special:Foo") in which case the $titleImpossible
		# condition would be true and we've already added a prefix so pretend we're in mainspace
		# and don't add another
		if ( $ns < 0 ) {
			$ns = 0;
		}

		# Namespace which no longer exists. Put the page in the main namespace
		# since we don't have any idea of the old namespace name. See T70501.
		# We build the new title ourself rather than relying on getDBKey() because
		# that will return Special:BadTitle
		$namespaceInfo = $this->getServiceContainer()->getNamespaceInfo();
		if ( !$namespaceInfo->exists( $ns ) ) {
			$clean = "{$this->prefix}NS$ns:$row->page_title";
			$ns = 0;
		} elseif ( !$titleImpossible && !$title->exists() ) {
			// Looks like the current title, after cleaning it up, is valid and available
			$clean = $prior;
		} elseif ( $ns !== 0 ) {
			// Put all broken pages in the main namespace so that they can be found via Special:PrefixIndex
			$nsName = $namespaceInfo->getCanonicalName( $ns );
			$clean = "{$this->prefix}$nsName:{$prior}";
			$ns = 0;
		} else {
			$clean = $this->prefix . $prior;
		}

		$verified = Title::makeTitleSafe( $ns, $clean );
		if ( !$verified || $verified->exists() ) {
			// No space after "id:", so this matches the failsafe name
			// produced by moveIllegalPage().
			$lastResort = "{$this->prefix}id:{$row->page_id}";
			$this->output( "Couldn't legalize; form '$clean' exists; using '$lastResort'\n" );
			$verified = Title::makeTitleSafe( $ns, $lastResort );
			if ( !$verified || $verified->exists() ) {
				// This can happen in corner cases like if numbers are made not valid
				// title characters using the (deprecated) $wgLegalTitleChars or
				// a 'Broken/id:foo' title already exists
				$this->error( "Destination page $lastResort invalid or already exists." );
				return;
			}
		}
		$title = $verified;
	}

	$ns = $title->getNamespace();
	$dest = $title->getDBkey();

	if ( $this->dryrun ) {
		$this->output( "DRY RUN: would rename $row->page_id ($row->page_namespace," .
			"'$row->page_title') to ($ns,'$dest')\n" );
		return;
	}

	$this->output( "renaming $row->page_id ($row->page_namespace," .
		"'$row->page_title') to ($ns,'$dest')\n" );
	$this->getPrimaryDB()
		->newUpdateQueryBuilder()
		->update( 'page' )
		->set( [
			'page_namespace' => $ns,
			'page_title' => $dest
		] )
		->where( [ 'page_id' => $row->page_id ] )
		->caller( __METHOD__ )->execute();
	$this->getServiceContainer()->getLinkCache()->clear();
}
279}
280
// Name this script's class, then include the maintenance bootstrap file.
// NOTE(review): RUN_MAINTENANCE_IF_MAIN is defined outside this file; judging by its name it
// runs $maintClass only when this file is the entry script. Confirm against its definition.
281$maintClass = TitleCleanup::class;
282require_once RUN_MAINTENANCE_IF_MAIN;
const NS_FILE
Definition Defines.php:71
error( $err, $die=0)
Throw an error to the user.
output( $out, $channel=null)
Throw some output to the user.
getServiceContainer()
Returns the main service container.
addDescription( $text)
Set the description text.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
setBatchSize( $s=0)
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.
Represents a title within MediaWiki.
Definition Title.php:79
canExist()
Can this title represent a page in the wiki's database?
Definition Title.php:1213
exists( $flags=0)
Check if page exists.
Definition Title.php:3150
getNamespace()
Get the namespace index, i.e.
Definition Title.php:1045
getInterwiki()
Get the interwiki prefix.
Definition Title.php:946
getDBkey()
Get the main part with underscores.
Definition Title.php:1036
getPrefixedDBkey()
Get the prefixed database key form.
Definition Title.php:1850
Generic class to cleanup a database table.
progress( $updated)
Maintenance script to clean up broken, unparseable titles.
moveIllegalPage( $row)
moveInconsistentPage( $row, Title $title)
__construct()
Default constructor.
execute()
Do the actual work. All child classes will need to implement this. Returns bool|null|void: true for success,...
$maintClass