MediaWiki
master
cleanupTitles.php
Go to the documentation of this file.
1
<?php
28
use
MediaWiki\Title\Title
;
29
30
require_once __DIR__ .
'/TableCleanup.php'
;
31
37
class
TitleCleanup
extends
TableCleanup
{
38
/**
 * Set up the script: run the parent constructor, register the description
 * text and set the batch size to 1000.
 */
public function __construct() {
	parent::__construct();

	$description = 'Script to clean up broken, unparseable titles';
	$this->addDescription( $description );

	$this->setBatchSize( 1000 );
}
43
47
/**
 * Inspect one page row and repair its title when it does not round-trip.
 *
 * The stored namespace/title pair is turned into display text, normalized by
 * the content language and parsed again. When the parsed title can exist and
 * gives back the same namespace and DB key, nothing is changed. Otherwise the
 * row is either only reported (file namespace with an image record) or handed
 * to moveIllegalPage() / moveInconsistentPage().
 *
 * @param object $row Row object; page_id, page_namespace and page_title are read
 */
protected function processRow( $row ) {
	$display = Title::makeName( $row->page_namespace, $row->page_title );
	$contentLanguage = $this->getServiceContainer()->getContentLanguage();
	$title = Title::newFromText( $contentLanguage->normalize( $display ) );

	// The namespace comparison is deliberately loose, the DB key comparison strict.
	// NOTE(review): presumably the row carries the namespace as a string — confirm.
	$needsCleanup = $title === null
		|| !$title->canExist()
		|| $title->getNamespace() != $row->page_namespace
		|| $title->getDBkey() !== $row->page_title;

	if ( !$needsCleanup ) {
		// all is fine
		$this->progress( 0 );

		return;
	}

	if ( $row->page_namespace == NS_FILE && $this->fileExists( $row->page_title ) ) {
		// File pages with an image record are left to the other script; nothing is updated here.
		$this->output( "file $row->page_title needs cleanup, please run cleanupImages.php.\n" );
		$this->progress( 0 );

		return;
	}

	if ( $title === null ) {
		// The title text could not be parsed at all.
		$this->output( "page $row->page_id ($display) is illegal.\n" );
		$this->moveIllegalPage( $row );
	} else {
		// Parsed fine, but not to the namespace/key stored in the row.
		$this->output( "page $row->page_id ($display) doesn't match self.\n" );
		$this->moveInconsistentPage( $row, $title );
	}

	$this->progress( 1 );
}
76
81
/**
 * Tell whether the image table holds a record with the given name.
 *
 * Only a constant is selected instead of the whole row, since the caller
 * needs nothing but the presence of a match.
 *
 * @param string $name Value compared against image.img_name
 * @return bool True when a matching image row was found
 */
protected function fileExists( $name ) {
	// XXX: Doesn't actually check for file existence, just presence of image record.
	// This is reasonable, since cleanupImages.php only iterates over the image table.
	$found = $this->getReplicaDB()->newSelectQueryBuilder()
		->select( '1' )
		->from( 'image' )
		->where( [ 'img_name' => $name ] )
		->caller( __METHOD__ )
		->fetchField();

	// fetchField() yields false when no row matched.
	return $found !== false;
}
94
98
/**
 * Rename a page whose stored title cannot be parsed.
 *
 * Every character of page_title other than A-Z, a-z, 0-9, "_", "/", "\" and
 * "-" is replaced through the hexChar callback on this object, and the result
 * is placed under "Broken/". When that name is still unparseable or already
 * taken, "Broken/id:<page_id>" is used instead. Only page_title is rewritten;
 * page_namespace stays as stored in the row. In dry-run mode the rename is
 * only printed.
 *
 * @param object $row Row object; page_id, page_namespace and page_title are read
 */
protected function moveIllegalPage( $row ) {
	$legal = 'A-Za-z0-9_/\\\\-';
	$legalized = preg_replace_callback(
		"!([^$legal])!",
		[ $this, 'hexChar' ],
		$row->page_title
	);
	if ( $legalized === '.' ) {
		$legalized = '(dot)';
	}
	if ( $legalized === '_' ) {
		$legalized = '(space)';
	}
	$legalized = 'Broken/' . $legalized;

	// Last-resort name, used when the legalized form is unusable.
	$clean = 'Broken/id:' . $row->page_id;

	$title = Title::newFromText( $legalized );
	if ( $title === null ) {
		$this->output( "Couldn't legalize; form '$legalized' still invalid; using '$clean'\n" );
		$title = Title::newFromText( $clean );
	} elseif ( $title->exists( IDBAccessObject::READ_LATEST ) ) {
		// READ_LATEST, as in moveInconsistentPage(), so that a rename made earlier
		// in this run is seen and two rows are not sent to the same destination.
		// NOTE(review): the existence check uses the namespace parsed from the text,
		// not $row->page_namespace — confirm this is intended.
		$this->output( "Legalized for '$legalized' exists; using '$clean'\n" );
		$title = Title::newFromText( $clean );
	}

	if ( $title === null ) {
		// Same guard as moveInconsistentPage(): stop with a message instead of
		// failing on a null dereference below.
		$this->fatalError( "Something awry; empty title." );
	}

	$dest = $title->getDBkey();
	if ( $this->dryrun ) {
		$this->output( "DRY RUN: would rename $row->page_id ($row->page_namespace," .
			"'$row->page_title') to ($row->page_namespace,'$dest')\n" );
	} else {
		$this->output( "renaming $row->page_id ($row->page_namespace," .
			"'$row->page_title') to ($row->page_namespace,'$dest')\n" );
		$this->getPrimaryDB()
			->newUpdateQueryBuilder()
			->update( 'page' )
			->set( [ 'page_title' => $dest ] )
			->where( [ 'page_id' => $row->page_id ] )
			->caller( __METHOD__ )->execute();
		// Drop cached link data after the rename, as moveInconsistentPage() does.
		$this->getServiceContainer()->getLinkCache()->clear();
	}
}
137
142
/**
 * Rename a page row to the title it parses to, or to a "Broken/" name when
 * that title cannot be used.
 *
 * When the parsed title already exists, carries an interwiki prefix or cannot
 * exist, a replacement is built in the row's namespace (falling back to
 * namespace 0 for negative or unknown namespaces). If the replacement is
 * invalid or taken, "Broken/id:<page_id>" is used. Otherwise the parsed title
 * itself becomes the destination. In dry-run mode the rename is only printed;
 * otherwise page_namespace and page_title are updated and the link cache is
 * cleared.
 *
 * @param object $row Row object; page_id, page_namespace and page_title are read
 * @param Title $title Title parsed from the row's stored name
 */
protected function moveInconsistentPage( $row, Title $title ) {
	$mustRelocate = $title->exists( IDBAccessObject::READ_LATEST )
		|| $title->getInterwiki()
		|| !$title->canExist();

	if ( $mustRelocate ) {
		$cannotHostPage = $title->getInterwiki() || !$title->canExist();
		$baseKey = $cannotHostPage ? $title->getPrefixedDBkey() : $title->getDBkey();

		// Old cleanupTitles could move articles there. See T25147.
		$ns = $row->page_namespace < 0 ? 0 : $row->page_namespace;

		// Namespace which no longer exists. Put the page in the main namespace
		// since we don't have any idea of the old namespace name. See T70501.
		$namespaceInfo = $this->getServiceContainer()->getNamespaceInfo();
		if ( !$namespaceInfo->exists( $ns ) ) {
			$ns = 0;
		}

		// Looks like the current title, after cleaning it up, is valid and available
		$nameIsFree = !$cannotHostPage && !$title->exists();
		$clean = $nameIsFree ? $baseKey : 'Broken/' . $baseKey;

		$candidate = Title::makeTitleSafe( $ns, $clean );
		if ( !$candidate || $candidate->exists() ) {
			$blah = "Broken/id:" . $row->page_id;
			$this->output( "Couldn't legalize; form '$clean' exists; using '$blah'\n" );
			$candidate = Title::makeTitleSafe( $ns, $blah );
		}
		$title = $candidate;
	}

	if ( $title === null ) {
		// fatalError() prints the message and terminates the script.
		$this->fatalError( "Something awry; empty title." );
	}

	$ns = $title->getNamespace();
	$dest = $title->getDBkey();

	if ( $this->dryrun ) {
		$this->output( "DRY RUN: would rename $row->page_id ($row->page_namespace," .
			"'$row->page_title') to ($ns,'$dest')\n" );

		return;
	}

	$this->output( "renaming $row->page_id ($row->page_namespace," .
		"'$row->page_title') to ($ns,'$dest')\n" );

	$update = $this->getPrimaryDB()
		->newUpdateQueryBuilder()
		->update( 'page' )
		->set( [
			'page_namespace' => $ns,
			'page_title' => $dest
		] )
		->where( [ 'page_id' => $row->page_id ] )
		->caller( __METHOD__ );
	$update->execute();

	$this->getServiceContainer()->getLinkCache()->clear();
}
204
}
205
206
// Class name of the maintenance script defined in this file.
// NOTE(review): presumably read by the file required through RUN_MAINTENANCE_IF_MAIN below — confirm.
$maintClass
= TitleCleanup::class;
207
require_once RUN_MAINTENANCE_IF_MAIN;
NS_FILE
const NS_FILE
Definition
Defines.php:70
Maintenance\getReplicaDB
getReplicaDB()
Definition
Maintenance.php:1082
Maintenance\output
output( $out, $channel=null)
Throw some output to the user.
Definition
Maintenance.php:467
Maintenance\getServiceContainer
getServiceContainer()
Returns the main service container.
Definition
Maintenance.php:641
Maintenance\addDescription
addDescription( $text)
Set the description text.
Definition
Maintenance.php:329
Maintenance\getPrimaryDB
getPrimaryDB()
Definition
Maintenance.php:1093
Maintenance\setBatchSize
setBatchSize( $s=0)
Definition
Maintenance.php:407
Maintenance\fatalError
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.
Definition
Maintenance.php:521
MediaWiki\Title\Title
Represents a title within MediaWiki.
Definition
Title.php:78
MediaWiki\Title\Title\canExist
canExist()
Can this title represent a page in the wiki's database?
Definition
Title.php:1212
MediaWiki\Title\Title\exists
exists( $flags=0)
Check if page exists.
Definition
Title.php:3201
MediaWiki\Title\Title\getNamespace
getNamespace()
Get the namespace index, i.e.
Definition
Title.php:1044
MediaWiki\Title\Title\getInterwiki
getInterwiki()
Get the interwiki prefix.
Definition
Title.php:945
MediaWiki\Title\Title\getDBkey
getDBkey()
Get the main part with underscores.
Definition
Title.php:1035
MediaWiki\Title\Title\getPrefixedDBkey
getPrefixedDBkey()
Get the prefixed database key form.
Definition
Title.php:1849
TableCleanup
Generic class to cleanup a database table.
Definition
TableCleanup.php:33
TableCleanup\progress
progress( $updated)
Definition
TableCleanup.php:82
TitleCleanup
Maintenance script to clean up broken, unparseable titles.
Definition
cleanupTitles.php:37
TitleCleanup\moveIllegalPage
moveIllegalPage( $row)
Definition
cleanupTitles.php:98
TitleCleanup\processRow
processRow( $row)
Definition
cleanupTitles.php:47
TitleCleanup\moveInconsistentPage
moveInconsistentPage( $row, Title $title)
Definition
cleanupTitles.php:142
TitleCleanup\__construct
__construct()
Default constructor.
Definition
cleanupTitles.php:38
TitleCleanup\fileExists
fileExists( $name)
Definition
cleanupTitles.php:81
$maintClass
$maintClass
Definition
cleanupTitles.php:206
maintenance
cleanupTitles.php
Generated on Tue Apr 23 2024 12:25:54 for MediaWiki by
1.10.0