MediaWiki REL1_32
orphans.php
Go to the documentation of this file.
<?php

// Load the maintenance framework; the Orphans class below extends Maintenance.
require_once __DIR__ . '/Maintenance.php';

/**
 * Maintenance script that looks for 'orphan' revisions hooked to pages which
 * don't exist, and for pages whose page_latest link is incorrect.
 *
 * A third check, for 'childless' pages with no revisions, is implemented in
 * checkWidows() but is not invoked from execute() because it does not work yet.
 *
 * Without --fix the script only reports what it finds; with --fix it locks the
 * affected tables and repairs the broken entries.
 */
class Orphans extends Maintenance {
	public function __construct() {
		parent::__construct();
		$this->addDescription( "Look for 'orphan' revisions hooked to pages which don't exist\n" .
			"and 'childless' pages with no revisions\n" .
			"Then, kill the poor widows and orphans\n" .
			"Man this is depressing"
		);
		$this->addOption( 'fix', 'Actually fix broken entries' );
	}

	/**
	 * Run the enabled consistency checks, fixing entries when --fix is given.
	 */
	public function execute() {
		$fix = $this->hasOption( 'fix' );

		$this->checkOrphans( $fix );
		$this->checkSeparation( $fix );
		# Does not work yet, do not use
		# $this->checkWidows( $fix );
	}

	/**
	 * Lock the appropriate tables for the script.
	 *
	 * The page, revision and redirect tables are always write-locked; any
	 * tables named in $extraTable are write-locked as well.
	 *
	 * @param IMaintainableDatabase $db Connection to take the locks on
	 * @param string[] $extraTable Names of additional tables to lock
	 */
	private function lockTables( $db, $extraTable = [] ) {
		$tbls = [ 'page', 'revision', 'redirect' ];
		if ( $extraTable ) {
			$tbls = array_merge( $tbls, $extraTable );
		}
		$db->lockTables( [], $tbls, __METHOD__, false );
	}

	/**
	 * Check for orphan revisions: revision rows whose rev_page matches no row
	 * in the page table.
	 *
	 * Each orphan is listed on the output; with $fix the row is also deleted.
	 *
	 * @param bool $fix Whether to delete the orphan revisions that are found
	 */
	private function checkOrphans( $fix ) {
		$dbw = $this->getDB( DB_MASTER );
		$commentStore = CommentStore::getStore();

		if ( $fix ) {
			$this->lockTables( $dbw );
		}

		$commentQuery = $commentStore->getJoin( 'rev_comment' );
		$actorQuery = ActorMigration::newMigration()->getJoin( 'rev_user' );

		$this->output( "Checking for orphan revision table entries... "
			. "(this may take a while on a large wiki)\n" );
		// LEFT JOIN against page and keep only the rows where no page matched.
		$result = $dbw->select(
			[ 'revision', 'page' ] + $commentQuery['tables'] + $actorQuery['tables'],
			[ 'rev_id', 'rev_page', 'rev_timestamp' ] + $commentQuery['fields'] + $actorQuery['fields'],
			[ 'page_id' => null ],
			__METHOD__,
			[],
			[ 'page' => [ 'LEFT JOIN', [ 'rev_page=page_id' ] ] ] + $commentQuery['joins']
				+ $actorQuery['joins']
		);
		$orphans = $result->numRows();
		if ( $orphans > 0 ) {
			$this->output( "$orphans orphan revisions...\n" );
			$this->output( sprintf(
				"%10s %10s %14s %20s %s\n",
				'rev_id', 'rev_page', 'rev_timestamp', 'rev_user_text', 'rev_comment'
			) );

			// Fully qualified so the lookup does not depend on a `use` import.
			$contLang = \MediaWiki\MediaWikiServices::getInstance()->getContentLanguage();
			foreach ( $result as $row ) {
				$comment = $commentStore->getComment( 'rev_comment', $row )->text;
				if ( $comment !== '' ) {
					$comment = '(' . $contLang->truncateForVisual( $comment, 40 ) . ')';
				}
				$rev_user_text = $contLang->truncateForVisual( $row->rev_user_text, 20 );
				# pad $rev_user_text to 20 characters. Note that this may
				# yield poor results if $rev_user_text contains combining
				# or half-width characters. Alas.
				$padding = 20 - mb_strlen( $rev_user_text );
				if ( $padding > 0 ) {
					// Append the padding; plain assignment would discard the name.
					$rev_user_text .= str_repeat( ' ', $padding );
				}
				$this->output( sprintf( "%10d %10d %14s %s %s\n",
					$row->rev_id,
					$row->rev_page,
					$row->rev_timestamp,
					$rev_user_text,
					$comment ) );
				if ( $fix ) {
					$dbw->delete( 'revision', [ 'rev_id' => $row->rev_id ], __METHOD__ );
				}
			}
			if ( !$fix ) {
				$this->output( "Run again with --fix to remove these entries automatically.\n" );
			}
		} else {
			$this->output( "No orphans! Yay!\n" );
		}

		if ( $fix ) {
			$dbw->unlockTables( __METHOD__ );
		}
	}

	/**
	 * Check for childless pages: page rows whose page_latest matches no row in
	 * the revision table.
	 *
	 * Each such page is listed on the output; with $fix the page row is also
	 * deleted.
	 *
	 * @todo Does not work yet; the call in execute() is commented out.
	 *
	 * @param bool $fix Whether to delete the childless pages that are found
	 */
	private function checkWidows( $fix ) {
		$dbw = $this->getDB( DB_MASTER );
		$page = $dbw->tableName( 'page' );
		$revision = $dbw->tableName( 'revision' );

		if ( $fix ) {
			$this->lockTables( $dbw );
		}

		$this->output( "\nChecking for childless page table entries... "
			. "(this may take a while on a large wiki)\n" );
		$result = $dbw->query( "
			SELECT *
			FROM $page LEFT OUTER JOIN $revision ON page_latest=rev_id
			WHERE rev_id IS NULL
		", __METHOD__ );
		$widows = $result->numRows();
		if ( $widows > 0 ) {
			$this->output( "$widows childless pages...\n" );
			$this->output( sprintf( "%10s %11s %2s %s\n", 'page_id', 'page_latest', 'ns', 'page_title' ) );
			foreach ( $result as $row ) {
				// Go through output() like the rest of the script, not bare printf.
				$this->output( sprintf( "%10d %11d %2d %s\n",
					$row->page_id,
					$row->page_latest,
					$row->page_namespace,
					$row->page_title ) );
				if ( $fix ) {
					$dbw->delete( 'page', [ 'page_id' => $row->page_id ], __METHOD__ );
				}
			}
			if ( !$fix ) {
				$this->output( "Run again with --fix to remove these entries automatically.\n" );
			}
		} else {
			$this->output( "No childless pages! Yay!\n" );
		}

		if ( $fix ) {
			$dbw->unlockTables( __METHOD__ );
		}
	}

	/**
	 * Check for pages where page_latest is wrong.
	 *
	 * For every page, the timestamp of the revision that page_latest points at
	 * is compared with the newest rev_timestamp among the page's revisions.
	 * Mismatches are listed; with $fix the page is re-pointed at a revision
	 * carrying that newest timestamp.
	 *
	 * @param bool $fix Whether to update the pages that are found
	 */
	private function checkSeparation( $fix ) {
		$dbw = $this->getDB( DB_MASTER );
		$page = $dbw->tableName( 'page' );
		$revision = $dbw->tableName( 'revision' );

		if ( $fix ) {
			$this->lockTables( $dbw, [ 'user', 'text' ] );
		}

		$this->output( "\nChecking for pages whose page_latest links are incorrect... "
			. "(this may take a while on a large wiki)\n" );
		$result = $dbw->query( "
			SELECT *
			FROM $page LEFT OUTER JOIN $revision ON page_latest=rev_id
		", __METHOD__ );
		$found = 0;
		foreach ( $result as $row ) {
			$result2 = $dbw->query( "
				SELECT MAX(rev_timestamp) as max_timestamp
				FROM $revision
				WHERE rev_page=" . (int)( $row->page_id ),
				__METHOD__
			);
			$row2 = $dbw->fetchObject( $result2 );
			if ( !$row2 ) {
				$this->output( "wtf\n" );
				continue;
			}
			if ( $row->rev_timestamp === $row2->max_timestamp ) {
				// page_latest already points at a revision with the newest timestamp.
				continue;
			}

			if ( $found === 0 ) {
				$this->output( sprintf( "%10s %10s %14s %14s\n",
					'page_id', 'rev_id', 'timestamp', 'max timestamp' ) );
			}
			++$found;
			$this->output( sprintf( "%10d %10d %14s %14s\n",
				$row->page_id,
				$row->page_latest,
				$row->rev_timestamp,
				$row2->max_timestamp ) );

			if ( !$fix ) {
				continue;
			}

			# ...
			$maxId = $dbw->selectField(
				'revision',
				'rev_id',
				[
					'rev_page' => $row->page_id,
					'rev_timestamp' => $row2->max_timestamp
				],
				__METHOD__
			);
			// The lookup finds nothing when the page has no revisions of its own
			// (max_timestamp is NULL); never hand a missing revision on.
			$maxRev = $maxId !== false ? Revision::newFromId( $maxId ) : null;
			if ( !$maxRev ) {
				$this->output( "... no usable revision found for page {$row->page_id}, skipping\n" );
				continue;
			}
			$this->output( "... updating to revision $maxId\n" );
			$title = Title::makeTitle( $row->page_namespace, $row->page_title );
			$article = WikiPage::factory( $title );
			$article->updateRevisionOn( $dbw, $maxRev );
		}

		if ( $found ) {
			$this->output( "Found $found pages with incorrect latest revision.\n" );
		} else {
			$this->output( "No pages with incorrect latest revision. Yay!\n" );
		}
		if ( !$fix && $found > 0 ) {
			$this->output( "Run again with --fix to remove these entries automatically.\n" );
		}

		if ( $fix ) {
			$dbw->unlockTables( __METHOD__ );
		}
	}
}
263
// Name the script class, then include the runner file.
// NOTE(review): presumably the runner instantiates $maintClass and executes it
// only when this file is the entry point — confirm against Maintenance.php.
$maintClass = Orphans::class;
require_once RUN_MAINTENANCE_IF_MAIN;
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
output( $out, $channel=null)
Throw some output to the user.
getDB( $db, $groups=[], $wiki=false)
Returns a database to be used by current maintenance script.
hasOption( $name)
Checks to see if a particular option exists.
addDescription( $text)
Set the description text.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
MediaWikiServices is the service locator for the application scope of MediaWiki.
Maintenance script that looks for 'orphan' revisions hooked to pages which don't exist and 'childless' pages with no revisions.
Definition orphans.php:42
lockTables( $db, $extraTable=[])
Lock the appropriate tables for the script.
Definition orphans.php:65
checkSeparation( $fix)
Check for pages where page_latest is wrong.
Definition orphans.php:194
checkWidows( $fix)
Definition orphans.php:148
execute()
Do the actual work.
Definition orphans.php:53
__construct()
Default constructor.
Definition orphans.php:43
checkOrphans( $fix)
Check for orphan revisions.
Definition orphans.php:77
static newFromId( $id, $flags=0)
Load a page revision from a given revision ID number.
Definition Revision.php:114
Advanced database interface for IDatabase handles that include maintenance methods.
require_once RUN_MAINTENANCE_IF_MAIN
$maintClass
Definition orphans.php:264
const DB_MASTER
Definition defines.php:26