/**
 * Namespace filter for this run. The value false means "no namespace
 * restriction" (the code compares against false with !==); otherwise it
 * holds an integer namespace ID (assigned as (int)$ns further down).
 */
39 protected $namespace =
false;
42 parent::__construct();
44 $this->
addOption(
'dfn-only',
'Delete links from nonexistent articles only' );
45 $this->
addOption(
'new-only',
'Only affect articles with just a single edit' );
46 $this->
addOption(
'redirects-only',
'Only fix redirects, not all links' );
47 $this->
addOption(
'old-redirects-only',
'Only fix redirects with no redirect table entry' );
48 $this->
addOption(
'e',
'Last page id to refresh',
false,
true );
49 $this->
addOption(
'dfn-chunk-size',
'Maximum number of existent IDs to check per ' .
50 'query, default 100000',
false,
true );
51 $this->
addOption(
'namespace',
'Only fix pages in this namespace',
false,
true );
52 $this->
addOption(
'category',
'Only fix pages in this category',
false,
true );
53 $this->
addOption(
'tracking-category',
'Only fix pages in this tracking category',
false,
true );
54 $this->
addArg(
'start',
'Page_id to start from, default 1',
false );
63 $start = (int)$this->
getArg( 0 ) ?:
null;
64 $end = (int)$this->
getOption(
'e' ) ?:
null;
65 $dfnChunkSize = (int)$this->
getOption(
'dfn-chunk-size', 100000 );
69 $this->
namespace = false;
71 $this->
namespace = (int)$ns;
75 $category = $this->
getOption(
'category' );
78 $this->
fatalError(
"'$category' is an invalid category name!\n" );
80 $this->refreshCategory(
$title );
81 } elseif ( $this->
hasOption(
'tracking-category' ) ) {
// Read the option under the exact name registered with addOption() and
// tested by hasOption() just above; a misspelled name makes getOption()
// fall back to its default (null).
82 $this->refreshTrackingCategory( $this->
getOption(
'tracking-category' ) );
83 } elseif ( !$this->
hasOption(
'dfn-only' ) ) {
85 $redir = $this->
hasOption(
'redirects-only' );
86 $oldRedir = $this->
hasOption(
'old-redirects-only' );
87 $this->doRefreshLinks( $start, $new, $end, $redir, $oldRedir );
88 $this->deleteLinksFromNonexistent(
null,
null, $this->
getBatchSize(), $dfnChunkSize );
90 $this->deleteLinksFromNonexistent( $start, $end, $this->
getBatchSize(), $dfnChunkSize );
/**
 * Build the query condition that limits page queries to the selected namespace.
 *
 * When $this->namespace is not false the result is the condition array
 * [ 'page_namespace' => $this->namespace ]. The other branch of the ternary
 * is not visible in this excerpt.
 */
94 private function namespaceCond() {
95 return $this->
namespace !== false
96 ? [ 'page_namespace' => $this->namespace ]
/**
 * Refresh the redirect table and/or the links tables for a range of page IDs.
 *
 * Three code paths are visible in this excerpt:
 *  - $oldRedirectsOnly: queries 'page' LEFT JOIN 'redirect' (on page_id=rd_from)
 *    for pages with page_is_redirect=1 inside the ID interval, and calls
 *    fixRedirect() for each row.
 *  - $newOnly: iterates over a result set restricted to the same ID interval
 *    and namespace condition; each row goes to fixRedirect() when
 *    $redirectsOnly is set and to fixLinksFromArticle() otherwise.
 *  - otherwise: walks every ID from $start to $end, calling fixRedirect() on
 *    each, and then (unless $redirectsOnly) walks the same range again calling
 *    fixLinksFromArticle().
 *
 * Every loop checks a counter against self::REPORTING_INTERVAL and calls
 * $lbFactory->waitForReplication(); the lines between those two statements
 * are not visible here.
 *
 * NOTE(review): in the full-scan path $end is assigned
 * max( max(page_id), max(rd_from) ); whether that assignment is guarded by a
 * check on the caller-supplied $end cannot be seen in this excerpt.
 *
 * @param int|null $start Page ID to start from; a null value is special-cased
 *   at the top of the method (handling not visible here)
 * @param bool $newOnly Take the "new articles" path
 * @param int|null $end Last page ID to process
 * @param bool $redirectsOnly Only fix redirects, do not refresh links tables
 * @param bool $oldRedirectsOnly Take the "old redirects" path
 */
108 private function doRefreshLinks( $start, $newOnly =
false,
109 $end =
null, $redirectsOnly =
false, $oldRedirectsOnly =
false
113 if ( $start ===
null ) {
118 $this->
getHookRunner()->onMaintenanceRefreshLinksInit( $this );
120 $what = $redirectsOnly ?
"redirects" :
"links";
121 $lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
123 if ( $oldRedirectsOnly ) {
124 # This entire code path is cut-and-pasted from below. Hurrah.
127 "page_is_redirect=1",
129 self::intervalCond(
$dbr,
'page_id', $start, $end ),
130 ] + $this->namespaceCond();
133 [
'page',
'redirect' ],
138 [
'redirect' => [
"LEFT JOIN",
"page_id=rd_from" ] ]
140 $num =
$res->numRows();
141 $this->
output(
"Refreshing $num old redirects from $start...\n" );
145 foreach (
$res as $row ) {
146 if ( !( ++$i % self::REPORTING_INTERVAL ) ) {
148 $lbFactory->waitForReplication();
150 $this->fixRedirect( $row->page_id );
152 } elseif ( $newOnly ) {
153 $this->
output(
"Refreshing $what from " );
158 self::intervalCond(
$dbr,
'page_id', $start, $end ),
159 ] + $this->namespaceCond(),
162 $num =
$res->numRows();
163 $this->
output(
"$num new articles...\n" );
166 foreach (
$res as $row ) {
167 if ( !( ++$i % self::REPORTING_INTERVAL ) ) {
169 $lbFactory->waitForReplication();
171 if ( $redirectsOnly ) {
172 $this->fixRedirect( $row->page_id );
174 self::fixLinksFromArticle( $row->page_id, $this->namespace );
179 $maxPage =
$dbr->selectField(
'page',
'max(page_id)',
'', __METHOD__ );
180 $maxRD =
$dbr->selectField(
'redirect',
'max(rd_from)',
'', __METHOD__ );
181 $end = max( $maxPage, $maxRD );
183 $this->
output(
"Refreshing redirects table.\n" );
184 $this->
output(
"Starting from page_id $start of $end.\n" );
186 for ( $id = $start; $id <= $end; $id++ ) {
187 if ( !( $id % self::REPORTING_INTERVAL ) ) {
189 $lbFactory->waitForReplication();
191 $this->fixRedirect( $id );
194 if ( !$redirectsOnly ) {
195 $this->
output(
"Refreshing links tables.\n" );
196 $this->
output(
"Starting from page_id $start of $end.\n" );
198 for ( $id = $start; $id <= $end; $id++ ) {
199 if ( !( $id % self::REPORTING_INTERVAL ) ) {
201 $lbFactory->waitForReplication();
203 self::fixLinksFromArticle( $id, $this->
namespace );
/**
 * Bring the 'redirect' table row and the page_is_redirect flag for one page
 * in line with the page's current content.
 *
 * Visible behaviour:
 *  - If no page exists for $id, the 'redirect' row with rd_from = $id is deleted.
 *  - If a namespace filter is set and the page is not in that namespace, a
 *    separate branch is taken (its body is not visible in this excerpt).
 *  - Otherwise the raw content's redirect target is read; a null target
 *    deletes the 'redirect' row, and insertRedirectEntry() is called with the
 *    target (presumably in the non-null branch - the else line is not visible).
 *  - page.page_is_redirect is updated to $fieldValue for this page_id; the
 *    assignment of $fieldValue is not visible here.
 *
 * @param int $id The page ID to check
 */
221 private function fixRedirect( $id ) {
222 $page = MediaWikiServices::getInstance()->getWikiPageFactory()->newFromID( $id );
225 if ( $page ===
null ) {
228 $dbw->delete(
'redirect', [
'rd_from' => $id ],
232 } elseif ( $this->
namespace !==
false
233 && !$page->getTitle()->inNamespace( $this->namespace )
239 $content = $page->getContent( RevisionRecord::RAW );
241 $rt =
$content->getRedirectTarget();
244 if ( $rt ===
null ) {
247 $dbw->delete(
'redirect', [
'rd_from' => $id ], __METHOD__ );
250 $page->insertRedirectEntry( $rt );
255 $dbw->update(
'page', [
'page_is_redirect' => $fieldValue ],
256 [
'page_id' => $id ], __METHOD__ );
265 $services = MediaWikiServices::getInstance();
266 $page = $services->getWikiPageFactory()->newFromID( $id );
268 $services->getLinkCache()->clear();
270 if ( $page ===
null ) {
272 } elseif ( $ns !==
false
273 && !$page->getTitle()->inNamespace( $ns ) ) {
280 $page->doSecondaryDataUpdates( [
281 'defer' => DeferredUpdates::POSTSEND,
282 'recursive' =>
false,
284 DeferredUpdates::doUpdates();
/**
 * Remove links-table entries that point from nonexistent pages, working
 * through the page ID range in chunks.
 *
 * Each pass of the do/while loop asks the database for the page_id found
 * $chunkSize rows past $start (ORDER BY page_id, OFFSET $chunkSize) within
 * the requested interval and namespace condition. When such a row exists,
 * the chunk ends at that ID minus one. The chunk is then handed to
 * dfnCheckInterval(). The loop stops once no further start ID is found.
 *
 * A null bound is printed as "(-INF" / "INF)" in the progress message.
 *
 * @param int|null $start First page ID to check, or null for no lower bound
 * @param int|null $end Last page ID to check, or null for no upper bound
 * @param int $batchSize Batch size passed on to dfnCheckInterval()
 *   (the remaining parameter, $chunkSize, is declared on a line not visible here)
 */
298 private function deleteLinksFromNonexistent( $start =
null, $end =
null, $batchSize = 100,
301 MediaWikiServices::getInstance()->getDBLoadBalancerFactory()->waitForReplication();
302 $this->output(
"Deleting illegal entries from the links tables...\n" );
307 $nextStart =
$dbr->selectField(
310 [ self::intervalCond(
$dbr,
'page_id', $start, $end ) ]
311 + $this->namespaceCond(),
313 [
'ORDER BY' =>
'page_id',
'OFFSET' => $chunkSize ]
316 if ( $nextStart !==
false ) {
321 $chunkEnd = $nextStart - 1;
327 $fmtStart = $start !==
null ?
"[$start" :
'(-INF';
328 $fmtChunkEnd = $chunkEnd !==
null ?
"$chunkEnd]" :
'INF)';
329 $this->
output(
" Checking interval $fmtStart, $fmtChunkEnd\n" );
330 $this->dfnCheckInterval( $start, $chunkEnd, $batchSize );
334 }
while ( $nextStart !==
false );
/**
 * Delete rows from each links table whose source page ID no longer exists
 * in the 'page' table, restricted to one ID interval.
 *
 * For every table/field pair in $linksTables the method repeatedly selects
 * up to $batchSize distinct source IDs (ordered by the field) that fall in
 * the interval and are NOT IN the set of existing page IDs, deletes the rows
 * for those IDs, prints the running counter, moves $tableStart past the last
 * ID handled, and waits for replication. It keeps going while a full batch
 * was returned and $tableStart has not passed $end.
 *
 * @param int|null $start First ID to check, or null for no lower bound
 * @param int|null $end Last ID to check, or null for no upper bound
 * @param int $batchSize Maximum number of IDs selected per query
 */
343 private function dfnCheckInterval( $start =
null, $end =
null, $batchSize = 100 ) {
349 'pagelinks' =>
'pl_from',
350 'imagelinks' =>
'il_from',
351 'categorylinks' =>
'cl_from',
352 'templatelinks' =>
'tl_from',
353 'externallinks' =>
'el_from',
354 'iwlinks' =>
'iwl_from',
355 'langlinks' =>
'll_from',
356 'redirect' =>
'rd_from',
357 'page_props' =>
'pp_page',
360 $lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
361 foreach ( $linksTables as $table => $field ) {
362 $this->
output(
" $table: 0" );
363 $tableStart = $start;
366 $ids =
$dbr->selectFieldValues(
370 self::intervalCond(
$dbr, $field, $tableStart, $end ),
371 "$field NOT IN ({$dbr->selectSQLText( 'page', 'page_id', [], __METHOD__ )})",
374 [
'DISTINCT',
'ORDER BY' => $field,
'LIMIT' => $batchSize ]
377 $numIds = count( $ids );
380 $dbw->delete( $table, [ $field => $ids ], __METHOD__ );
381 $this->
output(
", $counter" );
382 $tableStart = $ids[$numIds - 1] + 1;
383 $lbFactory->waitForReplication();
386 }
while ( $numIds >= $batchSize && ( $end ===
null || $tableStart <= $end ) );
388 $this->
output(
" deleted.\n" );
/**
 * Build an SQL condition string restricting $var to the range given by
 * $start and $end, where a null bound means "unbounded on that side".
 *
 * Visible cases:
 *  - both bounds null: "$var IS NOT NULL"
 *  - only $end null:   "$var >= <quoted start>"
 *  - only $start null: "$var <= <quoted end>"
 * The case where both bounds are set is handled on lines not visible in
 * this excerpt. Bounds are passed through $db->addQuotes().
 *
 * @param IDatabase $db Database connection used for quoting
 * @param string $var Field name
 * @param mixed $start Lower bound, or null
 * @param mixed $end Upper bound, or null
 * @return string SQL condition
 */
404 private static function intervalCond(
IDatabase $db, $var, $start, $end ) {
405 if ( $start ===
null && $end ===
null ) {
406 return "$var IS NOT NULL";
407 } elseif ( $end ===
null ) {
408 return "$var >= " . $db->
addQuotes( $start );
409 } elseif ( $start ===
null ) {
410 return "$var <= " . $db->
addQuotes( $end );
/**
 * Refresh the pages in every category that belongs to a tracking category.
 *
 * The categories are looked up with getPossibleCategories() and each one is
 * passed to refreshCategory(). A "Tracking category ... is disabled" error
 * is reported under a condition that is not visible in this excerpt.
 *
 * @param string $category Tracking category key
 */
421 private function refreshTrackingCategory( $category ) {
422 $cats = $this->getPossibleCategories( $category );
425 $this->
error(
"Tracking category '$category' is disabled\n" );
429 foreach ( $cats as $cat ) {
430 $this->refreshCategory( $cat );
/**
 * Refresh the links of all pages that are members of a category.
 *
 * When a namespace filter is set, a page_namespace condition is added.
 * Pages are fetched from 'page' joined with 'categorylinks' in batches,
 * ordered by cl_timestamp then cl_from. Each batch resumes after the last
 * row seen via the condition
 * (cl_timestamp > T OR (cl_timestamp = T AND cl_from > lastId)).
 * Every row is passed to fixLinksFromArticle(); the loop continues while a
 * query returns exactly getBatchSize() rows.
 *
 * @param Title $category Category whose member pages are refreshed
 */
439 private function refreshCategory(
Title $category ) {
440 $this->
output(
"Refreshing pages in category '{$category->getText()}'...\n" );
447 if ( $this->
namespace !==
false ) {
448 $conds[
'page_namespace'] = $this->namespace;
454 $lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
456 $finalConds = $conds;
457 $timestamp =
$dbr->addQuotes( $timestamp );
459 "(cl_timestamp > $timestamp OR (cl_timestamp = $timestamp AND cl_from > $lastId))";
460 $res =
$dbr->select( [
'page',
'categorylinks' ],
461 [
'page_id',
'cl_timestamp' ],
465 'ORDER BY' => [
'cl_timestamp',
'cl_from' ],
470 foreach (
$res as $row ) {
471 if ( !( ++$i % self::REPORTING_INTERVAL ) ) {
473 $lbFactory->waitForReplication();
475 $lastId = $row->page_id;
476 $timestamp = $row->cl_timestamp;
477 self::fixLinksFromArticle( $row->page_id );
480 }
while (
$res->numRows() == $this->getBatchSize() );
/**
 * Look up the categories registered under a tracking category key.
 *
 * Returns the 'cats' entry for $categoryKey from the TrackingCategories
 * service when the key is known; otherwise terminates the script through
 * fatalError() with an "Unknown tracking category" message.
 *
 * @param string $categoryKey Tracking category key
 * @return mixed The 'cats' entry for the key (its element type is not
 *   visible here; the caller passes each element to refreshCategory())
 */
489 private function getPossibleCategories( $categoryKey ) {
490 $cats = MediaWikiServices::getInstance()->getTrackingCategories()->getTrackingCategories();
491 if ( isset( $cats[$categoryKey] ) ) {
492 return $cats[$categoryKey][
'cats'];
494 $this->
fatalError(
"Unknown tracking category {$categoryKey}\n" );
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
error( $err, $die=0)
Throw an error to the user.
addArg( $arg, $description, $required=true)
Add some args that are needed.
output( $out, $channel=null)
Throw some output to the user.
getHookRunner()
Get a HookRunner for running core hooks.
hasOption( $name)
Checks to see if a particular option was set.
getArg( $argId=0, $default=null)
Get an argument.
addDescription( $text)
Set the description text.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.