35 private const REPORTING_INTERVAL = 100;
44 parent::__construct();
46 $this->
addOption(
'verbose',
'Output information about link refresh progress',
false,
false,
'v' );
47 $this->
addOption(
'dfn-only',
'Delete links from nonexistent articles only' );
48 $this->
addOption(
'new-only',
'Only affect articles with just a single edit' );
49 $this->
addOption(
'redirects-only',
'Only fix redirects, not all links' );
50 $this->
addOption(
'old-redirects-only',
'Only fix redirects with no redirect table entry' );
51 $this->
addOption(
'e',
'Last page id to refresh',
false,
true );
52 $this->
addOption(
'dfn-chunk-size',
'Maximum number of existent IDs to check per ' .
53 'query, default 100000',
false,
true );
54 $this->
addOption(
'namespace',
'Only fix pages in this namespace',
false,
true );
55 $this->
addOption(
'category',
'Only fix pages in this category',
false,
true );
56 $this->
addOption(
'tracking-category',
'Only fix pages in this tracking category',
false,
true );
57 $this->
addOption(
'before-timestamp',
'Only fix pages that were last updated before this timestamp',
59 $this->
addArg(
'start',
'Page_id to start from, default 1',
false );
68 $start = (int)$this->
getArg( 0 ) ?:
null;
69 $end = (int)$this->
getOption(
'e' ) ?:
null;
70 $dfnChunkSize = (int)$this->
getOption(
'dfn-chunk-size', 100000 );
74 $this->
namespace = false;
76 $this->
namespace = (int)$ns;
78 $this->beforeTimestamp = $this->
getOption(
'before-timestamp',
false );
81 $category = $this->
getOption(
'category' );
84 $this->
fatalError(
"'$category' is an invalid category name!\n" );
86 $this->refreshCategory(
$title );
87 } elseif ( $this->
hasOption(
'tracking-category' ) ) {
88 $category = $this->
getOption(
'tracking-category' );
91 $this->
fatalError(
"'$category' is an invalid category name!\n" );
93 $this->refreshTrackingCategory( $this->
getOption(
'tracking-category' ) );
94 } elseif ( !$this->
hasOption(
'dfn-only' ) ) {
96 $redir = $this->
hasOption(
'redirects-only' );
97 $oldRedir = $this->
hasOption(
'old-redirects-only' );
98 $this->doRefreshLinks( $start, $new, $end, $redir, $oldRedir );
99 $this->deleteLinksFromNonexistent(
null,
null, $this->
getBatchSize(), $dfnChunkSize );
101 $this->deleteLinksFromNonexistent( $start, $end, $this->
getBatchSize(), $dfnChunkSize );
105 private function namespaceCond() {
106 return $this->
namespace !== false
107 ? [ 'page_namespace' => $this->namespace ]
119 private function doRefreshLinks( $start, $newOnly =
false,
120 $end =
null, $redirectsOnly =
false, $oldRedirectsOnly =
false
124 if ( $start ===
null ) {
129 $this->
getHookRunner()->onMaintenanceRefreshLinksInit( $this );
131 $what = $redirectsOnly ?
"redirects" :
"links";
133 if ( $oldRedirectsOnly ) {
134 # This entire code path is cut-and-pasted from below. Hurrah.
137 "page_is_redirect=1",
139 self::intervalCond(
$dbr,
'page_id', $start, $end ),
140 ] + $this->namespaceCond();
143 [
'page',
'redirect' ],
148 [
'redirect' => [
"LEFT JOIN",
"page_id=rd_from" ] ]
150 $num =
$res->numRows();
151 $this->
output(
"Refreshing $num old redirects from $start...\n" );
155 foreach (
$res as $row ) {
156 if ( !( ++$i % self::REPORTING_INTERVAL ) ) {
160 $this->fixRedirect( $row->page_id );
162 } elseif ( $newOnly ) {
163 $this->
output(
"Refreshing $what from " );
168 self::intervalCond(
$dbr,
'page_id', $start, $end ),
169 ] + $this->namespaceCond(),
172 $num =
$res->numRows();
173 $this->
output(
"$num new articles...\n" );
176 foreach (
$res as $row ) {
177 if ( !( ++$i % self::REPORTING_INTERVAL ) ) {
181 if ( $redirectsOnly ) {
182 $this->fixRedirect( $row->page_id );
184 self::fixLinksFromArticle( $row->page_id, $this->namespace, $this->beforeTimestamp );
189 $maxPage =
$dbr->selectField(
'page',
'max(page_id)',
'', __METHOD__ );
190 $maxRD =
$dbr->selectField(
'redirect',
'max(rd_from)',
'', __METHOD__ );
191 $end = max( $maxPage, $maxRD );
193 $this->
output(
"Refreshing redirects table.\n" );
194 $this->
output(
"Starting from page_id $start of $end.\n" );
196 for ( $id = $start; $id <= $end; $id++ ) {
197 if ( !( $id % self::REPORTING_INTERVAL ) ) {
201 $this->fixRedirect( $id );
204 if ( !$redirectsOnly ) {
205 $this->
output(
"Refreshing links tables.\n" );
206 $this->
output(
"Starting from page_id $start of $end.\n" );
208 for ( $id = $start; $id <= $end; $id++ ) {
209 if ( !( $id % self::REPORTING_INTERVAL ) ) {
213 self::fixLinksFromArticle( $id, $this->
namespace, $this->beforeTimestamp );
231 private function fixRedirect( $id ) {
232 $page = MediaWikiServices::getInstance()->getWikiPageFactory()->newFromID( $id );
235 if ( $page ===
null ) {
238 $dbw->delete(
'redirect', [
'rd_from' => $id ],
242 } elseif ( $this->
namespace !==
false
243 && !$page->getTitle()->inNamespace( $this->namespace )
246 } elseif ( $this->beforeTimestamp !==
false
247 && $page->getLinksTimestamp() >= $this->beforeTimestamp
253 $content = $page->getContent( RevisionRecord::RAW );
255 $rt =
$content->getRedirectTarget();
258 if ( $rt ===
null ) {
261 $dbw->delete(
'redirect', [
'rd_from' => $id ], __METHOD__ );
264 $page->insertRedirectEntry( $rt );
269 $dbw->update(
'page', [
'page_is_redirect' => $fieldValue ],
270 [
'page_id' => $id ], __METHOD__ );
280 $services = MediaWikiServices::getInstance();
281 $page = $services->getWikiPageFactory()->newFromID( $id );
283 $services->getLinkCache()->clear();
285 if ( $page ===
null ) {
287 } elseif ( $ns !==
false
288 && !$page->getTitle()->inNamespace( $ns )
300 $page->doSecondaryDataUpdates( [
301 'defer' => DeferredUpdates::POSTSEND,
302 'recursive' =>
false,
304 DeferredUpdates::doUpdates();
318 private function deleteLinksFromNonexistent( $start =
null, $end =
null, $batchSize = 100,
321 $this->waitForReplication();
322 $this->output(
"Deleting illegal entries from the links tables...\n" );
327 $nextStart =
$dbr->selectField(
330 [ self::intervalCond(
$dbr,
'page_id', $start, $end ) ]
331 + $this->namespaceCond(),
333 [
'ORDER BY' =>
'page_id',
'OFFSET' => $chunkSize ]
336 if ( $nextStart !==
false ) {
341 $chunkEnd = $nextStart - 1;
347 $fmtStart = $start !==
null ?
"[$start" :
'(-INF';
348 $fmtChunkEnd = $chunkEnd !==
null ?
"$chunkEnd]" :
'INF)';
349 $this->
output(
" Checking interval $fmtStart, $fmtChunkEnd\n" );
350 $this->dfnCheckInterval( $start, $chunkEnd, $batchSize );
354 }
while ( $nextStart !==
false );
363 private function dfnCheckInterval( $start =
null, $end =
null, $batchSize = 100 ) {
369 'pagelinks' =>
'pl_from',
370 'imagelinks' =>
'il_from',
371 'categorylinks' =>
'cl_from',
372 'templatelinks' =>
'tl_from',
373 'externallinks' =>
'el_from',
374 'iwlinks' =>
'iwl_from',
375 'langlinks' =>
'll_from',
376 'redirect' =>
'rd_from',
377 'page_props' =>
'pp_page',
380 foreach ( $linksTables as $table => $field ) {
381 $this->
output(
" $table: 0" );
382 $tableStart = $start;
385 $ids =
$dbr->selectFieldValues(
389 self::intervalCond(
$dbr, $field, $tableStart, $end ),
390 "$field NOT IN ({$dbr->selectSQLText( 'page', 'page_id', [], __METHOD__ )})",
393 [
'DISTINCT',
'ORDER BY' => $field,
'LIMIT' => $batchSize ]
396 $numIds = count( $ids );
399 $dbw->delete( $table, [ $field => $ids ], __METHOD__ );
400 $this->
output(
", $counter" );
401 $tableStart = $ids[$numIds - 1] + 1;
405 }
while ( $numIds >= $batchSize && ( $end ===
null || $tableStart <= $end ) );
407 $this->
output(
" deleted.\n" );
423 private static function intervalCond(
IReadableDatabase $db, $var, $start, $end ) {
424 if ( $start ===
null && $end ===
null ) {
425 return "$var IS NOT NULL";
426 } elseif ( $end ===
null ) {
427 return "$var >= " . $db->
addQuotes( $start );
428 } elseif ( $start ===
null ) {
429 return "$var <= " . $db->
addQuotes( $end );
440 private function refreshTrackingCategory( $category ) {
441 $cats = $this->getPossibleCategories( $category );
444 $this->
error(
"Tracking category '$category' is disabled\n" );
448 foreach ( $cats as $cat ) {
449 $this->refreshCategory( Title::newFromLinkTarget( $cat ) );
458 private function refreshCategory(
Title $category ) {
459 $this->
output(
"Refreshing pages in category '{$category->getText()}'...\n" );
465 ] + $this->namespaceCond();
471 $finalConds = $conds;
472 $finalConds[] =
$dbr->buildComparison(
'>', [
473 'cl_timestamp' => $timestamp,
474 'cl_from' => $lastId,
476 $res =
$dbr->select( [
'page',
'categorylinks' ],
477 [
'page_id',
'cl_timestamp' ],
481 'ORDER BY' => [
'cl_timestamp',
'cl_from' ],
487 $this->
output(
"Refreshing links for {$res->numRows()} pages\n" );
490 foreach (
$res as $row ) {
491 if ( !( ++$i % self::REPORTING_INTERVAL ) ) {
495 $lastId = $row->page_id;
496 $timestamp = $row->cl_timestamp;
498 $this->
output(
"Refreshing links for page ID {$row->page_id}\n" );
500 self::fixLinksFromArticle( $row->page_id,
false, $this->beforeTimestamp );
503 }
while (
$res->numRows() == $this->getBatchSize() );
512 private function getPossibleCategories( $categoryKey ) {
513 $cats = MediaWikiServices::getInstance()->getTrackingCategories()->getTrackingCategories();
514 if ( isset( $cats[$categoryKey] ) ) {
515 return $cats[$categoryKey][
'cats'];
517 $this->
fatalError(
"Unknown tracking category {$categoryKey}\n" );
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
error( $err, $die=0)
Throw an error to the user.
addArg( $arg, $description, $required=true, $multi=false)
Add some args that are needed.
output( $out, $channel=null)
Throw some output to the user.
getHookRunner()
Get a HookRunner for running core hooks.
waitForReplication()
Wait for replica DBs to catch up.
hasOption( $name)
Checks to see if a particular option was set.
getArg( $argId=0, $default=null)
Get an argument.
addDescription( $text)
Set the description text.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.