28require_once __DIR__ .
'/Maintenance.php';
42 parent::__construct();
44 $this->
addOption(
'dfn-only',
'Delete links from nonexistent articles only' );
45 $this->
addOption(
'new-only',
'Only affect articles with just a single edit' );
46 $this->
addOption(
'redirects-only',
'Only fix redirects, not all links' );
47 $this->
addOption(
'old-redirects-only',
'Only fix redirects with no redirect table entry' );
48 $this->
addOption(
'e',
'Last page id to refresh',
false,
true );
49 $this->
addOption(
'dfn-chunk-size',
'Maximum number of existent IDs to check per ' .
50 'query, default 100000',
false,
true );
51 $this->
addOption(
'namespace',
'Only fix pages in this namespace',
false,
true );
52 $this->
addOption(
'category',
'Only fix pages in this category',
false,
true );
53 $this->
addOption(
'tracking-category',
'Only fix pages in this tracking category',
false,
true );
54 $this->
addArg(
'start',
'Page_id to start from, default 1',
false );
63 $start = (int)$this->
getArg( 0 ) ?:
null;
64 $end = (int)$this->
getOption(
'e' ) ?:
null;
65 $dfnChunkSize = (int)$this->
getOption(
'dfn-chunk-size', 100000 );
68 $this->
namespace = false;
70 $this->
namespace = (int)$ns;
72 if ( ( $category = $this->
getOption(
'category',
false ) ) !==
false ) {
75 $this->
fatalError(
"'$category' is an invalid category name!\n" );
78 } elseif ( ( $category = $this->
getOption(
'tracking-category',
false ) ) !==
false ) {
80 } elseif ( !$this->
hasOption(
'dfn-only' ) ) {
82 $redir = $this->
hasOption(
'redirects-only' );
83 $oldRedir = $this->
hasOption(
'old-redirects-only' );
92 return $this->
namespace !== false
93 ? [ 'page_namespace' => $this->namespace ]
106 $end =
null, $redirectsOnly =
false, $oldRedirectsOnly =
false
110 if ( $start ===
null ) {
115 Hooks::run(
'MaintenanceRefreshLinksInit', [ $this ] );
117 $what = $redirectsOnly ?
"redirects" :
"links";
119 if ( $oldRedirectsOnly ) {
120 # This entire code path is cut-and-pasted from below. Hurrah.
123 "page_is_redirect=1",
125 self::intervalCond(
$dbr,
'page_id', $start, $end ),
129 [
'page',
'redirect' ],
134 [
'redirect' => [
"LEFT JOIN",
"page_id=rd_from" ] ]
136 $num =
$res->numRows();
137 $this->
output(
"Refreshing $num old redirects from $start...\n" );
141 foreach (
$res as $row ) {
142 if ( !( ++$i % self::REPORTING_INTERVAL ) ) {
148 } elseif ( $newOnly ) {
149 $this->
output(
"Refreshing $what from " );
154 self::intervalCond(
$dbr,
'page_id', $start, $end ),
158 $num =
$res->numRows();
159 $this->
output(
"$num new articles...\n" );
162 foreach (
$res as $row ) {
163 if ( !( ++$i % self::REPORTING_INTERVAL ) ) {
167 if ( $redirectsOnly ) {
170 self::fixLinksFromArticle( $row->page_id, $this->namespace );
175 $maxPage =
$dbr->selectField(
'page',
'max(page_id)',
'', __METHOD__ );
176 $maxRD =
$dbr->selectField(
'redirect',
'max(rd_from)',
'', __METHOD__ );
177 $end = max( $maxPage, $maxRD );
179 $this->
output(
"Refreshing redirects table.\n" );
180 $this->
output(
"Starting from page_id $start of $end.\n" );
182 for ( $id = $start; $id <= $end; $id++ ) {
183 if ( !( $id % self::REPORTING_INTERVAL ) ) {
190 if ( !$redirectsOnly ) {
191 $this->
output(
"Refreshing links tables.\n" );
192 $this->
output(
"Starting from page_id $start of $end.\n" );
194 for ( $id = $start; $id <= $end; $id++ ) {
195 if ( !( $id % self::REPORTING_INTERVAL ) ) {
199 self::fixLinksFromArticle( $id, $this->
namespace );
218 $page = WikiPage::newFromID( $id );
221 if ( $page ===
null ) {
224 $dbw->delete(
'redirect', [
'rd_from' => $id ],
228 } elseif ( $this->
namespace !==
false
229 && !$page->getTitle()->inNamespace( $this->namespace )
235 $content = $page->getContent( RevisionRecord::RAW );
237 $rt =
$content->getUltimateRedirectTarget();
240 if ( $rt ===
null ) {
243 $dbw->delete(
'redirect', [
'rd_from' => $id ], __METHOD__ );
246 $page->insertRedirectEntry( $rt );
251 $dbw->update(
'page', [
'page_is_redirect' => $fieldValue ],
252 [
'page_id' => $id ], __METHOD__ );
261 $page = WikiPage::newFromID( $id );
263 MediaWikiServices::getInstance()->getLinkCache()->clear();
265 if ( $page ===
null ) {
267 } elseif ( $ns !==
false
268 && !$page->getTitle()->inNamespace( $ns ) ) {
275 $page->doSecondaryDataUpdates( [
276 'defer' => DeferredUpdates::POSTSEND,
277 'recursive' =>
false,
279 DeferredUpdates::doUpdates();
297 $this->
output(
"Deleting illegal entries from the links tables...\n" );
302 $nextStart =
$dbr->selectField(
305 [ self::intervalCond(
$dbr,
'page_id', $start, $end ) ]
308 [
'ORDER BY' =>
'page_id',
'OFFSET' => $chunkSize ]
311 if ( $nextStart !==
false ) {
316 $chunkEnd = $nextStart - 1;
322 $fmtStart = $start !==
null ?
"[$start" :
'(-INF';
323 $fmtChunkEnd = $chunkEnd !==
null ?
"$chunkEnd]" :
'INF)';
324 $this->
output(
" Checking interval $fmtStart, $fmtChunkEnd\n" );
329 }
while ( $nextStart !==
false );
343 'pagelinks' =>
'pl_from',
344 'imagelinks' =>
'il_from',
345 'categorylinks' =>
'cl_from',
346 'templatelinks' =>
'tl_from',
347 'externallinks' =>
'el_from',
348 'iwlinks' =>
'iwl_from',
349 'langlinks' =>
'll_from',
350 'redirect' =>
'rd_from',
351 'page_props' =>
'pp_page',
354 foreach ( $linksTables as $table => $field ) {
355 $this->
output(
" $table: 0" );
356 $tableStart = $start;
359 $ids =
$dbr->selectFieldValues(
363 self::intervalCond(
$dbr, $field, $tableStart, $end ),
364 "$field NOT IN ({$dbr->selectSQLText( 'page', 'page_id' )})",
367 [
'DISTINCT',
'ORDER BY' => $field,
'LIMIT' => $batchSize ]
370 $numIds = count( $ids );
373 $dbw->delete( $table, [ $field => $ids ], __METHOD__ );
374 $this->
output(
", $counter" );
375 $tableStart = $ids[$numIds - 1] + 1;
379 }
while ( $numIds >= $batchSize && ( $end ===
null || $tableStart <= $end ) );
381 $this->
output(
" deleted.\n" );
398 if ( $start ===
null && $end ===
null ) {
399 return "$var IS NOT NULL";
400 } elseif ( $end ===
null ) {
401 return "$var >= {$db->addQuotes( $start )}";
402 } elseif ( $start ===
null ) {
403 return "$var <= {$db->addQuotes( $end )}";
405 return "$var BETWEEN {$db->addQuotes( $start )} AND {$db->addQuotes( $end )}";
418 $this->
error(
"Tracking category '$category' is disabled\n" );
422 foreach ( $cats as $cat ) {
433 $this->
output(
"Refreshing pages in category '{$category->getText()}'...\n" );
440 if ( $this->
namespace !==
false ) {
441 $conds[
'page_namespace'] = $this->namespace;
448 $finalConds = $conds;
449 $timestamp =
$dbr->addQuotes( $timestamp );
451 "(cl_timestamp > $timestamp OR (cl_timestamp = $timestamp AND cl_from > $lastId))";
452 $res =
$dbr->select( [
'page',
'categorylinks' ],
453 [
'page_id',
'cl_timestamp' ],
457 'ORDER BY' => [
'cl_timestamp',
'cl_from' ],
462 foreach (
$res as $row ) {
463 if ( !( ++$i % self::REPORTING_INTERVAL ) ) {
467 $lastId = $row->page_id;
468 $timestamp = $row->cl_timestamp;
469 self::fixLinksFromArticle( $row->page_id );
472 }
while (
$res->numRows() == $this->getBatchSize() );
483 $cats = $trackingCategories->getTrackingCategories();
484 if ( isset( $cats[$categoryKey] ) ) {
485 return $cats[$categoryKey][
'cats'];
487 $this->
fatalError(
"Unknown tracking category {$categoryKey}\n" );
wfWaitForSlaves( $ifWritesSince=null, $wiki=false, $cluster=false, $timeout=null)
Waits for the replica DBs to catch up to the master position.
const RUN_MAINTENANCE_IF_MAIN
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
error( $err, $die=0)
Throw an error to the user.
addArg( $arg, $description, $required=true)
Add some args that are needed.
output( $out, $channel=null)
Throw some output to the user.
hasOption( $name)
Checks to see if a particular option exists.
getBatchSize()
Returns batch size.
getArg( $argId=0, $default=null)
Get an argument.
addDescription( $text)
Set the description text.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
setBatchSize( $s=0)
Set the batch size.
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.
Maintenance script to refresh link tables.
dfnCheckInterval( $start=null, $end=null, $batchSize=100)
doRefreshLinks( $start, $newOnly=false, $end=null, $redirectsOnly=false, $oldRedirectsOnly=false)
Do the actual link refreshing.
static intervalCond(IDatabase $db, $var, $start, $end)
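Build a SQL expression for a closed interval (i.e. one that includes both endpoints, as with BETWEEN); a null $start or $end leaves that side of the interval unbounded.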
getPossibleCategories( $categoryKey)
Returns a list of possible categories for a given tracking category key.
__construct()
Default constructor.
refreshTrackingCategory( $category)
Refreshes links for pages in a tracking category.
static fixLinksFromArticle( $id, $ns=false)
Run LinksUpdate for all links on a given page_id.
execute()
Do the actual work.
refreshCategory(Title $category)
Refreshes links for the pages in a category.
fixRedirect( $id)
Update the redirect entry for a given page.
deleteLinksFromNonexistent( $start=null, $end=null, $batchSize=100, $chunkSize=100000)
Removes rows whose source page ID is not present in the page table from the pagelinks, imagelinks, categorylinks, templatelinks, externallinks, iwlinks, langlinks, redirect and page_props tables.
Represents a title within MediaWiki.
getDBkey()
Get the main part with underscores.
This class performs some operations related to tracking categories, such as creating a list of all su...