28require_once __DIR__ .
'/Maintenance.php';
42 parent::__construct();
44 $this->
addOption(
'dfn-only',
'Delete links from nonexistent articles only' );
45 $this->
addOption(
'new-only',
'Only affect articles with just a single edit' );
46 $this->
addOption(
'redirects-only',
'Only fix redirects, not all links' );
47 $this->
addOption(
'old-redirects-only',
'Only fix redirects with no redirect table entry' );
48 $this->
addOption(
'e',
'Last page id to refresh',
false,
true );
49 $this->
addOption(
'dfn-chunk-size',
'Maximum number of existent IDs to check per ' .
50 'query, default 100000',
false,
true );
51 $this->
addOption(
'namespace',
'Only fix pages in this namespace',
false,
true );
52 $this->
addOption(
'category',
'Only fix pages in this category',
false,
true );
53 $this->
addOption(
'tracking-category',
'Only fix pages in this tracking category',
false,
true );
54 $this->
addArg(
'start',
'Page_id to start from, default 1',
false );
63 $start = (int)$this->
getArg( 0 ) ?:
null;
64 $end = (int)$this->
getOption(
'e' ) ?:
null;
65 $dfnChunkSize = (int)$this->
getOption(
'dfn-chunk-size', 100000 );
68 $this->
namespace = false;
70 $this->
namespace = (int)$ns;
72 if ( ( $category = $this->
getOption(
'category',
false ) ) !==
false ) {
75 $this->
fatalError(
"'$category' is an invalid category name!\n" );
78 } elseif ( ( $category = $this->
getOption(
'tracking-category',
false ) ) !==
false ) {
80 } elseif ( !$this->
hasOption(
'dfn-only' ) ) {
82 $redir = $this->
hasOption(
'redirects-only' );
83 $oldRedir = $this->
hasOption(
'old-redirects-only' );
92 return $this->
namespace !== false
93 ? [ 'page_namespace' => $this->namespace ]
106 $end =
null, $redirectsOnly =
false, $oldRedirectsOnly =
false
110 if ( $start ===
null ) {
115 $this->
getHookRunner()->onMaintenanceRefreshLinksInit( $this );
117 $what = $redirectsOnly ?
"redirects" :
"links";
118 $lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
120 if ( $oldRedirectsOnly ) {
121 # This entire code path is cut-and-pasted from below. Hurrah.
124 "page_is_redirect=1",
126 self::intervalCond(
$dbr,
'page_id', $start, $end ),
130 [
'page',
'redirect' ],
135 [
'redirect' => [
"LEFT JOIN",
"page_id=rd_from" ] ]
137 $num =
$res->numRows();
138 $this->
output(
"Refreshing $num old redirects from $start...\n" );
142 foreach (
$res as $row ) {
143 if ( !( ++$i % self::REPORTING_INTERVAL ) ) {
145 $lbFactory->waitForReplication();
149 } elseif ( $newOnly ) {
150 $this->
output(
"Refreshing $what from " );
155 self::intervalCond(
$dbr,
'page_id', $start, $end ),
159 $num =
$res->numRows();
160 $this->
output(
"$num new articles...\n" );
163 foreach (
$res as $row ) {
164 if ( !( ++$i % self::REPORTING_INTERVAL ) ) {
166 $lbFactory->waitForReplication();
168 if ( $redirectsOnly ) {
171 self::fixLinksFromArticle( $row->page_id, $this->namespace );
176 $maxPage =
$dbr->selectField(
'page',
'max(page_id)',
'', __METHOD__ );
177 $maxRD =
$dbr->selectField(
'redirect',
'max(rd_from)',
'', __METHOD__ );
178 $end = max( $maxPage, $maxRD );
180 $this->
output(
"Refreshing redirects table.\n" );
181 $this->
output(
"Starting from page_id $start of $end.\n" );
183 for ( $id = $start; $id <= $end; $id++ ) {
184 if ( !( $id % self::REPORTING_INTERVAL ) ) {
186 $lbFactory->waitForReplication();
191 if ( !$redirectsOnly ) {
192 $this->
output(
"Refreshing links tables.\n" );
193 $this->
output(
"Starting from page_id $start of $end.\n" );
195 for ( $id = $start; $id <= $end; $id++ ) {
196 if ( !( $id % self::REPORTING_INTERVAL ) ) {
198 $lbFactory->waitForReplication();
200 self::fixLinksFromArticle( $id, $this->
namespace );
219 $page = WikiPage::newFromID( $id );
222 if ( $page ===
null ) {
225 $dbw->delete(
'redirect', [
'rd_from' => $id ],
229 } elseif ( $this->
namespace !==
false
230 && !$page->getTitle()->inNamespace( $this->namespace )
236 $content = $page->getContent( RevisionRecord::RAW );
238 $rt =
$content->getUltimateRedirectTarget();
241 if ( $rt ===
null ) {
244 $dbw->delete(
'redirect', [
'rd_from' => $id ], __METHOD__ );
247 $page->insertRedirectEntry( $rt );
252 $dbw->update(
'page', [
'page_is_redirect' => $fieldValue ],
253 [
'page_id' => $id ], __METHOD__ );
262 $page = WikiPage::newFromID( $id );
264 MediaWikiServices::getInstance()->getLinkCache()->clear();
266 if ( $page ===
null ) {
268 } elseif ( $ns !==
false
269 && !$page->getTitle()->inNamespace( $ns ) ) {
276 $page->doSecondaryDataUpdates( [
277 'defer' => DeferredUpdates::POSTSEND,
278 'recursive' =>
false,
280 DeferredUpdates::doUpdates();
297 MediaWikiServices::getInstance()->getDBLoadBalancerFactory()->waitForReplication();
298 $this->
output(
"Deleting illegal entries from the links tables...\n" );
303 $nextStart =
$dbr->selectField(
306 [ self::intervalCond(
$dbr,
'page_id', $start, $end ) ]
309 [
'ORDER BY' =>
'page_id',
'OFFSET' => $chunkSize ]
312 if ( $nextStart !==
false ) {
317 $chunkEnd = $nextStart - 1;
323 $fmtStart = $start !==
null ?
"[$start" :
'(-INF';
324 $fmtChunkEnd = $chunkEnd !==
null ?
"$chunkEnd]" :
'INF)';
325 $this->
output(
" Checking interval $fmtStart, $fmtChunkEnd\n" );
330 }
while ( $nextStart !==
false );
345 'pagelinks' =>
'pl_from',
346 'imagelinks' =>
'il_from',
347 'categorylinks' =>
'cl_from',
348 'templatelinks' =>
'tl_from',
349 'externallinks' =>
'el_from',
350 'iwlinks' =>
'iwl_from',
351 'langlinks' =>
'll_from',
352 'redirect' =>
'rd_from',
353 'page_props' =>
'pp_page',
356 $lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
357 foreach ( $linksTables as $table => $field ) {
358 $this->
output(
" $table: 0" );
359 $tableStart = $start;
362 $ids =
$dbr->selectFieldValues(
366 self::intervalCond(
$dbr, $field, $tableStart, $end ),
367 "$field NOT IN ({$dbr->selectSQLText( 'page', 'page_id', [], __METHOD__ )})",
370 [
'DISTINCT',
'ORDER BY' => $field,
'LIMIT' => $batchSize ]
373 $numIds = count( $ids );
376 $dbw->delete( $table, [ $field => $ids ], __METHOD__ );
377 $this->
output(
", $counter" );
378 $tableStart = $ids[$numIds - 1] + 1;
379 $lbFactory->waitForReplication();
382 }
while ( $numIds >= $batchSize && ( $end ===
null || $tableStart <= $end ) );
384 $this->
output(
" deleted.\n" );
401 if ( $start ===
null && $end ===
null ) {
402 return "$var IS NOT NULL";
403 } elseif ( $end ===
null ) {
404 return "$var >= {$db->addQuotes( $start )}";
405 } elseif ( $start ===
null ) {
406 return "$var <= {$db->addQuotes( $end )}";
408 return "$var BETWEEN {$db->addQuotes( $start )} AND {$db->addQuotes( $end )}";
421 $this->
error(
"Tracking category '$category' is disabled\n" );
425 foreach ( $cats as $cat ) {
436 $this->
output(
"Refreshing pages in category '{$category->getText()}'...\n" );
443 if ( $this->
namespace !==
false ) {
444 $conds[
'page_namespace'] = $this->namespace;
450 $lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
452 $finalConds = $conds;
453 $timestamp =
$dbr->addQuotes( $timestamp );
455 "(cl_timestamp > $timestamp OR (cl_timestamp = $timestamp AND cl_from > $lastId))";
456 $res =
$dbr->select( [
'page',
'categorylinks' ],
457 [
'page_id',
'cl_timestamp' ],
461 'ORDER BY' => [
'cl_timestamp',
'cl_from' ],
466 foreach (
$res as $row ) {
467 if ( !( ++$i % self::REPORTING_INTERVAL ) ) {
469 $lbFactory->waitForReplication();
471 $lastId = $row->page_id;
472 $timestamp = $row->cl_timestamp;
473 self::fixLinksFromArticle( $row->page_id );
476 }
while (
$res->numRows() == $this->getBatchSize() );
487 $cats = $trackingCategories->getTrackingCategories();
488 if ( isset( $cats[$categoryKey] ) ) {
489 return $cats[$categoryKey][
'cats'];
491 $this->
fatalError(
"Unknown tracking category {$categoryKey}\n" );
const RUN_MAINTENANCE_IF_MAIN
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
error( $err, $die=0)
Throw an error to the user.
addArg( $arg, $description, $required=true)
Add some args that are needed.
output( $out, $channel=null)
Throw some output to the user.
getHookRunner()
Get a HookRunner for running core hooks.
hasOption( $name)
Checks to see if a particular option was set.
getBatchSize()
Returns batch size.
getArg( $argId=0, $default=null)
Get an argument.
addDescription( $text)
Set the description text.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
setBatchSize( $s=0)
Set the batch size.
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.
Maintenance script to refresh link tables.
dfnCheckInterval( $start=null, $end=null, $batchSize=100)
doRefreshLinks( $start, $newOnly=false, $end=null, $redirectsOnly=false, $oldRedirectsOnly=false)
Do the actual link refreshing.
static intervalCond(IDatabase $db, $var, $start, $end)
Build a SQL expression for a closed interval (i.e. closed on both ends).
getPossibleCategories( $categoryKey)
Returns a list of possible categories for a given tracking category key.
__construct()
Default constructor.
refreshTrackingCategory( $category)
Refreshes links for pages in a tracking category.
static fixLinksFromArticle( $id, $ns=false)
Run LinksUpdate for all links on a given page_id.
execute()
Do the actual work.
refreshCategory(Title $category)
Refreshes links for pages in a category.
fixRedirect( $id)
Update the redirect entry for a given page.
deleteLinksFromNonexistent( $start=null, $end=null, $batchSize=100, $chunkSize=100000)
Removes links from non-existent pages from the pagelinks, imagelinks, categorylinks, templatelinks, externallinks, iwlinks, langlinks, redirect and page_props tables.
Represents a title within MediaWiki.
getDBkey()
Get the main part with underscores.
This class performs some operations related to tracking categories, such as creating a list of all such categories.