26require_once __DIR__ .
'/Maintenance.php';
40 parent::__construct();
42 $this->
addOption(
'dfn-only',
'Delete links from nonexistent articles only' );
43 $this->
addOption(
'new-only',
'Only affect articles with just a single edit' );
44 $this->
addOption(
'redirects-only',
'Only fix redirects, not all links' );
45 $this->
addOption(
'old-redirects-only',
'Only fix redirects with no redirect table entry' );
46 $this->
addOption(
'e',
'Last page id to refresh',
false,
true );
47 $this->
addOption(
'dfn-chunk-size',
'Maximum number of existent IDs to check per ' .
48 'query, default 100000',
false,
true );
49 $this->
addOption(
'namespace',
'Only fix pages in this namespace',
false,
true );
50 $this->
addOption(
'category',
'Only fix pages in this category',
false,
true );
51 $this->
addOption(
'tracking-category',
'Only fix pages in this tracking category',
false,
true );
52 $this->
addArg(
'start',
'Page_id to start from, default 1',
false );
61 $start = (int)$this->
getArg( 0 ) ?:
null;
62 $end = (int)$this->
getOption(
'e' ) ?:
null;
63 $dfnChunkSize = (int)$this->
getOption(
'dfn-chunk-size', 100000 );
66 $this->
namespace = false;
68 $this->
namespace = (int)$ns;
70 if ( ( $category = $this->
getOption(
'category',
false ) ) !==
false ) {
71 $title = Title::makeTitleSafe(
NS_CATEGORY, $category );
73 $this->
error(
"'$category' is an invalid category name!\n",
true );
76 } elseif ( ( $category = $this->
getOption(
'tracking-category',
false ) ) !==
false ) {
78 } elseif ( !$this->
hasOption(
'dfn-only' ) ) {
80 $redir = $this->
hasOption(
'redirects-only' );
81 $oldRedir = $this->
hasOption(
'old-redirects-only' );
90 return $this->
namespace !== false
104 $end =
null, $redirectsOnly =
false, $oldRedirectsOnly =
false
108 if ( $start ===
null ) {
113 Hooks::run(
'MaintenanceRefreshLinksInit', [ $this ] );
115 $what = $redirectsOnly ?
"redirects" :
"links";
117 if ( $oldRedirectsOnly ) {
118 # This entire code path is cut-and-pasted from below. Hurrah.
121 "page_is_redirect=1",
127 [
'page',
'redirect' ],
132 [
'redirect' => [
"LEFT JOIN",
"page_id=rd_from" ] ]
134 $num =
$res->numRows();
135 $this->
output(
"Refreshing $num old redirects from $start...\n" );
139 foreach (
$res as $row ) {
140 if ( !( ++$i % self::REPORTING_INTERVAL ) ) {
146 } elseif ( $newOnly ) {
147 $this->
output(
"Refreshing $what from " );
152 self::intervalCond(
$dbr,
'page_id', $start, $end ),
156 $num =
$res->numRows();
157 $this->
output(
"$num new articles...\n" );
160 foreach (
$res as $row ) {
161 if ( !( ++$i % self::REPORTING_INTERVAL ) ) {
165 if ( $redirectsOnly ) {
173 $maxPage =
$dbr->selectField(
'page',
'max(page_id)',
false );
174 $maxRD =
$dbr->selectField(
'redirect',
'max(rd_from)',
false );
175 $end = max( $maxPage, $maxRD );
177 $this->
output(
"Refreshing redirects table.\n" );
178 $this->
output(
"Starting from page_id $start of $end.\n" );
180 for ( $id = $start; $id <= $end; $id++ ) {
181 if ( !( $id % self::REPORTING_INTERVAL ) ) {
188 if ( !$redirectsOnly ) {
189 $this->
output(
"Refreshing links tables.\n" );
190 $this->
output(
"Starting from page_id $start of $end.\n" );
192 for ( $id = $start; $id <= $end; $id++ ) {
193 if ( !( $id % self::REPORTING_INTERVAL ) ) {
219 if ( $page ===
null ) {
222 $dbw->delete(
'redirect', [
'rd_from' => $id ],
226 } elseif ( $this->
namespace !==
false
227 && !$page->getTitle()->inNamespace( $this->namespace )
234 if ( $content !==
null ) {
235 $rt = $content->getUltimateRedirectTarget();
238 if ( $rt ===
null ) {
241 $dbw->delete(
'redirect', [
'rd_from' => $id ], __METHOD__ );
244 $page->insertRedirectEntry( $rt );
249 $dbw->update(
'page', [
'page_is_redirect' => $fieldValue ],
250 [
'page_id' => $id ], __METHOD__ );
261 LinkCache::singleton()->clear();
263 if ( $page ===
null ) {
265 } elseif ( $ns !==
false
266 && !$page->getTitle()->inNamespace( $ns ) ) {
271 if ( $content ===
null ) {
275 $updates = $content->getSecondaryDataUpdates(
276 $page->getTitle(),
null,
false );
277 foreach ( $updates as $update ) {
278 DeferredUpdates::addUpdate( $update );
279 DeferredUpdates::doUpdates();
298 $this->
output(
"Deleting illegal entries from the links tables...\n" );
303 $nextStart =
$dbr->selectField(
306 [ self::intervalCond(
$dbr,
'page_id', $start, $end ) ]
309 [
'ORDER BY' =>
'page_id',
'OFFSET' => $chunkSize ]
312 if ( $nextStart !==
false ) {
317 $chunkEnd = $nextStart - 1;
323 $fmtStart = $start !==
null ?
"[$start" :
'(-INF';
324 $fmtChunkEnd = $chunkEnd !==
null ?
"$chunkEnd]" :
'INF)';
325 $this->
output(
" Checking interval $fmtStart, $fmtChunkEnd\n" );
330 }
while ( $nextStart !==
false );
344 'pagelinks' =>
'pl_from',
345 'imagelinks' =>
'il_from',
346 'categorylinks' =>
'cl_from',
347 'templatelinks' =>
'tl_from',
348 'externallinks' =>
'el_from',
349 'iwlinks' =>
'iwl_from',
350 'langlinks' =>
'll_from',
351 'redirect' =>
'rd_from',
352 'page_props' =>
'pp_page',
355 foreach ( $linksTables as $table => $field ) {
356 $this->
output(
" $table: 0" );
357 $tableStart = $start;
360 $ids =
$dbr->selectFieldValues(
364 self::intervalCond(
$dbr, $field, $tableStart, $end ),
365 "$field NOT IN ({$dbr->selectSQLText( 'page', 'page_id' )})",
368 [
'DISTINCT',
'ORDER BY' => $field,
'LIMIT' => $batchSize ]
371 $numIds = count( $ids );
374 $dbw->delete( $table, [ $field => $ids ], __METHOD__ );
375 $this->
output(
", $counter" );
376 $tableStart = $ids[$numIds - 1] + 1;
380 }
while ( $numIds >= $batchSize && ( $end ===
null || $tableStart <= $end ) );
382 $this->
output(
" deleted.\n" );
399 if ( $start ===
null && $end ===
null ) {
400 return "$var IS NOT NULL";
401 } elseif ( $end ===
null ) {
402 return "$var >= {$db->addQuotes( $start )}";
403 } elseif ( $start ===
null ) {
404 return "$var <= {$db->addQuotes( $end )}";
406 return "$var BETWEEN {$db->addQuotes( $start )} AND {$db->addQuotes( $end )}";
419 $this->
error(
"Tracking category '$category' is disabled\n" );
423 foreach ( $cats as $cat ) {
434 $this->
output(
"Refreshing pages in category '{$category->getText()}'...\n" );
441 if ( $this->
namespace !==
false ) {
449 $finalConds = $conds;
450 $timestamp =
$dbr->addQuotes( $timestamp );
452 "(cl_timestamp > $timestamp OR (cl_timestamp = $timestamp AND cl_from > $lastId))";
453 $res =
$dbr->select( [
'page',
'categorylinks' ],
454 [
'page_id',
'cl_timestamp' ],
458 'ORDER BY' => [
'cl_timestamp',
'cl_from' ],
459 'LIMIT' => $this->mBatchSize,
463 foreach (
$res as $row ) {
464 if ( !( ++$i % self::REPORTING_INTERVAL ) ) {
468 $lastId = $row->page_id;
469 $timestamp = $row->cl_timestamp;
473 }
while (
$res->numRows() == $this->mBatchSize );
484 $cats = $trackingCategories->getTrackingCategories();
485 if ( isset( $cats[$categoryKey] ) ) {
486 return $cats[$categoryKey][
'cats'];
488 $this->
error(
"Unknown tracking category {$categoryKey}\n",
true );
wfWaitForSlaves( $ifWritesSince=null, $wiki=false, $cluster=false, $timeout=null)
Waits for the replica DBs to catch up to the master position.
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
addArg( $arg, $description, $required=true)
Add some args that are needed.
getDB( $db, $groups=[], $wiki=false)
Returns a database to be used by current maintenance script.
hasOption( $name)
Checks to see if a particular param exists.
getArg( $argId=0, $default=null)
Get an argument.
addDescription( $text)
Set the description text.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
setBatchSize( $s=0)
Set the batch size.
Maintenance script to refresh link tables.
dfnCheckInterval( $start=null, $end=null, $batchSize=100)
doRefreshLinks( $start, $newOnly=false, $end=null, $redirectsOnly=false, $oldRedirectsOnly=false)
Do the actual link refreshing.
static intervalCond(IDatabase $db, $var, $start, $end)
Build a SQL expression for a closed interval (i.e. BETWEEN), where either bound may be null to leave that side open.
getPossibleCategories( $categoryKey)
Returns a list of possible categories for a given tracking category key.
__construct()
Default constructor.
refreshTrackingCategory( $category)
Refreshes links for pages in a tracking category.
static fixLinksFromArticle( $id, $ns=false)
Run LinksUpdate for all links on a given page_id.
execute()
Do the actual work.
refreshCategory(Title $category)
Refreshes links to a category.
fixRedirect( $id)
Update the redirect entry for a given page.
deleteLinksFromNonexistent( $start=null, $end=null, $batchSize=100, $chunkSize=100000)
Removes links from nonexistent pages from the pagelinks, imagelinks, categorylinks, templatelinks, externallinks, iwlinks, langlinks, redirect and page_props tables.
Represents a title within MediaWiki.
getDBkey()
Get the main part with underscores.
This class performs some operations related to tracking categories, such as creating a list of all su...
static newFromID( $id, $from='fromdb')
Constructor from a page id.
design txt This is a brief overview of the new design More thorough and up to date information is available on the documentation wiki at etc Handles the details of getting and saving to the user table of the and dealing with sessions and cookies OutputPage Encapsulates the entire HTML page that will be sent in response to any server request It is used by calling its functions to add in any and then calling output() to send it all. It could be easily changed to send incrementally if that becomes useful
do that in ParserLimitReportFormat instead use this to modify the parameters of the image all existing parser cache entries will be invalid To avoid you ll need to handle that somehow(e.g. with the RejectParserCacheValue hook) because MediaWiki won 't do it for you. & $defaults error
require_once RUN_MAINTENANCE_IF_MAIN