28 require_once __DIR__ .
'/Maintenance.php';
36 private const REPORTING_INTERVAL = 100;
39 parent::__construct();
41 $this->
addOption(
'verbose',
'Output information about link refresh progress',
false,
false,
'v' );
42 $this->
addOption(
'dfn-only',
'Delete links from nonexistent articles only' );
43 $this->
addOption(
'new-only',
'Only affect articles with just a single edit' );
44 $this->
addOption(
'redirects-only',
'Only fix redirects, not all links' );
45 $this->
addOption(
'old-redirects-only',
'Only fix redirects with no redirect table entry' );
46 $this->
addOption(
'touched-only',
'Only fix pages that have been touched after last update' );
47 $this->
addOption(
'e',
'Last page id to refresh',
false,
true );
48 $this->
addOption(
'dfn-chunk-size',
'Maximum number of existent IDs to check per ' .
49 'query, default 100000',
false,
true );
50 $this->
addOption(
'namespace',
'Only fix pages in this namespace',
false,
true );
51 $this->
addOption(
'category',
'Only fix pages in this category',
false,
true );
52 $this->
addOption(
'tracking-category',
'Only fix pages in this tracking category',
false,
true );
53 $this->
addOption(
'before-timestamp',
'Only fix pages that were last updated before this timestamp',
55 $this->
addArg(
'start',
'Page_id to start from, default 1',
false );
64 $start = (int)$this->
getArg( 0 ) ?:
null;
65 $end = (int)$this->
getOption(
'e' ) ?:
null;
66 $dfnChunkSize = (int)$this->
getOption(
'dfn-chunk-size', 100000 );
69 $this->deleteLinksFromNonexistent( $start, $end, $this->
getBatchSize(), $dfnChunkSize );
74 $builder = $dbr->newSelectQueryBuilder()
76 ->where( self::intervalCond( $dbr,
'page_id', $start, $end ) )
80 $builder->
andWhere( [
'page_namespace' => (
int)$this->
getOption(
'namespace' ) ] );
83 if ( $this->
hasOption(
'before-timestamp' ) ) {
84 $timeCond = $dbr->buildComparison(
'<', [
85 'page_links_updated' => $this->
getOption(
'before-timestamp' )
87 $builder->
andWhere( [
"$timeCond OR page_links_updated IS NULL" ] );
91 $category = $this->
getOption(
'category' );
92 $title = Title::makeTitleSafe(
NS_CATEGORY, $category );
94 $this->
fatalError(
"'$category' is an invalid category name!\n" );
96 $this->refreshCategory( $builder, $title );
97 } elseif ( $this->
hasOption(
'tracking-category' ) ) {
99 $this->refreshTrackingCategory( $builder, $this->
getOption(
'tracking-category' ) );
102 $redir = $this->
hasOption(
'redirects-only' );
103 $oldRedir = $this->
hasOption(
'old-redirects-only' );
104 $touched = $this->
hasOption(
'touched-only' );
105 $what = $redir ?
'redirects' :
'links';
107 $builder->
leftJoin(
'redirect',
null,
'page_id=rd_from' )
109 'page_is_redirect' => 1,
112 $this->
output(
"Refreshing old redirects from $start...\n" );
114 $builder->
andWhere( [
'page_is_new' => 1 ] );
115 $this->
output(
"Refreshing $what from new pages...\n" );
119 'page_touched > page_links_updated OR page_links_updated IS NULL',
122 $this->
output(
"Refreshing $what from pages...\n" );
124 $this->doRefreshLinks( $builder, $redir || $oldRedir );
125 if ( !$this->
hasOption(
'namespace' ) ) {
126 $this->deleteLinksFromNonexistent( $start, $end, $this->
getBatchSize(), $dfnChunkSize );
137 private function doRefreshLinks(
139 bool $redirectsOnly =
false,
140 array $indexFields = [
'page_id' ]
143 $this->
getHookRunner()->onMaintenanceRefreshLinksInit( $this );
146 $this->
output(
"Estimated page count: $estimateCount\n" );
149 $lastIndexes = array_fill_keys( $indexFields, 0 );
150 $selectFields = in_array(
'page_id', $indexFields )
151 ? $indexFields : [
'page_id', ...$indexFields ];
152 $verbose = $this->
hasOption(
'verbose' );
155 $batchCond = $dbr->buildComparison(
'>', $lastIndexes );
156 $res = ( clone $builder )->select( $selectFields )
157 ->andWhere( [ $batchCond ] )
158 ->orderBy( $indexFields )
159 ->caller( __METHOD__ )->fetchResultSet();
162 $this->
output(
"Refreshing links for {$res->numRows()} pages\n" );
165 foreach ( $res as $row ) {
166 if ( !( ++$i % self::REPORTING_INTERVAL ) ) {
171 $this->
output(
"Refreshing links for page ID {$row->page_id}\n" );
174 if ( !$redirectsOnly ) {
178 if ( $res->numRows() ) {
179 $res->seek( $res->numRows() - 1 );
180 foreach ( $indexFields as $field ) {
181 $lastIndexes[$field] = $res->current()->$field;
185 }
while ( $res->numRows() == $this->getBatchSize() );
206 if ( $page ===
null ) {
211 $content = $page->getContent( RevisionRecord::RAW );
213 $rt =
$content->getRedirectTarget();
217 if ( $rt ===
null ) {
220 $dbw->delete(
'redirect', [
'rd_from' => $id ], __METHOD__ );
223 $page->insertRedirectEntry( $rt );
228 $dbw->update(
'page', [
'page_is_redirect' => $fieldValue ],
229 [
'page_id' => $id ], __METHOD__ );
237 $services = MediaWikiServices::getInstance();
238 $page = $services->getWikiPageFactory()->newFromID( $id );
241 if ( $page ===
null ) {
248 $page->doSecondaryDataUpdates( [
249 'defer' => DeferredUpdates::POSTSEND,
250 'causeAction' =>
'refresh-links-maintenance',
251 'recursive' =>
false,
267 private function deleteLinksFromNonexistent( $start =
null, $end =
null, $batchSize = 100,
271 $this->
output(
"Deleting illegal entries from the links tables...\n" );
276 $nextStart = $dbr->newSelectQueryBuilder()
277 ->select(
'page_id' )
279 ->where( [ self::intervalCond( $dbr,
'page_id', $start, $end ) ] )
280 ->orderBy(
'page_id' )
281 ->offset( $chunkSize )
282 ->caller( __METHOD__ )->fetchField();
284 if ( $nextStart !==
false ) {
289 $chunkEnd = $nextStart - 1;
295 $fmtStart = $start !==
null ?
"[$start" :
'(-INF';
296 $fmtChunkEnd = $chunkEnd !==
null ?
"$chunkEnd]" :
'INF)';
297 $this->
output(
" Checking interval $fmtStart, $fmtChunkEnd\n" );
298 $this->dfnCheckInterval( $start, $chunkEnd, $batchSize );
302 }
while ( $nextStart !==
false );
311 private function dfnCheckInterval( $start =
null, $end =
null, $batchSize = 100 ) {
317 'pagelinks' =>
'pl_from',
318 'imagelinks' =>
'il_from',
319 'categorylinks' =>
'cl_from',
320 'templatelinks' =>
'tl_from',
321 'externallinks' =>
'el_from',
322 'iwlinks' =>
'iwl_from',
323 'langlinks' =>
'll_from',
324 'redirect' =>
'rd_from',
325 'page_props' =>
'pp_page',
328 foreach ( $linksTables as $table => $field ) {
329 $this->
output(
" $table: 0" );
330 $tableStart = $start;
333 $ids = $dbr->newSelectQueryBuilder()
337 ->leftJoin(
'page',
null,
"$field = page_id" )
338 ->where( self::intervalCond( $dbr, $field, $tableStart, $end ) )
339 ->andWhere( [
'page_id' =>
null ] )
341 ->limit( $batchSize )
342 ->caller( __METHOD__ )->fetchFieldValues();
344 $numIds = count( $ids );
347 $dbw->delete( $table, [ $field => $ids ], __METHOD__ );
348 $this->
output(
", $counter" );
349 $tableStart = $ids[$numIds - 1] + 1;
353 }
while ( $numIds >= $batchSize && ( $end ===
null || $tableStart <= $end ) );
355 $this->
output(
" deleted.\n" );
371 private static function intervalCond(
IReadableDatabase $db, $var, $start, $end ) {
372 if ( $start ===
null && $end ===
null ) {
373 return "$var IS NOT NULL";
374 } elseif ( $end ===
null ) {
375 return "$var >= " . $db->
addQuotes( $start );
376 } elseif ( $start ===
null ) {
377 return "$var <= " . $db->
addQuotes( $end );
390 $cats = $this->getPossibleCategories( $category );
393 $this->
error(
"Tracking category '$category' is disabled\n" );
397 foreach ( $cats as $cat ) {
398 $this->refreshCategory( clone $builder, $cat );
409 $this->
output(
"Refreshing pages in category '{$category->getText()}'...\n" );
411 $builder->
join(
'categorylinks',
null,
'page_id=cl_from' )
412 ->andWhere( [
'cl_to' => $category->
getDBkey() ] );
413 $this->doRefreshLinks( $builder,
false, [
'cl_timestamp',
'cl_from' ] );
422 private function getPossibleCategories( $categoryKey ) {
424 if ( isset( $cats[$categoryKey] ) ) {
425 return $cats[$categoryKey][
'cats'];
427 $this->
fatalError(
"Unknown tracking category {$categoryKey}\n" );
432 require_once RUN_MAINTENANCE_IF_MAIN;
static doUpdates( $stage=self::ALL)
Consume and execute all pending updates.
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
getDB( $db, $groups=[], $dbDomain=false)
Returns a database to be used by current maintenance script.
error( $err, $die=0)
Throw an error to the user.
addArg( $arg, $description, $required=true, $multi=false)
Add some args that are needed.
output( $out, $channel=null)
Throw some output to the user.
getHookRunner()
Get a HookRunner for running core hooks.
waitForReplication()
Wait for replica DBs to catch up.
hasOption( $name)
Checks to see if a particular option was set.
getServiceContainer()
Returns the main service container.
getBatchSize()
Returns batch size.
getArg( $argId=0, $default=null)
Get an argument.
addDescription( $text)
Set the description text.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.
static fixRedirect(Maintenance $maint, $id)
Update the redirect entry for a given page.
__construct()
Default constructor.
static fixLinksFromArticle( $id)
Run LinksUpdate for all links on a given page_id.
execute()
Do the actual work.