// Modulus applied to the per-row counter in doRefreshLinks(): the check
// `!( ++$i % self::REPORTING_INTERVAL )` is true on every 100th row.
// NOTE(review): what is done on those rows is not visible in this excerpt.
35 private const REPORTING_INTERVAL = 100;
// Registers the command-line options and the positional argument that the rest
// of the script reads back through hasOption()/getOption()/getArg().
// NOTE(review): the enclosing method header is not visible in this excerpt; the
// parent::__construct() call suggests this is the constructor — confirm.
// NOTE(review): addOption() arguments are presumably
// ( name, description, required, withArg, shortName ) — confirm against the
// Maintenance base class.
38 parent::__construct();
// Flag options (no value expected, judging by how they are read with hasOption()).
40 $this->
addOption(
'verbose',
'Output information about link refresh progress',
false,
false,
'v' );
41 $this->
addOption(
'dfn-only',
'Delete links from nonexistent articles only' );
42 $this->
addOption(
'new-only',
'Only affect articles with just a single edit' );
43 $this->
addOption(
'redirects-only',
'Only fix redirects, not all links' );
44 $this->
addOption(
'touched-only',
'Only fix pages that have been touched after last update' );
// Options below pass `true` as the fourth argument and are read with getOption().
45 $this->
addOption(
'e',
'Last page id to refresh',
false,
true );
46 $this->
addOption(
'dfn-chunk-size',
'Maximum number of existent IDs to check per ' .
47 'query, default 100,000',
false,
true );
48 $this->
addOption(
'namespace',
'Only fix pages in this namespace',
false,
true );
49 $this->
addOption(
'category',
'Only fix pages in this category',
false,
true );
50 $this->
addOption(
'tracking-category',
'Only fix pages in this tracking category',
false,
true );
// NOTE(review): the remaining arguments and the closing of this call are not
// visible in this excerpt.
51 $this->
addOption(
'before-timestamp',
'Only fix pages that were last updated before this timestamp',
// Optional positional argument: first page ID to process.
53 $this->
addArg(
'start',
'Page_id to start from, default 1',
false );
// Main flow: reads the options, builds a page query restricted by the requested
// filters, then dispatches to the category / tracking-category / general
// refresh paths.
// NOTE(review): the method header and several `if` conditions are not visible
// in this excerpt; comments below describe only the statements that are shown.

// Both bounds are cast to int first, so a missing or zero value becomes null
// through the `?:` fallback (null means "no bound" for intervalCond()).
62 $start = (int)$this->
getArg( 0 ) ?:
null;
63 $end = (int)$this->
getOption(
'e' ) ?:
null;
// Falls back to 100000 when --dfn-chunk-size is not given.
64 $dfnChunkSize = (int)$this->
getOption(
'dfn-chunk-size', 100_000 );
// NOTE(review): presumably guarded by the 'dfn-only' option — the condition is
// not visible here.
67 $this->deleteLinksFromNonexistent( $start, $end, $this->
getBatchSize(), $dfnChunkSize );
// Base query: pages whose page_id lies in [$start, $end].
72 $builder = $dbr->newSelectQueryBuilder()
74 ->where( self::intervalCond( $dbr,
'page_id', $start, $end ) )
// Restrict to a single namespace (value is cast to int).
78 $builder->
andWhere( [
'page_namespace' => (
int)$this->
getOption(
'namespace' ) ] );
// With --before-timestamp: match pages whose page_links_updated is older than
// the given timestamp or is NULL.
81 if ( $this->
hasOption(
'before-timestamp' ) ) {
83 $dbr->expr(
'page_links_updated',
'<', $this->getOption(
'before-timestamp' ) )
84 ->or(
'page_links_updated',
'=',
null )
// Category mode: resolve the name to a Title in NS_CATEGORY and refresh its
// members.
89 $category = $this->
getOption(
'category' );
90 $title = Title::makeTitleSafe(
NS_CATEGORY, $category );
// NOTE(review): presumably reached only when makeTitleSafe() failed — the
// guarding condition is not visible here.
92 $this->
fatalError(
"'$category' is an invalid category name!\n" );
94 $this->refreshCategory( $builder, $title );
// Tracking-category mode.
95 } elseif ( $this->
hasOption(
'tracking-category' ) ) {
97 $this->refreshTrackingCategory( $builder, $this->
getOption(
'tracking-category' ) );
// General mode.
100 $redir = $this->
hasOption(
'redirects-only' );
101 $touched = $this->
hasOption(
'touched-only' );
// Word used in the progress messages below.
102 $what = $redir ?
'redirects' :
'links';
// NOTE(review): presumably inside the 'new-only' branch — condition not visible.
104 $builder->
andWhere( [
'page_is_new' => 1 ] );
105 $this->
output(
"Refreshing $what from new pages...\n" );
// NOTE(review): the surrounding expression that uses this NULL test is not
// visible; $touched suggests it belongs to the 'touched-only' filter — confirm.
109 $dbr->expr(
'page_links_updated',
'=',
null )
113 $this->
output(
"Refreshing $what from pages...\n" );
115 $this->doRefreshLinks( $builder, $redir );
// The cleanup of links from nonexistent pages runs only when no namespace
// filter was given.
116 if ( !$this->
hasOption(
'namespace' ) ) {
117 $this->deleteLinksFromNonexistent( $start, $end, $this->
getBatchSize(), $dfnChunkSize );
// Runs the page query in batches and fixes the redirect entry and (unless
// $redirectsOnly is set) the links of every page returned.
//
// $redirectsOnly: when true, only fixRedirect() is called per page.
// $indexFields: fields used both for ordering and as the batch cursor;
//   defaults to [ 'page_id' ].
// NOTE(review): the first parameter (used below as $builder) and the opening
// of the batch loop are not visible in this excerpt.
128 private function doRefreshLinks(
130 bool $redirectsOnly =
false,
131 array $indexFields = [
'page_id' ]
// Let hook handlers know a refresh run is starting.
134 $this->getHookRunner()->onMaintenanceRefreshLinksInit( $this );
136 $estimateCount = $builder->
caller( __METHOD__ )->estimateRowCount();
137 $this->output(
"Estimated page count: $estimateCount\n" );
// Cursor starts at 0 for every index field.
140 $lastIndexes = array_fill_keys( $indexFields, 0 );
// page_id is always selected because the per-row work below needs it.
141 $selectFields = in_array(
'page_id', $indexFields )
142 ? $indexFields : [
'page_id', ...$indexFields ];
143 $verbose = $this->hasOption(
'verbose' );
144 $dbr = $this->getDB(
DB_REPLICA, [
'vslow' ] );
// Fetch the next batch: rows strictly after the last seen index values, in
// index order. The builder is cloned so the caller's query is left untouched.
146 $batchCond = $dbr->buildComparison(
'>', $lastIndexes );
147 $res = ( clone $builder )->select( $selectFields )
148 ->andWhere( [ $batchCond ] )
149 ->orderBy( $indexFields )
150 ->caller( __METHOD__ )->fetchResultSet();
153 $this->output(
"Refreshing links for {$res->numRows()} pages\n" );
156 foreach ( $res as $row ) {
// True on every REPORTING_INTERVAL-th row; the body is not visible here.
157 if ( !( ++$i % self::REPORTING_INTERVAL ) ) {
162 $this->
output(
"Refreshing links for page ID {$row->page_id}\n" );
164 self::fixRedirect( $this, $row->page_id );
165 if ( !$redirectsOnly ) {
166 self::fixLinksFromArticle( $row->page_id );
// Remember the index values of the last row as the cursor for the next batch.
169 if ( $res->numRows() ) {
170 $res->seek( $res->numRows() - 1 );
171 foreach ( $indexFields as $field ) {
172 $lastIndexes[$field] = $res->current()->$field;
176 }
// A batch with exactly getBatchSize() rows means there may be more to fetch.
while ( $res->numRows() == $this->getBatchSize() );
// Brings the redirect table row and the page_is_redirect flag of one page in
// line with the page's current content.
// NOTE(review): the method header, the lookup of $page/$dbw and the assignment
// of $fieldValue are not visible in this excerpt; the name is inferred from the
// self::fixRedirect() call in doRefreshLinks() — confirm.
197 if ( $page ===
null ) {
// Raw content access; the redirect target is only read when content exists.
202 $content = $page->getContent( RevisionRecord::RAW );
203 if ( $content !==
null ) {
204 $rt = $content->getRedirectTarget();
// No redirect target: remove any redirect row recorded for this page ID.
208 if ( $rt ===
null ) {
211 $dbw->newDeleteQueryBuilder()
212 ->deleteFrom(
'redirect' )
213 ->where( [
'rd_from' => $id ] )
214 ->caller( __METHOD__ )->execute();
// NOTE(review): presumably the else branch (a redirect target exists) — confirm.
217 $page->insertRedirectEntry( $rt );
// Prepare the update of page_is_redirect for this page; execution of the
// builder is not visible here.
222 $update = $dbw->newUpdateQueryBuilder()
224 ->set( [
'page_is_redirect' => $fieldValue ] )
225 ->where( [
'page_id' => $id ] )
226 ->caller( __METHOD__ );
// Re-runs the secondary data updates for the page with the given ID.
// NOTE(review): the method header is not visible in this excerpt; the name is
// inferred from the self::fixLinksFromArticle() call in doRefreshLinks().
236 $services = MediaWikiServices::getInstance();
237 $page = $services->getWikiPageFactory()->newFromID( $id );
// newFromID() yields null for an unknown ID; the handling of that case is not
// visible here.
240 if ( $page ===
null ) {
// Non-recursive update, tagged with a cause action naming this script.
247 $page->doSecondaryDataUpdates( [
248 'defer' => DeferredUpdates::POSTSEND,
249 'causeAction' =>
'refresh-links-maintenance',
250 'recursive' =>
false,
// Run whatever deferred updates are pending right away.
252 DeferredUpdates::doUpdates();
// Walks the page_id range [$start, $end] in chunks and hands each chunk to
// dfnCheckInterval(), which removes link rows whose source page does not exist.
//
// $start / $end: page_id bounds, null meaning unbounded on that side.
// $batchSize: passed through to dfnCheckInterval(); defaults to 100.
// NOTE(review): a further parameter (used below as $chunkSize) and the lines
// that advance $start between iterations are not visible in this excerpt.
266 private function deleteLinksFromNonexistent( $start =
null, $end =
null, $batchSize = 100,
269 $this->waitForReplication();
270 $this->output(
"Deleting illegal entries from the links tables...\n" );
271 $dbr = $this->getDB(
DB_REPLICA, [
'vslow' ] );
// Find the page_id that sits $chunkSize rows past the start of the interval;
// fetchField() returns false when fewer rows than that remain.
275 $nextStart = $dbr->newSelectQueryBuilder()
276 ->select(
'page_id' )
278 ->where( [ self::intervalCond( $dbr,
'page_id', $start, $end ) ] )
279 ->orderBy(
'page_id' )
280 ->offset( $chunkSize )
281 ->caller( __METHOD__ )->fetchField();
283 if ( $nextStart !==
false ) {
// The current chunk ends just before the next chunk's first ID.
288 $chunkEnd = $nextStart - 1;
// Human-readable interval for the progress line; null bounds print as infinity.
294 $fmtStart = $start !==
null ?
"[$start" :
'(-INF';
295 $fmtChunkEnd = $chunkEnd !==
null ?
"$chunkEnd]" :
'INF)';
296 $this->
output(
" Checking interval $fmtStart, $fmtChunkEnd\n" );
297 $this->dfnCheckInterval( $start, $chunkEnd, $batchSize );
301 }
// Stop once no further chunk start could be found.
while ( $nextStart !==
false );
// For each links table, deletes rows in the given source-ID interval whose
// source page has no matching row in the page table.
//
// $start / $end: bounds on the table's "from" field, null meaning unbounded.
// $batchSize: maximum number of IDs fetched and deleted per round.
// NOTE(review): the array openers, parts of the select query and the guard
// around the delete are not visible in this excerpt.
310 private function dfnCheckInterval( $start =
null, $end =
null, $batchSize = 100 ) {
// Table name => field holding the source page ID (iterated as $linksTables).
313 'pagelinks' =>
'pl_from',
314 'imagelinks' =>
'il_from',
315 'categorylinks' =>
'cl_from',
316 'templatelinks' =>
'tl_from',
317 'externallinks' =>
'el_from',
318 'iwlinks' =>
'iwl_from',
319 'langlinks' =>
'll_from',
320 'redirect' =>
'rd_from',
321 'page_props' =>
'pp_page',
// Table name => virtual domain (looked up as $domains). 'redirect' and
// 'page_props' have no entry and fall back to false below.
325 'categorylinks' => CategoryLinksTable::VIRTUAL_DOMAIN,
326 'externallinks' => ExternalLinksTable::VIRTUAL_DOMAIN,
327 'imagelinks' => ImageLinksTable::VIRTUAL_DOMAIN,
328 'iwlinks' => InterwikiLinksTable::VIRTUAL_DOMAIN,
329 'langlinks' => LangLinksTable::VIRTUAL_DOMAIN,
330 'pagelinks' => PageLinksTable::VIRTUAL_DOMAIN,
331 'templatelinks' => TemplateLinksTable::VIRTUAL_DOMAIN,
334 foreach ( $linksTables as $table => $field ) {
// Connections are obtained per table so that each table's domain is used.
335 $domain = $domains[$table] ??
false;
336 $dbw = $this->
getServiceContainer()->getConnectionProvider()->getPrimaryDatabase( $domain );
337 $dbr = $this->
getServiceContainer()->getConnectionProvider()->getReplicaDatabase( $domain,
'vslow' );
339 $this->
output(
" $table: 0" );
340 $tableStart = $start;
// Left join against page and keep only rows where the join found nothing,
// i.e. source IDs with no page row.
343 $ids = $dbr->newSelectQueryBuilder()
347 ->leftJoin(
'page',
null,
"$field = page_id" )
348 ->where( self::intervalCond( $dbr, $field, $tableStart, $end ) )
349 ->andWhere( [
'page_id' =>
null ] )
351 ->limit( $batchSize )
352 ->caller( __METHOD__ )->fetchFieldValues();
354 $numIds = count( $ids );
357 $dbw->newDeleteQueryBuilder()
358 ->deleteFrom( $table )
359 ->where( [ $field => $ids ] )
360 ->caller( __METHOD__ )->execute();
361 $this->
output(
", $counter" );
// Resume after the last ID of this batch.
// NOTE(review): assumes $ids is sorted ascending; the ORDER BY is not visible.
362 $tableStart = $ids[$numIds - 1] + 1;
366 }
// Another round is needed only after a full batch, and only while the
// cursor has not moved past $end.
while ( $numIds >= $batchSize && ( $end ===
null || $tableStart <= $end ) );
368 $this->
output(
" deleted.\n" );
/**
 * Build a condition limiting a field to the closed interval [$start, $end].
 *
 * A null bound leaves that side of the interval open. When both bounds are
 * null, the condition only requires the field to be non-null.
 *
 * @param IReadableDatabase $db Connection used to build the expression
 * @param string $var Field name
 * @param mixed $start Inclusive lower bound, or null for none
 * @param mixed $end Inclusive upper bound, or null for none
 * @return mixed Expression object produced by $db->expr()
 */
private static function intervalCond( IReadableDatabase $db, $var, $start, $end ) {
	$hasLower = $start !== null;
	$hasUpper = $end !== null;

	// Fully bounded interval.
	if ( $hasLower && $hasUpper ) {
		return $db->expr( $var, '>=', $start )->and( $var, '<=', $end );
	}

	// Bounded on one side only.
	if ( $hasLower ) {
		return $db->expr( $var, '>=', $start );
	}
	if ( $hasUpper ) {
		return $db->expr( $var, '<=', $end );
	}

	// No bounds at all: any non-null value qualifies.
	return $db->expr( $var, '!=', null );
}
// Refreshes the pages of every category that belongs to one tracking category.
// NOTE(review): the method header is not visible in this excerpt; the name is
// inferred from the $this->refreshTrackingCategory() call site.
403 $cats = $this->getPossibleCategories( $category );
// NOTE(review): presumably reported when $cats is empty — the condition is not
// visible here.
406 $this->
error(
"Tracking category '$category' is disabled\n" );
// Each category gets its own clone so the joins added by refreshCategory() do
// not accumulate on the shared builder.
410 foreach ( $cats as $cat ) {
411 $this->refreshCategory( clone $builder, $cat );
// Refreshes links for the pages that are members of one category.
// NOTE(review): the method header and the condition tying the join to the
// given category are not visible in this excerpt.
422 $this->
output(
"Refreshing pages in category '{$category->getText()}'...\n" );
// Join pages to their category links, and those to the link target rows.
424 $builder->
join(
'categorylinks',
null,
'page_id=cl_from' )
425 ->join(
'linktarget',
null,
'lt_id=cl_target_id' )
// Not redirects-only; batches are ordered and paged by
// ( cl_timestamp, cl_from ) instead of the default page_id.
427 $this->doRefreshLinks( $builder,
false, [
'cl_timestamp',
'cl_from' ] );
// Returns the 'cats' entry registered for the given tracking category key, and
// reports a fatal error for an unknown key.
// NOTE(review): the assignment of $cats is not visible in this excerpt.
436 private function getPossibleCategories( $categoryKey ) {
438 if ( isset( $cats[$categoryKey] ) ) {
439 return $cats[$categoryKey][
'cats'];
// Reached only when the key was not found above.
441 $this->
fatalError(
"Unknown tracking category {$categoryKey}\n" );