28 require_once __DIR__ .
'/Maintenance.php';
42 parent::__construct();
44 $this->
addOption(
'dfn-only',
'Delete links from nonexistent articles only' );
45 $this->
addOption(
'new-only',
'Only affect articles with just a single edit' );
46 $this->
addOption(
'redirects-only',
'Only fix redirects, not all links' );
47 $this->
addOption(
'old-redirects-only',
'Only fix redirects with no redirect table entry' );
48 $this->
addOption(
'e',
'Last page id to refresh',
false,
true );
49 $this->
addOption(
'dfn-chunk-size',
'Maximum number of existent IDs to check per ' .
50 'query, default 100000',
false,
true );
51 $this->
addOption(
'namespace',
'Only fix pages in this namespace',
false,
true );
52 $this->
addOption(
'category',
'Only fix pages in this category',
false,
true );
53 $this->
addOption(
'tracking-category',
'Only fix pages in this tracking category',
false,
true );
54 $this->
addArg(
'start',
'Page_id to start from, default 1',
false );
63 $start = (int)$this->
getArg( 0 ) ?:
null;
64 $end = (int)$this->
getOption(
'e' ) ?:
null;
65 $dfnChunkSize = (int)$this->
getOption(
'dfn-chunk-size', 100000 );
68 $this->
namespace = false;
70 $this->
namespace = (int)$ns;
72 if ( ( $category = $this->
getOption(
'category',
false ) ) !==
false ) {
75 $this->
fatalError(
"'$category' is an invalid category name!\n" );
78 } elseif ( ( $category = $this->
getOption(
'tracking-category',
false ) ) !==
false ) {
80 } elseif ( !$this->
hasOption(
'dfn-only' ) ) {
82 $redir = $this->
hasOption(
'redirects-only' );
83 $oldRedir = $this->
hasOption(
'old-redirects-only' );
92 return $this->
namespace !== false
106 $end =
null, $redirectsOnly =
false, $oldRedirectsOnly =
false
110 if ( $start ===
null ) {
115 $this->
getHookRunner()->onMaintenanceRefreshLinksInit( $this );
117 $what = $redirectsOnly ?
"redirects" :
"links";
118 $lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
120 if ( $oldRedirectsOnly ) {
121 # This entire code path is cut-and-pasted from below. Hurrah.
124 "page_is_redirect=1",
130 [
'page',
'redirect' ],
135 [
'redirect' => [
"LEFT JOIN",
"page_id=rd_from" ] ]
137 $num =
$res->numRows();
138 $this->
output(
"Refreshing $num old redirects from $start...\n" );
142 foreach (
$res as $row ) {
145 $lbFactory->waitForReplication();
149 } elseif ( $newOnly ) {
150 $this->
output(
"Refreshing $what from " );
155 self::intervalCond(
$dbr,
'page_id', $start, $end ),
159 $num =
$res->numRows();
160 $this->
output(
"$num new articles...\n" );
163 foreach (
$res as $row ) {
166 $lbFactory->waitForReplication();
168 if ( $redirectsOnly ) {
176 $maxPage =
$dbr->selectField(
'page',
'max(page_id)',
'', __METHOD__ );
177 $maxRD =
$dbr->selectField(
'redirect',
'max(rd_from)',
'', __METHOD__ );
178 $end = max( $maxPage, $maxRD );
180 $this->
output(
"Refreshing redirects table.\n" );
181 $this->
output(
"Starting from page_id $start of $end.\n" );
183 for ( $id = $start; $id <= $end; $id++ ) {
186 $lbFactory->waitForReplication();
191 if ( !$redirectsOnly ) {
192 $this->
output(
"Refreshing links tables.\n" );
193 $this->
output(
"Starting from page_id $start of $end.\n" );
195 for ( $id = $start; $id <= $end; $id++ ) {
198 $lbFactory->waitForReplication();
219 $page = MediaWikiServices::getInstance()->getWikiPageFactory()->newFromID( $id );
222 if ( $page ===
null ) {
225 $dbw->delete(
'redirect', [
'rd_from' => $id ],
229 } elseif ( $this->
namespace !==
false
230 && !$page->getTitle()->inNamespace( $this->namespace )
236 $content = $page->getContent( RevisionRecord::RAW );
238 $rt =
$content->getUltimateRedirectTarget();
241 if ( $rt ===
null ) {
244 $dbw->delete(
'redirect', [
'rd_from' => $id ], __METHOD__ );
247 $page->insertRedirectEntry( $rt );
252 $dbw->update(
'page', [
'page_is_redirect' => $fieldValue ],
253 [
'page_id' => $id ], __METHOD__ );
262 $services = MediaWikiServices::getInstance();
263 $page = $services->getWikiPageFactory()->newFromID( $id );
265 $services->getLinkCache()->clear();
267 if ( $page ===
null ) {
269 } elseif ( $ns !==
false
270 && !$page->getTitle()->inNamespace( $ns ) ) {
277 $page->doSecondaryDataUpdates( [
278 'defer' => DeferredUpdates::POSTSEND,
279 'recursive' =>
false,
298 MediaWikiServices::getInstance()->getDBLoadBalancerFactory()->waitForReplication();
299 $this->
output(
"Deleting illegal entries from the links tables...\n" );
304 $nextStart =
$dbr->selectField(
307 [ self::intervalCond(
$dbr,
'page_id', $start, $end ) ]
310 [
'ORDER BY' =>
'page_id',
'OFFSET' => $chunkSize ]
313 if ( $nextStart !==
false ) {
318 $chunkEnd = $nextStart - 1;
324 $fmtStart = $start !==
null ?
"[$start" :
'(-INF';
325 $fmtChunkEnd = $chunkEnd !==
null ?
"$chunkEnd]" :
'INF)';
326 $this->
output(
" Checking interval $fmtStart, $fmtChunkEnd\n" );
331 }
while ( $nextStart !==
false );
346 'pagelinks' =>
'pl_from',
347 'imagelinks' =>
'il_from',
348 'categorylinks' =>
'cl_from',
349 'templatelinks' =>
'tl_from',
350 'externallinks' =>
'el_from',
351 'iwlinks' =>
'iwl_from',
352 'langlinks' =>
'll_from',
353 'redirect' =>
'rd_from',
354 'page_props' =>
'pp_page',
357 $lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
358 foreach ( $linksTables as $table => $field ) {
359 $this->
output(
" $table: 0" );
360 $tableStart = $start;
363 $ids =
$dbr->selectFieldValues(
367 self::intervalCond(
$dbr, $field, $tableStart, $end ),
368 "$field NOT IN ({$dbr->selectSQLText( 'page', 'page_id', [], __METHOD__ )})",
371 [
'DISTINCT',
'ORDER BY' => $field,
'LIMIT' => $batchSize ]
374 $numIds = count( $ids );
377 $dbw->delete( $table, [ $field => $ids ], __METHOD__ );
378 $this->
output(
", $counter" );
379 $tableStart = $ids[$numIds - 1] + 1;
380 $lbFactory->waitForReplication();
383 }
while ( $numIds >= $batchSize && ( $end ===
null || $tableStart <= $end ) );
385 $this->
output(
" deleted.\n" );
402 if ( $start ===
null && $end ===
null ) {
403 return "$var IS NOT NULL";
404 } elseif ( $end ===
null ) {
405 return "$var >= {$db->addQuotes( $start )}";
406 } elseif ( $start ===
null ) {
407 return "$var <= {$db->addQuotes( $end )}";
409 return "$var BETWEEN {$db->addQuotes( $start )} AND {$db->addQuotes( $end )}";
422 $this->
error(
"Tracking category '$category' is disabled\n" );
426 foreach ( $cats as $cat ) {
437 $this->
output(
"Refreshing pages in category '{$category->getText()}'...\n" );
444 if ( $this->
namespace !==
false ) {
451 $lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
453 $finalConds = $conds;
454 $timestamp =
$dbr->addQuotes( $timestamp );
456 "(cl_timestamp > $timestamp OR (cl_timestamp = $timestamp AND cl_from > $lastId))";
457 $res =
$dbr->select( [
'page',
'categorylinks' ],
458 [
'page_id',
'cl_timestamp' ],
462 'ORDER BY' => [
'cl_timestamp',
'cl_from' ],
467 foreach (
$res as $row ) {
470 $lbFactory->waitForReplication();
472 $lastId = $row->page_id;
473 $timestamp = $row->cl_timestamp;
477 }
while (
$res->numRows() == $this->getBatchSize() );
488 $cats = $trackingCategories->getTrackingCategories();
489 if ( isset( $cats[$categoryKey] ) ) {
490 return $cats[$categoryKey][
'cats'];
492 $this->
fatalError(
"Unknown tracking category {$categoryKey}\n" );
497 require_once RUN_MAINTENANCE_IF_MAIN;