MediaWiki  1.23.15
refreshLinks.php
Go to the documentation of this file.
1 <?php
24 require_once __DIR__ . '/Maintenance.php';
25 
/**
 * Maintenance script that refreshes the redirect/link tables for a range of
 * pages and removes link rows whose source page no longer exists.
 */
class RefreshLinks extends Maintenance {
	/**
	 * Register the description, command line options, the optional "start"
	 * argument and the default batch size (100).
	 */
	public function __construct() {
		parent::__construct();
		$this->mDescription = "Refresh link tables";
		$this->addOption( 'dfn-only', 'Delete links from nonexistent articles only' );
		$this->addOption( 'new-only', 'Only affect articles with just a single edit' );
		$this->addOption( 'redirects-only', 'Only fix redirects, not all links' );
		$this->addOption( 'old-redirects-only', 'Only fix redirects with no redirect table entry' );
		$this->addOption( 'm', 'Maximum replication lag', false, true );
		$this->addOption( 'e', 'Last page id to refresh', false, true );
		$this->addArg( 'start', 'Page_id to start from, default 1', false );
		$this->setBatchSize( 100 );
	}

	/**
	 * Script entry point.
	 *
	 * Unless --dfn-only was given, refreshes redirects/links for the requested
	 * page range first. The cleanup of links from nonexistent pages always runs.
	 */
	public function execute() {
		$max = $this->getOption( 'm', 0 );
		if ( !$this->hasOption( 'dfn-only' ) ) {
			$start = $this->getArg( 0, 1 );
			$new = $this->getOption( 'new-only', false );
			$end = $this->getOption( 'e', 0 );
			$redir = $this->getOption( 'redirects-only', false );
			$oldRedir = $this->getOption( 'old-redirects-only', false );
			$this->doRefreshLinks( $start, $new, $max, $end, $redir, $oldRedir );
		}
		$this->deleteLinksFromNonexistent( $max, $this->mBatchSize );
	}

	/**
	 * Refresh the redirect table and, unless restricted, the link tables.
	 *
	 * Three mutually exclusive modes, checked in this order:
	 *  - $oldRedirectsOnly: fix pages flagged as redirects that have no row in
	 *    the redirect table;
	 *  - $newOnly: process pages with page_is_new = 1 (only $start is applied,
	 *    $end is not used in this mode);
	 *  - otherwise: walk every page id from $start to $end.
	 *
	 * @param int|string $start Page id to start from
	 * @param bool $newOnly Only process pages with page_is_new = 1
	 * @param int|bool $maxLag Accepted for interface compatibility; not read by
	 *   this method
	 * @param int|string $end Last page id to process; 0 means "no upper bound"
	 *   (resolved to the highest known page/redirect id in the full walk)
	 * @param bool $redirectsOnly Only fix redirects, not all links
	 * @param bool $oldRedirectsOnly Only fix redirects without a redirect table entry
	 */
	private function doRefreshLinks( $start, $newOnly = false, $maxLag = false,
		$end = 0, $redirectsOnly = false, $oldRedirectsOnly = false
	) {
		global $wgParser, $wgUseTidy;

		$reportingInterval = 100;
		$dbr = wfGetDB( DB_SLAVE );
		// Both bounds end up interpolated into raw SQL conditions below, so
		// force them to integers before use.
		$start = intval( $start );
		$end = intval( $end );

		// Give extensions a chance to optimize settings
		wfRunHooks( 'MaintenanceRefreshLinksInit', array( $this ) );

		# Don't generate extension images (e.g. Timeline)
		$wgParser->clearTagHooks();

		# Don't use HTML tidy
		$wgUseTidy = false;

		$what = $redirectsOnly ? "redirects" : "links";

		if ( $oldRedirectsOnly ) {
			# This entire code path is cut-and-pasted from below. Hurrah.

			$conds = array(
				"page_is_redirect=1",
				"rd_from IS NULL"
			);

			if ( $end == 0 ) {
				$conds[] = "page_id >= $start";
			} else {
				$conds[] = "page_id BETWEEN $start AND $end";
			}

			// LEFT JOIN + "rd_from IS NULL" selects redirect pages that have
			// no matching redirect table row.
			$res = $dbr->select(
				array( 'page', 'redirect' ),
				'page_id',
				$conds,
				__METHOD__,
				array(),
				array( 'redirect' => array( "LEFT JOIN", "page_id=rd_from" ) )
			);
			$num = $res->numRows();
			$this->output( "Refreshing $num old redirects from $start...\n" );

			$i = 0;

			foreach ( $res as $row ) {
				// Report progress and wait for slaves every $reportingInterval rows
				if ( !( ++$i % $reportingInterval ) ) {
					$this->output( "$i\n" );
					wfWaitForSlaves();
				}
				$this->fixRedirect( $row->page_id );
			}
		} elseif ( $newOnly ) {
			$this->output( "Refreshing $what from " );
			$res = $dbr->select( 'page',
				array( 'page_id' ),
				array(
					'page_is_new' => 1,
					"page_id >= $start" ),
				__METHOD__
			);
			$num = $res->numRows();
			$this->output( "$num new articles...\n" );

			$i = 0;
			foreach ( $res as $row ) {
				if ( !( ++$i % $reportingInterval ) ) {
					$this->output( "$i\n" );
					wfWaitForSlaves();
				}
				if ( $redirectsOnly ) {
					$this->fixRedirect( $row->page_id );
				} else {
					self::fixLinksFromArticle( $row->page_id );
				}
			}
		} else {
			if ( !$end ) {
				// No explicit end: go up to the highest id present in either table
				$maxPage = $dbr->selectField( 'page', 'max(page_id)', false );
				$maxRD = $dbr->selectField( 'redirect', 'max(rd_from)', false );
				$end = max( $maxPage, $maxRD );
			}
			$this->output( "Refreshing redirects table.\n" );
			$this->output( "Starting from page_id $start of $end.\n" );

			for ( $id = $start; $id <= $end; $id++ ) {

				if ( !( $id % $reportingInterval ) ) {
					$this->output( "$id\n" );
					wfWaitForSlaves();
				}
				$this->fixRedirect( $id );
			}

			if ( !$redirectsOnly ) {
				$this->output( "Refreshing links tables.\n" );
				$this->output( "Starting from page_id $start of $end.\n" );

				for ( $id = $start; $id <= $end; $id++ ) {

					if ( !( $id % $reportingInterval ) ) {
						$this->output( "$id\n" );
						wfWaitForSlaves();
					}
					// Without this call the loop would only print progress
					// and never touch the link tables.
					self::fixLinksFromArticle( $id );
				}
			}
		}
	}

	/**
	 * Bring the redirect table row and the page_is_redirect flag for one page
	 * id in line with the page's current content.
	 *
	 * If no page exists for the id, any redirect row for it is deleted and
	 * nothing else is done. Otherwise the redirect row is inserted or deleted
	 * depending on whether the content yields a redirect target, and
	 * page_is_redirect is set to 1 or 0 accordingly.
	 *
	 * @param int $id The page id to check
	 */
	private function fixRedirect( $id ) {
		$page = WikiPage::newFromID( $id );
		$dbw = wfGetDB( DB_MASTER );

		if ( $page === null ) {
			// This page doesn't exist (any more)
			// Delete any redirect table entry for it
			$dbw->delete( 'redirect', array( 'rd_from' => $id ),
				__METHOD__ );
			return;
		}

		$rt = null;
		$content = $page->getContent( Revision::RAW );
		if ( $content !== null ) {
			$rt = $content->getUltimateRedirectTarget();
		}

		if ( $rt === null ) {
			// The page is not a redirect
			// Delete any redirect table entry for it
			$dbw->delete( 'redirect', array( 'rd_from' => $id ), __METHOD__ );
			$fieldValue = 0;
		} else {
			$page->insertRedirectEntry( $rt );
			$fieldValue = 1;
		}

		// Update the page table to be sure it is in a consistent state
		$dbw->update( 'page', array( 'page_is_redirect' => $fieldValue ),
			array( 'page_id' => $id ), __METHOD__ );
	}

	/**
	 * Run the secondary data updates for the page with the given id.
	 *
	 * The LinkCache singleton is cleared on every call, including when the
	 * page or its content cannot be loaded (in which case nothing is updated).
	 * The updates are wrapped in a begin/commit pair on the master connection.
	 *
	 * @param int $id The page id
	 */
	public static function fixLinksFromArticle( $id ) {
		$page = WikiPage::newFromID( $id );

		LinkCache::singleton()->clear();

		if ( $page === null ) {
			return;
		}

		$content = $page->getContent( Revision::RAW );
		if ( $content === null ) {
			return;
		}

		$dbw = wfGetDB( DB_MASTER );
		$dbw->begin( __METHOD__ );

		$updates = $content->getSecondaryDataUpdates( $page->getTitle() );
		DataUpdate::runUpdates( $updates );

		$dbw->commit( __METHOD__ );
	}

	/**
	 * Delete rows from the link tables (and redirect / page_props) whose
	 * source page id has no matching row in the page table.
	 *
	 * Offending ids are read from a slave connection obtained from a freshly
	 * created load balancer with result buffering switched off, and deleted
	 * on the master in chunks of $batchSize.
	 * NOTE(review): the dedicated unbuffered connection presumably exists so
	 * large result sets can be streamed while other queries run - confirm.
	 *
	 * @param int $maxLag Accepted for interface compatibility; not read by
	 *   this method
	 * @param int $batchSize Number of ids to collect before each delete
	 */
	private function deleteLinksFromNonexistent( $maxLag = 0, $batchSize = 100 ) {
		wfWaitForSlaves();

		$dbw = wfGetDB( DB_MASTER );

		$lb = wfGetLBFactory()->newMainLB();
		$dbr = $lb->getConnection( DB_SLAVE );
		$dbr->bufferResults( false );

		$linksTables = array( // table name => page_id field
			'pagelinks' => 'pl_from',
			'imagelinks' => 'il_from',
			'categorylinks' => 'cl_from',
			'templatelinks' => 'tl_from',
			'externallinks' => 'el_from',
			'iwlinks' => 'iwl_from',
			'langlinks' => 'll_from',
			'redirect' => 'rd_from',
			'page_props' => 'pp_page',
		);

		foreach ( $linksTables as $table => $field ) {
			$this->output( "Retrieving illegal entries from $table... " );

			// SELECT DISTINCT( $field ) FROM $table LEFT JOIN page ON $field=page_id WHERE page_id IS NULL;
			$results = $dbr->select(
				array( $table, 'page' ),
				$field,
				array( 'page_id' => null ),
				__METHOD__,
				'DISTINCT',
				array( 'page' => array( 'LEFT JOIN', "$field=page_id" ) )
			);

			$counter = 0;
			$list = array();
			$this->output( "0.." );
			foreach ( $results as $row ) {
				$counter++;
				$list[] = $row->$field;
				// Flush a full batch: wait for slaves, delete, report, reset
				if ( ( $counter % $batchSize ) == 0 ) {
					wfWaitForSlaves();
					$dbw->delete( $table, array( $field => $list ), __METHOD__ );

					$this->output( $counter . ".." );
					$list = array();
				}
			}
			$this->output( $counter );
			// Delete whatever is left over from the last, partial batch
			if ( count( $list ) > 0 ) {
				$dbw->delete( $table, array( $field => $list ), __METHOD__ );
			}
			$this->output( "\n" );
			wfWaitForSlaves();
		}
		$lb->closeAll();
	}
}
317 
// Name of the Maintenance subclass defined in this file.
$maintClass = 'RefreshLinks';
// Include the file named by the RUN_MAINTENANCE_IF_MAIN constant.
require_once RUN_MAINTENANCE_IF_MAIN;
DB_MASTER
const DB_MASTER
Definition: Defines.php:56
php
skin txt MediaWiki includes four core it has been set as the default in MediaWiki since the replacing Monobook it had been been the default skin since before being replaced by Vector largely rewritten in while keeping its appearance Several legacy skins were removed in the as the burden of supporting them became too heavy to bear Those in etc for skin dependent CSS etc for skin dependent JavaScript These can also be customised on a per user by etc This feature has led to a wide variety of user styles becoming that gallery is a good place to ending in php
Definition: skin.txt:62
DataUpdate\runUpdates
static runUpdates( $updates)
Convenience method, calls doUpdate() on every DataUpdate in the array.
Definition: DataUpdate.php:79
wfGetDB
& wfGetDB( $db, $groups=array(), $wiki=false)
Get a Database object.
Definition: GlobalFunctions.php:3714
Maintenance\addOption
addOption( $name, $description, $required=false, $withArg=false, $shortName=false)
Add a parameter to the script.
Definition: Maintenance.php:169
RUN_MAINTENANCE_IF_MAIN
require_once RUN_MAINTENANCE_IF_MAIN
Definition: maintenance.txt:50
Maintenance
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
Definition: maintenance.txt:39
$dbr
$dbr
Definition: testCompression.php:48
$lb
if( $wgAPIRequestLog) $lb
Definition: api.php:126
wfRunHooks
wfRunHooks( $event, array $args=array(), $deprecatedVersion=null)
Call hook functions defined in $wgHooks.
Definition: GlobalFunctions.php:4066
array
the array() calling protocol came about after MediaWiki 1.4rc1.
List of Api Query prop modules.
global
when a variable name is used in a it is silently declared as a new masking the global
Definition: design.txt:93
wfWaitForSlaves
wfWaitForSlaves( $maxLag=false, $wiki=false, $cluster=false)
Modern version of wfWaitForSlaves().
Definition: GlobalFunctions.php:3859
WikiPage\newFromID
static newFromID( $id, $from='fromdb')
Constructor from a page id.
Definition: WikiPage.php:136
Revision\RAW
const RAW
Definition: Revision.php:74
DB_SLAVE
const DB_SLAVE
Definition: Defines.php:55
Maintenance\getOption
getOption( $name, $default=null)
Get an option, or return the default.
Definition: Maintenance.php:191
wfGetLBFactory
& wfGetLBFactory()
Get the load balancer factory object.
Definition: GlobalFunctions.php:3733
$wgParser
$wgParser
Definition: Setup.php:587
Maintenance\addArg
addArg( $arg, $description, $required=true)
Add some args that are needed.
Definition: Maintenance.php:207
as
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
Maintenance\output
output( $out, $channel=null)
Throw some output to the user.
Definition: Maintenance.php:314
Maintenance\hasOption
hasOption( $name)
Checks to see if a particular param exists.
Definition: Maintenance.php:181
Maintenance\getArg
getArg( $argId=0, $default=null)
Get an argument.
Definition: Maintenance.php:246
$res
$res
Definition: database.txt:21
LinkCache\singleton
static & singleton()
Get an instance of this class.
Definition: LinkCache.php:49
Maintenance\setBatchSize
setBatchSize( $s=0)
Set the batch size.
Definition: Maintenance.php:254