MediaWiki  master
LinkHolderArray.php
Go to the documentation of this file.
1 <?php
25 
/**
 * Internal link entries, indexed as [namespace][link ID]. makeHolder() stores each
 * entry as an array with 'title', 'text' and 'pdbk' keys plus an optional 'query'
 * key; doVariants() may additionally set 'selflink'.
 * @var array[][]
 */
31  public $internals = [];
/**
 * Interwiki link entries, indexed by link ID. Entries have the same layout as the
 * ones in $internals.
 * @var array[]
 */
33  public $interwikis = [];
/**
 * Entry counter: incremented by makeHolder(), advanced by merge() for internal
 * entries, reset by clear() and compared against $wgLinkHolderBatchSize by isBig().
 * @var int
 */
35  public $size = 0;
36 
/**
 * Owner object handed to the constructor. This class calls nextLinkID(), setLinkID(),
 * getOutput(), getLinkRenderer(), getTitle() and getContentLanguage() on it
 * (presumably a Parser - TODO confirm against callers).
 * @var object
 */
40  public $parent;
/**
 * ID offset used by mergeForeignCallback(); only set while mergeForeign() is
 * running and reset to null afterwards.
 * @var int|null
 */
41  protected $tempIdOffset;
42 
/**
 * Create an empty holder array attached to the given owner.
 *
 * @param object $parent Owner object; it is only stored here and used later by
 *   the other methods (link ID allocation, output, link rendering, ...)
 */
public function __construct( $parent ) {
	$this->parent = $parent;
}
49 
/**
 * Reduce memory usage to reduce the impact of circular references.
 *
 * Every property of the object (including the reference to the parent) is unset.
 */
public function __destruct() {
	$propertyNames = array_keys( get_object_vars( $this ) );
	foreach ( $propertyNames as $propertyName ) {
		unset( $this->$propertyName );
	}
}
59 
/**
 * Prepare the object for serialization.
 *
 * The 'title' member is stripped from every stored entry (it is rebuilt from
 * 'pdbk' in __wakeup()), and only the link data and the size counter are
 * serialized; the parent object is deliberately left out.
 *
 * @return string[] Names of the properties to serialize
 */
public function __sleep() {
	foreach ( $this->internals as $ns => $nsLinks ) {
		foreach ( array_keys( $nsLinks ) as $id ) {
			unset( $this->internals[$ns][$id]['title'] );
		}
	}

	foreach ( array_keys( $this->interwikis ) as $id ) {
		unset( $this->interwikis[$id]['title'] );
	}

	return [ 'internals', 'interwikis', 'size' ];
}
84 
/**
 * Recreate the Title objects after unserialization.
 *
 * Each entry gets its 'title' member back from Title::newFromText() applied to
 * the entry's 'pdbk' value.
 */
public function __wakeup() {
	foreach ( $this->internals as $ns => $nsLinks ) {
		foreach ( $nsLinks as $id => $entry ) {
			$this->internals[$ns][$id]['title'] = Title::newFromText( $entry['pdbk'] );
		}
	}

	foreach ( $this->interwikis as $id => $entry ) {
		$this->interwikis[$id]['title'] = Title::newFromText( $entry['pdbk'] );
	}
}
102 
/**
 * Merge another LinkHolderArray into this one.
 *
 * Entries already present here win over entries of $other that use the same ID.
 * The size counter is advanced by the number of internal entries of $other;
 * interwiki entries are copied without touching the counter.
 *
 * @param LinkHolderArray $other
 */
public function merge( $other ) {
	foreach ( $other->internals as $ns => $entries ) {
		$this->size += count( $entries );
		$this->internals[$ns] = isset( $this->internals[$ns] )
			? $this->internals[$ns] + $entries
			: $entries;
	}

	$this->interwikis = $this->interwikis + $other->interwikis;
}
118 
/**
 * Merge a LinkHolderArray from another parser instance into this one.
 *
 * All link IDs of $other are shifted by the next free link ID of the parent so
 * they cannot collide with IDs issued here, and the placeholders inside $texts
 * are renumbered the same way. Afterwards the parent's link ID is moved past
 * the highest ID that was used.
 *
 * @param LinkHolderArray $other
 * @param string|string[] $texts Text containing placeholders that refer to $other
 * @return string|string[] $texts with renumbered placeholders
 */
public function mergeForeign( $other, $texts ) {
	$idOffset = $this->parent->nextLinkID();
	// Made available to mergeForeignCallback() for the duration of this call
	$this->tempIdOffset = $idOffset;
	$highestId = 0;
	$renumber = [ $this, 'mergeForeignCallback' ];

	# Renumber internal links
	foreach ( $other->internals as $ns => $nsLinks ) {
		foreach ( $nsLinks as $oldId => $entry ) {
			$shiftedId = $idOffset + $oldId;
			$this->internals[$ns][$shiftedId] = $entry;
			if ( $shiftedId > $highestId ) {
				$highestId = $shiftedId;
			}
		}
	}
	$texts = preg_replace_callback( '/(<!--LINK\'" \d+:)(\d+)(-->)/', $renumber, $texts );

	# Renumber interwiki links
	foreach ( $other->interwikis as $oldId => $entry ) {
		$shiftedId = $idOffset + $oldId;
		$this->interwikis[$shiftedId] = $entry;
		if ( $shiftedId > $highestId ) {
			$highestId = $shiftedId;
		}
	}
	$texts = preg_replace_callback( '/(<!--IWLINK\'" )(\d+)(-->)/', $renumber, $texts );

	# Set the parent link ID to be beyond the highest used ID
	$this->parent->setLinkID( $highestId + 1 );
	$this->tempIdOffset = null;

	return $texts;
}
160 
/**
 * Regex callback of mergeForeign(): shift the ID captured in group 2 by the
 * offset currently stored in $this->tempIdOffset.
 *
 * @param string[] $m Match array: [1] placeholder prefix, [2] old ID, [3] suffix
 * @return string The placeholder with its renumbered ID
 */
protected function mergeForeignCallback( $m ) {
	$shiftedId = $m[2] + $this->tempIdOffset;

	return "{$m[1]}{$shiftedId}{$m[3]}";
}
168 
/**
 * Get a subset of the current LinkHolderArray which is sufficient to
 * interpret the given text.
 *
 * Placeholders whose ID is not stored in this holder are ignored instead of
 * being copied as null entries, and the size counter of the returned object
 * reflects the number of entries copied into it.
 *
 * @param string $text Text that may contain link placeholders
 * @return LinkHolderArray New holder sharing this object's parent
 */
public function getSubArray( $text ) {
	$sub = new LinkHolderArray( $this->parent );
	$text = (string)$text;

	# Internal links
	if ( preg_match_all( '/<!--LINK\'" (\d+):(\d+)-->/', $text, $found, PREG_SET_ORDER ) ) {
		foreach ( $found as $m ) {
			$ns = (int)$m[1];
			$key = (int)$m[2];
			// Skip unknown IDs (would otherwise store null) and repeated placeholders
			if ( !isset( $this->internals[$ns][$key] ) || isset( $sub->internals[$ns][$key] ) ) {
				continue;
			}
			$sub->internals[$ns][$key] = $this->internals[$ns][$key];
			$sub->size++;
		}
	}

	# Interwiki links
	if ( preg_match_all( '/<!--IWLINK\'" (\d+)-->/', $text, $found, PREG_SET_ORDER ) ) {
		foreach ( $found as $m ) {
			$key = $m[1];
			if ( !isset( $this->interwikis[$key] ) || isset( $sub->interwikis[$key] ) ) {
				continue;
			}
			$sub->interwikis[$key] = $this->interwikis[$key];
			$sub->size++;
		}
	}

	return $sub;
}
204 
/**
 * Returns true if the memory requirements of this object are getting large.
 *
 * "Large" means the size counter exceeds the global $wgLinkHolderBatchSize.
 *
 * @return bool
 */
public function isBig() {
	global $wgLinkHolderBatchSize;

	return $wgLinkHolderBatchSize < $this->size;
}
213 
/**
 * Clear all stored link holders and reset the size counter.
 */
public function clear() {
	$this->size = 0;
	$this->interwikis = [];
	$this->internals = [];
}
223 
/**
 * Make a link placeholder.
 *
 * The link data is stored in this object under a fresh link ID obtained from
 * the parent, and an HTML-comment placeholder carrying that ID is returned. If
 * $nt is not an object nothing is stored and an error marker is returned.
 *
 * @param Title|mixed $nt Link target
 * @param string $text Link text
 * @param array $query Optional query parameters, stored only when non-empty
 * @param string $trail Text following the link; split with Linker::splitTrail()
 * @param string $prefix Text placed in front of the link text
 * @return string Placeholder followed by the part of the trail outside the link
 */
public function makeHolder( $nt, $text = '', $query = [], $trail = '', $prefix = '' ) {
	if ( !is_object( $nt ) ) {
		# Fail gracefully
		return "<!-- ERROR -->{$prefix}{$text}{$trail}";
	}

	# Separate the link trail from the rest of the link
	list( $inside, $trail ) = Linker::splitTrail( $trail );

	$entry = [
		'title' => $nt,
		'text' => $prefix . $text . $inside,
		'pdbk' => $nt->getPrefixedDBkey(),
	];
	if ( $query !== [] ) {
		$entry['query'] = $query;
	}

	$isInterwiki = $nt->isExternal();
	// Use a globally unique ID to keep the objects mergable
	$key = $this->parent->nextLinkID();
	if ( $isInterwiki ) {
		$this->interwikis[$key] = $entry;
		$placeholder = "<!--IWLINK'\" $key-->";
	} else {
		$ns = $nt->getNamespace();
		$this->internals[$ns][$key] = $entry;
		$placeholder = "<!--LINK'\" $ns:$key-->";
	}
	$this->size++;

	return $placeholder . $trail;
}
269 
/**
 * Replace link placeholders with actual links, in the buffer.
 *
 * Internal link placeholders are handled first, then interwiki ones.
 *
 * @param string &$text Buffer that is modified in place
 */
public function replace( &$text ) {
	$this->replaceInternal( $text );
	$this->replaceInterwiki( $text );
}
279 
/**
 * Replace internal links.
 *
 * Works in three steps: determine a colour (CSS class string, or 'new' for a
 * broken link) per prefixed DB key, using the link cache first and one batched
 * query on the page table for everything not cached; let the GetLinkColours
 * hook and the language variant handling adjust the result; finally render
 * each stored entry and substitute it for its placeholder.
 *
 * @suppress SecurityCheck-XSS Gets confused with $entry['pdbk'].
 * @param string &$text Buffer that is modified in place
 */
protected function replaceInternal( &$text ) {
	if ( !$this->internals ) {
		return;
	}

	$colours = [];
	$linkCache = MediaWikiServices::getInstance()->getLinkCache();
	$output = $this->parent->getOutput();
	$linkRenderer = $this->parent->getLinkRenderer();

	$dbr = wfGetDB( DB_REPLICA );

	# Sort by namespace
	ksort( $this->internals );

	$linkcolour_ids = [];

	# Generate query
	$lb = new LinkBatch();
	$lb->setCaller( __METHOD__ );

	foreach ( $this->internals as $ns => $entries ) {
		foreach ( $entries as $entry ) {
			/** @var Title $title */
			$title = $entry['title'];
			$pdbk = $entry['pdbk'];

			# Skip invalid entries.
			# Result will be ugly, but prevents crash.
			if ( $title === null ) {
				continue;
			}

			# Check if it's a static known link, e.g. interwiki
			if ( $title->isAlwaysKnown() ) {
				$colours[$pdbk] = '';
			} elseif ( $ns == NS_SPECIAL ) {
				$colours[$pdbk] = 'new';
			} else {
				$id = $linkCache->getGoodLinkID( $pdbk );
				if ( $id != 0 ) {
					$colours[$pdbk] = $linkRenderer->getLinkClasses( $title );
					$output->addLink( $title, $id );
					$linkcolour_ids[$id] = $pdbk;
				} elseif ( $linkCache->isBadLink( $pdbk ) ) {
					$colours[$pdbk] = 'new';
				} else {
					# Not in the link cache, add it to the query
					$lb->addObj( $title );
				}
			}
		}
	}

	if ( !$lb->isEmpty() ) {
		// The rows are handed to the link cache and their page_id is read below,
		// so the fields LinkCache needs must be part of the selection.
		$fields = array_merge(
			LinkCache::getSelectFields(),
			[ 'page_namespace', 'page_title' ]
		);

		$res = $dbr->select(
			'page',
			$fields,
			$lb->constructSet( 'page', $dbr ),
			__METHOD__
		);

		# Fetch data and form into an associative array
		# non-existent = broken
		foreach ( $res as $s ) {
			$title = Title::makeTitle( $s->page_namespace, $s->page_title );
			$pdbk = $title->getPrefixedDBkey();
			$linkCache->addGoodLinkObjFromRow( $title, $s );
			$output->addLink( $title, $s->page_id );
			$colours[$pdbk] = $linkRenderer->getLinkClasses( $title );
			// add id to the extension todolist
			$linkcolour_ids[$s->page_id] = $pdbk;
		}
		unset( $res );
	}

	if ( count( $linkcolour_ids ) ) {
		// pass an array of page_ids to an extension
		Hooks::run( 'GetLinkColours', [ $linkcolour_ids, &$colours, $this->parent->getTitle() ] );
	}

	# Do a second query for different language variants of links and categories
	if ( $this->parent->getContentLanguage()->hasVariants() ) {
		$this->doVariants( $colours );
	}

	# Construct search and replace arrays
	$replacePairs = [];
	foreach ( $this->internals as $ns => $entries ) {
		foreach ( $entries as $index => $entry ) {
			$pdbk = $entry['pdbk'];
			$title = $entry['title'];

			// Same guard as in the lookup loop above: an entry without a Title
			// cannot be rendered, so its placeholder is simply dropped.
			if ( $title === null ) {
				continue;
			}

			$query = $entry['query'] ?? [];
			$key = "$ns:$index";
			$searchkey = "<!--LINK'\" $key-->";
			$displayText = $entry['text'];
			if ( isset( $entry['selflink'] ) ) {
				$replacePairs[$searchkey] = Linker::makeSelfLinkObj( $title, $displayText, $query );
				continue;
			}
			if ( $displayText === '' ) {
				$displayText = null;
			} else {
				$displayText = new HtmlArmor( $displayText );
			}
			if ( !isset( $colours[$pdbk] ) ) {
				$colours[$pdbk] = 'new';
			}
			$attribs = [];
			if ( $colours[$pdbk] == 'new' ) {
				$linkCache->addBadLinkObj( $title );
				$output->addLink( $title, 0 );
				$link = $linkRenderer->makeBrokenLink(
					$title, $displayText, $attribs, $query
				);
			} else {
				$link = $linkRenderer->makePreloadedLink(
					$title, $displayText, $colours[$pdbk], $attribs, $query
				);
			}

			$replacePairs[$searchkey] = $link;
		}
	}

	# Do the thing
	$text = preg_replace_callback(
		'/(<!--LINK\'" .*?-->)/',
		function ( array $matches ) use ( $replacePairs ) {
			// Placeholders this holder knows nothing about are removed, as
			// before, but without reading an undefined array key.
			return $replacePairs[$matches[1]] ?? '';
		},
		$text
	);
}
422 
/**
 * Replace interwiki links.
 *
 * Every stored interwiki entry is rendered with the parent's link renderer and
 * registered on the parser output; the resulting HTML then replaces the
 * matching placeholders in the buffer.
 *
 * @param string &$text Buffer that is modified in place
 */
protected function replaceInterwiki( &$text ) {
	if ( empty( $this->interwikis ) ) {
		return;
	}

	# Make interwiki link HTML
	$output = $this->parent->getOutput();
	$replacePairs = [];
	$linkRenderer = $this->parent->getLinkRenderer();
	foreach ( $this->interwikis as $key => $link ) {
		$replacePairs[$key] = $linkRenderer->makeLink(
			$link['title'],
			new HtmlArmor( $link['text'] )
		);
		$output->addInterwikiLink( $link['title'] );
	}

	$text = preg_replace_callback(
		'/<!--IWLINK\'" (.*?)-->/',
		function ( array $matches ) use ( $replacePairs ) {
			// Placeholders with an ID unknown to this holder are removed, as
			// before, but without reading an undefined array key.
			return $replacePairs[$matches[1]] ?? '';
		},
		$text
	);
}
453 
/**
 * Modify $this->internals and $colours according to language variant linking rules.
 *
 * For every internal link that is still considered broken, the title text is
 * converted to all language variants; variants that exist as pages replace the
 * stored title of the link. A link whose variant equals the current page title
 * is flagged as a self link. Categories of the parser output are replaced by an
 * existing variant when the original category page does not exist.
 *
 * @param array &$colours Map of prefixed DB key => colour (CSS classes or 'new')
 */
protected function doVariants( &$colours ) {
	$linkBatch = new LinkBatch();
	$variantMap = []; // maps $pdbkey_Variant => $keys (of link holders)
	$output = $this->parent->getOutput();
	$linkCache = MediaWikiServices::getInstance()->getLinkCache();
	$titlesToBeConverted = '';
	$titlesAttrs = [];

	// Concatenate titles to a single string, thus we only need auto convert the
	// single string to all variants. This would improve parser's performance
	// significantly.
	foreach ( $this->internals as $ns => $entries ) {
		if ( $ns == NS_SPECIAL ) {
			continue;
		}
		foreach ( $entries as $index => $entry ) {
			// Entries without a Title are skipped by replaceInternal() as well
			if ( $entry['title'] === null ) {
				continue;
			}
			$pdbk = $entry['pdbk'];
			// we only deal with new links (in its first query)
			if ( !isset( $colours[$pdbk] ) || $colours[$pdbk] === 'new' ) {
				$titlesAttrs[] = [ $index, $entry['title'] ];
				// separate titles with \0 because it would never appears
				// in a valid title
				$titlesToBeConverted .= $entry['title']->getText() . "\0";
			}
		}
	}

	// Now do the conversion and explode string to text of titles
	$titlesAllVariants = $this->parent->getContentLanguage()->
		autoConvertToAllVariants( rtrim( $titlesToBeConverted, "\0" ) );
	$allVariantsName = array_keys( $titlesAllVariants );
	foreach ( $titlesAllVariants as &$titlesVariant ) {
		$titlesVariant = explode( "\0", $titlesVariant );
	}
	// Break the reference so later code cannot overwrite the last element
	unset( $titlesVariant );

	// Then add variants of links to link batch
	$parentTitle = $this->parent->getTitle();
	foreach ( $titlesAttrs as $i => $attrs ) {
		/** @var Title $title */
		list( $index, $title ) = $attrs;
		$ns = $title->getNamespace();
		$text = $title->getText();

		foreach ( $allVariantsName as $variantName ) {
			$textVariant = $titlesAllVariants[$variantName][$i];
			if ( $textVariant === $text ) {
				continue;
			}

			$variantTitle = Title::makeTitle( $ns, $textVariant );

			// Self-link checking for mixed/different variant titles. At this point, we
			// already know the exact title does not exist, so the link cannot be to a
			// variant of the current title that exists as a separate page.
			if ( $variantTitle->equals( $parentTitle ) && !$title->hasFragment() ) {
				$this->internals[$ns][$index]['selflink'] = true;
				continue 2;
			}

			$linkBatch->addObj( $variantTitle );
			$variantMap[$variantTitle->getPrefixedDBkey()][] = "$ns:$index";
		}
	}

	// process categories, check if a category exists in some variant
	$categoryMap = []; // maps $category_variant => $category (dbkeys)
	$varCategories = []; // category replacements oldDBkey => newDBkey
	foreach ( $output->getCategoryLinks() as $category ) {
		$categoryTitle = Title::makeTitleSafe( NS_CATEGORY, $category );
		$linkBatch->addObj( $categoryTitle );
		$variants = $this->parent->getContentLanguage()->autoConvertToAllVariants( $category );
		foreach ( $variants as $variant ) {
			if ( $variant !== $category ) {
				$variantTitle = Title::makeTitleSafe( NS_CATEGORY, $variant );
				if ( $variantTitle === null ) {
					continue;
				}
				$linkBatch->addObj( $variantTitle );
				$categoryMap[$variant] = [ $category, $categoryTitle ];
			}
		}
	}

	if ( !$linkBatch->isEmpty() ) {
		// construct query
		$dbr = wfGetDB( DB_REPLICA );
		// The rows are handed to the link cache and their page_id is read below,
		// so the fields LinkCache needs must be part of the selection.
		$fields = array_merge(
			LinkCache::getSelectFields(),
			[ 'page_namespace', 'page_title' ]
		);

		$varRes = $dbr->select( 'page',
			$fields,
			$linkBatch->constructSet( 'page', $dbr ),
			__METHOD__
		);

		$linkcolour_ids = [];
		$linkRenderer = $this->parent->getLinkRenderer();

		// for each found variants, figure out link holders and replace
		foreach ( $varRes as $s ) {
			$variantTitle = Title::makeTitle( $s->page_namespace, $s->page_title );
			$varPdbk = $variantTitle->getPrefixedDBkey();
			$vardbk = $variantTitle->getDBkey();

			$holderKeys = [];
			if ( isset( $variantMap[$varPdbk] ) ) {
				$holderKeys = $variantMap[$varPdbk];
				$linkCache->addGoodLinkObjFromRow( $variantTitle, $s );
				$output->addLink( $variantTitle, $s->page_id );
			}

			// loop over link holders
			foreach ( $holderKeys as $key ) {
				list( $ns, $index ) = explode( ':', $key, 2 );
				$entry =& $this->internals[$ns][$index];
				$pdbk = $entry['pdbk'];

				if ( !isset( $colours[$pdbk] ) || $colours[$pdbk] === 'new' ) {
					// found link in some of the variants, replace the link holder data
					$entry['title'] = $variantTitle;
					$entry['pdbk'] = $varPdbk;

					// set pdbk and colour
					$colours[$varPdbk] = $linkRenderer->getLinkClasses( $variantTitle );
					// NOTE(review): this records the link's previous key, not $varPdbk - confirm
					$linkcolour_ids[$s->page_id] = $pdbk;
				}
				// Do not leave a reference into $this->internals behind
				unset( $entry );
			}

			// check if the object is a variant of a category
			if ( isset( $categoryMap[$vardbk] ) ) {
				list( $oldkey, $oldtitle ) = $categoryMap[$vardbk];
				if ( !isset( $varCategories[$oldkey] ) && !$oldtitle->exists() ) {
					$varCategories[$oldkey] = $vardbk;
				}
			}
		}
		Hooks::run( 'GetLinkColours', [ $linkcolour_ids, &$colours, $this->parent->getTitle() ] );

		// rebuild the categories in original order (if there are replacements)
		if ( count( $varCategories ) > 0 ) {
			$newCats = [];
			$originalCats = $output->getCategories();
			foreach ( $originalCats as $cat => $sortkey ) {
				// make the replacement
				if ( array_key_exists( $cat, $varCategories ) ) {
					$newCats[$varCategories[$cat]] = $sortkey;
				} else {
					$newCats[$cat] = $sortkey;
				}
			}
			$output->setCategoryLinks( $newCats );
		}
	}
}
614 
/**
 * Replace link placeholders with plain text of links
 * (not HTML-formatted).
 *
 * Both internal and interwiki placeholders are handled; the lookup itself is
 * done by replaceTextCallback().
 *
 * @param string $text
 * @return string
 */
public function replaceText( $text ) {
	return preg_replace_callback(
		'/<!--(LINK|IWLINK)\'" (.*?)-->/',
		[ $this, 'replaceTextCallback' ],
		$text
	);
}
630 
/**
 * Callback for replaceText().
 *
 * Looks up the plain link text belonging to a matched placeholder. Placeholders
 * that are unknown to this holder, or whose key is malformed, are returned
 * unchanged.
 *
 * @param string[] $matches Match array: [0] whole placeholder, [1] 'LINK' or
 *   'IWLINK', [2] key ("ns:id" for LINK, "id" for IWLINK)
 * @return string
 */
public function replaceTextCallback( $matches ) {
	list( , $type, $key ) = $matches;
	if ( $type === 'LINK' ) {
		// The pattern in replaceText() accepts any key, so a missing ':' must not
		// lead to reading an undefined offset.
		list( $ns, $index ) = array_pad( explode( ':', $key, 2 ), 2, null );
		if ( $index !== null && isset( $this->internals[$ns][$index]['text'] ) ) {
			return $this->internals[$ns][$index]['text'];
		}
	} elseif ( $type === 'IWLINK' ) {
		if ( isset( $this->interwikis[$key]['text'] ) ) {
			return $this->interwikis[$key]['text'];
		}
	}
	return $matches[0];
}
652 }
replace(&$text)
Replace link placeholders with actual links, in the buffer.
static splitTrail( $trail)
Split a link trail, return the "inside" portion and the remainder of the trail as a two-element array...
Definition: Linker.php:1770
isBig()
Returns true if the memory requirements of this object are getting large.
wfGetDB( $db, $groups=[], $wiki=false)
Get a Database object.
const NS_SPECIAL
Definition: Defines.php:49
array [][] $internals
doVariants(&$colours)
Modify $this->internals and $colours according to language variant linking rules. ...
replaceInterwiki(&$text)
Replace interwiki links.
makeHolder( $nt, $text='', $query=[], $trail='', $prefix='')
Make a link placeholder.
mergeForeign( $other, $texts)
Merge a LinkHolderArray from another parser instance into this one.
Class representing a list of titles. The execute() method checks them all for existence and adds them ...
Definition: LinkBatch.php:34
replaceInternal(&$text)
Replace internal links. SecurityCheck-XSS gets confused with $entry['pdbk'].
static makeSelfLinkObj( $nt, $html='', $query='', $trail='', $prefix='')
Make appropriate markup for a link to the current article.
Definition: Linker.php:163
__sleep()
Don't serialize the parent object; it is big, and not needed when it is a parameter to mergeForeign()...
const NS_CATEGORY
Definition: Defines.php:74
replaceText( $text)
Replace link placeholders with plain text of links (not HTML-formatted).
__construct( $parent)
__destruct()
Reduce memory usage to reduce the impact of circular references.
static getSelectFields()
Fields that LinkCache needs to select.
Definition: LinkCache.php:219
static makeTitleSafe( $ns, $title, $fragment='', $interwiki='')
Create a new Title from a namespace index and a DB key.
Definition: Title.php:612
getSubArray( $text)
Get a subset of the current LinkHolderArray which is sufficient to interpret the given text...
static makeTitle( $ns, $title, $fragment='', $interwiki='')
Create a new Title from a namespace index and a DB key.
Definition: Title.php:586
__wakeup()
Recreate the Title objects.
clear()
Clear all stored link holders.
replaceTextCallback( $matches)
Callback for replaceText()
const DB_REPLICA
Definition: defines.php:25
$wgLinkHolderBatchSize
LinkHolderArray batch size. For debugging.
merge( $other)
Merge another LinkHolderArray into this one.
static run( $event, array $args=[], $deprecatedVersion=null)
Call hook functions defined in Hooks::register and $wgHooks.
Definition: Hooks.php:200
$matches
static newFromText( $text, $defaultNamespace=NS_MAIN)
Create a new Title from text, such as what one would find in a link.
Definition: Title.php:319