MediaWiki  master
MediaWikiTitleCodec.php
Go to the documentation of this file.
1 <?php
27 use Wikimedia\IPUtils;
28 
/** @var Language Language object used for namespace names, lower-casing and first-letter capitalisation */
42  protected $language;
43 
/** @var GenderCache Source of user genders for gender-specific namespace names */
45  protected $genderCache;
46 
/** @var array Interwiki prefixes that are treated as referring to the local wiki */
48  protected $localInterwikis;
49 
/** @var InterwikiLookup Service used to decide whether a prefix is a valid interwiki */
51  protected $interwikiLookup;
52 
/** @var NamespaceInfo Namespace metadata (existence, canonical names, capitalisation, gender distinction) */
54  protected $nsInfo;
55 
/**
 * @var callable Factory producing the exception thrown for malformed titles;
 *  called as ( $errorMessage, $titleText, $errorMessageParameters ).
 */
63  private $createMalformedTitleException;
64 
/**
 * Store the services needed for parsing and formatting titles, and install the
 * default factory for malformed-title exceptions.
 *
 * @param Language $language Language used for namespace names and case handling
 * @param GenderCache $genderCache Gender lookup for gendered namespace names
 * @param string[]|string $localInterwikis Interwiki prefix(es) that point back at
 *  the local wiki; the value is cast to an array
 * @param InterwikiLookup $interwikiLookup Service for validating interwiki prefixes
 * @param NamespaceInfo $nsInfo Namespace metadata service
 */
public function __construct(
	Language $language,
	GenderCache $genderCache,
	$localInterwikis,
	InterwikiLookup $interwikiLookup,
	NamespaceInfo $nsInfo
) {
	$this->language = $language;
	$this->genderCache = $genderCache;
	$this->localInterwikis = (array)$localInterwikis;
	$this->interwikiLookup = $interwikiLookup;
	$this->nsInfo = $nsInfo;

	// Default callback is to return a real MalformedTitleException,
	// callback signature matches constructor
	$this->createMalformedTitleException = static function (
		$errorMessage,
		$titleText = null,
		$errorMessageParameters = []
	) {
		return new MalformedTitleException( $errorMessage, $titleText, $errorMessageParameters );
	};
}
96 
/**
 * Replace the factory used to build malformed-title exceptions.
 *
 * Only usable while the MW_PHPUNIT_TEST constant is defined.
 *
 * @param callable $callback Replacement factory, invoked with
 *  ( $errorMessage, $titleText, $errorMessageParameters )
 * @throws RuntimeException When MW_PHPUNIT_TEST is not defined
 */
public function overrideCreateMalformedTitleExceptionCallback( callable $callback ) {
	// @codeCoverageIgnoreStart
	$runningUnderPhpUnit = defined( 'MW_PHPUNIT_TEST' );
	if ( $runningUnderPhpUnit === false ) {
		throw new RuntimeException( __METHOD__ . ' can only be used in tests' );
	}
	// @codeCoverageIgnoreEnd

	$this->createMalformedTitleException = $callback;
}
109 
/**
 * Look up the localised name of a namespace, using the gender-specific form
 * when both the language and the namespace distinguish gender.
 *
 * @param int $namespace Namespace ID
 * @param string $text Title text; treated as a user name for the gender lookup
 * @return string Namespace name
 * @throws InvalidArgumentException When the language reports no name for the ID
 */
public function getNamespaceName( $namespace, $text ) {
	$useGenderedName = $this->language->needsGenderDistinction()
		&& $this->nsInfo->hasGenderDistinction( $namespace );

	if ( $useGenderedName ) {
		// NOTE: we are assuming here that the title text is a user name!
		$name = $this->language->getGenderNsText(
			$namespace,
			$this->genderCache->getGenderOf( $text, __METHOD__ )
		);
	} else {
		$name = $this->language->getNsText( $namespace );
	}

	if ( $name !== false ) {
		return $name;
	}

	throw new InvalidArgumentException( 'Unknown namespace ID: ' . $namespace );
}
136 
/**
 * Assemble the display form "interwiki:Namespace:text#fragment" of a title,
 * with every underscore in the result turned into a space.
 *
 * @param int|bool $namespace Namespace ID; a value loosely equal to 0 adds no namespace prefix
 * @param string $text Page title text
 * @param string $fragment Fragment to append after '#', or '' for none
 * @param string $interwiki Interwiki prefix, or '' for none
 * @return string
 */
public function formatTitle( $namespace, $text, $fragment = '', $interwiki = '' ) {
	$prefix = $interwiki !== '' ? $interwiki . ':' : '';

	if ( $namespace != 0 ) {
		try {
			$nsName = $this->getNamespaceName( $namespace, $text );
		} catch ( InvalidArgumentException $e ) {
			// See T165149. Awkward, but better than erroneously linking to the main namespace.
			$nsName = $this->language->getNsText( NS_SPECIAL ) . ":Badtitle/NS{$namespace}";
		}

		$prefix .= $nsName . ':';
	}

	$suffix = $fragment !== '' ? '#' . $fragment : '';

	// The replacement covers prefix, text and fragment alike.
	return str_replace( '_', ' ', $prefix . $text . $suffix );
}
175 
/**
 * Parse a title string into a TitleValue.
 *
 * @param string $text Title text; character references are decoded and the
 *  result normalized before splitting
 * @param int $defaultNamespace Namespace to assume when the text carries no prefix
 * @return TitleValue
 * @throws MalformedTitleException Propagated from splitTitleString()
 */
public function parseTitle( $text, $defaultNamespace = NS_MAIN ) {
	// Convert things like &eacute; &#257; or &#x3017; into normalized (T16952) text
	$normalized = Sanitizer::decodeCharReferencesAndNormalize( $text );

	// NOTE: this is an ugly kludge that allows this class to share the
	// code for parsing with the old Title class. The parser code should
	// be refactored to avoid this.
	[
		'namespace' => $ns,
		'dbkey' => $dbkey,
		'fragment' => $fragment,
		'interwiki' => $interwiki,
	] = $this->splitTitleString( $normalized, $defaultNamespace );

	return new TitleValue( $ns, $dbkey, $fragment, $interwiki );
}
201 
/**
 * Build a TitleValue from its components, or return null when they do not
 * form a valid title.
 *
 * The components are joined into a full title string using the canonical
 * namespace name and then passed through splitTitleString() for validation.
 *
 * @param int $namespace Namespace ID
 * @param string $text Page title text
 * @param string $fragment Fragment, or '' for none
 * @param string $interwiki Interwiki prefix, or '' for none
 * @return TitleValue|null Null when the namespace does not exist or the title is malformed
 */
public function makeTitleValueSafe( $namespace, $text, $fragment = '', $interwiki = '' ) {
	if ( !$this->nsInfo->exists( $namespace ) ) {
		return null;
	}

	$nsPrefix = $this->nsInfo->getCanonicalName( $namespace );
	if ( $nsPrefix == '' ) {
		$assembled = $text;
	} else {
		$assembled = "$nsPrefix:$text";
	}

	if ( strval( $interwiki ) != '' ) {
		$assembled = "$interwiki:$assembled";
	}
	if ( strval( $fragment ) != '' ) {
		$assembled .= '#' . $fragment;
	}

	try {
		$split = $this->splitTitleString( $assembled );
	} catch ( MalformedTitleException $e ) {
		return null;
	}

	return new TitleValue(
		$split['namespace'],
		$split['dbkey'],
		$split['fragment'],
		$split['interwiki']
	);
}
235 
/**
 * Return the title text (no namespace, spaces instead of underscores).
 *
 * @param LinkTarget|PageReference $title
 * @return string For a LinkTarget, its getText(); for a PageReference, its
 *  DB key with underscores replaced by spaces
 * @throws InvalidArgumentException When $title is neither a LinkTarget nor a PageReference
 */
public function getText( $title ) {
	if ( $title instanceof LinkTarget ) {
		return $title->getText();
	}

	if ( $title instanceof PageReference ) {
		return strtr( $title->getDBKey(), '_', ' ' );
	}

	// get_class() rejects non-objects (TypeError on PHP 8), which would mask the
	// intended exception for scalar/array/null input.
	$type = is_object( $title ) ? get_class( $title ) : gettype( $title );
	throw new InvalidArgumentException( '$title has invalid type: ' . $type );
}
252 
/**
 * Return the title with interwiki and namespace prefixes, without a fragment.
 *
 * For a LinkTarget the result is computed once and stored on the object's
 * prefixedText property; later calls return the stored value.
 *
 * @param LinkTarget|PageReference $title
 * @return string
 * @throws InvalidArgumentException When $title is neither a LinkTarget nor a PageReference
 */
public function getPrefixedText( $title ) {
	if ( $title instanceof LinkTarget ) {
		if ( !isset( $title->prefixedText ) ) {
			$title->prefixedText = $this->formatTitle(
				$title->getNamespace(),
				$title->getText(),
				'',
				$title->getInterwiki()
			);
		}
		return $title->prefixedText;
	}

	if ( $title instanceof PageReference ) {
		// NOTE(review): presumably rejects references that belong to another wiki — confirm.
		$title->assertWiki( PageReference::LOCAL );
		return $this->formatTitle(
			$title->getNamespace(),
			$this->getText( $title )
		);
	}

	// get_class() rejects non-objects (TypeError on PHP 8), which would mask the
	// intended exception for scalar/array/null input.
	$type = is_object( $title ) ? get_class( $title ) : gettype( $title );
	throw new InvalidArgumentException( '$title has invalid type: ' . $type );
}
282 
/**
 * Return the prefixed title in DB-key form (underscores instead of spaces),
 * without a fragment.
 *
 * @param LinkTarget|PageReference $target
 * @return string
 * @throws InvalidArgumentException When $target is neither a LinkTarget nor a PageReference
 */
public function getPrefixedDBkey( $target ) {
	if ( $target instanceof LinkTarget ) {
		// formatTitle() emits spaces; convert them back to underscores.
		return strtr( $this->formatTitle(
			$target->getNamespace(),
			$target->getDBkey(),
			'',
			$target->getInterwiki()
		), ' ', '_' );
	}

	if ( $target instanceof PageReference ) {
		// NOTE(review): presumably rejects references that belong to another wiki — confirm.
		$target->assertWiki( PageReference::LOCAL );
		return strtr( $this->formatTitle(
			$target->getNamespace(),
			$target->getDBkey()
		), ' ', '_' );
	}

	// get_class() rejects non-objects (TypeError on PHP 8), which would mask the
	// intended exception. The message names the actual parameter, $target.
	$type = is_object( $target ) ? get_class( $target ) : gettype( $target );
	throw new InvalidArgumentException( '$target has invalid type: ' . $type );
}
307 
/**
 * Return the full display form of a title.
 *
 * For a LinkTarget this includes interwiki prefix, namespace and fragment.
 * For a PageReference only namespace and text are passed to formatTitle().
 *
 * @param LinkTarget|PageReference $title
 * @return string
 * @throws InvalidArgumentException When $title is neither a LinkTarget nor a PageReference
 */
public function getFullText( $title ) {
	if ( $title instanceof LinkTarget ) {
		return $this->formatTitle(
			$title->getNamespace(),
			$title->getText(),
			$title->getFragment(),
			$title->getInterwiki()
		);
	}

	if ( $title instanceof PageReference ) {
		// NOTE(review): presumably rejects references that belong to another wiki — confirm.
		$title->assertWiki( PageReference::LOCAL );
		return $this->formatTitle(
			$title->getNamespace(),
			$this->getText( $title )
		);
	}

	// get_class() rejects non-objects (TypeError on PHP 8), which would mask the
	// intended exception for scalar/array/null input.
	$type = is_object( $title ) ? get_class( $title ) : gettype( $title );
	throw new InvalidArgumentException( '$title has invalid type: ' . $type );
}
333 
/**
 * Validate, normalize and split a title string into its components.
 *
 * Processing order: spaces become underscores, bidi override characters are
 * stripped, whitespace runs are collapsed, then namespace/interwiki prefixes
 * and the fragment are separated before the remaining DB key is checked
 * against the title rules (illegal characters, relative paths, tilde
 * sequences, length, leading colon).
 *
 * @param string $text Title text; character references are expected to be decoded already
 * @param int $defaultNamespace Namespace used when the text carries no namespace prefix
 * @return array Map with the keys 'interwiki' (string), 'local_interwiki' (bool),
 *  'fragment' (string), 'namespace' (int) and 'dbkey' (string)
 * @throws MalformedTitleException Or whatever the configured exception callback creates
 */
public function splitTitleString( $text, $defaultNamespace = NS_MAIN ) {
	$dbkey = str_replace( ' ', '_', $text );

	# Initialisation
	$parts = [
		'interwiki' => '',
		'local_interwiki' => false,
		'fragment' => '',
		'namespace' => (int)$defaultNamespace,
		'dbkey' => $dbkey,
	];

	# Strip Unicode bidi override characters.
	# Sometimes they slip into cut-n-pasted page titles, where the
	# override chars get included in list displays.
	$dbkey = preg_replace( '/[\x{200E}\x{200F}\x{202A}-\x{202E}]+/u', '', $dbkey );

	if ( $dbkey === null ) {
		# Regex had an error. Most likely this is caused by invalid UTF-8
		$exception = ( $this->createMalformedTitleException )( 'title-invalid-utf8', $text );
		throw $exception;
	}

	# Clean up whitespace
	$dbkey = preg_replace(
		'/[ _\xA0\x{1680}\x{180E}\x{2000}-\x{200A}\x{2028}\x{2029}\x{202F}\x{205F}\x{3000}]+/u',
		'_',
		$dbkey
	);
	$dbkey = trim( $dbkey, '_' );

	if ( str_contains( $dbkey, UtfNormal\Constants::UTF8_REPLACEMENT ) ) {
		# Contained illegal UTF-8 sequences or forbidden Unicode chars.
		$exception = ( $this->createMalformedTitleException )( 'title-invalid-utf8', $text );
		throw $exception;
	}

	$parts['dbkey'] = $dbkey;

	# Initial colon indicates main namespace rather than specified default
	# but should not create invalid {ns,title} pairs such as {0,Project:Foo}
	if ( $dbkey !== '' && $dbkey[0] === ':' ) {
		$parts['namespace'] = NS_MAIN;
		$dbkey = substr( $dbkey, 1 ); # remove the colon but continue processing
		$dbkey = trim( $dbkey, '_' ); # remove any subsequent whitespace
	}

	if ( $dbkey === '' ) {
		$exception = ( $this->createMalformedTitleException )( 'title-invalid-empty', $text );
		throw $exception;
	}

	# Namespace or interwiki prefix
	$prefixRegexp = "/^(.+?)_*:_*(.*)$/S";
	do {
		$m = [];
		if ( preg_match( $prefixRegexp, $dbkey, $m ) ) {
			$p = $m[1];
			$ns = $this->language->getNsIndex( $p );
			if ( $ns !== false ) {
				# Ordinary namespace
				$dbkey = $m[2];
				$parts['namespace'] = $ns;
				# For Talk:X pages, check if X has a "namespace" prefix
				if ( $ns === NS_TALK && preg_match( $prefixRegexp, $dbkey, $x ) ) {
					# Disallow Talk:File:x and Talk:Interwiki:x type titles...
					# (namespace check first, interwiki lookup only if that fails)
					if (
						$this->language->getNsIndex( $x[1] ) ||
						$this->interwikiLookup->isValidInterwiki( $x[1] )
					) {
						$exception = ( $this->createMalformedTitleException )(
							'title-invalid-talk-namespace',
							$text
						);
						throw $exception;
					}
				}
			} elseif ( $this->interwikiLookup->isValidInterwiki( $p ) ) {
				# Interwiki link
				$dbkey = $m[2];
				$parts['interwiki'] = $this->language->lc( $p );

				# Redundant interwiki prefix to the local wiki
				foreach ( $this->localInterwikis as $localIW ) {
					if ( strcasecmp( $parts['interwiki'], $localIW ) == 0 ) {
						if ( $dbkey === '' ) {
							# Empty self-links should point to the Main Page, to ensure
							# compatibility with cross-wiki transclusions and the like.
							$mainPage = Title::newMainPage();
							return [
								'interwiki' => $mainPage->getInterwiki(),
								'local_interwiki' => true,
								'fragment' => $mainPage->getFragment(),
								'namespace' => $mainPage->getNamespace(),
								'dbkey' => $mainPage->getDBkey(),
							];
						}
						$parts['interwiki'] = '';
						# local interwikis should behave like initial-colon links
						$parts['local_interwiki'] = true;

						# Do another namespace split...
						continue 2;
					}
				}

				# If there's an initial colon after the interwiki, that also
				# resets the default namespace
				if ( $dbkey !== '' && $dbkey[0] === ':' ) {
					$parts['namespace'] = NS_MAIN;
					$dbkey = substr( $dbkey, 1 );
					$dbkey = trim( $dbkey, '_' );
				}
			}
			# If there's no recognized interwiki or namespace,
			# then let the colon expression be part of the title.
		}
		break;
	} while ( true );

	$fragment = strstr( $dbkey, '#' );
	if ( $fragment !== false ) {
		$parts['fragment'] = str_replace( '_', ' ', substr( $fragment, 1 ) );
		$dbkey = substr( $dbkey, 0, strlen( $dbkey ) - strlen( $fragment ) );
		# remove whitespace again: prevents "Foo_bar_#"
		# becoming "Foo_bar_"
		$dbkey = rtrim( $dbkey, "_" );
	}

	# Reject illegal characters.
	$rxTc = self::getTitleInvalidRegex();
	$matches = [];
	if ( preg_match( $rxTc, $dbkey, $matches ) ) {
		$exception = ( $this->createMalformedTitleException )(
			'title-invalid-characters',
			$text,
			[ $matches[0] ]
		);
		throw $exception;
	}

	# Pages with "/./" or "/../" appearing in the URLs will often be un-
	# reachable due to the way web browsers deal with 'relative' URLs.
	# Also, they conflict with subpage syntax. Forbid them explicitly.
	if (
		str_contains( $dbkey, '.' ) &&
		(
			$dbkey === '.' || $dbkey === '..' ||
			str_starts_with( $dbkey, './' ) ||
			str_starts_with( $dbkey, '../' ) ||
			str_contains( $dbkey, '/./' ) ||
			str_contains( $dbkey, '/../' ) ||
			str_ends_with( $dbkey, '/.' ) ||
			str_ends_with( $dbkey, '/..' )
		)
	) {
		$exception = ( $this->createMalformedTitleException )( 'title-invalid-relative', $text );
		throw $exception;
	}

	# Magic tilde sequences? Nu-uh!
	if ( str_contains( $dbkey, '~~~' ) ) {
		$exception = ( $this->createMalformedTitleException )( 'title-invalid-magic-tilde', $text );
		throw $exception;
	}

	# Limit the size of titles to 255 bytes. This is typically the size of the
	# underlying database field. We make an exception for special pages, which
	# don't need to be stored in the database, and may edge over 255 bytes due
	# to subpage syntax for long titles, e.g. [[Special:Block/Long name]]
	$maxLength = ( $parts['namespace'] !== NS_SPECIAL ) ? 255 : 512;
	if ( strlen( $dbkey ) > $maxLength ) {
		$exception = ( $this->createMalformedTitleException )(
			'title-invalid-too-long',
			$text,
			[ Message::numParam( $maxLength ) ]
		);
		throw $exception;
	}

	# Normally, all wiki links are forced to have an initial capital letter so [[foo]]
	# and [[Foo]] point to the same place. Don't force it for interwikis, since the
	# other site might be case-sensitive.
	if ( $parts['interwiki'] === '' && $this->nsInfo->isCapitalized( $parts['namespace'] ) ) {
		$dbkey = $this->language->ucfirst( $dbkey );
	}

	# Can't make a link to a namespace alone... "empty" local links can only be
	# self-links with a fragment identifier.
	if ( $dbkey == '' && $parts['interwiki'] === '' && $parts['namespace'] !== NS_MAIN ) {
		$exception = ( $this->createMalformedTitleException )( 'title-invalid-empty', $text );
		throw $exception;
	}

	// Allow IPv6 usernames to start with '::' by canonicalizing IPv6 titles.
	// IP names are not allowed for accounts, and can only be referring to
	// edits from the IP. Given '::' abbreviations and caps/lowercaps,
	// there are numerous ways to present the same IP. Having sp:contribs scan
	// them all is silly and having some show the edits and others not is
	// inconsistent. Same for talk/userpages. Keep them normalized instead.
	if (
		$dbkey !== '' &&
		( $parts['namespace'] === NS_USER || $parts['namespace'] === NS_USER_TALK )
	) {
		$dbkey = IPUtils::sanitizeIP( $dbkey );
		// IPUtils::sanitizeIP return null only for bad input
		'@phan-var string $dbkey';
	}

	// Any remaining initial :s are illegal.
	if ( $dbkey !== '' && $dbkey[0] == ':' ) {
		$exception = ( $this->createMalformedTitleException )( 'title-invalid-leading-colon', $text );
		throw $exception;
	}

	// Fill fields
	$parts['dbkey'] = $dbkey;

	// Check to ensure that the return value can be used to construct a TitleValue.
	// All issues should in theory be caught above, this is here to enforce consistency.
	try {
		TitleValue::assertValidSpec(
			$parts['namespace'],
			$parts['dbkey'],
			$parts['fragment'],
			$parts['interwiki']
		);
	} catch ( InvalidArgumentException $ex ) {
		$exception = ( $this->createMalformedTitleException )(
			'title-invalid',
			$text,
			[ $ex->getMessage() ]
		);
		throw $exception;
	}

	return $parts;
}
586 
/**
 * Return a regex matching any character or sequence that is not allowed in a title.
 *
 * The pattern is built on first use and kept in a function-static variable.
 *
 * @return string PCRE pattern, including delimiters and the S modifier
 */
public static function getTitleInvalidRegex() {
	static $cachedPattern = false;

	if ( $cachedPattern ) {
		return $cachedPattern;
	}

	# Matching titles will be held as illegal.
	$forbidden = [
		# Any character not allowed is forbidden...
		'[^' . Title::legalChars() . ']',
		# URL percent encoding sequences interfere with the ability
		# to round-trip titles -- you can't link to them consistently.
		'%[0-9A-Fa-f]{2}',
		# XML/HTML character references produce similar issues.
		'&[A-Za-z0-9\x80-\xff]+;',
	];

	$cachedPattern = '/' . implode( '|', $forbidden ) . '/S';

	return $cachedPattern;
}
613 }
const NS_USER
Definition: Defines.php:66
const NS_MAIN
Definition: Defines.php:64
const NS_SPECIAL
Definition: Defines.php:53
const NS_TALK
Definition: Defines.php:65
const NS_USER_TALK
Definition: Defines.php:67
$matches
Caches user genders when needed to use correct namespace aliases.
Definition: GenderCache.php:34
Base class for language-specific code.
Definition: Language.php:56
MalformedTitleException is thrown when a TitleParser is unable to parse a title string.
A codec for MediaWiki page titles.
static getTitleInvalidRegex()
Returns a simple regex that will match on characters and sequences invalid in titles.
splitTitleString( $text, $defaultNamespace=NS_MAIN)
Validates, normalizes and splits a title string.
overrideCreateMalformedTitleExceptionCallback(callable $callback)
formatTitle( $namespace, $text, $fragment='', $interwiki='')
__construct(Language $language, GenderCache $genderCache, $localInterwikis, InterwikiLookup $interwikiLookup, NamespaceInfo $nsInfo)
InterwikiLookup $interwikiLookup
getNamespaceName( $namespace, $text)
parseTitle( $text, $defaultNamespace=NS_MAIN)
Parses the given text and constructs a TitleValue.
makeTitleValueSafe( $namespace, $text, $fragment='', $interwiki='')
Given a namespace and title, return a TitleValue if valid, or null if invalid.
Represents a title within MediaWiki.
Definition: Title.php:82
static numParam( $num)
Definition: Message.php:1146
This is a utility class for dealing with namespaces that encodes all the "magic" behaviors of them ba...
static decodeCharReferencesAndNormalize( $text)
Decode any character references, numeric or named entities, in the next and normalize the resulting s...
Definition: Sanitizer.php:1388
Represents a page (or page fragment) title within MediaWiki.
Definition: TitleValue.php:40
static assertValidSpec( $namespace, $title, $fragment='', $interwiki='')
Assert that the given parameters could be used to construct a TitleValue object.
Definition: TitleValue.php:165
Service interface for looking up Interwiki records.
Interface for objects (potentially) representing a page that can be viewable and linked to on a wiki.
A title formatter service for MediaWiki.
A title parser service for MediaWiki.
Definition: TitleParser.php:33