MediaWiki  master
MediaWikiTitleCodec.php
Go to the documentation of this file.
1 <?php
24 namespace MediaWiki\Title;
25 
26 use GenderCache;
27 use InvalidArgumentException;
28 use Language;
29 use LogicException;
34 use Message;
35 use Wikimedia\IPUtils;
36 
/** @var Language Language object handed to the constructor; used for namespace names and case handling */
50  protected $language;
51 
/** @var GenderCache Used by getNamespaceName() to look up the gender associated with a title text */
53  protected $genderCache;
54 
/** @var array Interwiki prefixes treated as pointing to the local wiki; the constructor casts its argument to array */
56  protected $localInterwikis;
57 
/** @var InterwikiLookup Used by splitTitleString() to decide whether a prefix is a valid interwiki */
59  protected $interwikiLookup;
60 
/** @var NamespaceInfo Used for namespace existence, canonical names, gender distinction and capitalization checks */
62  protected $nsInfo;
63 
/**
 * @var callable Factory for the exception thrown on malformed titles. It is called as
 *   ( $errorMessage, $titleText, $errorMessageParameters ); the constructor installs a
 *   default that creates a MalformedTitleException.
 */
71  private $createMalformedTitleException;
72 
/**
 * Store the collaborating services and install the default exception factory.
 *
 * @param Language $language Language object used for namespace names and case handling
 * @param GenderCache $genderCache Gender lookup used for gendered namespace names
 * @param string|string[] $localInterwikis Interwiki prefix(es) that refer to the local wiki;
 *   the value is cast to an array, so a single string is accepted
 * @param InterwikiLookup $interwikiLookup Lookup used to validate interwiki prefixes
 * @param NamespaceInfo $nsInfo Namespace metadata service
 */
public function __construct(
	Language $language,
	GenderCache $genderCache,
	$localInterwikis,
	InterwikiLookup $interwikiLookup,
	NamespaceInfo $nsInfo
) {
	$this->language = $language;
	$this->genderCache = $genderCache;
	$this->localInterwikis = (array)$localInterwikis;
	$this->interwikiLookup = $interwikiLookup;
	$this->nsInfo = $nsInfo;

	// Default callback is to return a real MalformedTitleException,
	// callback signature matches constructor
	$this->createMalformedTitleException = static function (
		$errorMessage,
		$titleText = null,
		$errorMessageParameters = []
	) {
		return new MalformedTitleException( $errorMessage, $titleText, $errorMessageParameters );
	};
}
104 
/**
 * Replace the factory used to create the exception thrown for malformed titles.
 *
 * Only usable while the MW_PHPUNIT_TEST constant is defined.
 *
 * @param callable $callback Called as ( $errorMessage, $titleText, $errorMessageParameters );
 *   its return value is thrown by splitTitleString()
 * @throws LogicException If MW_PHPUNIT_TEST is not defined
 */
public function overrideCreateMalformedTitleExceptionCallback( callable $callback ) {
	// @codeCoverageIgnoreStart
	$insideTestRun = defined( 'MW_PHPUNIT_TEST' );
	if ( $insideTestRun === false ) {
		throw new LogicException( __METHOD__ . ' can only be used in tests' );
	}
	// @codeCoverageIgnoreEnd
	$this->createMalformedTitleException = $callback;
}
117 
/**
 * Look up the localized name of a namespace, using the gendered form where applicable.
 *
 * The gendered form is used only when both the language and the namespace
 * report a gender distinction.
 *
 * @param int $namespace Namespace ID
 * @param string $text Title text; passed to the gender cache as if it were a user name
 * @return string Namespace name as returned by the language object
 * @throws InvalidArgumentException If the language object returns false for the namespace
 */
public function getNamespaceName( $namespace, $text ) {
	$useGenderedName = $this->language->needsGenderDistinction()
		&& $this->nsInfo->hasGenderDistinction( $namespace );

	// NOTE: the gendered branch assumes that the title text is a user name!
	$name = $useGenderedName
		? $this->language->getGenderNsText(
			$namespace,
			$this->genderCache->getGenderOf( $text, __METHOD__ )
		)
		: $this->language->getNsText( $namespace );

	if ( $name !== false ) {
		return $name;
	}

	throw new InvalidArgumentException( 'Unknown namespace ID: ' . $namespace );
}
144 
/**
 * Build the text form "interwiki:Namespace:text#fragment" from title components.
 *
 * Empty interwiki and fragment components, and namespace 0, contribute nothing.
 * All underscores in the assembled string are turned into spaces.
 *
 * @param int $namespace Namespace ID
 * @param string $text Page title text or DB key
 * @param string $fragment Fragment without the leading '#'; '' for none
 * @param string $interwiki Interwiki prefix; '' for none
 * @return string
 */
public function formatTitle( $namespace, $text, $fragment = '', $interwiki = '' ) {
	$interwikiPrefix = $interwiki !== '' ? $interwiki . ':' : '';

	$namespacePrefix = '';
	if ( $namespace != 0 ) {
		try {
			$nsLabel = $this->getNamespaceName( $namespace, $text );
		} catch ( InvalidArgumentException $e ) {
			// See T165149. Awkward, but better than erroneously linking to the main namespace.
			$nsLabel = $this->language->getNsText( NS_SPECIAL ) . ":Badtitle/NS{$namespace}";
		}
		$namespacePrefix = $nsLabel . ':';
	}

	$fragmentSuffix = $fragment !== '' ? '#' . $fragment : '';

	return str_replace(
		'_',
		' ',
		$interwikiPrefix . $namespacePrefix . $text . $fragmentSuffix
	);
}
183 
/**
 * Parse a title string into a TitleValue.
 *
 * Character references (e.g. &eacute; &#257; or &#x3017;) are decoded and the
 * text normalized (T16952) before it is handed to splitTitleString().
 *
 * NOTE: going through splitTitleString() is a kludge that lets this class share
 * parsing code with the old Title class; the parser code should be refactored.
 *
 * @param string $text Title text to parse
 * @param int $defaultNamespace Namespace to assume when the text carries no prefix
 * @return TitleValue
 * @throws MalformedTitleException Propagated from splitTitleString()
 */
public function parseTitle( $text, $defaultNamespace = NS_MAIN ) {
	$parts = $this->splitTitleString(
		Sanitizer::decodeCharReferencesAndNormalize( $text ),
		$defaultNamespace
	);

	[
		'namespace' => $ns,
		'dbkey' => $dbkey,
		'fragment' => $fragment,
		'interwiki' => $interwiki,
	] = $parts;

	return new TitleValue( $ns, $dbkey, $fragment, $interwiki );
}
209 
/**
 * Given a namespace and title, return a TitleValue if valid, or null if invalid.
 *
 * The components are joined into a full title string (using the canonical
 * namespace name) and run through splitTitleString(), so the result is normalized.
 *
 * @param int $namespace Namespace ID
 * @param string $text Title text
 * @param string $fragment Fragment without the leading '#'; '' for none
 * @param string $interwiki Interwiki prefix; '' for none
 * @return TitleValue|null Null if the namespace does not exist or the title is malformed
 */
public function makeTitleValueSafe( $namespace, $text, $fragment = '', $interwiki = '' ) {
	if ( !$this->nsInfo->exists( $namespace ) ) {
		return null;
	}

	$fullText = $text;

	$canonicalNs = $this->nsInfo->getCanonicalName( $namespace );
	if ( $canonicalNs != '' ) {
		$fullText = "$canonicalNs:$text";
	}

	if ( (string)$interwiki !== '' ) {
		$fullText = "$interwiki:$fullText";
	}

	if ( (string)$fragment !== '' ) {
		$fullText .= '#' . $fragment;
	}

	try {
		$split = $this->splitTitleString( $fullText );
	} catch ( MalformedTitleException $e ) {
		return null;
	}

	return new TitleValue(
		$split['namespace'],
		$split['dbkey'],
		$split['fragment'],
		$split['interwiki']
	);
}
243 
/**
 * Get the text form of a title, without namespace or interwiki prefix.
 *
 * @param LinkTarget|PageReference $title
 * @return string For a LinkTarget, the result of its getText(); for a PageReference,
 *   its DB key with underscores replaced by spaces
 * @throws InvalidArgumentException If $title is neither a LinkTarget nor a PageReference
 */
public function getText( $title ) {
	if ( $title instanceof LinkTarget ) {
		return $title->getText();
	}

	if ( $title instanceof PageReference ) {
		return strtr( $title->getDBKey(), '_', ' ' );
	}

	// get_class() raises a TypeError for non-objects on PHP 8, which would mask
	// the intended exception; fall back to gettype() for scalars, arrays and null.
	throw new InvalidArgumentException(
		'$title has invalid type: ' . ( is_object( $title ) ? get_class( $title ) : gettype( $title ) )
	);
}
260 
/**
 * Get the title text including interwiki and namespace prefixes, without the fragment.
 *
 * @param LinkTarget|PageReference $title
 * @return string
 * @throws InvalidArgumentException If $title is neither a LinkTarget nor a PageReference
 */
public function getPrefixedText( $title ) {
	if ( $title instanceof LinkTarget ) {
		// The formatted text is stored on the object itself so that repeated
		// calls for the same target do not format it again.
		if ( !isset( $title->prefixedText ) ) {
			$title->prefixedText = $this->formatTitle(
				$title->getNamespace(),
				$title->getText(),
				'',
				$title->getInterwiki()
			);
		}
		return $title->prefixedText;
	}

	if ( $title instanceof PageReference ) {
		// A PageReference carries no interwiki prefix here; it must pass the local-wiki assertion.
		$title->assertWiki( PageReference::LOCAL );
		return $this->formatTitle(
			$title->getNamespace(),
			$this->getText( $title )
		);
	}

	// get_class() raises a TypeError for non-objects on PHP 8, which would mask
	// the intended exception; fall back to gettype() for scalars, arrays and null.
	throw new InvalidArgumentException(
		'$title has invalid type: ' . ( is_object( $title ) ? get_class( $title ) : gettype( $title ) )
	);
}
290 
/**
 * Get the prefixed DB key: interwiki and namespace prefixes plus the DB key,
 * with every space replaced by an underscore and no fragment.
 *
 * @param LinkTarget|PageReference $target
 * @return string
 * @throws InvalidArgumentException If $target is neither a LinkTarget nor a PageReference
 */
public function getPrefixedDBkey( $target ) {
	if ( $target instanceof LinkTarget ) {
		// formatTitle() turns underscores into spaces, so convert them back afterwards.
		return strtr( $this->formatTitle(
			$target->getNamespace(),
			$target->getDBkey(),
			'',
			$target->getInterwiki()
		), ' ', '_' );
	}

	if ( $target instanceof PageReference ) {
		// A PageReference carries no interwiki prefix here; it must pass the local-wiki assertion.
		$target->assertWiki( PageReference::LOCAL );
		return strtr( $this->formatTitle(
			$target->getNamespace(),
			$target->getDBkey()
		), ' ', '_' );
	}

	// The message names the actual parameter ($target). get_class() raises a
	// TypeError for non-objects on PHP 8, so fall back to gettype() for those.
	throw new InvalidArgumentException(
		'$target has invalid type: ' . ( is_object( $target ) ? get_class( $target ) : gettype( $target ) )
	);
}
315 
/**
 * Get the full title text: interwiki and namespace prefixes, the title text,
 * and (for a LinkTarget) the fragment.
 *
 * @param LinkTarget|PageReference $title
 * @return string
 * @throws InvalidArgumentException If $title is neither a LinkTarget nor a PageReference
 */
public function getFullText( $title ) {
	if ( $title instanceof LinkTarget ) {
		return $this->formatTitle(
			$title->getNamespace(),
			$title->getText(),
			$title->getFragment(),
			$title->getInterwiki()
		);
	}

	if ( $title instanceof PageReference ) {
		// No fragment or interwiki is passed for a PageReference; it must pass
		// the local-wiki assertion.
		$title->assertWiki( PageReference::LOCAL );
		return $this->formatTitle(
			$title->getNamespace(),
			$this->getText( $title )
		);
	}

	// get_class() raises a TypeError for non-objects on PHP 8, which would mask
	// the intended exception; fall back to gettype() for scalars, arrays and null.
	throw new InvalidArgumentException(
		'$title has invalid type: ' . ( is_object( $title ) ? get_class( $title ) : gettype( $title ) )
	);
}
341 
/**
 * Validates, normalizes and splits a title string.
 *
 * Processing order: strip bidi override characters, collapse whitespace to
 * single underscores, peel off interwiki and namespace prefixes, split off the
 * fragment, then reject invalid characters, relative path segments, tilde
 * sequences, over-long keys, namespace-only titles and leftover leading colons.
 * The DB key is capitalized when the namespace is capitalized and there is no
 * interwiki, and IP addresses in the user namespaces are canonicalized.
 *
 * A local interwiki prefix with nothing after it returns the parts of the main
 * page immediately, without passing through the later validation steps.
 *
 * @param string $text Title text; character references are expected to be decoded already
 * @param int $defaultNamespace Namespace to use when the text carries no namespace prefix
 * @return array Map with the keys 'interwiki' (string), 'local_interwiki' (bool),
 *   'fragment' (string), 'namespace' (int) and 'dbkey' (string)
 * @throws MalformedTitleException If $text is not a valid title. The exception object
 *   is produced by the configured factory callback, which by default creates a
 *   MalformedTitleException.
 */
public function splitTitleString( $text, $defaultNamespace = NS_MAIN ) {
	$dbkey = str_replace( ' ', '_', $text );

	# Initialisation
	$parts = [
		'interwiki' => '',
		'local_interwiki' => false,
		'fragment' => '',
		'namespace' => (int)$defaultNamespace,
		'dbkey' => $dbkey,
	];

	# Strip Unicode bidi override characters.
	# Sometimes they slip into cut-n-pasted page titles, where the
	# override chars get included in list displays.
	$dbkey = preg_replace( '/[\x{200E}\x{200F}\x{202A}-\x{202E}]+/u', '', $dbkey );

	if ( $dbkey === null ) {
		# Regex had an error. Most likely this is caused by invalid UTF-8
		$exception = ( $this->createMalformedTitleException )( 'title-invalid-utf8', $text );
		throw $exception;
	}

	# Clean up whitespace
	$dbkey = preg_replace(
		'/[ _\xA0\x{1680}\x{180E}\x{2000}-\x{200A}\x{2028}\x{2029}\x{202F}\x{205F}\x{3000}]+/u',
		'_',
		$dbkey
	);
	$dbkey = trim( $dbkey, '_' );

	if ( str_contains( $dbkey, \UtfNormal\Constants::UTF8_REPLACEMENT ) ) {
		# Contained illegal UTF-8 sequences or forbidden Unicode chars.
		$exception = ( $this->createMalformedTitleException )( 'title-invalid-utf8', $text );
		throw $exception;
	}

	$parts['dbkey'] = $dbkey;

	# Initial colon indicates main namespace rather than specified default
	# but should not create invalid {ns,title} pairs such as {0,Project:Foo}
	if ( $dbkey !== '' && $dbkey[0] == ':' ) {
		$parts['namespace'] = NS_MAIN;
		$dbkey = substr( $dbkey, 1 ); # remove the colon but continue processing
		$dbkey = trim( $dbkey, '_' ); # remove any subsequent whitespace
	}

	if ( $dbkey == '' ) {
		$exception = ( $this->createMalformedTitleException )( 'title-invalid-empty', $text );
		throw $exception;
	}

	# Namespace or interwiki prefix
	$prefixRegexp = "/^(.+?)_*:_*(.*)$/S";
	do {
		$m = [];
		if ( preg_match( $prefixRegexp, $dbkey, $m ) ) {
			$p = $m[1];
			$ns = $this->language->getNsIndex( $p );
			if ( $ns !== false ) {
				# Ordinary namespace
				$dbkey = $m[2];
				$parts['namespace'] = $ns;
				# For Talk:X pages, check if X has a "namespace" prefix
				if ( $ns === NS_TALK && preg_match( $prefixRegexp, $dbkey, $x ) ) {
					if ( $this->language->getNsIndex( $x[1] ) ) {
						# Disallow Talk:File:x type titles...
						$exception = ( $this->createMalformedTitleException )(
							'title-invalid-talk-namespace',
							$text
						);
						throw $exception;
					} elseif ( $this->interwikiLookup->isValidInterwiki( $x[1] ) ) {
						# Disallow Talk:Interwiki:x type titles...
						$exception = ( $this->createMalformedTitleException )(
							'title-invalid-talk-namespace',
							$text
						);
						throw $exception;
					}
				}
			} elseif ( $this->interwikiLookup->isValidInterwiki( $p ) ) {
				# Interwiki link
				$dbkey = $m[2];
				$parts['interwiki'] = $this->language->lc( $p );

				# Redundant interwiki prefix to the local wiki
				foreach ( $this->localInterwikis as $localIW ) {
					if ( strcasecmp( $parts['interwiki'], $localIW ) == 0 ) {
						if ( $dbkey == '' ) {
							# Empty self-links should point to the Main Page, to ensure
							# compatibility with cross-wiki transclusions and the like.
							$mainPage = Title::newMainPage();
							return [
								'interwiki' => $mainPage->getInterwiki(),
								'local_interwiki' => true,
								'fragment' => $mainPage->getFragment(),
								'namespace' => $mainPage->getNamespace(),
								'dbkey' => $mainPage->getDBkey(),
							];
						}
						$parts['interwiki'] = '';
						# local interwikis should behave like initial-colon links
						$parts['local_interwiki'] = true;

						# Do another namespace split...
						continue 2;
					}
				}

				# If there's an initial colon after the interwiki, that also
				# resets the default namespace
				if ( $dbkey !== '' && $dbkey[0] == ':' ) {
					$parts['namespace'] = NS_MAIN;
					$dbkey = substr( $dbkey, 1 );
					$dbkey = trim( $dbkey, '_' );
				}
			}
			# If there's no recognized interwiki or namespace,
			# then let the colon expression be part of the title.
		}
		break;
	} while ( true );

	$fragment = strstr( $dbkey, '#' );
	if ( $fragment !== false ) {
		$parts['fragment'] = str_replace( '_', ' ', substr( $fragment, 1 ) );
		$dbkey = substr( $dbkey, 0, strlen( $dbkey ) - strlen( $fragment ) );
		# remove whitespace again: prevents "Foo_bar_#"
		# becoming "Foo_bar_"
		$dbkey = rtrim( $dbkey, "_" );
	}

	# Reject illegal characters.
	$rxTc = self::getTitleInvalidRegex();
	$matches = [];
	if ( preg_match( $rxTc, $dbkey, $matches ) ) {
		$exception = ( $this->createMalformedTitleException )( 'title-invalid-characters', $text, [ $matches[0] ] );
		throw $exception;
	}

	# Pages with "/./" or "/../" appearing in the URLs will often be un-
	# reachable due to the way web browsers deal with 'relative' URLs.
	# Also, they conflict with subpage syntax. Forbid them explicitly.
	if (
		str_contains( $dbkey, '.' ) &&
		(
			$dbkey === '.' || $dbkey === '..' ||
			str_starts_with( $dbkey, './' ) ||
			str_starts_with( $dbkey, '../' ) ||
			str_contains( $dbkey, '/./' ) ||
			str_contains( $dbkey, '/../' ) ||
			str_ends_with( $dbkey, '/.' ) ||
			str_ends_with( $dbkey, '/..' )
		)
	) {
		$exception = ( $this->createMalformedTitleException )( 'title-invalid-relative', $text );
		throw $exception;
	}

	# Magic tilde sequences? Nu-uh!
	if ( str_contains( $dbkey, '~~~' ) ) {
		$exception = ( $this->createMalformedTitleException )( 'title-invalid-magic-tilde', $text );
		throw $exception;
	}

	# Limit the size of titles to 255 bytes. This is typically the size of the
	# underlying database field. We make an exception for special pages, which
	# don't need to be stored in the database, and may edge over 255 bytes due
	# to subpage syntax for long titles, e.g. [[Special:Block/Long name]]
	$maxLength = ( $parts['namespace'] !== NS_SPECIAL ) ? 255 : 512;
	if ( strlen( $dbkey ) > $maxLength ) {
		$exception = ( $this->createMalformedTitleException )(
			'title-invalid-too-long',
			$text,
			[ Message::numParam( $maxLength ) ]
		);
		throw $exception;
	}

	# Normally, all wiki links are forced to have an initial capital letter so [[foo]]
	# and [[Foo]] point to the same place. Don't force it for interwikis, since the
	# other site might be case-sensitive.
	if ( $parts['interwiki'] === '' && $this->nsInfo->isCapitalized( $parts['namespace'] ) ) {
		$dbkey = $this->language->ucfirst( $dbkey );
	}

	# Can't make a link to a namespace alone... "empty" local links can only be
	# self-links with a fragment identifier.
	if ( $dbkey == '' && $parts['interwiki'] === '' && $parts['namespace'] !== NS_MAIN ) {
		$exception = ( $this->createMalformedTitleException )( 'title-invalid-empty', $text );
		throw $exception;
	}

	// Allow IPv6 usernames to start with '::' by canonicalizing IPv6 titles.
	// IP names are not allowed for accounts, and can only be referring to
	// edits from the IP. Given '::' abbreviations and caps/lowercaps,
	// there are numerous ways to present the same IP. Having sp:contribs scan
	// them all is silly and having some show the edits and others not is
	// inconsistent. Same for talk/userpages. Keep them normalized instead.
	if ( $dbkey !== '' && ( $parts['namespace'] === NS_USER || $parts['namespace'] === NS_USER_TALK ) ) {
		$dbkey = IPUtils::sanitizeIP( $dbkey );
		// IPUtils::sanitizeIP return null only for bad input
		'@phan-var string $dbkey';
	}

	// Any remaining initial :s are illegal.
	if ( $dbkey !== '' && $dbkey[0] == ':' ) {
		$exception = ( $this->createMalformedTitleException )( 'title-invalid-leading-colon', $text );
		throw $exception;
	}

	// Fill fields
	$parts['dbkey'] = $dbkey;

	// Check to ensure that the return value can be used to construct a TitleValue.
	// All issues should in theory be caught above, this is here to enforce consistency.
	try {
		TitleValue::assertValidSpec(
			$parts['namespace'],
			$parts['dbkey'],
			$parts['fragment'],
			$parts['interwiki']
		);
	} catch ( InvalidArgumentException $ex ) {
		$exception = ( $this->createMalformedTitleException )( 'title-invalid', $text, [ $ex->getMessage() ] );
		throw $exception;
	}

	return $parts;
}
594 
/**
 * Returns a simple regex that will match on characters and sequences invalid in titles.
 *
 * The pattern is built on first use and kept in a function-static variable
 * for later calls.
 *
 * @return string Regex string, delimiters and flags included
 */
public static function getTitleInvalidRegex() {
	static $cachedPattern = null;

	if ( $cachedPattern !== null ) {
		return $cachedPattern;
	}

	# Matching titles will be held as illegal.
	$alternatives = [
		# Any character not allowed is forbidden...
		'[^' . Title::legalChars() . ']',
		# URL percent encoding sequences interfere with the ability
		# to round-trip titles -- you can't link to them consistently.
		'%[0-9A-Fa-f]{2}',
		# XML/HTML character references produce similar issues.
		'&[A-Za-z0-9\x80-\xff]+;',
	];

	$cachedPattern = '/' . implode( '|', $alternatives ) . '/S';

	return $cachedPattern;
}
621 }
622 
// Register the un-namespaced name 'MediaWikiTitleCodec' as an alias of the
// namespaced class, so references to the global class name resolve to it.
627 class_alias( MediaWikiTitleCodec::class, 'MediaWikiTitleCodec' );
const NS_USER
Definition: Defines.php:66
const NS_MAIN
Definition: Defines.php:64
const NS_SPECIAL
Definition: Defines.php:53
const NS_TALK
Definition: Defines.php:65
const NS_USER_TALK
Definition: Defines.php:67
$matches
Caches user genders when needed to use correct namespace aliases.
Definition: GenderCache.php:35
Base class for language-specific code.
Definition: Language.php:61
HTML sanitizer for MediaWiki.
Definition: Sanitizer.php:46
static decodeCharReferencesAndNormalize( $text)
Decode any character references, numeric or named entities, in the text and normalize the resulting s...
Definition: Sanitizer.php:1398
MalformedTitleException is thrown when a TitleParser is unable to parse a title string.
A codec for MediaWiki page titles.
__construct(Language $language, GenderCache $genderCache, $localInterwikis, InterwikiLookup $interwikiLookup, NamespaceInfo $nsInfo)
makeTitleValueSafe( $namespace, $text, $fragment='', $interwiki='')
Given a namespace and title, return a TitleValue if valid, or null if invalid.
formatTitle( $namespace, $text, $fragment='', $interwiki='')
parseTitle( $text, $defaultNamespace=NS_MAIN)
Parses the given text and constructs a TitleValue.
splitTitleString( $text, $defaultNamespace=NS_MAIN)
Validates, normalizes and splits a title string.
overrideCreateMalformedTitleExceptionCallback(callable $callback)
static getTitleInvalidRegex()
Returns a simple regex that will match on characters and sequences invalid in titles.
This is a utility class for dealing with namespaces that encodes all the "magic" behaviors of them ba...
Represents the target of a wiki link.
Definition: TitleValue.php:44
static assertValidSpec( $namespace, $title, $fragment='', $interwiki='')
Assert that the given parameters could be used to construct a TitleValue object.
Definition: TitleValue.php:169
static newMainPage(MessageLocalizer $localizer=null)
Create a new Title for the Main Page.
Definition: Title.php:686
static legalChars()
Get a regex character class describing the legal characters in a link.
Definition: Title.php:720
The Message class deals with fetching and processing of interface message into a variety of formats.
Definition: Message.php:144
static numParam( $num)
Definition: Message.php:1154
Service interface for looking up Interwiki records.
Represents the target of a wiki link.
Definition: LinkTarget.php:30
Interface for objects (potentially) representing a page that can be viewable and linked to on a wiki.
A title formatter service for MediaWiki.
A title parser service for MediaWiki.
Definition: TitleParser.php:35