MediaWiki  master
MediaWikiTitleCodec.php
Go to the documentation of this file.
1 <?php
25 use Wikimedia\IPUtils;
26 
/** @var Language Language object; assigned in the constructor */
42  protected $language;
43 
/** @var GenderCache Consulted for a user's gender when a namespace has gendered names */
47  protected $genderCache;
48 
/** @var string[] Interwiki prefixes treated as the local wiki; the constructor casts its argument to an array */
52  protected $localInterwikis;
53 
/** @var InterwikiLookup Used to test whether a prefix is a valid interwiki */
57  protected $interwikiLookup;
58 
/** @var NamespaceInfo Queried for namespace existence, canonical names, capitalization and gender distinction */
62  protected $nsInfo;
63 
/**
 * Stores the collaborators this codec uses to format and parse titles.
 *
 * @param Language $language Source of namespace names, namespace index lookups and case handling
 * @param GenderCache $genderCache Used to look up a user's gender for gendered namespace names
 * @param string[]|string $localInterwikis Interwiki prefix(es) that point at the local wiki;
 *   the value is cast to an array
 * @param \MediaWiki\Interwiki\InterwikiLookup $interwikiLookup Used to recognise interwiki prefixes
 * @param NamespaceInfo $nsInfo Used for namespace existence, capitalization and gender checks
 */
public function __construct(
	Language $language,
	GenderCache $genderCache,
	$localInterwikis,
	\MediaWiki\Interwiki\InterwikiLookup $interwikiLookup,
	NamespaceInfo $nsInfo
) {
	$this->language = $language;
	$this->genderCache = $genderCache;
	// Accept a single prefix as well as a list of prefixes.
	$this->localInterwikis = (array)$localInterwikis;
	$this->interwikiLookup = $interwikiLookup;
	$this->nsInfo = $nsInfo;
}
85 
/**
 * Returns the name of a namespace in the codec's language, using the
 * gender-specific form when both the language and the namespace call for it.
 *
 * @param int $namespace Namespace ID
 * @param string $text Title text; passed to the gender cache as a user name
 *   when a gender-specific name is needed
 *
 * @throws InvalidArgumentException If the language reports no name (false) for the namespace
 * @return string Namespace name
 */
public function getNamespaceName( $namespace, $text ) {
	$genderAware = $this->language->needsGenderDistinction()
		&& $this->nsInfo->hasGenderDistinction( $namespace );

	if ( $genderAware ) {
		// NOTE: we are assuming here that the title text is a user name!
		$nsText = $this->language->getGenderNsText(
			$namespace,
			$this->genderCache->getGenderOf( $text, __METHOD__ )
		);
	} else {
		$nsText = $this->language->getNsText( $namespace );
	}

	if ( $nsText !== false ) {
		return $nsText;
	}

	throw new InvalidArgumentException( 'Unknown namespace ID: ' . $namespace );
}
112 
/**
 * Builds a display string of the form "interwiki:Namespace:Text#fragment"
 * from its components, with every underscore turned into a space.
 *
 * @param int|bool $namespace Namespace ID; a value loosely equal to 0 adds no namespace prefix
 * @param string $text Page title text
 * @param string $fragment Fragment to append after '#'; omitted when ''
 * @param string $interwiki Interwiki prefix to prepend; omitted when ''
 *
 * @return string The assembled title
 */
public function formatTitle( $namespace, $text, $fragment = '', $interwiki = '' ) {
	$prefix = ( $interwiki !== '' ) ? $interwiki . ':' : '';

	if ( $namespace != 0 ) {
		try {
			$nsLabel = $this->getNamespaceName( $namespace, $text );
		} catch ( InvalidArgumentException $e ) {
			// See T165149. Awkward, but better than erroneously linking to the main namespace.
			$nsLabel = $this->language->getNsText( NS_SPECIAL ) . ":Badtitle/NS{$namespace}";
		}

		$prefix .= $nsLabel . ':';
	}

	$suffix = ( $fragment !== '' ) ? '#' . $fragment : '';

	// The replacement covers the whole string, prefixes and fragment included.
	return str_replace( '_', ' ', $prefix . $text . $suffix );
}
151 
/**
 * Parses a title string into a TitleValue.
 *
 * Character references such as &eacute; &#257; or &#x3017; are decoded and
 * normalized (T16952) before the string is split.
 *
 * @param string $text Title text to parse
 * @param int $defaultNamespace Namespace to assume when the text names none
 *
 * @throws MalformedTitleException Propagated from splitTitleString()
 * @return TitleValue
 */
public function parseTitle( $text, $defaultNamespace = NS_MAIN ) {
	// NOTE: delegating to splitTitleString() is a kludge that lets this class
	// share its parsing code with the old Title class; the parser code should
	// be refactored to avoid this.
	$split = $this->splitTitleString(
		Sanitizer::decodeCharReferencesAndNormalize( $text ),
		$defaultNamespace
	);

	return new TitleValue(
		$split['namespace'],
		$split['dbkey'],
		$split['fragment'],
		$split['interwiki']
	);
}
177 
/**
 * Builds a TitleValue from separate components, returning null instead of
 * throwing when the namespace does not exist or the assembled title does
 * not parse.
 *
 * @param int $namespace Namespace ID
 * @param string $text Page title text
 * @param string $fragment Fragment; ignored when its string form is ''
 * @param string $interwiki Interwiki prefix; ignored when its string form is ''
 *
 * @return TitleValue|null
 */
public function makeTitleValueSafe( $namespace, $text, $fragment = '', $interwiki = '' ) {
	if ( !$this->nsInfo->exists( $namespace ) ) {
		return null;
	}

	// Reassemble a full title string so the components pass through the
	// same validation and normalization as any parsed title.
	$nsPrefix = $this->nsInfo->getCanonicalName( $namespace );
	$candidate = $text;
	if ( $nsPrefix != '' ) {
		$candidate = "$nsPrefix:$text";
	}
	if ( (string)$interwiki !== '' ) {
		$candidate = "$interwiki:$candidate";
	}
	if ( (string)$fragment !== '' ) {
		$candidate .= '#' . $fragment;
	}

	try {
		$split = $this->splitTitleString( $candidate );
	} catch ( MalformedTitleException $e ) {
		return null;
	}

	return new TitleValue(
		$split['namespace'],
		$split['dbkey'],
		$split['fragment'],
		$split['interwiki']
	);
}
211 
/**
 * Returns the text of the given link target, as reported by the target itself.
 *
 * @param LinkTarget $title
 *
 * @return string
 */
public function getText( LinkTarget $title ) {
	$text = $title->getText();

	return $text;
}
222 
/**
 * Returns the target's title with interwiki and namespace prefixes but no
 * fragment. The result is memoized on the target in a `prefixedText` property.
 *
 * @param LinkTarget $title
 *
 * @return string
 */
public function getPrefixedText( LinkTarget $title ) {
	if ( isset( $title->prefixedText ) ) {
		return $title->prefixedText;
	}

	$title->prefixedText = $this->formatTitle(
		$title->getNamespace(),
		$title->getText(),
		'',
		$title->getInterwiki()
	);

	return $title->prefixedText;
}
243 
/**
 * Returns the target's database key with interwiki and namespace prefixes
 * and no fragment, using underscores in place of spaces.
 *
 * @param LinkTarget $target
 *
 * @return string
 */
public function getPrefixedDBkey( LinkTarget $target ) {
	$formatted = $this->formatTitle(
		$target->getNamespace(),
		$target->getDBkey(),
		'',
		$target->getInterwiki()
	);

	// formatTitle() emits spaces; convert them back to underscores.
	return strtr( $formatted, ' ', '_' );
}
258 
/**
 * Returns the target's complete title: interwiki prefix, namespace prefix,
 * text and fragment.
 *
 * @param LinkTarget $title
 *
 * @return string
 */
public function getFullText( LinkTarget $title ) {
	$namespace = $title->getNamespace();
	$text = $title->getText();
	$fragment = $title->getFragment();
	$interwiki = $title->getInterwiki();

	return $this->formatTitle( $namespace, $text, $fragment, $interwiki );
}
274 
/**
 * Validates, normalizes and splits a title string.
 *
 * The returned array has the keys:
 *  - 'interwiki': lowercased interwiki prefix, or '' for a local title
 *  - 'local_interwiki': true when a prefix pointing at the local wiki was stripped
 *  - 'fragment': fragment text with underscores turned into spaces, without the '#'
 *  - 'namespace': namespace ID
 *  - 'dbkey': normalized title with underscores instead of spaces
 *
 * @param string $text Title string to split
 * @param int $defaultNamespace Namespace to use when the string names none
 *
 * @throws MalformedTitleException If the string cannot be turned into a valid title
 * @return array Title parts as described above
 */
public function splitTitleString( $text, $defaultNamespace = NS_MAIN ) {
	$dbkey = str_replace( ' ', '_', $text );

	# Initialisation
	$parts = [
		'interwiki' => '',
		'local_interwiki' => false,
		'fragment' => '',
		'namespace' => (int)$defaultNamespace,
		'dbkey' => $dbkey,
	];

	# Strip Unicode bidi override characters.
	# Sometimes they slip into cut-n-pasted page titles, where the
	# override chars get included in list displays.
	$dbkey = preg_replace( '/[\x{200E}\x{200F}\x{202A}-\x{202E}]+/u', '', $dbkey );

	# Clean up whitespace
	# Note: use of the /u option on preg_replace here will cause
	# input with invalid UTF-8 sequences to be nullified out in PHP 5.2.x,
	# conveniently disabling them.
	$dbkey = preg_replace(
		'/[ _\xA0\x{1680}\x{180E}\x{2000}-\x{200A}\x{2028}\x{2029}\x{202F}\x{205F}\x{3000}]+/u',
		'_',
		$dbkey
	);
	$dbkey = trim( $dbkey, '_' );

	if ( strpos( $dbkey, UtfNormal\Constants::UTF8_REPLACEMENT ) !== false ) {
		# Contained illegal UTF-8 sequences or forbidden Unicode chars.
		throw new MalformedTitleException( 'title-invalid-utf8', $text );
	}

	$parts['dbkey'] = $dbkey;

	# Initial colon indicates main namespace rather than specified default
	# but should not create invalid {ns,title} pairs such as {0,Project:Foo}
	if ( $dbkey !== '' && $dbkey[0] == ':' ) {
		$parts['namespace'] = NS_MAIN;
		$dbkey = substr( $dbkey, 1 ); # remove the colon but continue processing
		$dbkey = trim( $dbkey, '_' ); # remove any subsequent whitespace
	}

	if ( $dbkey == '' ) {
		throw new MalformedTitleException( 'title-invalid-empty', $text );
	}

	# Namespace or interwiki prefix
	$prefixRegexp = "/^(.+?)_*:_*(.*)$/S";
	# The loop body runs once unless a local interwiki prefix is stripped,
	# in which case "continue 2" restarts the prefix split on the remainder.
	do {
		$m = [];
		if ( preg_match( $prefixRegexp, $dbkey, $m ) ) {
			$p = $m[1];
			$ns = $this->language->getNsIndex( $p );
			if ( $ns !== false ) {
				# Ordinary namespace
				$dbkey = $m[2];
				$parts['namespace'] = $ns;
				# For Talk:X pages, check if X has a "namespace" prefix
				if ( $ns === NS_TALK && preg_match( $prefixRegexp, $dbkey, $x ) ) {
					if ( $this->language->getNsIndex( $x[1] ) ) {
						# Disallow Talk:File:x type titles...
						throw new MalformedTitleException( 'title-invalid-talk-namespace', $text );
					} elseif ( $this->interwikiLookup->isValidInterwiki( $x[1] ) ) {
						# Disallow Talk:Interwiki:x type titles...
						throw new MalformedTitleException( 'title-invalid-talk-namespace', $text );
					}
				}
			} elseif ( $this->interwikiLookup->isValidInterwiki( $p ) ) {
				# Interwiki link
				$dbkey = $m[2];
				$parts['interwiki'] = $this->language->lc( $p );

				# Redundant interwiki prefix to the local wiki
				foreach ( $this->localInterwikis as $localIW ) {
					if ( strcasecmp( $parts['interwiki'], $localIW ) == 0 ) {
						if ( $dbkey == '' ) {
							# Empty self-links should point to the Main Page, to ensure
							# compatibility with cross-wiki transclusions and the like.
							$mainPage = Title::newMainPage();
							return [
								'interwiki' => $mainPage->getInterwiki(),
								'local_interwiki' => true,
								'fragment' => $mainPage->getFragment(),
								'namespace' => $mainPage->getNamespace(),
								'dbkey' => $mainPage->getDBkey(),
							];
						}
						$parts['interwiki'] = '';
						# local interwikis should behave like initial-colon links
						$parts['local_interwiki'] = true;

						# Do another namespace split...
						continue 2;
					}
				}

				# If there's an initial colon after the interwiki, that also
				# resets the default namespace
				if ( $dbkey !== '' && $dbkey[0] == ':' ) {
					$parts['namespace'] = NS_MAIN;
					$dbkey = substr( $dbkey, 1 );
					$dbkey = trim( $dbkey, '_' );
				}
			}
			# If there's no recognized interwiki or namespace,
			# then let the colon expression be part of the title.
		}
		break;
	} while ( true );

	$fragment = strstr( $dbkey, '#' );
	if ( $fragment !== false ) {
		$parts['fragment'] = str_replace( '_', ' ', substr( $fragment, 1 ) );
		$dbkey = substr( $dbkey, 0, strlen( $dbkey ) - strlen( $fragment ) );
		# remove whitespace again: prevents "Foo_bar_#"
		# becoming "Foo_bar_"
		$dbkey = preg_replace( '/_*$/', '', $dbkey );
	}

	# Reject illegal characters.
	$rxTc = self::getTitleInvalidRegex();
	$matches = [];
	if ( preg_match( $rxTc, $dbkey, $matches ) ) {
		throw new MalformedTitleException( 'title-invalid-characters', $text, [ $matches[0] ] );
	}

	# Pages with "/./" or "/../" appearing in the URLs will often be un-
	# reachable due to the way web browsers deal with 'relative' URLs.
	# Also, they conflict with subpage syntax. Forbid them explicitly.
	if (
		strpos( $dbkey, '.' ) !== false &&
		(
			$dbkey === '.' || $dbkey === '..' ||
			strpos( $dbkey, './' ) === 0 ||
			strpos( $dbkey, '../' ) === 0 ||
			strpos( $dbkey, '/./' ) !== false ||
			strpos( $dbkey, '/../' ) !== false ||
			substr( $dbkey, -2 ) == '/.' ||
			substr( $dbkey, -3 ) == '/..'
		)
	) {
		throw new MalformedTitleException( 'title-invalid-relative', $text );
	}

	# Magic tilde sequences? Nu-uh!
	if ( strpos( $dbkey, '~~~' ) !== false ) {
		throw new MalformedTitleException( 'title-invalid-magic-tilde', $text );
	}

	# Limit the size of titles to 255 bytes. This is typically the size of the
	# underlying database field. We make an exception for special pages, which
	# don't need to be stored in the database, and may edge over 255 bytes due
	# to subpage syntax for long titles, e.g. [[Special:Block/Long name]]
	$maxLength = ( $parts['namespace'] !== NS_SPECIAL ) ? 255 : 512;
	if ( strlen( $dbkey ) > $maxLength ) {
		throw new MalformedTitleException( 'title-invalid-too-long', $text,
			[ Message::numParam( $maxLength ) ] );
	}

	# Normally, all wiki links are forced to have an initial capital letter so [[foo]]
	# and [[Foo]] point to the same place. Don't force it for interwikis, since the
	# other site might be case-sensitive.
	if ( $parts['interwiki'] === '' && $this->nsInfo->isCapitalized( $parts['namespace'] ) ) {
		$dbkey = $this->language->ucfirst( $dbkey );
	}

	# Can't make a link to a namespace alone... "empty" local links can only be
	# self-links with a fragment identifier.
	if ( $dbkey == '' && $parts['interwiki'] === '' && $parts['namespace'] !== NS_MAIN ) {
		throw new MalformedTitleException( 'title-invalid-empty', $text );
	}

	// Allow IPv6 usernames to start with '::' by canonicalizing IPv6 titles.
	// IP names are not allowed for accounts, and can only be referring to
	// edits from the IP. Given '::' abbreviations and caps/lowercaps,
	// there are numerous ways to present the same IP. Having sp:contribs scan
	// them all is silly and having some show the edits and others not is
	// inconsistent. Same for talk/userpages. Keep them normalized instead.
	if ( $parts['namespace'] === NS_USER || $parts['namespace'] === NS_USER_TALK ) {
		$dbkey = IPUtils::sanitizeIP( $dbkey );
	}

	// Any remaining initial :s are illegal.
	if ( $dbkey !== '' && $dbkey[0] == ':' ) {
		throw new MalformedTitleException( 'title-invalid-leading-colon', $text );
	}

	// Fill fields
	$parts['dbkey'] = $dbkey;

	// Sanity check to ensure that the return value can be used to construct a TitleValue.
	// All issues should in theory be caught above, this is here to enforce consistency.
	try {
		TitleValue::assertValidSpec(
			$parts['namespace'],
			$parts['dbkey'],
			$parts['fragment'],
			$parts['interwiki']
		);
	} catch ( InvalidArgumentException $ex ) {
		// Report the failure through the exception type callers already handle.
		throw new MalformedTitleException( 'title-invalid', $text, [ $ex->getMessage() ] );
	}

	return $parts;
}
502 
/**
 * Returns a regex matching any character or sequence that is not allowed
 * in a title. The pattern is built on first use and then reused.
 *
 * @return string Regex string, delimiters and modifier included
 */
public static function getTitleInvalidRegex() {
	static $pattern = false;

	if ( $pattern === false ) {
		# Each alternative matches something that makes a title illegal.
		$alternatives = [
			# Any character not allowed is forbidden...
			'[^' . Title::legalChars() . ']',
			# URL percent encoding sequences interfere with the ability
			# to round-trip titles -- you can't link to them consistently.
			'%[0-9A-Fa-f]{2}',
			# XML/HTML character references produce similar issues.
			'&[A-Za-z0-9\x80-\xff]+;',
			'&#[0-9]+;',
			'&#x[0-9A-Fa-f]+;',
		];

		$pattern = '/' . implode( '|', $alternatives ) . '/S';
	}

	return $pattern;
}
531 }
MediaWikiTitleCodec\getPrefixedText
getPrefixedText(LinkTarget $title)
Definition: MediaWikiTitleCodec.php:231
Message\numParam
static numParam( $num)
Definition: Message.php:1038
MediaWiki\Linker\LinkTarget\getInterwiki
getInterwiki()
The interwiki component of this LinkTarget.
MediaWikiTitleCodec
A codec for MediaWiki page titles.
Definition: MediaWikiTitleCodec.php:38
MediaWikiTitleCodec\getTitleInvalidRegex
static getTitleInvalidRegex()
Returns a simple regex that will match on characters and sequences invalid in titles.
Definition: MediaWikiTitleCodec.php:512
GenderCache
Caches user genders when needed to use correct namespace aliases.
Definition: GenderCache.php:36
MediaWikiTitleCodec\splitTitleString
splitTitleString( $text, $defaultNamespace=NS_MAIN)
Validates, normalizes and splits a title string.
Definition: MediaWikiTitleCodec.php:296
Sanitizer\decodeCharReferencesAndNormalize
static decodeCharReferencesAndNormalize( $text)
Decode any character references, numeric or named entities, in the text and normalize the resulting s...
Definition: Sanitizer.php:1245
MediaWikiTitleCodec\makeTitleValueSafe
makeTitleValueSafe( $namespace, $text, $fragment='', $interwiki='')
Given a namespace and title, return a TitleValue if valid, or null if invalid.
Definition: MediaWikiTitleCodec.php:188
Title\newMainPage
static newMainPage(MessageLocalizer $localizer=null)
Create a new Title for the Main Page.
Definition: Title.php:688
MediaWikiTitleCodec\getNamespaceName
getNamespaceName( $namespace, $text)
Definition: MediaWikiTitleCodec.php:95
MediaWikiTitleCodec\parseTitle
parseTitle( $text, $defaultNamespace=NS_MAIN)
Parses the given text and constructs a TitleValue.
Definition: MediaWikiTitleCodec.php:161
MediaWikiTitleCodec\getFullText
getFullText(LinkTarget $title)
Definition: MediaWikiTitleCodec.php:266
NS_MAIN
const NS_MAIN
Definition: Defines.php:64
NS_SPECIAL
const NS_SPECIAL
Definition: Defines.php:53
MediaWiki\Linker\LinkTarget\getNamespace
getNamespace()
Get the namespace index.
$matches
$matches
Definition: NoLocalSettings.php:24
MediaWiki\Interwiki\InterwikiLookup
Service interface for looking up Interwiki records.
Definition: InterwikiLookup.php:32
MediaWikiTitleCodec\formatTitle
formatTitle( $namespace, $text, $fragment='', $interwiki='')
Definition: MediaWikiTitleCodec.php:125
MediaWikiTitleCodec\$localInterwikis
string[] $localInterwikis
Definition: MediaWikiTitleCodec.php:52
$title
$title
Definition: testCompression.php:38
TitleParser
A title parser service for MediaWiki.
Definition: TitleParser.php:33
NS_TALK
const NS_TALK
Definition: Defines.php:65
TitleValue\assertValidSpec
static assertValidSpec( $namespace, $title, $fragment='', $interwiki='')
Asserts that the given parameters could be used to construct a TitleValue object.
Definition: TitleValue.php:171
MediaWikiTitleCodec\$interwikiLookup
InterwikiLookup $interwikiLookup
Definition: MediaWikiTitleCodec.php:57
MediaWiki\Linker\LinkTarget\getDBkey
getDBkey()
Get the main part with underscores.
NS_USER
const NS_USER
Definition: Defines.php:66
MediaWikiTitleCodec\$genderCache
GenderCache $genderCache
Definition: MediaWikiTitleCodec.php:47
MediaWikiTitleCodec\getText
getText(LinkTarget $title)
Definition: MediaWikiTitleCodec.php:219
MalformedTitleException
MalformedTitleException is thrown when a TitleParser is unable to parse a title string.
Definition: MalformedTitleException.php:26
TitleFormatter
A title formatter service for MediaWiki.
Definition: TitleFormatter.php:34
MediaWikiTitleCodec\getPrefixedDBkey
getPrefixedDBkey(LinkTarget $target)
Definition: MediaWikiTitleCodec.php:250
NS_USER_TALK
const NS_USER_TALK
Definition: Defines.php:67
MediaWikiTitleCodec\$nsInfo
NamespaceInfo $nsInfo
Definition: MediaWikiTitleCodec.php:62
NamespaceInfo
This is a utility class for dealing with namespaces that encodes all the "magic" behaviors of them ba...
Definition: NamespaceInfo.php:35
Title\legalChars
static legalChars()
Get a regex character class describing the legal characters in a link.
Definition: Title.php:737
MediaWiki\Linker\LinkTarget
Definition: LinkTarget.php:26
Language
Internationalisation code See https://www.mediawiki.org/wiki/Special:MyLanguage/Localisation for more...
Definition: Language.php:43
MediaWikiTitleCodec\$language
Language $language
Definition: MediaWikiTitleCodec.php:42
MediaWikiTitleCodec\__construct
__construct(Language $language, GenderCache $genderCache, $localInterwikis, InterwikiLookup $interwikiLookup, NamespaceInfo $nsInfo)
Definition: MediaWikiTitleCodec.php:72
TitleValue
Represents a page (or page fragment) title within MediaWiki.
Definition: TitleValue.php:40