MediaWiki  master
MediaWikiTitleCodec.php
Go to the documentation of this file.
1 <?php
26 use Wikimedia\IPUtils;
27 
/** @var Language Language object used for namespace names and case handling */
43  protected $language;
44 
/** @var GenderCache Cache used to look up user genders for gendered namespace names */
48  protected $genderCache;
49 
/** @var string[] Interwiki prefixes that refer to the local wiki */
53  protected $localInterwikis;
54 
/** @var InterwikiLookup Service used to check whether a prefix is a valid interwiki */
58  protected $interwikiLookup;
59 
/** @var NamespaceInfo Service used for namespace existence, capitalization and canonical names */
63  protected $nsInfo;
64 
75  NamespaceInfo $nsInfo = null
76  ) {
77  if ( !$interwikiLookup ) {
78  wfDeprecated( __METHOD__ . ' with no InterwikiLookup argument', '1.34' );
79  $interwikiLookup = MediaWikiServices::getInstance()->getInterwikiLookup();
80  }
81  if ( !$nsInfo ) {
82  wfDeprecated( __METHOD__ . ' with no NamespaceInfo argument', '1.34' );
83  $nsInfo = MediaWikiServices::getInstance()->getNamespaceInfo();
84  }
85  $this->language = $language;
86  $this->genderCache = $genderCache;
87  $this->localInterwikis = (array)$localInterwikis;
88  $this->interwikiLookup = $interwikiLookup;
89  $this->nsInfo = $nsInfo;
90  }
91 
/**
 * Returns the name of a namespace as reported by the language object.
 *
 * When both the language and the namespace make a gender distinction, the
 * gender-specific namespace name is used; the gender is looked up in the
 * gender cache using $text as the key.
 *
 * @param int $namespace Namespace ID
 * @param string $text Title text; treated as a user name for the gender lookup
 *
 * @throws InvalidArgumentException if the language object reports no name for the ID
 * @return string Namespace name
 */
public function getNamespaceName( $namespace, $text ) {
	$genderAware = $this->language->needsGenderDistinction()
		&& $this->nsInfo->hasGenderDistinction( $namespace );

	if ( !$genderAware ) {
		$nsText = $this->language->getNsText( $namespace );
	} else {
		// NOTE: the title text is assumed to be a user name here!
		$nsText = $this->language->getGenderNsText(
			$namespace,
			$this->genderCache->getGenderOf( $text, __METHOD__ )
		);
	}

	if ( $nsText === false ) {
		throw new InvalidArgumentException( 'Unknown namespace ID: ' . $namespace );
	}

	return $nsText;
}
118 
/**
 * Builds the display form "interwiki:Namespace:text#fragment" of a title.
 *
 * Underscores anywhere in the assembled string are replaced by spaces.
 *
 * @param int|bool $namespace Namespace ID; no namespace prefix is added when it equals 0
 * @param string $text Title text
 * @param string $fragment Fragment to append after '#'; skipped when ''
 * @param string $interwiki Interwiki prefix to prepend; skipped when ''
 *
 * @return string
 */
public function formatTitle( $namespace, $text, $fragment = '', $interwiki = '' ) {
	$prefix = ( $interwiki !== '' ) ? $interwiki . ':' : '';

	if ( $namespace != 0 ) {
		try {
			$nsText = $this->getNamespaceName( $namespace, $text );
		} catch ( InvalidArgumentException $ex ) {
			// See T165149. Awkward, but better than erroneously linking to the main namespace.
			$nsText = $this->language->getNsText( NS_SPECIAL ) . ":Badtitle/NS{$namespace}";
		}

		$prefix .= $nsText . ':';
	}

	$suffix = ( $fragment !== '' ) ? '#' . $fragment : '';

	return strtr( $prefix . $text . $suffix, '_', ' ' );
}
157 
/**
 * Parses the given text and constructs a TitleValue.
 *
 * @param string $text Title text to parse
 * @param int $defaultNamespace Namespace passed on to splitTitleString() as the default
 *
 * @throws MalformedTitleException when splitTitleString() rejects the text
 * @return TitleValue
 */
public function parseTitle( $text, $defaultNamespace = NS_MAIN ) {
	// Convert things like &eacute; &#257; or &#x3017; into normalized (T16952) text
	$normalized = Sanitizer::decodeCharReferencesAndNormalize( $text );

	// NOTE: the splitting code is shared with the old Title class, which is
	// why it lives in a public method. It should be refactored eventually.
	$split = $this->splitTitleString( $normalized, $defaultNamespace );

	$ns = $split['namespace'];
	$dbkey = $split['dbkey'];
	$fragment = $split['fragment'];
	$interwiki = $split['interwiki'];

	return new TitleValue( $ns, $dbkey, $fragment, $interwiki );
}
183 
/**
 * Given a namespace and title, return a TitleValue if valid, or null if invalid.
 *
 * The parts are joined into a full title string (using the canonical
 * namespace name) and run through splitTitleString() for validation.
 *
 * @param int $namespace Namespace ID; null is returned if NamespaceInfo does not know it
 * @param string $text Title text
 * @param string $fragment Optional fragment, appended after '#'
 * @param string $interwiki Optional interwiki prefix
 *
 * @return TitleValue|null
 */
public function makeTitleValueSafe( $namespace, $text, $fragment = '', $interwiki = '' ) {
	if ( !$this->nsInfo->exists( $namespace ) ) {
		return null;
	}

	$nsPrefix = $this->nsInfo->getCanonicalName( $namespace );

	$candidate = $text;
	if ( $nsPrefix != '' ) {
		$candidate = "$nsPrefix:$text";
	}
	if ( (string)$interwiki != '' ) {
		$candidate = "$interwiki:$candidate";
	}
	if ( (string)$fragment != '' ) {
		$candidate .= '#' . $fragment;
	}

	try {
		$split = $this->splitTitleString( $candidate );
	} catch ( MalformedTitleException $ex ) {
		return null;
	}

	return new TitleValue(
		$split['namespace'],
		$split['dbkey'],
		$split['fragment'],
		$split['interwiki']
	);
}
217 
/**
 * Returns the text of the given link target, as reported by the target itself.
 *
 * @param LinkTarget $title
 *
 * @return string
 */
public function getText( LinkTarget $title ) {
	$text = $title->getText();

	return $text;
}
228 
/**
 * Returns the title formatted with interwiki and namespace prefixes, but no fragment.
 *
 * The result is stored on the target's prefixedText property and reused
 * on later calls for the same object.
 *
 * @param LinkTarget $title
 *
 * @return string
 */
public function getPrefixedText( LinkTarget $title ) {
	if ( isset( $title->prefixedText ) ) {
		// Already computed for this object.
		return $title->prefixedText;
	}

	$ns = $title->getNamespace();
	$text = $title->getText();
	$interwiki = $title->getInterwiki();

	$title->prefixedText = $this->formatTitle( $ns, $text, '', $interwiki );

	return $title->prefixedText;
}
249 
/**
 * Returns the prefixed title in database-key form, i.e. with underscores instead of spaces.
 *
 * @param LinkTarget $target
 *
 * @return string
 */
public function getPrefixedDBkey( LinkTarget $target ) {
	$ns = $target->getNamespace();
	$dbkey = $target->getDBkey();
	$interwiki = $target->getInterwiki();

	// formatTitle() yields spaces; convert them back to underscores.
	$formatted = $this->formatTitle( $ns, $dbkey, '', $interwiki );

	return str_replace( ' ', '_', $formatted );
}
264 
/**
 * Returns the title formatted with interwiki prefix, namespace prefix and fragment.
 *
 * @param LinkTarget $title
 *
 * @return string
 */
public function getFullText( LinkTarget $title ) {
	$ns = $title->getNamespace();
	$text = $title->getText();
	$fragment = $title->getFragment();
	$interwiki = $title->getInterwiki();

	return $this->formatTitle( $ns, $text, $fragment, $interwiki );
}
280 
/**
 * Validates, normalizes and splits a title string.
 *
 * The returned array has the keys:
 *  - 'interwiki': interwiki prefix (lower-cased), or '' if none
 *  - 'local_interwiki': true if a prefix pointing to the local wiki was stripped
 *  - 'fragment': fragment with underscores turned into spaces, or '' if none
 *  - 'namespace': namespace ID
 *  - 'dbkey': normalized title text with underscores
 *
 * @param string $text Title string to split
 * @param int $defaultNamespace Namespace to use when the text carries no namespace prefix
 *
 * @throws MalformedTitleException If $text is not a valid title string.
 * @return array Associative array as described above
 */
public function splitTitleString( $text, $defaultNamespace = NS_MAIN ) {
	$dbkey = str_replace( ' ', '_', $text );

	# Initialisation
	$parts = [
		'interwiki' => '',
		'local_interwiki' => false,
		'fragment' => '',
		'namespace' => $defaultNamespace,
		'dbkey' => $dbkey,
	];

	# Strip Unicode bidi override characters.
	# Sometimes they slip into cut-n-pasted page titles, where the
	# override chars get included in list displays.
	$dbkey = preg_replace( '/[\x{200E}\x{200F}\x{202A}-\x{202E}]+/u', '', $dbkey );

	if ( $dbkey === null ) {
		# preg_replace() with the /u option returns null when the subject is
		# not valid UTF-8. Report that explicitly instead of letting the null
		# degrade into an "empty title" error further down.
		throw new MalformedTitleException( 'title-invalid-utf8', $text );
	}

	# Clean up whitespace
	# Note: the subject is known to be valid UTF-8 at this point, so the
	# /u option cannot fail here.
	$dbkey = preg_replace(
		'/[ _\xA0\x{1680}\x{180E}\x{2000}-\x{200A}\x{2028}\x{2029}\x{202F}\x{205F}\x{3000}]+/u',
		'_',
		$dbkey
	);
	$dbkey = trim( $dbkey, '_' );

	if ( strpos( $dbkey, UtfNormal\Constants::UTF8_REPLACEMENT ) !== false ) {
		# Contained illegal UTF-8 sequences or forbidden Unicode chars.
		throw new MalformedTitleException( 'title-invalid-utf8', $text );
	}

	$parts['dbkey'] = $dbkey;

	# Initial colon indicates main namespace rather than specified default
	# but should not create invalid {ns,title} pairs such as {0,Project:Foo}
	if ( $dbkey !== '' && $dbkey[0] == ':' ) {
		$parts['namespace'] = NS_MAIN;
		$dbkey = substr( $dbkey, 1 ); # remove the colon but continue processing
		$dbkey = trim( $dbkey, '_' ); # remove any subsequent whitespace
	}

	if ( $dbkey == '' ) {
		throw new MalformedTitleException( 'title-invalid-empty', $text );
	}

	# Namespace or interwiki prefix
	$prefixRegexp = "/^(.+?)_*:_*(.*)$/S";
	do {
		$m = [];
		if ( preg_match( $prefixRegexp, $dbkey, $m ) ) {
			$p = $m[1];
			$ns = $this->language->getNsIndex( $p );
			if ( $ns !== false ) {
				# Ordinary namespace
				$dbkey = $m[2];
				$parts['namespace'] = $ns;
				# For Talk:X pages, check if X has a "namespace" prefix
				if ( $ns == NS_TALK && preg_match( $prefixRegexp, $dbkey, $x ) ) {
					if ( $this->language->getNsIndex( $x[1] ) ) {
						# Disallow Talk:File:x type titles...
						throw new MalformedTitleException( 'title-invalid-talk-namespace', $text );
					} elseif ( $this->interwikiLookup->isValidInterwiki( $x[1] ) ) {
						# Disallow Talk:Interwiki:x type titles...
						throw new MalformedTitleException( 'title-invalid-talk-namespace', $text );
					}
				}
			} elseif ( $this->interwikiLookup->isValidInterwiki( $p ) ) {
				# Interwiki link
				$dbkey = $m[2];
				$parts['interwiki'] = $this->language->lc( $p );

				# Redundant interwiki prefix to the local wiki
				foreach ( $this->localInterwikis as $localIW ) {
					if ( strcasecmp( $parts['interwiki'], $localIW ) == 0 ) {
						if ( $dbkey == '' ) {
							# Empty self-links should point to the Main Page, to ensure
							# compatibility with cross-wiki transclusions and the like.
							$mainPage = Title::newMainPage();
							return [
								'interwiki' => $mainPage->getInterwiki(),
								'local_interwiki' => true,
								'fragment' => $mainPage->getFragment(),
								'namespace' => $mainPage->getNamespace(),
								'dbkey' => $mainPage->getDBkey(),
							];
						}
						$parts['interwiki'] = '';
						# local interwikis should behave like initial-colon links
						$parts['local_interwiki'] = true;

						# Do another namespace split...
						continue 2;
					}
				}

				# If there's an initial colon after the interwiki, that also
				# resets the default namespace
				if ( $dbkey !== '' && $dbkey[0] == ':' ) {
					$parts['namespace'] = NS_MAIN;
					$dbkey = substr( $dbkey, 1 );
					$dbkey = trim( $dbkey, '_' );
				}
			}
			# If there's no recognized interwiki or namespace,
			# then let the colon expression be part of the title.
		}
		break;
	} while ( true );

	$fragment = strstr( $dbkey, '#' );
	if ( $fragment !== false ) {
		$parts['fragment'] = str_replace( '_', ' ', substr( $fragment, 1 ) );
		$dbkey = substr( $dbkey, 0, strlen( $dbkey ) - strlen( $fragment ) );
		# remove whitespace again: prevents "Foo_bar_#"
		# becoming "Foo_bar_"
		$dbkey = preg_replace( '/_*$/', '', $dbkey );
	}

	# Reject illegal characters.
	$rxTc = self::getTitleInvalidRegex();
	$matches = [];
	if ( preg_match( $rxTc, $dbkey, $matches ) ) {
		throw new MalformedTitleException( 'title-invalid-characters', $text, [ $matches[0] ] );
	}

	# Pages with "/./" or "/../" appearing in the URLs will often be un-
	# reachable due to the way web browsers deal with 'relative' URLs.
	# Also, they conflict with subpage syntax. Forbid them explicitly.
	if (
		strpos( $dbkey, '.' ) !== false &&
		(
			$dbkey === '.' || $dbkey === '..' ||
			strpos( $dbkey, './' ) === 0 ||
			strpos( $dbkey, '../' ) === 0 ||
			strpos( $dbkey, '/./' ) !== false ||
			strpos( $dbkey, '/../' ) !== false ||
			substr( $dbkey, -2 ) == '/.' ||
			substr( $dbkey, -3 ) == '/..'
		)
	) {
		throw new MalformedTitleException( 'title-invalid-relative', $text );
	}

	# Magic tilde sequences? Nu-uh!
	if ( strpos( $dbkey, '~~~' ) !== false ) {
		throw new MalformedTitleException( 'title-invalid-magic-tilde', $text );
	}

	# Limit the size of titles to 255 bytes. This is typically the size of the
	# underlying database field. We make an exception for special pages, which
	# don't need to be stored in the database, and may edge over 255 bytes due
	# to subpage syntax for long titles, e.g. [[Special:Block/Long name]]
	$maxLength = ( $parts['namespace'] != NS_SPECIAL ) ? 255 : 512;
	if ( strlen( $dbkey ) > $maxLength ) {
		throw new MalformedTitleException( 'title-invalid-too-long', $text,
			[ Message::numParam( $maxLength ) ] );
	}

	# Normally, all wiki links are forced to have an initial capital letter so [[foo]]
	# and [[Foo]] point to the same place. Don't force it for interwikis, since the
	# other site might be case-sensitive.
	if ( $parts['interwiki'] === '' && $this->nsInfo->isCapitalized( $parts['namespace'] ) ) {
		$dbkey = $this->language->ucfirst( $dbkey );
	}

	# Can't make a link to a namespace alone... "empty" local links can only be
	# self-links with a fragment identifier.
	if ( $dbkey == '' && $parts['interwiki'] === '' && $parts['namespace'] != NS_MAIN ) {
		throw new MalformedTitleException( 'title-invalid-empty', $text );
	}

	// Allow IPv6 usernames to start with '::' by canonicalizing IPv6 titles.
	// IP names are not allowed for accounts, and can only be referring to
	// edits from the IP. Given '::' abbreviations and upper/lower case,
	// there are numerous ways to present the same IP. Having sp:contribs scan
	// them all is silly and having some show the edits and others not is
	// inconsistent. Same for talk/userpages. Keep them normalized instead.
	if ( $parts['namespace'] == NS_USER || $parts['namespace'] == NS_USER_TALK ) {
		$dbkey = IPUtils::sanitizeIP( $dbkey );
	}

	// Any remaining initial :s are illegal.
	if ( $dbkey !== '' && $dbkey[0] == ':' ) {
		throw new MalformedTitleException( 'title-invalid-leading-colon', $text );
	}

	// Fill fields
	$parts['dbkey'] = $dbkey;

	// Sanity check to ensure that the return value can be used to construct a TitleValue.
	// All issues should in theory be caught above, this is here to enforce consistency.
	try {
		TitleValue::assertValidSpec(
			$parts['namespace'],
			$parts['dbkey'],
			$parts['fragment'],
			$parts['interwiki']
		);
	} catch ( InvalidArgumentException $ex ) {
		throw new MalformedTitleException( 'title-invalid', $text, [ $ex->getMessage() ] );
	}

	return $parts;
}
508 
/**
 * Returns a simple regex that will match on characters and sequences invalid in titles.
 *
 * The pattern is built on first use and kept in a static variable, so later
 * calls return the same string without rebuilding it.
 *
 * @return string Regex string
 */
public static function getTitleInvalidRegex() {
	static $cachedRegex = false;

	if ( $cachedRegex ) {
		return $cachedRegex;
	}

	# Anything matching one of these alternatives makes a title illegal.
	$alternatives = [
		# Any character not allowed is forbidden...
		'[^' . Title::legalChars() . ']',
		# URL percent encoding sequences interfere with the ability
		# to round-trip titles -- you can't link to them consistently.
		'%[0-9A-Fa-f]{2}',
		# XML/HTML character references produce similar issues.
		'&[A-Za-z0-9\x80-\xff]+;',
		'&#[0-9]+;',
		'&#x[0-9A-Fa-f]+;',
	];

	$cachedRegex = '/' . implode( '|', $alternatives ) . '/S';

	return $cachedRegex;
}
537 }
MediaWikiTitleCodec\getPrefixedText
getPrefixedText(LinkTarget $title)
Definition: MediaWikiTitleCodec.php:237
MediaWiki\Linker\LinkTarget\getInterwiki
getInterwiki()
The interwiki component of this LinkTarget.
MediaWikiTitleCodec
A codec for MediaWiki page titles.
Definition: MediaWikiTitleCodec.php:39
MediaWiki\MediaWikiServices
MediaWikiServices is the service locator for the application scope of MediaWiki.
Definition: MediaWikiServices.php:130
MediaWikiTitleCodec\getTitleInvalidRegex
static getTitleInvalidRegex()
Returns a simple regex that will match on characters and sequences invalid in titles.
Definition: MediaWikiTitleCodec.php:518
GenderCache
Caches user genders when needed to use correct namespace aliases.
Definition: GenderCache.php:34
MediaWikiTitleCodec\splitTitleString
splitTitleString( $text, $defaultNamespace=NS_MAIN)
Validates, normalizes and splits a title string.
Definition: MediaWikiTitleCodec.php:302
MediaWikiTitleCodec\makeTitleValueSafe
makeTitleValueSafe( $namespace, $text, $fragment='', $interwiki='')
Given a namespace and title, return a TitleValue if valid, or null if invalid.
Definition: MediaWikiTitleCodec.php:194
Title\newMainPage
static newMainPage(MessageLocalizer $localizer=null)
Create a new Title for the Main Page.
Definition: Title.php:646
MediaWikiTitleCodec\getNamespaceName
getNamespaceName( $namespace, $text)
Definition: MediaWikiTitleCodec.php:101
MediaWikiTitleCodec\parseTitle
parseTitle( $text, $defaultNamespace=NS_MAIN)
Parses the given text and constructs a TitleValue.
Definition: MediaWikiTitleCodec.php:167
MediaWikiTitleCodec\getFullText
getFullText(LinkTarget $title)
Definition: MediaWikiTitleCodec.php:272
NS_MAIN
const NS_MAIN
Definition: Defines.php:60
NS_SPECIAL
const NS_SPECIAL
Definition: Defines.php:49
MediaWiki\Linker\LinkTarget\getNamespace
getNamespace()
Get the namespace index.
wfDeprecated
wfDeprecated( $function, $version=false, $component=false, $callerOffset=2)
Throws a warning that $function is deprecated.
Definition: GlobalFunctions.php:1044
MediaWikiTitleCodec\__construct
__construct(Language $language, GenderCache $genderCache, $localInterwikis=[], InterwikiLookup $interwikiLookup=null, NamespaceInfo $nsInfo=null)
Definition: MediaWikiTitleCodec.php:73
$matches
$matches
Definition: NoLocalSettings.php:24
MediaWiki\Interwiki\InterwikiLookup
Service interface for looking up Interwiki records.
Definition: InterwikiLookup.php:32
MediaWikiTitleCodec\formatTitle
formatTitle( $namespace, $text, $fragment='', $interwiki='')
Definition: MediaWikiTitleCodec.php:131
MediaWikiTitleCodec\$localInterwikis
string[] $localInterwikis
Definition: MediaWikiTitleCodec.php:53
$title
$title
Definition: testCompression.php:36
TitleParser
A title parser service for MediaWiki.
Definition: TitleParser.php:33
TitleValue\assertValidSpec
static assertValidSpec( $namespace, $title, $fragment='', $interwiki='')
Asserts that the given parameters could be used to construct a TitleValue object.
Definition: TitleValue.php:142
NS_USER_TALK
const NS_USER_TALK
Definition: Defines.php:63
MediaWikiTitleCodec\$interwikiLookup
InterwikiLookup $interwikiLookup
Definition: MediaWikiTitleCodec.php:58
MediaWiki\Linker\LinkTarget\getDBkey
getDBkey()
Get the main part with underscores.
MediaWikiTitleCodec\$genderCache
GenderCache $genderCache
Definition: MediaWikiTitleCodec.php:48
MediaWikiTitleCodec\getText
getText(LinkTarget $title)
Definition: MediaWikiTitleCodec.php:225
MalformedTitleException
MalformedTitleException is thrown when a TitleParser is unable to parse a title string.
Definition: MalformedTitleException.php:25
TitleFormatter
A title formatter service for MediaWiki.
Definition: TitleFormatter.php:34
MediaWikiTitleCodec\getPrefixedDBkey
getPrefixedDBkey(LinkTarget $target)
Definition: MediaWikiTitleCodec.php:256
NS_USER
const NS_USER
Definition: Defines.php:62
NS_TALK
const NS_TALK
Definition: Defines.php:61
MediaWikiTitleCodec\$nsInfo
NamespaceInfo $nsInfo
Definition: MediaWikiTitleCodec.php:63
NamespaceInfo
This is a utility class for dealing with namespaces that encodes all the "magic" behaviors of them based on index.
Definition: NamespaceInfo.php:33
Title\legalChars
static legalChars()
Get a regex character class describing the legal characters in a link.
Definition: Title.php:692
MediaWiki\Linker\LinkTarget
Definition: LinkTarget.php:26
Language
Internationalisation code.
Definition: Language.php:39
MediaWikiTitleCodec\$language
Language $language
Definition: MediaWikiTitleCodec.php:43
TitleValue
Represents a page (or page fragment) title within MediaWiki.
Definition: TitleValue.php:37