MediaWiki  1.34.0
MediaWikiTitleCodec.php
Go to the documentation of this file.
1 <?php
26 
/** @var Language Language object used for namespace names, lower-casing and first-letter capitalization */
42  protected $language;
43 
/** @var GenderCache Looked up in getNamespaceName() to pick gender-specific namespace names */
47  protected $genderCache;
48 
/** @var string[] Interwiki prefixes that refer to the local wiki */
52  protected $localInterwikis;
53 
/** @var InterwikiLookup Used by splitTitleString() to recognize interwiki prefixes */
57  protected $interwikiLookup;
58 
/** @var NamespaceInfo Used for namespace existence, canonical names, gender and capitalization checks */
62  protected $nsInfo;
63 
74  NamespaceInfo $nsInfo = null
75  ) {
76  if ( !$interwikiLookup ) {
77  wfDeprecated( __METHOD__ . ' with no InterwikiLookup argument', '1.34' );
78  $interwikiLookup = MediaWikiServices::getInstance()->getInterwikiLookup();
79  }
80  if ( !$nsInfo ) {
81  wfDeprecated( __METHOD__ . ' with no NamespaceInfo argument', '1.34' );
82  $nsInfo = MediaWikiServices::getInstance()->getNamespaceInfo();
83  }
84  $this->language = $language;
85  $this->genderCache = $genderCache;
86  $this->localInterwikis = (array)$localInterwikis;
87  $this->interwikiLookup = $interwikiLookup;
88  $this->nsInfo = $nsInfo;
89  }
90 
/**
 * Look up the display name of a namespace.
 *
 * When both the language and the namespace distinguish gender, the title
 * text is treated as a user name and the gender-specific namespace name
 * is used; otherwise the plain namespace name is returned.
 *
 * @param int $namespace Namespace ID
 * @param string $text Title text; only consulted for the gender lookup
 * @return string Namespace name
 * @throws InvalidArgumentException If the language has no name for $namespace
 */
public function getNamespaceName( $namespace, $text ) {
	$lang = $this->language;

	$genderAware = $lang->needsGenderDistinction()
		&& $this->nsInfo->hasGenderDistinction( $namespace );

	// NOTE: the gendered branch assumes the title text is a user name.
	$nsText = $genderAware
		? $lang->getGenderNsText(
			$namespace,
			$this->genderCache->getGenderOf( $text, __METHOD__ )
		)
		: $lang->getNsText( $namespace );

	if ( $nsText !== false ) {
		return $nsText;
	}

	throw new InvalidArgumentException( 'Unknown namespace ID: ' . $namespace );
}
117 
/**
 * Assemble a human-readable title string from its components.
 *
 * The result has the shape "interwiki:Namespace:text#fragment", with the
 * interwiki, namespace and fragment parts omitted when empty (or, for the
 * namespace, when it compares equal to 0). All underscores in the final
 * string are replaced by spaces.
 *
 * @param int $namespace Namespace ID
 * @param string $text Page title text
 * @param string $fragment Fragment without the leading '#', or '' for none
 * @param string $interwiki Interwiki prefix, or '' for none
 * @return string
 */
public function formatTitle( $namespace, $text, $fragment = '', $interwiki = '' ) {
	$prefix = ( $interwiki !== '' ) ? $interwiki . ':' : '';

	if ( $namespace != 0 ) {
		try {
			$nsPart = $this->getNamespaceName( $namespace, $text );
		} catch ( InvalidArgumentException $e ) {
			// See T165149. An unknown namespace is rendered as a Special:Badtitle
			// target, which is awkward but avoids wrongly pointing at the main namespace.
			$nsPart = $this->language->getNsText( NS_SPECIAL ) . ":Badtitle/NS{$namespace}";
		}
		$prefix .= $nsPart . ':';
	}

	$suffix = ( $fragment !== '' ) ? '#' . $fragment : '';

	return str_replace( '_', ' ', $prefix . $text . $suffix );
}
156 
/**
 * Parse a title string into a TitleValue.
 *
 * @param string $text Title text to parse
 * @param int $defaultNamespace Namespace passed on to splitTitleString()
 *   as the default
 * @return TitleValue
 * @throws MalformedTitleException If the title is empty (other than a
 *   fragment-only title in the main namespace), or if splitTitleString()
 *   rejects it
 */
public function parseTitle( $text, $defaultNamespace = NS_MAIN ) {
	// Character references such as &eacute; &#257; or &#x3017; are decoded
	// and normalized first (T16952).
	// NOTE: delegating to splitTitleString() is a kludge that lets this class
	// share parsing code with the old Title class; the parser code should be
	// refactored to avoid this.
	$parts = $this->splitTitleString(
		Sanitizer::decodeCharReferencesAndNormalize( $text ),
		$defaultNamespace
	);

	// An empty dbkey is only acceptable for a fragment-only title in the main namespace.
	if ( $parts['dbkey'] === '' &&
		!( $parts['fragment'] !== '' && $parts['namespace'] === NS_MAIN )
	) {
		throw new MalformedTitleException( 'title-invalid-empty', $text );
	}

	$ns = $parts['namespace'];
	$key = $parts['dbkey'];
	$frag = $parts['fragment'];
	$iw = $parts['interwiki'];

	return new TitleValue( $ns, $key, $frag, $iw );
}
188 
/**
 * Build a TitleValue from separate components, returning null instead of
 * throwing when the namespace does not exist or the title is malformed.
 *
 * The components are glued back into a single title string and run
 * through splitTitleString(), so the result is normalized the same way
 * as any parsed title.
 *
 * @param int $namespace Namespace ID
 * @param string $text Title text
 * @param string $fragment Fragment, or '' for none
 * @param string $interwiki Interwiki prefix, or '' for none
 * @return TitleValue|null
 */
public function makeTitleValueSafe( $namespace, $text, $fragment = '', $interwiki = '' ) {
	$nsInfo = $this->nsInfo;
	if ( !$nsInfo->exists( $namespace ) ) {
		return null;
	}

	// Start from the bare text and add the pieces that are present.
	$assembled = $text;

	$nsName = $nsInfo->getCanonicalName( $namespace );
	if ( $nsName != '' ) {
		$assembled = $nsName . ':' . $text;
	}
	if ( (string)$interwiki !== '' ) {
		$assembled = $interwiki . ':' . $assembled;
	}
	if ( (string)$fragment !== '' ) {
		$assembled .= '#' . $fragment;
	}

	try {
		$split = $this->splitTitleString( $assembled );
	} catch ( MalformedTitleException $e ) {
		return null;
	}

	return new TitleValue(
		$split['namespace'],
		$split['dbkey'],
		$split['fragment'],
		$split['interwiki']
	);
}
222 
/**
 * Return the text of the given link target, as reported by the target itself.
 *
 * @param LinkTarget $title
 * @return string Whatever $title->getText() returns
 */
public function getText( LinkTarget $title ) {
	$plainText = $title->getText();

	return $plainText;
}
233 
/**
 * Return the title with interwiki and namespace prefixes but no fragment.
 *
 * The formatted string is memoized on the target object itself, in a
 * `prefixedText` property, and reused on later calls.
 *
 * @param LinkTarget $title
 * @return string
 */
public function getPrefixedText( LinkTarget $title ) {
	if ( isset( $title->prefixedText ) ) {
		return $title->prefixedText;
	}

	$ns = $title->getNamespace();
	$text = $title->getText();
	$iw = $title->getInterwiki();

	// The fragment is deliberately left empty.
	$title->prefixedText = $this->formatTitle( $ns, $text, '', $iw );

	return $title->prefixedText;
}
254 
/**
 * Return the prefixed title in database-key form: formatted without a
 * fragment, then with every space turned into an underscore.
 *
 * @param LinkTarget $target
 * @return string
 */
public function getPrefixedDBkey( LinkTarget $target ) {
	$ns = $target->getNamespace();
	$key = $target->getDBkey();
	$iw = $target->getInterwiki();

	// formatTitle() emits spaces; convert them back to underscores.
	$formatted = $this->formatTitle( $ns, $key, '', $iw );

	return strtr( $formatted, ' ', '_' );
}
269 
/**
 * Return the complete title text: interwiki and namespace prefixes,
 * title text, and fragment.
 *
 * @param LinkTarget $title
 * @return string
 */
public function getFullText( LinkTarget $title ) {
	$ns = $title->getNamespace();
	$text = $title->getText();
	$frag = $title->getFragment();
	$iw = $title->getInterwiki();

	return $this->formatTitle( $ns, $text, $frag, $iw );
}
285 
/**
 * Normalizes and splits a title string.
 *
 * Whitespace and underscores are collapsed, bidi override characters are
 * stripped, interwiki and namespace prefixes are peeled off, the fragment
 * is separated, and the remaining key is validated.
 *
 * @param string $text Title string to split
 * @param int $defaultNamespace Namespace to use when $text carries no
 *   namespace prefix; a leading colon resets it to NS_MAIN
 * @return array Map with the keys:
 *   - 'interwiki': lower-cased interwiki prefix, or '' if none
 *   - 'local_interwiki': true if a prefix matching one of the local
 *     interwikis was found
 *   - 'fragment': fragment text with underscores turned into spaces, or ''
 *   - 'namespace': namespace index
 *   - 'dbkey': normalized key, first letter capitalized when there is no
 *     interwiki and the namespace is capitalized
 *   - 'user_case_dbkey': the key as it stood before capitalization and
 *     IP sanitization
 * @throws MalformedTitleException With one of the message keys
 *   title-invalid-utf8, title-invalid-empty, title-invalid-talk-namespace,
 *   title-invalid-characters, title-invalid-relative,
 *   title-invalid-magic-tilde, title-invalid-too-long or
 *   title-invalid-leading-colon
 */
306  public function splitTitleString( $text, $defaultNamespace = NS_MAIN ) {
307  $dbkey = str_replace( ' ', '_', $text );
308 
309  # Initialisation
310  $parts = [
311  'interwiki' => '',
312  'local_interwiki' => false,
313  'fragment' => '',
314  'namespace' => $defaultNamespace,
315  'dbkey' => $dbkey,
316  'user_case_dbkey' => $dbkey,
317  ];
318 
319  # Strip Unicode bidi override characters.
320  # Sometimes they slip into cut-n-pasted page titles, where the
321  # override chars get included in list displays.
322  $dbkey = preg_replace( '/[\x{200E}\x{200F}\x{202A}-\x{202E}]+/u', '', $dbkey );
323 
324  # Clean up whitespace
325  # Note: use of the /u option on preg_replace here will cause
326  # input with invalid UTF-8 sequences to be nullified out in PHP 5.2.x,
327  # conveniently disabling them.
328  $dbkey = preg_replace(
329  '/[ _\xA0\x{1680}\x{180E}\x{2000}-\x{200A}\x{2028}\x{2029}\x{202F}\x{205F}\x{3000}]+/u',
330  '_',
331  $dbkey
332  );
333  $dbkey = trim( $dbkey, '_' );
334 
335  if ( strpos( $dbkey, UtfNormal\Constants::UTF8_REPLACEMENT ) !== false ) {
336  # Contained illegal UTF-8 sequences or forbidden Unicode chars.
337  throw new MalformedTitleException( 'title-invalid-utf8', $text );
338  }
339 
340  $parts['dbkey'] = $dbkey;
341 
342  # Initial colon indicates main namespace rather than specified default
343  # but should not create invalid {ns,title} pairs such as {0,Project:Foo}
344  if ( $dbkey !== '' && $dbkey[0] == ':' ) {
345  $parts['namespace'] = NS_MAIN;
346  $dbkey = substr( $dbkey, 1 ); # remove the colon but continue processing
347  $dbkey = trim( $dbkey, '_' ); # remove any subsequent whitespace
348  }
349 
350  if ( $dbkey == '' ) {
351  throw new MalformedTitleException( 'title-invalid-empty', $text );
352  }
353 
354  # Namespace or interwiki prefix
355  $prefixRegexp = "/^(.+?)_*:_*(.*)$/S";
# The loop body normally runs once and leaves through the "break" at the
# bottom. Only a redundant local interwiki prefix ("continue 2" below)
# sends control back to the top to split the remainder again.
356  do {
357  $m = [];
358  if ( preg_match( $prefixRegexp, $dbkey, $m ) ) {
359  $p = $m[1];
360  $ns = $this->language->getNsIndex( $p );
361  if ( $ns !== false ) {
362  # Ordinary namespace
363  $dbkey = $m[2];
364  $parts['namespace'] = $ns;
365  # For Talk:X pages, check if X has a "namespace" prefix
366  if ( $ns == NS_TALK && preg_match( $prefixRegexp, $dbkey, $x ) ) {
367  if ( $this->language->getNsIndex( $x[1] ) ) {
368  # Disallow Talk:File:x type titles...
369  throw new MalformedTitleException( 'title-invalid-talk-namespace', $text );
370  } elseif ( $this->interwikiLookup->isValidInterwiki( $x[1] ) ) {
371  # Disallow Talk:Interwiki:x type titles...
372  throw new MalformedTitleException( 'title-invalid-talk-namespace', $text );
373  }
374  }
375  } elseif ( $this->interwikiLookup->isValidInterwiki( $p ) ) {
376  # Interwiki link
377  $dbkey = $m[2];
378  $parts['interwiki'] = $this->language->lc( $p );
379 
380  # Redundant interwiki prefix to the local wiki
381  foreach ( $this->localInterwikis as $localIW ) {
382  if ( strcasecmp( $parts['interwiki'], $localIW ) == 0 ) {
383  if ( $dbkey == '' ) {
384  # Empty self-links should point to the Main Page, to ensure
385  # compatibility with cross-wiki transclusions and the like.
386  $mainPage = Title::newMainPage();
387  return [
388  'interwiki' => $mainPage->getInterwiki(),
389  'local_interwiki' => true,
390  'fragment' => $mainPage->getFragment(),
391  'namespace' => $mainPage->getNamespace(),
392  'dbkey' => $mainPage->getDBkey(),
393  'user_case_dbkey' => $mainPage->getUserCaseDBKey()
394  ];
395  }
396  $parts['interwiki'] = '';
397  # local interwikis should behave like initial-colon links
398  $parts['local_interwiki'] = true;
399 
400  # Do another namespace split...
# "continue 2" targets the enclosing do/while, not the foreach.
401  continue 2;
402  }
403  }
404 
405  # If there's an initial colon after the interwiki, that also
406  # resets the default namespace
407  if ( $dbkey !== '' && $dbkey[0] == ':' ) {
408  $parts['namespace'] = NS_MAIN;
409  $dbkey = substr( $dbkey, 1 );
410  $dbkey = trim( $dbkey, '_' );
411  }
412  }
413  # If there's no recognized interwiki or namespace,
414  # then let the colon expression be part of the title.
415  }
416  break;
417  } while ( true );
418 
# Split off the fragment: everything from the first '#' onward.
419  $fragment = strstr( $dbkey, '#' );
420  if ( $fragment !== false ) {
421  $parts['fragment'] = str_replace( '_', ' ', substr( $fragment, 1 ) );
422  $dbkey = substr( $dbkey, 0, strlen( $dbkey ) - strlen( $fragment ) );
423  # remove whitespace again: prevents "Foo_bar_#"
424  # becoming "Foo_bar_"
425  $dbkey = preg_replace( '/_*$/', '', $dbkey );
426  }
427 
428  # Reject illegal characters.
429  $rxTc = self::getTitleInvalidRegex();
430  $matches = [];
431  if ( preg_match( $rxTc, $dbkey, $matches ) ) {
432  throw new MalformedTitleException( 'title-invalid-characters', $text, [ $matches[0] ] );
433  }
434 
435  # Pages with "/./" or "/../" appearing in the URLs will often be un-
436  # reachable due to the way web browsers deal with 'relative' URLs.
437  # Also, they conflict with subpage syntax. Forbid them explicitly.
438  if (
439  strpos( $dbkey, '.' ) !== false &&
440  (
441  $dbkey === '.' || $dbkey === '..' ||
442  strpos( $dbkey, './' ) === 0 ||
443  strpos( $dbkey, '../' ) === 0 ||
444  strpos( $dbkey, '/./' ) !== false ||
445  strpos( $dbkey, '/../' ) !== false ||
446  substr( $dbkey, -2 ) == '/.' ||
447  substr( $dbkey, -3 ) == '/..'
448  )
449  ) {
450  throw new MalformedTitleException( 'title-invalid-relative', $text );
451  }
452 
453  # Magic tilde sequences? Nu-uh!
454  if ( strpos( $dbkey, '~~~' ) !== false ) {
455  throw new MalformedTitleException( 'title-invalid-magic-tilde', $text );
456  }
457 
458  # Limit the size of titles to 255 bytes. This is typically the size of the
459  # underlying database field. We make an exception for special pages, which
460  # don't need to be stored in the database, and may edge over 255 bytes due
461  # to subpage syntax for long titles, e.g. [[Special:Block/Long name]]
462  $maxLength = ( $parts['namespace'] != NS_SPECIAL ) ? 255 : 512;
# strlen() counts bytes, which is what the limit above is expressed in.
463  if ( strlen( $dbkey ) > $maxLength ) {
464  throw new MalformedTitleException( 'title-invalid-too-long', $text,
465  [ Message::numParam( $maxLength ) ] );
466  }
467 
468  # Normally, all wiki links are forced to have an initial capital letter so [[foo]]
469  # and [[Foo]] point to the same place. Don't force it for interwikis, since the
470  # other site might be case-sensitive.
# Record the key in the caller's original case before any capitalization.
471  $parts['user_case_dbkey'] = $dbkey;
472  if ( $parts['interwiki'] === '' && $this->nsInfo->isCapitalized( $parts['namespace'] ) ) {
473  $dbkey = $this->language->ucfirst( $dbkey );
474  }
475 
476  # Can't make a link to a namespace alone... "empty" local links can only be
477  # self-links with a fragment identifier.
478  if ( $dbkey == '' && $parts['interwiki'] === '' && $parts['namespace'] != NS_MAIN ) {
479  throw new MalformedTitleException( 'title-invalid-empty', $text );
480  }
481 
482  // Allow IPv6 usernames to start with '::' by canonicalizing IPv6 titles.
483  // IP names are not allowed for accounts, and can only be referring to
484  // edits from the IP. Given '::' abbreviations and caps/lowercaps,
485  // there are numerous ways to present the same IP. Having sp:contribs scan
486  // them all is silly and having some show the edits and others not is
487  // inconsistent. Same for talk/userpages. Keep them normalized instead.
488  if ( $parts['namespace'] == NS_USER || $parts['namespace'] == NS_USER_TALK ) {
489  $dbkey = IP::sanitizeIP( $dbkey );
490  }
491 
492  // Any remaining initial :s are illegal.
493  if ( $dbkey !== '' && $dbkey[0] == ':' ) {
494  throw new MalformedTitleException( 'title-invalid-leading-colon', $text );
495  }
496 
497  # Fill fields
498  $parts['dbkey'] = $dbkey;
499 
500  return $parts;
501  }
502 
/**
 * Returns a simple regex that will match on characters and sequences
 * invalid in titles.
 *
 * The pattern is assembled on the first call and kept in a static local
 * for subsequent calls.
 *
 * @return string Regex string; a title that matches it is illegal
 */
public static function getTitleInvalidRegex() {
	static $cachedRegex = false;

	if ( $cachedRegex ) {
		return $cachedRegex;
	}

	$pattern = '/';
	# Any character outside the legal set is forbidden...
	$pattern .= '[^' . Title::legalChars() . ']';
	# URL percent-encoding sequences break round-tripping of titles:
	# they cannot be linked to consistently.
	$pattern .= '|%[0-9A-Fa-f]{2}';
	# XML/HTML character references cause similar problems.
	$pattern .= '|&[A-Za-z0-9\x80-\xff]+;';
	$pattern .= '|&#[0-9]+;';
	$pattern .= '|&#x[0-9A-Fa-f]+;';
	$pattern .= '/S';

	$cachedRegex = $pattern;

	return $cachedRegex;
}
531 }
MediaWikiTitleCodec\getPrefixedText
getPrefixedText(LinkTarget $title)
Definition: MediaWikiTitleCodec.php:242
MediaWiki\Linker\LinkTarget\getInterwiki
getInterwiki()
The interwiki component of this LinkTarget.
MediaWikiTitleCodec
A codec for MediaWiki page titles.
Definition: MediaWikiTitleCodec.php:38
MediaWiki\MediaWikiServices
MediaWikiServices is the service locator for the application scope of MediaWiki.
Definition: MediaWikiServices.php:117
MediaWikiTitleCodec\getTitleInvalidRegex
static getTitleInvalidRegex()
Returns a simple regex that will match on characters and sequences invalid in titles.
Definition: MediaWikiTitleCodec.php:512
GenderCache
Caches user genders when needed to use correct namespace aliases.
Definition: GenderCache.php:33
MediaWikiTitleCodec\splitTitleString
splitTitleString( $text, $defaultNamespace=NS_MAIN)
Normalizes and splits a title string.
Definition: MediaWikiTitleCodec.php:306
MediaWikiTitleCodec\makeTitleValueSafe
makeTitleValueSafe( $namespace, $text, $fragment='', $interwiki='')
Given a namespace and title, return a TitleValue if valid, or null if invalid.
Definition: MediaWikiTitleCodec.php:199
Title\newMainPage
static newMainPage(MessageLocalizer $localizer=null)
Create a new Title for the Main Page.
Definition: Title.php:649
MediaWikiTitleCodec\getNamespaceName
getNamespaceName( $namespace, $text)
Definition: MediaWikiTitleCodec.php:100
MediaWikiTitleCodec\parseTitle
parseTitle( $text, $defaultNamespace=NS_MAIN)
Parses the given text and constructs a TitleValue.
Definition: MediaWikiTitleCodec.php:166
MediaWikiTitleCodec\getFullText
getFullText(LinkTarget $title)
Definition: MediaWikiTitleCodec.php:277
NS_MAIN
const NS_MAIN
Definition: Defines.php:60
NS_SPECIAL
const NS_SPECIAL
Definition: Defines.php:49
MediaWiki\Linker\LinkTarget\getNamespace
getNamespace()
Get the namespace index.
wfDeprecated
wfDeprecated( $function, $version=false, $component=false, $callerOffset=2)
Throws a warning that $function is deprecated.
Definition: GlobalFunctions.php:1044
MediaWikiTitleCodec\__construct
__construct(Language $language, GenderCache $genderCache, $localInterwikis=[], InterwikiLookup $interwikiLookup=null, NamespaceInfo $nsInfo=null)
Definition: MediaWikiTitleCodec.php:72
$matches
$matches
Definition: NoLocalSettings.php:24
MediaWiki\Interwiki\InterwikiLookup
Service interface for looking up Interwiki records.
Definition: InterwikiLookup.php:32
MediaWikiTitleCodec\formatTitle
formatTitle( $namespace, $text, $fragment='', $interwiki='')
Definition: MediaWikiTitleCodec.php:130
MediaWikiTitleCodec\$localInterwikis
string[] $localInterwikis
Definition: MediaWikiTitleCodec.php:52
$title
$title
Definition: testCompression.php:34
TitleParser
A title parser service for MediaWiki.
Definition: TitleParser.php:33
NS_USER_TALK
const NS_USER_TALK
Definition: Defines.php:63
MediaWikiTitleCodec\$interwikiLookup
InterwikiLookup $interwikiLookup
Definition: MediaWikiTitleCodec.php:57
MediaWiki\Linker\LinkTarget\getDBkey
getDBkey()
Get the main part with underscores.
MediaWikiTitleCodec\$genderCache
GenderCache $genderCache
Definition: MediaWikiTitleCodec.php:47
IP\sanitizeIP
static sanitizeIP( $ip)
Convert an IP into a verbose, uppercase, normalized form.
Definition: IP.php:139
MediaWikiTitleCodec\getText
getText(LinkTarget $title)
Definition: MediaWikiTitleCodec.php:230
MalformedTitleException
MalformedTitleException is thrown when a TitleParser is unable to parse a title string.
Definition: MalformedTitleException.php:25
TitleFormatter
A title formatter service for MediaWiki.
Definition: TitleFormatter.php:34
MediaWikiTitleCodec\getPrefixedDBkey
getPrefixedDBkey(LinkTarget $target)
Definition: MediaWikiTitleCodec.php:261
NS_USER
const NS_USER
Definition: Defines.php:62
NS_TALK
const NS_TALK
Definition: Defines.php:61
MediaWikiTitleCodec\$nsInfo
NamespaceInfo $nsInfo
Definition: MediaWikiTitleCodec.php:62
NamespaceInfo
This is a utility class for dealing with namespaces that encodes all the "magic" behaviors of them based on index.
Definition: NamespaceInfo.php:33
Title\legalChars
static legalChars()
Get a regex character class describing the legal characters in a link.
Definition: Title.php:695
MediaWiki\Linker\LinkTarget
Definition: LinkTarget.php:26
Language
Internationalisation code.
Definition: Language.php:37
MediaWikiTitleCodec\$language
Language $language
Definition: MediaWikiTitleCodec.php:42
TitleValue
Represents a page (or page fragment) title within MediaWiki.
Definition: TitleValue.php:36