MediaWiki  master
MediaWikiTitleCodec.php
Go to the documentation of this file.
1 <?php
26 
/** @var Language Language object supplied to the constructor; used for namespace names, lower-casing and ucfirst */
42  protected $language;
43 
/** @var GenderCache Supplied to the constructor; queried in getNamespaceName() for gendered namespace names */
47  protected $genderCache;
48 
/** @var string[] Constructor argument cast to array; interwiki prefixes that splitTitleString() treats as pointing at the local wiki */
52  protected $localInterwikis;
53 
/** @var InterwikiLookup Used by splitTitleString() to test whether a prefix is a valid interwiki */
57  protected $interwikiLookup;
58 
/** @var NamespaceInfo Queried for namespace existence, canonical names, gender distinction and capitalization */
62  protected $nsInfo;
63 
74  NamespaceInfo $nsInfo = null
75  ) {
76  if ( !$interwikiLookup ) {
77  wfDeprecated( __METHOD__ . ' with no InterwikiLookup argument', '1.34' );
78  $interwikiLookup = MediaWikiServices::getInstance()->getInterwikiLookup();
79  }
80  if ( !$nsInfo ) {
81  wfDeprecated( __METHOD__ . ' with no NamespaceInfo argument', '1.34' );
82  $nsInfo = MediaWikiServices::getInstance()->getNamespaceInfo();
83  }
84  $this->language = $language;
85  $this->genderCache = $genderCache;
86  $this->localInterwikis = (array)$localInterwikis;
87  $this->interwikiLookup = $interwikiLookup;
88  $this->nsInfo = $nsInfo;
89  }
90 
/**
 * Returns the name of the given namespace as reported by the language object.
 *
 * When both the language and the namespace make a gender distinction, the
 * gender-specific namespace name is used, based on the gender looked up for
 * $text in the gender cache.
 *
 * @param int $namespace Namespace ID
 * @param string $text Title text; treated as a user name for the gender lookup
 *
 * @throws InvalidArgumentException If the language object returns false for the namespace.
 * @return string Namespace name
 */
public function getNamespaceName( $namespace, $text ) {
	$useGenderedName = $this->language->needsGenderDistinction() &&
		$this->nsInfo->hasGenderDistinction( $namespace );

	if ( $useGenderedName ) {
		// NOTE: we are assuming here that the title text is a user name!
		$userGender = $this->genderCache->getGenderOf( $text, __METHOD__ );
		$nsName = $this->language->getGenderNsText( $namespace, $userGender );
	} else {
		$nsName = $this->language->getNsText( $namespace );
	}

	if ( $nsName !== false ) {
		return $nsName;
	}

	throw new InvalidArgumentException( 'Unknown namespace ID: ' . $namespace );
}
117 
/**
 * Builds a display string of the form "interwiki:Namespace:text#fragment".
 *
 * Empty components are left out, and every underscore in the assembled
 * string is turned into a space.
 *
 * @param int|bool $namespace Namespace ID; anything loosely equal to 0 gets no namespace prefix
 * @param string $text Title text without namespace prefix
 * @param string $fragment Fragment, without the leading '#'
 * @param string $interwiki Interwiki prefix, without the trailing ':'
 *
 * @return string
 */
public function formatTitle( $namespace, $text, $fragment = '', $interwiki = '' ) {
	$prefix = ( $interwiki !== '' ) ? $interwiki . ':' : '';

	if ( $namespace != 0 ) {
		try {
			$nsName = $this->getNamespaceName( $namespace, $text );
		} catch ( InvalidArgumentException $e ) {
			// See T165149. Awkward, but better than erroneously linking to the main namespace.
			$nsName = $this->language->getNsText( NS_SPECIAL ) . ":Badtitle/NS{$namespace}";
		}

		$prefix .= $nsName . ':';
	}

	$suffix = ( $fragment !== '' ) ? '#' . $fragment : '';

	// Underscores are replaced across the whole string, prefix and fragment included.
	return str_replace( '_', ' ', $prefix . $text . $suffix );
}
156 
/**
 * Parses the given text and constructs a TitleValue.
 *
 * Character references in $text are decoded and normalized (T16952) before
 * the string is handed to splitTitleString().
 *
 * @param string $text Title text to parse
 * @param int $defaultNamespace Namespace to use when $text carries no namespace prefix
 *
 * @throws MalformedTitleException Propagated from splitTitleString()
 * @return TitleValue
 */
public function parseTitle( $text, $defaultNamespace = NS_MAIN ) {
	// Convert things like &eacute; &#257; or &#x3017; into normalized (T16952) text
	$normalized = Sanitizer::decodeCharReferencesAndNormalize( $text );

	// NOTE: this is an ugly kludge that allows this class to share the
	// code for parsing with the old Title class. The parser code should
	// be refactored to avoid this.
	$split = $this->splitTitleString( $normalized, $defaultNamespace );

	$ns = $split['namespace'];
	$dbkey = $split['dbkey'];
	$fragment = $split['fragment'];
	$interwiki = $split['interwiki'];

	return new TitleValue( $ns, $dbkey, $fragment, $interwiki );
}
182 
/**
 * Given a namespace and title, return a TitleValue if valid, or null if invalid.
 *
 * The components are joined into one title string (using the canonical
 * namespace name) and run through splitTitleString(), so the result is
 * normalized in the same way as parsed input.
 *
 * @param int $namespace Namespace ID; null is returned if NamespaceInfo does not know it
 * @param string $text Title text without namespace prefix
 * @param string $fragment Fragment, without the leading '#'
 * @param string $interwiki Interwiki prefix
 *
 * @return TitleValue|null
 */
public function makeTitleValueSafe( $namespace, $text, $fragment = '', $interwiki = '' ) {
	if ( !$this->nsInfo->exists( $namespace ) ) {
		return null;
	}

	$nsPrefix = $this->nsInfo->getCanonicalName( $namespace );
	if ( $nsPrefix == '' ) {
		$titleString = $text;
	} else {
		$titleString = "$nsPrefix:$text";
	}

	if ( strval( $interwiki ) != '' ) {
		$titleString = "$interwiki:$titleString";
	}

	if ( strval( $fragment ) != '' ) {
		$titleString .= '#' . $fragment;
	}

	try {
		$split = $this->splitTitleString( $titleString );
	} catch ( MalformedTitleException $e ) {
		// An unparsable title is reported as null rather than as an exception.
		return null;
	}

	return new TitleValue(
		$split['namespace'],
		$split['dbkey'],
		$split['fragment'],
		$split['interwiki']
	);
}
216 
/**
 * Returns the text of the given link target, as provided by the target itself.
 *
 * @param LinkTarget $title
 *
 * @return string
 */
public function getText( LinkTarget $title ) {
	$plainText = $title->getText();

	return $plainText;
}
227 
/**
 * Returns the title formatted with interwiki and namespace prefixes but no fragment.
 *
 * The formatted string is stored on the target object in a `prefixedText`
 * property and reused on later calls for the same object.
 *
 * @param LinkTarget $title
 *
 * @return string
 */
public function getPrefixedText( LinkTarget $title ) {
	if ( isset( $title->prefixedText ) ) {
		return $title->prefixedText;
	}

	$ns = $title->getNamespace();
	$plainText = $title->getText();
	$interwiki = $title->getInterwiki();
	$title->prefixedText = $this->formatTitle( $ns, $plainText, '', $interwiki );

	return $title->prefixedText;
}
248 
/**
 * Returns the prefixed title in database-key form: formatted like the
 * prefixed text, but with every space turned into an underscore.
 *
 * @param LinkTarget $target
 *
 * @return string
 */
public function getPrefixedDBkey( LinkTarget $target ) {
	$ns = $target->getNamespace();
	$dbkey = $target->getDBkey();
	$interwiki = $target->getInterwiki();

	// formatTitle() emits spaces, so map them back to underscores.
	$formatted = $this->formatTitle( $ns, $dbkey, '', $interwiki );

	return strtr( $formatted, ' ', '_' );
}
263 
/**
 * Returns the title formatted with interwiki prefix, namespace prefix and fragment.
 *
 * @param LinkTarget $title
 *
 * @return string
 */
public function getFullText( LinkTarget $title ) {
	$ns = $title->getNamespace();
	$plainText = $title->getText();
	$fragment = $title->getFragment();
	$interwiki = $title->getInterwiki();

	return $this->formatTitle( $ns, $plainText, $fragment, $interwiki );
}
279 
/**
 * Validates, normalizes and splits a title string.
 *
 * The text is cleaned of bidi override characters and redundant whitespace,
 * interwiki and namespace prefixes are split off, the fragment is separated,
 * and the remaining database key is checked against the title restrictions
 * (illegal characters, relative path segments, tilde sequences, length,
 * leading colon). Finally the result is checked with
 * TitleValue::assertValidSpec() so that it can always be used to build a
 * TitleValue.
 *
 * @param string $text Title string to split
 * @param int $defaultNamespace Namespace to assume when $text has no namespace prefix
 *
 * @throws MalformedTitleException If $text is not a valid title string.
 * @return array A map with the fields 'interwiki', 'local_interwiki', 'fragment',
 *   'namespace', 'dbkey' and 'user_case_dbkey'.
 */
public function splitTitleString( $text, $defaultNamespace = NS_MAIN ) {
	$dbkey = str_replace( ' ', '_', $text );

	# Initialisation
	$parts = [
		'interwiki' => '',
		'local_interwiki' => false,
		'fragment' => '',
		'namespace' => $defaultNamespace,
		'dbkey' => $dbkey,
		'user_case_dbkey' => $dbkey,
	];

	# Strip Unicode bidi override characters.
	# Sometimes they slip into cut-n-pasted page titles, where the
	# override chars get included in list displays.
	$dbkey = preg_replace( '/[\x{200E}\x{200F}\x{202A}-\x{202E}]+/u', '', $dbkey );

	# Clean up whitespace
	# Note: use of the /u option on preg_replace here will cause
	# input with invalid UTF-8 sequences to be nullified out in PHP 5.2.x,
	# conveniently disabling them.
	$dbkey = preg_replace(
		'/[ _\xA0\x{1680}\x{180E}\x{2000}-\x{200A}\x{2028}\x{2029}\x{202F}\x{205F}\x{3000}]+/u',
		'_',
		$dbkey
	);
	$dbkey = trim( $dbkey, '_' );

	if ( strpos( $dbkey, UtfNormal\Constants::UTF8_REPLACEMENT ) !== false ) {
		# Contained illegal UTF-8 sequences or forbidden Unicode chars.
		throw new MalformedTitleException( 'title-invalid-utf8', $text );
	}

	$parts['dbkey'] = $dbkey;

	# Initial colon indicates main namespace rather than specified default
	# but should not create invalid {ns,title} pairs such as {0,Project:Foo}
	if ( $dbkey !== '' && $dbkey[0] == ':' ) {
		$parts['namespace'] = NS_MAIN;
		$dbkey = substr( $dbkey, 1 ); # remove the colon but continue processing
		$dbkey = trim( $dbkey, '_' ); # remove any subsequent whitespace
	}

	if ( $dbkey == '' ) {
		throw new MalformedTitleException( 'title-invalid-empty', $text );
	}

	# Namespace or interwiki prefix
	$prefixRegexp = "/^(.+?)_*:_*(.*)$/S";
	do {
		$m = [];
		if ( preg_match( $prefixRegexp, $dbkey, $m ) ) {
			$p = $m[1];
			$ns = $this->language->getNsIndex( $p );
			if ( $ns !== false ) {
				# Ordinary namespace
				$dbkey = $m[2];
				$parts['namespace'] = $ns;
				# For Talk:X pages, check if X has a "namespace" prefix
				if ( $ns == NS_TALK && preg_match( $prefixRegexp, $dbkey, $x ) ) {
					if ( $this->language->getNsIndex( $x[1] ) ) {
						# Disallow Talk:File:x type titles...
						throw new MalformedTitleException( 'title-invalid-talk-namespace', $text );
					} elseif ( $this->interwikiLookup->isValidInterwiki( $x[1] ) ) {
						# Disallow Talk:Interwiki:x type titles...
						throw new MalformedTitleException( 'title-invalid-talk-namespace', $text );
					}
				}
			} elseif ( $this->interwikiLookup->isValidInterwiki( $p ) ) {
				# Interwiki link
				$dbkey = $m[2];
				$parts['interwiki'] = $this->language->lc( $p );

				# Redundant interwiki prefix to the local wiki
				foreach ( $this->localInterwikis as $localIW ) {
					if ( strcasecmp( $parts['interwiki'], $localIW ) == 0 ) {
						if ( $dbkey == '' ) {
							# Empty self-links should point to the Main Page, to ensure
							# compatibility with cross-wiki transclusions and the like.
							$mainPage = Title::newMainPage();
							return [
								'interwiki' => $mainPage->getInterwiki(),
								'local_interwiki' => true,
								'fragment' => $mainPage->getFragment(),
								'namespace' => $mainPage->getNamespace(),
								'dbkey' => $mainPage->getDBkey(),
								'user_case_dbkey' => $mainPage->getUserCaseDBKey()
							];
						}
						$parts['interwiki'] = '';
						# local interwikis should behave like initial-colon links
						$parts['local_interwiki'] = true;

						# Do another namespace split...
						continue 2;
					}
				}

				# If there's an initial colon after the interwiki, that also
				# resets the default namespace
				if ( $dbkey !== '' && $dbkey[0] == ':' ) {
					$parts['namespace'] = NS_MAIN;
					$dbkey = substr( $dbkey, 1 );
					$dbkey = trim( $dbkey, '_' );
				}
			}
			# If there's no recognized interwiki or namespace,
			# then let the colon expression be part of the title.
		}
		break;
	} while ( true );

	$fragment = strstr( $dbkey, '#' );
	if ( $fragment !== false ) {
		$parts['fragment'] = str_replace( '_', ' ', substr( $fragment, 1 ) );
		$dbkey = substr( $dbkey, 0, strlen( $dbkey ) - strlen( $fragment ) );
		# remove whitespace again: prevents "Foo_bar_#"
		# becoming "Foo_bar_"
		$dbkey = preg_replace( '/_*$/', '', $dbkey );
	}

	# Reject illegal characters.
	$rxTc = self::getTitleInvalidRegex();
	$matches = [];
	if ( preg_match( $rxTc, $dbkey, $matches ) ) {
		throw new MalformedTitleException( 'title-invalid-characters', $text, [ $matches[0] ] );
	}

	# Pages with "/./" or "/../" appearing in the URLs will often be un-
	# reachable due to the way web browsers deal with 'relative' URLs.
	# Also, they conflict with subpage syntax. Forbid them explicitly.
	if (
		strpos( $dbkey, '.' ) !== false &&
		(
			$dbkey === '.' || $dbkey === '..' ||
			strpos( $dbkey, './' ) === 0 ||
			strpos( $dbkey, '../' ) === 0 ||
			strpos( $dbkey, '/./' ) !== false ||
			strpos( $dbkey, '/../' ) !== false ||
			substr( $dbkey, -2 ) == '/.' ||
			substr( $dbkey, -3 ) == '/..'
		)
	) {
		throw new MalformedTitleException( 'title-invalid-relative', $text );
	}

	# Magic tilde sequences? Nu-uh!
	if ( strpos( $dbkey, '~~~' ) !== false ) {
		throw new MalformedTitleException( 'title-invalid-magic-tilde', $text );
	}

	# Limit the size of titles to 255 bytes. This is typically the size of the
	# underlying database field. We make an exception for special pages, which
	# don't need to be stored in the database, and may edge over 255 bytes due
	# to subpage syntax for long titles, e.g. [[Special:Block/Long name]]
	$maxLength = ( $parts['namespace'] != NS_SPECIAL ) ? 255 : 512;
	if ( strlen( $dbkey ) > $maxLength ) {
		throw new MalformedTitleException( 'title-invalid-too-long', $text,
			[ Message::numParam( $maxLength ) ] );
	}

	# Normally, all wiki links are forced to have an initial capital letter so [[foo]]
	# and [[Foo]] point to the same place. Don't force it for interwikis, since the
	# other site might be case-sensitive.
	$parts['user_case_dbkey'] = $dbkey;
	if ( $parts['interwiki'] === '' && $this->nsInfo->isCapitalized( $parts['namespace'] ) ) {
		$dbkey = $this->language->ucfirst( $dbkey );
	}

	# Can't make a link to a namespace alone... "empty" local links can only be
	# self-links with a fragment identifier.
	if ( $dbkey == '' && $parts['interwiki'] === '' && $parts['namespace'] != NS_MAIN ) {
		throw new MalformedTitleException( 'title-invalid-empty', $text );
	}

	// Allow IPv6 usernames to start with '::' by canonicalizing IPv6 titles.
	// IP names are not allowed for accounts, and can only be referring to
	// edits from the IP. Given '::' abbreviations and caps/lowercaps,
	// there are numerous ways to present the same IP. Having sp:contribs scan
	// them all is silly and having some show the edits and others not is
	// inconsistent. Same for talk/userpages. Keep them normalized instead.
	if ( $parts['namespace'] == NS_USER || $parts['namespace'] == NS_USER_TALK ) {
		$dbkey = IP::sanitizeIP( $dbkey );
	}

	// Any remaining initial :s are illegal.
	if ( $dbkey !== '' && $dbkey[0] == ':' ) {
		throw new MalformedTitleException( 'title-invalid-leading-colon', $text );
	}

	// Fill fields
	$parts['dbkey'] = $dbkey;

	// Sanity check to ensure that the return value can be used to construct a TitleValue.
	// All issues should in theory be caught above, this is here to enforce consistency.
	try {
		TitleValue::assertValidSpec(
			$parts['namespace'],
			$parts['dbkey'],
			$parts['fragment'],
			$parts['interwiki']
		);
	} catch ( InvalidArgumentException $ex ) {
		throw new MalformedTitleException( 'title-invalid', $text, [ $ex->getMessage() ] );
	}

	return $parts;
}
510 
/**
 * Returns a simple regex that will match on characters and sequences invalid in titles.
 *
 * The pattern is assembled once per request and kept in a function-static
 * variable; later calls return the stored string.
 *
 * @return string Regex string; a title that matches is held as illegal
 */
public static function getTitleInvalidRegex() {
	static $cachedRegex = false;

	if ( $cachedRegex ) {
		return $cachedRegex;
	}

	# Matching titles will be held as illegal.
	$cachedRegex = implode( '', [
		'/',
		# Any character not allowed is forbidden...
		'[^' . Title::legalChars() . ']',
		# URL percent encoding sequences interfere with the ability
		# to round-trip titles -- you can't link to them consistently.
		'|%[0-9A-Fa-f]{2}',
		# XML/HTML character references produce similar issues.
		'|&[A-Za-z0-9\x80-\xff]+;',
		'|&#[0-9]+;',
		'|&#x[0-9A-Fa-f]+;',
		'/S',
	] );

	return $cachedRegex;
}
539 }
static getTitleInvalidRegex()
Returns a simple regex that will match on characters and sequences invalid in titles.
const NS_MAIN
Definition: Defines.php:60
static newMainPage(MessageLocalizer $localizer=null)
Create a new Title for the Main Page.
Definition: Title.php:648
getText(LinkTarget $title)
Represents a page (or page fragment) title within MediaWiki.
Definition: TitleValue.php:36
const NS_SPECIAL
Definition: Defines.php:49
static assertValidSpec( $namespace, $title, $fragment='', $interwiki='')
Asserts that the given parameters could be used to construct a TitleValue object. ...
Definition: TitleValue.php:110
static numParam( $num)
Definition: Message.php:1038
getNamespace()
Get the namespace index.
getFragment()
Get the link fragment (i.e. the bit after the #) in text form.
static sanitizeIP( $ip)
Convert an IP into a verbose, uppercase, normalized form.
Definition: IP.php:139
getNamespaceName( $namespace, $text)
splitTitleString( $text, $defaultNamespace=NS_MAIN)
Validates, normalizes and splits a title string.
getDBkey()
Get the main part with underscores.
getPrefixedText(LinkTarget $title)
Service interface for looking up Interwiki records.
makeTitleValueSafe( $namespace, $text, $fragment='', $interwiki='')
Given a namespace and title, return a TitleValue if valid, or null if invalid.
formatTitle( $namespace, $text, $fragment='', $interwiki='')
parseTitle( $text, $defaultNamespace=NS_MAIN)
Parses the given text and constructs a TitleValue.
__construct(Language $language, GenderCache $genderCache, $localInterwikis=[], InterwikiLookup $interwikiLookup=null, NamespaceInfo $nsInfo=null)
InterwikiLookup $interwikiLookup
getFullText(LinkTarget $title)
wfDeprecated( $function, $version=false, $component=false, $callerOffset=2)
Throws a warning that $function is deprecated.
getInterwiki()
The interwiki component of this LinkTarget.
getText()
Returns the link in text form, without namespace prefix or fragment.
static legalChars()
Get a regex character class describing the legal characters in a link.
Definition: Title.php:694
const NS_TALK
Definition: Defines.php:61
const NS_USER_TALK
Definition: Defines.php:63
getPrefixedDBkey(LinkTarget $target)
static decodeCharReferencesAndNormalize( $text)
Decode any character references, numeric or named entities, in the text and normalize the resulting string.
Definition: Sanitizer.php:1686
$matches