MediaWiki  master
MediaWikiTitleCodec.php
Go to the documentation of this file.
1 <?php
26 
42  protected $language;
43 
47  protected $genderCache;
48 
52  protected $localInterwikis;
53 
57  protected $interwikiLookup;
58 
62  protected $nsInfo;
63 
74  ) {
75  if ( !$interwikiLookup ) {
76  wfDeprecated( __METHOD__ . ' with no InterwikiLookup argument', '1.34' );
77  $interwikiLookup = MediaWikiServices::getInstance()->getInterwikiLookup();
78  }
79  if ( !$nsInfo ) {
80  wfDeprecated( __METHOD__ . ' with no NamespaceInfo argument', '1.34' );
81  $nsInfo = MediaWikiServices::getInstance()->getNamespaceInfo();
82  }
83  $this->language = $language;
84  $this->genderCache = $genderCache;
85  $this->localInterwikis = (array)$localInterwikis;
86  $this->interwikiLookup = $interwikiLookup;
87  $this->nsInfo = $nsInfo;
88  }
89 
/**
 * Returns the localized name of a namespace, using the gender-specific
 * form when both the language and the namespace call for one.
 *
 * @param int $namespace Namespace ID
 * @param string $text Title text; used as a user name for the gender lookup
 *
 * @throws InvalidArgumentException if the language object has no name for the ID
 * @return string Namespace name
 */
public function getNamespaceName( $namespace, $text ) {
    $gendered = $this->language->needsGenderDistinction()
        && $this->nsInfo->hasGenderDistinction( $namespace );

    if ( $gendered ) {
        // NOTE: this relies on the title text being a user name!
        $name = $this->language->getGenderNsText(
            $namespace,
            $this->genderCache->getGenderOf( $text, __METHOD__ )
        );
    } else {
        $name = $this->language->getNsText( $namespace );
    }

    if ( $name !== false ) {
        return $name;
    }

    throw new InvalidArgumentException( 'Unknown namespace ID: ' . $namespace );
}
116 
/**
 * Assembles a display string of the form "interwiki:Namespace:text#fragment"
 * and replaces every underscore in the result with a space.
 *
 * @param int|bool $namespace Namespace ID; any value loosely equal to 0
 *   (including false) suppresses the namespace prefix
 * @param string $text Page title text
 * @param string $fragment Fragment, appended after '#' unless it is ''
 * @param string $interwiki Interwiki prefix, prepended unless it is ''
 *
 * @return string
 */
public function formatTitle( $namespace, $text, $fragment = '', $interwiki = '' ) {
    $prefix = ( $interwiki !== '' ) ? $interwiki . ':' : '';

    if ( $namespace != 0 ) {
        try {
            $nsName = $this->getNamespaceName( $namespace, $text );
        } catch ( InvalidArgumentException $e ) {
            // See T165149. Awkward, but better than erroneously linking to the main namespace.
            $nsName = $this->language->getNsText( NS_SPECIAL ) . ":Badtitle/NS{$namespace}";
        }

        $prefix .= $nsName . ':';
    }

    $suffix = ( $fragment !== '' ) ? '#' . $fragment : '';

    return str_replace( '_', ' ', $prefix . $text . $suffix );
}
155 
/**
 * Parses the given text and constructs a TitleValue.
 *
 * @param string $text The text to parse
 * @param int $defaultNamespace Namespace to assume when the text names none
 *
 * @throws MalformedTitleException if the text cannot be split, or if it yields
 *   an empty page name that is not a fragment-only link in the main namespace
 * @return TitleValue
 */
public function parseTitle( $text, $defaultNamespace = NS_MAIN ) {
    // Character references such as &eacute; &#257; or &#x3017; are decoded
    // and normalized first (T16952).
    // NOTE: delegating to splitTitleString() is a kludge that lets this class
    // share parsing code with the old Title class; the parser code should be
    // refactored to avoid this.
    $parts = $this->splitTitleString(
        Sanitizer::decodeCharReferencesAndNormalize( $text ),
        $defaultNamespace
    );

    // An empty page name is acceptable only for a fragment-only link that
    // stays in the main namespace.
    $hasPageName = $parts['dbkey'] !== '';
    $isMainFragmentOnly = $parts['fragment'] !== '' && $parts['namespace'] === NS_MAIN;
    if ( !$hasPageName && !$isMainFragmentOnly ) {
        throw new MalformedTitleException( 'title-invalid-empty', $text );
    }

    return new TitleValue(
        $parts['namespace'],
        $parts['dbkey'],
        $parts['fragment'],
        $parts['interwiki']
    );
}
187 
/**
 * Given a namespace and title, return a TitleValue if valid, or null if invalid.
 *
 * The parts are joined into one title string (using the canonical namespace
 * name) and run through splitTitleString() so the result is normalized.
 *
 * @param int $namespace Namespace ID
 * @param string $text Page title text
 * @param string $fragment Optional fragment
 * @param string $interwiki Optional interwiki prefix
 *
 * @return TitleValue|null Null if the namespace does not exist or the
 *   assembled string is a malformed title
 */
public function makeTitleValueSafe( $namespace, $text, $fragment = '', $interwiki = '' ) {
    if ( !$this->nsInfo->exists( $namespace ) ) {
        return null;
    }

    $fullText = $text;

    $canonicalNs = $this->nsInfo->getCanonicalName( $namespace );
    if ( $canonicalNs != '' ) {
        $fullText = "$canonicalNs:$text";
    }

    if ( strval( $interwiki ) != '' ) {
        $fullText = "$interwiki:$fullText";
    }

    if ( strval( $fragment ) != '' ) {
        $fullText .= '#' . $fragment;
    }

    try {
        $parts = $this->splitTitleString( $fullText );
    } catch ( MalformedTitleException $e ) {
        return null;
    }

    return new TitleValue(
        $parts['namespace'],
        $parts['dbkey'],
        $parts['fragment'],
        $parts['interwiki']
    );
}
221 
/**
 * Returns the text of the given link target, as reported by the target itself.
 *
 * @param LinkTarget $title
 *
 * @return string
 */
public function getText( LinkTarget $title ) {
    $text = $title->getText();

    return $text;
}
232 
/**
 * Returns the formatted title with interwiki and namespace prefixes but
 * without the fragment.
 *
 * The result is stored in the target's prefixedText property, so later
 * calls for the same object reuse it.
 *
 * @param LinkTarget $title
 *
 * @return string
 */
public function getPrefixedText( LinkTarget $title ) {
    if ( isset( $title->prefixedText ) ) {
        return $title->prefixedText;
    }

    $namespace = $title->getNamespace();
    $text = $title->getText();
    $interwiki = $title->getInterwiki();
    $title->prefixedText = $this->formatTitle( $namespace, $text, '', $interwiki );

    return $title->prefixedText;
}
252 
/**
 * Returns the prefixed database key form of a link target: the title
 * formatted from its DB key, with interwiki and namespace prefixes, no
 * fragment, and every space turned into an underscore.
 *
 * @param LinkTarget $target
 *
 * @return string
 */
public function getPrefixedDBkey( LinkTarget $target ) {
    $namespace = $target->getNamespace();
    $dbkey = $target->getDBkey();
    $interwiki = $target->getInterwiki();

    // formatTitle() emits spaces; the DB key form needs underscores.
    $formatted = $this->formatTitle( $namespace, $dbkey, '', $interwiki );

    return str_replace( ' ', '_', $formatted );
}
267 
/**
 * Returns the complete formatted title, including interwiki prefix,
 * namespace prefix and fragment.
 *
 * @param LinkTarget $title
 *
 * @return string
 */
public function getFullText( LinkTarget $title ) {
    $namespace = $title->getNamespace();
    $text = $title->getText();
    $fragment = $title->getFragment();
    $interwiki = $title->getInterwiki();

    return $this->formatTitle( $namespace, $text, $fragment, $interwiki );
}
283 
/**
 * Normalizes and splits a title string.
 *
 * Whitespace is collapsed to single underscores, namespace and interwiki
 * prefixes are peeled off, the fragment is separated, and the remaining
 * page name is validated.
 *
 * @param string $text The title text to split
 * @param int $defaultNamespace Namespace to use when the text names none
 *
 * @throws MalformedTitleException if the text is not valid UTF-8 or the
 *   resulting title breaks any of the rules checked below
 * @return array Map with the keys 'interwiki' (string), 'local_interwiki'
 *   (bool), 'fragment' (string), 'namespace' (int), 'dbkey' (string) and
 *   'user_case_dbkey' (string, the key before capitalization)
 */
public function splitTitleString( $text, $defaultNamespace = NS_MAIN ) {
    $dbkey = str_replace( ' ', '_', $text );

    # Initialisation
    $parts = [
        'interwiki' => '',
        'local_interwiki' => false,
        'fragment' => '',
        'namespace' => $defaultNamespace,
        'dbkey' => $dbkey,
        'user_case_dbkey' => $dbkey,
    ];

    # Strip Unicode bidi override characters.
    # Sometimes they slip into cut-n-pasted page titles, where the
    # override chars get included in list displays.
    $dbkey = preg_replace( '/[\x{200E}\x{200F}\x{202A}-\x{202E}]+/u', '', $dbkey );
    if ( $dbkey === null ) {
        # preg_replace() with /u returns null when the subject is not valid
        # UTF-8. Report that directly instead of letting the null fall
        # through and surface later as a misleading "empty title" error.
        throw new MalformedTitleException( 'title-invalid-utf8', $text );
    }

    # Clean up whitespace
    $dbkey = preg_replace(
        '/[ _\xA0\x{1680}\x{180E}\x{2000}-\x{200A}\x{2028}\x{2029}\x{202F}\x{205F}\x{3000}]+/u',
        '_',
        $dbkey
    );
    if ( $dbkey === null ) {
        # Same regex-failure guard as above.
        throw new MalformedTitleException( 'title-invalid-utf8', $text );
    }
    $dbkey = trim( $dbkey, '_' );

    if ( strpos( $dbkey, UtfNormal\Constants::UTF8_REPLACEMENT ) !== false ) {
        # Contained illegal UTF-8 sequences or forbidden Unicode chars.
        throw new MalformedTitleException( 'title-invalid-utf8', $text );
    }

    $parts['dbkey'] = $dbkey;

    # Initial colon indicates main namespace rather than specified default
    # but should not create invalid {ns,title} pairs such as {0,Project:Foo}
    if ( $dbkey !== '' && $dbkey[0] == ':' ) {
        $parts['namespace'] = NS_MAIN;
        $dbkey = substr( $dbkey, 1 ); # remove the colon but continue processing
        $dbkey = trim( $dbkey, '_' ); # remove any subsequent whitespace
    }

    if ( $dbkey == '' ) {
        throw new MalformedTitleException( 'title-invalid-empty', $text );
    }

    # Namespace or interwiki prefix
    $prefixRegexp = "/^(.+?)_*:_*(.*)$/S";
    # The loop body runs once unless a redundant local interwiki prefix is
    # stripped, in which case "continue 2" restarts the prefix split.
    do {
        $m = [];
        if ( preg_match( $prefixRegexp, $dbkey, $m ) ) {
            $p = $m[1];
            $ns = $this->language->getNsIndex( $p );
            if ( $ns !== false ) {
                # Ordinary namespace
                $dbkey = $m[2];
                $parts['namespace'] = $ns;
                # For Talk:X pages, check if X has a "namespace" prefix
                if ( $ns == NS_TALK && preg_match( $prefixRegexp, $dbkey, $x ) ) {
                    if ( $this->language->getNsIndex( $x[1] ) ) {
                        # Disallow Talk:File:x type titles...
                        throw new MalformedTitleException( 'title-invalid-talk-namespace', $text );
                    } elseif ( $this->interwikiLookup->isValidInterwiki( $x[1] ) ) {
                        # Disallow Talk:Interwiki:x type titles...
                        throw new MalformedTitleException( 'title-invalid-talk-namespace', $text );
                    }
                }
            } elseif ( $this->interwikiLookup->isValidInterwiki( $p ) ) {
                # Interwiki link
                $dbkey = $m[2];
                $parts['interwiki'] = $this->language->lc( $p );

                # Redundant interwiki prefix to the local wiki
                foreach ( $this->localInterwikis as $localIW ) {
                    if ( strcasecmp( $parts['interwiki'], $localIW ) == 0 ) {
                        if ( $dbkey == '' ) {
                            # Empty self-links should point to the Main Page, to ensure
                            # compatibility with cross-wiki transclusions and the like.
                            $mainPage = Title::newMainPage();
                            return [
                                'interwiki' => $mainPage->getInterwiki(),
                                'local_interwiki' => true,
                                'fragment' => $mainPage->getFragment(),
                                'namespace' => $mainPage->getNamespace(),
                                'dbkey' => $mainPage->getDBkey(),
                                'user_case_dbkey' => $mainPage->getUserCaseDBKey()
                            ];
                        }
                        $parts['interwiki'] = '';
                        # local interwikis should behave like initial-colon links
                        $parts['local_interwiki'] = true;

                        # Do another namespace split...
                        continue 2;
                    }
                }

                # If there's an initial colon after the interwiki, that also
                # resets the default namespace
                if ( $dbkey !== '' && $dbkey[0] == ':' ) {
                    $parts['namespace'] = NS_MAIN;
                    $dbkey = substr( $dbkey, 1 );
                    $dbkey = trim( $dbkey, '_' );
                }
            }
            # If there's no recognized interwiki or namespace,
            # then let the colon expression be part of the title.
        }
        break;
    } while ( true );

    $fragment = strstr( $dbkey, '#' );
    if ( $fragment !== false ) {
        $parts['fragment'] = str_replace( '_', ' ', substr( $fragment, 1 ) );
        $dbkey = substr( $dbkey, 0, strlen( $dbkey ) - strlen( $fragment ) );
        # remove whitespace again: prevents "Foo_bar_#"
        # becoming "Foo_bar_"
        $dbkey = preg_replace( '/_*$/', '', $dbkey );
    }

    # Reject illegal characters.
    $rxTc = self::getTitleInvalidRegex();
    $matches = [];
    if ( preg_match( $rxTc, $dbkey, $matches ) ) {
        throw new MalformedTitleException( 'title-invalid-characters', $text, [ $matches[0] ] );
    }

    # Pages with "/./" or "/../" appearing in the URLs will often be un-
    # reachable due to the way web browsers deal with 'relative' URLs.
    # Also, they conflict with subpage syntax. Forbid them explicitly.
    if (
        strpos( $dbkey, '.' ) !== false &&
        (
            $dbkey === '.' || $dbkey === '..' ||
            strpos( $dbkey, './' ) === 0 ||
            strpos( $dbkey, '../' ) === 0 ||
            strpos( $dbkey, '/./' ) !== false ||
            strpos( $dbkey, '/../' ) !== false ||
            substr( $dbkey, -2 ) == '/.' ||
            substr( $dbkey, -3 ) == '/..'
        )
    ) {
        throw new MalformedTitleException( 'title-invalid-relative', $text );
    }

    # Magic tilde sequences? Nu-uh!
    if ( strpos( $dbkey, '~~~' ) !== false ) {
        throw new MalformedTitleException( 'title-invalid-magic-tilde', $text );
    }

    # Limit the size of titles to 255 bytes. This is typically the size of the
    # underlying database field. We make an exception for special pages, which
    # don't need to be stored in the database, and may edge over 255 bytes due
    # to subpage syntax for long titles, e.g. [[Special:Block/Long name]]
    $maxLength = ( $parts['namespace'] != NS_SPECIAL ) ? 255 : 512;
    if ( strlen( $dbkey ) > $maxLength ) {
        throw new MalformedTitleException( 'title-invalid-too-long', $text,
            [ Message::numParam( $maxLength ) ] );
    }

    # Normally, all wiki links are forced to have an initial capital letter so [[foo]]
    # and [[Foo]] point to the same place. Don't force it for interwikis, since the
    # other site might be case-sensitive.
    $parts['user_case_dbkey'] = $dbkey;
    if ( $parts['interwiki'] === '' ) {
        $dbkey = Title::capitalize( $dbkey, $parts['namespace'] );
    }

    # Can't make a link to a namespace alone... "empty" local links can only be
    # self-links with a fragment identifier.
    if ( $dbkey == '' && $parts['interwiki'] === '' && $parts['namespace'] != NS_MAIN ) {
        throw new MalformedTitleException( 'title-invalid-empty', $text );
    }

    // Allow IPv6 usernames to start with '::' by canonicalizing IPv6 titles.
    // IP names are not allowed for accounts, and can only be referring to
    // edits from the IP. Given '::' abbreviations and caps/lowercaps,
    // there are numerous ways to present the same IP. Having sp:contribs scan
    // them all is silly and having some show the edits and others not is
    // inconsistent. Same for talk/userpages. Keep them normalized instead.
    if ( $parts['namespace'] == NS_USER || $parts['namespace'] == NS_USER_TALK ) {
        $dbkey = IP::sanitizeIP( $dbkey );
    }

    // Any remaining initial :s are illegal.
    if ( $dbkey !== '' && $dbkey[0] == ':' ) {
        throw new MalformedTitleException( 'title-invalid-leading-colon', $text );
    }

    # Fill fields
    $parts['dbkey'] = $dbkey;

    return $parts;
}
500 
/**
 * Returns a simple regex that will match on characters and sequences
 * invalid in titles.
 *
 * The pattern is built on the first call and kept in a function-static
 * variable for later calls.
 *
 * @return string Regex string
 */
public static function getTitleInvalidRegex() {
    static $rxTc = false;

    if ( $rxTc ) {
        return $rxTc;
    }

    # A title matching any of these alternatives is held as illegal.
    $alternatives = [
        # Any character not allowed is forbidden...
        '[^' . Title::legalChars() . ']',
        # URL percent encoding sequences interfere with the ability
        # to round-trip titles -- you can't link to them consistently.
        '%[0-9A-Fa-f]{2}',
        # XML/HTML character references produce similar issues.
        '&[A-Za-z0-9\x80-\xff]+;',
        '&#[0-9]+;',
        '&#x[0-9A-Fa-f]+;',
    ];
    $rxTc = '/' . implode( '|', $alternatives ) . '/S';

    return $rxTc;
}
529 }
A codec for MediaWiki page titles.
MalformedTitleException is thrown when a TitleParser is unable to parse a title string.
static getTitleInvalidRegex()
Returns a simple regex that will match on characters and sequences invalid in titles.
const NS_MAIN
Definition: Defines.php:60
static newMainPage(MessageLocalizer $localizer=null)
Create a new Title for the Main Page.
Definition: Title.php:656
getText(LinkTarget $title)
Apache License January AND DISTRIBUTION Definitions License shall mean the terms and conditions for use
div flags Integer display flags(NO_ACTION_LINK, NO_EXTRA_USER_LINKS) 'LogException' returning false will NOT prevent logging $e
Definition: hooks.txt:2147
const NS_SPECIAL
Definition: Defines.php:49
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency MediaWikiServices
Definition: injection.txt:23
static numParam( $num)
Definition: Message.php:1051
getNamespace()
Get the namespace index.
This document provides an overview of the usage of PageUpdater and that is
Definition: pageupdater.txt:3
getFragment()
Get the link fragment (i.e.
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that When $user is not it can be in the form of< username >< more info > e g for bot passwords intended to be added to log contexts Fields it might only if the login was with a bot password it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output $out
Definition: hooks.txt:767
static sanitizeIP( $ip)
Convert an IP into a verbose, uppercase, normalized form.
Definition: IP.php:139
getNamespaceName( $namespace, $text)
splitTitleString( $text, $defaultNamespace=NS_MAIN)
Normalizes and splits a title string.
getDBkey()
Get the main part with underscores.
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that When $user is not null
Definition: hooks.txt:767
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses just before the function returns a value If you return an< a > element with HTML attributes $attribs and contents $html will be returned If you return $ret will be returned and may include noclasses after processing after processing
Definition: hooks.txt:1972
namespace and then decline to actually register it file or subcat img or subcat $title
Definition: hooks.txt:912
getPrefixedText(LinkTarget $title)
Service interface for looking up Interwiki records.
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
makeTitleValueSafe( $namespace, $text, $fragment='', $interwiki='')
Given a namespace and title, return a TitleValue if valid, or null if invalid.
formatTitle( $namespace, $text, $fragment='', $interwiki='')
parseTitle( $text, $defaultNamespace=NS_MAIN)
Parses the given text and constructs a TitleValue.
__construct(Language $language, GenderCache $genderCache, $localInterwikis=[], InterwikiLookup $interwikiLookup=null, NamespaceInfo $nsInfo=null)
InterwikiLookup $interwikiLookup
static capitalize( $text, $ns=NS_MAIN)
Capitalize a text string for a title if it belongs to a namespace that capitalizes.
Definition: Title.php:3183
getFullText(LinkTarget $title)
wfDeprecated( $function, $version=false, $component=false, $callerOffset=2)
Throws a warning that $function is deprecated.
getInterwiki()
The interwiki component of this LinkTarget.
getText()
Returns the link in text form, without namespace prefix or fragment.
static legalChars()
Get a regex character class describing the legal characters in a link.
Definition: Title.php:703
Allows to change the fields on the form that will be generated $name
Definition: hooks.txt:271
const NS_TALK
Definition: Defines.php:61
const NS_USER_TALK
Definition: Defines.php:63
getPrefixedDBkey(LinkTarget $target)
static decodeCharReferencesAndNormalize( $text)
Decode any character references, numeric or named entities, in the next and normalize the resulting s...
Definition: Sanitizer.php:1686
$matches