MediaWiki  master
MediaWikiTitleCodec.php
Go to the documentation of this file.
1 <?php
26 
42  protected $language;
43 
47  protected $genderCache;
48 
52  protected $localInterwikis;
53 
57  protected $interwikiLookup;
58 
62  protected $nsInfo;
63 
74  ) {
75  if ( !$interwikiLookup ) {
76  wfDeprecated( __METHOD__ . ' with no InterwikiLookup argument', '1.34' );
77  $interwikiLookup = MediaWikiServices::getInstance()->getInterwikiLookup();
78  }
79  if ( !$nsInfo ) {
80  wfDeprecated( __METHOD__ . ' with no NamespaceInfo argument', '1.34' );
81  $nsInfo = MediaWikiServices::getInstance()->getNamespaceInfo();
82  }
83  $this->language = $language;
84  $this->genderCache = $genderCache;
85  $this->localInterwikis = (array)$localInterwikis;
86  $this->interwikiLookup = $interwikiLookup;
87  $this->nsInfo = $nsInfo;
88  }
89 
/**
 * Look up the localized name of a namespace, honouring gender-specific
 * namespace names where both the language and the namespace use them.
 *
 * @param int $namespace Namespace ID.
 * @param string $text Title text; when a gendered name is needed this is
 *  passed to the gender cache, i.e. it is assumed to be a user name.
 *
 * @throws InvalidArgumentException If the language reports no name (false)
 *  for the given namespace ID.
 * @return string The namespace name as returned by the language object.
 */
public function getNamespaceName( $namespace, $text ) {
	$isGendered = $this->language->needsGenderDistinction()
		&& $this->nsInfo->hasGenderDistinction( $namespace );

	// NOTE: the gendered branch assumes that the title text is a user name!
	$name = $isGendered
		? $this->language->getGenderNsText(
			$namespace,
			$this->genderCache->getGenderOf( $text, __METHOD__ )
		)
		: $this->language->getNsText( $namespace );

	if ( $name !== false ) {
		return $name;
	}

	throw new InvalidArgumentException( 'Unknown namespace ID: ' . $namespace );
}
116 
/**
 * Assemble a display string of the form
 * "interwiki:Namespace:text#fragment" from its components, with every
 * underscore in the result replaced by a space.
 *
 * @param int|bool $namespace Namespace ID; any value loosely equal to 0
 *  produces no namespace prefix.
 * @param string $text Page title text.
 * @param string $fragment Fragment to append after '#'; '' for none.
 * @param string $interwiki Interwiki prefix to prepend; '' for none.
 *
 * @return string The formatted title.
 */
public function formatTitle( $namespace, $text, $fragment = '', $interwiki = '' ) {
	$prefix = ( $interwiki !== '' ) ? $interwiki . ':' : '';

	if ( $namespace != 0 ) {
		try {
			$nsName = $this->getNamespaceName( $namespace, $text );
		} catch ( InvalidArgumentException $e ) {
			// See T165149. An unknown namespace is rendered as a Special:Badtitle
			// subpage rather than silently falling back to the main namespace.
			$nsName = $this->language->getNsText( NS_SPECIAL ) . ":Badtitle/NS{$namespace}";
		}

		$prefix .= $nsName . ':';
	}

	$suffix = ( $fragment !== '' ) ? '#' . $fragment : '';

	return str_replace( '_', ' ', $prefix . $text . $suffix );
}
155 
/**
 * Parse a title string into a TitleValue.
 *
 * @param string $text The text to parse.
 * @param int $defaultNamespace Namespace handed to splitTitleString() as the
 *  default; NS_MAIN unless specified.
 *
 * @throws MalformedTitleException If splitTitleString() rejects the text, or
 *  if the resulting key is empty and the text is not a fragment-only link
 *  in the main namespace.
 * @return TitleValue
 */
public function parseTitle( $text, $defaultNamespace = NS_MAIN ) {
	// Turn things like &eacute; &#257; or &#x3017; into normalized text (T16952).
	$normalized = Sanitizer::decodeCharReferencesAndNormalize( $text );

	// NOTE: this is an ugly kludge that lets this class share its parsing
	// code with the old Title class. The parser code should be refactored
	// so that this is no longer necessary.
	$parts = $this->splitTitleString( $normalized, $defaultNamespace );

	if ( $parts['dbkey'] === '' ) {
		// An empty key is acceptable only for a pure "#fragment" link, and
		// only when no namespace was selected.
		$isFragmentOnlyMainLink =
			$parts['fragment'] !== '' && $parts['namespace'] === NS_MAIN;

		if ( !$isFragmentOnlyMainLink ) {
			throw new MalformedTitleException( 'title-invalid-empty', $text );
		}
	}

	return new TitleValue(
		$parts['namespace'],
		$parts['dbkey'],
		$parts['fragment'],
		$parts['interwiki']
	);
}
187 
/**
 * Return the text form of a link target, as reported by the target itself.
 *
 * @param LinkTarget $title
 *
 * @return string Whatever LinkTarget::getText() returns for $title.
 */
public function getText( LinkTarget $title ) {
	$plainText = $title->getText();

	return $plainText;
}
198 
/**
 * Return the title formatted with its interwiki and namespace prefixes but
 * without a fragment. The result is memoised on the target object itself
 * in a `prefixedText` property, so later calls reuse it.
 *
 * @param LinkTarget $title
 *
 * @return string
 */
public function getPrefixedText( LinkTarget $title ) {
	if ( isset( $title->prefixedText ) ) {
		return $title->prefixedText;
	}

	// Not computed yet: build it once (empty fragment) and store it on the target.
	$title->prefixedText = $this->formatTitle(
		$title->getNamespace(),
		$title->getText(),
		'',
		$title->getInterwiki()
	);

	return $title->prefixedText;
}
218 
/**
 * Return the prefixed form of a link target built from its DB key, with
 * spaces converted to underscores and no fragment.
 *
 * @param LinkTarget $target
 *
 * @return string
 */
public function getPrefixedDBkey( LinkTarget $target ) {
	$formatted = $this->formatTitle(
		$target->getNamespace(),
		$target->getDBkey(),
		'',
		$target->getInterwiki()
	);

	// formatTitle() emits spaces; the DB-key form uses underscores instead.
	return strtr( $formatted, ' ', '_' );
}
233 
/**
 * Return the fully qualified text of a link target: interwiki prefix,
 * namespace prefix, text and fragment.
 *
 * @param LinkTarget $title
 *
 * @return string
 */
public function getFullText( LinkTarget $title ) {
	$namespace = $title->getNamespace();
	$text = $title->getText();
	$fragment = $title->getFragment();
	$interwiki = $title->getInterwiki();

	return $this->formatTitle( $namespace, $text, $fragment, $interwiki );
}
249 
/**
 * Normalizes and splits a title string.
 *
 * The text is cleaned up (bidi override characters stripped, runs of
 * whitespace collapsed to a single underscore), any interwiki and namespace
 * prefixes are peeled off, the fragment is separated, and the remaining key
 * is validated and capitalized.
 *
 * @param string $text The text to parse; callers are expected to have
 *  decoded character references already (parseTitle() does so).
 * @param int $defaultNamespace Namespace used when the text carries no
 *  namespace prefix; an initial colon resets it to NS_MAIN.
 *
 * @throws MalformedTitleException If the text is not valid UTF-8, is empty,
 *  contains illegal characters or sequences, is too long, or names a
 *  namespace without a page.
 * @return array Map with the keys:
 *  - 'interwiki': lower-cased interwiki prefix, or '' for local titles
 *  - 'local_interwiki': true if a prefix matching one of the configured
 *    local interwikis was encountered
 *  - 'fragment': fragment text with underscores turned into spaces
 *  - 'namespace': resolved namespace ID
 *  - 'dbkey': the normalized key
 *  - 'user_case_dbkey': the key as it stood before Title::capitalize()
 */
public function splitTitleString( $text, $defaultNamespace = NS_MAIN ) {
	$dbkey = str_replace( ' ', '_', $text );

	# Initialisation
	$parts = [
		'interwiki' => '',
		'local_interwiki' => false,
		'fragment' => '',
		'namespace' => $defaultNamespace,
		'dbkey' => $dbkey,
		'user_case_dbkey' => $dbkey,
	];

	# Strip Unicode bidi override characters.
	# Sometimes they slip into cut-n-pasted page titles, where the
	# override chars get included in list displays.
	$dbkey = preg_replace( '/[\x{200E}\x{200F}\x{202A}-\x{202E}]+/u', '', $dbkey );

	if ( $dbkey === null ) {
		# preg_replace() failed. With the /u option the most likely cause is
		# invalid UTF-8 in the input. Report that explicitly instead of
		# feeding null into the calls below and ending up with a misleading
		# "empty title" error.
		throw new MalformedTitleException( 'title-invalid-utf8', $text );
	}

	# Clean up whitespace: collapse every run of spaces, underscores and
	# Unicode space/separator characters into a single underscore.
	$dbkey = preg_replace(
		'/[ _\xA0\x{1680}\x{180E}\x{2000}-\x{200A}\x{2028}\x{2029}\x{202F}\x{205F}\x{3000}]+/u',
		'_',
		$dbkey
	);
	$dbkey = trim( $dbkey, '_' );

	if ( strpos( $dbkey, UtfNormal\Constants::UTF8_REPLACEMENT ) !== false ) {
		# Contained illegal UTF-8 sequences or forbidden Unicode chars.
		throw new MalformedTitleException( 'title-invalid-utf8', $text );
	}

	$parts['dbkey'] = $dbkey;

	# Initial colon indicates main namespace rather than specified default
	# but should not create invalid {ns,title} pairs such as {0,Project:Foo}
	if ( $dbkey !== '' && $dbkey[0] == ':' ) {
		$parts['namespace'] = NS_MAIN;
		$dbkey = substr( $dbkey, 1 ); # remove the colon but continue processing
		$dbkey = trim( $dbkey, '_' ); # remove any subsequent whitespace
	}

	if ( $dbkey == '' ) {
		throw new MalformedTitleException( 'title-invalid-empty', $text );
	}

	# Namespace or interwiki prefix
	$prefixRegexp = "/^(.+?)_*:_*(.*)$/S";
	do {
		$m = [];
		if ( preg_match( $prefixRegexp, $dbkey, $m ) ) {
			$p = $m[1];
			$ns = $this->language->getNsIndex( $p );
			if ( $ns !== false ) {
				# Ordinary namespace
				$dbkey = $m[2];
				$parts['namespace'] = $ns;
				# For Talk:X pages, check if X has a "namespace" prefix
				if ( $ns == NS_TALK && preg_match( $prefixRegexp, $dbkey, $x ) ) {
					if ( $this->language->getNsIndex( $x[1] ) ) {
						# Disallow Talk:File:x type titles...
						throw new MalformedTitleException( 'title-invalid-talk-namespace', $text );
					} elseif ( $this->interwikiLookup->isValidInterwiki( $x[1] ) ) {
						# Disallow Talk:Interwiki:x type titles...
						throw new MalformedTitleException( 'title-invalid-talk-namespace', $text );
					}
				}
			} elseif ( $this->interwikiLookup->isValidInterwiki( $p ) ) {
				# Interwiki link
				$dbkey = $m[2];
				$parts['interwiki'] = $this->language->lc( $p );

				# Redundant interwiki prefix to the local wiki
				foreach ( $this->localInterwikis as $localIW ) {
					if ( strcasecmp( $parts['interwiki'], $localIW ) == 0 ) {
						if ( $dbkey == '' ) {
							# Empty self-links should point to the Main Page, to ensure
							# compatibility with cross-wiki transclusions and the like.
							$mainPage = Title::newMainPage();
							return [
								'interwiki' => $mainPage->getInterwiki(),
								'local_interwiki' => true,
								'fragment' => $mainPage->getFragment(),
								'namespace' => $mainPage->getNamespace(),
								'dbkey' => $mainPage->getDBkey(),
								'user_case_dbkey' => $mainPage->getUserCaseDBKey()
							];
						}
						$parts['interwiki'] = '';
						# local interwikis should behave like initial-colon links
						$parts['local_interwiki'] = true;

						# Do another namespace split: restart the enclosing
						# do/while so the remainder is examined for prefixes.
						continue 2;
					}
				}

				# If there's an initial colon after the interwiki, that also
				# resets the default namespace
				if ( $dbkey !== '' && $dbkey[0] == ':' ) {
					$parts['namespace'] = NS_MAIN;
					$dbkey = substr( $dbkey, 1 );
					$dbkey = trim( $dbkey, '_' );
				}
			}
			# If there's no recognized interwiki or namespace,
			# then let the colon expression be part of the title.
		}
		break;
	} while ( true );

	$fragment = strstr( $dbkey, '#' );
	if ( $fragment !== false ) {
		$parts['fragment'] = str_replace( '_', ' ', substr( $fragment, 1 ) );
		$dbkey = substr( $dbkey, 0, strlen( $dbkey ) - strlen( $fragment ) );
		# remove whitespace again: prevents "Foo_bar_#"
		# becoming "Foo_bar_"
		$dbkey = preg_replace( '/_*$/', '', $dbkey );
	}

	# Reject illegal characters.
	$rxTc = self::getTitleInvalidRegex();
	$matches = [];
	if ( preg_match( $rxTc, $dbkey, $matches ) ) {
		throw new MalformedTitleException( 'title-invalid-characters', $text, [ $matches[0] ] );
	}

	# Pages with "/./" or "/../" appearing in the URLs will often be un-
	# reachable due to the way web browsers deal with 'relative' URLs.
	# Also, they conflict with subpage syntax. Forbid them explicitly.
	if (
		strpos( $dbkey, '.' ) !== false &&
		(
			$dbkey === '.' || $dbkey === '..' ||
			strpos( $dbkey, './' ) === 0 ||
			strpos( $dbkey, '../' ) === 0 ||
			strpos( $dbkey, '/./' ) !== false ||
			strpos( $dbkey, '/../' ) !== false ||
			substr( $dbkey, -2 ) == '/.' ||
			substr( $dbkey, -3 ) == '/..'
		)
	) {
		throw new MalformedTitleException( 'title-invalid-relative', $text );
	}

	# Magic tilde sequences? Nu-uh!
	if ( strpos( $dbkey, '~~~' ) !== false ) {
		throw new MalformedTitleException( 'title-invalid-magic-tilde', $text );
	}

	# Limit the size of titles to 255 bytes. This is typically the size of the
	# underlying database field. We make an exception for special pages, which
	# don't need to be stored in the database, and may edge over 255 bytes due
	# to subpage syntax for long titles, e.g. [[Special:Block/Long name]]
	$maxLength = ( $parts['namespace'] != NS_SPECIAL ) ? 255 : 512;
	if ( strlen( $dbkey ) > $maxLength ) {
		throw new MalformedTitleException( 'title-invalid-too-long', $text,
			[ Message::numParam( $maxLength ) ] );
	}

	# Normally, all wiki links are forced to have an initial capital letter so [[foo]]
	# and [[Foo]] point to the same place. Don't force it for interwikis, since the
	# other site might be case-sensitive.
	$parts['user_case_dbkey'] = $dbkey;
	if ( $parts['interwiki'] === '' ) {
		$dbkey = Title::capitalize( $dbkey, $parts['namespace'] );
	}

	# Can't make a link to a namespace alone... "empty" local links can only be
	# self-links with a fragment identifier.
	if ( $dbkey == '' && $parts['interwiki'] === '' && $parts['namespace'] != NS_MAIN ) {
		throw new MalformedTitleException( 'title-invalid-empty', $text );
	}

	// Allow IPv6 usernames to start with '::' by canonicalizing IPv6 titles.
	// IP names are not allowed for accounts, and can only be referring to
	// edits from the IP. Given '::' abbreviations and caps/lowercaps,
	// there are numerous ways to present the same IP. Having sp:contribs scan
	// them all is silly and having some show the edits and others not is
	// inconsistent. Same for talk/userpages. Keep them normalized instead.
	if ( $parts['namespace'] == NS_USER || $parts['namespace'] == NS_USER_TALK ) {
		$dbkey = IP::sanitizeIP( $dbkey );
	}

	// Any remaining initial :s are illegal.
	if ( $dbkey !== '' && $dbkey[0] == ':' ) {
		throw new MalformedTitleException( 'title-invalid-leading-colon', $text );
	}

	# Fill fields
	$parts['dbkey'] = $dbkey;

	return $parts;
}
466 
/**
 * Returns a simple regex that will match on characters and sequences
 * invalid in titles. The pattern is built once per request and kept in a
 * function-static variable.
 *
 * @return string A PCRE pattern, including delimiters and the S modifier.
 */
public static function getTitleInvalidRegex() {
	static $regex = false;

	if ( $regex ) {
		return $regex;
	}

	# Anything matching one of these alternatives makes a title illegal.
	$alternatives = [
		# Any character not allowed is forbidden...
		'[^' . Title::legalChars() . ']',
		# URL percent encoding sequences interfere with the ability
		# to round-trip titles -- you can't link to them consistently.
		'%[0-9A-Fa-f]{2}',
		# XML/HTML character references produce similar issues.
		'&[A-Za-z0-9\x80-\xff]+;',
		'&#[0-9]+;',
		'&#x[0-9A-Fa-f]+;',
	];

	$regex = '/' . implode( '|', $alternatives ) . '/S';

	return $regex;
}
495 }
A codec for MediaWiki page titles.
MalformedTitleException is thrown when a TitleParser is unable to parse a title string.
static getTitleInvalidRegex()
Returns a simple regex that will match on characters and sequences invalid in titles.
const NS_MAIN
Definition: Defines.php:60
static newMainPage(MessageLocalizer $localizer=null)
Create a new Title for the Main Page.
Definition: Title.php:653
getText(LinkTarget $title)
Apache License January AND DISTRIBUTION Definitions License shall mean the terms and conditions for use
div flags Integer display flags(NO_ACTION_LINK, NO_EXTRA_USER_LINKS) 'LogException' returning false will NOT prevent logging $e
Definition: hooks.txt:2159
const NS_SPECIAL
Definition: Defines.php:49
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency MediaWikiServices
Definition: injection.txt:23
static numParam( $num)
Definition: Message.php:1049
getNamespace()
Get the namespace index.
This document provides an overview of the usage of PageUpdater and that is
Definition: pageupdater.txt:3
getFragment()
Get the link fragment (i.e. the part after the "#") in text form.
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that When $user is not it can be in the form of< username >< more info > e g for bot passwords intended to be added to log contexts Fields it might only if the login was with a bot password it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output $out
Definition: hooks.txt:780
static sanitizeIP( $ip)
Convert an IP into a verbose, uppercase, normalized form.
Definition: IP.php:139
getNamespaceName( $namespace, $text)
splitTitleString( $text, $defaultNamespace=NS_MAIN)
Normalizes and splits a title string.
getDBkey()
Get the main part with underscores.
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that When $user is not null
Definition: hooks.txt:780
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses just before the function returns a value If you return an< a > element with HTML attributes $attribs and contents $html will be returned If you return $ret will be returned and may include noclasses after processing after processing
Definition: hooks.txt:1982
namespace and then decline to actually register it file or subcat img or subcat $title
Definition: hooks.txt:925
getPrefixedText(LinkTarget $title)
Service interface for looking up Interwiki records.
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
formatTitle( $namespace, $text, $fragment='', $interwiki='')
parseTitle( $text, $defaultNamespace=NS_MAIN)
Parses the given text and constructs a TitleValue.
__construct(Language $language, GenderCache $genderCache, $localInterwikis=[], InterwikiLookup $interwikiLookup=null, NamespaceInfo $nsInfo=null)
InterwikiLookup $interwikiLookup
static capitalize( $text, $ns=NS_MAIN)
Capitalize a text string for a title if it belongs to a namespace that capitalizes.
Definition: Title.php:3161
getFullText(LinkTarget $title)
wfDeprecated( $function, $version=false, $component=false, $callerOffset=2)
Throws a warning that $function is deprecated.
getInterwiki()
The interwiki component of this LinkTarget.
getText()
Returns the link in text form, without namespace prefix or fragment.
static legalChars()
Get a regex character class describing the legal characters in a link.
Definition: Title.php:700
Allows to change the fields on the form that will be generated $name
Definition: hooks.txt:271
const NS_TALK
Definition: Defines.php:61
const NS_USER_TALK
Definition: Defines.php:63
getPrefixedDBkey(LinkTarget $target)
static decodeCharReferencesAndNormalize( $text)
Decode any character references, numeric or named entities, in the text and normalize the resulting string.
Definition: Sanitizer.php:1679
$matches