MediaWiki  master
MediaWikiTitleCodec.php
Go to the documentation of this file.
1 <?php
26 
/**
 * @var Language Language object used for namespace name and index lookups
 *  and for lower-casing interwiki prefixes.
 */
42  protected $language;
43 
/**
 * @var GenderCache Queried for a user's gender when the namespace name
 *  is gender-dependent.
 */
47  protected $genderCache;
48 
/**
 * @var string[] Interwiki prefixes that refer to the local wiki; compared
 *  case-insensitively against parsed interwiki prefixes.
 */
52  protected $localInterwikis;
53 
/**
 * @var InterwikiLookup Used to decide whether a title prefix is a valid
 *  interwiki prefix.
 */
57  protected $interwikiLookup;
58 
/**
 * @var NamespaceInfo Used to check whether a namespace has gender-specific names.
 */
62  protected $nsInfo;
63 
74  ) {
75  if ( !$interwikiLookup ) {
76  wfDeprecated( __METHOD__ . ' with no InterwikiLookup argument', '1.34' );
77  $interwikiLookup = MediaWikiServices::getInstance()->getInterwikiLookup();
78  }
79  if ( !$nsInfo ) {
80  wfDeprecated( __METHOD__ . ' with no NamespaceInfo argument', '1.34' );
81  $nsInfo = MediaWikiServices::getInstance()->getNamespaceInfo();
82  }
83  $this->language = $language;
84  $this->genderCache = $genderCache;
85  $this->localInterwikis = (array)$localInterwikis;
86  $this->interwikiLookup = $interwikiLookup;
87  $this->nsInfo = $nsInfo;
88  }
89 
/**
 * Returns the localized name of a namespace.
 *
 * When both the content language and the namespace distinguish gender,
 * the title text is treated as a user name and the gender-specific
 * namespace name is returned.
 *
 * @param int $namespace Namespace ID
 * @param string $text Title text; only consulted for the gender lookup
 *
 * @throws InvalidArgumentException If the language has no name for the namespace ID
 * @return string Namespace name
 */
public function getNamespaceName( $namespace, $text ) {
	$genderAware = $this->language->needsGenderDistinction()
		&& $this->nsInfo->hasGenderDistinction( $namespace );

	if ( !$genderAware ) {
		$nsText = $this->language->getNsText( $namespace );
	} else {
		// NOTE: this assumes that the title text is a user name!
		$userGender = $this->genderCache->getGenderOf( $text, __METHOD__ );
		$nsText = $this->language->getGenderNsText( $namespace, $userGender );
	}

	if ( $nsText !== false ) {
		return $nsText;
	}

	throw new InvalidArgumentException( 'Unknown namespace ID: ' . $namespace );
}
116 
/**
 * Assembles a display title of the form "interwiki:Namespace:text#fragment".
 *
 * Empty components are left out, and every underscore in the assembled
 * string is turned into a space.
 *
 * @param int|bool $namespace Namespace ID; a value loosely equal to 0 means no namespace prefix
 * @param string $text Page title text
 * @param string $fragment Fragment, without the leading '#'
 * @param string $interwiki Interwiki prefix, without the trailing ':'
 *
 * @return string Formatted title
 */
public function formatTitle( $namespace, $text, $fragment = '', $interwiki = '' ) {
	$prefix = ( $interwiki !== '' ) ? $interwiki . ':' : '';

	if ( $namespace != 0 ) {
		try {
			$nsLabel = $this->getNamespaceName( $namespace, $text );
		} catch ( InvalidArgumentException $e ) {
			// See T165149. Awkward, but better than erroneously linking to the main namespace.
			$nsLabel = $this->language->getNsText( NS_SPECIAL ) . ":Badtitle/NS{$namespace}";
		}

		$prefix .= $nsLabel . ':';
	}

	$suffix = ( $fragment !== '' ) ? '#' . $fragment : '';

	return str_replace( '_', ' ', $prefix . $text . $suffix );
}
155 
/**
 * Parses the given text and builds a TitleValue from it.
 *
 * @param string $text Title text to parse
 * @param int $defaultNamespace Namespace to assume when the text carries no prefix
 *
 * @throws MalformedTitleException If the text is not a valid title
 * @return TitleValue
 */
public function parseTitle( $text, $defaultNamespace = NS_MAIN ) {
	// Turn things like &eacute; &#257; or &#x3017; into normalized (T16952) text
	$normalized = Sanitizer::decodeCharReferencesAndNormalize( $text );

	// NOTE: this is an ugly kludge that lets this class share the parsing
	// code with the old Title class. The parser code should be refactored
	// to avoid this.
	$split = $this->splitTitleString( $normalized, $defaultNamespace );

	// An empty page name is acceptable only for a fragment-only link in
	// the main namespace.
	$isFragmentOnlyLink = $split['fragment'] !== '' && $split['namespace'] === NS_MAIN;
	if ( $split['dbkey'] === '' && !$isFragmentOnlyLink ) {
		throw new MalformedTitleException( 'title-invalid-empty', $text );
	}

	return new TitleValue(
		$split['namespace'],
		$split['dbkey'],
		$split['fragment'],
		$split['interwiki']
	);
}
188 
/**
 * Returns the title text of the given link target, as reported by the
 * target itself.
 *
 * @param LinkTarget $title
 *
 * @return string
 */
public function getText( LinkTarget $title ) {
	$plainText = $title->getText();

	return $plainText;
}
199 
/**
 * Returns the display title including interwiki and namespace prefixes,
 * but without the fragment.
 *
 * The result is memoized on the target's prefixedText property.
 *
 * @param LinkTarget $title
 *
 * @return string
 */
public function getPrefixedText( LinkTarget $title ) {
	if ( isset( $title->prefixedText ) ) {
		// Already computed for this target.
		return $title->prefixedText;
	}

	$title->prefixedText = $this->formatTitle(
		$title->getNamespace(),
		$title->getText(),
		'',
		$title->getInterwiki()
	);

	return $title->prefixedText;
}
219 
/**
 * Returns the prefixed title in database-key form, i.e. with underscores
 * in place of spaces and without the fragment.
 *
 * @param LinkTarget $target
 *
 * @return string
 */
public function getPrefixedDBkey( LinkTarget $target ) {
	$formatted = $this->formatTitle(
		$target->getNamespace(),
		$target->getDBkey(),
		'',
		$target->getInterwiki()
	);

	// formatTitle() emits spaces; the DB key form uses underscores.
	return strtr( $formatted, ' ', '_' );
}
234 
/**
 * Returns the complete display title: interwiki prefix, namespace,
 * title text and fragment.
 *
 * @param LinkTarget $title
 *
 * @return string
 */
public function getFullText( LinkTarget $title ) {
	$ns = $title->getNamespace();
	$pageText = $title->getText();
	$anchor = $title->getFragment();
	$iwPrefix = $title->getInterwiki();

	return $this->formatTitle( $ns, $pageText, $anchor, $iwPrefix );
}
250 
/**
 * Normalizes and splits a title string.
 *
 * Whitespace and bidi override characters are cleaned up, namespace and
 * interwiki prefixes are peeled off, the fragment is separated, and the
 * remaining page name is validated (illegal characters, relative path
 * segments, tilde sequences, length, leading colon).
 *
 * @param string $text Title text; expected to be normalized UTF-8
 * @param int $defaultNamespace Namespace used when the text has no
 *  namespace prefix and no initial colon
 *
 * @throws MalformedTitleException If $text is not a valid title string
 * @return array Map with the keys:
 *  - 'interwiki': interwiki prefix (lower-cased), or '' if none
 *  - 'local_interwiki': true if a prefix pointing at the local wiki was stripped
 *  - 'fragment': fragment with underscores turned into spaces, or ''
 *  - 'namespace': namespace ID
 *  - 'dbkey': normalized page name with underscores
 *  - 'user_case_dbkey': page name as it was before capitalization
 */
public function splitTitleString( $text, $defaultNamespace = NS_MAIN ) {
	$dbkey = str_replace( ' ', '_', $text );

	# Initialisation
	$parts = [
		'interwiki' => '',
		'local_interwiki' => false,
		'fragment' => '',
		'namespace' => $defaultNamespace,
		'dbkey' => $dbkey,
		'user_case_dbkey' => $dbkey,
	];

	# Strip Unicode bidi override characters.
	# Sometimes they slip into cut-n-pasted page titles, where the
	# override chars get included in list displays.
	$dbkey = preg_replace( '/\xE2\x80[\x8E\x8F\xAA-\xAE]/S', '', $dbkey );

	# Clean up whitespace
	# Note: with the /u option, preg_replace returns null for input that
	# contains invalid UTF-8 sequences.
	$dbkey = preg_replace(
		'/[ _\xA0\x{1680}\x{180E}\x{2000}-\x{200A}\x{2028}\x{2029}\x{202F}\x{205F}\x{3000}]+/u',
		'_',
		$dbkey
	);
	if ( $dbkey === null ) {
		# Report invalid UTF-8 as such, instead of passing null to trim()
		# (deprecated since PHP 8.1) and failing later as an "empty" title.
		throw new MalformedTitleException( 'title-invalid-utf8', $text );
	}
	$dbkey = trim( $dbkey, '_' );

	if ( strpos( $dbkey, UtfNormal\Constants::UTF8_REPLACEMENT ) !== false ) {
		# Contained illegal UTF-8 sequences or forbidden Unicode chars.
		throw new MalformedTitleException( 'title-invalid-utf8', $text );
	}

	$parts['dbkey'] = $dbkey;

	# Initial colon indicates main namespace rather than specified default
	# but should not create invalid {ns,title} pairs such as {0,Project:Foo}
	if ( $dbkey !== '' && $dbkey[0] == ':' ) {
		$parts['namespace'] = NS_MAIN;
		$dbkey = substr( $dbkey, 1 ); # remove the colon but continue processing
		$dbkey = trim( $dbkey, '_' ); # remove any subsequent whitespace
	}

	if ( $dbkey == '' ) {
		throw new MalformedTitleException( 'title-invalid-empty', $text );
	}

	# Namespace or interwiki prefix
	$prefixRegexp = "/^(.+?)_*:_*(.*)$/S";
	# The loop body runs again only via "continue 2" below, i.e. after a
	# redundant local interwiki prefix has been stripped.
	do {
		$m = [];
		if ( preg_match( $prefixRegexp, $dbkey, $m ) ) {
			$p = $m[1];
			$ns = $this->language->getNsIndex( $p );
			if ( $ns !== false ) {
				# Ordinary namespace
				$dbkey = $m[2];
				$parts['namespace'] = $ns;
				# For Talk:X pages, check if X has a "namespace" prefix
				if ( $ns == NS_TALK && preg_match( $prefixRegexp, $dbkey, $x ) ) {
					if ( $this->language->getNsIndex( $x[1] ) ) {
						# Disallow Talk:File:x type titles...
						throw new MalformedTitleException( 'title-invalid-talk-namespace', $text );
					} elseif ( $this->interwikiLookup->isValidInterwiki( $x[1] ) ) {
						# Disallow Talk:Interwiki:x type titles...
						throw new MalformedTitleException( 'title-invalid-talk-namespace', $text );
					}
				}
			} elseif ( $this->interwikiLookup->isValidInterwiki( $p ) ) {
				# Interwiki link
				$dbkey = $m[2];
				$parts['interwiki'] = $this->language->lc( $p );

				# Redundant interwiki prefix to the local wiki
				foreach ( $this->localInterwikis as $localIW ) {
					if ( strcasecmp( $parts['interwiki'], $localIW ) == 0 ) {
						if ( $dbkey == '' ) {
							# Empty self-links should point to the Main Page, to ensure
							# compatibility with cross-wiki transclusions and the like.
							$mainPage = Title::newMainPage();
							return [
								'interwiki' => $mainPage->getInterwiki(),
								'local_interwiki' => true,
								'fragment' => $mainPage->getFragment(),
								'namespace' => $mainPage->getNamespace(),
								'dbkey' => $mainPage->getDBkey(),
								'user_case_dbkey' => $mainPage->getUserCaseDBKey()
							];
						}
						$parts['interwiki'] = '';
						# local interwikis should behave like initial-colon links
						$parts['local_interwiki'] = true;

						# Do another namespace split...
						continue 2;
					}
				}

				# If there's an initial colon after the interwiki, that also
				# resets the default namespace
				if ( $dbkey !== '' && $dbkey[0] == ':' ) {
					$parts['namespace'] = NS_MAIN;
					$dbkey = substr( $dbkey, 1 );
					$dbkey = trim( $dbkey, '_' );
				}
			}
			# If there's no recognized interwiki or namespace,
			# then let the colon expression be part of the title.
		}
		break;
	} while ( true );

	# Split off the fragment: everything from the first '#' onwards
	$fragment = strstr( $dbkey, '#' );
	if ( $fragment !== false ) {
		$parts['fragment'] = str_replace( '_', ' ', substr( $fragment, 1 ) );
		$dbkey = substr( $dbkey, 0, strlen( $dbkey ) - strlen( $fragment ) );
		# remove whitespace again: prevents "Foo_bar_#"
		# becoming "Foo_bar_"
		$dbkey = preg_replace( '/_*$/', '', $dbkey );
	}

	# Reject illegal characters.
	$rxTc = self::getTitleInvalidRegex();
	$matches = [];
	if ( preg_match( $rxTc, $dbkey, $matches ) ) {
		throw new MalformedTitleException( 'title-invalid-characters', $text, [ $matches[0] ] );
	}

	# Pages with "/./" or "/../" appearing in the URLs will often be un-
	# reachable due to the way web browsers deal with 'relative' URLs.
	# Also, they conflict with subpage syntax. Forbid them explicitly.
	if (
		strpos( $dbkey, '.' ) !== false &&
		(
			$dbkey === '.' || $dbkey === '..' ||
			strpos( $dbkey, './' ) === 0 ||
			strpos( $dbkey, '../' ) === 0 ||
			strpos( $dbkey, '/./' ) !== false ||
			strpos( $dbkey, '/../' ) !== false ||
			substr( $dbkey, -2 ) == '/.' ||
			substr( $dbkey, -3 ) == '/..'
		)
	) {
		throw new MalformedTitleException( 'title-invalid-relative', $text );
	}

	# Magic tilde sequences? Nu-uh!
	if ( strpos( $dbkey, '~~~' ) !== false ) {
		throw new MalformedTitleException( 'title-invalid-magic-tilde', $text );
	}

	# Limit the size of titles to 255 bytes. This is typically the size of the
	# underlying database field. We make an exception for special pages, which
	# don't need to be stored in the database, and may edge over 255 bytes due
	# to subpage syntax for long titles, e.g. [[Special:Block/Long name]]
	$maxLength = ( $parts['namespace'] != NS_SPECIAL ) ? 255 : 512;
	if ( strlen( $dbkey ) > $maxLength ) {
		throw new MalformedTitleException( 'title-invalid-too-long', $text,
			[ Message::numParam( $maxLength ) ] );
	}

	# Normally, all wiki links are forced to have an initial capital letter so [[foo]]
	# and [[Foo]] point to the same place. Don't force it for interwikis, since the
	# other site might be case-sensitive.
	$parts['user_case_dbkey'] = $dbkey;
	if ( $parts['interwiki'] === '' ) {
		$dbkey = Title::capitalize( $dbkey, $parts['namespace'] );
	}

	# Can't make a link to a namespace alone... "empty" local links can only be
	# self-links with a fragment identifier.
	if ( $dbkey == '' && $parts['interwiki'] === '' && $parts['namespace'] != NS_MAIN ) {
		throw new MalformedTitleException( 'title-invalid-empty', $text );
	}

	// Allow IPv6 usernames to start with '::' by canonicalizing IPv6 titles.
	// IP names are not allowed for accounts, and can only be referring to
	// edits from the IP. Given '::' abbreviations and caps/lowercaps,
	// there are numerous ways to present the same IP. Having sp:contribs scan
	// them all is silly and having some show the edits and others not is
	// inconsistent. Same for talk/userpages. Keep them normalized instead.
	if ( $parts['namespace'] == NS_USER || $parts['namespace'] == NS_USER_TALK ) {
		$dbkey = IP::sanitizeIP( $dbkey );
	}

	// Any remaining initial :s are illegal.
	if ( $dbkey !== '' && $dbkey[0] == ':' ) {
		throw new MalformedTitleException( 'title-invalid-leading-colon', $text );
	}

	# Fill fields
	$parts['dbkey'] = $dbkey;

	return $parts;
}
467 
/**
 * Returns a simple regex that matches characters and sequences which are
 * invalid in titles. A title that matches is to be treated as illegal.
 *
 * The regex is built once per request and cached in a static variable.
 *
 * @return string Regex string
 */
public static function getTitleInvalidRegex() {
	static $regex = false;

	if ( $regex ) {
		return $regex;
	}

	$regex = implode( '', [
		'/',
		# Any character not allowed is forbidden...
		'[^' . Title::legalChars() . ']',
		# URL percent encoding sequences interfere with the ability
		# to round-trip titles -- you can't link to them consistently.
		'|%[0-9A-Fa-f]{2}',
		# XML/HTML character references produce similar issues.
		'|&[A-Za-z0-9\x80-\xff]+;',
		'|&#[0-9]+;',
		'|&#x[0-9A-Fa-f]+;',
		'/S',
	] );

	return $regex;
}
496 }
The wiki should then use memcached to cache various data To use multiple just add more items to the array To increase the weight of a make its entry a array("192.168.0.1:11211", 2))
A codec for MediaWiki page titles.
MalformedTitleException is thrown when a TitleParser is unable to parse a title string.
static getTitleInvalidRegex()
Returns a simple regex that will match on characters and sequences invalid in titles.
const NS_MAIN
Definition: Defines.php:64
static newMainPage(MessageLocalizer $localizer=null)
Create a new Title for the Main Page.
Definition: Title.php:653
getText(LinkTarget $title)
Apache License January AND DISTRIBUTION Definitions License shall mean the terms and conditions for use
div flags Integer display flags(NO_ACTION_LINK, NO_EXTRA_USER_LINKS) 'LogException' returning false will NOT prevent logging $e
Definition: hooks.txt:2159
const NS_SPECIAL
Definition: Defines.php:53
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency MediaWikiServices
Definition: injection.txt:23
static numParam( $num)
Definition: Message.php:1049
getNamespace()
Get the namespace index.
This document provides an overview of the usage of PageUpdater and that is
Definition: pageupdater.txt:3
getFragment()
Get the link fragment (i.e. the part after the "#") in text form.
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that When $user is not it can be in the form of< username >< more info > e g for bot passwords intended to be added to log contexts Fields it might only if the login was with a bot password it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output $out
Definition: hooks.txt:780
static sanitizeIP( $ip)
Convert an IP into a verbose, uppercase, normalized form.
Definition: IP.php:152
getNamespaceName( $namespace, $text)
splitTitleString( $text, $defaultNamespace=NS_MAIN)
Normalizes and splits a title string.
getDBkey()
Get the main part with underscores.
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that When $user is not null
Definition: hooks.txt:780
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses just before the function returns a value If you return an< a > element with HTML attributes $attribs and contents $html will be returned If you return $ret will be returned and may include noclasses after processing after processing
Definition: hooks.txt:1982
namespace and then decline to actually register it file or subcat img or subcat $title
Definition: hooks.txt:925
getPrefixedText(LinkTarget $title)
Service interface for looking up Interwiki records.
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
formatTitle( $namespace, $text, $fragment='', $interwiki='')
parseTitle( $text, $defaultNamespace=NS_MAIN)
Parses the given text and constructs a TitleValue.
__construct(Language $language, GenderCache $genderCache, $localInterwikis=[], InterwikiLookup $interwikiLookup=null, NamespaceInfo $nsInfo=null)
InterwikiLookup $interwikiLookup
static capitalize( $text, $ns=NS_MAIN)
Capitalize a text string for a title if it belongs to a namespace that capitalizes.
Definition: Title.php:3141
getFullText(LinkTarget $title)
wfDeprecated( $function, $version=false, $component=false, $callerOffset=2)
Throws a warning that $function is deprecated.
getInterwiki()
The interwiki component of this LinkTarget.
getText()
Returns the link in text form, without namespace prefix or fragment.
static legalChars()
Get a regex character class describing the legal characters in a link.
Definition: Title.php:700
Allows to change the fields on the form that will be generated $name
Definition: hooks.txt:271
const NS_TALK
Definition: Defines.php:65
const NS_USER_TALK
Definition: Defines.php:67
getPrefixedDBkey(LinkTarget $target)
static decodeCharReferencesAndNormalize( $text)
Decode any character references, numeric or named entities, in the text and normalize the resulting string.
Definition: Sanitizer.php:1678
$matches