MediaWiki master
MediaWikiTitleCodec.php
Go to the documentation of this file.
1<?php
24namespace MediaWiki\Title;
25
26use InvalidArgumentException;
27use Language;
28use LogicException;
35use Wikimedia\IPUtils;
36
/** @var Language Language used to resolve and render namespace names */
protected $language;

/** @var GenderCache Source of user gender for gender-specific namespace names */
protected $genderCache;

/** @var string[] Interwiki prefixes that point back at the local wiki */
protected $localInterwikis;

/** @var InterwikiLookup Used to decide whether a prefix is a valid interwiki */
protected $interwikiLookup;

/** @var NamespaceInfo Namespace metadata (existence, capitalization, gender distinction) */
protected $nsInfo;

/**
 * @var callable Factory producing the exception thrown for malformed titles; invoked as
 *   ( $errorMessage, $titleText, $errorMessageParameters ). Replaceable in tests.
 */
private $createMalformedTitleException;
72
/**
 * @param Language $language Language used for namespace names, lower-casing and
 *   first-letter capitalization
 * @param GenderCache $genderCache Used to pick gender-specific namespace names
 * @param string[]|string $localInterwikis Interwiki prefix(es) that refer to the local
 *   wiki; a scalar is cast to a one-element array
 * @param InterwikiLookup $interwikiLookup Used to recognise valid interwiki prefixes
 * @param NamespaceInfo $nsInfo Namespace metadata service
 */
public function __construct(
	Language $language,
	GenderCache $genderCache,
	$localInterwikis,
	InterwikiLookup $interwikiLookup,
	NamespaceInfo $nsInfo
) {
	$this->language = $language;
	$this->genderCache = $genderCache;
	$this->localInterwikis = (array)$localInterwikis;
	$this->interwikiLookup = $interwikiLookup;
	$this->nsInfo = $nsInfo;

	// Default callback is to return a real MalformedTitleException,
	// callback signature matches constructor
	$this->createMalformedTitleException = static function (
		$errorMessage,
		$titleText = null,
		$errorMessageParameters = []
	) {
		return new MalformedTitleException( $errorMessage, $titleText, $errorMessageParameters );
	};
}
104
/**
 * Swap out the factory that builds the exception thrown for malformed titles.
 *
 * This is a test-only hook: it refuses to run unless the MW_PHPUNIT_TEST
 * constant is defined.
 *
 * @param callable $callback Replacement factory; the default one accepts
 *   ( $errorMessage, $titleText, $errorMessageParameters )
 * @throws LogicException When MW_PHPUNIT_TEST is not defined
 */
public function overrideCreateMalformedTitleExceptionCallback( callable $callback ) {
	// @codeCoverageIgnoreStart
	$inPhpUnit = defined( 'MW_PHPUNIT_TEST' );
	if ( !$inPhpUnit ) {
		throw new LogicException( __METHOD__ . ' can only be used in tests' );
	}
	// @codeCoverageIgnoreEnd
	$this->createMalformedTitleException = $callback;
}
117
/**
 * Look up the localized name of a namespace.
 *
 * When both the language and the namespace distinguish by gender, the name is
 * chosen according to the gender of the user named by $text.
 *
 * @param int $namespace Namespace ID
 * @param string $text Title text; treated as a user name for the gender lookup
 * @return string Namespace name
 * @throws InvalidArgumentException If the language reports no name for $namespace
 */
public function getNamespaceName( $namespace, $text ) {
	$isGendered = $this->language->needsGenderDistinction()
		&& $this->nsInfo->hasGenderDistinction( $namespace );

	if ( !$isGendered ) {
		$nsText = $this->language->getNsText( $namespace );
	} else {
		// NOTE: we are assuming here that the title text is a user name!
		$userGender = $this->genderCache->getGenderOf( $text, __METHOD__ );
		$nsText = $this->language->getGenderNsText( $namespace, $userGender );
	}

	if ( $nsText === false ) {
		throw new InvalidArgumentException( 'Unknown namespace ID: ' . $namespace );
	}

	return $nsText;
}
144
/**
 * Assemble a display title of the form "interwiki:Namespace:text#fragment".
 *
 * Empty interwiki and fragment parts are left out, as is the namespace part
 * for namespace 0. Underscores in the assembled string become spaces.
 *
 * @param int|false $namespace Namespace ID
 * @param string $text Page title text
 * @param string $fragment Fragment without the leading '#'; '' for none
 * @param string $interwiki Interwiki prefix without the trailing ':'; '' for none
 * @return string
 */
public function formatTitle( $namespace, $text, $fragment = '', $interwiki = '' ) {
	$prefix = $interwiki !== '' ? $interwiki . ':' : '';

	if ( $namespace != 0 ) {
		try {
			$nsName = $this->getNamespaceName( $namespace, $text );
		} catch ( InvalidArgumentException $e ) {
			// See T165149. Awkward, but better than erroneously linking to the main namespace.
			$nsName = $this->language->getNsText( NS_SPECIAL ) . ":Badtitle/NS{$namespace}";
		}

		$prefix .= $nsName . ':';
	}

	$suffix = $fragment !== '' ? '#' . $fragment : '';

	return str_replace( '_', ' ', $prefix . $text . $suffix );
}
183
/**
 * Parse title text into a TitleValue.
 *
 * @param string $text Title text to parse
 * @param int $defaultNamespace Namespace to assume when $text has no prefix
 * @return TitleValue
 * @throws MalformedTitleException (via splitTitleString) if $text is not a valid title
 */
public function parseTitle( $text, $defaultNamespace = NS_MAIN ) {
	// Convert things like &eacute; &#257; or &#x3017; into normalized (T16952) text
	$normalizedText = Sanitizer::decodeCharReferencesAndNormalize( $text );

	// NOTE: this is an ugly kludge that allows this class to share the
	// code for parsing with the old Title class. The parser code should
	// be refactored to avoid this.
	[
		'namespace' => $ns,
		'dbkey' => $dbkey,
		'fragment' => $fragment,
		'interwiki' => $interwiki,
	] = $this->splitTitleString( $normalizedText, $defaultNamespace );

	return new TitleValue( $ns, $dbkey, $fragment, $interwiki );
}
209
/**
 * Given a namespace and title, return a TitleValue if valid, or null if invalid.
 *
 * The parts are joined into a full title string using the canonical namespace
 * name and then run through splitTitleString(), so the result is normalized.
 *
 * @param int $namespace Namespace ID
 * @param string $text Page title text
 * @param string $fragment Fragment without the leading '#'
 * @param string $interwiki Interwiki prefix
 * @return TitleValue|null Null if the namespace does not exist or the title is malformed
 */
public function makeTitleValueSafe( $namespace, $text, $fragment = '', $interwiki = '' ) {
	if ( !$this->nsInfo->exists( $namespace ) ) {
		return null;
	}

	$canonicalNs = $this->nsInfo->getCanonicalName( $namespace );
	$pageText = $canonicalNs == '' ? $text : "$canonicalNs:$text";
	$interwikiPrefix = (string)$interwiki !== '' ? "$interwiki:" : '';
	$fragmentSuffix = (string)$fragment !== '' ? '#' . $fragment : '';

	try {
		$parts = $this->splitTitleString( $interwikiPrefix . $pageText . $fragmentSuffix );
	} catch ( MalformedTitleException $e ) {
		return null;
	}

	return new TitleValue(
		$parts['namespace'],
		$parts['dbkey'],
		$parts['fragment'],
		$parts['interwiki']
	);
}
243
/**
 * Get the title text (no namespace, interwiki or fragment) of a page or link target.
 *
 * @param LinkTarget|PageReference $title
 * @return string For a LinkTarget, its own getText(); for a PageReference, the DB key
 *   with underscores replaced by spaces
 * @throws InvalidArgumentException If $title is an object of neither type
 */
public function getText( $title ) {
	if ( $title instanceof LinkTarget ) {
		return $title->getText();
	}

	if ( $title instanceof PageReference ) {
		return str_replace( '_', ' ', $title->getDBKey() );
	}

	throw new InvalidArgumentException( '$title has invalid type: ' . get_class( $title ) );
}
260
/**
 * Get the title text with interwiki and namespace prefixes, but no fragment.
 *
 * For a LinkTarget the result is stored on the object in a prefixedText
 * property and reused on later calls. For a PageReference,
 * assertWiki( PageReference::LOCAL ) is called before formatting.
 *
 * @param LinkTarget|PageReference $title
 * @return string
 * @throws InvalidArgumentException If $title is an object of neither type
 */
public function getPrefixedText( $title ) {
	if ( $title instanceof LinkTarget ) {
		if ( isset( $title->prefixedText ) ) {
			return $title->prefixedText;
		}

		$title->prefixedText = $this->formatTitle(
			$title->getNamespace(),
			$title->getText(),
			'',
			$title->getInterwiki()
		);

		return $title->prefixedText;
	}

	if ( $title instanceof PageReference ) {
		$title->assertWiki( PageReference::LOCAL );
		$ns = $title->getNamespace();
		$pageText = $this->getText( $title );

		return $this->formatTitle( $ns, $pageText );
	}

	throw new InvalidArgumentException( '$title has invalid type: ' . get_class( $title ) );
}
290
/**
 * Get the prefixed title in DB-key form: interwiki and namespace prefixes
 * included, no fragment, and every space replaced by an underscore.
 *
 * For a PageReference, assertWiki( PageReference::LOCAL ) is called before
 * formatting and no interwiki prefix is added.
 *
 * @param LinkTarget|PageReference $target
 * @return string
 * @throws InvalidArgumentException If $target is an object of neither type
 */
public function getPrefixedDBkey( $target ) {
	if ( $target instanceof LinkTarget ) {
		$formatted = $this->formatTitle(
			$target->getNamespace(),
			$target->getDBkey(),
			'',
			$target->getInterwiki()
		);
	} elseif ( $target instanceof PageReference ) {
		$target->assertWiki( PageReference::LOCAL );
		$formatted = $this->formatTitle(
			$target->getNamespace(),
			$target->getDBkey()
		);
	} else {
		// The parameter is named $target; the message previously referred to "$title".
		throw new InvalidArgumentException( '$target has invalid type: ' . get_class( $target ) );
	}

	// formatTitle() turns underscores into spaces; turn them back for DB-key form.
	return strtr( $formatted, ' ', '_' );
}
315
/**
 * Get the full title text, including prefixes.
 *
 * A LinkTarget is formatted with its namespace, text, fragment and interwiki.
 * A PageReference is formatted with namespace and text only, after
 * assertWiki( PageReference::LOCAL ) has been called.
 *
 * @param LinkTarget|PageReference $title
 * @return string
 * @throws InvalidArgumentException If $title is an object of neither type
 */
public function getFullText( $title ) {
	if ( $title instanceof LinkTarget ) {
		$ns = $title->getNamespace();
		$pageText = $title->getText();
		$fragment = $title->getFragment();
		$interwiki = $title->getInterwiki();

		return $this->formatTitle( $ns, $pageText, $fragment, $interwiki );
	}

	if ( $title instanceof PageReference ) {
		$title->assertWiki( PageReference::LOCAL );
		$ns = $title->getNamespace();
		$pageText = $this->getText( $title );

		return $this->formatTitle( $ns, $pageText );
	}

	throw new InvalidArgumentException( '$title has invalid type: ' . get_class( $title ) );
}
341
/**
 * Validates, normalizes and splits a title string.
 *
 * Processing order: bidi-mark and whitespace cleanup, leading-colon handling,
 * namespace / interwiki prefix resolution, fragment extraction, then validity
 * checks (illegal characters, relative path segments, "~~~", length, leading
 * colon). Keys in the user and user-talk namespaces are passed through
 * IPUtils::sanitizeIP(). The result is finally checked with
 * TitleValue::assertValidSpec().
 *
 * @param string $text Title text to split
 * @param int $defaultNamespace Namespace to use when $text carries no prefix
 * @return array Map with the keys 'interwiki', 'local_interwiki', 'fragment',
 *   'namespace' and 'dbkey'
 * @throws MalformedTitleException By default; the exception is built by the
 *   replaceable factory stored in $this->createMalformedTitleException
 */
public function splitTitleString( $text, $defaultNamespace = NS_MAIN ) {
	$dbkey = str_replace( ' ', '_', $text );

	# Initialisation
	$parts = [
		'interwiki' => '',
		'local_interwiki' => false,
		'fragment' => '',
		'namespace' => (int)$defaultNamespace,
		'dbkey' => $dbkey,
	];

	# Strip Unicode bidi override characters.
	# Sometimes they slip into cut-n-pasted page titles, where the
	# override chars get included in list displays.
	$dbkey = preg_replace( '/[\x{200E}\x{200F}\x{202A}-\x{202E}]+/u', '', $dbkey );

	if ( $dbkey === null ) {
		# Regex had an error. Most likely this is caused by invalid UTF-8
		$exception = ( $this->createMalformedTitleException )( 'title-invalid-utf8', $text );
		throw $exception;
	}

	# Clean up whitespace
	$dbkey = preg_replace(
		'/[ _\xA0\x{1680}\x{180E}\x{2000}-\x{200A}\x{2028}\x{2029}\x{202F}\x{205F}\x{3000}]+/u',
		'_',
		$dbkey
	);
	$dbkey = trim( $dbkey, '_' );

	if ( str_contains( $dbkey, \UtfNormal\Constants::UTF8_REPLACEMENT ) ) {
		# Contained illegal UTF-8 sequences or forbidden Unicode chars.
		$exception = ( $this->createMalformedTitleException )( 'title-invalid-utf8', $text );
		throw $exception;
	}

	$parts['dbkey'] = $dbkey;

	# Initial colon indicates main namespace rather than specified default
	# but should not create invalid {ns,title} pairs such as {0,Project:Foo}
	if ( $dbkey !== '' && $dbkey[0] == ':' ) {
		$parts['namespace'] = NS_MAIN;
		$dbkey = substr( $dbkey, 1 ); # remove the colon but continue processing
		$dbkey = trim( $dbkey, '_' ); # remove any subsequent whitespace
	}

	if ( $dbkey == '' ) {
		$exception = ( $this->createMalformedTitleException )( 'title-invalid-empty', $text );
		throw $exception;
	}

	# Namespace or interwiki prefix
	$prefixRegexp = "/^(.+?)_*:_*(.*)$/S";
	do {
		$m = [];
		if ( preg_match( $prefixRegexp, $dbkey, $m ) ) {
			$p = $m[1];
			$ns = $this->language->getNsIndex( $p );
			if ( $ns !== false ) {
				# Ordinary namespace
				$dbkey = $m[2];
				$parts['namespace'] = $ns;
				# For Talk:X pages, check if X has a "namespace" prefix
				if ( $ns === NS_TALK && preg_match( $prefixRegexp, $dbkey, $x ) ) {
					if ( $this->language->getNsIndex( $x[1] ) ) {
						# Disallow Talk:File:x type titles...
						$exception = ( $this->createMalformedTitleException )(
							'title-invalid-talk-namespace',
							$text
						);
						throw $exception;
					} elseif ( $this->interwikiLookup->isValidInterwiki( $x[1] ) ) {
						# Disallow Talk:Interwiki:x type titles...
						$exception = ( $this->createMalformedTitleException )(
							'title-invalid-talk-namespace',
							$text
						);
						throw $exception;
					}
				}
			} elseif ( $this->interwikiLookup->isValidInterwiki( $p ) ) {
				# Interwiki link
				$dbkey = $m[2];
				$parts['interwiki'] = $this->language->lc( $p );

				# Redundant interwiki prefix to the local wiki
				foreach ( $this->localInterwikis as $localIW ) {
					if ( strcasecmp( $parts['interwiki'], $localIW ) == 0 ) {
						if ( $dbkey == '' ) {
							# Empty self-links should point to the Main Page, to ensure
							# compatibility with cross-wiki transclusions and the like.
							$mainPage = Title::newMainPage();
							return [
								'interwiki' => $mainPage->getInterwiki(),
								'local_interwiki' => true,
								'fragment' => $mainPage->getFragment(),
								'namespace' => $mainPage->getNamespace(),
								'dbkey' => $mainPage->getDBkey(),
							];
						}
						$parts['interwiki'] = '';
						# local interwikis should behave like initial-colon links
						$parts['local_interwiki'] = true;

						# Do another namespace split...
						continue 2;
					}
				}

				# If there's an initial colon after the interwiki, that also
				# resets the default namespace
				if ( $dbkey !== '' && $dbkey[0] == ':' ) {
					$parts['namespace'] = NS_MAIN;
					$dbkey = substr( $dbkey, 1 );
					$dbkey = trim( $dbkey, '_' );
				}
			}
			# If there's no recognized interwiki or namespace,
			# then let the colon expression be part of the title.
		}
		break;
	} while ( true );

	$fragment = strstr( $dbkey, '#' );
	if ( $fragment !== false ) {
		$parts['fragment'] = str_replace( '_', ' ', substr( $fragment, 1 ) );
		$dbkey = substr( $dbkey, 0, strlen( $dbkey ) - strlen( $fragment ) );
		# remove whitespace again: prevents "Foo_bar_#"
		# becoming "Foo_bar_"
		$dbkey = rtrim( $dbkey, "_" );
	}

	# Reject illegal characters.
	$rxTc = self::getTitleInvalidRegex();
	$matches = [];
	if ( preg_match( $rxTc, $dbkey, $matches ) ) {
		$exception = ( $this->createMalformedTitleException )(
			'title-invalid-characters',
			$text,
			[ $matches[0] ]
		);
		throw $exception;
	}

	# Pages with "/./" or "/../" appearing in the URLs will often be un-
	# reachable due to the way web browsers deal with 'relative' URLs.
	# Also, they conflict with subpage syntax. Forbid them explicitly.
	if (
		str_contains( $dbkey, '.' ) &&
		(
			$dbkey === '.' || $dbkey === '..' ||
			str_starts_with( $dbkey, './' ) ||
			str_starts_with( $dbkey, '../' ) ||
			str_contains( $dbkey, '/./' ) ||
			str_contains( $dbkey, '/../' ) ||
			str_ends_with( $dbkey, '/.' ) ||
			str_ends_with( $dbkey, '/..' )
		)
	) {
		$exception = ( $this->createMalformedTitleException )( 'title-invalid-relative', $text );
		throw $exception;
	}

	# Magic tilde sequences? Nu-uh!
	if ( str_contains( $dbkey, '~~~' ) ) {
		$exception = ( $this->createMalformedTitleException )( 'title-invalid-magic-tilde', $text );
		throw $exception;
	}

	# Limit the size of titles to 255 bytes. This is typically the size of the
	# underlying database field. We make an exception for special pages, which
	# don't need to be stored in the database, and may edge over 255 bytes due
	# to subpage syntax for long titles, e.g. [[Special:Block/Long name]]
	$maxLength = ( $parts['namespace'] !== NS_SPECIAL ) ? 255 : 512;
	if ( strlen( $dbkey ) > $maxLength ) {
		$exception = ( $this->createMalformedTitleException )(
			'title-invalid-too-long',
			$text,
			[ Message::numParam( $maxLength ) ]
		);
		throw $exception;
	}

	# Normally, all wiki links are forced to have an initial capital letter so [[foo]]
	# and [[Foo]] point to the same place. Don't force it for interwikis, since the
	# other site might be case-sensitive.
	if ( $parts['interwiki'] === '' && $this->nsInfo->isCapitalized( $parts['namespace'] ) ) {
		$dbkey = $this->language->ucfirst( $dbkey );
	}

	# Can't make a link to a namespace alone... "empty" local links can only be
	# self-links with a fragment identifier.
	if ( $dbkey == '' && $parts['interwiki'] === '' && $parts['namespace'] !== NS_MAIN ) {
		$exception = ( $this->createMalformedTitleException )( 'title-invalid-empty', $text );
		throw $exception;
	}

	// Allow IPv6 usernames to start with '::' by canonicalizing IPv6 titles.
	// IP names are not allowed for accounts, and can only be referring to
	// edits from the IP. Given '::' abbreviations and caps/lowercaps,
	// there are numerous ways to present the same IP. Having sp:contribs scan
	// them all is silly and having some show the edits and others not is
	// inconsistent. Same for talk/userpages. Keep them normalized instead.
	if ( $dbkey !== '' && ( $parts['namespace'] === NS_USER || $parts['namespace'] === NS_USER_TALK ) ) {
		$dbkey = IPUtils::sanitizeIP( $dbkey );
		// IPUtils::sanitizeIP return null only for bad input
		'@phan-var string $dbkey';
	}

	// Any remaining initial :s are illegal.
	if ( $dbkey !== '' && $dbkey[0] == ':' ) {
		$exception = ( $this->createMalformedTitleException )( 'title-invalid-leading-colon', $text );
		throw $exception;
	}

	// Fill fields
	$parts['dbkey'] = $dbkey;

	// Check to ensure that the return value can be used to construct a TitleValue.
	// All issues should in theory be caught above, this is here to enforce consistency.
	try {
		TitleValue::assertValidSpec(
			$parts['namespace'],
			$parts['dbkey'],
			$parts['fragment'],
			$parts['interwiki']
		);
	} catch ( InvalidArgumentException $ex ) {
		$exception = ( $this->createMalformedTitleException )( 'title-invalid', $text, [ $ex->getMessage() ] );
		throw $exception;
	}

	return $parts;
}
594
/**
 * Returns a simple regex that will match on characters and sequences invalid in titles.
 *
 * The pattern is assembled on the first call and kept in a function-static
 * variable for later calls.
 *
 * @return string Regex string
 */
public static function getTitleInvalidRegex() {
	static $cachedRegex = false;

	if ( $cachedRegex ) {
		return $cachedRegex;
	}

	$legalChars = Title::legalChars();

	# Matching titles will be held as illegal.
	$cachedRegex = '/'
		# Any character not allowed is forbidden...
		. '[^' . $legalChars . ']'
		# URL percent encoding sequences interfere with the ability
		# to round-trip titles -- you can't link to them consistently.
		. '|%[0-9A-Fa-f]{2}'
		# XML/HTML character references produce similar issues.
		. '|&[A-Za-z0-9\x80-\xff]+;'
		. '/S';

	return $cachedRegex;
}
621}
622
627class_alias( MediaWikiTitleCodec::class, 'MediaWikiTitleCodec' );
const NS_USER
Definition Defines.php:66
const NS_MAIN
Definition Defines.php:64
const NS_SPECIAL
Definition Defines.php:53
const NS_TALK
Definition Defines.php:65
const NS_USER_TALK
Definition Defines.php:67
Base class for language-specific code.
Definition Language.php:63
Look up "gender" user preference.
The Message class deals with fetching and processing of interface message into a variety of formats.
Definition Message.php:157
HTML sanitizer for MediaWiki.
Definition Sanitizer.php:46
MalformedTitleException is thrown when a TitleParser is unable to parse a title string.
A codec for MediaWiki page titles.
__construct(Language $language, GenderCache $genderCache, $localInterwikis, InterwikiLookup $interwikiLookup, NamespaceInfo $nsInfo)
makeTitleValueSafe( $namespace, $text, $fragment='', $interwiki='')
Given a namespace and title, return a TitleValue if valid, or null if invalid.
formatTitle( $namespace, $text, $fragment='', $interwiki='')
parseTitle( $text, $defaultNamespace=NS_MAIN)
Parses the given text and constructs a TitleValue.
splitTitleString( $text, $defaultNamespace=NS_MAIN)
Validates, normalizes and splits a title string.
overrideCreateMalformedTitleExceptionCallback(callable $callback)
static getTitleInvalidRegex()
Returns a simple regex that will match on characters and sequences invalid in titles.
This is a utility class for dealing with namespaces that encodes all the "magic" behaviors of them ba...
Represents the target of a wiki link.
static assertValidSpec( $namespace, $title, $fragment='', $interwiki='')
Assert that the given parameters could be used to construct a TitleValue object.
static newMainPage(MessageLocalizer $localizer=null)
Create a new Title for the Main Page.
Definition Title.php:682
static legalChars()
Get a regex character class describing the legal characters in a link.
Definition Title.php:715
Service interface for looking up Interwiki records.
Represents the target of a wiki link.
Interface for objects (potentially) representing a page that can be viewable and linked to on a wiki.
A title formatter service for MediaWiki.
A title parser service for MediaWiki.