Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
95.38% |
227 / 238 |
|
50.00% |
6 / 12 |
CRAP | |
0.00% |
0 / 1 |
MediaWikiTitleCodec | |
95.78% |
227 / 237 |
|
50.00% |
6 / 12 |
77 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
12 / 12 |
|
100.00% |
1 / 1 |
1 | |||
overrideCreateMalformedTitleExceptionCallback | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
6 | |||
getNamespaceName | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
4 | |||
formatTitle | |
100.00% |
13 / 13 |
|
100.00% |
1 / 1 |
5 | |||
parseTitle | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
1 | |||
makeTitleValueSafe | |
100.00% |
13 / 13 |
|
100.00% |
1 / 1 |
6 | |||
getText | |
80.00% |
4 / 5 |
|
0.00% |
0 / 1 |
3.07 | |||
getPrefixedText | |
93.75% |
15 / 16 |
|
0.00% |
0 / 1 |
4.00 | |||
getPrefixedDBkey | |
92.86% |
13 / 14 |
|
0.00% |
0 / 1 |
3.00 | |||
getFullText | |
92.86% |
13 / 14 |
|
0.00% |
0 / 1 |
3.00 | |||
splitTitleString | |
100.00% |
125 / 125 |
|
100.00% |
1 / 1 |
43 | |||
getTitleInvalidRegex | |
37.50% |
3 / 8 |
|
0.00% |
0 / 1 |
2.98 |
1 | <?php |
2 | /** |
3 | * A codec for MediaWiki page titles. |
4 | * |
5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by |
7 | * the Free Software Foundation; either version 2 of the License, or |
8 | * (at your option) any later version. |
9 | * |
10 | * This program is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | * GNU General Public License for more details. |
14 | * |
15 | * You should have received a copy of the GNU General Public License along |
16 | * with this program; if not, write to the Free Software Foundation, Inc., |
17 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
18 | * http://www.gnu.org/copyleft/gpl.html |
19 | * |
20 | * @file |
21 | * @author Daniel Kinzler |
22 | */ |
23 | |
24 | namespace MediaWiki\Title; |
25 | |
26 | use InvalidArgumentException; |
27 | use Language; |
28 | use LogicException; |
29 | use MediaWiki\Cache\GenderCache; |
30 | use MediaWiki\Interwiki\InterwikiLookup; |
31 | use MediaWiki\Linker\LinkTarget; |
32 | use MediaWiki\Message\Message; |
33 | use MediaWiki\Page\PageReference; |
34 | use MediaWiki\Parser\Sanitizer; |
35 | use Wikimedia\IPUtils; |
36 | |
37 | /** |
38 | * A codec for MediaWiki page titles. |
39 | * |
40 | * @note Normalization and validation are applied while parsing, not when formatting. |
41 | * It's possible to construct a TitleValue with an invalid title, and use MediaWikiTitleCodec |
42 | * to generate an (invalid) title string from it. TitleValues should be constructed only |
43 | * via parseTitle() or from a (semi)trusted source, such as the database. |
44 | * |
45 | * @see https://www.mediawiki.org/wiki/Requests_for_comment/TitleValue |
46 | * @since 1.23 |
47 | */ |
48 | class MediaWikiTitleCodec implements TitleFormatter, TitleParser { |
/** @var Language Used for namespace names, namespace lookup and case conversion */
protected $language;

/** @var GenderCache Consulted when a gendered namespace name has to be produced */
protected $genderCache;

/** @var string[] Interwiki prefixes that are treated as pointing at the local wiki */
protected $localInterwikis;

/** @var InterwikiLookup Used to decide whether a prefix is a valid interwiki prefix */
protected $interwikiLookup;

/** @var NamespaceInfo Source of namespace existence, canonical names and capitalization */
protected $nsInfo;

/**
 * Factory for MalformedTitleException instances.
 *
 * MalformedTitleException cannot be created in unit tests (see T281935), so
 * every throw site goes through this callback; unit tests may replace it
 * to hand out a mock instead.
 *
 * @var callable
 */
private $createMalformedTitleException;

/**
 * @param Language $language The language object to use for localizing namespace names,
 *   capitalization, etc.
 * @param GenderCache $genderCache The gender cache for generating gendered namespace names
 * @param string[]|string $localInterwikis A single prefix or a list of prefixes; a
 *   single string is wrapped into an array.
 * @param InterwikiLookup $interwikiLookup
 * @param NamespaceInfo $nsInfo
 */
public function __construct(
	Language $language,
	GenderCache $genderCache,
	$localInterwikis,
	InterwikiLookup $interwikiLookup,
	NamespaceInfo $nsInfo
) {
	// By default a real MalformedTitleException is built; the callback takes
	// the same arguments as that exception's constructor.
	$this->createMalformedTitleException = static function (
		$errorMessage,
		$titleText = null,
		$errorMessageParameters = []
	): MalformedTitleException {
		return new MalformedTitleException( $errorMessage, $titleText, $errorMessageParameters );
	};

	$this->nsInfo = $nsInfo;
	$this->interwikiLookup = $interwikiLookup;
	$this->localInterwikis = (array)$localInterwikis;
	$this->genderCache = $genderCache;
	$this->language = $language;
}
104 | |
/**
 * Replace the factory used to create MalformedTitleException objects.
 *
 * Only permitted while the MW_PHPUNIT_TEST constant is defined.
 *
 * @internal
 * @param callable $callback Called with the same arguments as the default factory
 * @throws LogicException When called outside of a PHPUnit run
 */
public function overrideCreateMalformedTitleExceptionCallback( callable $callback ) {
	// @codeCoverageIgnoreStart
	$runningTests = defined( 'MW_PHPUNIT_TEST' );
	if ( $runningTests === false ) {
		throw new LogicException( __METHOD__ . ' can only be used in tests' );
	}
	// @codeCoverageIgnoreEnd
	$this->createMalformedTitleException = $callback;
}
117 | |
/**
 * @see TitleFormatter::getNamespaceName()
 *
 * Looks up the localized name of a namespace, using the gendered variant
 * when both the language and the namespace call for one.
 *
 * @param int $namespace
 * @param string $text Title text; used as the user name for the gender lookup
 *
 * @throws InvalidArgumentException If the namespace is invalid
 * @return string Namespace name with underscores (not spaces), e.g. 'User_talk'
 */
public function getNamespaceName( $namespace, $text ) {
	$useGenderedName = $this->language->needsGenderDistinction()
		&& $this->nsInfo->hasGenderDistinction( $namespace );

	if ( !$useGenderedName ) {
		$nsText = $this->language->getNsText( $namespace );
	} else {
		// NOTE: we are assuming here that the title text is a user name!
		$nsText = $this->language->getGenderNsText(
			$namespace,
			$this->genderCache->getGenderOf( $text, __METHOD__ )
		);
	}

	if ( $nsText !== false ) {
		return $nsText;
	}

	throw new InvalidArgumentException( 'Unknown namespace ID: ' . $namespace );
}
144 | |
/**
 * @see TitleFormatter::formatTitle()
 *
 * Assembles "interwiki:Namespace:text#fragment" from its parts and converts
 * every underscore in the result to a space.
 *
 * @param int|false $namespace The namespace ID (or false, if the namespace should be ignored)
 * @param string $text The page title. Should be valid. Only minimal normalization is applied.
 *   Underscores will be replaced.
 * @param string $fragment The fragment name (may be empty).
 * @param string $interwiki The interwiki name (may be empty).
 *
 * @return string
 */
public function formatTitle( $namespace, $text, $fragment = '', $interwiki = '' ) {
	$prefix = ( $interwiki !== '' ) ? $interwiki . ':' : '';

	// Loose comparison on purpose: both 0 (main namespace) and false
	// (ignore the namespace) skip the namespace prefix.
	if ( $namespace != 0 ) {
		try {
			$nsName = $this->getNamespaceName( $namespace, $text );
		} catch ( InvalidArgumentException $e ) {
			// See T165149. Awkward, but better than erroneously linking to the main namespace.
			$nsName = $this->language->getNsText( NS_SPECIAL ) . ":Badtitle/NS{$namespace}";
		}

		$prefix .= $nsName . ':';
	}

	$suffix = ( $fragment !== '' ) ? '#' . $fragment : '';

	return str_replace( '_', ' ', $prefix . $text . $suffix );
}
183 | |
/**
 * Parses the given text and constructs a TitleValue.
 *
 * Character references are decoded and the text is normalized before it is
 * handed to splitTitleString() for validation.
 *
 * @param string $text The text to parse
 * @param int $defaultNamespace Namespace to assume per default (usually NS_MAIN)
 *
 * @throws MalformedTitleException
 * @return TitleValue
 */
public function parseTitle( $text, $defaultNamespace = NS_MAIN ) {
	// NOTE: this is an ugly kludge that allows this class to share the
	// code for parsing with the old Title class. The parser code should
	// be refactored to avoid this.
	$split = $this->splitTitleString(
		// Convert things like &eacute; &#257; or &#x3017; into normalized (T16952) text
		Sanitizer::decodeCharReferencesAndNormalize( $text ),
		$defaultNamespace
	);

	$namespace = $split['namespace'];
	$dbkey = $split['dbkey'];
	$fragment = $split['fragment'];
	$interwiki = $split['interwiki'];

	return new TitleValue( $namespace, $dbkey, $fragment, $interwiki );
}
209 | |
/**
 * Given a namespace and title, return a TitleValue if valid, or null if invalid.
 *
 * The parts are joined into one title string (using the canonical namespace
 * name) and run through splitTitleString(), so the usual validation and
 * normalization applies.
 *
 * @param int $namespace
 * @param string $text
 * @param string $fragment
 * @param string $interwiki
 *
 * @return TitleValue|null Null if the namespace does not exist or the title is malformed
 */
public function makeTitleValueSafe( $namespace, $text, $fragment = '', $interwiki = '' ) {
	if ( !$this->nsInfo->exists( $namespace ) ) {
		return null;
	}

	$nsPrefix = $this->nsInfo->getCanonicalName( $namespace );
	if ( $nsPrefix == '' ) {
		$candidate = $text;
	} else {
		$candidate = "$nsPrefix:$text";
	}

	if ( strval( $interwiki ) != '' ) {
		$candidate = "$interwiki:$candidate";
	}

	if ( strval( $fragment ) != '' ) {
		$candidate .= '#' . $fragment;
	}

	try {
		$split = $this->splitTitleString( $candidate );
	} catch ( MalformedTitleException $e ) {
		return null;
	}

	return new TitleValue(
		$split['namespace'],
		$split['dbkey'],
		$split['fragment'],
		$split['interwiki']
	);
}
243 | |
/**
 * @see TitleFormatter::getText()
 *
 * Returns the title text without any prefix. For a LinkTarget this is its
 * own getText(); for a PageReference the DB key with underscores replaced
 * by spaces.
 *
 * @param LinkTarget|PageReference $title
 *
 * @throws InvalidArgumentException If $title is neither a LinkTarget nor a PageReference
 * @return string
 */
public function getText( $title ) {
	if ( $title instanceof LinkTarget ) {
		return $title->getText();
	} elseif ( $title instanceof PageReference ) {
		return strtr( $title->getDBkey(), '_', ' ' );
	} else {
		// get_debug_type() accepts any value; get_class() throws a TypeError
		// for non-objects on PHP 8, which would mask the intended exception.
		throw new InvalidArgumentException( '$title has invalid type: ' . get_debug_type( $title ) );
	}
}
260 | |
/**
 * @see TitleFormatter::getPrefixedText()
 *
 * Returns the title with interwiki and namespace prefixes but without the
 * fragment. For a LinkTarget the result is cached on the object's
 * prefixedText property; a PageReference must belong to the local wiki.
 *
 * @param LinkTarget|PageReference $title
 *
 * @throws InvalidArgumentException If $title is neither a LinkTarget nor a PageReference
 * @return string
 * @suppress PhanUndeclaredProperty
 */
public function getPrefixedText( $title ) {
	if ( $title instanceof LinkTarget ) {
		if ( !isset( $title->prefixedText ) ) {
			$title->prefixedText = $this->formatTitle(
				$title->getNamespace(),
				$title->getText(),
				'',
				$title->getInterwiki()
			);
		}
		return $title->prefixedText;
	} elseif ( $title instanceof PageReference ) {
		$title->assertWiki( PageReference::LOCAL );
		return $this->formatTitle(
			$title->getNamespace(),
			$this->getText( $title )
		);
	} else {
		// get_debug_type() accepts any value; get_class() throws a TypeError
		// for non-objects on PHP 8, which would mask the intended exception.
		throw new InvalidArgumentException( '$title has invalid type: ' . get_debug_type( $title ) );
	}
}
290 | |
/**
 * Returns the prefixed title in DB key form, i.e. with underscores instead
 * of spaces and without the fragment. A PageReference must belong to the
 * local wiki.
 *
 * @since 1.27
 * @see TitleFormatter::getPrefixedDBkey()
 * @param LinkTarget|PageReference $target
 * @throws InvalidArgumentException If $target is neither a LinkTarget nor a PageReference
 * @return string
 */
public function getPrefixedDBkey( $target ) {
	if ( $target instanceof LinkTarget ) {
		// formatTitle() emits spaces, so convert them back to underscores.
		return strtr( $this->formatTitle(
			$target->getNamespace(),
			$target->getDBkey(),
			'',
			$target->getInterwiki()
		), ' ', '_' );
	} elseif ( $target instanceof PageReference ) {
		$target->assertWiki( PageReference::LOCAL );
		return strtr( $this->formatTitle(
			$target->getNamespace(),
			$target->getDBkey()
		), ' ', '_' );
	} else {
		// The message names the actual parameter. get_debug_type() accepts
		// any value; get_class() throws a TypeError for non-objects on PHP 8.
		throw new InvalidArgumentException( '$target has invalid type: ' . get_debug_type( $target ) );
	}
}
315 | |
/**
 * @see TitleFormatter::getFullText()
 *
 * Returns the title with interwiki and namespace prefixes. For a LinkTarget
 * the fragment is appended as well; for a PageReference, which must belong
 * to the local wiki, no fragment or interwiki prefix is added.
 *
 * @param LinkTarget|PageReference $title
 *
 * @throws InvalidArgumentException If $title is neither a LinkTarget nor a PageReference
 * @return string
 */
public function getFullText( $title ) {
	if ( $title instanceof LinkTarget ) {
		return $this->formatTitle(
			$title->getNamespace(),
			$title->getText(),
			$title->getFragment(),
			$title->getInterwiki()
		);
	} elseif ( $title instanceof PageReference ) {
		$title->assertWiki( PageReference::LOCAL );
		return $this->formatTitle(
			$title->getNamespace(),
			$this->getText( $title )
		);
	} else {
		// get_debug_type() accepts any value; get_class() throws a TypeError
		// for non-objects on PHP 8, which would mask the intended exception.
		throw new InvalidArgumentException( '$title has invalid type: ' . get_debug_type( $title ) );
	}
}
341 | |
/**
 * Validates, normalizes and splits a title string.
 * This is the "source of truth" for title validity.
 *
 * This function removes illegal characters, splits off the interwiki and
 * namespace prefixes, sets the other forms, and canonicalizes
 * everything.
 *
 * @todo this method is only exposed as a temporary measure to ease refactoring.
 * It was copied with minimal changes from Title::secureAndSplit().
 *
 * @todo This method should be split up and an appropriate interface
 * defined for use by the Title class.
 *
 * @param string $text
 * @param int $defaultNamespace
 *
 * @internal
 * @throws MalformedTitleException If $text is not a valid title string.
 * @return array A map with the fields 'interwiki', 'local_interwiki', 'fragment',
 *   'namespace', and 'dbkey'.
 */
public function splitTitleString( $text, $defaultNamespace = NS_MAIN ) {
	$dbkey = str_replace( ' ', '_', $text );

	# Initialisation
	$parts = [
		'interwiki' => '',
		'local_interwiki' => false,
		'fragment' => '',
		'namespace' => (int)$defaultNamespace,
		'dbkey' => $dbkey,
	];

	# Strip Unicode bidi override characters.
	# Sometimes they slip into cut-n-pasted page titles, where the
	# override chars get included in list displays.
	$dbkey = preg_replace( '/[\x{200E}\x{200F}\x{202A}-\x{202E}]+/u', '', $dbkey );

	if ( $dbkey === null ) {
		# Regex had an error. Most likely this is caused by invalid UTF-8
		$exception = ( $this->createMalformedTitleException )( 'title-invalid-utf8', $text );
		throw $exception;
	}

	# Clean up whitespace
	$dbkey = preg_replace(
		'/[ _\xA0\x{1680}\x{180E}\x{2000}-\x{200A}\x{2028}\x{2029}\x{202F}\x{205F}\x{3000}]+/u',
		'_',
		$dbkey
	);
	$dbkey = trim( $dbkey, '_' );

	if ( str_contains( $dbkey, \UtfNormal\Constants::UTF8_REPLACEMENT ) ) {
		# Contained illegal UTF-8 sequences or forbidden Unicode chars.
		$exception = ( $this->createMalformedTitleException )( 'title-invalid-utf8', $text );
		throw $exception;
	}

	$parts['dbkey'] = $dbkey;

	# Initial colon indicates main namespace rather than specified default
	# but should not create invalid {ns,title} pairs such as {0,Project:Foo}
	if ( $dbkey !== '' && $dbkey[0] === ':' ) {
		$parts['namespace'] = NS_MAIN;
		$dbkey = substr( $dbkey, 1 ); # remove the colon but continue processing
		$dbkey = trim( $dbkey, '_' ); # remove any subsequent whitespace
	}

	if ( $dbkey === '' ) {
		$exception = ( $this->createMalformedTitleException )( 'title-invalid-empty', $text );
		throw $exception;
	}

	# Namespace or interwiki prefix
	$prefixRegexp = "/^(.+?)_*:_*(.*)$/S";
	# The loop only repeats (via "continue 2") after a local interwiki prefix
	# has been stripped; every other path falls through to the "break".
	do {
		$m = [];
		if ( preg_match( $prefixRegexp, $dbkey, $m ) ) {
			$p = $m[1];
			$ns = $this->language->getNsIndex( $p );
			if ( $ns !== false ) {
				# Ordinary namespace
				$dbkey = $m[2];
				$parts['namespace'] = $ns;
				# For Talk:X pages, check if X has a "namespace" prefix
				if ( $ns === NS_TALK && preg_match( $prefixRegexp, $dbkey, $x ) ) {
					if ( $this->language->getNsIndex( $x[1] ) ) {
						# Disallow Talk:File:x type titles...
						$exception = ( $this->createMalformedTitleException )(
							'title-invalid-talk-namespace',
							$text
						);
						throw $exception;
					} elseif ( $this->interwikiLookup->isValidInterwiki( $x[1] ) ) {
						# Disallow Talk:Interwiki:x type titles...
						$exception = ( $this->createMalformedTitleException )(
							'title-invalid-talk-namespace',
							$text
						);
						throw $exception;
					}
				}
			} elseif ( $this->interwikiLookup->isValidInterwiki( $p ) ) {
				# Interwiki link
				$dbkey = $m[2];
				$parts['interwiki'] = $this->language->lc( $p );

				# Redundant interwiki prefix to the local wiki
				foreach ( $this->localInterwikis as $localIW ) {
					if ( strcasecmp( $parts['interwiki'], $localIW ) === 0 ) {
						if ( $dbkey === '' ) {
							# Empty self-links should point to the Main Page, to ensure
							# compatibility with cross-wiki transclusions and the like.
							$mainPage = Title::newMainPage();
							return [
								'interwiki' => $mainPage->getInterwiki(),
								'local_interwiki' => true,
								'fragment' => $mainPage->getFragment(),
								'namespace' => $mainPage->getNamespace(),
								'dbkey' => $mainPage->getDBkey(),
							];
						}
						$parts['interwiki'] = '';
						# local interwikis should behave like initial-colon links
						$parts['local_interwiki'] = true;

						# Do another namespace split...
						continue 2;
					}
				}

				# If there's an initial colon after the interwiki, that also
				# resets the default namespace
				if ( $dbkey !== '' && $dbkey[0] === ':' ) {
					$parts['namespace'] = NS_MAIN;
					$dbkey = substr( $dbkey, 1 );
					$dbkey = trim( $dbkey, '_' );
				}
			}
			# If there's no recognized interwiki or namespace,
			# then let the colon expression be part of the title.
		}
		break;
	} while ( true );

	$fragment = strstr( $dbkey, '#' );
	if ( $fragment !== false ) {
		$parts['fragment'] = str_replace( '_', ' ', substr( $fragment, 1 ) );
		$dbkey = substr( $dbkey, 0, strlen( $dbkey ) - strlen( $fragment ) );
		# remove whitespace again: prevents "Foo_bar_#"
		# becoming "Foo_bar_"
		$dbkey = rtrim( $dbkey, "_" );
	}

	# Reject illegal characters.
	$rxTc = self::getTitleInvalidRegex();
	$matches = [];
	if ( preg_match( $rxTc, $dbkey, $matches ) ) {
		$exception = ( $this->createMalformedTitleException )( 'title-invalid-characters', $text, [ $matches[0] ] );
		throw $exception;
	}

	# Pages with "/./" or "/../" appearing in the URLs will often be un-
	# reachable due to the way web browsers deal with 'relative' URLs.
	# Also, they conflict with subpage syntax. Forbid them explicitly.
	if (
		str_contains( $dbkey, '.' ) &&
		(
			$dbkey === '.' || $dbkey === '..' ||
			str_starts_with( $dbkey, './' ) ||
			str_starts_with( $dbkey, '../' ) ||
			str_contains( $dbkey, '/./' ) ||
			str_contains( $dbkey, '/../' ) ||
			str_ends_with( $dbkey, '/.' ) ||
			str_ends_with( $dbkey, '/..' )
		)
	) {
		$exception = ( $this->createMalformedTitleException )( 'title-invalid-relative', $text );
		throw $exception;
	}

	# Magic tilde sequences? Nu-uh!
	if ( str_contains( $dbkey, '~~~' ) ) {
		$exception = ( $this->createMalformedTitleException )( 'title-invalid-magic-tilde', $text );
		throw $exception;
	}

	# Limit the size of titles to 255 bytes. This is typically the size of the
	# underlying database field. We make an exception for special pages, which
	# don't need to be stored in the database, and may edge over 255 bytes due
	# to subpage syntax for long titles, e.g. [[Special:Block/Long name]]
	$maxLength = ( $parts['namespace'] !== NS_SPECIAL ) ? 255 : 512;
	if ( strlen( $dbkey ) > $maxLength ) {
		$exception = ( $this->createMalformedTitleException )(
			'title-invalid-too-long',
			$text,
			[ Message::numParam( $maxLength ) ]
		);
		throw $exception;
	}

	# Normally, all wiki links are forced to have an initial capital letter so [[foo]]
	# and [[Foo]] point to the same place. Don't force it for interwikis, since the
	# other site might be case-sensitive.
	if ( $parts['interwiki'] === '' && $this->nsInfo->isCapitalized( $parts['namespace'] ) ) {
		$dbkey = $this->language->ucfirst( $dbkey );
	}

	# Can't make a link to a namespace alone... "empty" local links can only be
	# self-links with a fragment identifier.
	if ( $dbkey === '' && $parts['interwiki'] === '' && $parts['namespace'] !== NS_MAIN ) {
		$exception = ( $this->createMalformedTitleException )( 'title-invalid-empty', $text );
		throw $exception;
	}

	// Allow IPv6 usernames to start with '::' by canonicalizing IPv6 titles.
	// IP names are not allowed for accounts, and can only be referring to
	// edits from the IP. Given '::' abbreviations and caps/lowercaps,
	// there are numerous ways to present the same IP. Having sp:contribs scan
	// them all is silly and having some show the edits and others not is
	// inconsistent. Same for talk/userpages. Keep them normalized instead.
	if ( $dbkey !== '' && ( $parts['namespace'] === NS_USER || $parts['namespace'] === NS_USER_TALK ) ) {
		$dbkey = IPUtils::sanitizeIP( $dbkey );
		// IPUtils::sanitizeIP return null only for bad input
		'@phan-var string $dbkey';
	}

	// Any remaining initial :s are illegal.
	if ( $dbkey !== '' && $dbkey[0] === ':' ) {
		$exception = ( $this->createMalformedTitleException )( 'title-invalid-leading-colon', $text );
		throw $exception;
	}

	// Fill fields
	$parts['dbkey'] = $dbkey;

	// Check to ensure that the return value can be used to construct a TitleValue.
	// All issues should in theory be caught above, this is here to enforce consistency.
	try {
		TitleValue::assertValidSpec(
			$parts['namespace'],
			$parts['dbkey'],
			$parts['fragment'],
			$parts['interwiki']
		);
	} catch ( InvalidArgumentException $ex ) {
		$exception = ( $this->createMalformedTitleException )( 'title-invalid', $text, [ $ex->getMessage() ] );
		throw $exception;
	}

	return $parts;
}
594 | |
/**
 * Returns a simple regex that will match on characters and sequences invalid in titles.
 * Note that this doesn't pick up many things that could be wrong with titles, but that
 * replacing this regex with something valid will make many titles valid.
 * Previously Title::getTitleInvalidRegex()
 *
 * The pattern is assembled on the first call and kept in a static variable
 * for later calls.
 *
 * @return string Regex string
 * @since 1.25
 */
public static function getTitleInvalidRegex() {
	static $cachedRegex = false;

	if ( $cachedRegex ) {
		return $cachedRegex;
	}

	# Matching titles will be held as illegal.
	$alternatives = [
		# Any character not allowed is forbidden...
		'[^' . Title::legalChars() . ']',
		# URL percent encoding sequences interfere with the ability
		# to round-trip titles -- you can't link to them consistently.
		'%[0-9A-Fa-f]{2}',
		# XML/HTML character references produce similar issues.
		'&[A-Za-z0-9\x80-\xff]+;',
	];
	$cachedRegex = '/' . implode( '|', $alternatives ) . '/S';

	return $cachedRegex;
}
621 | } |
622 | |
/**
 * Keeps the un-namespaced class name 'MediaWikiTitleCodec' usable.
 *
 * @deprecated class alias since 1.41
 */
class_alias( MediaWikiTitleCodec::class, 'MediaWikiTitleCodec' );