Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
73.29% |
118 / 161 |
|
17.65% |
3 / 17 |
CRAP | |
0.00% |
0 / 1 |
Title | |
73.29% |
118 / 161 |
|
17.65% |
3 / 17 |
170.76 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
1 | |||
newFromText | |
97.00% |
97 / 100 |
|
0.00% |
0 / 1 |
42 | |||
getInterwiki | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getKey | |
66.67% |
2 / 3 |
|
0.00% |
0 / 1 |
2.15 | |||
getDBkey | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getPrefixedDBKey | |
85.71% |
6 / 7 |
|
0.00% |
0 / 1 |
4.05 | |||
getPrefixedText | |
85.71% |
6 / 7 |
|
0.00% |
0 / 1 |
4.05 | |||
getFullText | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
getFullDBKey | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
getNamespace | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getNamespaceName | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getFragment | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
equals | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
12 | |||
isSpecialPage | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
fixSpecialName | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
6 | |||
createFragmentTarget | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
6 | |||
newFromLinkTarget | |
0.00% |
0 / 15 |
|
0.00% |
0 / 1 |
6 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace Wikimedia\Parsoid\Utils; |
5 | |
6 | use Wikimedia\Assert\Assert; |
7 | use Wikimedia\IPUtils; |
8 | use Wikimedia\Parsoid\Config\SiteConfig; |
9 | use Wikimedia\Parsoid\Core\LinkTarget; |
10 | use Wikimedia\Parsoid\Core\LinkTargetTrait; |
11 | |
/**
 * A LinkTarget implementation made of an interwiki prefix, a namespace
 * (ID and name), a DBkey and a fragment.
 *
 * Instances are created through the static factories ::newFromText() and
 * ::newFromLinkTarget(), or derived from an existing instance with
 * ::createFragmentTarget(); the constructor is private.
 */
class Title implements LinkTarget {
	use LinkTargetTrait;

	/** Interwiki prefix, or the empty string if there is none */
	private string $interwiki;

	/** Numeric namespace ID */
	private int $namespaceId;

	/** Namespace name (with spaces, not underscores) */
	private string $namespaceName;

	/** Page DBkey (with underscores, not spaces) */
	private string $dbkey;

	/** Fragment (without the leading '#'), or the empty string if there is none */
	private string $fragment;

	// cached values of prefixed title/key
	private ?string $prefixedDBKey = null;
	private ?string $prefixedText = null;

	/**
	 * @param string $interwiki Interwiki prefix, or empty string if none
	 * @param string $key Page DBkey (with underscores, not spaces)
	 * @param int $namespaceId
	 * @param string $namespaceName (with spaces, not underscores)
	 * @param ?string $fragment Fragment without the leading '#'; null is
	 *   stored as the empty string
	 */
	private function __construct(
		string $interwiki, string $key, int $namespaceId, string $namespaceName, ?string $fragment = null
	) {
		$this->interwiki = $interwiki;
		$this->dbkey = $key;
		$this->namespaceId = $namespaceId;
		$this->namespaceName = $namespaceName;
		$this->fragment = $fragment ?? '';
	}

	/**
	 * Parse a title string into a Title.
	 *
	 * The text is normalized (bidi override characters stripped, runs of
	 * whitespace/underscores collapsed to a single underscore and trimmed),
	 * a leading namespace or interwiki prefix is split off, the fragment
	 * after the first '#' is separated, and the remaining page name is
	 * validated.
	 *
	 * @param string $title Title text to parse
	 * @param SiteConfig $siteConfig Used to resolve namespaces, interwiki
	 *   prefixes, legal title characters, namespace case handling and
	 *   special page names
	 * @param ?int $defaultNs Namespace used when the text carries no
	 *   namespace prefix; null is treated as 0
	 * @return Title
	 * @throws TitleException if the text is not valid UTF-8, is empty,
	 *   is a Talk: title wrapping another namespace or interwiki prefix,
	 *   contains illegal characters, relative path components or "~~~",
	 *   is too long, or starts with a stray colon
	 */
	public static function newFromText(
		string $title, SiteConfig $siteConfig, ?int $defaultNs = null
	): Title {
		$defaultNs ??= 0;
		$origTitle = $title;

		// Title::newFromText() calls ::newFromTextThrow() which calls
		// Sanitizer::decodeCharReferencesAndNormalize($title) here.
		// We appear to ban char references in the title, see below.

		// This check appears to be Parsoid-specific, but mirrors a check
		// below done in TitleParser::splitTitleString().
		if ( !mb_check_encoding( $title, 'UTF-8' ) ) {
			throw new TitleException( "Bad UTF-8 in title \"$origTitle\"", 'title-invalid-utf8', $origTitle );
		}

		// Title::secureAndSplit() calls TitleParser::splitTitleString(),
		// which the following code is from:

		// Strip Unicode bidi override characters.
		$title = preg_replace( '/[\x{200E}\x{200F}\x{202A}-\x{202E}]+/u', '', $title );
		if ( $title === null ) {
			throw new TitleException( "Bad UTF-8 in title \"$origTitle\"", 'title-invalid-utf8', $origTitle );
		}

		// Clean up whitespace
		$title = preg_replace(
			'/[ _\x{00A0}\x{1680}\x{180E}\x{2000}-\x{200A}\x{2028}\x{2029}\x{202F}\x{205F}\x{3000}]+/u',
			'_', $title
		);
		// Trim _ from beginning and end
		$title = trim( $title, '_' );

		if ( str_contains( $title, \UtfNormal\Constants::UTF8_REPLACEMENT ) ) {
			throw new TitleException( "Bad UTF-8 in title \"$title\"", 'title-invalid-utf8', $title );
		}

		// Initial colon indicates main namespace rather than specified default
		// but should not create invalid {ns,title} pairs such as {0,Project:Foo}
		if ( $title !== '' && $title[0] === ':' ) {
			$title = ltrim( substr( $title, 1 ), '_' );
			$defaultNs = 0;
		}

		if ( $title === '' ) {
			throw new TitleException( 'Empty title', 'title-invalid-empty', $title );
		}

		$ns = $defaultNs;
		$interwiki = null;

		# Namespace or interwiki prefix
		$prefixRegexp = "/^(.+?)_*:_*(.*)$/S";
		// TitleParser::splitTitleString wraps a loop around the
		// next section, to allow it to repeat this prefix processing if
		// an interwiki prefix is found which points at the local wiki.
		$m = [];
		if ( preg_match( $prefixRegexp, $title, $m ) ) {
			$p = $m[1];
			$pLower = mb_strtolower( $p );
			$nsId = $siteConfig->canonicalNamespaceId( $pLower ) ??
				$siteConfig->namespaceId( $pLower );
			if ( $nsId !== null ) {
				$title = $m[2];
				$ns = $nsId;
				# For Talk:X pages, check if X has a "namespace" prefix
				if (
					$nsId === $siteConfig->canonicalNamespaceId( 'talk' ) &&
					preg_match( $prefixRegexp, $title, $x )
				) {
					$xLower = mb_strtolower( $x[1] );
					if ( $siteConfig->namespaceId( $xLower ) ) {
						// Disallow Talk:File:x type titles.
						throw new TitleException(
							"Invalid Talk namespace title \"$origTitle\"", 'title-invalid-talk-namespace', $title
						);
					} elseif ( $siteConfig->interwikiMapNoNamespaces()[$xLower] ?? null ) {
						// Disallow Talk:Interwiki:x type titles.
						throw new TitleException(
							"Invalid Talk namespace title \"$origTitle\"", 'title-invalid-talk-namespace', $title
						);
					}
				}
			} elseif ( $siteConfig->interwikiMapNoNamespaces()[$pLower] ?? null ) {
				# Interwiki link
				$title = $m[2];
				$interwiki = $pLower;

				# We don't check for a redundant interwiki prefix to the
				# local wiki, like core does here in
				# TitleParser::splitTitleString;
				# core then does a `continue` to repeat the processing

				// If there's an initial colon after the interwiki, that also
				// resets the default namespace
				if ( $title !== '' && $title[0] === ':' ) {
					$title = trim( substr( $title, 1 ), '_' );
					$ns = 0;
				}
			}
			# If there's no recognized interwiki or namespace,
			# then let the colon expression be part of the title
		}

		$fragment = null;
		$fragmentIndex = strpos( $title, '#' );
		if ( $fragmentIndex !== false ) {
			$fragment = substr( $title, $fragmentIndex + 1 );
			$title = rtrim( substr( $title, 0, $fragmentIndex ), '_' );
			# TitleParser::splitTitleString replaces _ with spaces in
			# $fragment here?
		}

		// This is from TitleParser::getTitleInvalidRegex()
		$illegalCharsRe = '/[^' . $siteConfig->legalTitleChars() . ']'
			// URL percent encoding sequences interfere with the ability
			// to round-trip titles -- you can't link to them consistently.
			. '|%[0-9A-Fa-f]{2}'
			// XML/HTML character references produce similar issues.
			. '|&[A-Za-z0-9\x80-\xff]+;/S';
		if ( preg_match( $illegalCharsRe, $title ) ) {
			throw new TitleException(
				"Invalid characters in title \"$origTitle\"", 'title-invalid-characters', $title
			);
		}

		// Pages with "/./" or "/../" appearing in the URLs will often be
		// unreachable due to the way web browsers deal with 'relative' URLs.
		// Also, they conflict with subpage syntax. Forbid them explicitly.
		if ( str_contains( $title, '.' ) && (
			$title === '.' || $title === '..' ||
			str_starts_with( $title, './' ) ||
			str_starts_with( $title, '../' ) ||
			str_contains( $title, '/./' ) ||
			str_contains( $title, '/../' ) ||
			str_ends_with( $title, '/.' ) ||
			str_ends_with( $title, '/..' )
		) ) {
			throw new TitleException(
				"Title \"$origTitle\" contains relative path components", 'title-invalid-relative', $title
			);
		}

		// Magic tilde sequences? Nu-uh!
		if ( str_contains( $title, '~~~' ) ) {
			throw new TitleException(
				"Title \"$origTitle\" contains ~~~", 'title-invalid-magic-tilde', $title
			);
		}

		// The limit is measured in bytes (strlen), not characters.
		$maxLength = $ns === $siteConfig->canonicalNamespaceId( 'special' ) ? 512 : 255;
		if ( strlen( $title ) > $maxLength ) {
			throw new TitleException(
				"Title \"$origTitle\" is too long", 'title-invalid-too-long', $title
			);
		}

		# Normally, all wiki links are forced to have an initial capital letter so [[foo]]
		# and [[Foo]] point to the same place. Don't force it for interwikis, since the
		# other site might be case-sensitive.
		if ( $interwiki === null && $siteConfig->namespaceCase( $ns ) === 'first-letter' ) {
			$title = $siteConfig->ucfirst( $title );
		}

		# Can't make a link to a namespace alone... "empty" local links can only be
		# self-links with a fragment identifier.
		if ( $title === '' && $interwiki === null && $ns !== $siteConfig->canonicalNamespaceId( '' ) ) {
			throw new TitleException( 'Empty title', 'title-invalid-empty', $title );
		}

		// Allow IPv6 usernames to start with '::' by canonicalizing IPv6 titles.
		// IP names are not allowed for accounts, and can only be referring to
		// edits from the IP. Given '::' abbreviations and caps/lowercaps,
		// there are numerous ways to present the same IP. Having sp:contribs scan
		// them all is silly and having some show the edits and others not is
		// inconsistent. Same for talk/userpages. Keep them normalized instead.
		if ( $title !== '' && ( # T329690
			$ns === $siteConfig->canonicalNamespaceId( 'user' ) ||
			$ns === $siteConfig->canonicalNamespaceId( 'user_talk' )
		) ) {
			$title = IPUtils::sanitizeIP( $title );
			// IPUtils::sanitizeIP return null only for bad input
			'@phan-var string $title';
		}

		// Any remaining initial :s are illegal.
		if ( $title !== '' && $title[0] === ':' ) {
			throw new TitleException(
				'Leading colon title', 'title-invalid-leading-colon', $title
			);
		}

		// This is not in core's TitleParser::splitTitleString but matches
		// mediawiki-title's newFromText.
		if ( $ns === $siteConfig->canonicalNamespaceId( 'special' ) ) {
			$title = self::fixSpecialName( $siteConfig, $title );
		}

		$namespaceName = $siteConfig->namespaceName( $ns );
		return new self( $interwiki ?? '', $title, $ns, $namespaceName, $fragment );
	}

	/**
	 * The interwiki component of this LinkTarget.
	 * This is the empty string if there is no interwiki component.
	 *
	 * @return string
	 */
	public function getInterwiki(): string {
		return $this->interwiki;
	}

	/**
	 * Get the DBkey, prefixed with interwiki prefix if any.
	 * This is Parsoid's convention, which differs from core;
	 * use ::getDBkey() for a method compatible with core's
	 * convention.
	 *
	 * @return string
	 * @see ::getDBkey()
	 * @deprecated
	 */
	public function getKey(): string {
		// Compare against '' explicitly (as the getPrefixed*() methods do):
		// a truthiness test would treat the prefix "0" as absent.
		if ( $this->interwiki !== '' ) {
			return $this->interwiki . ':' . $this->dbkey;
		}
		return $this->dbkey;
	}

	/**
	 * Get the main part of the link target, in canonical database form.
	 *
	 * The main part is the link target without namespace prefix or hash fragment.
	 * The database form means that spaces become underscores, this is also
	 * used for URLs.
	 *
	 * @return string
	 */
	public function getDBkey(): string {
		return $this->dbkey;
	}

	/**
	 * Get the prefixed DBkey: interwiki prefix (if any), namespace name
	 * with underscores (if non-empty) and the DBkey, joined by ':'.
	 * The result is computed once and cached.
	 *
	 * @return string
	 */
	public function getPrefixedDBKey(): string {
		if ( $this->prefixedDBKey === null ) {
			$this->prefixedDBKey = $this->interwiki === '' ? '' :
				( $this->interwiki . ':' );
			$this->prefixedDBKey .= $this->namespaceName === '' ? '' :
				( strtr( $this->namespaceName, ' ', '_' ) . ':' );
			$this->prefixedDBKey .= $this->getDBkey();
		}
		return $this->prefixedDBKey;
	}

	/**
	 * Get the prefixed text: interwiki prefix (if any), namespace name
	 * (if non-empty) and the result of ::getText(), joined by ':'.
	 * The result is computed once and cached.
	 *
	 * @return string
	 */
	public function getPrefixedText(): string {
		if ( $this->prefixedText === null ) {
			$this->prefixedText = $this->interwiki === '' ? '' :
				( $this->interwiki . ':' );
			$this->prefixedText .= $this->namespaceName === '' ? '' :
				( $this->namespaceName . ':' );
			$this->prefixedText .= $this->getText();
		}
		return $this->prefixedText;
	}

	/**
	 * Get the prefixed title with spaces, plus any fragment
	 * (part beginning with '#')
	 *
	 * @return string The prefixed title, with spaces and the fragment, including '#'
	 */
	public function getFullText(): string {
		$text = $this->getPrefixedText();
		if ( $this->hasFragment() ) {
			$text .= '#' . $this->getFragment();
		}
		return $text;
	}

	/**
	 * Get the prefixed title with underscores, plus any fragment
	 * (part beginning with '#')
	 *
	 * @return string The prefixed title, with underscores, and the fragment, including '#'
	 * @note This method is Parsoid-only and doesn't exist in mediawiki-core's
	 *   Title class.
	 */
	public function getFullDBKey(): string {
		$dbkey = $this->getPrefixedDBKey();
		if ( $this->hasFragment() ) {
			$dbkey .= '#' . $this->getFragment();
		}
		return $dbkey;
	}

	/**
	 * Get the namespace ID
	 * @return int
	 */
	public function getNamespace(): int {
		return $this->namespaceId;
	}

	/**
	 * Get the human-readable name for the namespace
	 * (with spaces, not underscores).
	 * @return string
	 */
	public function getNamespaceName(): string {
		return $this->namespaceName;
	}

	/**
	 * Get the link fragment in text form (i.e. the bit after the hash `#`).
	 *
	 * @return string link fragment, or the empty string if there is none
	 */
	public function getFragment(): string {
		return $this->fragment;
	}

	/**
	 * Compare with another title.
	 *
	 * Namespace ID, interwiki prefix and DBkey are compared; the fragment
	 * is not.
	 *
	 * @param Title $title
	 * @return bool
	 */
	public function equals( Title $title ) {
		return $this->getNamespace() === $title->getNamespace() &&
			$this->getInterwiki() === $title->getInterwiki() &&
			$this->getDBkey() === $title->getDBkey();
	}

	/**
	 * Returns true if this is a special page.
	 *
	 * @return bool
	 */
	public function isSpecialPage() {
		return $this->getNamespace() === -1; // NS_SPECIAL;
	}

	/**
	 * Use the default special page alias.
	 *
	 * Only the part before the first '/' is looked up; anything after it
	 * is kept as is. The title is returned unchanged when the site
	 * configuration has no local name for it.
	 *
	 * @param SiteConfig $siteConfig
	 * @param string $title
	 * @return string
	 */
	public static function fixSpecialName(
		SiteConfig $siteConfig, string $title
	): string {
		$parts = explode( '/', $title, 2 );
		$specialName = $siteConfig->specialPageLocalName( $parts[0] );
		if ( $specialName !== null ) {
			$parts[0] = $specialName;
			$title = implode( '/', $parts );
		}
		return $title;
	}

	/**
	 * Create a new LinkTarget with a different fragment on the same page.
	 *
	 * It is expected that the same type of object will be returned, but the
	 * only requirement is that it is a LinkTarget.
	 *
	 * @param string $fragment The fragment override, or "" to remove it.
	 *
	 * @return self
	 */
	public function createFragmentTarget( string $fragment ) {
		// Pass the fragment through unchanged: "" already means "no
		// fragment" to the constructor, and a truthiness test (`?:`) would
		// wrongly discard the valid fragment "0".
		return new self( $this->interwiki, $this->dbkey, $this->namespaceId, $this->namespaceName, $fragment );
	}

	/**
	 * Convert LinkTarget from core (or other implementation) into a
	 * Parsoid Title.
	 *
	 * A $linkTarget that already is a Title is returned as is.
	 *
	 * @param LinkTarget $linkTarget
	 * @param SiteConfig $siteConfig Used to look up the namespace name for
	 *   the target's namespace ID
	 * @return self
	 */
	public static function newFromLinkTarget(
		LinkTarget $linkTarget, SiteConfig $siteConfig
	) {
		if ( $linkTarget instanceof Title ) {
			return $linkTarget;
		}
		$ns = $linkTarget->getNamespace();
		$namespaceName = $siteConfig->namespaceName( $ns );
		Assert::invariant(
			$namespaceName !== null,
			"Badtitle ({$linkTarget}) in unknown namespace ({$ns})"
		);
		return new self(
			$linkTarget->getInterwiki(),
			$linkTarget->getDBkey(),
			$ns,
			$namespaceName,
			$linkTarget->getFragment()
		);
	}
}