Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
71.95% |
118 / 164 |
|
17.65% |
3 / 17 |
CRAP | |
0.00% |
0 / 1 |
| Title | |
71.95% |
118 / 164 |
|
17.65% |
3 / 17 |
186.39 | |
0.00% |
0 / 1 |
| __construct | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
1 | |||
| newFromText | |
97.06% |
99 / 102 |
|
0.00% |
0 / 1 |
42 | |||
| getInterwiki | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| getKey | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
| getDBkey | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| getPrefixedDBKey | |
85.71% |
6 / 7 |
|
0.00% |
0 / 1 |
4.05 | |||
| getPrefixedText | |
85.71% |
6 / 7 |
|
0.00% |
0 / 1 |
4.05 | |||
| getFullText | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
| getFullDBKey | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
| getNamespace | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| getNamespaceName | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| getFragment | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| equals | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
12 | |||
| isSpecialPage | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| fixSpecialName | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
6 | |||
| createFragmentTarget | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
6 | |||
| newFromLinkTarget | |
0.00% |
0 / 15 |
|
0.00% |
0 / 1 |
6 | |||
| 1 | <?php |
| 2 | declare( strict_types = 1 ); |
| 3 | |
| 4 | namespace Wikimedia\Parsoid\Utils; |
| 5 | |
| 6 | use UtfNormal\Validator as UtfNormalValidator; |
| 7 | use Wikimedia\Assert\Assert; |
| 8 | use Wikimedia\IPUtils; |
| 9 | use Wikimedia\Parsoid\Config\SiteConfig; |
| 10 | use Wikimedia\Parsoid\Core\LinkTarget; |
| 11 | use Wikimedia\Parsoid\Core\LinkTargetTrait; |
| 12 | |
/**
 * Parsoid's representation of a parsed page title: an optional interwiki
 * prefix, a namespace (id and name), a DB key and an optional fragment.
 *
 * The constructor is private; instances are obtained from ::newFromText(),
 * ::newFromLinkTarget() or ::createFragmentTarget().
 */
class Title implements LinkTarget {
	use LinkTargetTrait;

	/** @var string Interwiki prefix, or empty string if none */
	private $interwiki;

	/** @var int */
	private $namespaceId;

	/** @var string Namespace name (with spaces, not underscores) */
	private $namespaceName;

	/** @var string Page DBkey (with underscores, not spaces) */
	private $dbkey;

	/** @var string Fragment without the leading '#', or empty string if none */
	private $fragment;

	// cached values of prefixed title/key
	private ?string $prefixedDBKey = null;
	private ?string $prefixedText = null;

	/**
	 * @param string $interwiki Interwiki prefix, or empty string if none
	 * @param string $key Page DBkey (with underscores, not spaces)
	 * @param int $namespaceId
	 * @param string $namespaceName (with spaces, not underscores)
	 * @param ?string $fragment Fragment, or null for none (stored as '')
	 */
	private function __construct(
		string $interwiki, string $key, int $namespaceId, string $namespaceName, ?string $fragment = null
	) {
		$this->interwiki = $interwiki;
		$this->dbkey = $key;
		$this->namespaceId = $namespaceId;
		$this->namespaceName = $namespaceName;
		$this->fragment = $fragment ?? '';
	}

	/**
	 * Parse a title string into a Title, validating it along the way.
	 *
	 * The steps follow core's TitleParser::splitTitleString(): strip bidi
	 * overrides, normalize whitespace to underscores, split off a namespace
	 * or interwiki prefix, split off the fragment, then reject illegal
	 * characters, relative path components, tilde sequences and over-long
	 * titles before normalizing case and IP addresses.
	 *
	 * @param string $title Title text to parse
	 * @param SiteConfig $siteConfig Source of namespace, interwiki and
	 *   legal-character information
	 * @param ?int $defaultNs Namespace to use when the text carries no
	 *   namespace prefix; null is treated as 0
	 * @return Title
	 * @throws TitleException if the text is not a valid title
	 */
	public static function newFromText(
		string $title, SiteConfig $siteConfig, ?int $defaultNs = null
	): Title {
		$defaultNs ??= 0;
		$origTitle = $title;

		// Title::newFromText() calls ::newFromTextThrow() which calls
		// Sanitizer::decodeCharReferencesAndNormalize($title) here.
		// We appear to ban char references in the title, see below.

		// This check appears to be Parsoid-specific, but mirrors a check
		// below done in TitleParser::splitTitleString().
		if ( !mb_check_encoding( $title, 'UTF-8' ) ) {
			throw new TitleException( "Bad UTF-8 in title \"$origTitle\"", 'title-invalid-utf8', $origTitle );
		}

		// Title::secureAndSplit() calls TitleParser::splitTitleString(),
		// which the following code is from:

		// Strip Unicode bidi override characters.
		$title = preg_replace( '/[\x{200E}\x{200F}\x{202A}-\x{202E}]+/u', '', $title );
		if ( $title === null ) {
			throw new TitleException( "Bad UTF-8 in title \"$origTitle\"", 'title-invalid-utf8', $origTitle );
		}

		// Clean up whitespace
		$title = preg_replace(
			'/[ _\x{00A0}\x{1680}\x{180E}\x{2000}-\x{200A}\x{2028}\x{2029}\x{202F}\x{205F}\x{3000}]+/u',
			'_', $title
		);
		// preg_replace() returns null on failure; report that the same way
		// as the bidi-stripping step above instead of letting trim() raise
		// a TypeError under strict_types.
		if ( $title === null ) {
			throw new TitleException( "Bad UTF-8 in title \"$origTitle\"", 'title-invalid-utf8', $origTitle );
		}
		// Trim _ from beginning and end
		$title = trim( $title, '_' );

		if ( str_contains( $title, \UtfNormal\Constants::UTF8_REPLACEMENT ) ) {
			throw new TitleException( "Bad UTF-8 in title \"$title\"", 'title-invalid-utf8', $title );
		}

		// Initial colon indicates main namespace rather than specified default
		// but should not create invalid {ns,title} pairs such as {0,Project:Foo}
		if ( $title !== '' && $title[0] === ':' ) {
			$title = ltrim( substr( $title, 1 ), '_' );
			$defaultNs = 0;
		}

		if ( $title === '' ) {
			throw new TitleException( 'Empty title', 'title-invalid-empty', $title );
		}

		$ns = $defaultNs;
		$interwiki = null;

		# Namespace or interwiki prefix
		$prefixRegexp = "/^(.+?)_*:_*(.*)$/S";
		// TitleParser::splitTitleString wraps a loop around the
		// next section, to allow it to repeat this prefix processing if
		// an interwiki prefix is found which points at the local wiki.
		$m = [];
		if ( preg_match( $prefixRegexp, $title, $m ) ) {
			$p = $m[1];
			$pLower = mb_strtolower( $p );
			$nsId = $siteConfig->canonicalNamespaceId( $pLower ) ??
				$siteConfig->namespaceId( $pLower );
			if ( $nsId !== null ) {
				$title = $m[2];
				$ns = $nsId;
				# For Talk:X pages, check if X has a "namespace" prefix
				if (
					$nsId === $siteConfig->canonicalNamespaceId( 'talk' ) &&
					preg_match( $prefixRegexp, $title, $x )
				) {
					$xLower = mb_strtolower( $x[1] );
					if ( $siteConfig->namespaceId( $xLower ) ) {
						// Disallow Talk:File:x type titles.
						throw new TitleException(
							"Invalid Talk namespace title \"$origTitle\"", 'title-invalid-talk-namespace', $title
						);
					} elseif ( $siteConfig->interwikiMapNoNamespaces()[$xLower] ?? null ) {
						// Disallow Talk:Interwiki:x type titles.
						throw new TitleException(
							"Invalid Talk namespace title \"$origTitle\"", 'title-invalid-talk-namespace', $title
						);
					}
				}
			} elseif ( $siteConfig->interwikiMapNoNamespaces()[$pLower] ?? null ) {
				# Interwiki link
				$title = $m[2];
				$interwiki = $pLower;

				# We don't check for a redundant interwiki prefix to the
				# local wiki, like core does here in
				# TitleParser::splitTitleString;
				# core then does a `continue` to repeat the processing

				// If there's an initial colon after the interwiki, that also
				// resets the default namespace
				if ( $title !== '' && $title[0] === ':' ) {
					$title = trim( substr( $title, 1 ), '_' );
					$ns = 0;
				}
			}
			# If there's no recognized interwiki or namespace,
			# then let the colon expression be part of the title
		}

		$fragment = null;
		$fragmentIndex = strpos( $title, '#' );
		if ( $fragmentIndex !== false ) {
			$fragment = substr( $title, $fragmentIndex + 1 );
			$title = rtrim( substr( $title, 0, $fragmentIndex ), '_' );
			# TitleParser::splitTitleString replaces _ with spaces in
			# $fragment here?
		}

		// This is from TitleParser::getTitleInvalidRegex()
		$illegalCharsRe = '/[^' . $siteConfig->legalTitleChars() . ']'
			// URL percent encoding sequences interfere with the ability
			// to round-trip titles -- you can't link to them consistently.
			. '|%[0-9A-Fa-f]{2}'
			// XML/HTML character references produce similar issues.
			. '|&[A-Za-z0-9\x80-\xff]+;/S';
		if ( preg_match( $illegalCharsRe, $title ) ) {
			throw new TitleException(
				"Invalid characters in title \"$origTitle\"", 'title-invalid-characters', $title
			);
		}

		// Pages with "/./" or "/../" appearing in the URLs will often be
		// unreachable due to the way web browsers deal with 'relative' URLs.
		// Also, they conflict with subpage syntax. Forbid them explicitly.
		if ( str_contains( $title, '.' ) && (
			$title === '.' || $title === '..' ||
			str_starts_with( $title, './' ) ||
			str_starts_with( $title, '../' ) ||
			str_contains( $title, '/./' ) ||
			str_contains( $title, '/../' ) ||
			str_ends_with( $title, '/.' ) ||
			str_ends_with( $title, '/..' )
		) ) {
			throw new TitleException(
				"Title \"$origTitle\" contains relative path components", 'title-invalid-relative', $title
			);
		}

		// Magic tilde sequences? Nu-uh!
		if ( str_contains( $title, '~~~' ) ) {
			throw new TitleException(
				"Title \"$origTitle\" contains ~~~", 'title-invalid-magic-tilde', $title
			);
		}

		// The limit is measured in bytes (strlen), not characters.
		$maxLength = $ns === $siteConfig->canonicalNamespaceId( 'special' ) ? 512 : 255;
		if ( strlen( $title ) > $maxLength ) {
			throw new TitleException(
				"Title \"$origTitle\" is too long", 'title-invalid-too-long', $title
			);
		}

		# Normally, all wiki links are forced to have an initial capital letter so [[foo]]
		# and [[Foo]] point to the same place. Don't force it for interwikis, since the
		# other site might be case-sensitive.
		if ( $interwiki === null ) {
			$title = UtfNormalValidator::toNFC( $title );
			if ( $siteConfig->namespaceCase( $ns ) === 'first-letter' ) {
				$title = $siteConfig->ucfirst( $title );
			}
		}

		# Can't make a link to a namespace alone... "empty" local links can only be
		# self-links with a fragment identifier.
		if ( $title === '' && $interwiki === null && $ns !== $siteConfig->canonicalNamespaceId( '' ) ) {
			throw new TitleException( 'Empty title', 'title-invalid-empty', $title );
		}

		// Allow IPv6 usernames to start with '::' by canonicalizing IPv6 titles.
		// IP names are not allowed for accounts, and can only be referring to
		// edits from the IP. Given '::' abbreviations and caps/lowercaps,
		// there are numerous ways to present the same IP. Having sp:contribs scan
		// them all is silly and having some show the edits and others not is
		// inconsistent. Same for talk/userpages. Keep them normalized instead.
		if ( $title !== '' && ( # T329690
			$ns === $siteConfig->canonicalNamespaceId( 'user' ) ||
			$ns === $siteConfig->canonicalNamespaceId( 'user_talk' )
		) ) {
			$title = IPUtils::sanitizeIP( $title );
			// IPUtils::sanitizeIP returns null only for bad input
			'@phan-var string $title';
		}

		// Any remaining initial :s are illegal.
		if ( $title !== '' && $title[0] === ':' ) {
			throw new TitleException(
				'Leading colon title', 'title-invalid-leading-colon', $title
			);
		}

		// This is not in core's TitleParser::splitTitleString but matches
		// mediawiki-title's newFromText.
		if ( $ns === $siteConfig->canonicalNamespaceId( 'special' ) ) {
			$title = self::fixSpecialName( $siteConfig, $title );
		}

		$namespaceName = $siteConfig->namespaceName( $ns );
		return new self( $interwiki ?? '', $title, $ns, $namespaceName, $fragment );
	}

	/**
	 * The interwiki component of this LinkTarget.
	 * This is the empty string if there is no interwiki component.
	 *
	 * @return string
	 */
	public function getInterwiki(): string {
		return $this->interwiki;
	}

	/**
	 * Get the DBkey, prefixed with interwiki prefix if any.
	 * This is Parsoid's convention, which differs from core;
	 * use ::getDBkey() for a method compatible with core's
	 * convention.
	 *
	 * @return string
	 * @see ::getDBkey()
	 * @deprecated since 0.20; use ::getDBkey()
	 */
	public function getKey(): string {
		PHPUtils::deprecated( __METHOD__, "0.20" );
		// Compare against '' explicitly (as the getPrefixed*() methods do):
		// a truthiness test would treat the prefix "0" as "no interwiki".
		if ( $this->interwiki !== '' ) {
			return $this->interwiki . ':' . $this->dbkey;
		}
		return $this->dbkey;
	}

	/**
	 * Get the main part of the link target, in canonical database form.
	 *
	 * The main part is the link target without namespace prefix or hash fragment.
	 * The database form means that spaces become underscores, this is also
	 * used for URLs.
	 *
	 * @return string
	 */
	public function getDBkey(): string {
		return $this->dbkey;
	}

	/**
	 * Get the prefixed DBkey: interwiki prefix (if any), namespace name with
	 * underscores (if non-empty) and the DBkey, joined by ':'.
	 * The result is computed once and cached.
	 *
	 * @return string
	 */
	public function getPrefixedDBKey(): string {
		if ( $this->prefixedDBKey === null ) {
			$this->prefixedDBKey = $this->interwiki === '' ? '' :
				( $this->interwiki . ':' );
			$this->prefixedDBKey .= $this->namespaceName === '' ? '' :
				( strtr( $this->namespaceName, ' ', '_' ) . ':' );
			$this->prefixedDBKey .= $this->getDBkey();
		}
		return $this->prefixedDBKey;
	}

	/**
	 * Get the prefixed text: interwiki prefix (if any), namespace name
	 * (if non-empty) and the result of ::getText(), joined by ':'.
	 * The result is computed once and cached.
	 *
	 * @return string
	 */
	public function getPrefixedText(): string {
		if ( $this->prefixedText === null ) {
			$this->prefixedText = $this->interwiki === '' ? '' :
				( $this->interwiki . ':' );
			$this->prefixedText .= $this->namespaceName === '' ? '' :
				( $this->namespaceName . ':' );
			$this->prefixedText .= $this->getText();
		}
		return $this->prefixedText;
	}

	/**
	 * Get the prefixed title with spaces, plus any fragment
	 * (part beginning with '#')
	 *
	 * @return string The prefixed title, with spaces and the fragment, including '#'
	 */
	public function getFullText(): string {
		$text = $this->getPrefixedText();
		if ( $this->hasFragment() ) {
			$text .= '#' . $this->getFragment();
		}
		return $text;
	}

	/**
	 * Get the prefixed title with underscores, plus any fragment
	 * (part beginning with '#')
	 *
	 * @return string The prefixed title, with underscores, and the fragment, including '#'
	 * @note This method is Parsoid-only and doesn't exist in mediawiki-core's
	 *  Title class.
	 */
	public function getFullDBKey(): string {
		$dbkey = $this->getPrefixedDBKey();
		if ( $this->hasFragment() ) {
			$dbkey .= '#' . $this->getFragment();
		}
		return $dbkey;
	}

	/**
	 * Get the namespace ID
	 * @return int
	 */
	public function getNamespace(): int {
		return $this->namespaceId;
	}

	/**
	 * Get the human-readable name for the namespace
	 * (with spaces, not underscores).
	 * @return string
	 */
	public function getNamespaceName(): string {
		return $this->namespaceName;
	}

	/**
	 * Get the link fragment in text form (i.e. the bit after the hash `#`).
	 *
	 * @return string link fragment, or the empty string if there is none
	 */
	public function getFragment(): string {
		// The constructor always stores a string, so no null fallback is needed.
		return $this->fragment;
	}

	/**
	 * Compare with another title.
	 *
	 * Two titles are equal when namespace ID, interwiki prefix and DBkey
	 * all match; the fragment is not compared.
	 *
	 * @param Title $title
	 * @return bool
	 */
	public function equals( Title $title ) {
		return $this->getNamespace() === $title->getNamespace() &&
			$this->getInterwiki() === $title->getInterwiki() &&
			$this->getDBkey() === $title->getDBkey();
	}

	/**
	 * Returns true if this is a special page.
	 *
	 * @return bool
	 */
	public function isSpecialPage() {
		return $this->getNamespace() === -1; // NS_SPECIAL;
	}

	/**
	 * Use the default special page alias.
	 *
	 * Only the part before the first '/' is looked up; the remainder
	 * is kept as-is. If the site config has no local name for it, the
	 * title is returned unchanged.
	 *
	 * @param SiteConfig $siteConfig
	 * @param string $title
	 * @return string
	 */
	public static function fixSpecialName(
		SiteConfig $siteConfig, string $title
	): string {
		$parts = explode( '/', $title, 2 );
		$specialName = $siteConfig->specialPageLocalName( $parts[0] );
		if ( $specialName !== null ) {
			$parts[0] = $specialName;
			$title = implode( '/', $parts );
		}
		return $title;
	}

	/**
	 * Create a new LinkTarget with a different fragment on the same page.
	 *
	 * It is expected that the same type of object will be returned, but the
	 * only requirement is that it is a LinkTarget.
	 *
	 * @param string $fragment The fragment override, or "" to remove it.
	 *
	 * @return self
	 */
	public function createFragmentTarget( string $fragment ) {
		// Pass the fragment through unchanged: "" already means "no fragment",
		// and a truthiness test (`?:`) would wrongly discard the fragment "0".
		return new self( $this->interwiki, $this->dbkey, $this->namespaceId, $this->namespaceName, $fragment );
	}

	/**
	 * Convert LinkTarget from core (or other implementation) into a
	 * Parsoid Title.
	 *
	 * A LinkTarget that already is a Parsoid Title is returned as-is.
	 *
	 * @param LinkTarget $linkTarget
	 * @param SiteConfig $siteConfig Used to look up the namespace name
	 * @return self
	 */
	public static function newFromLinkTarget(
		LinkTarget $linkTarget, SiteConfig $siteConfig
	) {
		if ( $linkTarget instanceof Title ) {
			return $linkTarget;
		}
		$ns = $linkTarget->getNamespace();
		$namespaceName = $siteConfig->namespaceName( $ns );
		Assert::invariant(
			$namespaceName !== null,
			"Badtitle ({$linkTarget}) in unknown namespace ({$ns})"
		);
		return new self(
			$linkTarget->getInterwiki(),
			$linkTarget->getDBkey(),
			$ns,
			$namespaceName,
			$linkTarget->getFragment()
		);
	}
}