Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
81.42% |
276 / 339 |
|
67.80% |
40 / 59 |
CRAP | |
0.00% |
0 / 1 |
| SiteConfig | |
81.42% |
276 / 339 |
|
67.80% |
40 / 59 |
192.51 | |
0.00% |
0 / 1 |
| __construct | |
56.25% |
9 / 16 |
|
0.00% |
0 / 1 |
5.34 | |||
| reset | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
2 | |||
| combineRegexArrays | |
85.71% |
6 / 7 |
|
0.00% |
0 / 1 |
4.05 | |||
| addNamespace | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
2 | |||
| detectFeatures | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
6 | |||
| hasVideoInfo | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
| getCustomSiteConfigFileName | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| loadSiteData | |
95.56% |
86 / 90 |
|
0.00% |
0 / 1 |
22 | |||
| galleryOptions | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
| allowedExternalImagePrefixes | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| determineArticlePath | |
84.21% |
16 / 19 |
|
0.00% |
0 / 1 |
4.06 | |||
| baseURI | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
2 | |||
| relativeLinkPrefix | |
66.67% |
2 / 3 |
|
0.00% |
0 / 1 |
2.15 | |||
| canonicalNamespaceId | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| namespaceId | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
| namespaceName | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| namespaceHasSubpages | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| namespaceCase | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| specialPageLocalName | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
| magicLinkEnabled | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
| interwikiMagic | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| interwikiMap | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| iwp | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| legalTitleChars | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| linkPrefixRegex | |
75.00% |
3 / 4 |
|
0.00% |
0 / 1 |
2.06 | |||
| linkTrail | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| langBcp47 | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| mainpage | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| mainPageLinkTarget | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
| getMWConfigValue | |
87.50% |
7 / 8 |
|
0.00% |
0 / 1 |
5.05 | |||
| rtl | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| langConverterEnabledBcp47 | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| script | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| scriptpath | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| server | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| exportMetadataToHeadBcp47 | |
0.00% |
0 / 13 |
|
0.00% |
0 / 1 |
2 | |||
| redirectRegexp | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| categoryRegexp | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| bswRegexp | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| timezoneOffset | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| variantsFor | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| widthOption | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| getVariableIDs | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| haveComputedFunctionSynonyms | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| updateFunctionSynonym | |
97.67% |
42 / 43 |
|
0.00% |
0 / 1 |
6 | |||
| getMagicWords | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| getMagicWordMatcher | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| getParameterizedAliasMatcher | |
100.00% |
12 / 12 |
|
100.00% |
1 / 1 |
5 | |||
| ensureExtensionTag | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| getNonNativeExtensionTags | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| getMaxTemplateDepth | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| getSpecialNSAliases | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
3 | |||
| getSpecialPageAliases | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
3 | |||
| getProtocols | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| metrics | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
| incrementCounter | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| observeTiming | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| getNoFollowConfig | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
2 | |||
| getExternalLinkTarget | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
| 1 | <?php |
| 2 | |
| 3 | declare( strict_types = 1 ); |
| 4 | |
| 5 | namespace Wikimedia\Parsoid\Config\Api; |
| 6 | |
| 7 | use Liuggio\StatsdClient\Factory\StatsdDataFactoryInterface; |
| 8 | use Wikimedia\Bcp47Code\Bcp47Code; |
| 9 | use Wikimedia\Parsoid\Config\SiteConfig as ISiteConfig; |
| 10 | use Wikimedia\Parsoid\Config\StubMetadataCollector; |
| 11 | use Wikimedia\Parsoid\Core\ContentMetadataCollector; |
| 12 | use Wikimedia\Parsoid\DOM\Document; |
| 13 | use Wikimedia\Parsoid\Mocks\MockMetrics; |
| 14 | use Wikimedia\Parsoid\Utils\ConfigUtils; |
| 15 | use Wikimedia\Parsoid\Utils\PHPUtils; |
| 16 | use Wikimedia\Parsoid\Utils\Title; |
| 17 | use Wikimedia\Parsoid\Utils\UrlUtils; |
| 18 | use Wikimedia\Parsoid\Utils\Utils; |
| 19 | |
| 20 | /** |
| 21 | * SiteConfig via MediaWiki's Action API |
| 22 | * |
| 23 | * Note this is intended for testing, not performance. |
| 24 | */ |
| 25 | class SiteConfig extends ISiteConfig { |
| 26 | |
| 27 | /** @var ApiHelper */ |
| 28 | private $api; |
| 29 | |
| 30 | /** @var array|null */ |
| 31 | private $siteData; |
| 32 | |
| 33 | /** @var array|null */ |
| 34 | private $protocols; |
| 35 | |
| 36 | /** @var string|null */ |
| 37 | private $baseUri; |
| 38 | |
| 39 | /** @var string|null */ |
| 40 | private $relativeLinkPrefix; |
| 41 | |
| 42 | /** @var string */ |
| 43 | private $savedCategoryRegexp; |
| 44 | |
| 45 | /** @var string */ |
| 46 | private $savedRedirectRegexp; |
| 47 | |
| 48 | /** @var string */ |
| 49 | private $savedBswRegexp; |
| 50 | |
| 51 | /** @var array<int,string> */ |
| 52 | protected $nsNames = []; |
| 53 | |
| 54 | /** @var array<int,string> */ |
| 55 | protected $nsCase = []; |
| 56 | |
| 57 | /** @var array<string,int> */ |
| 58 | protected $nsIds = []; |
| 59 | |
| 60 | /** @var array<string,int> */ |
| 61 | protected $nsCanon = []; |
| 62 | |
| 63 | /** @var array<int,bool> */ |
| 64 | protected $nsWithSubpages = []; |
| 65 | |
| 66 | /** @var array<string,string> */ |
| 67 | private $specialPageNames = []; |
| 68 | |
| 69 | /** @var array */ |
| 70 | private $specialPageAliases = []; |
| 71 | |
| 72 | /** @var array|null */ |
| 73 | private $interwikiMap; |
| 74 | |
| 75 | /** @var array<string,array>|null Keys are stored as lowercased BCP-47 code strings */ |
| 76 | private $variants; |
| 77 | |
| 78 | /** @var array<string,bool>|null Keys are stored as lowercased BCP-47 code strings */ |
| 79 | private $langConverterEnabled; |
| 80 | |
| 81 | /** @var array|null */ |
| 82 | private $apiMagicWords; |
| 83 | |
| 84 | /** @var array|null */ |
| 85 | private $paramMWs; |
| 86 | |
| 87 | /** @var array|null */ |
| 88 | private $apiVariables; |
| 89 | |
| 90 | /** @var array|null */ |
| 91 | private $apiFunctionHooks; |
| 92 | |
| 93 | /** @var array|null */ |
| 94 | private $allMWs; |
| 95 | |
| 96 | /** @var array|null */ |
| 97 | private $extensionTags; |
| 98 | |
| 99 | /** @var int|null */ |
| 100 | private $widthOption; |
| 101 | |
| 102 | /** @var int */ |
| 103 | private $maxDepth = 40; |
| 104 | |
| 105 | private bool $featureDetectionDone = false; |
| 106 | private bool $hasVideoInfo = false; |
| 107 | |
| 108 | /** If set, generate experimental Parsoid HTML v3 parser function output */ |
| 109 | private bool $v3pf; |
| 110 | |
| 111 | /** @var string[] Base parameters for a siteinfo query */ |
| 112 | public const SITE_CONFIG_QUERY_PARAMS = [ |
| 113 | 'action' => 'query', |
| 114 | 'meta' => 'siteinfo', |
| 115 | 'siprop' => 'general|protocols|namespaces|namespacealiases|magicwords|interwikimap|' |
| 116 | . 'languagevariants|defaultoptions|specialpagealiases|extensiontags|' |
| 117 | . 'functionhooks|variables', |
| 118 | ]; |
| 119 | |
/**
 * @param ApiHelper $api Helper used to issue Action API requests
 * @param array $opts Options read by this constructor:
 *  - linting: enables the linter (cast to bool, default false)
 *  - addHTMLTemplateParameters: cast to bool, default false
 *  - maxDepth: maximum template depth (cast to int, default 40)
 *  - logger: logger to install; self::createLogger() is used when absent
 *  - wt2htmlLimits / html2wtLimits: arrays merged over the existing limits
 *  - v3pf: value later returned for the
 *    'ParsoidExperimentalParserFunctionOutput' config key (cast to bool,
 *    default false)
 */
public function __construct( ApiHelper $api, array $opts ) {
	parent::__construct();

	$this->api = $api;

	$this->linterEnabled = (bool)( $opts['linting'] ?? false );
	$this->addHTMLTemplateParameters = (bool)( $opts['addHTMLTemplateParameters'] ?? false );

	if ( isset( $opts['maxDepth'] ) ) {
		$this->maxDepth = (int)$opts['maxDepth'];
	}

	$this->setLogger( $opts['logger'] ?? self::createLogger() );

	if ( isset( $opts['wt2htmlLimits'] ) ) {
		$this->wt2htmlLimits = array_merge(
			$this->wt2htmlLimits, $opts['wt2htmlLimits']
		);
	}
	if ( isset( $opts['html2wtLimits'] ) ) {
		$this->html2wtLimits = array_merge(
			$this->html2wtLimits, $opts['html2wtLimits']
		);
	}

	// Cast like the other boolean options: this file declares strict_types,
	// so assigning a non-bool (e.g. 1 or "1") to the typed bool property
	// would otherwise throw a TypeError.
	$this->v3pf = (bool)( $opts['v3pf'] ?? false );
}
| 147 | |
/**
 * Drop the cached site data and URL pieces so that they are recomputed
 * on next use, and clear several caches held by the superclass.
 */
protected function reset() {
	// Superclass value reset since parsertests reuse SiteConfig objects
	$this->linkTrailRegex = false;
	$this->mwAliases = null;
	$this->interwikiMapNoNamespaces = null;
	$this->iwMatcher = null;

	// Values cached by this class
	$this->relativeLinkPrefix = null;
	$this->baseUri = null;
	$this->siteData = null;
}
| 158 | |
/**
 * Combine sets of regex fragments into a single fragment.
 *
 * @param string[][] $res
 *  - $res[0], when set, holds the case-insensitive fragments; they are
 *    joined and wrapped in a `(?i:...)` group.
 *  - $res[1], when set, holds the case-sensitive fragments; they are
 *    joined and wrapped in a `(?:...)` group.
 * @return string Combined regex fragment. May be an alternation. Assumes
 *  the outer environment is case-sensitive. An empty $res yields a
 *  fragment that never matches.
 */
private function combineRegexArrays( array $res ): string {
	if ( !$res ) {
		// None? Return a failing regex
		return '(?!)';
	}

	if ( isset( $res[0] ) ) {
		$insensitive = implode( '|', $res[0] );
		$res[0] = '(?i:' . $insensitive . ')';
	}
	if ( isset( $res[1] ) ) {
		$sensitive = implode( '|', $res[1] );
		$res[1] = '(?:' . $sensitive . ')';
	}

	return implode( '|', $res );
}
| 180 | |
/**
 * Register one namespace in the lookup tables of this config.
 *
 * Kept protected so that mocks and parser test versions can
 * add new namespaces as required.
 *
 * @param array $ns Namespace info. Reads the 'id', 'name', 'subpages' and
 *  'case' entries, plus 'canonical' when present (falls back to 'name').
 */
protected function addNamespace( array $ns ): void {
	$id = (int)$ns['id'];
	$localName = $ns['name'];
	$canonicalName = $ns['canonical'] ?? $localName;

	$this->nsNames[$id] = $localName;

	$localKey = Utils::normalizeNamespaceName( $localName );
	$this->nsIds[$localKey] = $id;

	$canonicalKey = Utils::normalizeNamespaceName( $canonicalName );
	$this->nsCanon[$canonicalKey] = $id;

	// Only namespaces that allow subpages get an entry
	if ( $ns['subpages'] ) {
		$this->nsWithSubpages[$id] = true;
	}

	$this->nsCase[$id] = (string)$ns['case'];
}
| 199 | |
/**
 * Probe the API once for optional features and cache the outcome.
 *
 * Requests paraminfo for the 'query' module and records whether
 * 'videoinfo' is listed in the 'type' of that module's first reported
 * parameter. Later calls return immediately.
 */
private function detectFeatures(): void {
	if ( $this->featureDetectionDone ) {
		return;
	}
	$this->featureDetectionDone = true;

	$data = $this->api->makeRequest( [ 'action' => 'paraminfo', 'modules' => 'query' ] );
	$props = $data["paraminfo"]["modules"][0]["parameters"]["0"]["type"] ?? null;

	// 'type' is only searchable when it is a list of values; in_array()
	// throws a TypeError when handed anything other than an array.
	$this->hasVideoInfo = is_array( $props ) && in_array( 'videoinfo', $props, true );
}
| 208 | |
/**
 * Whether the API advertised 'videoinfo'; runs feature detection on
 * first use.
 *
 * @return bool
 */
public function hasVideoInfo(): bool {
	// No-op after the first call
	$this->detectFeatures();

	$available = $this->hasVideoInfo;
	return $available;
}
| 213 | |
/**
 * Path of the optional custom site config file.
 *
 * Lets us do standalone development testing of features that need
 * custom siteconfig. For now, we need new magic words defined.
 * In the future, this file could include other custom config.
 *
 * @return string File name inside this class's directory
 */
protected function getCustomSiteConfigFileName(): string {
	$fileName = "/standalone.siteconfig.json";
	return __DIR__ . $fileName;
}
| 224 | |
/**
 * Load site data from the Action API, if necessary.
 *
 * Issues one request with self::SITE_CONFIG_QUERY_PARAMS and derives from
 * the response: the namespace tables, the magic word lists and matchers,
 * the interwiki map, the language variant tables, the extension tag set,
 * the special page alias table and the saved category / redirect /
 * behavior-switch regexps. Returns immediately when $this->siteData is
 * already populated.
 *
 * @throws \UnexpectedValueException When the custom site config file
 *  exists but cannot be read or does not contain a JSON object/array
 */
private function loadSiteData(): void {
	if ( $this->siteData !== null ) {
		return;
	}

	$data = $this->api->makeRequest( self::SITE_CONFIG_QUERY_PARAMS )['query'];

	// Assigned before anything else: ensureExtensionTag(), used further
	// down, calls back into this method and relies on the early return.
	$this->siteData = $data['general'];
	$this->widthOption = $data['general']['thumblimits'][$data['defaultoptions']['thumbsize']];
	$this->protocols = $data['protocols'];
	$this->apiVariables = $data['variables'];
	$this->apiFunctionHooks = PHPUtils::makeSet( $data['functionhooks'] );

	// Process namespace data from API
	$this->nsNames = [];
	$this->nsCase = [];
	$this->nsIds = [];
	$this->nsCanon = [];
	$this->nsWithSubpages = [];
	foreach ( $data['namespaces'] as $ns ) {
		$this->addNamespace( $ns );
	}
	foreach ( $data['namespacealiases'] as $ns ) {
		// Cast like addNamespace() does, so $nsIds only ever holds ints
		$this->nsIds[Utils::normalizeNamespaceName( $ns['alias'] )] = (int)$ns['id'];
	}

	// Process magic word data from API
	$bsws = [];
	$this->paramMWs = [];
	$this->allMWs = [];

	// Fold custom magic words into the API response
	$f = $this->getCustomSiteConfigFileName();
	if ( file_exists( $f ) ) {
		$raw = file_get_contents( $f );
		$config = is_string( $raw ) ? json_decode( $raw, true ) : null;
		if ( !is_array( $config ) ) {
			// Fail with a readable message instead of a TypeError further down
			throw new \UnexpectedValueException( "Custom site config '$f' is not readable as JSON" );
		}
		// The file may legitimately carry no magic words
		if ( is_array( $config['magicwords'] ?? null ) ) {
			PHPUtils::pushArray( $data['magicwords'], $config['magicwords'] );
		}
	}

	// Recast the API results in the format that core MediaWiki returns internally
	// This enables us to use the Production SiteConfig without changes and add the
	// extra overhead to this developer API usage.
	$this->apiMagicWords = [];
	foreach ( $data['magicwords'] as $mw ) {
		$cs = (int)$mw['case-sensitive'];
		$mwName = $mw['name'];
		// First entry is the case-sensitivity flag, followed by the aliases
		$this->apiMagicWords[$mwName][] = $cs;
		$pmws = [];
		$allMWs = [];
		foreach ( $mw['aliases'] as $alias ) {
			$this->apiMagicWords[$mwName][] = $alias;
			// Aliases for double underscore mws include the underscores
			if ( substr( $alias, 0, 2 ) === '__' && substr( $alias, -2 ) === '__' ) {
				$bsws[$cs][] = preg_quote( substr( $alias, 2, -2 ), '@' );
			}
			// Aliases containing '$1' take a parameter: capture it
			if ( strpos( $alias, '$1' ) !== false ) {
				$pmws[$cs][] = strtr( preg_quote( $alias, '/' ), [ '\\$1' => "(.*?)" ] );
			}
			$allMWs[$cs][] = preg_quote( $alias, '/' );
		}

		if ( $pmws ) {
			$this->paramMWs[$mwName] = '/^(?:' . $this->combineRegexArrays( $pmws ) . ')$/uDS';
		}
		$this->allMWs[$mwName] = '/^(?:' . $this->combineRegexArrays( $allMWs ) . ')$/D';
	}

	$bswRegexp = $this->combineRegexArrays( $bsws );

	// Parse interwiki map data from the API
	$this->interwikiMap = ConfigUtils::computeInterwikiMap( $data['interwikimap'] );

	// Parse variant data from the API
	# T320662: API should return these in BCP-47 forms
	$this->langConverterEnabled = [];
	$this->variants = [];
	foreach ( $data['languagevariants'] as $base => $variants ) {
		$baseBcp47 = Utils::mwCodeToBcp47( $base );
		if ( $this->siteData['langconversion'] ) {
			$baseKey = strtolower( $baseBcp47->toBcp47Code() );
			$this->langConverterEnabled[$baseKey] = true;
			foreach ( $variants as $code => $vdata ) {
				$variantKey = strtolower( Utils::mwCodeToBcp47( $code )->toBcp47Code() );
				$this->variants[$variantKey] = [
					'base' => $baseBcp47,
					'fallbacks' => array_map(
						[ Utils::class, 'mwCodeToBcp47' ],
						$vdata['fallbacks']
					),
				];
			}
		}
	}

	// Parse extension tag data from the API
	$this->extensionTags = [];
	foreach ( $data['extensiontags'] as $tag ) {
		// Strip the surrounding angle brackets
		$tag = preg_replace( '/^<|>$/D', '', $tag );
		$this->ensureExtensionTag( $tag );
	}

	// Map the upper-cased, underscore-separated real name and every alias
	// of each special page to that page's first alias.
	$this->specialPageAliases = $data['specialpagealiases'];
	$this->specialPageNames = [];
	foreach ( $this->specialPageAliases as $special ) {
		$alias = strtr( mb_strtoupper( $special['realname'] ), ' ', '_' );
		$this->specialPageNames[$alias] = $special['aliases'][0];
		foreach ( $special['aliases'] as $alias ) {
			$alias = strtr( mb_strtoupper( $alias ), ' ', '_' );
			$this->specialPageNames[$alias] = $special['aliases'][0];
		}
	}

	// Fallback used when the API reports no 'redirect' magic word
	$redirect = '(?i:\#REDIRECT)';
	foreach ( $data['magicwords'] as $mw ) {
		if ( $mw['name'] === 'redirect' ) {
			// preg_quote() escapes '#' itself on every PHP version able to
			// parse this file (typed properties need 7.4; '#' is quoted
			// since 7.3), so no manual escaping is required.
			$redirect = implode( '|', array_map(
				static function ( $s ) {
					return preg_quote( $s, '@' );
				},
				$mw['aliases']
			) );
			if ( !$mw['case-sensitive'] ) {
				$redirect = '(?i:' . $redirect . ')';
			}
			break;
		}
	}
	// `$this->nsNames[14]` is set earlier by the calls to `$this->addNamespace( $ns )`
	// @phan-suppress-next-line PhanCoalescingAlwaysNull
	$category = $this->quoteTitleRe( $this->nsNames[14] ?? 'Category', '@' );
	if ( $category !== 'Category' ) {
		// Accept the literal 'Category' next to the wiki's own name
		$category = "(?:$category|Category)";
	}

	$this->savedCategoryRegexp = "@{$category}@";
	$this->savedRedirectRegexp = "@{$redirect}@";
	$this->savedBswRegexp = "@{$bswRegexp}@";
}
| 371 | |
/**
 * The 'galleryoptions' entry of the loaded general site data.
 *
 * @return array
 */
public function galleryOptions(): array {
	$this->loadSiteData();
	$options = $this->siteData['galleryoptions'];
	return $options;
}
| 376 | |
/**
 * The 'externalimages' entry of the loaded general site data.
 *
 * @return array Empty when the entry is not present
 */
public function allowedExternalImagePrefixes(): array {
	$this->loadSiteData();
	$prefixes = $this->siteData['externalimages'] ?? [];
	return $prefixes;
}
| 381 | |
/**
 * Compute and cache the article base URI and the relative link prefix
 * from the site's 'server' and 'articlepath' values.
 *
 * @throws \UnexpectedValueException When the article path does not end
 *  in '$1' or the resulting URL cannot be parsed
 */
private function determineArticlePath(): void {
	$this->loadSiteData();

	$articleUrl = $this->siteData['server'] . $this->siteData['articlepath'];
	if ( substr( $articleUrl, -2 ) !== '$1' ) {
		throw new \UnexpectedValueException( "Article path '$articleUrl' does not have '$1' at the end" );
	}

	// Drop the trailing '$1' placeholder
	$prefixUrl = substr( $articleUrl, 0, -2 );

	$parsed = UrlUtils::parseUrl( $prefixUrl );
	if ( !$parsed ) {
		throw new \UnexpectedValueException( "Failed to parse article path '$prefixUrl'" );
	}

	$path = empty( $parsed['path'] )
		? '/'
		: UrlUtils::removeDotSegments( $parsed['path'] );

	// Query and fragment go to the relative prefix, everything else to the base
	$relKeys = [ 'query' => true, 'fragment' => true ];
	$baseParts = array_diff_key( $parsed, $relKeys );
	$relParts = array_intersect_key( $parsed, $relKeys );

	// Split at the last slash: the base keeps everything up to and including
	// it, the relative prefix is '.' followed by the slash and the remainder.
	$lastSlash = strrpos( $path, '/' );
	$baseParts['path'] = substr( $path, 0, $lastSlash + 1 );
	$relParts['path'] = '.' . substr( $path, $lastSlash );

	$this->baseUri = UrlUtils::assembleUrl( $baseParts );
	$this->relativeLinkPrefix = UrlUtils::assembleUrl( $relParts );
}
| 417 | |
/**
 * The article base URI, computed on first use and cached afterwards.
 *
 * @return string
 */
public function baseURI(): string {
	if ( $this->baseUri !== null ) {
		return $this->baseUri;
	}
	$this->determineArticlePath();
	return $this->baseUri;
}
| 424 | |
/**
 * The relative link prefix, computed on first use and cached afterwards.
 *
 * @return string
 */
public function relativeLinkPrefix(): string {
	if ( $this->relativeLinkPrefix !== null ) {
		return $this->relativeLinkPrefix;
	}
	$this->determineArticlePath();
	return $this->relativeLinkPrefix;
}
| 431 | |
/**
 * @inheritDoc
 *
 * Looks the normalized name up in the canonical-name table only.
 */
public function canonicalNamespaceId( string $name ): ?int {
	$this->loadSiteData();
	$key = Utils::normalizeNamespaceName( $name );
	return $this->nsCanon[$key] ?? null;
}
| 437 | |
/**
 * @inheritDoc
 *
 * Canonical names are consulted first, then local names and aliases.
 */
public function namespaceId( string $name ): ?int {
	$this->loadSiteData();
	$key = Utils::normalizeNamespaceName( $name );
	if ( isset( $this->nsCanon[$key] ) ) {
		return $this->nsCanon[$key];
	}
	return $this->nsIds[$key] ?? null;
}
| 444 | |
/**
 * @inheritDoc
 *
 * Yields null for ids that were never registered.
 */
public function namespaceName( int $ns ): ?string {
	$this->loadSiteData();
	$name = $this->nsNames[$ns] ?? null;
	return $name;
}
| 450 | |
/**
 * @inheritDoc
 *
 * Namespaces without an entry in the subpage table count as false.
 */
public function namespaceHasSubpages( int $ns ): bool {
	$this->loadSiteData();
	$hasSubpages = $this->nsWithSubpages[$ns] ?? false;
	return $hasSubpages;
}
| 456 | |
/**
 * @inheritDoc
 *
 * Falls back to 'first-letter' for ids that were never registered.
 */
public function namespaceCase( int $ns ): string {
	$this->loadSiteData();
	$case = $this->nsCase[$ns] ?? 'first-letter';
	return $case;
}
| 462 | |
/**
 * @inheritDoc
 *
 * The alias is upper-cased and its spaces turned into underscores
 * before the lookup, matching how the table keys are built.
 */
public function specialPageLocalName( string $alias ): ?string {
	$this->loadSiteData();
	$upper = mb_strtoupper( $alias );
	$key = strtr( $upper, ' ', '_' );
	return $this->specialPageNames[$key] ?? null;
}
| 469 | |
/**
 * @inheritDoc
 *
 * Reads the 'magiclinks' entry of the general site data.
 */
public function magicLinkEnabled( string $which ): bool {
	$this->loadSiteData();
	$settings = $this->siteData['magiclinks'] ?? [];
	if ( isset( $settings[$which] ) ) {
		return $settings[$which];
	}
	// Default to true, as wikis too old to export the 'magiclinks'
	// property always had magic links enabled.
	return true;
}
| 478 | |
/**
 * The 'interwikimagic' entry of the loaded general site data.
 *
 * @return bool
 */
public function interwikiMagic(): bool {
	$this->loadSiteData();
	$enabled = $this->siteData['interwikimagic'];
	return $enabled;
}
| 483 | |
/**
 * The interwiki map computed from the API data when site data was loaded.
 *
 * @return array
 */
public function interwikiMap(): array {
	$this->loadSiteData();
	$map = $this->interwikiMap;
	return $map;
}
| 488 | |
/**
 * The 'wikiid' entry of the loaded general site data.
 *
 * @return string
 */
public function iwp(): string {
	$this->loadSiteData();
	$wikiId = $this->siteData['wikiid'];
	return $wikiId;
}
| 493 | |
/**
 * The 'legaltitlechars' entry of the loaded general site data.
 *
 * @return string
 */
public function legalTitleChars(): string {
	$this->loadSiteData();
	$chars = $this->siteData['legaltitlechars'];
	return $chars;
}
| 498 | |
/**
 * Regex matching a run of link prefix characters at the end of a string,
 * built from the site's 'linkprefixcharset'.
 *
 * @return string|null Null when the site reports no (or an empty) charset
 */
public function linkPrefixRegex(): ?string {
	$this->loadSiteData();

	if ( empty( $this->siteData['linkprefixcharset'] ) ) {
		// We don't care about super-old MediaWiki, so don't try to parse 'linkprefix'.
		return null;
	}

	$charset = $this->siteData['linkprefixcharset'];
	return '/[' . $charset . ']+$/Du';
}
| 509 | |
/**
 * @inheritDoc
 *
 * Returned verbatim from the 'linktrail' entry of the general site data.
 */
protected function linkTrail(): string {
	$this->loadSiteData();
	$trail = $this->siteData['linktrail'];
	return $trail;
}
| 515 | |
/**
 * The site's 'lang' code converted with Utils::mwCodeToBcp47().
 *
 * @return Bcp47Code
 */
public function langBcp47(): Bcp47Code {
	$this->loadSiteData();
	$mwCode = $this->siteData['lang'];
	return Utils::mwCodeToBcp47( $mwCode );
}
| 520 | |
/**
 * The 'mainpage' entry of the loaded general site data.
 *
 * @return string
 */
public function mainpage(): string {
	$this->loadSiteData();
	$title = $this->siteData['mainpage'];
	return $title;
}
| 525 | |
/**
 * The site's 'mainpage' value as a Title built against this config.
 *
 * @return Title
 */
public function mainPageLinkTarget(): Title {
	$this->loadSiteData();
	$text = $this->siteData['mainpage'];
	return Title::newFromText( $text, $this );
}
| 530 | |
/**
 * @inheritDoc
 *
 * Only a handful of keys are supported in this mode; every other key
 * yields null.
 */
public function getMWConfigValue( string $key ) {
	$this->loadSiteData();

	// Taken from the site data when the wiki reports it
	if ( $key === 'CiteResponsiveReferences' ) {
		return $this->siteData['citeresponsivereferences'] ?? false;
	}

	// Hardcoded value
	if ( $key === 'CiteResponsiveReferencesThreshold' ) {
		return 10;
	}

	// Controlled by the 'v3pf' constructor option
	if ( $key === 'ParsoidExperimentalParserFunctionOutput' ) {
		return $this->v3pf;
	}

	// We can add more hardcoded keys based on testing needs
	// but null is the default for keys unsupported in this mode.
	return null;
}
| 551 | |
/**
 * The 'rtl' entry of the loaded general site data.
 *
 * @return bool
 */
public function rtl(): bool {
	$this->loadSiteData();
	$isRtl = $this->siteData['rtl'];
	return $isRtl;
}
| 556 | |
/**
 * @inheritDoc
 *
 * The lookup key is the lowercased BCP-47 code string; languages
 * without an entry count as disabled.
 */
public function langConverterEnabledBcp47( Bcp47Code $lang ): bool {
	$this->loadSiteData();
	$key = strtolower( $lang->toBcp47Code() );
	return $this->langConverterEnabled[$key] ?? false;
}
| 562 | |
/**
 * The 'script' entry of the loaded general site data.
 *
 * @return string
 */
public function script(): string {
	$this->loadSiteData();
	$script = $this->siteData['script'];
	return $script;
}
| 567 | |
/**
 * The 'scriptpath' entry of the loaded general site data.
 *
 * @return string
 */
public function scriptpath(): string {
	$this->loadSiteData();
	$scriptPath = $this->siteData['scriptpath'];
	return $scriptPath;
}
| 572 | |
/**
 * The 'server' entry of the loaded general site data.
 *
 * @return string
 */
public function server(): string {
	$this->loadSiteData();
	$server = $this->siteData['server'];
	return $server;
}
| 577 | |
/**
 * @inheritDoc
 *
 * Builds the load.php URI from server() and scriptpath(), picks the
 * display title, and hands everything to exportMetadataHelper().
 */
public function exportMetadataToHeadBcp47(
	Document $document,
	ContentMetadataCollector $metadata,
	string $defaultTitle,
	Bcp47Code $lang
): void {
	'@phan-var StubMetadataCollector $metadata'; // @var StubMetadataCollector $metadata

	$loadScript = $this->server() . $this->scriptpath() . '/load.php';
	// Parsoid/JS always made this protocol-relative, so match
	// that (for now at least)
	$moduleLoadURI = preg_replace( '#^https?://#', '//', $loadScript );

	// Look for a displaytitle.
	$displayTitle = $metadata->getPageProperty( 'displaytitle' );
	if ( $displayTitle === null ) {
		// Use the default title, properly escaped
		$displayTitle = Utils::escapeHtml( $defaultTitle );
	}

	$modules = $metadata->getModules();
	$moduleStyles = $metadata->getModuleStyles();
	$jsConfigVars = $metadata->getJsConfigVars();

	$this->exportMetadataHelper(
		$document,
		$moduleLoadURI,
		$modules,
		$moduleStyles,
		$jsConfigVars,
		$displayTitle,
		$lang
	);
}
| 606 | |
/**
 * The redirect regexp saved when site data was loaded.
 *
 * @return string Regexp delimited with '@'
 */
public function redirectRegexp(): string {
	$this->loadSiteData();
	$regexp = $this->savedRedirectRegexp;
	return $regexp;
}
| 611 | |
/**
 * The category regexp saved when site data was loaded.
 *
 * @return string Regexp delimited with '@'
 */
public function categoryRegexp(): string {
	$this->loadSiteData();
	$regexp = $this->savedCategoryRegexp;
	return $regexp;
}
| 616 | |
/**
 * The regexp for double-underscore magic words saved when site data
 * was loaded.
 *
 * @return string Regexp delimited with '@'
 */
public function bswRegexp(): string {
	$this->loadSiteData();
	$regexp = $this->savedBswRegexp;
	return $regexp;
}
| 621 | |
/**
 * The 'timeoffset' entry of the loaded general site data.
 *
 * @return int
 */
public function timezoneOffset(): int {
	$this->loadSiteData();
	$offset = $this->siteData['timeoffset'];
	return $offset;
}
| 626 | |
/**
 * @inheritDoc
 *
 * The lookup key is the lowercased BCP-47 code string.
 */
public function variantsFor( Bcp47Code $lang ): ?array {
	$this->loadSiteData();
	$key = strtolower( $lang->toBcp47Code() );
	return $this->variants[$key] ?? null;
}
| 632 | |
/**
 * The thumb limit selected by the default 'thumbsize' option, as
 * resolved when site data was loaded.
 *
 * @return int
 */
public function widthOption(): int {
	$this->loadSiteData();
	$width = $this->widthOption;
	return $width;
}
| 637 | |
/**
 * @inheritDoc
 *
 * Returns the 'variables' list of the siteinfo response.
 */
protected function getVariableIDs(): array {
	$this->loadSiteData();
	$ids = $this->apiVariables;
	return $ids;
}
| 643 | |
/**
 * @inheritDoc
 *
 * Always false in this API-backed config.
 */
protected function haveComputedFunctionSynonyms(): bool {
	$computed = false;
	return $computed;
}
| 648 | |
| 649 | private static ?array $noHashFunctions = null; |
| 650 | |
/**
 * @inheritDoc
 *
 * Only magic words listed among the API's function hooks are recorded.
 * The trailing ':' of $func is dropped, and a leading '#' is added unless
 * the magic word is in the no-hash set below.
 */
protected function updateFunctionSynonym( string $func, string $magicword, bool $caseSensitive ): void {
	// Always go through loadSiteData(): it returns immediately when the
	// data is cached, and (unlike a check on $apiFunctionHooks) it also
	// reloads after reset() has cleared the site data.
	$this->loadSiteData();

	if ( !isset( $this->apiFunctionHooks[$magicword] ) ) {
		return;
	}

	if ( !self::$noHashFunctions ) {
		// FIXME: This is an approximation only computed in non-integrated mode for
		// commandline and developer testing. This set is probably not up to date
		// and also doesn't reflect no-hash functions registered by extensions
		// via setFunctionHook calls. As such, you might run into GOTCHAs during
		// debugging of production issues in standalone / API config mode.
		// Keep this in sync with CoreParserFunctions::register in core.
		self::$noHashFunctions = PHPUtils::makeSet( [
			'ns', 'nse', 'urlencode', 'lcfirst', 'ucfirst', 'lc', 'uc',
			'localurl', 'localurle', 'fullurl', 'fullurle', 'canonicalurl',
			'canonicalurle', 'formatnum', 'grammar', 'gender', 'plural', 'formal',
			'bidi', 'numberingroup', 'language',
			'padleft', 'padright', 'anchorencode', 'defaultsort', 'filepath',
			'pagesincategory', 'pagesize', 'protectionlevel', 'protectionexpiry',
			# The following are the "parser function" forms of magic
			# variables defined in CoreMagicVariables. The no-args form will
			# go through the magic variable code path (and be cached); the
			# presence of arguments will cause the parser function form to
			# be invoked. (Note that the actual implementation will pass
			# a Parser object as first argument, in addition to the
			# parser function parameters.)

			# For this group, the first parameter to the parser function is
			# "page title", and the no-args form (and the magic variable)
			# defaults to "current page title".
			'pagename', 'pagenamee',
			'fullpagename', 'fullpagenamee',
			'subpagename', 'subpagenamee',
			'rootpagename', 'rootpagenamee',
			'basepagename', 'basepagenamee',
			'talkpagename', 'talkpagenamee',
			'subjectpagename', 'subjectpagenamee',
			'pageid', 'revisionid', 'revisionday',
			'revisionday2', 'revisionmonth', 'revisionmonth1', 'revisionyear',
			'revisiontimestamp',
			'revisionuser',
			'cascadingsources',
			'namespace', 'namespacee', 'namespacenumber', 'talkspace', 'talkspacee',
			'subjectspace', 'subjectspacee',

			# More parser functions corresponding to CoreMagicVariables.
			# For this group, the first parameter to the parser function is
			# "raw" (uses the 'raw' format if present) and the no-args form
			# (and the magic variable) defaults to 'not raw'.
			'numberofarticles', 'numberoffiles',
			'numberofusers',
			'numberofactiveusers',
			'numberofpages',
			'numberofadmins',
			'numberofedits',

			# These magic words already contain the hash, and the no-args form
			# is the same as passing an empty first argument
			'bcp47',
			'dir',
			'interwikilink',
			'interlanguagelink',

			# ###############################################
			# The following are not from core's $noHash list
			# but are instead special callbacks from core:
			'int', 'displaytitle', 'pagesinnamespace',
		] );
	}

	// Strip the trailing colon, if any
	$syn = $func;
	if ( substr( $syn, -1 ) === ':' ) {
		$syn = substr( $syn, 0, -1 );
	}
	if ( !isset( self::$noHashFunctions[$magicword] ) ) {
		$syn = '#' . $syn;
	}
	// Synonyms are bucketed by case-sensitivity: index 0 or 1
	$this->functionSynonyms[(int)$caseSensitive][$syn] = $magicword;
}
| 732 | |
/**
 * @inheritDoc
 *
 * Each entry is keyed by magic word name and lists the case-sensitivity
 * flag followed by the aliases, as assembled when site data was loaded.
 */
protected function getMagicWords(): array {
	$this->loadSiteData();
	$words = $this->apiMagicWords;
	return $words;
}
| 738 | |
/**
 * @inheritDoc
 *
 * Unknown ids get a regexp that never matches.
 */
public function getMagicWordMatcher( string $id ): string {
	$this->loadSiteData();
	if ( isset( $this->allMWs[$id] ) ) {
		return $this->allMWs[$id];
	}
	return '/^(?!)$/';
}
| 744 | |
/**
 * @inheritDoc
 *
 * The returned closure tries the parameterized-alias regexp of each
 * requested magic word in turn and reports the first one that matches
 * as [ 'k' => name, 'v' => captured value ], or null when none match.
 */
public function getParameterizedAliasMatcher( array $words ): callable {
	$this->loadSiteData();

	// Keep only the patterns of the magic words that were asked for
	$wanted = array_flip( $words );
	$patterns = array_intersect_key( $this->paramMWs, $wanted );

	return static function ( $text ) use ( $patterns ) {
		// $canonicalName is the canonical magic word name,
		// $pattern has the patterns for matching its aliases
		foreach ( $patterns as $canonicalName => $pattern ) {
			if ( !preg_match( $pattern, $text, $groups ) ) {
				continue;
			}

			// Discard the full match, keep the capture groups
			unset( $groups[0] );

			// Ex. regexp here is, /^(?:(?:|vinculo\=(.*?)|enlace\=(.*?)|link\=(.*?)))$/uS
			// Take the first capture group with a value; if there is none, an
			// empty string is safe to return since we did get a match.
			$value = '';
			foreach ( $groups as $captured ) {
				if ( $captured !== '' ) {
					$value = $captured;
					break;
				}
			}
			return [ 'k' => $canonicalName, 'v' => $value ];
		}
		return null;
	};
}
| 772 | |
/**
 * Register an extension tag as present on this site.
 *
 * The method is public so that hybrid parserTests can use it to keep
 * the environment in sync: the parserTests setup defines a number of
 * non-standard extension tags, and their names are handed over from
 * the JS side during hybrid testing.
 *
 * @param string $tag Name of an extension tag assumed to be present
 */
public function ensureExtensionTag( string $tag ): void {
	$this->loadSiteData();
	// Tags are stored under their lower-cased name.
	$key = mb_strtolower( $tag );
	$this->extensionTags[$key] = true;
}
| 784 | |
/**
 * @inheritDoc
 */
protected function getNonNativeExtensionTags(): array {
	// loadSiteData() runs first, then the tag map is returned as-is.
	$this->loadSiteData();
	$tags = $this->extensionTags;
	return $tags;
}
| 790 | |
/**
 * @inheritDoc
 */
public function getMaxTemplateDepth(): int {
	// This value is not part of the API result, so it is read straight
	// from the instance without loading site data.
	$depth = $this->maxDepth;
	return $depth;
}
| 796 | |
/**
 * @inheritDoc
 */
protected function getSpecialNSAliases(): array {
	// The other data-backed accessors in this class call loadSiteData()
	// before reading their backing property; do the same here so the
	// result does not depend on some other accessor having run first.
	// NOTE(review): $this->nsIds is presumably populated by
	// loadSiteData() -- confirm against that method.
	$this->loadSiteData();

	// The literal name is always included, unquoted.
	$nsAliases = [
		'Special',
	];
	foreach ( $this->nsIds as $name => $id ) {
		// Only names mapped to id -1 are collected; each is passed
		// through quoteTitleRe() with '!' as the delimiter.
		if ( $id === -1 ) {
			$nsAliases[] = $this->quoteTitleRe( $name, '!' );
		}
	}
	return $nsAliases;
}
| 809 | |
/**
 * @inheritDoc
 */
protected function getSpecialPageAliases( string $specialPage ): array {
	// The other data-backed accessors in this class call loadSiteData()
	// before reading their backing property; do the same here so the
	// result does not depend on some other accessor having run first.
	// NOTE(review): $this->specialPageAliases is presumably populated by
	// loadSiteData() -- confirm against that method.
	$this->loadSiteData();

	// The requested name itself always comes first.
	$spAliases = [ $specialPage ];
	foreach ( $this->specialPageAliases as $special ) {
		if ( $special['realname'] === $specialPage ) {
			$spAliases = array_merge( $spAliases, $special['aliases'] );
			// Only the first entry with a matching realname is used.
			break;
		}
	}
	return $spAliases;
}
| 821 | |
/**
 * @inheritDoc
 */
protected function getProtocols(): array {
	// loadSiteData() runs first, then the protocol list is returned as-is.
	$this->loadSiteData();
	$protocols = $this->protocols;
	return $protocols;
}
| 827 | |
/** @var ?MockMetrics Created on demand by metrics() */
private $metrics;

/**
 * @inheritDoc
 */
public function metrics(): ?StatsdDataFactoryInterface {
	// Reuse the instance from an earlier call when there is one.
	if ( $this->metrics !== null ) {
		return $this->metrics;
	}
	$this->metrics = new MockMetrics();
	return $this->metrics;
}
| 838 | |
/**
 * Increment a counter metric
 *
 * Only $name is forwarded to the metrics object; $labels and $amount
 * are accepted but not used.
 *
 * @param string $name
 * @param array $labels
 * @param float $amount
 * @return void
 */
public function incrementCounter( string $name, array $labels, float $amount = 1 ): void {
	// We don't use the labels for now, using MockMetrics instead.
	// Go through metrics() rather than the raw property: the MockMetrics
	// instance is created lazily there, so $this->metrics may still be
	// null if nothing has called metrics() yet.
	$this->metrics()->increment( $name );
}
| 850 | |
/**
 * Record a timing metric
 *
 * Only $name and $value are forwarded to the metrics object; $labels
 * is accepted but not used.
 *
 * @param string $name
 * @param float $value
 * @param array $labels
 * @return void
 */
public function observeTiming( string $name, float $value, array $labels ): void {
	// We don't use the labels for now, using MockMetrics instead.
	// Go through metrics() rather than the raw property: the MockMetrics
	// instance is created lazily there, so $this->metrics may still be
	// null if nothing has called metrics() yet.
	$this->metrics()->timing( $name, $value );
}
| 862 | |
/**
 * @inheritDoc
 */
public function getNoFollowConfig(): array {
	$this->loadSiteData();

	// Each setting falls back to a fixed default when the site data
	// has no value for it.
	$config = [];
	$config['nofollow'] = $this->siteData['nofollowlinks'] ?? true;
	$config['nsexceptions'] = $this->siteData['nofollownsexceptions'] ?? [];
	$config['domainexceptions'] = $this->siteData['nofollowdomainexceptions'] ?? [ 'mediawiki.org' ];
	return $config;
}
| 872 | |
/**
 * @inheritDoc
 */
public function getExternalLinkTarget() {
	$this->loadSiteData();
	if ( isset( $this->siteData['externallinktarget'] ) ) {
		return $this->siteData['externallinktarget'];
	}
	// No value in the site data: report false.
	return false;
}
| 878 | } |