Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
78.98% covered (warning)
78.98%
263 / 333
69.64% covered (warning)
69.64%
39 / 56
CRAP
0.00% covered (danger)
0.00%
0 / 1
SiteConfig
78.98% covered (warning)
78.98%
263 / 333
69.64% covered (warning)
69.64%
39 / 56
237.84
0.00% covered (danger)
0.00%
0 / 1
 __construct
47.37% covered (danger)
47.37%
9 / 19
0.00% covered (danger)
0.00%
0 / 1
14.14
 reset
0.00% covered (danger)
0.00%
0 / 6
0.00% covered (danger)
0.00%
0 / 1
2
 combineRegexArrays
85.71% covered (warning)
85.71%
6 / 7
0.00% covered (danger)
0.00%
0 / 1
4.05
 addNamespace
100.00% covered (success)
100.00%
7 / 7
100.00% covered (success)
100.00%
1 / 1
2
 detectFeatures
0.00% covered (danger)
0.00%
0 / 5
0.00% covered (danger)
0.00%
0 / 1
6
 hasVideoInfo
0.00% covered (danger)
0.00%
0 / 2
0.00% covered (danger)
0.00%
0 / 1
2
 loadSiteData
97.67% covered (success)
97.67%
84 / 86
0.00% covered (danger)
0.00%
0 / 1
21
 galleryOptions
0.00% covered (danger)
0.00%
0 / 2
0.00% covered (danger)
0.00%
0 / 1
2
 allowedExternalImagePrefixes
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 determineArticlePath
84.21% covered (warning)
84.21%
16 / 19
0.00% covered (danger)
0.00%
0 / 1
4.06
 baseURI
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
2
 relativeLinkPrefix
66.67% covered (warning)
66.67%
2 / 3
0.00% covered (danger)
0.00%
0 / 1
2.15
 canonicalNamespaceId
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 namespaceId
100.00% covered (success)
100.00%
5 / 5
100.00% covered (success)
100.00%
1 / 1
2
 namespaceName
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 namespaceHasSubpages
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 namespaceCase
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 specialPageLocalName
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
1
 interwikiMagic
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 interwikiMap
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 iwp
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 legalTitleChars
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 linkPrefixRegex
75.00% covered (warning)
75.00%
3 / 4
0.00% covered (danger)
0.00%
0 / 1
2.06
 linkTrail
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 lang
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 mainpage
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 getMWConfigValue
100.00% covered (success)
100.00%
6 / 6
100.00% covered (success)
100.00%
1 / 1
4
 rtl
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 langConverterEnabledBcp47
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 script
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 scriptpath
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 server
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 exportMetadataToHeadBcp47
0.00% covered (danger)
0.00%
0 / 13
0.00% covered (danger)
0.00%
0 / 1
2
 redirectRegexp
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 categoryRegexp
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 bswRegexp
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 timezoneOffset
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 variants
100.00% covered (success)
100.00%
8 / 8
100.00% covered (success)
100.00%
1 / 1
2
 variantsFor
0.00% covered (danger)
0.00%
0 / 2
0.00% covered (danger)
0.00%
0 / 1
2
 widthOption
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 getVariableIDs
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 haveComputedFunctionSynonyms
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 updateFunctionSynonym
96.55% covered (success)
96.55%
28 / 29
0.00% covered (danger)
0.00%
0 / 1
6
 getMagicWords
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 getMagicWordMatcher
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 getParameterizedAliasMatcher
100.00% covered (success)
100.00%
12 / 12
100.00% covered (success)
100.00%
1 / 1
5
 ensureExtensionTag
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 getNonNativeExtensionTags
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 getMaxTemplateDepth
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 getSpecialNSAliases
100.00% covered (success)
100.00%
7 / 7
100.00% covered (success)
100.00%
1 / 1
3
 getSpecialPageAliases
100.00% covered (success)
100.00%
6 / 6
100.00% covered (success)
100.00%
1 / 1
3
 getProtocols
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 fromSettings
0.00% covered (danger)
0.00%
0 / 9
0.00% covered (danger)
0.00%
0 / 1
20
 metrics
0.00% covered (danger)
0.00%
0 / 4
0.00% covered (danger)
0.00%
0 / 1
6
 getNoFollowConfig
0.00% covered (danger)
0.00%
0 / 6
0.00% covered (danger)
0.00%
0 / 1
2
 getExternalLinkTarget
0.00% covered (danger)
0.00%
0 / 2
0.00% covered (danger)
0.00%
0 / 1
2
1<?php
2
3declare( strict_types = 1 );
4
5namespace Wikimedia\Parsoid\Config\Api;
6
7use Liuggio\StatsdClient\Factory\StatsdDataFactoryInterface;
8use Wikimedia\Bcp47Code\Bcp47Code;
9use Wikimedia\Parsoid\Config\SiteConfig as ISiteConfig;
10use Wikimedia\Parsoid\Config\StubMetadataCollector;
11use Wikimedia\Parsoid\Core\ContentMetadataCollector;
12use Wikimedia\Parsoid\DOM\Document;
13use Wikimedia\Parsoid\Mocks\MockMetrics;
14use Wikimedia\Parsoid\Utils\ConfigUtils;
15use Wikimedia\Parsoid\Utils\PHPUtils;
16use Wikimedia\Parsoid\Utils\UrlUtils;
17use Wikimedia\Parsoid\Utils\Utils;
18
19/**
20 * SiteConfig via MediaWiki's Action API
21 *
22 * Note this is intended for testing, not performance.
23 */
24class SiteConfig extends ISiteConfig {
25
26    /** @var ApiHelper */
27    private $api;
28
29    /** @var array|null */
30    private $siteData;
31
32    /** @var array|null */
33    private $protocols;
34
35    /** @var string|null */
36    private $baseUri;
37
38    /** @var string|null */
39    private $relativeLinkPrefix;
40
41    /** @var string */
42    private $savedCategoryRegexp;
43
44    /** @var string */
45    private $savedRedirectRegexp;
46
47    /** @var string */
48    private $savedBswRegexp;
49
50    /** @phan-var array<int,string> */
51    protected $nsNames = [];
52
53    /** @phan-var array<int,string> */
54    protected $nsCase = [];
55
56    /** @phan-var array<string,int> */
57    protected $nsIds = [];
58
59    /** @phan-var array<string,int> */
60    protected $nsCanon = [];
61
62    /** @phan-var array<int,bool> */
63    protected $nsWithSubpages = [];
64
65    /** @phan-var array<string,string> */
66    private $specialPageNames = [];
67
68    /** @phan-var array */
69    private $specialPageAliases = [];
70
71    /** @var array|null */
72    private $interwikiMap;
73
74    /** @var array<string,array>|null Keys are stored as lowercased BCP-47 code strings */
75    private $variants;
76
77    /** @var array<string,bool>|null Keys are stored as lowercased BCP-47 code strings */
78    private $langConverterEnabled;
79
80    /** @var array|null */
81    private $apiMagicWords;
82
83    /** @var array|null */
84    private $paramMWs;
85
86    /** @var array|null */
87    private $apiVariables;
88
89    /** @var array|null */
90    private $apiFunctionHooks;
91
92    /** @var array|null */
93    private $allMWs;
94
95    /** @var array|null */
96    private $extensionTags;
97
98    /** @var int|null */
99    private $widthOption;
100
101    /** @var int */
102    private $maxDepth = 40;
103
104    private $featureDetectionDone = false;
105    private $hasVideoInfo = false;
106
107    /** @var string[] Base parameters for a siteinfo query */
108    public const SITE_CONFIG_QUERY_PARAMS = [
109        'action' => 'query',
110        'meta' => 'siteinfo',
111        'siprop' => 'general|protocols|namespaces|namespacealiases|magicwords|interwikimap|'
112            . 'languagevariants|defaultoptions|specialpagealiases|extensiontags|'
113            . 'functionhooks|variables',
114    ];
115
116    /**
117     * @param ApiHelper $api
118     * @param array $opts
119     */
120    public function __construct( ApiHelper $api, array $opts ) {
121        parent::__construct();
122
123        $this->api = $api;
124
125        if ( isset( $opts['linting'] ) ) {
126            $this->linterEnabled = !empty( $opts['linting'] );
127        }
128
129        if ( isset( $opts['addHTMLTemplateParameters'] ) ) {
130            $this->addHTMLTemplateParameters = !empty( $opts['addHTMLTemplateParameters'] );
131        }
132
133        if ( isset( $opts['maxDepth'] ) ) {
134            $this->maxDepth = $opts['maxDepth'];
135        }
136
137        if ( isset( $opts['logger'] ) ) {
138            $this->setLogger( $opts['logger'] );
139        } else {
140            $this->setLogger( self::createLogger() );
141        }
142
143        if ( isset( $opts['wt2htmlLimits'] ) ) {
144            $this->wt2htmlLimits = array_merge(
145                $this->wt2htmlLimits, $opts['wt2htmlLimits']
146            );
147        }
148        if ( isset( $opts['html2wtLimits'] ) ) {
149            $this->html2wtLimits = array_merge(
150                $this->html2wtLimits, $opts['html2wtLimits']
151            );
152        }
153    }
154
155    protected function reset() {
156        $this->siteData = null;
157        $this->baseUri = null;
158        $this->relativeLinkPrefix = null;
159        // Superclass value reset since parsertests reuse SiteConfig objects
160        $this->linkTrailRegex = false;
161        $this->magicWordMap = null;
162        $this->interwikiMapNoNamespaces = null;
163    }
164
165    /**
166     * Combine sets of regex fragments
167     * @param string[][] $res
168     *  - $regexes[0] are case-insensitive regex fragments. Must not be empty.
169     *  - $regexes[1] are case-sensitive regex fragments. Must not be empty.
170     * @return string Combined regex fragment. May be an alternation. Assumes
171     *  the outer environment is case-sensitive.
172     */
173    private function combineRegexArrays( array $res ): string {
174        if ( $res ) {
175            if ( isset( $res[0] ) ) {
176                $res[0] = '(?i:' . implode( '|', $res[0] ) . ')';
177            }
178            if ( isset( $res[1] ) ) {
179                $res[1] = '(?:' . implode( '|', $res[1] ) . ')';
180            }
181            return implode( '|', $res );
182        }
183        // None? Return a failing regex
184        return '(?!)';
185    }
186
187    /**
188     * Add a new namespace to the config
189     *
190     * Protected access to let mocks and parser tests versions
191     * add new namespaces as required.
192     *
193     * @param array $ns Namespace info
194     */
195    protected function addNamespace( array $ns ): void {
196        $id = (int)$ns['id'];
197        $this->nsNames[$id] = $ns['name'];
198        $this->nsIds[Utils::normalizeNamespaceName( $ns['name'] )] = $id;
199        $this->nsCanon[Utils::normalizeNamespaceName( $ns['canonical'] ?? $ns['name'] )] = $id;
200        if ( $ns['subpages'] ) {
201            $this->nsWithSubpages[$id] = true;
202        }
203        $this->nsCase[$id] = (string)$ns['case'];
204    }
205
206    private function detectFeatures(): void {
207        if ( !$this->featureDetectionDone ) {
208            $this->featureDetectionDone = true;
209            $data = $this->api->makeRequest( [ 'action' => 'paraminfo', 'modules' => 'query' ] );
210            $props = $data["paraminfo"]["modules"][0]["parameters"]["0"]["type"] ?? [];
211            $this->hasVideoInfo = in_array( 'videoinfo', $props, true );
212        }
213    }
214
215    public function hasVideoInfo(): bool {
216        $this->detectFeatures();
217        return $this->hasVideoInfo;
218    }
219
220    /**
221     * Load site data from the Action API, if necessary
222     */
223    private function loadSiteData(): void {
224        if ( $this->siteData !== null ) {
225            return;
226        }
227
228        $data = $this->api->makeRequest( self::SITE_CONFIG_QUERY_PARAMS )['query'];
229
230        $this->siteData = $data['general'];
231        $this->widthOption = $data['general']['thumblimits'][$data['defaultoptions']['thumbsize']];
232        $this->protocols = $data['protocols'];
233        $this->apiVariables = $data['variables'];
234        $this->apiFunctionHooks = PHPUtils::makeSet( $data['functionhooks'] );
235
236        // Process namespace data from API
237        $this->nsNames = [];
238        $this->nsCase = [];
239        $this->nsIds = [];
240        $this->nsCanon = [];
241        $this->nsWithSubpages = [];
242        foreach ( $data['namespaces'] as $ns ) {
243            $this->addNamespace( $ns );
244        }
245        foreach ( $data['namespacealiases'] as $ns ) {
246            $this->nsIds[Utils::normalizeNamespaceName( $ns['alias'] )] = $ns['id'];
247        }
248
249        // Process magic word data from API
250        $bsws = [];
251        $this->paramMWs = [];
252        $this->allMWs = [];
253
254        // Recast the API results in the format that core MediaWiki returns internally
255        // This enables us to use the Production SiteConfig without changes and add the
256        // extra overhead to this developer API usage.
257        $this->apiMagicWords = [];
258        foreach ( $data['magicwords'] as $mw ) {
259            $cs = (int)$mw['case-sensitive'];
260            $mwName = $mw['name'];
261            $this->apiMagicWords[$mwName][] = $cs;
262            $pmws = [];
263            $allMWs = [];
264            foreach ( $mw['aliases'] as $alias ) {
265                $this->apiMagicWords[$mwName][] = $alias;
266                // Aliases for double underscore mws include the underscores
267                if ( substr( $alias, 0, 2 ) === '__' && substr( $alias, -2 ) === '__' ) {
268                    $bsws[$cs][] = preg_quote( substr( $alias, 2, -2 ), '@' );
269                }
270                if ( strpos( $alias, '$1' ) !== false ) {
271                    $pmws[$cs][] = strtr( preg_quote( $alias, '/' ), [ '\\$1' => "(.*?)" ] );
272                }
273                $allMWs[$cs][] = preg_quote( $alias, '/' );
274            }
275
276            if ( $pmws ) {
277                $this->paramMWs[$mwName] = '/^(?:' . $this->combineRegexArrays( $pmws ) . ')$/uDS';
278            }
279            $this->allMWs[$mwName] = '/^(?:' . $this->combineRegexArrays( $allMWs ) . ')$/D';
280        }
281
282        $bswRegexp = $this->combineRegexArrays( $bsws );
283
284        // Parse interwiki map data from the API
285        $this->interwikiMap = ConfigUtils::computeInterwikiMap( $data['interwikimap'] );
286
287        // Parse variant data from the API
288        # T320662: API should return these in BCP-47 forms
289        $this->langConverterEnabled = [];
290        $this->variants = [];
291        foreach ( $data['languagevariants'] as $base => $variants ) {
292            $baseBcp47 = Utils::mwCodeToBcp47( $base );
293            if ( $this->siteData['langconversion'] ) {
294                $baseKey = strtolower( $baseBcp47->toBcp47Code() );
295                $this->langConverterEnabled[$baseKey] = true;
296                foreach ( $variants as $code => $vdata ) {
297                    $variantKey = strtolower( Utils::mwCodeToBcp47( $code )->toBcp47Code() );
298                    $this->variants[$variantKey] = [
299                        'base' => $baseBcp47,
300                        'fallbacks' => array_map(
301                            [ Utils::class, 'mwCodeToBcp47' ],
302                            $vdata['fallbacks']
303                        ),
304                    ];
305                }
306            }
307        }
308
309        // Parse extension tag data from the API
310        $this->extensionTags = [];
311        foreach ( $data['extensiontags'] as $tag ) {
312            $tag = preg_replace( '/^<|>$/D', '', $tag );
313            $this->ensureExtensionTag( $tag );
314        }
315
316        $this->specialPageAliases = $data['specialpagealiases'];
317        $this->specialPageNames = [];
318        foreach ( $this->specialPageAliases as $special ) {
319            $alias = strtr( mb_strtoupper( $special['realname'] ), ' ', '_' );
320            $this->specialPageNames[$alias] = $special['aliases'][0];
321            foreach ( $special['aliases'] as $alias ) {
322                $alias = strtr( mb_strtoupper( $alias ), ' ', '_' );
323                $this->specialPageNames[$alias] = $special['aliases'][0];
324            }
325        }
326
327        $redirect = '(?i:\#REDIRECT)';
328        $quote = static function ( $s ) {
329            $q = preg_quote( $s, '@' );
330            # Note that PHP < 7.3 doesn't escape # in preg_quote.  That means
331            # that the $redirect regexp will fail if used with the `x` flag.
332            # Manually hack around this for PHP 7.2; can remove this workaround
333            # once minimum PHP version >= 7.3
334            if ( preg_quote( '#' ) === '#' ) {
335                $q = str_replace( '#', '\\#', $q );
336            }
337            return $q;
338        };
339        foreach ( $data['magicwords'] as $mw ) {
340            if ( $mw['name'] === 'redirect' ) {
341                $redirect = implode( '|', array_map( $quote, $mw['aliases'] ) );
342                if ( !$mw['case-sensitive'] ) {
343                    $redirect = '(?i:' . $redirect . ')';
344                }
345                break;
346            }
347        }
348        // `$this->nsNames[14]` is set earlier by the calls to `$this->addNamespace( $ns )`
349        // @phan-suppress-next-line PhanCoalescingAlwaysNull
350        $category = $this->quoteTitleRe( $this->nsNames[14] ?? 'Category', '@' );
351        if ( $category !== 'Category' ) {
352            $category = "(?:$category|Category)";
353        }
354
355        $this->savedCategoryRegexp = "@{$category}@";
356        $this->savedRedirectRegexp = "@{$redirect}@";
357        $this->savedBswRegexp = "@{$bswRegexp}@";
358    }
359
360    public function galleryOptions(): array {
361        $this->loadSiteData();
362        return $this->siteData['galleryoptions'];
363    }
364
365    public function allowedExternalImagePrefixes(): array {
366        $this->loadSiteData();
367        return $this->siteData['externalimages'] ?? [];
368    }
369
370    /**
371     * Determine the article base URI and relative prefix
372     */
373    private function determineArticlePath(): void {
374        $this->loadSiteData();
375
376        $url = $this->siteData['server'] . $this->siteData['articlepath'];
377
378        if ( substr( $url, -2 ) !== '$1' ) {
379            throw new \UnexpectedValueException( "Article path '$url' does not have '$1' at the end" );
380        }
381        $url = substr( $url, 0, -2 );
382
383        $bits = UrlUtils::parseUrl( $url );
384        if ( !$bits ) {
385            throw new \UnexpectedValueException( "Failed to parse article path '$url'" );
386        }
387
388        if ( empty( $bits['path'] ) ) {
389            $path = '/';
390        } else {
391            $path = UrlUtils::removeDotSegments( $bits['path'] );
392        }
393
394        $relParts = [ 'query' => true, 'fragment' => true ];
395        $base = array_diff_key( $bits, $relParts );
396        $rel = array_intersect_key( $bits, $relParts );
397
398        $i = strrpos( $path, '/' );
399        $base['path'] = substr( $path, 0, $i + 1 );
400        $rel['path'] = '.' . substr( $path, $i );
401
402        $this->baseUri = UrlUtils::assembleUrl( $base );
403        $this->relativeLinkPrefix = UrlUtils::assembleUrl( $rel );
404    }
405
406    public function baseURI(): string {
407        if ( $this->baseUri === null ) {
408            $this->determineArticlePath();
409        }
410        return $this->baseUri;
411    }
412
413    public function relativeLinkPrefix(): string {
414        if ( $this->relativeLinkPrefix === null ) {
415            $this->determineArticlePath();
416        }
417        return $this->relativeLinkPrefix;
418    }
419
420    /** @inheritDoc */
421    public function canonicalNamespaceId( string $name ): ?int {
422        $this->loadSiteData();
423        return $this->nsCanon[Utils::normalizeNamespaceName( $name )] ?? null;
424    }
425
426    /** @inheritDoc */
427    public function namespaceId( string $name ): ?int {
428        $this->loadSiteData();
429        $ns = $this->canonicalNamespaceId( $name );
430        if ( $ns !== null ) {
431            return $ns;
432        }
433        return $this->nsIds[Utils::normalizeNamespaceName( $name )] ?? null;
434    }
435
436    /** @inheritDoc */
437    public function namespaceName( int $ns ): ?string {
438        $this->loadSiteData();
439        return $this->nsNames[$ns] ?? null;
440    }
441
442    /** @inheritDoc */
443    public function namespaceHasSubpages( int $ns ): bool {
444        $this->loadSiteData();
445        return $this->nsWithSubpages[$ns] ?? false;
446    }
447
448    /** @inheritDoc */
449    public function namespaceCase( int $ns ): string {
450        $this->loadSiteData();
451        return $this->nsCase[$ns] ?? 'first-letter';
452    }
453
454    /** @inheritDoc */
455    public function specialPageLocalName( string $alias ): ?string {
456        $this->loadSiteData();
457        $alias = strtr( mb_strtoupper( $alias ), ' ', '_' );
458        return $this->specialPageNames[$alias] ?? null;
459    }
460
461    public function interwikiMagic(): bool {
462        $this->loadSiteData();
463        return $this->siteData['interwikimagic'];
464    }
465
466    public function interwikiMap(): array {
467        $this->loadSiteData();
468        return $this->interwikiMap;
469    }
470
471    public function iwp(): string {
472        $this->loadSiteData();
473        return $this->siteData['wikiid'];
474    }
475
476    public function legalTitleChars(): string {
477        $this->loadSiteData();
478        return $this->siteData['legaltitlechars'];
479    }
480
481    public function linkPrefixRegex(): ?string {
482        $this->loadSiteData();
483
484        if ( !empty( $this->siteData['linkprefixcharset'] ) ) {
485            return '/[' . $this->siteData['linkprefixcharset'] . ']+$/Du';
486        } else {
487            // We don't care about super-old MediaWiki, so don't try to parse 'linkprefix'.
488            return null;
489        }
490    }
491
492    /** @inheritDoc */
493    protected function linkTrail(): string {
494        $this->loadSiteData();
495        return $this->siteData['linktrail'];
496    }
497
498    public function lang(): string {
499        $this->loadSiteData();
500        return $this->siteData['lang'];
501    }
502
503    public function mainpage(): string {
504        $this->loadSiteData();
505        return $this->siteData['mainpage'];
506    }
507
508    /** @inheritDoc */
509    public function getMWConfigValue( string $key ) {
510        $this->loadSiteData();
511        switch ( $key ) {
512            // Hardcoded values for these 2 keys
513            case 'CiteResponsiveReferences':
514                return $this->siteData['citeresponsivereferences'] ?? false;
515
516            case 'CiteResponsiveReferencesThreshold':
517                return 10;
518
519            // We can add more hardcoded keys based on testing needs
520            // but null is the default for keys unsupported in this mode.
521            default:
522                return null;
523        }
524    }
525
526    public function rtl(): bool {
527        $this->loadSiteData();
528        return $this->siteData['rtl'];
529    }
530
531    /** @inheritDoc */
532    public function langConverterEnabledBcp47( Bcp47Code $lang ): bool {
533        $this->loadSiteData();
534        return $this->langConverterEnabled[strtolower( $lang->toBcp47Code() )] ?? false;
535    }
536
537    public function script(): string {
538        $this->loadSiteData();
539        return $this->siteData['script'];
540    }
541
542    public function scriptpath(): string {
543        $this->loadSiteData();
544        return $this->siteData['scriptpath'];
545    }
546
547    public function server(): string {
548        $this->loadSiteData();
549        return $this->siteData['server'];
550    }
551
552    /**
553     * @inheritDoc
554     */
555    public function exportMetadataToHeadBcp47(
556        Document $document,
557        ContentMetadataCollector $metadata,
558        string $defaultTitle,
559        Bcp47Code $lang
560    ): void {
561        '@phan-var StubMetadataCollector $metadata'; // @var StubMetadataCollector $metadata
562        $moduleLoadURI = $this->server() . $this->scriptpath() . '/load.php';
563        // Parsoid/JS always made this protocol-relative, so match
564        // that (for now at least)
565        $moduleLoadURI = preg_replace( '#^https?://#', '//', $moduleLoadURI );
566        // Look for a displaytitle.
567        $displayTitle = $metadata->getPageProperty( 'displaytitle' ) ??
568            // Use the default title, properly escaped
569            Utils::escapeHtml( $defaultTitle );
570        $this->exportMetadataHelper(
571            $document,
572            $moduleLoadURI,
573            $metadata->getModules(),
574            $metadata->getModuleStyles(),
575            $metadata->getJsConfigVars(),
576            $displayTitle,
577            $lang
578        );
579    }
580
581    public function redirectRegexp(): string {
582        $this->loadSiteData();
583        return $this->savedRedirectRegexp;
584    }
585
586    public function categoryRegexp(): string {
587        $this->loadSiteData();
588        return $this->savedCategoryRegexp;
589    }
590
591    public function bswRegexp(): string {
592        $this->loadSiteData();
593        return $this->savedBswRegexp;
594    }
595
596    public function timezoneOffset(): int {
597        $this->loadSiteData();
598        return $this->siteData['timeoffset'];
599    }
600
601    /** @inheritDoc */
602    public function variants(): array {
603        $this->loadSiteData();
604        $result = [];
605        foreach ( $this->variants as $variantKey => $tuple ) {
606            $result[Utils::bcp47ToMwCode( $variantKey )] = [
607                'base' => Utils::bcp47ToMwCode( $tuple['base'] ),
608                'fallbacks' => array_map( [ Utils::class, 'bcp47ToMwCode' ], $tuple['fallbacks'] ),
609            ];
610        }
611        return $result;
612    }
613
614    /** @inheritDoc */
615    public function variantsFor( Bcp47Code $lang ): array {
616        $this->loadSiteData();
617        return $this->variants[strtolower( $lang->toBcp47Code() )];
618    }
619
620    public function widthOption(): int {
621        $this->loadSiteData();
622        return $this->widthOption;
623    }
624
625    /** @inheritDoc */
626    protected function getVariableIDs(): array {
627        $this->loadSiteData();
628        return $this->apiVariables;
629    }
630
631    /** @inheritDoc */
632    protected function haveComputedFunctionSynonyms(): bool {
633        return false;
634    }
635
636    private static $noHashFunctions = null;
637
638    /** @inheritDoc */
639    protected function updateFunctionSynonym( string $func, string $magicword, bool $caseSensitive ): void {
640        if ( !$this->apiFunctionHooks ) {
641            $this->loadSiteData();
642        }
643        if ( isset( $this->apiFunctionHooks[$magicword] ) ) {
644            if ( !self::$noHashFunctions ) {
645                // FIXME: This is an approximation only computed in non-integrated mode for
646                // commandline and developer testing. This set is probably not up to date
647                // and also doesn't reflect no-hash functions registered by extensions
648                // via setFunctionHook calls. As such, you might run into GOTCHAs during
649                // debugging of production issues in standalone / API config mode.
650                self::$noHashFunctions = PHPUtils::makeSet( [
651                    'ns', 'nse', 'urlencode', 'lcfirst', 'ucfirst', 'lc', 'uc',
652                    'localurl', 'localurle', 'fullurl', 'fullurle', 'canonicalurl',
653                    'canonicalurle', 'formatnum', 'grammar', 'gender', 'plural', 'bidi',
654                    'numberofpages', 'numberofusers', 'numberofactiveusers',
655                    'numberofarticles', 'numberoffiles', 'numberofadmins',
656                    'numberingroup', 'numberofedits', 'language',
657                    'padleft', 'padright', 'anchorencode', 'defaultsort', 'filepath',
658                    'pagesincategory', 'pagesize', 'protectionlevel', 'protectionexpiry',
659                    'namespacee', 'namespacenumber', 'talkspace', 'talkspacee',
660                    'subjectspace', 'subjectspacee', 'pagename', 'pagenamee',
661                    'fullpagename', 'fullpagenamee', 'rootpagename', 'rootpagenamee',
662                    'basepagename', 'basepagenamee', 'subpagename', 'subpagenamee',
663                    'talkpagename', 'talkpagenamee', 'subjectpagename',
664                    'subjectpagenamee', 'pageid', 'revisionid', 'revisionday',
665                    'revisionday2', 'revisionmonth', 'revisionmonth1', 'revisionyear',
666                    'revisiontimestamp', 'revisionuser', 'cascadingsources',
667                    // Special callbacks in core
668                    'namespace', 'int', 'displaytitle', 'pagesinnamespace',
669                ] );
670            }
671
672            $syn = $func;
673            if ( substr( $syn, -1 ) === ':' ) {
674                $syn = substr( $syn, 0, -1 );
675            }
676            if ( !isset( self::$noHashFunctions[$magicword] ) ) {
677                $syn = '#' . $syn;
678            }
679            $this->functionSynonyms[intval( $caseSensitive )][$syn] = $magicword;
680        }
681    }
682
683    /** @inheritDoc */
684    protected function getMagicWords(): array {
685        $this->loadSiteData();
686        return $this->apiMagicWords;
687    }
688
689    /** @inheritDoc */
690    public function getMagicWordMatcher( string $id ): string {
691        $this->loadSiteData();
692        return $this->allMWs[$id] ?? '/^(?!)$/';
693    }
694
695    /** @inheritDoc */
696    public function getParameterizedAliasMatcher( array $words ): callable {
697        $this->loadSiteData();
698        $regexes = array_intersect_key( $this->paramMWs, array_flip( $words ) );
699        return static function ( $text ) use ( $regexes ) {
700            /**
701             * $name is the canonical magic word name
702             * $re has patterns for matching aliases
703             */
704            foreach ( $regexes as $name => $re ) {
705                if ( preg_match( $re, $text, $m ) ) {
706                    unset( $m[0] );
707
708                    // Ex. regexp here is, /^(?:(?:|vinculo\=(.*?)|enlace\=(.*?)|link\=(.*?)))$/uS
709                    // Check all the capture groups for a value, if not, it's safe to return an
710                    // empty string since we did get a match.
711                    foreach ( $m as $v ) {
712                        if ( $v !== '' ) {
713                            return [ 'k' => $name, 'v' => $v ];
714                        }
715                    }
716                    return [ 'k' => $name, 'v' => '' ];
717                }
718            }
719            return null;
720        };
721    }
722
723    /**
724     * This function is public so it can be used to synchronize env for
725     * hybrid parserTests.  The parserTests setup includes the definition
726     * of a number of non-standard extension tags, whose names are passed
727     * over from the JS side in hybrid testing.
728     * @param string $tag Name of an extension tag assumed to be present
729     */
730    public function ensureExtensionTag( string $tag ): void {
731        $this->loadSiteData();
732        $this->extensionTags[mb_strtolower( $tag )] = true;
733    }
734
735    /** @inheritDoc */
736    protected function getNonNativeExtensionTags(): array {
737        $this->loadSiteData();
738        return $this->extensionTags;
739    }
740
741    /** @inheritDoc */
742    public function getMaxTemplateDepth(): int {
743        // Not in the API result
744        return $this->maxDepth;
745    }
746
747    /** @inheritDoc */
748    protected function getSpecialNSAliases(): array {
749        $nsAliases = [
750            'Special',
751        ];
752        foreach ( $this->nsIds as $name => $id ) {
753            if ( $id === -1 ) {
754                $nsAliases[] = $this->quoteTitleRe( $name, '!' );
755            }
756        }
757        return $nsAliases;
758    }
759
760    /** @inheritDoc */
761    protected function getSpecialPageAliases( string $specialPage ): array {
762        $spAliases = [ $specialPage ];
763        foreach ( $this->specialPageAliases as $special ) {
764            if ( $special['realname'] === $specialPage ) {
765                $spAliases = array_merge( $spAliases, $special['aliases'] );
766                break;
767            }
768        }
769        return $spAliases;
770    }
771
772    /** @inheritDoc */
773    protected function getProtocols(): array {
774        $this->loadSiteData();
775        return $this->protocols;
776    }
777
778    /**
779     * @param array $parsoidSettings
780     * @return SiteConfig
781     */
782    public static function fromSettings( array $parsoidSettings ): SiteConfig {
783        $opts = [];
784        if ( isset( $parsoidSettings['linting'] ) ) {
785            $opts['linting'] = !empty( $parsoidSettings['linting'] );
786        }
787        if ( isset( $parsoidSettings['wt2htmlLimits'] ) ) {
788            $opts['wt2htmlLimits'] = $parsoidSettings['wt2htmlLimits'];
789        }
790        if ( isset( $parsoidSettings['html2wtLimits'] ) ) {
791            $opts['html2wtLimits'] = $parsoidSettings['html2wtLimits'];
792        }
793        $api = ApiHelper::fromSettings( $parsoidSettings );
794        return new SiteConfig( $api, $opts );
795    }
796
797    /** @inheritDoc */
798    public function metrics(): ?StatsdDataFactoryInterface {
799        static $metrics = null;
800        if ( $metrics === null ) {
801            $metrics = new MockMetrics();
802        }
803        return $metrics;
804    }
805
806    /** @inheritDoc */
807    public function getNoFollowConfig(): array {
808        $this->loadSiteData();
809        return [
810            'nofollow' => $this->siteData['nofollowlinks'] ?? true,
811            'nsexceptions' => $this->siteData['nofollownsexceptions'] ?? [],
812            'domainexceptions' => $this->siteData['nofollowdomainexceptions'] ?? [ 'mediawiki.org' ]
813        ];
814    }
815
816    /** @inheritDoc */
817    public function getExternalLinkTarget() {
818        $this->loadSiteData();
819        return $this->siteData['externallinktarget'] ?? false;
820    }
821}