Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
78.98% |
263 / 333 |
|
69.64% |
39 / 56 |
CRAP | |
0.00% |
0 / 1 |
SiteConfig | |
78.98% |
263 / 333 |
|
69.64% |
39 / 56 |
237.84 | |
0.00% |
0 / 1 |
__construct | |
47.37% |
9 / 19 |
|
0.00% |
0 / 1 |
14.14 | |||
reset | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
2 | |||
combineRegexArrays | |
85.71% |
6 / 7 |
|
0.00% |
0 / 1 |
4.05 | |||
addNamespace | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
2 | |||
detectFeatures | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
6 | |||
hasVideoInfo | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
loadSiteData | |
97.67% |
84 / 86 |
|
0.00% |
0 / 1 |
21 | |||
galleryOptions | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
allowedExternalImagePrefixes | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
determineArticlePath | |
84.21% |
16 / 19 |
|
0.00% |
0 / 1 |
4.06 | |||
baseURI | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
2 | |||
relativeLinkPrefix | |
66.67% |
2 / 3 |
|
0.00% |
0 / 1 |
2.15 | |||
canonicalNamespaceId | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
namespaceId | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
2 | |||
namespaceName | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
namespaceHasSubpages | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
namespaceCase | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
specialPageLocalName | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
interwikiMagic | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
interwikiMap | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
iwp | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
legalTitleChars | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
linkPrefixRegex | |
75.00% |
3 / 4 |
|
0.00% |
0 / 1 |
2.06 | |||
linkTrail | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
lang | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
mainpage | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
getMWConfigValue | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
4 | |||
rtl | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
langConverterEnabledBcp47 | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
script | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
scriptpath | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
server | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
exportMetadataToHeadBcp47 | |
0.00% |
0 / 13 |
|
0.00% |
0 / 1 |
2 | |||
redirectRegexp | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
categoryRegexp | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
bswRegexp | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
timezoneOffset | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
variants | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
2 | |||
variantsFor | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
widthOption | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
getVariableIDs | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
haveComputedFunctionSynonyms | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
updateFunctionSynonym | |
96.55% |
28 / 29 |
|
0.00% |
0 / 1 |
6 | |||
getMagicWords | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
getMagicWordMatcher | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
getParameterizedAliasMatcher | |
100.00% |
12 / 12 |
|
100.00% |
1 / 1 |
5 | |||
ensureExtensionTag | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
getNonNativeExtensionTags | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
getMaxTemplateDepth | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getSpecialNSAliases | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
3 | |||
getSpecialPageAliases | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
3 | |||
getProtocols | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
fromSettings | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
20 | |||
metrics | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
getNoFollowConfig | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
2 | |||
getExternalLinkTarget | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 |
1 | <?php |
2 | |
3 | declare( strict_types = 1 ); |
4 | |
5 | namespace Wikimedia\Parsoid\Config\Api; |
6 | |
7 | use Liuggio\StatsdClient\Factory\StatsdDataFactoryInterface; |
8 | use Wikimedia\Bcp47Code\Bcp47Code; |
9 | use Wikimedia\Parsoid\Config\SiteConfig as ISiteConfig; |
10 | use Wikimedia\Parsoid\Config\StubMetadataCollector; |
11 | use Wikimedia\Parsoid\Core\ContentMetadataCollector; |
12 | use Wikimedia\Parsoid\DOM\Document; |
13 | use Wikimedia\Parsoid\Mocks\MockMetrics; |
14 | use Wikimedia\Parsoid\Utils\ConfigUtils; |
15 | use Wikimedia\Parsoid\Utils\PHPUtils; |
16 | use Wikimedia\Parsoid\Utils\UrlUtils; |
17 | use Wikimedia\Parsoid\Utils\Utils; |
18 | |
19 | /** |
20 | * SiteConfig via MediaWiki's Action API |
21 | * |
22 | * Note this is intended for testing, not performance. |
23 | */ |
24 | class SiteConfig extends ISiteConfig { |
25 | |
/** @var ApiHelper Helper used to issue Action API requests */
private $api;

/** @var array|null The 'general' section of the siteinfo response; null until loaded */
private $siteData;

/** @var array|null The 'protocols' section of the siteinfo response */
private $protocols;

/** @var string|null Article base URI; computed lazily by determineArticlePath() */
private $baseUri;

/** @var string|null Relative link prefix; computed lazily by determineArticlePath() */
private $relativeLinkPrefix;

/** @var string '@'-delimited regexp built in loadSiteData() from the name of namespace 14 */
private $savedCategoryRegexp;

/** @var string '@'-delimited regexp built in loadSiteData() from the 'redirect' magic word aliases */
private $savedRedirectRegexp;

/** @var string '@'-delimited regexp built in loadSiteData() from the double-underscore magic word aliases */
private $savedBswRegexp;

/**
 * Namespace id => namespace name (the API's 'name' field)
 * @phan-var array<int,string>
 */
protected $nsNames = [];

/**
 * Namespace id => the API's 'case' field
 * @phan-var array<int,string>
 */
protected $nsCase = [];

/**
 * Normalized namespace name or alias => namespace id
 * @phan-var array<string,int>
 */
protected $nsIds = [];

/**
 * Normalized canonical namespace name (falling back to the name) => namespace id
 * @phan-var array<string,int>
 */
protected $nsCanon = [];

/**
 * Namespace id => true, for namespaces whose 'subpages' field is truthy
 * @phan-var array<int,bool>
 */
protected $nsWithSubpages = [];

/**
 * Upper-cased, underscore-separated realname or alias => first alias of that special page
 * @phan-var array<string,string>
 */
private $specialPageNames = [];

/**
 * The 'specialpagealiases' section of the siteinfo response, as returned
 * @phan-var array
 */
private $specialPageAliases = [];

/** @var array|null Result of ConfigUtils::computeInterwikiMap() on the API's 'interwikimap' */
private $interwikiMap;

/** @var array<string,array>|null Keys are stored as lowercased BCP-47 code strings */
private $variants;

/** @var array<string,bool>|null Keys are stored as lowercased BCP-47 code strings */
private $langConverterEnabled;

/** @var array|null Magic word name => [ case-sensitivity flag, alias, alias, ... ] */
private $apiMagicWords;

/** @var array|null Magic word name => regexp matching its parameterized ('$1') aliases */
private $paramMWs;

/** @var array|null The 'variables' section of the siteinfo response */
private $apiVariables;

/** @var array|null Set (via PHPUtils::makeSet) of the API's 'functionhooks' */
private $apiFunctionHooks;

/** @var array|null Magic word name => regexp matching any of its aliases */
private $allMWs;

/** @var array|null Lower-cased extension tag name => true */
private $extensionTags;

/** @var int|null Thumb limit selected by the default 'thumbsize' option */
private $widthOption;

/** @var int Value returned by getMaxTemplateDepth(); overridable via the 'maxDepth' option */
private $maxDepth = 40;

/** @var bool Whether detectFeatures() has already queried the API */
private $featureDetectionDone = false;
/** @var bool Whether 'videoinfo' was found by detectFeatures() */
private $hasVideoInfo = false;

/** @var string[] Base parameters for a siteinfo query */
public const SITE_CONFIG_QUERY_PARAMS = [
	'action' => 'query',
	'meta' => 'siteinfo',
	'siprop' => 'general|protocols|namespaces|namespacealiases|magicwords|interwikimap|'
		. 'languagevariants|defaultoptions|specialpagealiases|extensiontags|'
		. 'functionhooks|variables',
];
115 | |
/**
 * Set up a site configuration backed by the Action API.
 *
 * Optional keys read from $opts: 'linting', 'addHTMLTemplateParameters',
 * 'maxDepth', 'logger', 'wt2htmlLimits' and 'html2wtLimits'. When no logger
 * is supplied, one is obtained from self::createLogger().
 *
 * @param ApiHelper $api
 * @param array $opts
 */
public function __construct( ApiHelper $api, array $opts ) {
	parent::__construct();
	$this->api = $api;

	// isset() has already excluded null, so only the truthiness of the value matters.
	if ( isset( $opts['linting'] ) ) {
		$this->linterEnabled = (bool)$opts['linting'];
	}
	if ( isset( $opts['addHTMLTemplateParameters'] ) ) {
		$this->addHTMLTemplateParameters = (bool)$opts['addHTMLTemplateParameters'];
	}
	if ( isset( $opts['maxDepth'] ) ) {
		$this->maxDepth = $opts['maxDepth'];
	}

	// The fallback logger is only created when none was passed in.
	$this->setLogger( $opts['logger'] ?? self::createLogger() );

	// Caller-supplied limits are merged over the existing ones.
	if ( isset( $opts['wt2htmlLimits'] ) ) {
		$this->wt2htmlLimits = array_merge( $this->wt2htmlLimits, $opts['wt2htmlLimits'] );
	}
	if ( isset( $opts['html2wtLimits'] ) ) {
		$this->html2wtLimits = array_merge( $this->html2wtLimits, $opts['html2wtLimits'] );
	}
}
154 | |
/**
 * Forget the loaded site data and the values derived from it, so that the
 * next accessor call loads it again.
 */
protected function reset() {
	// Superclass value reset since parsertests reuse SiteConfig objects
	$this->linkTrailRegex = false;
	$this->magicWordMap = null;
	$this->interwikiMapNoNamespaces = null;

	// State owned by this class
	$this->relativeLinkPrefix = null;
	$this->baseUri = null;
	$this->siteData = null;
}
164 | |
/**
 * Combine sets of regex fragments
 * @param string[][] $res
 *  - $res[0], if set, are case-insensitive regex fragments.
 *  - $res[1], if set, are case-sensitive regex fragments.
 * @return string Combined regex fragment. May be an alternation. Assumes
 *  the outer environment is case-sensitive. A never-matching fragment is
 *  returned when $res is empty.
 */
private function combineRegexArrays( array $res ): string {
	if ( !$res ) {
		// None? Return a failing regex
		return '(?!)';
	}

	if ( isset( $res[0] ) ) {
		$insensitive = implode( '|', $res[0] );
		$res[0] = '(?i:' . $insensitive . ')';
	}
	if ( isset( $res[1] ) ) {
		$sensitive = implode( '|', $res[1] );
		$res[1] = '(?:' . $sensitive . ')';
	}

	return implode( '|', $res );
}
186 | |
/**
 * Register a namespace in the lookup tables.
 *
 * Protected access to let mocks and parser tests versions
 * add new namespaces as required.
 *
 * @param array $ns Namespace info; the keys 'id', 'name', 'subpages' and
 *  'case' are read, plus 'canonical' when present
 */
protected function addNamespace( array $ns ): void {
	$id = (int)$ns['id'];
	$name = $ns['name'];
	$localKey = Utils::normalizeNamespaceName( $name );
	// Without a canonical name, the name itself serves as the canonical one
	$canonKey = Utils::normalizeNamespaceName( $ns['canonical'] ?? $name );

	$this->nsNames[$id] = $name;
	$this->nsIds[$localKey] = $id;
	$this->nsCanon[$canonKey] = $id;
	if ( $ns['subpages'] ) {
		$this->nsWithSubpages[$id] = true;
	}
	$this->nsCase[$id] = (string)$ns['case'];
}
205 | |
/**
 * Query the API's paraminfo once and record whether 'videoinfo' is listed
 * among the values of the query module's first parameter.
 */
private function detectFeatures(): void {
	if ( $this->featureDetectionDone ) {
		return;
	}
	// Mark as done up front, as the original flow does, before the request is made
	$this->featureDetectionDone = true;

	$request = [ 'action' => 'paraminfo', 'modules' => 'query' ];
	$data = $this->api->makeRequest( $request );
	$props = $data["paraminfo"]["modules"][0]["parameters"]["0"]["type"] ?? [];
	$this->hasVideoInfo = in_array( 'videoinfo', $props, true );
}
214 | |
/**
 * Whether feature detection found 'videoinfo'. Triggers the detection
 * request on first use.
 *
 * @return bool
 */
public function hasVideoInfo(): bool {
	$this->detectFeatures();
	$available = $this->hasVideoInfo;
	return $available;
}
219 | |
/**
 * Load site data from the Action API, if necessary.
 *
 * Issues one siteinfo query and derives from it the namespace tables, the
 * magic word tables and matchers, the interwiki map, the language variant
 * tables, the extension tag set, the special page alias tables, and the
 * saved category / redirect / behavior switch regexps.
 *
 * Does nothing when $this->siteData is already populated.
 */
private function loadSiteData(): void {
	if ( $this->siteData !== null ) {
		return;
	}

	$data = $this->api->makeRequest( self::SITE_CONFIG_QUERY_PARAMS )['query'];

	// $this->siteData must be assigned before ensureExtensionTag() is called
	// further down: that method calls loadSiteData() itself and relies on the
	// early return above.
	$this->siteData = $data['general'];
	$this->widthOption = $data['general']['thumblimits'][$data['defaultoptions']['thumbsize']];
	$this->protocols = $data['protocols'];
	$this->apiVariables = $data['variables'];
	$this->apiFunctionHooks = PHPUtils::makeSet( $data['functionhooks'] );

	// Process namespace data from API
	$this->nsNames = [];
	$this->nsCase = [];
	$this->nsIds = [];
	$this->nsCanon = [];
	$this->nsWithSubpages = [];
	foreach ( $data['namespaces'] as $ns ) {
		$this->addNamespace( $ns );
	}
	foreach ( $data['namespacealiases'] as $ns ) {
		// Cast to int, as addNamespace() does: ids in this table are compared
		// strictly (=== -1) and returned from methods declared as ?int.
		$this->nsIds[Utils::normalizeNamespaceName( $ns['alias'] )] = (int)$ns['id'];
	}

	// Process magic word data from API
	$bsws = [];
	$this->paramMWs = [];
	$this->allMWs = [];

	// Recast the API results in the format that core MediaWiki returns internally
	// This enables us to use the Production SiteConfig without changes and add the
	// extra overhead to this developer API usage.
	$this->apiMagicWords = [];
	foreach ( $data['magicwords'] as $mw ) {
		$cs = (int)$mw['case-sensitive'];
		$mwName = $mw['name'];
		// First entry is the case-sensitivity flag, followed by the aliases
		$this->apiMagicWords[$mwName][] = $cs;
		$pmws = [];
		$amws = [];
		foreach ( $mw['aliases'] as $alias ) {
			$this->apiMagicWords[$mwName][] = $alias;
			// Aliases for double underscore mws include the underscores
			if ( substr( $alias, 0, 2 ) === '__' && substr( $alias, -2 ) === '__' ) {
				$bsws[$cs][] = preg_quote( substr( $alias, 2, -2 ), '@' );
			}
			if ( strpos( $alias, '$1' ) !== false ) {
				// The quoted '$1' placeholder becomes a lazy capture group
				$pmws[$cs][] = strtr( preg_quote( $alias, '/' ), [ '\\$1' => "(.*?)" ] );
			}
			$amws[$cs][] = preg_quote( $alias, '/' );
		}

		if ( $pmws ) {
			$this->paramMWs[$mwName] = '/^(?:' . $this->combineRegexArrays( $pmws ) . ')$/uDS';
		}
		$this->allMWs[$mwName] = '/^(?:' . $this->combineRegexArrays( $amws ) . ')$/D';
	}

	$bswRegexp = $this->combineRegexArrays( $bsws );

	// Parse interwiki map data from the API
	$this->interwikiMap = ConfigUtils::computeInterwikiMap( $data['interwikimap'] );

	// Parse variant data from the API
	// T320662: API should return these in BCP-47 forms
	$this->langConverterEnabled = [];
	$this->variants = [];
	foreach ( $data['languagevariants'] as $base => $variants ) {
		$baseBcp47 = Utils::mwCodeToBcp47( $base );
		if ( $this->siteData['langconversion'] ) {
			$baseKey = strtolower( $baseBcp47->toBcp47Code() );
			$this->langConverterEnabled[$baseKey] = true;
			foreach ( $variants as $code => $vdata ) {
				$variantKey = strtolower( Utils::mwCodeToBcp47( $code )->toBcp47Code() );
				$this->variants[$variantKey] = [
					'base' => $baseBcp47,
					'fallbacks' => array_map(
						[ Utils::class, 'mwCodeToBcp47' ],
						$vdata['fallbacks']
					),
				];
			}
		}
	}

	// Parse extension tag data from the API
	$this->extensionTags = [];
	foreach ( $data['extensiontags'] as $tag ) {
		// Strip a leading '<' and a trailing '>' from the tag
		$tag = preg_replace( '/^<|>$/D', '', $tag );
		$this->ensureExtensionTag( $tag );
	}

	// Map the realname and every alias of each special page to its first alias
	$this->specialPageAliases = $data['specialpagealiases'];
	$this->specialPageNames = [];
	foreach ( $this->specialPageAliases as $special ) {
		$alias = strtr( mb_strtoupper( $special['realname'] ), ' ', '_' );
		$this->specialPageNames[$alias] = $special['aliases'][0];
		foreach ( $special['aliases'] as $alias ) {
			$alias = strtr( mb_strtoupper( $alias ), ' ', '_' );
			$this->specialPageNames[$alias] = $special['aliases'][0];
		}
	}

	// Default used when the API reports no 'redirect' magic word
	$redirect = '(?i:\#REDIRECT)';
	$quote = static function ( $s ) {
		$q = preg_quote( $s, '@' );
		// Note that PHP < 7.3 doesn't escape # in preg_quote. That means
		// that the $redirect regexp will fail if used with the `x` flag.
		// Manually hack around this for PHP 7.2; can remove this workaround
		// once minimum PHP version >= 7.3
		if ( preg_quote( '#' ) === '#' ) {
			$q = str_replace( '#', '\\#', $q );
		}
		return $q;
	};
	foreach ( $data['magicwords'] as $mw ) {
		if ( $mw['name'] === 'redirect' ) {
			$redirect = implode( '|', array_map( $quote, $mw['aliases'] ) );
			if ( !$mw['case-sensitive'] ) {
				$redirect = '(?i:' . $redirect . ')';
			}
			break;
		}
	}
	// `$this->nsNames[14]` is set earlier by the calls to `$this->addNamespace( $ns )`
	// @phan-suppress-next-line PhanCoalescingAlwaysNull
	$category = $this->quoteTitleRe( $this->nsNames[14] ?? 'Category', '@' );
	if ( $category !== 'Category' ) {
		// Also accept the literal 'Category' alongside the wiki's own name
		$category = "(?:$category|Category)";
	}

	$this->savedCategoryRegexp = "@{$category}@";
	$this->savedRedirectRegexp = "@{$redirect}@";
	$this->savedBswRegexp = "@{$bswRegexp}@";
}
359 | |
/**
 * The 'galleryoptions' entry of the siteinfo general data.
 *
 * @return array
 */
public function galleryOptions(): array {
	$this->loadSiteData();
	$options = $this->siteData['galleryoptions'];
	return $options;
}
364 | |
/**
 * The 'externalimages' entry of the siteinfo general data, or an empty
 * array when the API did not report one.
 *
 * @return array
 */
public function allowedExternalImagePrefixes(): array {
	$this->loadSiteData();
	$prefixes = $this->siteData['externalimages'] ?? [];
	return $prefixes;
}
369 | |
/**
 * Determine the article base URI and relative prefix.
 *
 * Joins the site's 'server' and 'articlepath', strips the trailing '$1'
 * placeholder, and splits the result at the last '/' of its path: everything
 * up to and including that slash becomes the base URI, the remainder
 * (prefixed with '.', plus any query and fragment) the relative link prefix.
 *
 * @throws \UnexpectedValueException When the article path does not end in
 *  '$1', or when the resulting URL cannot be parsed
 */
private function determineArticlePath(): void {
	$this->loadSiteData();

	$url = $this->siteData['server'] . $this->siteData['articlepath'];
	$hasPlaceholder = substr( $url, -2 ) === '$1';
	if ( !$hasPlaceholder ) {
		throw new \UnexpectedValueException( "Article path '$url' does not have '$1' at the end" );
	}
	$url = substr( $url, 0, -2 );

	$bits = UrlUtils::parseUrl( $url );
	if ( !$bits ) {
		throw new \UnexpectedValueException( "Failed to parse article path '$url'" );
	}

	$path = empty( $bits['path'] ) ? '/' : UrlUtils::removeDotSegments( $bits['path'] );

	// Query and fragment belong to the relative part; all else to the base
	$relParts = [ 'query' => true, 'fragment' => true ];
	$rel = array_intersect_key( $bits, $relParts );
	$base = array_diff_key( $bits, $relParts );

	$lastSlash = strrpos( $path, '/' );
	$rel['path'] = '.' . substr( $path, $lastSlash );
	$base['path'] = substr( $path, 0, $lastSlash + 1 );

	$this->baseUri = UrlUtils::assembleUrl( $base );
	$this->relativeLinkPrefix = UrlUtils::assembleUrl( $rel );
}
405 | |
/**
 * The article base URI, computed on first use by determineArticlePath().
 *
 * @return string
 */
public function baseURI(): string {
	if ( $this->baseUri !== null ) {
		return $this->baseUri;
	}
	$this->determineArticlePath();
	return $this->baseUri;
}
412 | |
/**
 * The relative link prefix, computed on first use by determineArticlePath().
 *
 * @return string
 */
public function relativeLinkPrefix(): string {
	if ( $this->relativeLinkPrefix !== null ) {
		return $this->relativeLinkPrefix;
	}
	$this->determineArticlePath();
	return $this->relativeLinkPrefix;
}
419 | |
/**
 * Look up a namespace id in the canonical-name table; null when the
 * normalized name is not present.
 *
 * @inheritDoc
 */
public function canonicalNamespaceId( string $name ): ?int {
	$this->loadSiteData();
	$key = Utils::normalizeNamespaceName( $name );
	return $this->nsCanon[$key] ?? null;
}
425 | |
/**
 * Look up a namespace id, trying the canonical-name table first and the
 * name/alias table second; null when neither has the normalized name.
 *
 * @inheritDoc
 */
public function namespaceId( string $name ): ?int {
	$this->loadSiteData();
	$canonicalId = $this->canonicalNamespaceId( $name );
	if ( $canonicalId === null ) {
		$key = Utils::normalizeNamespaceName( $name );
		return $this->nsIds[$key] ?? null;
	}
	return $canonicalId;
}
435 | |
/**
 * The name recorded for a namespace id, or null for an unknown id.
 *
 * @inheritDoc
 */
public function namespaceName( int $ns ): ?string {
	$this->loadSiteData();
	$name = $this->nsNames[$ns] ?? null;
	return $name;
}
441 | |
/**
 * Whether the namespace was registered with subpages; false for ids that
 * were not.
 *
 * @inheritDoc
 */
public function namespaceHasSubpages( int $ns ): bool {
	$this->loadSiteData();
	$hasSubpages = $this->nsWithSubpages[$ns] ?? false;
	return $hasSubpages;
}
447 | |
/**
 * The 'case' value recorded for a namespace id; 'first-letter' for ids with
 * no recorded value.
 *
 * @inheritDoc
 */
public function namespaceCase( int $ns ): string {
	$this->loadSiteData();
	$case = $this->nsCase[$ns] ?? 'first-letter';
	return $case;
}
453 | |
/**
 * Resolve a special page alias. The alias is upper-cased and its spaces are
 * turned into underscores before the lookup; null when it is unknown.
 *
 * @inheritDoc
 */
public function specialPageLocalName( string $alias ): ?string {
	$this->loadSiteData();
	$key = strtr( mb_strtoupper( $alias ), ' ', '_' );
	return $this->specialPageNames[$key] ?? null;
}
460 | |
/**
 * The 'interwikimagic' entry of the siteinfo general data.
 *
 * @return bool
 */
public function interwikiMagic(): bool {
	$this->loadSiteData();
	$magic = $this->siteData['interwikimagic'];
	return $magic;
}
465 | |
/**
 * The interwiki map computed from the API's 'interwikimap' data.
 *
 * @return array
 */
public function interwikiMap(): array {
	$this->loadSiteData();
	$map = $this->interwikiMap;
	return $map;
}
470 | |
/**
 * The 'wikiid' entry of the siteinfo general data.
 *
 * @return string
 */
public function iwp(): string {
	$this->loadSiteData();
	$wikiId = $this->siteData['wikiid'];
	return $wikiId;
}
475 | |
/**
 * The 'legaltitlechars' entry of the siteinfo general data.
 *
 * @return string
 */
public function legalTitleChars(): string {
	$this->loadSiteData();
	$chars = $this->siteData['legaltitlechars'];
	return $chars;
}
480 | |
/**
 * Build the link prefix regexp from the site's 'linkprefixcharset'.
 *
 * @return string|null Null when the site reports no (or an empty)
 *  'linkprefixcharset'
 */
public function linkPrefixRegex(): ?string {
	$this->loadSiteData();

	if ( empty( $this->siteData['linkprefixcharset'] ) ) {
		// We don't care about super-old MediaWiki, so don't try to parse 'linkprefix'.
		return null;
	}
	$charset = $this->siteData['linkprefixcharset'];
	return '/[' . $charset . ']+$/Du';
}
491 | |
/**
 * The 'linktrail' entry of the siteinfo general data.
 *
 * @inheritDoc
 */
protected function linkTrail(): string {
	$this->loadSiteData();
	$trail = $this->siteData['linktrail'];
	return $trail;
}
497 | |
/**
 * The 'lang' entry of the siteinfo general data.
 *
 * @return string
 */
public function lang(): string {
	$this->loadSiteData();
	$code = $this->siteData['lang'];
	return $code;
}
502 | |
/**
 * The 'mainpage' entry of the siteinfo general data.
 *
 * @return string
 */
public function mainpage(): string {
	$this->loadSiteData();
	$title = $this->siteData['mainpage'];
	return $title;
}
507 | |
/**
 * Answer the small set of MediaWiki config keys supported in API mode:
 * 'CiteResponsiveReferences' (taken from the site data when present, else
 * false) and 'CiteResponsiveReferencesThreshold' (fixed at 10). Every other
 * key yields null.
 *
 * @inheritDoc
 */
public function getMWConfigValue( string $key ) {
	$this->loadSiteData();

	if ( $key === 'CiteResponsiveReferences' ) {
		return $this->siteData['citeresponsivereferences'] ?? false;
	}
	if ( $key === 'CiteResponsiveReferencesThreshold' ) {
		return 10;
	}

	// We can add more hardcoded keys based on testing needs
	// but null is the default for keys unsupported in this mode.
	return null;
}
525 | |
/**
 * The 'rtl' entry of the siteinfo general data.
 *
 * @return bool
 */
public function rtl(): bool {
	$this->loadSiteData();
	$isRtl = $this->siteData['rtl'];
	return $isRtl;
}
530 | |
/**
 * Whether the given language was recorded as a language-converter base
 * language when the site data was loaded. The lookup key is the lowercased
 * BCP-47 code.
 *
 * @inheritDoc
 */
public function langConverterEnabledBcp47( Bcp47Code $lang ): bool {
	$this->loadSiteData();
	$key = strtolower( $lang->toBcp47Code() );
	return $this->langConverterEnabled[$key] ?? false;
}
536 | |
/**
 * The 'script' entry of the siteinfo general data.
 *
 * @return string
 */
public function script(): string {
	$this->loadSiteData();
	$script = $this->siteData['script'];
	return $script;
}
541 | |
/**
 * The 'scriptpath' entry of the siteinfo general data.
 *
 * @return string
 */
public function scriptpath(): string {
	$this->loadSiteData();
	$scriptPath = $this->siteData['scriptpath'];
	return $scriptPath;
}
546 | |
/**
 * The 'server' entry of the siteinfo general data.
 *
 * @return string
 */
public function server(): string {
	$this->loadSiteData();
	$server = $this->siteData['server'];
	return $server;
}
551 | |
/**
 * Hand the collected metadata to exportMetadataHelper(), together with a
 * protocol-relative load.php URI and the display title (the 'displaytitle'
 * page property when set, otherwise the HTML-escaped default title).
 *
 * @inheritDoc
 */
public function exportMetadataToHeadBcp47(
	Document $document,
	ContentMetadataCollector $metadata,
	string $defaultTitle,
	Bcp47Code $lang
): void {
	'@phan-var StubMetadataCollector $metadata'; // @var StubMetadataCollector $metadata

	// Parsoid/JS always made this protocol-relative, so match
	// that (for now at least)
	$absoluteLoadURI = $this->server() . $this->scriptpath() . '/load.php';
	$moduleLoadURI = preg_replace( '#^https?://#', '//', $absoluteLoadURI );

	// Look for a displaytitle.
	$displayTitle = $metadata->getPageProperty( 'displaytitle' );
	if ( $displayTitle === null ) {
		// Use the default title, properly escaped
		$displayTitle = Utils::escapeHtml( $defaultTitle );
	}

	$this->exportMetadataHelper(
		$document,
		$moduleLoadURI,
		$metadata->getModules(),
		$metadata->getModuleStyles(),
		$metadata->getJsConfigVars(),
		$displayTitle,
		$lang
	);
}
580 | |
/**
 * The redirect regexp saved when the site data was loaded.
 *
 * @return string
 */
public function redirectRegexp(): string {
	$this->loadSiteData();
	$regexp = $this->savedRedirectRegexp;
	return $regexp;
}
585 | |
/**
 * The category regexp saved when the site data was loaded.
 *
 * @return string
 */
public function categoryRegexp(): string {
	$this->loadSiteData();
	$regexp = $this->savedCategoryRegexp;
	return $regexp;
}
590 | |
/**
 * The behavior switch (double-underscore magic word) regexp saved when the
 * site data was loaded.
 *
 * @return string
 */
public function bswRegexp(): string {
	$this->loadSiteData();
	$regexp = $this->savedBswRegexp;
	return $regexp;
}
595 | |
/**
 * The 'timeoffset' entry of the siteinfo general data.
 *
 * @return int
 */
public function timezoneOffset(): int {
	$this->loadSiteData();
	$offset = $this->siteData['timeoffset'];
	return $offset;
}
600 | |
/**
 * Return the variant table with every language code (keys, 'base' and
 * 'fallbacks') converted through Utils::bcp47ToMwCode().
 *
 * @inheritDoc
 */
public function variants(): array {
	$this->loadSiteData();

	$toMwCode = [ Utils::class, 'bcp47ToMwCode' ];
	$converted = [];
	foreach ( $this->variants as $bcp47Key => $info ) {
		$mwKey = Utils::bcp47ToMwCode( $bcp47Key );
		$base = Utils::bcp47ToMwCode( $info['base'] );
		$fallbacks = array_map( $toMwCode, $info['fallbacks'] );
		$converted[$mwKey] = [
			'base' => $base,
			'fallbacks' => $fallbacks,
		];
	}
	return $converted;
}
613 | |
/**
 * Return the variant table entry stored under the lowercased BCP-47 code of
 * the given language.
 *
 * NOTE(review): the lookup is unguarded, so a language with no entry hits an
 * undefined array key -- confirm callers only pass known variants.
 *
 * @inheritDoc
 */
public function variantsFor( Bcp47Code $lang ): array {
	$this->loadSiteData();
	$key = strtolower( $lang->toBcp47Code() );
	return $this->variants[$key];
}
619 | |
/**
 * The thumb limit selected by the site's default 'thumbsize' option.
 *
 * @return int
 */
public function widthOption(): int {
	$this->loadSiteData();
	$width = $this->widthOption;
	return $width;
}
624 | |
/**
 * The 'variables' section of the siteinfo response.
 *
 * @inheritDoc
 */
protected function getVariableIDs(): array {
	$this->loadSiteData();
	$ids = $this->apiVariables;
	return $ids;
}
630 | |
/**
 * Always false for this implementation.
 *
 * @inheritDoc
 */
protected function haveComputedFunctionSynonyms(): bool {
	$computed = false;
	return $computed;
}
635 | |
/** @var array|null Lazily built set of magic words whose synonyms get no '#' prefix */
private static $noHashFunctions = null;

/**
 * Record a function synonym for a magic word, provided the magic word is one
 * of the API-reported function hooks. A trailing ':' is dropped from the
 * synonym, and a '#' is prepended unless the magic word is in the no-hash set.
 *
 * @inheritDoc
 */
protected function updateFunctionSynonym( string $func, string $magicword, bool $caseSensitive ): void {
	if ( !$this->apiFunctionHooks ) {
		$this->loadSiteData();
	}
	if ( !isset( $this->apiFunctionHooks[$magicword] ) ) {
		return;
	}

	if ( !self::$noHashFunctions ) {
		// FIXME: This is an approximation only computed in non-integrated mode for
		// commandline and developer testing. This set is probably not up to date
		// and also doesn't reflect no-hash functions registered by extensions
		// via setFunctionHook calls. As such, you might run into GOTCHAs during
		// debugging of production issues in standalone / API config mode.
		self::$noHashFunctions = PHPUtils::makeSet( [
			'ns', 'nse', 'urlencode', 'lcfirst', 'ucfirst', 'lc', 'uc',
			'localurl', 'localurle', 'fullurl', 'fullurle', 'canonicalurl',
			'canonicalurle', 'formatnum', 'grammar', 'gender', 'plural', 'bidi',
			'numberofpages', 'numberofusers', 'numberofactiveusers',
			'numberofarticles', 'numberoffiles', 'numberofadmins',
			'numberingroup', 'numberofedits', 'language',
			'padleft', 'padright', 'anchorencode', 'defaultsort', 'filepath',
			'pagesincategory', 'pagesize', 'protectionlevel', 'protectionexpiry',
			'namespacee', 'namespacenumber', 'talkspace', 'talkspacee',
			'subjectspace', 'subjectspacee', 'pagename', 'pagenamee',
			'fullpagename', 'fullpagenamee', 'rootpagename', 'rootpagenamee',
			'basepagename', 'basepagenamee', 'subpagename', 'subpagenamee',
			'talkpagename', 'talkpagenamee', 'subjectpagename',
			'subjectpagenamee', 'pageid', 'revisionid', 'revisionday',
			'revisionday2', 'revisionmonth', 'revisionmonth1', 'revisionyear',
			'revisiontimestamp', 'revisionuser', 'cascadingsources',
			// Special callbacks in core
			'namespace', 'int', 'displaytitle', 'pagesinnamespace',
		] );
	}

	// Drop a single trailing colon, if any
	$synonym = substr( $func, -1 ) === ':' ? substr( $func, 0, -1 ) : $func;
	$needsHash = !isset( self::$noHashFunctions[$magicword] );
	if ( $needsHash ) {
		$synonym = '#' . $synonym;
	}

	$caseIndex = (int)$caseSensitive;
	$this->functionSynonyms[$caseIndex][$synonym] = $magicword;
}
682 | |
/**
 * The magic word table built from the API data: magic word name mapped to a
 * list holding the case-sensitivity flag followed by the aliases.
 *
 * @inheritDoc
 */
protected function getMagicWords(): array {
	$this->loadSiteData();
	$words = $this->apiMagicWords;
	return $words;
}
688 | |
/**
 * The regexp matching any alias of the given magic word, or a regexp that
 * never matches when the magic word is unknown.
 *
 * @inheritDoc
 */
public function getMagicWordMatcher( string $id ): string {
	$this->loadSiteData();
	$matcher = $this->allMWs[$id] ?? '/^(?!)$/';
	return $matcher;
}
694 | |
/**
 * Build a matcher for the parameterized aliases of the given magic words.
 *
 * The returned callable takes a text and yields [ 'k' => name, 'v' => value ]
 * for the first magic word whose pattern matches, where value is the first
 * non-empty capture (or '' if all captures are empty); it yields null when
 * nothing matches.
 *
 * @inheritDoc
 */
public function getParameterizedAliasMatcher( array $words ): callable {
	$this->loadSiteData();
	$wanted = array_flip( $words );
	$regexes = array_intersect_key( $this->paramMWs, $wanted );

	return static function ( $text ) use ( $regexes ) {
		// $name is the canonical magic word name,
		// $re has patterns for matching aliases
		foreach ( $regexes as $name => $re ) {
			if ( !preg_match( $re, $text, $m ) ) {
				continue;
			}

			// Ex. regexp here is, /^(?:(?:|vinculo\=(.*?)|enlace\=(.*?)|link\=(.*?)))$/uS
			// Check all the capture groups for a value, if not, it's safe to return an
			// empty string since we did get a match.
			$value = '';
			foreach ( array_slice( $m, 1 ) as $capture ) {
				if ( $capture !== '' ) {
					$value = $capture;
					break;
				}
			}
			return [ 'k' => $name, 'v' => $value ];
		}
		return null;
	};
}
722 | |
/**
 * Add a tag (lower-cased) to the set of known extension tags.
 *
 * This function is public so it can be used to synchronize env for
 * hybrid parserTests. The parserTests setup includes the definition
 * of a number of non-standard extension tags, whose names are passed
 * over from the JS side in hybrid testing.
 * @param string $tag Name of an extension tag assumed to be present
 */
public function ensureExtensionTag( string $tag ): void {
	$this->loadSiteData();
	$key = mb_strtolower( $tag );
	$this->extensionTags[$key] = true;
}
734 | |
/**
 * The set of known extension tags, keyed by lower-cased tag name.
 *
 * @inheritDoc
 */
protected function getNonNativeExtensionTags(): array {
	$this->loadSiteData();
	$tags = $this->extensionTags;
	return $tags;
}
740 | |
/**
 * The configured maximum depth: 40 unless overridden through the 'maxDepth'
 * constructor option.
 *
 * @inheritDoc
 */
public function getMaxTemplateDepth(): int {
	// Not in the API result
	$depth = $this->maxDepth;
	return $depth;
}
746 | |
/**
 * List 'Special' followed by the regex-quoted form of every name or alias
 * registered for namespace id -1.
 *
 * @inheritDoc
 */
protected function getSpecialNSAliases(): array {
	// Make sure the namespace tables are populated, like every other
	// accessor does; otherwise only 'Special' would be returned when this
	// is the first method called on the object.
	$this->loadSiteData();

	$nsAliases = [
		'Special',
	];
	foreach ( $this->nsIds as $name => $id ) {
		if ( $id === -1 ) {
			$nsAliases[] = $this->quoteTitleRe( $name, '!' );
		}
	}
	return $nsAliases;
}
759 | |
/**
 * List the given special page name followed by the aliases the API reported
 * for the special page with that realname, if any.
 *
 * @inheritDoc
 */
protected function getSpecialPageAliases( string $specialPage ): array {
	// Make sure the alias table is populated, like every other accessor
	// does; otherwise no aliases would be found when this is the first
	// method called on the object.
	$this->loadSiteData();

	$spAliases = [ $specialPage ];
	foreach ( $this->specialPageAliases as $special ) {
		if ( $special['realname'] === $specialPage ) {
			$spAliases = array_merge( $spAliases, $special['aliases'] );
			break;
		}
	}
	return $spAliases;
}
771 | |
/**
 * The 'protocols' section of the siteinfo response.
 *
 * @inheritDoc
 */
protected function getProtocols(): array {
	$this->loadSiteData();
	$protocols = $this->protocols;
	return $protocols;
}
777 | |
/**
 * Build a SiteConfig from a Parsoid settings array. Only the 'linting',
 * 'wt2htmlLimits' and 'html2wtLimits' settings are forwarded as options;
 * the API helper is created from the same settings.
 *
 * @param array $parsoidSettings
 * @return SiteConfig
 */
public static function fromSettings( array $parsoidSettings ): SiteConfig {
	$opts = [];
	if ( isset( $parsoidSettings['linting'] ) ) {
		// isset() has excluded null, so a bool cast equals !empty() here
		$opts['linting'] = (bool)$parsoidSettings['linting'];
	}
	foreach ( [ 'wt2htmlLimits', 'html2wtLimits' ] as $limitKey ) {
		if ( isset( $parsoidSettings[$limitKey] ) ) {
			$opts[$limitKey] = $parsoidSettings[$limitKey];
		}
	}

	$api = ApiHelper::fromSettings( $parsoidSettings );
	return new SiteConfig( $api, $opts );
}
796 | |
/**
 * Return a MockMetrics instance, created on first call and then reused.
 *
 * @inheritDoc
 */
public function metrics(): ?StatsdDataFactoryInterface {
	static $metrics = null;
	$metrics = $metrics ?? new MockMetrics();
	return $metrics;
}
805 | |
/**
 * Assemble the nofollow configuration from the site data, defaulting to
 * nofollow enabled, no namespace exceptions and 'mediawiki.org' as the sole
 * domain exception for entries the API did not report.
 *
 * @inheritDoc
 */
public function getNoFollowConfig(): array {
	$this->loadSiteData();

	$nofollow = $this->siteData['nofollowlinks'] ?? true;
	$nsExceptions = $this->siteData['nofollownsexceptions'] ?? [];
	$domainExceptions = $this->siteData['nofollowdomainexceptions'] ?? [ 'mediawiki.org' ];

	return [
		'nofollow' => $nofollow,
		'nsexceptions' => $nsExceptions,
		'domainexceptions' => $domainExceptions
	];
}
815 | |
/**
 * The 'externallinktarget' entry of the siteinfo general data, or false
 * when the API did not report one.
 *
 * @inheritDoc
 */
public function getExternalLinkTarget() {
	$this->loadSiteData();
	$target = $this->siteData['externallinktarget'] ?? false;
	return $target;
}
821 | } |