Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
80.94% |
276 / 341 |
|
65.57% |
40 / 61 |
CRAP | |
0.00% |
0 / 1 |
SiteConfig | |
80.94% |
276 / 341 |
|
65.57% |
40 / 61 |
204.01 | |
0.00% |
0 / 1 |
__construct | |
56.25% |
9 / 16 |
|
0.00% |
0 / 1 |
5.34 | |||
reset | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
2 | |||
combineRegexArrays | |
85.71% |
6 / 7 |
|
0.00% |
0 / 1 |
4.05 | |||
addNamespace | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
2 | |||
detectFeatures | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
6 | |||
hasVideoInfo | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
getCustomSiteConfigFileName | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
loadSiteData | |
95.56% |
86 / 90 |
|
0.00% |
0 / 1 |
22 | |||
galleryOptions | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
allowedExternalImagePrefixes | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
determineArticlePath | |
84.21% |
16 / 19 |
|
0.00% |
0 / 1 |
4.06 | |||
baseURI | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
2 | |||
relativeLinkPrefix | |
66.67% |
2 / 3 |
|
0.00% |
0 / 1 |
2.15 | |||
canonicalNamespaceId | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
namespaceId | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
namespaceName | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
namespaceHasSubpages | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
namespaceCase | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
specialPageLocalName | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
magicLinkEnabled | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
interwikiMagic | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
interwikiMap | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
iwp | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
legalTitleChars | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
linkPrefixRegex | |
75.00% |
3 / 4 |
|
0.00% |
0 / 1 |
2.06 | |||
linkTrail | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
langBcp47 | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
mainpage | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
mainPageLinkTarget | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
getMWConfigValue | |
87.50% |
7 / 8 |
|
0.00% |
0 / 1 |
5.05 | |||
rtl | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
langConverterEnabledBcp47 | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
script | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
scriptpath | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
server | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
exportMetadataToHeadBcp47 | |
0.00% |
0 / 13 |
|
0.00% |
0 / 1 |
2 | |||
redirectRegexp | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
categoryRegexp | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
bswRegexp | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
timezoneOffset | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
variantsFor | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
widthOption | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
getVariableIDs | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
haveComputedFunctionSynonyms | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
updateFunctionSynonym | |
97.67% |
42 / 43 |
|
0.00% |
0 / 1 |
6 | |||
getMagicWords | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
getMagicWordMatcher | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
getParameterizedAliasMatcher | |
100.00% |
12 / 12 |
|
100.00% |
1 / 1 |
5 | |||
ensureExtensionTag | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
getNonNativeExtensionTags | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
getMaxTemplateDepth | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getSpecialNSAliases | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
3 | |||
getSpecialPageAliases | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
3 | |||
getProtocols | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
metrics | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
incrementCounter | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
observeTiming | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
observeHistogram | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getHistogramBuckets | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getNoFollowConfig | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
2 | |||
getExternalLinkTarget | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 |
1 | <?php |
2 | |
3 | declare( strict_types = 1 ); |
4 | |
5 | namespace Wikimedia\Parsoid\Config\Api; |
6 | |
7 | use Liuggio\StatsdClient\Factory\StatsdDataFactoryInterface; |
8 | use Wikimedia\Bcp47Code\Bcp47Code; |
9 | use Wikimedia\Parsoid\Config\SiteConfig as ISiteConfig; |
10 | use Wikimedia\Parsoid\Config\StubMetadataCollector; |
11 | use Wikimedia\Parsoid\Core\ContentMetadataCollector; |
12 | use Wikimedia\Parsoid\DOM\Document; |
13 | use Wikimedia\Parsoid\Mocks\MockMetrics; |
14 | use Wikimedia\Parsoid\Utils\ConfigUtils; |
15 | use Wikimedia\Parsoid\Utils\PHPUtils; |
16 | use Wikimedia\Parsoid\Utils\Title; |
17 | use Wikimedia\Parsoid\Utils\UrlUtils; |
18 | use Wikimedia\Parsoid\Utils\Utils; |
19 | |
20 | /** |
21 | * SiteConfig via MediaWiki's Action API |
22 | * |
23 | * Note this is intended for testing, not performance. |
24 | */ |
25 | class SiteConfig extends ISiteConfig { |
26 | |
27 | /** @var ApiHelper */ |
28 | private $api; |
29 | |
30 | /** @var array|null */ |
31 | private $siteData; |
32 | |
33 | /** @var array|null */ |
34 | private $protocols; |
35 | |
36 | /** @var string|null */ |
37 | private $baseUri; |
38 | |
39 | /** @var string|null */ |
40 | private $relativeLinkPrefix; |
41 | |
42 | /** @var string */ |
43 | private $savedCategoryRegexp; |
44 | |
45 | /** @var string */ |
46 | private $savedRedirectRegexp; |
47 | |
48 | /** @var string */ |
49 | private $savedBswRegexp; |
50 | |
51 | /** @var array<int,string> */ |
52 | protected $nsNames = []; |
53 | |
54 | /** @var array<int,string> */ |
55 | protected $nsCase = []; |
56 | |
57 | /** @var array<string,int> */ |
58 | protected $nsIds = []; |
59 | |
60 | /** @var array<string,int> */ |
61 | protected $nsCanon = []; |
62 | |
63 | /** @var array<int,bool> */ |
64 | protected $nsWithSubpages = []; |
65 | |
66 | /** @var array<string,string> */ |
67 | private $specialPageNames = []; |
68 | |
69 | /** @var array */ |
70 | private $specialPageAliases = []; |
71 | |
72 | /** @var array|null */ |
73 | private $interwikiMap; |
74 | |
75 | /** @var array<string,array>|null Keys are stored as lowercased BCP-47 code strings */ |
76 | private $variants; |
77 | |
78 | /** @var array<string,bool>|null Keys are stored as lowercased BCP-47 code strings */ |
79 | private $langConverterEnabled; |
80 | |
81 | /** @var array|null */ |
82 | private $apiMagicWords; |
83 | |
84 | /** @var array|null */ |
85 | private $paramMWs; |
86 | |
87 | /** @var array|null */ |
88 | private $apiVariables; |
89 | |
90 | /** @var array|null */ |
91 | private $apiFunctionHooks; |
92 | |
93 | /** @var array|null */ |
94 | private $allMWs; |
95 | |
96 | /** @var array|null */ |
97 | private $extensionTags; |
98 | |
99 | /** @var int|null */ |
100 | private $widthOption; |
101 | |
102 | /** @var int */ |
103 | private $maxDepth = 40; |
104 | |
105 | private bool $featureDetectionDone = false; |
106 | private bool $hasVideoInfo = false; |
107 | |
108 | /** If set, generate experimental Parsoid HTML v3 parser function output */ |
109 | private bool $v3pf; |
110 | |
111 | /** @var string[] Base parameters for a siteinfo query */ |
112 | public const SITE_CONFIG_QUERY_PARAMS = [ |
113 | 'action' => 'query', |
114 | 'meta' => 'siteinfo', |
115 | 'siprop' => 'general|protocols|namespaces|namespacealiases|magicwords|interwikimap|' |
116 | . 'languagevariants|defaultoptions|specialpagealiases|extensiontags|' |
117 | . 'functionhooks|variables', |
118 | ]; |
119 | |
/**
 * @param ApiHelper $api Helper used to issue Action API requests
 * @param array $opts Optional settings. Keys read here:
 *  - linting: enable the linter (cast to bool, default false)
 *  - addHTMLTemplateParameters: cast to bool, default false
 *  - maxDepth: value returned by getMaxTemplateDepth() (cast to int)
 *  - logger: logger to install; defaults to self::createLogger()
 *  - wt2htmlLimits, html2wtLimits: arrays merged over the inherited limits
 *  - v3pf: generate experimental Parsoid HTML v3 parser function output
 *    (cast to bool, default false)
 */
public function __construct( ApiHelper $api, array $opts ) {
	parent::__construct();

	$this->api = $api;

	$this->linterEnabled = (bool)( $opts['linting'] ?? false );
	$this->addHTMLTemplateParameters = (bool)( $opts['addHTMLTemplateParameters'] ?? false );

	if ( isset( $opts['maxDepth'] ) ) {
		$this->maxDepth = (int)$opts['maxDepth'];
	}

	$this->setLogger( $opts['logger'] ?? self::createLogger() );

	if ( isset( $opts['wt2htmlLimits'] ) ) {
		$this->wt2htmlLimits = array_merge(
			$this->wt2htmlLimits, $opts['wt2htmlLimits']
		);
	}
	if ( isset( $opts['html2wtLimits'] ) ) {
		$this->html2wtLimits = array_merge(
			$this->html2wtLimits, $opts['html2wtLimits']
		);
	}

	// The property is typed `bool` and this file uses strict_types, so a
	// non-bool option value (e.g. 1 or "1") must be cast explicitly, just
	// like the other boolean options above.
	$this->v3pf = (bool)( $opts['v3pf'] ?? false );
}
147 | |
/**
 * Drop cached state so that the next accessor call reloads the site data.
 */
protected function reset(): void {
	// State owned by this class
	$this->baseUri = null;
	$this->relativeLinkPrefix = null;
	$this->siteData = null;

	// Superclass values are reset too, since parsertests reuse SiteConfig objects
	$this->iwMatcher = null;
	$this->interwikiMapNoNamespaces = null;
	$this->mwAliases = null;
	$this->linkTrailRegex = false;
}
158 | |
/**
 * Combine sets of regex fragments into one alternation
 * @param string[][] $res
 *  - $res[0], if set, holds the case-insensitive regex fragments.
 *  - $res[1], if set, holds the case-sensitive regex fragments.
 * @return string Combined regex fragment. May be an alternation. Assumes
 *  the outer environment is case-sensitive. When $res is empty, a regex
 *  fragment that never matches is returned.
 */
private function combineRegexArrays( array $res ): string {
	if ( !$res ) {
		// None? Return a failing regex
		return '(?!)';
	}

	// Wrap each set that is present in a group; index 0 gets the inline
	// case-insensitive flag, index 1 a plain non-capturing group.
	foreach ( [ 0 => '(?i:', 1 => '(?:' ] as $idx => $groupOpen ) {
		if ( isset( $res[$idx] ) ) {
			$res[$idx] = $groupOpen . implode( '|', $res[$idx] ) . ')';
		}
	}

	return implode( '|', $res );
}
180 | |
/**
 * Add a new namespace to the config
 *
 * Protected access to let mocks and parser tests versions
 * add new namespaces as required.
 *
 * @param array $ns Namespace info. Keys read here: 'id', 'name', 'case',
 *  and optionally 'canonical' (falls back to 'name') and 'subpages'
 *  (treated as false when absent).
 */
protected function addNamespace( array $ns ): void {
	$id = (int)$ns['id'];
	$name = $ns['name'];

	$this->nsNames[$id] = $name;
	$this->nsIds[Utils::normalizeNamespaceName( $name )] = $id;
	$this->nsCanon[Utils::normalizeNamespaceName( $ns['canonical'] ?? $name )] = $id;

	// 'subpages' may be omitted by callers that build namespace info by
	// hand; a missing key means "no subpages" rather than a PHP warning.
	if ( !empty( $ns['subpages'] ) ) {
		$this->nsWithSubpages[$id] = true;
	}
	$this->nsCase[$id] = (string)$ns['case'];
}
199 | |
/**
 * Query the API's paraminfo for the 'query' module once, and record
 * whether 'videoinfo' is listed among the types of its first parameter.
 */
private function detectFeatures(): void {
	if ( $this->featureDetectionDone ) {
		return;
	}
	// Flag is set before the request, so detection is attempted only once.
	$this->featureDetectionDone = true;

	$response = $this->api->makeRequest( [ 'action' => 'paraminfo', 'modules' => 'query' ] );
	$types = $response["paraminfo"]["modules"][0]["parameters"]["0"]["type"] ?? [];
	$this->hasVideoInfo = in_array( 'videoinfo', $types, true );
}
208 | |
/**
 * Whether feature detection found 'videoinfo' in the API's paraminfo.
 * Triggers feature detection on first use.
 *
 * @return bool
 */
public function hasVideoInfo(): bool {
	$this->detectFeatures();
	$detected = $this->hasVideoInfo;
	return $detected;
}
213 | |
/**
 * Path of the optional custom site config file.
 *
 * This lets us do standalone development testing of features that need
 * custom siteconfig. For now, only new magic word definitions are read
 * from it; in the future, it could include other custom config.
 *
 * @return string
 */
protected function getCustomSiteConfigFileName(): string {
	$dir = __DIR__;
	return $dir . "/standalone.siteconfig.json";
}
224 | |
/**
 * Load site data from the Action API, if necessary
 *
 * Issues one siteinfo query (self::SITE_CONFIG_QUERY_PARAMS) and derives
 * the namespace, magic word, interwiki, language variant, extension tag
 * and special page state from it. A non-null $this->siteData marks the
 * data as loaded, so repeated calls are no-ops until reset() is called.
 *
 * @throws \UnexpectedValueException If the custom site config file exists
 *  but cannot be read or does not contain a JSON object/array.
 */
private function loadSiteData(): void {
	if ( $this->siteData !== null ) {
		return;
	}

	$data = $this->api->makeRequest( self::SITE_CONFIG_QUERY_PARAMS )['query'];

	// NOTE: this must be assigned before ensureExtensionTag() is called
	// below, since that method calls loadSiteData() itself and relies on
	// the early return above.
	$this->siteData = $data['general'];
	$this->widthOption = $data['general']['thumblimits'][$data['defaultoptions']['thumbsize']];
	$this->protocols = $data['protocols'];
	$this->apiVariables = $data['variables'];
	$this->apiFunctionHooks = PHPUtils::makeSet( $data['functionhooks'] );

	// Process namespace data from API
	$this->nsNames = [];
	$this->nsCase = [];
	$this->nsIds = [];
	$this->nsCanon = [];
	$this->nsWithSubpages = [];
	foreach ( $data['namespaces'] as $ns ) {
		$this->addNamespace( $ns );
	}
	foreach ( $data['namespacealiases'] as $ns ) {
		// Cast to int for consistency with addNamespace(), so that strict
		// comparisons against namespace ids behave the same for aliases.
		$this->nsIds[Utils::normalizeNamespaceName( $ns['alias'] )] = (int)$ns['id'];
	}

	// Process magic word data from API
	$bsws = [];
	$this->paramMWs = [];
	$this->allMWs = [];

	// Fold custom magic words into the API response
	$f = $this->getCustomSiteConfigFileName();
	if ( file_exists( $f ) ) {
		$json = file_get_contents( $f );
		$config = $json === false ? null : json_decode( $json, true );
		if ( !is_array( $config ) ) {
			throw new \UnexpectedValueException(
				"Custom site config '$f' could not be read or is not valid JSON"
			);
		}
		// The file may carry other custom config without any magic words
		PHPUtils::pushArray( $data['magicwords'], $config['magicwords'] ?? [] );
	}

	// Recast the API results in the format that core MediaWiki returns internally
	// This enables us to use the Production SiteConfig without changes and add the
	// extra overhead to this developer API usage.
	$this->apiMagicWords = [];
	foreach ( $data['magicwords'] as $mw ) {
		$cs = (int)$mw['case-sensitive'];
		$mwName = $mw['name'];
		// First entry is the case-sensitivity flag, followed by the aliases
		$this->apiMagicWords[$mwName][] = $cs;
		$pmws = [];
		$allMWs = [];
		foreach ( $mw['aliases'] as $alias ) {
			$this->apiMagicWords[$mwName][] = $alias;
			// Aliases for double underscore mws include the underscores
			if ( substr( $alias, 0, 2 ) === '__' && substr( $alias, -2 ) === '__' ) {
				$bsws[$cs][] = preg_quote( substr( $alias, 2, -2 ), '@' );
			}
			// Parameterized aliases: the '$1' placeholder becomes a capture group
			if ( strpos( $alias, '$1' ) !== false ) {
				$pmws[$cs][] = strtr( preg_quote( $alias, '/' ), [ '\\$1' => "(.*?)" ] );
			}
			$allMWs[$cs][] = preg_quote( $alias, '/' );
		}

		if ( $pmws ) {
			$this->paramMWs[$mwName] = '/^(?:' . $this->combineRegexArrays( $pmws ) . ')$/uDS';
		}
		$this->allMWs[$mwName] = '/^(?:' . $this->combineRegexArrays( $allMWs ) . ')$/D';
	}

	$bswRegexp = $this->combineRegexArrays( $bsws );

	// Parse interwiki map data from the API
	$this->interwikiMap = ConfigUtils::computeInterwikiMap( $data['interwikimap'] );

	// Parse variant data from the API
	# T320662: API should return these in BCP-47 forms
	$this->langConverterEnabled = [];
	$this->variants = [];
	foreach ( $data['languagevariants'] as $base => $variants ) {
		$baseBcp47 = Utils::mwCodeToBcp47( $base );
		if ( $this->siteData['langconversion'] ) {
			$baseKey = strtolower( $baseBcp47->toBcp47Code() );
			$this->langConverterEnabled[$baseKey] = true;
			foreach ( $variants as $code => $vdata ) {
				$variantKey = strtolower( Utils::mwCodeToBcp47( $code )->toBcp47Code() );
				$this->variants[$variantKey] = [
					'base' => $baseBcp47,
					'fallbacks' => array_map(
						[ Utils::class, 'mwCodeToBcp47' ],
						$vdata['fallbacks']
					),
				];
			}
		}
	}

	// Parse extension tag data from the API
	$this->extensionTags = [];
	foreach ( $data['extensiontags'] as $tag ) {
		// Strip the surrounding angle brackets
		$tag = preg_replace( '/^<|>$/D', '', $tag );
		$this->ensureExtensionTag( $tag );
	}

	// Map the uppercased, underscored real name and every alias of a
	// special page to that page's first alias.
	$this->specialPageAliases = $data['specialpagealiases'];
	$this->specialPageNames = [];
	foreach ( $this->specialPageAliases as $special ) {
		$alias = strtr( mb_strtoupper( $special['realname'] ), ' ', '_' );
		$this->specialPageNames[$alias] = $special['aliases'][0];
		foreach ( $special['aliases'] as $alias ) {
			$alias = strtr( mb_strtoupper( $alias ), ' ', '_' );
			$this->specialPageNames[$alias] = $special['aliases'][0];
		}
	}

	// Default redirect regexp, replaced below if the API reports a
	// 'redirect' magic word.
	$redirect = '(?i:\#REDIRECT)';
	foreach ( $data['magicwords'] as $mw ) {
		if ( $mw['name'] === 'redirect' ) {
			// preg_quote() escapes '#' as of PHP 7.3, so the result is
			// also safe to use with the `x` flag.
			$redirect = implode( '|', array_map(
				static function ( string $s ): string {
					return preg_quote( $s, '@' );
				},
				$mw['aliases']
			) );
			if ( !$mw['case-sensitive'] ) {
				$redirect = '(?i:' . $redirect . ')';
			}
			break;
		}
	}
	// `$this->nsNames[14]` is set earlier by the calls to `$this->addNamespace( $ns )`
	// @phan-suppress-next-line PhanCoalescingAlwaysNull
	$category = $this->quoteTitleRe( $this->nsNames[14] ?? 'Category', '@' );
	if ( $category !== 'Category' ) {
		$category = "(?:$category|Category)";
	}

	$this->savedCategoryRegexp = "@{$category}@";
	$this->savedRedirectRegexp = "@{$redirect}@";
	$this->savedBswRegexp = "@{$bswRegexp}@";
}
371 | |
/**
 * The 'galleryoptions' entry of the loaded site data.
 *
 * @return array
 */
public function galleryOptions(): array {
	$this->loadSiteData();
	$options = $this->siteData['galleryoptions'];
	return $options;
}
376 | |
/**
 * The 'externalimages' entry of the loaded site data, or an empty array
 * when the site data has no such entry.
 *
 * @return array
 */
public function allowedExternalImagePrefixes(): array {
	$this->loadSiteData();
	$prefixes = $this->siteData['externalimages'] ?? [];
	return $prefixes;
}
381 | |
/**
 * Compute and cache the article base URI and the relative link prefix
 * from the site's 'server' and 'articlepath' values.
 *
 * @throws \UnexpectedValueException If the article path does not end in
 *  '$1' or the resulting URL cannot be parsed.
 */
private function determineArticlePath(): void {
	$this->loadSiteData();

	$url = $this->siteData['server'] . $this->siteData['articlepath'];
	if ( substr( $url, -2 ) !== '$1' ) {
		throw new \UnexpectedValueException( "Article path '$url' does not have '$1' at the end" );
	}
	// Strip the trailing '$1' placeholder
	$url = substr( $url, 0, -2 );

	$parsed = UrlUtils::parseUrl( $url );
	if ( !$parsed ) {
		throw new \UnexpectedValueException( "Failed to parse article path '$url'" );
	}

	$path = empty( $parsed['path'] ) ? '/' : UrlUtils::removeDotSegments( $parsed['path'] );

	// Query and fragment belong to the relative prefix; every other
	// URL component belongs to the base URI.
	$relativeKeys = [ 'query' => true, 'fragment' => true ];
	$baseParts = array_diff_key( $parsed, $relativeKeys );
	$relativeParts = array_intersect_key( $parsed, $relativeKeys );

	// Split the path at its last slash: the base keeps everything up to
	// and including the slash, the relative prefix is './' plus the rest.
	$lastSlash = strrpos( $path, '/' );
	$baseParts['path'] = substr( $path, 0, $lastSlash + 1 );
	$relativeParts['path'] = '.' . substr( $path, $lastSlash );

	$this->baseUri = UrlUtils::assembleUrl( $baseParts );
	$this->relativeLinkPrefix = UrlUtils::assembleUrl( $relativeParts );
}
417 | |
/**
 * The article base URI, computed on first use by determineArticlePath().
 *
 * @return string
 */
public function baseURI(): string {
	if ( $this->baseUri !== null ) {
		return $this->baseUri;
	}
	$this->determineArticlePath();
	return $this->baseUri;
}
424 | |
/**
 * The relative link prefix, computed on first use by determineArticlePath().
 *
 * @return string
 */
public function relativeLinkPrefix(): string {
	if ( $this->relativeLinkPrefix !== null ) {
		return $this->relativeLinkPrefix;
	}
	$this->determineArticlePath();
	return $this->relativeLinkPrefix;
}
431 | |
/** @inheritDoc */
public function canonicalNamespaceId( string $name ): ?int {
	$this->loadSiteData();
	// Look up by normalized name in the canonical-name map only
	$key = Utils::normalizeNamespaceName( $name );
	return $this->nsCanon[$key] ?? null;
}
437 | |
/** @inheritDoc */
public function namespaceId( string $name ): ?int {
	$this->loadSiteData();
	$key = Utils::normalizeNamespaceName( $name );
	// Canonical names take precedence over local names and aliases
	if ( isset( $this->nsCanon[$key] ) ) {
		return $this->nsCanon[$key];
	}
	return $this->nsIds[$key] ?? null;
}
444 | |
/** @inheritDoc */
public function namespaceName( int $ns ): ?string {
	$this->loadSiteData();
	// Unknown namespace ids yield null
	$name = $this->nsNames[$ns] ?? null;
	return $name;
}
450 | |
/** @inheritDoc */
public function namespaceHasSubpages( int $ns ): bool {
	$this->loadSiteData();
	// Only namespaces with subpages enabled have an entry in the map
	$enabled = $this->nsWithSubpages[$ns] ?? false;
	return $enabled;
}
456 | |
/** @inheritDoc */
public function namespaceCase( int $ns ): string {
	$this->loadSiteData();
	// Namespaces without a recorded case setting report 'first-letter'
	$case = $this->nsCase[$ns] ?? 'first-letter';
	return $case;
}
462 | |
/** @inheritDoc */
public function specialPageLocalName( string $alias ): ?string {
	$this->loadSiteData();
	// Keys are stored uppercased, with spaces replaced by underscores
	$key = strtr( mb_strtoupper( $alias ), ' ', '_' );
	return $this->specialPageNames[$key] ?? null;
}
469 | |
/** @inheritDoc */
public function magicLinkEnabled( string $which ): bool {
	$this->loadSiteData();
	$settings = $this->siteData['magiclinks'] ?? [];
	if ( isset( $settings[$which] ) ) {
		return $settings[$which];
	}
	// Default to true, as wikis too old to export the 'magiclinks'
	// property always had magic links enabled.
	return true;
}
478 | |
/**
 * The 'interwikimagic' entry of the loaded site data.
 *
 * @return bool
 */
public function interwikiMagic(): bool {
	$this->loadSiteData();
	$enabled = $this->siteData['interwikimagic'];
	return $enabled;
}
483 | |
/**
 * The interwiki map computed from the API's 'interwikimap' data.
 *
 * @return array
 */
public function interwikiMap(): array {
	$this->loadSiteData();
	$map = $this->interwikiMap;
	return $map;
}
488 | |
/**
 * The 'wikiid' entry of the loaded site data.
 *
 * @return string
 */
public function iwp(): string {
	$this->loadSiteData();
	$wikiId = $this->siteData['wikiid'];
	return $wikiId;
}
493 | |
/**
 * The 'legaltitlechars' entry of the loaded site data.
 *
 * @return string
 */
public function legalTitleChars(): string {
	$this->loadSiteData();
	$chars = $this->siteData['legaltitlechars'];
	return $chars;
}
498 | |
/**
 * Regex built from the site's 'linkprefixcharset', or null when the site
 * data has no non-empty value for it.
 *
 * @return string|null
 */
public function linkPrefixRegex(): ?string {
	$this->loadSiteData();

	if ( empty( $this->siteData['linkprefixcharset'] ) ) {
		// We don't care about super-old MediaWiki, so don't try to parse 'linkprefix'.
		return null;
	}

	$charset = $this->siteData['linkprefixcharset'];
	return '/[' . $charset . ']+$/Du';
}
509 | |
/** @inheritDoc */
protected function linkTrail(): string {
	$this->loadSiteData();
	// Taken verbatim from the 'linktrail' entry of the site data
	$trail = $this->siteData['linktrail'];
	return $trail;
}
515 | |
/**
 * The site's 'lang' code, converted with Utils::mwCodeToBcp47().
 *
 * @return Bcp47Code
 */
public function langBcp47(): Bcp47Code {
	$this->loadSiteData();
	$mwCode = $this->siteData['lang'];
	return Utils::mwCodeToBcp47( $mwCode );
}
520 | |
/**
 * The 'mainpage' entry of the loaded site data.
 *
 * @return string
 */
public function mainpage(): string {
	$this->loadSiteData();
	$title = $this->siteData['mainpage'];
	return $title;
}
525 | |
/**
 * The site's 'mainpage' text as a Title, resolved against this config.
 *
 * @return Title
 */
public function mainPageLinkTarget(): Title {
	$this->loadSiteData();
	$text = $this->siteData['mainpage'];
	return Title::newFromText( $text, $this );
}
530 | |
/** @inheritDoc */
public function getMWConfigValue( string $key ) {
	$this->loadSiteData();

	// Only a handful of keys are supported in this mode. More can be
	// added based on testing needs.
	if ( $key === 'CiteResponsiveReferences' ) {
		// Read from the site data when present, false otherwise
		return $this->siteData['citeresponsivereferences'] ?? false;
	}
	if ( $key === 'CiteResponsiveReferencesThreshold' ) {
		// Hardcoded value
		return 10;
	}
	if ( $key === 'ParsoidExperimentalParserFunctionOutput' ) {
		// Controlled by the 'v3pf' constructor option
		return $this->v3pf;
	}

	// null is the default for keys unsupported in this mode.
	return null;
}
551 | |
/**
 * The 'rtl' entry of the loaded site data.
 *
 * @return bool
 */
public function rtl(): bool {
	$this->loadSiteData();
	$isRtl = $this->siteData['rtl'];
	return $isRtl;
}
556 | |
/** @inheritDoc */
public function langConverterEnabledBcp47( Bcp47Code $lang ): bool {
	$this->loadSiteData();
	// Keys are stored as lowercased BCP-47 code strings
	$key = strtolower( $lang->toBcp47Code() );
	return $this->langConverterEnabled[$key] ?? false;
}
562 | |
/**
 * The 'script' entry of the loaded site data.
 *
 * @return string
 */
public function script(): string {
	$this->loadSiteData();
	$script = $this->siteData['script'];
	return $script;
}
567 | |
/**
 * The 'scriptpath' entry of the loaded site data.
 *
 * @return string
 */
public function scriptpath(): string {
	$this->loadSiteData();
	$scriptPath = $this->siteData['scriptpath'];
	return $scriptPath;
}
572 | |
/**
 * The 'server' entry of the loaded site data.
 *
 * @return string
 */
public function server(): string {
	$this->loadSiteData();
	$server = $this->siteData['server'];
	return $server;
}
577 | |
/**
 * @inheritDoc
 */
public function exportMetadataToHeadBcp47(
	Document $document,
	ContentMetadataCollector $metadata,
	string $defaultTitle,
	Bcp47Code $lang
): void {
	'@phan-var StubMetadataCollector $metadata'; // @var StubMetadataCollector $metadata

	// Parsoid/JS always made the module load URI protocol-relative, so
	// match that (for now at least)
	$loadUri = preg_replace(
		'#^https?://#',
		'//',
		$this->server() . $this->scriptpath() . '/load.php'
	);

	// Prefer a displaytitle page property; otherwise fall back to the
	// default title, properly escaped.
	$title = $metadata->getPageProperty( 'displaytitle' );
	if ( $title === null ) {
		$title = Utils::escapeHtml( $defaultTitle );
	}

	$this->exportMetadataHelper(
		$document,
		$loadUri,
		$metadata->getModules(),
		$metadata->getModuleStyles(),
		$metadata->getJsConfigVars(),
		$title,
		$lang
	);
}
606 | |
/**
 * The redirect regexp built while loading the site data.
 *
 * @return string
 */
public function redirectRegexp(): string {
	$this->loadSiteData();
	$regexp = $this->savedRedirectRegexp;
	return $regexp;
}
611 | |
/**
 * The category regexp built while loading the site data.
 *
 * @return string
 */
public function categoryRegexp(): string {
	$this->loadSiteData();
	$regexp = $this->savedCategoryRegexp;
	return $regexp;
}
616 | |
/**
 * The regexp for double-underscore magic words built while loading the
 * site data.
 *
 * @return string
 */
public function bswRegexp(): string {
	$this->loadSiteData();
	$regexp = $this->savedBswRegexp;
	return $regexp;
}
621 | |
/**
 * The 'timeoffset' entry of the loaded site data.
 *
 * @return int
 */
public function timezoneOffset(): int {
	$this->loadSiteData();
	$offset = $this->siteData['timeoffset'];
	return $offset;
}
626 | |
/** @inheritDoc */
public function variantsFor( Bcp47Code $lang ): ?array {
	$this->loadSiteData();
	// Keys are stored as lowercased BCP-47 code strings
	$key = strtolower( $lang->toBcp47Code() );
	return $this->variants[$key] ?? null;
}
632 | |
/**
 * The thumb limit selected by the default 'thumbsize' option, as
 * recorded while loading the site data.
 *
 * @return int
 */
public function widthOption(): int {
	$this->loadSiteData();
	$width = $this->widthOption;
	return $width;
}
637 | |
/** @inheritDoc */
protected function getVariableIDs(): array {
	$this->loadSiteData();
	// The 'variables' list exactly as returned by the API
	$ids = $this->apiVariables;
	return $ids;
}
643 | |
/** @inheritDoc */
protected function haveComputedFunctionSynonyms(): bool {
	// Always false in this implementation
	$computed = false;
	return $computed;
}
648 | |
649 | private static ?array $noHashFunctions = null; |
650 | |
/** @inheritDoc */
protected function updateFunctionSynonym( string $func, string $magicword, bool $caseSensitive ): void {
	if ( !$this->apiFunctionHooks ) {
		$this->loadSiteData();
	}

	// Only magic words that the API reported as function hooks are registered
	if ( !isset( $this->apiFunctionHooks[$magicword] ) ) {
		return;
	}

	if ( !self::$noHashFunctions ) {
		// FIXME: This is an approximation only computed in non-integrated mode for
		// commandline and developer testing. This set is probably not up to date
		// and also doesn't reflect no-hash functions registered by extensions
		// via setFunctionHook calls. As such, you might run into GOTCHAs during
		// debugging of production issues in standalone / API config mode.
		// Keep this in sync with CoreParserFunctions::register in core.
		self::$noHashFunctions = PHPUtils::makeSet( [
			'ns', 'nse', 'urlencode', 'lcfirst', 'ucfirst', 'lc', 'uc',
			'localurl', 'localurle', 'fullurl', 'fullurle', 'canonicalurl',
			'canonicalurle', 'formatnum', 'grammar', 'gender', 'plural', 'formal',
			'bidi', 'numberingroup', 'language',
			'padleft', 'padright', 'anchorencode', 'defaultsort', 'filepath',
			'pagesincategory', 'pagesize', 'protectionlevel', 'protectionexpiry',
			# The following are the "parser function" forms of magic
			# variables defined in CoreMagicVariables. The no-args form will
			# go through the magic variable code path (and be cached); the
			# presence of arguments will cause the parser function form to
			# be invoked. (Note that the actual implementation will pass
			# a Parser object as first argument, in addition to the
			# parser function parameters.)

			# For this group, the first parameter to the parser function is
			# "page title", and the no-args form (and the magic variable)
			# defaults to "current page title".
			'pagename', 'pagenamee',
			'fullpagename', 'fullpagenamee',
			'subpagename', 'subpagenamee',
			'rootpagename', 'rootpagenamee',
			'basepagename', 'basepagenamee',
			'talkpagename', 'talkpagenamee',
			'subjectpagename', 'subjectpagenamee',
			'pageid', 'revisionid', 'revisionday',
			'revisionday2', 'revisionmonth', 'revisionmonth1', 'revisionyear',
			'revisiontimestamp',
			'revisionuser',
			'cascadingsources',
			'namespace', 'namespacee', 'namespacenumber', 'talkspace', 'talkspacee',
			'subjectspace', 'subjectspacee',

			# More parser functions corresponding to CoreMagicVariables.
			# For this group, the first parameter to the parser function is
			# "raw" (uses the 'raw' format if present) and the no-args form
			# (and the magic variable) defaults to 'not raw'.
			'numberofarticles', 'numberoffiles',
			'numberofusers',
			'numberofactiveusers',
			'numberofpages',
			'numberofadmins',
			'numberofedits',

			# These magic words already contain the hash, and the no-args form
			# is the same as passing an empty first argument
			'bcp47',
			'dir',
			'interwikilink',
			'interlanguagelink',

			# ###############################################
			# The following are not from core's $noHash list
			# but are instead special callbacks from core:
			'int', 'displaytitle', 'pagesinnamespace',
		] );
	}

	// Drop a single trailing colon from the synonym
	$synonym = substr( $func, -1 ) === ':' ? substr( $func, 0, -1 ) : $func;
	// Everything outside the no-hash set is registered with a '#' prefix
	$prefix = isset( self::$noHashFunctions[$magicword] ) ? '' : '#';

	$this->functionSynonyms[(int)$caseSensitive][$prefix . $synonym] = $magicword;
}
732 | |
/** @inheritDoc */
protected function getMagicWords(): array {
	$this->loadSiteData();
	// Per magic word name: the case-sensitivity flag followed by its aliases
	$words = $this->apiMagicWords;
	return $words;
}
738 | |
/** @inheritDoc */
public function getMagicWordMatcher( string $id ): string {
	$this->loadSiteData();
	if ( isset( $this->allMWs[$id] ) ) {
		return $this->allMWs[$id];
	}
	// Unknown magic word: return a regex that never matches
	return '/^(?!)$/';
}
744 | |
/** @inheritDoc */
public function getParameterizedAliasMatcher( array $words ): callable {
	$this->loadSiteData();
	// Keep only the parameterized matchers for the requested magic words
	$matchers = array_intersect_key( $this->paramMWs, array_flip( $words ) );

	return static function ( $text ) use ( $matchers ) {
		// $mwName is the canonical magic word name,
		// $pattern has patterns for matching its aliases
		foreach ( $matchers as $mwName => $pattern ) {
			if ( !preg_match( $pattern, $text, $groups ) ) {
				continue;
			}

			// Ex. regexp here is, /^(?:(?:|vinculo\=(.*?)|enlace\=(.*?)|link\=(.*?)))$/uS
			// Return the first non-empty capture group (the full match at
			// index 0 is skipped). If there is none, it's safe to return an
			// empty string since we did get a match.
			foreach ( array_slice( $groups, 1 ) as $captured ) {
				if ( $captured !== '' ) {
					return [ 'k' => $mwName, 'v' => $captured ];
				}
			}
			return [ 'k' => $mwName, 'v' => '' ];
		}
		return null;
	};
}
772 | |
/**
 * Register an extension tag name (stored lowercased).
 *
 * This function is public so it can be used to synchronize env for
 * hybrid parserTests. The parserTests setup includes the definition
 * of a number of non-standard extension tags, whose names are passed
 * over from the JS side in hybrid testing.
 * @param string $tag Name of an extension tag assumed to be present
 */
public function ensureExtensionTag( string $tag ): void {
	$this->loadSiteData();
	$key = mb_strtolower( $tag );
	$this->extensionTags[$key] = true;
}
784 | |
/** @inheritDoc */
protected function getNonNativeExtensionTags(): array {
	$this->loadSiteData();
	// Keys are lowercased tag names; values are always true
	$tags = $this->extensionTags;
	return $tags;
}
790 | |
/** @inheritDoc */
public function getMaxTemplateDepth(): int {
	// Not in the API result; comes from the 'maxDepth' constructor
	// option or the property default.
	$depth = $this->maxDepth;
	return $depth;
}
796 | |
/** @inheritDoc */
protected function getSpecialNSAliases(): array {
	// The namespace map is only populated once the site data is loaded.
	// Without this, a call made before any other accessor would return
	// only the hardcoded 'Special' entry.
	$this->loadSiteData();

	$nsAliases = [
		'Special',
	];
	// Every local name or alias that maps to namespace id -1 is included,
	// quoted for use in a regex
	foreach ( $this->nsIds as $name => $id ) {
		if ( $id === -1 ) {
			$nsAliases[] = $this->quoteTitleRe( $name, '!' );
		}
	}
	return $nsAliases;
}
809 | |
/** @inheritDoc */
protected function getSpecialPageAliases( string $specialPage ): array {
	// The alias list is only populated once the site data is loaded.
	// Without this, a call made before any other accessor would return
	// only the page name itself.
	$this->loadSiteData();

	$spAliases = [ $specialPage ];
	foreach ( $this->specialPageAliases as $special ) {
		// Only the first entry whose real name matches is used
		if ( $special['realname'] === $specialPage ) {
			$spAliases = array_merge( $spAliases, $special['aliases'] );
			break;
		}
	}
	return $spAliases;
}
821 | |
/** @inheritDoc */
protected function getProtocols(): array {
	$this->loadSiteData();
	// The 'protocols' list exactly as returned by the API
	$protocols = $this->protocols;
	return $protocols;
}
827 | |
828 | /** @var ?MockMetrics */ |
829 | private $metrics; |
830 | |
/** @inheritDoc */
public function metrics(): ?StatsdDataFactoryInterface {
	// Created lazily on first use and then reused
	$this->metrics ??= new MockMetrics();
	return $this->metrics;
}
838 | |
/**
 * Increment a counter metric
 * @param string $name
 * @param array $labels Currently unused
 * @param float $amount Currently unused; the counter is incremented once
 * @return void
 */
public function incrementCounter( string $name, array $labels, float $amount = 1 ): void {
	// The MockMetrics instance is otherwise only created by metrics();
	// make sure it exists so this does not fail on a null property.
	$this->metrics ??= new MockMetrics();
	// We don't use the labels for now, using MockMetrics instead
	$this->metrics->increment( $name );
}
850 | |
/**
 * Record a timing metric
 * @param string $name
 * @param float $value
 * @param array $labels Currently unused
 * @return void
 */
public function observeTiming( string $name, float $value, array $labels ): void {
	// The MockMetrics instance is otherwise only created by metrics();
	// make sure it exists so this does not fail on a null property.
	$this->metrics ??= new MockMetrics();
	// We don't use the labels for now, using MockMetrics instead
	$this->metrics->timing( $name, $value );
}
862 | |
/**
 * Record a histogram metric
 * @param string $name
 * @param float $value A time value in milliseconds
 * @param array $buckets The buckets used in this histogram
 * @param array $labels The metric labels
 * @return void
 */
public function observeHistogram( string $name, float $value, array $buckets, array $labels ) {
	// The MockMetrics instance is otherwise only created by metrics();
	// make sure it exists so this does not fail on a null property.
	$this->metrics ??= new MockMetrics();
	$this->metrics->histogram( $name, $value, $buckets, $labels );
}
874 | |
/**
 * Generate mock histogram buckets: zero, the mean, and twice the mean.
 *
 * @param float $mean
 * @param int $skip Unused by this mock implementation
 * @return list<float>
 */
public function getHistogramBuckets( float $mean, int $skip ) {
	$lower = 0.;
	$upper = 2 * $mean;
	return [ $lower, $mean, $upper ];
}
885 | |
/** @inheritDoc */
public function getNoFollowConfig(): array {
	$this->loadSiteData();

	// Each setting falls back to a fixed default when the site data
	// has no value for it.
	$noFollow = $this->siteData['nofollowlinks'] ?? true;
	$nsExceptions = $this->siteData['nofollownsexceptions'] ?? [];
	$domainExceptions = $this->siteData['nofollowdomainexceptions'] ?? [ 'mediawiki.org' ];

	return [
		'nofollow' => $noFollow,
		'nsexceptions' => $nsExceptions,
		'domainexceptions' => $domainExceptions
	];
}
895 | |
/** @inheritDoc */
public function getExternalLinkTarget() {
	$this->loadSiteData();
	// false when the site data has no 'externallinktarget' entry
	$target = $this->siteData['externallinktarget'] ?? false;
	return $target;
}
901 | } |