Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
85.56% |
231 / 270 |
|
78.46% |
51 / 65 |
CRAP | |
0.00% |
0 / 1 |
SiteConfig | |
85.56% |
231 / 270 |
|
78.46% |
51 / 65 |
165.12 | |
0.00% |
0 / 1 |
__construct | |
96.55% |
28 / 29 |
|
0.00% |
0 / 1 |
5 | |||
getObjectFactory | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getLogger | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
getStatsPrefix | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
2 | |||
metrics | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
2 | |||
prefixedStatsFactory | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
observeTiming | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
observeHistogram | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
getHistogramBuckets | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
incrementCounter | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
galleryOptions | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
allowedExternalImagePrefixes | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
3 | |||
determineArticlePath | |
100.00% |
19 / 19 |
|
100.00% |
1 / 1 |
4 | |||
baseURI | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
2 | |||
relativeLinkPrefix | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
2 | |||
mwaToRegex | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
redirectRegexp | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
categoryRegexp | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
4 | |||
bswRegexp | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
canonicalNamespaceId | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
2 | |||
namespaceId | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
2 | |||
namespaceName | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
3 | |||
namespaceHasSubpages | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
namespaceCase | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
2 | |||
namespaceIsTalk | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
ucfirst | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
specialPageLocalName | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
2 | |||
interwikiMagic | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
magicLinkEnabled | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
interwikiMap | |
93.94% |
31 / 33 |
|
0.00% |
0 / 1 |
11.03 | |||
iwp | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
legalTitleChars | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
linkPrefixRegex | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
2 | |||
linkTrail | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
langBcp47 | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
mainpage | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
mainPageLinkTarget | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getMWConfigValue | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
2 | |||
rtl | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
langConverterEnabledBcp47 | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
3 | |||
script | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
scriptpath | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
server | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
exportMetadataToHeadBcp47 | |
0.00% |
0 / 11 |
|
0.00% |
0 / 1 |
6 | |||
timezoneOffset | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
variants | |
91.30% |
21 / 23 |
|
0.00% |
0 / 1 |
8.04 | |||
variantsFor | |
100.00% |
12 / 12 |
|
100.00% |
1 / 1 |
2 | |||
widthOption | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
getVariableIDs | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getFunctionSynonyms | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getMagicWords | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getMagicWordMatcher | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getParameterizedAliasMatcher | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
4 | |||
populateExtensionTags | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getNonNativeExtensionTags | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
2 | |||
shouldValidateExtConfig | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getMaxTemplateDepth | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
setMaxTemplateDepth | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
getSpecialNSAliases | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
3 | |||
getSpecialPageAliases | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
getProtocols | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getNoFollowConfig | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
1 | |||
getExternalLinkTarget | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getAsyncFallbackMessageKey | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
supportsContentModel | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
4 |
1 | <?php |
2 | /** |
3 | * Copyright (C) 2011-2022 Wikimedia Foundation and others. |
4 | * |
5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by |
7 | * the Free Software Foundation; either version 2 of the License, or |
8 | * (at your option) any later version. |
9 | * |
10 | * This program is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | * GNU General Public License for more details. |
14 | * |
15 | * You should have received a copy of the GNU General Public License along |
16 | * with this program; if not, write to the Free Software Foundation, Inc., |
17 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
18 | */ |
19 | |
20 | // NO_PRELOAD -- anonymous class in parent |
21 | |
22 | namespace MediaWiki\Parser\Parsoid\Config; |
23 | |
24 | use Liuggio\StatsdClient\Factory\StatsdDataFactoryInterface; |
25 | use MediaWiki\Config\Config; |
26 | use MediaWiki\Config\MutableConfig; |
27 | use MediaWiki\Config\ServiceOptions; |
28 | use MediaWiki\Content\IContentHandlerFactory; |
29 | use MediaWiki\Exception\MWUnknownContentModelException; |
30 | use MediaWiki\Interwiki\InterwikiLookup; |
31 | use MediaWiki\Language\Language; |
32 | use MediaWiki\Language\LanguageCode; |
33 | use MediaWiki\Language\LanguageConverter; |
34 | use MediaWiki\Languages\LanguageConverterFactory; |
35 | use MediaWiki\Languages\LanguageFactory; |
36 | use MediaWiki\Languages\LanguageNameUtils; |
37 | use MediaWiki\Logger\LoggerFactory; |
38 | use MediaWiki\MainConfigNames; |
39 | use MediaWiki\Parser\MagicWordArray; |
40 | use MediaWiki\Parser\MagicWordFactory; |
41 | use MediaWiki\Parser\ParserFactory; |
42 | use MediaWiki\Parser\ParserOutput; |
43 | use MediaWiki\SpecialPage\SpecialPageFactory; |
44 | use MediaWiki\Title\NamespaceInfo; |
45 | use MediaWiki\Title\Title; |
46 | use MediaWiki\User\Options\UserOptionsLookup; |
47 | use MediaWiki\Utils\UrlUtils; |
48 | use MediaWiki\WikiMap\WikiMap; |
49 | use Psr\Log\LoggerInterface; |
50 | use UnexpectedValueException; |
51 | use Wikimedia\Bcp47Code\Bcp47Code; |
52 | use Wikimedia\ObjectFactory\ObjectFactory; |
53 | use Wikimedia\Parsoid\Config\SiteConfig as ISiteConfig; |
54 | use Wikimedia\Parsoid\Core\ContentMetadataCollector; |
55 | use Wikimedia\Parsoid\DOM\Document; |
56 | use Wikimedia\Parsoid\Utils\Utils; |
57 | use Wikimedia\Stats\PrefixingStatsdDataFactoryProxy; |
58 | use Wikimedia\Stats\StatsFactory; |
59 | use Wikimedia\Stats\StatsUtils; |
60 | |
61 | /** |
62 | * Site-level configuration for Parsoid |
63 | * |
64 | * This includes both global configuration and wiki-level configuration. |
65 | * |
66 | * @since 1.39 |
67 | * @internal |
68 | */ |
69 | class SiteConfig extends ISiteConfig { |
70 | |
71 | /** |
72 | * Regular expression fragment for matching wikitext comments. |
73 | * Meant for inclusion in other regular expressions. |
74 | */ |
75 | protected const COMMENT_REGEXP_FRAGMENT = '<!--(?>[\s\S]*?-->)'; |
76 | |
77 | public const CONSTRUCTOR_OPTIONS = [ |
78 | MainConfigNames::GalleryOptions, |
79 | MainConfigNames::AllowExternalImages, |
80 | MainConfigNames::AllowExternalImagesFrom, |
81 | MainConfigNames::Server, |
82 | MainConfigNames::ArticlePath, |
83 | MainConfigNames::InterwikiMagic, |
84 | MainConfigNames::ExtraInterlanguageLinkPrefixes, |
85 | MainConfigNames::InterlanguageLinkCodeMap, |
86 | MainConfigNames::LocalInterwikis, |
87 | MainConfigNames::LanguageCode, |
88 | MainConfigNames::NamespaceAliases, |
89 | MainConfigNames::UrlProtocols, |
90 | MainConfigNames::Script, |
91 | MainConfigNames::ScriptPath, |
92 | MainConfigNames::LoadScript, |
93 | MainConfigNames::LocalTZoffset, |
94 | MainConfigNames::ThumbLimits, |
95 | MainConfigNames::MaxTemplateDepth, |
96 | MainConfigNames::NoFollowLinks, |
97 | MainConfigNames::NoFollowNsExceptions, |
98 | MainConfigNames::NoFollowDomainExceptions, |
99 | MainConfigNames::ExternalLinkTarget, |
100 | MainConfigNames::EnableMagicLinks, |
101 | MainConfigNames::ParsoidExperimentalParserFunctionOutput, |
102 | ]; |
103 | |
104 | private ServiceOptions $config; |
105 | private Config $mwConfig; |
106 | /** Parsoid-specific options array from $config */ |
107 | private array $parsoidSettings; |
108 | private Language $contLang; |
109 | private StatsdDataFactoryInterface $stats; |
110 | private StatsFactory $statsFactory; |
111 | private MagicWordFactory $magicWordFactory; |
112 | private NamespaceInfo $namespaceInfo; |
113 | private SpecialPageFactory $specialPageFactory; |
114 | private InterwikiLookup $interwikiLookup; |
115 | private ParserFactory $parserFactory; |
116 | private UserOptionsLookup $userOptionsLookup; |
117 | private ObjectFactory $objectFactory; |
118 | private LanguageFactory $languageFactory; |
119 | private LanguageConverterFactory $languageConverterFactory; |
120 | private LanguageNameUtils $languageNameUtils; |
121 | private UrlUtils $urlUtils; |
122 | private IContentHandlerFactory $contentHandlerFactory; |
123 | private ?string $baseUri = null; |
124 | private ?string $relativeLinkPrefix = null; |
125 | private ?array $interwikiMap = null; |
126 | private ?array $variants = null; |
127 | private ?array $extensionTags = null; |
128 | private bool $isTimedMediaHandlerLoaded; |
129 | |
/**
 * Store the injected MediaWiki services and apply Parsoid-specific overrides.
 *
 * Asserts that $config carries every option named in self::CONSTRUCTOR_OPTIONS,
 * layers the optional 'linting', 'wt2htmlLimits' and 'html2wtLimits' entries of
 * $parsoidSettings over the parent defaults, and registers every entry of
 * $extensionParsoidModules as an extension module.
 *
 * @param ServiceOptions $config MediaWiki main configuration object
 * @param array $parsoidSettings Parsoid-specific options array from main configuration.
 * @param ObjectFactory $objectFactory
 * @param Language $contentLanguage Content language.
 * @param StatsdDataFactoryInterface $stats
 * @param StatsFactory $statsFactory
 * @param MagicWordFactory $magicWordFactory
 * @param NamespaceInfo $namespaceInfo
 * @param SpecialPageFactory $specialPageFactory
 * @param InterwikiLookup $interwikiLookup
 * @param UserOptionsLookup $userOptionsLookup
 * @param LanguageFactory $languageFactory
 * @param LanguageConverterFactory $languageConverterFactory
 * @param LanguageNameUtils $languageNameUtils
 * @param UrlUtils $urlUtils
 * @param IContentHandlerFactory $contentHandlerFactory
 * @param array $extensionParsoidModules Module configs or specs, each passed
 *   to registerExtensionModule()
 * @param ParserFactory $parserFactory Temporary dependency, see T268776
 * @param Config $mwConfig
 * @param bool $isTimedMediaHandlerLoaded
 */
public function __construct(
	ServiceOptions $config,
	array $parsoidSettings,
	ObjectFactory $objectFactory,
	Language $contentLanguage,
	StatsdDataFactoryInterface $stats,
	StatsFactory $statsFactory,
	MagicWordFactory $magicWordFactory,
	NamespaceInfo $namespaceInfo,
	SpecialPageFactory $specialPageFactory,
	InterwikiLookup $interwikiLookup,
	UserOptionsLookup $userOptionsLookup,
	LanguageFactory $languageFactory,
	LanguageConverterFactory $languageConverterFactory,
	LanguageNameUtils $languageNameUtils,
	UrlUtils $urlUtils,
	IContentHandlerFactory $contentHandlerFactory,
	array $extensionParsoidModules,
	// $parserFactory is temporary and may be removed once a better solution is found.
	ParserFactory $parserFactory, // T268776
	Config $mwConfig,
	bool $isTimedMediaHandlerLoaded
) {
	parent::__construct();

	// Fail early if the caller forgot one of the options this class reads.
	$config->assertRequiredOptions( self::CONSTRUCTOR_OPTIONS );

	// Configuration sources
	$this->config = $config;
	$this->mwConfig = $mwConfig;
	$this->parsoidSettings = $parsoidSettings;

	// Injected services
	$this->objectFactory = $objectFactory;
	$this->contLang = $contentLanguage;
	$this->stats = $stats;
	$this->statsFactory = $statsFactory;
	$this->magicWordFactory = $magicWordFactory;
	$this->namespaceInfo = $namespaceInfo;
	$this->specialPageFactory = $specialPageFactory;
	$this->interwikiLookup = $interwikiLookup;
	$this->parserFactory = $parserFactory;
	$this->userOptionsLookup = $userOptionsLookup;
	$this->languageFactory = $languageFactory;
	$this->languageConverterFactory = $languageConverterFactory;
	$this->languageNameUtils = $languageNameUtils;
	$this->urlUtils = $urlUtils;
	$this->contentHandlerFactory = $contentHandlerFactory;

	// Override parent default
	// @todo: Add this setting to MW's MainConfigSchema
	$linting = $parsoidSettings['linting'] ?? null;
	if ( $linting !== null ) {
		$this->linterEnabled = $linting;
	}

	// Array union: configured limits win, parent defaults fill in missing keys.
	$wt2htmlLimits = $parsoidSettings['wt2htmlLimits'] ?? null;
	if ( $wt2htmlLimits !== null ) {
		$this->wt2htmlLimits = $wt2htmlLimits + $this->wt2htmlLimits;
	}
	$html2wtLimits = $parsoidSettings['html2wtLimits'] ?? null;
	if ( $html2wtLimits !== null ) {
		$this->html2wtLimits = $html2wtLimits + $this->html2wtLimits;
	}

	// Register extension modules
	foreach ( $extensionParsoidModules as $moduleConfigOrSpec ) {
		$this->registerExtensionModule( $moduleConfigOrSpec );
	}

	$this->isTimedMediaHandlerLoaded = $isTimedMediaHandlerLoaded;
}
218 | |
/**
 * Return the ObjectFactory that was injected into the constructor.
 *
 * @inheritDoc
 */
public function getObjectFactory(): ObjectFactory {
	$factory = $this->objectFactory;
	return $factory;
}
223 | |
/**
 * Return the logger, fetching the 'Parsoid' channel from LoggerFactory on
 * first use and keeping it in $this->logger afterwards.
 *
 * @inheritDoc
 */
public function getLogger(): LoggerInterface {
	// TODO: inject
	$this->logger ??= LoggerFactory::getInstance( 'Parsoid' );
	return $this->logger;
}
232 | |
/**
 * Get the prefix applied to Parsoid metric names.
 *
 * Reads 'metricsPrefix' from the Parsoid settings and falls back to
 * 'Parsoid.' when that key is absent or null.
 *
 * @param bool $trimmed Whether to strip trailing dots from the prefix
 * @return string
 */
private function getStatsPrefix( bool $trimmed = false ): string {
	$prefix = $this->parsoidSettings['metricsPrefix'] ?? 'Parsoid.';
	return $trimmed ? rtrim( $prefix, '.' ) : $prefix;
}
245 | |
/**
 * Per-instance cache for the proxy built by metrics().
 */
private ?StatsdDataFactoryInterface $prefixedStatsdProxy = null;

/**
 * Return a statsd data factory whose metric names carry the Parsoid prefix.
 *
 * The proxy wraps the injected $stats factory and is built lazily, once per
 * SiteConfig instance. It used to live in a function-level `static`, which
 * PHP shares between all instances of the class: a second SiteConfig built
 * with a different $stats object or 'metricsPrefix' would silently get the
 * first instance's proxy.
 *
 * @return StatsdDataFactoryInterface|null
 */
public function metrics(): ?StatsdDataFactoryInterface {
	// TODO: inject
	$this->prefixedStatsdProxy ??= new PrefixingStatsdDataFactoryProxy(
		// Our stats will also get prefixed with 'MediaWiki.'
		$this->stats,
		$this->getStatsPrefix()
	);
	return $this->prefixedStatsdProxy;
}
256 | |
/**
 * Derive a StatsFactory scoped to the Parsoid component.
 *
 * The component name is the stats prefix with trailing dots removed.
 *
 * @return StatsFactory
 */
public function prefixedStatsFactory(): StatsFactory {
	return $this->statsFactory->withComponent( $this->getStatsPrefix( true ) );
}
264 | |
/**
 * Observe a value on a timing metric from the prefixed stats factory.
 *
 * @param string $name Metric name
 * @param float $value A time value in milliseconds
 * @param array $labels Labels applied to the metric before observing
 * @return void
 */
public function observeTiming( string $name, float $value, array $labels ) {
	$timing = $this->prefixedStatsFactory()->getTiming( $name );
	$timing->setLabels( $labels )->observe( $value );
}
277 | |
/**
 * Observe a value on a histogram metric from the prefixed stats factory.
 *
 * Labels are set one at a time, via setLabel(), before the value is observed.
 *
 * @param string $name Metric name
 * @param float $value The value to observe
 * @param array $buckets The buckets used in this histogram
 * @param array $labels The metric labels, keyed by label name
 * @return void
 */
public function observeHistogram( string $name, float $value, array $buckets, array $labels ) {
	$histogram = $this->prefixedStatsFactory()->getHistogram( $name, $buckets );
	foreach ( $labels as $key => $labelText ) {
		$histogram->setLabel( $key, $labelText );
	}
	$histogram->observe( $value );
}
293 | |
/**
 * Build histogram buckets from a mean and a skip value.
 *
 * Delegates to StatsUtils::makeBucketsFromMean().
 *
 * @param float $mean
 * @param int $skip
 * @return float[]
 */
public function getHistogramBuckets( float $mean, int $skip ) {
	$buckets = StatsUtils::makeBucketsFromMean( $mean, $skip );
	return $buckets;
}
303 | |
/**
 * Increment a counter metric from the prefixed stats factory.
 *
 * @param string $name Metric name
 * @param array $labels Labels applied to the metric before incrementing
 * @param float $amount Amount to add; defaults to 1
 * @return void
 */
public function incrementCounter( string $name, array $labels, float $amount = 1 ) {
	$counter = $this->prefixedStatsFactory()->getCounter( $name );
	$counter->setLabels( $labels )->incrementBy( $amount );
}
316 | |
/**
 * Return the configured GalleryOptions value.
 *
 * @return array
 */
public function galleryOptions(): array {
	$options = $this->config->get( MainConfigNames::GalleryOptions );
	return $options;
}
320 | |
/**
 * List the URL prefixes from which external images may be embedded.
 *
 * When AllowExternalImages is enabled the result is a single empty prefix.
 * Otherwise it is AllowExternalImagesFrom cast to an array, or an empty
 * array when that setting is falsy.
 *
 * @return array
 */
public function allowedExternalImagePrefixes(): array {
	if ( $this->config->get( MainConfigNames::AllowExternalImages ) ) {
		return [ '' ];
	}

	$allowFrom = $this->config->get( MainConfigNames::AllowExternalImagesFrom );
	if ( !$allowFrom ) {
		return [];
	}
	return (array)$allowFrom;
}
329 | |
/**
 * Compute the article base URI and the relative link prefix.
 *
 * Concatenates `$wgServer` and `$wgArticlePath`, strips the trailing '$1'
 * placeholder, and splits the path at its last '/'. Everything up to and
 * including that slash becomes `$this->baseUri`; the remainder, prefixed
 * with '.', together with any query and fragment, becomes
 * `$this->relativeLinkPrefix`.
 *
 * @throws UnexpectedValueException If the article path does not end in '$1'
 *   or the resulting URL cannot be parsed
 */
private function determineArticlePath(): void {
	$server = $this->config->get( MainConfigNames::Server );
	$articlePath = $this->config->get( MainConfigNames::ArticlePath );
	$url = $server . $articlePath;

	if ( !str_ends_with( $url, '$1' ) ) {
		throw new UnexpectedValueException( "Article path '$url' does not have '$1' at the end" );
	}
	// Drop the two-character '$1' placeholder.
	$url = substr( $url, 0, -2 );

	$bits = $this->urlUtils->parse( $url );
	if ( !$bits ) {
		throw new UnexpectedValueException( "Failed to parse article path '$url'" );
	}

	$path = empty( $bits['path'] )
		? '/'
		: UrlUtils::removeDotSegments( $bits['path'] );

	// Query and fragment belong to the relative part; everything else to the base.
	$relParts = [ 'query' => true, 'fragment' => true ];
	$rel = array_intersect_key( $bits, $relParts );
	$base = array_diff_key( $bits, $relParts );

	$lastSlash = strrpos( $path, '/' );
	$base['path'] = substr( $path, 0, $lastSlash + 1 );
	$rel['path'] = '.' . substr( $path, $lastSlash );

	$this->baseUri = UrlUtils::assemble( $base );
	$this->relativeLinkPrefix = UrlUtils::assemble( $rel );
}
368 | |
/**
 * Return the article base URI, computing it on first use.
 *
 * @return string
 */
public function baseURI(): string {
	if ( $this->baseUri !== null ) {
		return $this->baseUri;
	}
	$this->determineArticlePath();
	return $this->baseUri;
}
375 | |
/**
 * Return the relative link prefix, computing it on first use.
 *
 * @return string
 */
public function relativeLinkPrefix(): string {
	if ( $this->relativeLinkPrefix !== null ) {
		return $this->relativeLinkPrefix;
	}
	$this->determineArticlePath();
	return $this->relativeLinkPrefix;
}
382 | |
/**
 * Join the base regex fragments of a MagicWordArray into one alternation.
 *
 * This is very similar to MagicWordArray::getBaseRegex() except that the
 * named grouping constructs are not emitted, since they can cause havoc
 * when the result is embedded in other regexps with grouping constructs.
 *
 * @param MagicWordArray $magicWordArray
 * @param string $delimiter Regex delimiter passed on to getBaseRegex()
 * @return string Fragments joined with '|'
 */
private static function mwaToRegex(
	MagicWordArray $magicWordArray,
	string $delimiter = '/'
): string {
	$fragments = $magicWordArray->getBaseRegex( false, $delimiter );
	return implode( '|', $fragments );
}
398 | |
/**
 * Build an '@'-delimited regex, with the S and u modifiers, from the
 * 'redirect' magic word.
 *
 * @return string
 */
public function redirectRegexp(): string {
	$redirectWords = $this->magicWordFactory->newArray( [ 'redirect' ] );
	return '@' . self::mwaToRegex( $redirectWords, '@' ) . '@Su';
}
403 | |
/**
 * Build a case-insensitive, '@'-delimited regex matching any name of the
 * category namespace.
 *
 * The alternatives are the canonical name followed by every content-language
 * namespace alias that maps to NS_CATEGORY and differs from it, each passed
 * through quoteTitleRe().
 *
 * @return string
 */
public function categoryRegexp(): string {
	$canonical = $this->namespaceInfo->getCanonicalName( NS_CATEGORY );
	$names = [ $canonical ];
	foreach ( $this->contLang->getNamespaceAliases() as $alias => $ns ) {
		if ( $ns !== NS_CATEGORY || $alias === $canonical ) {
			continue;
		}
		$names[] = $alias;
	}

	$quoted = array_map(
		fn ( $name ) => $this->quoteTitleRe( $name, '@' ),
		$names
	);
	return '@(?i:' . implode( '|', $quoted ) . ')@';
}
417 | |
/**
 * Build an '@'-delimited regex, with the S and u modifiers, from the
 * double-underscore magic words.
 *
 * @return string
 */
public function bswRegexp(): string {
	$doubleUnderscoreWords = $this->magicWordFactory->getDoubleUnderscoreArray();
	// Aliases for double underscore mws include the underscores.
	// Strip them, since the base regexp will have included them
	// and they aren't expected at the use sites of bswRegexp.
	$pattern = str_replace( '__', '', self::mwaToRegex( $doubleUnderscoreWords, '@' ) );
	return '@' . $pattern . '@Su';
}
426 | |
/**
 * Look up a namespace ID by canonical name, mapping NamespaceInfo's `false`
 * result to null.
 *
 * @inheritDoc
 */
public function canonicalNamespaceId( string $name ): ?int {
	$index = $this->namespaceInfo->getCanonicalIndex( $name );
	if ( $index === false ) {
		return null;
	}
	return $index;
}
432 | |
/**
 * Look up a namespace ID by name in the content language, mapping its
 * `false` result to null.
 *
 * @inheritDoc
 */
public function namespaceId( string $name ): ?int {
	$index = $this->contLang->getNsIndex( $name );
	if ( $index === false ) {
		return null;
	}
	return $index;
}
438 | |
/**
 * Return the formatted content-language name of a namespace.
 *
 * An empty name is returned as-is only for NS_MAIN; for any other
 * namespace an empty name yields null.
 *
 * @inheritDoc
 */
public function namespaceName( int $ns ): ?string {
	$text = $this->contLang->getFormattedNsText( $ns );
	if ( $ns === NS_MAIN || $text !== '' ) {
		return $text;
	}
	return null;
}
444 | |
/**
 * Report whether NamespaceInfo says the namespace has subpages.
 *
 * @inheritDoc
 */
public function namespaceHasSubpages( int $ns ): bool {
	$hasSubpages = $this->namespaceInfo->hasSubpages( $ns );
	return $hasSubpages;
}
449 | |
/**
 * Return 'first-letter' when NamespaceInfo reports the namespace as
 * capitalized, and 'case-sensitive' otherwise.
 *
 * @inheritDoc
 */
public function namespaceCase( int $ns ): string {
	if ( $this->namespaceInfo->isCapitalized( $ns ) ) {
		return 'first-letter';
	}
	return 'case-sensitive';
}
454 | |
/**
 * Report whether NamespaceInfo classifies the namespace as a talk namespace.
 *
 * @inheritDoc
 */
public function namespaceIsTalk( int $ns ): bool {
	$isTalk = $this->namespaceInfo->isTalk( $ns );
	return $isTalk;
}
459 | |
/**
 * Apply the content language's ucfirst() to a string.
 *
 * @inheritDoc
 */
public function ucfirst( string $str ): string {
	$capitalized = $this->contLang->ucfirst( $str );
	return $capitalized;
}
464 | |
/**
 * Translate a special page alias to its local name.
 *
 * The alias is resolved through SpecialPageFactory::resolveAlias(). If the
 * first element of the result is null the alias is returned unchanged;
 * otherwise the result is spread into getLocalNameFor().
 *
 * @inheritDoc
 */
public function specialPageLocalName( string $alias ): ?string {
	$resolved = $this->specialPageFactory->resolveAlias( $alias );
	if ( $resolved[0] === null ) {
		return $alias;
	}
	return $this->specialPageFactory->getLocalNameFor( ...$resolved );
}
470 | |
/**
 * Return the configured InterwikiMagic value.
 *
 * @return bool
 */
public function interwikiMagic(): bool {
	$enabled = $this->config->get( MainConfigNames::InterwikiMagic );
	return $enabled;
}
474 | |
/**
 * Report whether a given kind of magic link is enabled.
 *
 * Looks up $which in the EnableMagicLinks setting; a missing or null entry
 * counts as enabled.
 *
 * @inheritDoc
 */
public function magicLinkEnabled( string $which ): bool {
	$enabledByType = $this->config->get( MainConfigNames::EnableMagicLinks );
	$enabled = $enabledByType[$which] ?? true;
	return $enabled;
}
480 | |
/**
 * Build the interwiki map, keyed by prefix, and cache it on the instance.
 *
 * Every entry has 'prefix' and 'url'. Depending on the interwiki row and
 * the configuration it may also carry 'protorel', 'local', 'language',
 * 'deprecated', 'bcp47', 'localinterwiki', 'extralanglink' and 'code'.
 *
 * @return array
 */
public function interwikiMap(): array {
	// Unfortunate that this mostly duplicates \ApiQuerySiteinfo::appendInterwikiMap()
	if ( $this->interwikiMap !== null ) {
		return $this->interwikiMap;
	}
	$this->interwikiMap = [];

	$rows = $this->interwikiLookup->getAllPrefixes();
	$langNames = $this->languageNameUtils->getLanguageNames();
	$extraLangPrefixes = $this->config->get( MainConfigNames::ExtraInterlanguageLinkPrefixes );
	$extraLangCodeMap = $this->config->get( MainConfigNames::InterlanguageLinkCodeMap );
	$localInterwikis = $this->config->get( MainConfigNames::LocalInterwikis );

	foreach ( $rows as $row ) {
		$prefix = $row['iw_prefix'];

		// ApiQuerySiteInfo::appendInterwikiMap uses PROTO_CURRENT here,
		// but that's the 'current' protocol *of the API request*; use
		// PROTO_CANONICAL instead.
		$url = $this->urlUtils->expand( $row['iw_url'], PROTO_CANONICAL ) ?? false;

		// Fix up broken interwiki hrefs that are missing a $1 placeholder
		// by appending the placeholder at the end. This makes sure that
		// the interwikiMatcher adds one match group per URI, and that
		// interwiki links work as expected.
		if ( !str_contains( $url, '$1' ) ) {
			$url .= '$1';
		}

		$entry = [
			'prefix' => $prefix,
			'url' => $url,
		];

		if ( str_starts_with( $row['iw_url'], '//' ) ) {
			$entry['protorel'] = true;
		}
		if ( ( $row['iw_local'] ?? null ) == '1' ) {
			$entry['local'] = true;
		}
		if ( isset( $langNames[$prefix] ) ) {
			$entry['language'] = true;
			$standard = LanguageCode::replaceDeprecatedCodes( $prefix );
			if ( $standard !== $prefix ) {
				# Note that even if this code is deprecated, it should
				# only be remapped if extralanglink (set below) is false.
				$entry['deprecated'] = $standard;
			}
			$entry['bcp47'] = LanguageCode::bcp47( $standard );
		}
		if ( in_array( $prefix, $localInterwikis, true ) ) {
			$entry['localinterwiki'] = true;
		}
		if ( in_array( $prefix, $extraLangPrefixes, true ) ) {
			$code = $extraLangCodeMap[$prefix] ?? $prefix;
			$entry['extralanglink'] = true;
			$entry['code'] = $code;
			$entry['bcp47'] = LanguageCode::bcp47( $code );
		}

		$this->interwikiMap[$prefix] = $entry;
	}
	return $this->interwikiMap;
}
540 | |
/**
 * Return the current wiki ID as reported by WikiMap.
 *
 * @return string
 */
public function iwp(): string {
	$wikiId = WikiMap::getCurrentWikiId();
	return $wikiId;
}
544 | |
/**
 * Return the value of Title::legalChars().
 *
 * @return string
 */
public function legalTitleChars(): string {
	$chars = Title::legalChars();
	return $chars;
}
548 | |
/**
 * Build the regex matching a link prefix at the end of a string.
 *
 * @return string|null Null when the content language reports no link
 *   prefix extension; otherwise a '/'-delimited, end-anchored character
 *   class over the language's link prefix charset
 */
public function linkPrefixRegex(): ?string {
	if ( $this->contLang->linkPrefixExtension() ) {
		$charset = $this->contLang->linkPrefixCharset();
		return '/[' . $charset . ']+$/Du';
	}
	return null;
}
555 | |
/**
 * Return the content language's link trail.
 *
 * @inheritDoc
 */
protected function linkTrail(): string {
	$trail = $this->contLang->linkTrail();
	return $trail;
}
560 | |
/**
 * Return the content language object as the wiki's BCP-47 language.
 *
 * @return Bcp47Code
 */
public function langBcp47(): Bcp47Code {
	$language = $this->contLang;
	return $language;
}
564 | |
/**
 * Return the prefixed text of the wiki's main page title.
 *
 * @return string
 */
public function mainpage(): string {
	// @todo Perhaps should inject TitleFactory here?
	$mainPage = Title::newMainPage();
	return $mainPage->getPrefixedText();
}
569 | |
/**
 * Return the Title object of the wiki's main page.
 *
 * @return Title
 */
public function mainPageLinkTarget(): Title {
	// @todo Perhaps should inject TitleFactory here?
	$mainPage = Title::newMainPage();
	return $mainPage;
}
574 | |
/**
 * Look up a value in the MediaWiki configuration object.
 *
 * @param string $key
 * @return mixed The configured value for $key, or null if the config
 *   object does not have that key
 */
public function getMWConfigValue( string $key ) {
	if ( !$this->mwConfig->has( $key ) ) {
		return null;
	}
	return $this->mwConfig->get( $key );
}
583 | |
/**
 * Report whether the content language is right-to-left.
 *
 * @return bool
 */
public function rtl(): bool {
	$isRtl = $this->contLang->isRTL();
	return $isRtl;
}
587 | |
/**
 * Report whether language conversion is available for a language.
 *
 * Returns false when conversion is disabled globally or when the language's
 * code is not listed in LanguageConverter::$languagesWithVariants. Otherwise
 * the answer comes from the language's converter.
 *
 * @param Bcp47Code $lang
 * @return bool
 */
public function langConverterEnabledBcp47( Bcp47Code $lang ): bool {
	$converterFactory = $this->languageConverterFactory;
	if ( $converterFactory->isConversionDisabled() ) {
		return false;
	}

	$language = $this->languageFactory->getLanguage( $lang );
	$isListed = in_array( $language->getCode(), LanguageConverter::$languagesWithVariants, true );
	if ( !$isListed ) {
		return false;
	}
	return $converterFactory->getLanguageConverter( $language )->hasVariants();
}
600 | |
/**
 * Return the configured Script value.
 *
 * @return string
 */
public function script(): string {
	$script = $this->config->get( MainConfigNames::Script );
	return $script;
}
604 | |
/**
 * Return the configured ScriptPath value.
 *
 * @return string
 */
public function scriptpath(): string {
	$scriptPath = $this->config->get( MainConfigNames::ScriptPath );
	return $scriptPath;
}
608 | |
/**
 * Return the configured Server value.
 *
 * @return string
 */
public function server(): string {
	$server = $this->config->get( MainConfigNames::Server );
	return $server;
}
612 | |
/**
 * Pass page metadata on to exportMetadataHelper().
 *
 * The display title is the 'displaytitle' page property when that is
 * truthy, and the HTML-escaped default title otherwise. The load script
 * URL comes from configuration; modules, module styles and JS config vars
 * come from $metadata.
 *
 * @inheritDoc
 * @param Document $document
 * @param ContentMetadataCollector $metadata
 * @param string $defaultTitle
 * @param Bcp47Code $lang
 */
public function exportMetadataToHeadBcp47(
	Document $document,
	ContentMetadataCollector $metadata,
	string $defaultTitle,
	Bcp47Code $lang
): void {
	'@phan-var ParserOutput $metadata'; // @var ParserOutput $metadata

	// Look for a displaytitle.
	$displayTitle = $metadata->getPageProperty( 'displaytitle' );
	if ( !$displayTitle ) {
		// Use the default title, properly escaped
		$displayTitle = Utils::escapeHtml( $defaultTitle );
	}

	$loadScript = $this->config->get( MainConfigNames::LoadScript );
	$modules = $metadata->getModules();
	$moduleStyles = $metadata->getModuleStyles();
	$jsConfigVars = $metadata->getJsConfigVars();

	$this->exportMetadataHelper(
		$document,
		$loadScript,
		$modules,
		$moduleStyles,
		$jsConfigVars,
		$displayTitle,
		$lang
	);
}
641 | |
/**
 * Return the configured LocalTZoffset value.
 *
 * @return int
 */
public function timezoneOffset(): int {
	$offset = $this->config->get( MainConfigNames::LocalTZoffset );
	return $offset;
}
645 | |
/**
 * Language variant information
 *
 * The result is built once and cached on the instance. It is empty when
 * language conversion is disabled.
 *
 * @return array<string,array> Keys are MediaWiki-internal variant codes (e.g. "zh-cn"),
 *   values are arrays with two fields:
 *   - base: (string) Base language code (e.g. "zh") (MediaWiki-internal)
 *   - fallbacks: (string[]) Fallback variants (MediaWiki-internal codes)
 * @deprecated since 1.43; use ::variantsFor() (T320662)
 */
public function variants(): array {
	// Deprecated for all external callers; to make private and remove this warning.
	// This check must stay inline: wfGetCaller() inspects the call stack.
	$calledFromVariantsFor = wfGetCaller() === __CLASS__ . '->variantsFor';
	if ( !$calledFromVariantsFor ) {
		wfDeprecated( __METHOD__, '1.43' );
	}

	if ( $this->variants !== null ) {
		return $this->variants;
	}
	$this->variants = [];

	// Ensure result is empty if language conversion is disabled.
	$baseCodes = $this->languageConverterFactory->isConversionDisabled()
		? []
		: LanguageConverter::$languagesWithVariants;

	foreach ( $baseCodes as $baseCode ) {
		$language = $this->languageFactory->getLanguage( $baseCode );
		$converter = $this->languageConverterFactory->getLanguageConverter( $language );
		if ( !$converter->hasVariants() ) {
			continue;
		}

		foreach ( $converter->getVariants() as $variantCode ) {
			$fallbacks = $converter->getVariantFallbacks( $variantCode );
			// A non-array fallback is wrapped in a one-element array.
			$this->variants[$variantCode] = [
				'base' => $baseCode,
				'fallbacks' => is_array( $fallbacks ) ? $fallbacks : [ $fallbacks ],
			];
		}
	}
	return $this->variants;
}
692 | |
/**
 * Language variant information for the given language (or null if
 * unknown).
 *
 * Looks the language's internal code up in variants() and converts the
 * stored codes to language objects via the language factory.
 *
 * @param Bcp47Code $code The language for which you want variant information
 * @return ?array{base:Bcp47Code,fallbacks:Bcp47Code[]} an array with
 *   two fields:
 *   - base: (Bcp47Code) Base BCP-47 language code (e.g. "zh")
 *   - fallbacks: (Bcp47Code[]) Fallback variants, as BCP-47 codes
 */
public function variantsFor( Bcp47Code $code ): ?array {
	$allVariants = $this->variants();
	$internalCode = $this->languageFactory->getLanguage( $code )->getCode();
	if ( !isset( $allVariants[$internalCode] ) ) {
		return null;
	}
	$info = $allVariants[$internalCode];

	$base = $this->languageFactory->getLanguage( $info['base'] );
	$fallbacks = array_map(
		fn ( $fallbackCode ) => $this->languageFactory->getLanguage( $fallbackCode ),
		$info['fallbacks']
	);
	return [
		'base' => $base,
		'fallbacks' => $fallbacks,
	];
}
717 | |
/**
 * Return the ThumbLimits entry selected by the default 'thumbsize' option.
 *
 * @return int
 */
public function widthOption(): int {
	// Even though this looks like Parsoid is supporting per-user thumbsize
	// options, that is not the case, Parsoid doesn't receive user session state
	$defaultThumbsize = $this->userOptionsLookup->getDefaultOption( 'thumbsize' );
	$thumbLimits = $this->config->get( MainConfigNames::ThumbLimits );
	return $thumbLimits[$defaultThumbsize];
}
724 | |
/**
 * @inheritDoc
 */
protected function getVariableIDs(): array {
	// Delegates straight to the magic word factory.
	$variableIds = $this->magicWordFactory->getVariableIDs();
	return $variableIds;
}
729 | |
/**
 * @inheritDoc
 */
protected function getFunctionSynonyms(): array {
	// The synonyms are read from the parser factory's main parser instance.
	$mainParser = $this->parserFactory->getMainInstance();
	return $mainParser->getFunctionSynonyms();
}
734 | |
/**
 * Magic word table of the content language.
 *
 * @return array<string,array> $magicWord => [ int $caseSensitive, string ...$alias ]
 */
protected function getMagicWords(): array {
	$magicWords = $this->contLang->getMagicWords();
	return $magicWords;
}
739 | |
/**
 * @inheritDoc
 */
public function getMagicWordMatcher( string $id ): string {
	// Resolve the magic word by id, then ask it for its start-to-end regex.
	$magicWord = $this->magicWordFactory->get( $id );
	return $magicWord->getRegexStartToEnd();
}
744 | |
/**
 * @inheritDoc
 */
public function getParameterizedAliasMatcher( array $words ): callable {
	// PORT-FIXME: this should be combined with
	// getMediaPrefixParameterizedAliasMatcher; see PORT-FIXME comment
	// in that method.

	// Drop the timedmedia_* words unless that extension is loaded, so
	// Parsoid doesn't have a hard dependency on an extension.
	$wordIds = $this->isTimedMediaHandlerLoaded
		? $words
		: preg_grep( '/^timedmedia_/', $words, PREG_GREP_INVERT );
	$matcher = $this->magicWordFactory->newArray( $wordIds );

	return static function ( $text ) use ( $matcher ) {
		[ $key, $value ] = $matcher->matchVariableStartToEnd( $text );
		// Both halves of the match result must be present for a hit.
		if ( $key !== false && $value !== false ) {
			return [ 'k' => $key, 'v' => $value ];
		}
		return null;
	};
}
765 | |
/**
 * Fill the extension tag cache from the main parser instance.
 *
 * Each tag name reported by the parser becomes a key mapped to true.
 */
private function populateExtensionTags(): void {
	$tagNames = $this->parserFactory->getMainInstance()->getTags();
	$this->extensionTags = array_fill_keys( $tagNames, true );
}
769 | |
/**
 * @inheritDoc
 */
protected function getNonNativeExtensionTags(): array {
	// Already populated by an earlier call: reuse it.
	if ( $this->extensionTags !== null ) {
		return $this->extensionTags;
	}

	// First use: build the tag map, then hand it back.
	$this->populateExtensionTags();
	return $this->extensionTags;
}
777 | |
/**
 * @inheritDoc
 */
protected function shouldValidateExtConfig(): bool {
	// JSON schema validation of extension module configurations is
	// reserved for test runs, detected via the MW_PHPUNIT_TEST constant.
	$runningTests = defined( 'MW_PHPUNIT_TEST' );
	return $runningTests;
}
784 | |
/**
 * @inheritDoc
 */
public function getMaxTemplateDepth(): int {
	// The configured value is coerced to an integer before returning.
	$configured = $this->config->get( MainConfigNames::MaxTemplateDepth );
	return intval( $configured );
}
789 | |
/**
 * Overrides the max template depth in the MediaWiki configuration.
 *
 * @param int $depth New maximum template depth
 */
public function setMaxTemplateDepth( int $depth ): void {
	// Parsoid's command-line tools accept the max template depth as a
	// CLI argument. The legacy preprocessor is still invoked in some
	// situations, so overriding ::getMaxTemplateDepth() alone is not
	// enough: the Config service itself has to be updated.
	if ( !( $this->config instanceof MutableConfig ) ) {
		// The config object can't be written to; fall back on the global
		// variable (hopefully we're using a GlobalVarConfig and this
		// will work).
		$GLOBALS['wgMaxTemplateDepth'] = $depth;
		return;
	}

	$this->config->set( MainConfigNames::MaxTemplateDepth, $depth );
}
807 | |
/**
 * @inheritDoc
 */
protected function getSpecialNSAliases(): array {
	// Always present: the literal name plus the content language's own
	// name for the Special namespace.
	$localizedName = $this->contLang->getNsText( NS_SPECIAL );
	$nsAliases = [ 'Special', $this->quoteTitleRe( $localizedName ) ];

	// Content-language aliases win over configured aliases when both
	// define the same name (array union keeps the left-hand entry).
	$aliasToNamespace = $this->contLang->getNamespaceAliases()
		+ $this->config->get( MainConfigNames::NamespaceAliases );

	// Strict search: keep only names mapped to exactly NS_SPECIAL.
	foreach ( array_keys( $aliasToNamespace, NS_SPECIAL, true ) as $aliasName ) {
		$nsAliases[] = $this->quoteTitleRe( $aliasName );
	}

	return $nsAliases;
}
826 | |
/**
 * @inheritDoc
 */
protected function getSpecialPageAliases( string $specialPage ): array {
	// The given name always comes first, followed by whatever aliases the
	// content language lists for it (none if it has no entry).
	$aliasTable = $this->contLang->getSpecialPageAliases();
	$localAliases = $aliasTable[$specialPage] ?? [];
	return array_merge( [ $specialPage ], $localAliases );
}
833 | |
/**
 * @inheritDoc
 */
protected function getProtocols(): array {
	// Read directly from the UrlProtocols configuration setting.
	$protocols = $this->config->get( MainConfigNames::UrlProtocols );
	return $protocols;
}
838 | |
/**
 * Collect the nofollow-related configuration settings.
 *
 * @return array With keys:
 *  - nofollow: value of the NoFollowLinks setting
 *  - nsexceptions: value of the NoFollowNsExceptions setting
 *  - domainexceptions: value of the NoFollowDomainExceptions setting
 */
public function getNoFollowConfig(): array {
	$config = $this->config;
	$noFollow = $config->get( MainConfigNames::NoFollowLinks );
	$nsExceptions = $config->get( MainConfigNames::NoFollowNsExceptions );
	$domainExceptions = $config->get( MainConfigNames::NoFollowDomainExceptions );

	return [
		'nofollow' => $noFollow,
		'nsexceptions' => $nsExceptions,
		'domainexceptions' => $domainExceptions,
	];
}
846 | |
/**
 * Value of the ExternalLinkTarget configuration setting.
 *
 * @return string|false
 */
public function getExternalLinkTarget() {
	$target = $this->config->get( MainConfigNames::ExternalLinkTarget );
	return $target;
}
851 | |
/**
 * Return the localization key we should use for asynchronous
 * fallback content.
 *
 * @return string Message key; the same fixed key on every call
 */
public function getAsyncFallbackMessageKey(): string {
	$messageKey = 'parsoid-async-not-ready-fallback';
	return $messageKey;
}
859 | |
860 | // MW-specific helper |
861 | |
/**
 * Returns true iff Parsoid natively supports the given content model.
 *
 * A model is supported when it is the wikitext model, when its content
 * handler's default format is wikitext, or when a content model handler
 * is available for it.
 *
 * @param string $model content model identifier
 * @return bool
 */
public function supportsContentModel( string $model ): bool {
	if ( $model === CONTENT_MODEL_WIKITEXT ) {
		return true;
	}

	// Check if the content model serializes to wikitext.
	// NOTE: We could use isSupportedFormat( CONTENT_FORMAT_WIKITEXT ) if PageContent::getContent()
	// would specify the format when calling serialize().
	try {
		$defaultFormat = $this->contentHandlerFactory
			->getContentHandler( $model )
			->getDefaultFormat();
	} catch ( MWUnknownContentModelException $e ) {
		// If the content model is not known, it can't be supported.
		return false;
	}

	// Short-circuit: the content model handler lookup only runs when the
	// default format is not wikitext.
	return $defaultFormat === CONTENT_FORMAT_WIKITEXT
		|| $this->getContentModelHandler( $model ) !== null;
}
887 | |
888 | } |