Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
40.74% |
165 / 405 |
|
28.33% |
17 / 60 |
CRAP | |
0.00% |
0 / 1 |
SiteConfig | |
40.74% |
165 / 405 |
|
28.33% |
17 / 60 |
4459.13 | |
0.00% |
0 / 1 |
registerExtensionModule | |
0.00% |
0 / 14 |
|
0.00% |
0 / 1 |
20 | |||
unregisterExtensionModule | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
getExtensionModules | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
12 | |||
__construct | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getLogger | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
setLogger | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
galleryOptions | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
2 | |||
addHTMLTemplateParameters | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
metrics | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
incrementCounter | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
observeTiming | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
scrubBidiChars | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
allowedExternalImagePrefixes | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
baseURI | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
relativeLinkPrefix | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
bswPagePropRegexp | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
6 | |||
canonicalNamespaceId | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
namespaceId | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
namespaceName | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
namespaceHasSubpages | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
namespaceCase | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
namespaceIsTalk | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
2 | |||
ucfirst | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
5 | |||
specialPageLocalName | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
interwikiMagic | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
magicLinkEnabled | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
interwikiMap | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
interwikiMapNoNamespaces | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
4 | |||
interwikiMatcher | |
100.00% |
53 / 53 |
|
100.00% |
1 / 1 |
15 | |||
iwp | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
legalTitleChars | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
linkPrefixRegex | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
linkTrail | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
linkTrailRegex | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
3 | |||
langBcp47 | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
mainPageLinkTarget | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
getMWConfigValue | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
rtl | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
langConverterEnabledBcp47 | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
script | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
scriptpath | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
server | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
exportMetadataToHeadBcp47 | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
exportMetadataHelper | |
0.00% |
0 / 34 |
|
0.00% |
0 / 1 |
42 | |||
redirectRegexp | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
categoryRegexp | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
bswRegexp | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
solTransparentWikitextRegexp | |
0.00% |
0 / 19 |
|
0.00% |
0 / 1 |
6 | |||
solTransparentWikitextNoWsRegexp | |
0.00% |
0 / 19 |
|
0.00% |
0 / 1 |
12 | |||
timezoneOffset | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
variantsFor | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
widthOption | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
getVariableIDs | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
getMagicWords | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
haveComputedFunctionSynonyms | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getFunctionSynonyms | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
updateFunctionSynonym | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
populateMagicWords | |
91.30% |
21 / 23 |
|
0.00% |
0 / 1 |
9.05 | |||
mwAliases | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
getMagicWordForFunctionHook | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
getMagicWordForVariable | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
getMagicWordCanonicalName | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
20 | |||
getMagicWordForMediaOption | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
getMagicWordForBehaviorSwitch | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
isBehaviorSwitch | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getMagicWordWT | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
20 | |||
getMagicWordMatcher | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
getParameterizedAliasMatcher | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
getMediaPrefixParameterizedAliasMatcher | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
getMaxTemplateDepth | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
getSpecialNSAliases | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
getSpecialPageAliases | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
quoteTitleRe | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
1 | |||
getExtResourceURLPatternMatcher | |
95.24% |
20 / 21 |
|
0.00% |
0 / 1 |
7 | |||
linterEnabled | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getLinterSiteConfig | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
2 | |||
makeExtResourceURL | |
100.00% |
17 / 17 |
|
100.00% |
1 / 1 |
8 | |||
getProtocols | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
getProtocolsRegex | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
5 | |||
hasValidProtocol | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
findValidProtocol | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
fakeTimestamp | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getNonNativeExtensionTags | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
getObjectFactory | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
2 | |||
constructExtConfig | |
0.00% |
0 / 11 |
|
0.00% |
0 / 1 |
6 | |||
tagNeedsNowikiStrippedInTagPF | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
processExtensionModule | |
0.00% |
0 / 36 |
|
0.00% |
0 / 1 |
90 | |||
getExtConfig | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
getContentModelHandler | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getAnnotationStrippers | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
isExtensionTag | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
isAnnotationTag | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getAnnotationTags | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
getExtensionTagNameMap | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
getExtTagConfig | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
getExtTagImpl | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
12 | |||
getExtDOMProcessors | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
getWt2HtmlLimits | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getHtml2WtLimits | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
createLogger | |
0.00% |
0 / 11 |
|
0.00% |
0 / 1 |
12 | |||
getNoFollowConfig | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
getExternalLinkTarget | n/a |
0 / 0 |
n/a |
0 / 0 |
0 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace Wikimedia\Parsoid\Config; |
5 | |
6 | use Liuggio\StatsdClient\Factory\StatsdDataFactoryInterface; |
7 | use Monolog\Formatter\LineFormatter; |
8 | use Monolog\Handler\ErrorLogHandler; |
9 | use Monolog\Handler\StreamHandler; |
10 | use Monolog\Logger; |
11 | use Psr\Container\ContainerInterface; |
12 | use Psr\Container\NotFoundExceptionInterface; |
13 | use Psr\Log\LoggerInterface; |
14 | use Psr\Log\LogLevel; |
15 | use Psr\Log\NullLogger; |
16 | use Wikimedia\Assert\Assert; |
17 | use Wikimedia\Bcp47Code\Bcp47Code; |
18 | use Wikimedia\ObjectFactory\ObjectFactory; |
19 | use Wikimedia\Parsoid\Core\ContentMetadataCollector; |
20 | use Wikimedia\Parsoid\Core\ContentModelHandler; |
21 | use Wikimedia\Parsoid\Core\LinkTarget; |
22 | use Wikimedia\Parsoid\DOM\Document; |
23 | use Wikimedia\Parsoid\Ext\AnnotationStripper; |
24 | use Wikimedia\Parsoid\Ext\ExtensionModule; |
25 | use Wikimedia\Parsoid\Ext\ExtensionTagHandler; |
26 | use Wikimedia\Parsoid\Ext\Gallery\Gallery; |
27 | use Wikimedia\Parsoid\Ext\Indicator\Indicator; |
28 | use Wikimedia\Parsoid\Ext\JSON\JSON; |
29 | use Wikimedia\Parsoid\Ext\LST\LST; |
30 | use Wikimedia\Parsoid\Ext\Nowiki\Nowiki; |
31 | use Wikimedia\Parsoid\Ext\Pre\Pre; |
32 | use Wikimedia\Parsoid\Utils\DOMUtils; |
33 | use Wikimedia\Parsoid\Utils\PHPUtils; |
34 | use Wikimedia\Parsoid\Utils\Utils; |
35 | use Wikimedia\Parsoid\Wikitext\Consts; |
36 | |
37 | /** |
38 | * Site-level configuration interface for Parsoid |
39 | * |
40 | * This includes both global configuration and wiki-level configuration. |
41 | */ |
42 | abstract class SiteConfig { |
43 | /** |
44 | * FIXME: not private so that ParserTests can reset these variables |
45 | * since they reuse site config and other objects between tests for |
46 | * efficiency reasons. |
47 | * |
48 | * @var array|null |
49 | */ |
50 | protected $mwAliases; |
51 | |
52 | /** @var array|null */ |
53 | private $behaviorSwitches; |
54 | |
55 | /** @var array|null */ |
56 | private $variables; |
57 | |
58 | /** @var array|null */ |
59 | private $mediaOptions; |
60 | |
61 | /** @var array|null */ |
62 | protected $functionSynonyms; |
63 | |
64 | /** @var string[] */ |
65 | private $protocolsRegexes = []; |
66 | |
67 | /** |
68 | * FIXME: not private so that ParserTests can reset these variables |
69 | * since they reuse site config and other objects between tests for |
70 | * efficiency reasons. |
71 | * @var array|null |
72 | */ |
73 | protected $interwikiMapNoNamespaces; |
74 | |
75 | /** |
76 | * FIXME: not private so that ParserTests can reset these variables |
77 | * since they reuse site config and other objects between tests for |
78 | * efficiency reasons. |
79 | * @var string|null|bool |
80 | */ |
81 | protected $linkTrailRegex = false; |
82 | |
83 | /** |
84 | * These extension modules provide "core" functionality |
85 | * and their implementations live in the Parsoid repo. |
86 | * |
87 | * @var class-string<ExtensionModule>[] |
88 | */ |
89 | private static $coreExtModules = [ |
90 | // content modules |
91 | JSON::class, |
92 | // extension tags |
93 | Nowiki::class, |
94 | Pre::class, |
95 | Gallery::class, |
96 | Indicator::class, |
97 | // The following implementations will move to their own repositories |
98 | // soon, but for now are implemented in the Parsoid repo. |
99 | LST::class |
100 | ]; |
101 | |
102 | /** |
103 | * Array specifying fully qualified class name for Parsoid-compatible extensions |
104 | * @var ?array<int,ExtensionModule> |
105 | */ |
106 | private $extModules = null; |
107 | /** |
108 | * Private counter to assign IDs to $extModules |
109 | * @var int |
110 | */ |
111 | private $extModuleNextId = 0; |
112 | |
113 | // phpcs:disable Generic.Files.LineLength.TooLong |
114 | |
115 | /** |
116 | * Register a Parsoid extension module. |
117 | * @param string|array{name:string}|array{factory:callable}|array{class:class-string<ExtensionModule>} $configOrSpec |
118 | * Either an object factory specification for an ExtensionModule object, |
119 | * or else the configuration array which ExtensionModule::getConfig() |
120 | * would return. (The latter is preferred, but our internal extensions |
121 | * use the former.) |
122 | * @return int An integer identifier that can be passed to |
123 | * ::unregisterExtensionModule to remove this extension.
124 | */ |
final public function registerExtensionModule( $configOrSpec ): int {
	// Force lazy initialization first, so the core modules are present
	// (and have taken their ids) before this module is appended.
	$this->getExtensionModules();

	// A bare string, or an array carrying 'class' or 'factory', is an
	// object factory spec; anything else is a raw configuration array.
	$isFactorySpec = is_string( $configOrSpec )
		|| isset( $configOrSpec['class'] )
		|| isset( $configOrSpec['factory'] );

	if ( !$isFactorySpec ) {
		// Wrap the configuration array in an anonymous ExtensionModule
		// whose getConfig() simply hands the array back.
		$module = new class( $configOrSpec ) implements ExtensionModule {
			private $config;

			/** @param array $config */
			public function __construct( $config ) {
				$this->config = $config;
			}

			/** @inheritDoc */
			public function getConfig(): array {
				return $this->config;
			}
		};
	} else {
		$factoryOptions = [
			'allowClassName' => true,
			'assertClass' => ExtensionModule::class,
		];
		// ObjectFactory::createObject accepts an array, not just a callable (phan bug)
		// @phan-suppress-next-line PhanTypeInvalidCallableArraySize
		$module = $this->getObjectFactory()->createObject( $configOrSpec, $factoryOptions );
	}

	// Allocate the next free id and store the module under it.
	$extId = $this->extModuleNextId;
	$this->extModuleNextId++;
	$this->extModules[$extId] = $module;

	// Invalidate the cached extConfig so this registration is picked up.
	$this->extConfig = null;

	return $extId;
}
158 | |
159 | // phpcs:enable Generic.Files.LineLength.TooLong |
160 | |
161 | /** |
162 | * Unregister a Parsoid extension module. This is typically used |
163 | * only for testing purposes in order to reset a shared SiteConfig |
164 | * to its original configuration. |
165 | * @param int $extId The value returned by the call to |
166 | * ::registerExtensionModule() |
167 | */ |
final public function unregisterExtensionModule( int $extId ): void {
	// Invalidate the cached extConfig; it no longer reflects the module set.
	$this->extConfig = null;
	// Removing an id that is not registered is a silent no-op.
	unset( $this->extModules[$extId] );
}
172 | |
173 | /** |
174 | * Return the set of Parsoid extension modules associated with this |
175 | * SiteConfig. |
176 | * |
177 | * @return ExtensionModule[] |
178 | */ |
final public function getExtensionModules() {
	if ( $this->extModules !== null ) {
		// Already initialized: return the modules without their internal ids.
		return array_values( $this->extModules );
	}

	// First use: instantiate every core module, giving each the next id.
	$this->extModules = [];
	foreach ( self::$coreExtModules as $moduleClass ) {
		$id = $this->extModuleNextId++;
		$this->extModules[$id] = new $moduleClass();
	}

	return array_values( $this->extModules );
}
188 | |
189 | /** @var LoggerInterface|null */ |
190 | protected $logger = null; |
191 | |
192 | /** @var int */ |
193 | protected $iwMatcherBatchSize = 4096; |
194 | |
195 | /** @var array|null */ |
196 | protected $iwMatcher = null; |
197 | |
198 | /** @var bool */ |
199 | protected $addHTMLTemplateParameters = false; |
200 | |
201 | /** @var bool */ |
202 | protected $scrubBidiChars = false; |
203 | |
204 | /** @var bool */ |
205 | protected $linterEnabled = false; |
206 | |
207 | /** @var ?array */ |
208 | protected $extConfig = null; |
209 | |
210 | /** |
211 | * Tag handlers for some extensions currently explicitly call unstripNowiki
212 | * first thing in their handlers. They do this to strip <nowiki>..</nowiki> |
213 | * wrappers around args when encountered in the {{#tag:...}} parser function. |
214 | * However, this strategy won't work for Parsoid which calls the preprocessor |
215 | * to get expanded wikitext. In this mode, <nowiki> wrappers won't be stripped |
216 | * and this leads to functional differences in parsing and output. |
217 | * |
218 | * See T203293 and T299103 for more details. |
219 | * |
220 | * To get around this, T299103 proposes that extensions that require this support |
221 | * set a config flag in their Parsoid extension config. On the Parsoid end, we |
222 | * then let the legacy parser know of these tags. When such extension tags are |
223 | * encountered in the {{#tag:...}} parser function handler (see tagObj function |
224 | * in CoreParserFunctions.php), that handler can then automatically strip these
225 | * nowiki wrappers on behalf of the extension. |
226 | * |
227 | * This serves two purposes. For one, it lets Parsoid support these extensions |
228 | * in this nowiki use edge case. For another, extensions that register handlers |
229 | * with Parsoid can get rid of explicit calls to unstripNowiki() in the |
230 | * tag handlers for the legacy parser. |
231 | * |
232 | * This property maintains an array of tags that need this support. |
233 | * |
234 | * @var array an associative array of tag names |
235 | */ |
236 | private $t299103Tags = []; |
237 | |
238 | /** |
239 | * Base constructor. |
240 | * |
241 | * This constructor is public because it is used to create mock objects |
242 | * in our test suite. |
243 | */ |
public function __construct() {
	// Intentionally empty: no initialization is performed here.
}
246 | |
247 | /************************************************************************//** |
248 | * @name Global config |
249 | * @{ |
250 | */ |
251 | |
252 | /** |
253 | * General log channel |
254 | * @return LoggerInterface |
255 | */ |
public function getLogger(): LoggerInterface {
	// Lazily install a NullLogger the first time no logger is set, and
	// remember it so later calls return the same instance.
	return $this->logger ?? ( $this->logger = new NullLogger );
}
262 | |
263 | /** |
264 | * Set the log channel, for debugging |
265 | * @param ?LoggerInterface $logger |
266 | */ |
public function setLogger( ?LoggerInterface $logger ): void {
	// null is accepted; getLogger() then falls back to a NullLogger.
	$this->logger = $logger;
}
270 | |
271 | /** |
272 | * Default gallery options for this wiki. |
273 | * @return array<string,string|int|bool> |
274 | */ |
public function galleryOptions(): array {
	// Defaults returned by this base implementation.
	$defaults = [
		'imagesPerRow' => 0,
		'imageWidth' => 120,
		'imageHeight' => 120,
		'captionLength' => true,
		'showBytes' => true,
		'showDimensions' => true,
		'mode' => 'traditional',
	];

	return $defaults;
}
286 | |
287 | /** |
288 | * When processing template parameters, parse them to HTML and add it to the |
289 | * template parameters data. |
290 | * @return bool |
291 | */ |
public function addHTMLTemplateParameters(): bool {
	// Plain accessor for the protected flag (false by default).
	$enabled = $this->addHTMLTemplateParameters;
	return $enabled;
}
295 | |
296 | /** |
297 | * Statistics aggregator, for counting and timing. |
298 | * |
299 | * @return StatsdDataFactoryInterface|null |
300 | */ |
public function metrics(): ?StatsdDataFactoryInterface {
	// The base implementation supplies no statistics aggregator.
	return null;
}
304 | |
305 | /** |
306 | * Increment a counter metric |
307 | * @param string $name |
308 | * @param array $labels |
309 | * @param float $amount |
310 | * @return void |
311 | */ |
312 | abstract public function incrementCounter( string $name, array $labels, float $amount = 1 ); |
313 | |
314 | /** |
315 | * Record a timing metric. |
316 | * |
317 | * Note that the value should be provided in *milliseconds* even though |
318 | * the name of the metric may end (by convention) in `_seconds`. The |
319 | * metrics infrastructure will make the appropriate conversion. |
320 | * |
321 | * @param string $name |
322 | * @param float $value A timing value *in milliseconds* |
323 | * @param array $labels |
324 | * @return void |
325 | */ |
326 | abstract public function observeTiming( string $name, float $value, array $labels ); |
327 | |
328 | /** |
329 | * If enabled, bidi chars adjacent to category links will be stripped |
330 | * in the html -> wt serialization pass. |
331 | * @return bool |
332 | */ |
public function scrubBidiChars(): bool {
	// Plain accessor for the protected flag (false by default).
	$enabled = $this->scrubBidiChars;
	return $enabled;
}
336 | |
337 | /** @} */ |
338 | |
339 | /************************************************************************//** |
340 | * @name Wiki config |
341 | * @{ |
342 | */ |
343 | |
344 | /** |
345 | * Allowed external image URL prefixes. |
346 | * |
347 | * @return string[] The empty array matches no URLs. The empty string matches |
348 | * all URLs. |
349 | */ |
350 | abstract public function allowedExternalImagePrefixes(): array; |
351 | |
352 | /** |
353 | * Site base URI |
354 | * |
355 | * This would be the URI found in `<base href="..." />`. |
356 | * |
357 | * @return string |
358 | */ |
359 | abstract public function baseURI(): string; |
360 | |
361 | /** |
362 | * Prefix for relative links |
363 | * |
364 | * Prefix to prepend to a page title to link to that page. |
365 | * Intended to be relative to the URI returned by baseURI(). |
366 | * |
367 | * If possible, keep the default "./" so clients need not know this value |
368 | * to extract titles from link hrefs. |
369 | * |
370 | * @return string |
371 | */ |
public function relativeLinkPrefix(): string {
	// Default prefix returned by this base implementation.
	$prefix = './';
	return $prefix;
}
375 | |
376 | /** |
377 | * Regex matching all double-underscore magic words |
378 | * @return string |
379 | */ |
public function bswPagePropRegexp(): string {
	// NOTE: this is a function-static cache, not a property. The first
	// computed value is therefore reused by every SiteConfig object in the
	// process, whatever that object's own bswRegexp() returns.
	static $bswPagePropRegexp = null;

	if ( $bswPagePropRegexp !== null ) {
		return $bswPagePropRegexp;
	}

	// Strip the delimiters from the behavior-switch regexp so it can be
	// embedded as an alternative inside the '@'-delimited pattern below.
	$inner = PHPUtils::reStrip( $this->bswRegexp(), '@' );
	$bswPagePropRegexp =
		'@(?:^|\\s)mw:PageProp/(?:' .
		$inner .
		')(?=$|\\s)@uDS';

	return $bswPagePropRegexp;
}
391 | |
392 | /** |
393 | * Map a canonical namespace name to its index |
394 | * |
395 | * @note This replaces canonicalNamespaces |
396 | * @param string $name all-lowercase and with underscores rather than spaces. |
397 | * @return int|null |
398 | */ |
399 | abstract public function canonicalNamespaceId( string $name ): ?int; |
400 | |
401 | /** |
402 | * Map a namespace name to its index |
403 | * |
404 | * @note This replaces canonicalNamespaces |
405 | * @param string $name all-lowercase and with underscores rather than spaces. |
406 | * @return int|null |
407 | */ |
408 | abstract public function namespaceId( string $name ): ?int; |
409 | |
410 | /** |
411 | * Map a namespace index to its preferred name |
412 | * (with spaces, not underscores). |
413 | * |
414 | * @note This replaces namespaceNames |
415 | * @param int $ns |
416 | * @return string|null |
417 | */ |
418 | abstract public function namespaceName( int $ns ): ?string; |
419 | |
420 | /** |
421 | * Test if a namespace has subpages |
422 | * |
423 | * @note This replaces namespacesWithSubpages |
424 | * @param int $ns |
425 | * @return bool |
426 | */ |
427 | abstract public function namespaceHasSubpages( int $ns ): bool; |
428 | |
429 | /** |
430 | * Return namespace case setting |
431 | * @param int $ns |
432 | * @return string 'first-letter' or 'case-sensitive' |
433 | */ |
434 | abstract public function namespaceCase( int $ns ): string; |
435 | |
436 | /** |
437 | * Test if a namespace is a talk namespace |
438 | * |
439 | * @note This replaces title.getNamespace().isATalkNamespace() |
440 | * @param int $ns |
441 | * @return bool |
442 | */ |
public function namespaceIsTalk( int $ns ): bool {
	// Only positive, odd namespace ids count as talk namespaces.
	if ( $ns <= 0 ) {
		return false;
	}
	return ( $ns % 2 ) === 1;
}
446 | |
447 | /** |
448 | * Uppercasing method for titles |
449 | * @param string $str |
450 | * @return string |
451 | */ |
public function ucfirst( string $str ): string {
	// Dispatch on the first byte only, so the common ASCII cases avoid
	// the slower multibyte functions.
	$o = ord( $str );
	if ( $o < 96 ) { // if already uppercase...
		return $str;
	} elseif ( $o < 128 ) {
		if ( $str[0] === 'i' &&
			in_array( $this->langBcp47()->toBcp47Code(), [ 'az', 'tr', 'kaa', 'kk' ], true )
		) {
			// For these language codes a leading 'i' uppercases to U+0130
			// (LATIN CAPITAL LETTER I WITH DOT ABOVE), not to 'I'. The code
			// point is written as an escape so that a re-encoding of this
			// file cannot corrupt it.
			return "\u{0130}" . mb_substr( $str, 1 );
		}
		return ucfirst( $str ); // use PHP's ucfirst()
	} else {
		// fall back to more complex logic in case of multibyte strings
		$char = mb_substr( $str, 0, 1 );
		return mb_strtoupper( $char ) . mb_substr( $str, 1 );
	}
}
469 | |
470 | /** |
471 | * Get the default local name for a special page |
472 | * @param string $alias Special page alias |
473 | * @return string|null |
474 | */ |
475 | abstract public function specialPageLocalName( string $alias ): ?string; |
476 | |
477 | /** |
478 | * Treat language links as magic connectors, not inline links |
479 | * @return bool |
480 | */ |
481 | abstract public function interwikiMagic(): bool; |
482 | |
483 | /** |
484 | * Return true if the specified magic link syntax is enabled on this |
485 | * wiki. |
486 | * @param string $which One of "ISBN", "PMID", or "RFC" |
487 | * @return bool True if the specified magic link type is enabled on this wiki
488 | */ |
public function magicLinkEnabled( string $which ): bool {
	// Ideally this would be abstract. To allow graceful upgrades, the base
	// class instead reports every magic link type as enabled; $which is
	// not consulted here.
	return true;
}
494 | |
495 | /** |
496 | * Interwiki link data. |
497 | * |
498 | * Note that the order of the keys in this array is significant: if more |
499 | * than one prefix matches a given URL during html2wt conversion, the |
500 | * *first* match is used. If you want `wikitech` to be used instead of |
501 | * `labsconsole`, for example, the `'wikitech'=>[....]` key needs to |
502 | * enumerate first. |
503 | * |
504 | * @return array<string,array> Keys are interwiki prefixes, values are arrays with the following keys: |
505 | * - prefix: (string) The interwiki prefix, same as the key. |
506 | * - url: (string) Target URL, containing a '$1' to be replaced by the interwiki target. |
507 | * - protorel: (bool, optional) Whether the url may be accessed by both http:// and https://. |
508 | * - local: (bool, optional) Whether the interwiki link is considered local (to the wikifarm). |
509 | * - localinterwiki: (bool, optional) Whether the interwiki link points to the current wiki. |
510 | * - language: (bool, optional) Whether the interwiki link is a language link. |
511 | * - extralanglink: (bool, optional) Whether the interwiki link is an "extra language link". |
512 | * - linktext: (string, optional) For "extra language links", the link text. |
513 | * (booleans marked "optional" must be omitted if false) |
514 | */ |
515 | abstract public function interwikiMap(): array; |
516 | |
517 | /** |
518 | * Interwiki link data, after removing items that conflict with namespace names. |
519 | * (In case of such conflict, namespace wins, interwiki is ignored.) |
520 | * @return array<string,array> See interwikiMap() |
521 | */ |
public function interwikiMapNoNamespaces(): array {
	if ( $this->interwikiMapNoNamespaces !== null ) {
		// Already computed.
		return $this->interwikiMapNoNamespaces;
	}

	$this->interwikiMapNoNamespaces = [];
	foreach ( $this->interwikiMap() as $prefix => $info ) {
		// A prefix that is also a namespace name loses to the namespace.
		if ( $this->namespaceId( (string)$prefix ) !== null ) {
			continue;
		}
		$this->interwikiMapNoNamespaces[$prefix] = $info;
	}

	return $this->interwikiMapNoNamespaces;
}
533 | |
534 | /** |
535 | * Match interwiki URLs |
536 | * @param string $href Link to match against |
537 | * @return string[]|null Two values [ string $key, string $target ] on success, null on no match. |
538 | */ |
public function interwikiMatcher( string $href ): ?array {
	if ( $this->iwMatcher === null ) {
		// Build the matcher once. Index 0 collects ordinary interwikis,
		// index 1 collects language interwikis.
		$keys = [ [], [] ];
		$patterns = [ [], [] ];
		foreach ( $this->interwikiMapNoNamespaces() as $key => $iw ) {
			$key = (string)$key;
			$lang = (int)( !empty( $iw['language'] ) );

			$url = $iw['url'];
			$protocolRelative = substr( $url, 0, 2 ) === '//';
			if ( !empty( $iw['protorel'] ) ) {
				$url = preg_replace( '/^https?:/', '', $url );
				$protocolRelative = true;
			}

			// full-url match pattern
			$keys[$lang][] = $key;
			$patterns[$lang][] =
				// Support protocol-relative URLs
				( $protocolRelative ? '(?:https?:)?' : '' )
				// Convert placeholder to group match
				. strtr( preg_quote( $url, '/' ), [ '\\$1' => '(.*?)' ] );

			if ( !empty( $iw['local'] ) ) {
				// ./$interwikiPrefix:$title and
				// $interwikiPrefix%3A$title shortcuts
				// are recognized and the local wiki forwards
				// these shortcuts to the remote wiki

				// Quote the prefix so any regex metacharacter in it is
				// matched literally instead of altering the pattern.
				$prefix = preg_quote( (string)$iw['prefix'], '/' );

				$keys[$lang][] = $key;
				$patterns[$lang][] = '^\\.\\/' . $prefix . ':(.*?)';

				$keys[$lang][] = $key;
				$patterns[$lang][] = '^' . $prefix . '%3A(.*?)';
			}
		}

		// Prefer language matches over non-language matches
		$numLangs = count( $keys[1] );
		$keys = array_merge( $keys[1], $keys[0] );
		$patterns = array_merge( $patterns[1], $patterns[0] );

		// Appends one batch covering patterns [$start, $end). Each batch
		// holds its keys, the combined regex, and how many of its leading
		// entries are language interwikis (may be <= 0 for later batches).
		$emitBatch = function ( int $start, int $end ) use ( $keys, $patterns, $numLangs ): void {
			$length = $end - $start;
			$this->iwMatcher[] = [
				array_slice( $keys, $start, $length ),
				'/^(?:' . implode( '|', array_slice( $patterns, $start, $length ) ) . ')$/Di',
				$numLangs - $start,
			];
		};

		// Chunk patterns into reasonably sized regexes
		$this->iwMatcher = [];
		$batchStart = 0;
		$batchLen = 0;
		foreach ( $patterns as $i => $pat ) {
			$len = strlen( $pat );
			if ( $i !== $batchStart && $batchLen + $len > $this->iwMatcherBatchSize ) {
				// Adding this pattern would exceed the size budget: close
				// the current batch and start a new one with this pattern.
				$emitBatch( $batchStart, $i );
				$batchStart = $i;
				$batchLen = $len;
			} else {
				$batchLen += $len;
			}
		}
		// Flush whatever remains in the final, partially filled batch.
		$total = count( $patterns );
		if ( $total > $batchStart ) {
			$emitBatch( $batchStart, $total );
		}
	}

	foreach ( $this->iwMatcher as [ $keys, $regex, $numLangs ] ) {
		if ( preg_match( $regex, $href, $m, PREG_UNMATCHED_AS_NULL ) ) {
			// Each pattern contributes exactly one capture group, so group
			// $i + 1 belongs to $keys[$i]; unmatched groups are null.
			foreach ( $keys as $i => $key ) {
				if ( isset( $m[$i + 1] ) ) {
					if ( $i < $numLangs ) {
						// Escape language interwikis with a colon
						$key = ':' . $key;
					}
					return [ $key, $m[$i + 1] ];
				}
			}
		}
	}
	return null;
}
624 | |
625 | /** |
626 | * Wiki identifier, for cache keys. |
627 | * Should match a key in mwApiMap()? |
628 | * @return string |
629 | */ |
630 | abstract public function iwp(): string; |
631 | |
632 | /** |
633 | * Legal title characters |
634 | * |
635 | * Regex is intended to match bytes, not Unicode characters. |
636 | * |
637 | * @return string Regex character class (i.e. the bit that goes inside `[]`) |
638 | */ |
639 | abstract public function legalTitleChars(): string; |
640 | |
641 | /** |
642 | * Link prefix regular expression. |
643 | * @return string|null |
644 | */ |
645 | abstract public function linkPrefixRegex(): ?string; |
646 | |
647 | /** |
648 | * Return raw link trail regexp from config |
649 | * @return string |
650 | */ |
651 | abstract protected function linkTrail(): string; |
652 | |
653 | /** |
654 | * Link trail regular expression. |
655 | * @return string|null |
656 | */ |
public function linkTrailRegex(): ?string {
	// false means "not computed yet"; null is a valid computed result.
	if ( $this->linkTrailRegex === false ) {
		// Remove the trailing "rest of text" group from the raw regexp.
		$trail = str_replace( '(.*)$', '', $this->linkTrail() );
		// An empty group left behind (the zh-hans case) means there is no
		// link trail at all.
		$this->linkTrailRegex = ( strpos( $trail, '()' ) !== false ) ? null : $trail;
	}
	return $this->linkTrailRegex;
}
670 | |
671 | /** |
672 | * Wiki language code. |
673 | * @return Bcp47Code BCP-47 language code |
674 | */ |
675 | abstract public function langBcp47(): Bcp47Code; |
676 | |
677 | /** |
678 | * Main page title, as LinkTarget |
679 | * @return LinkTarget |
680 | */ |
681 | abstract public function mainPageLinkTarget(): LinkTarget; |
682 | |
683 | /** |
684 | * Lookup config |
685 | * @param string $key |
686 | * @return mixed|null config value for $key, if present or null, if not. |
687 | * @deprecated This very broad interface is no longer needed. |
688 | */ |
689 | abstract public function getMWConfigValue( string $key ); |
690 | |
691 | /** |
692 | * Whether the wiki language is right-to-left |
693 | * @return bool |
694 | */ |
695 | abstract public function rtl(): bool; |
696 | |
697 | /** |
698 | * Whether language converter is enabled for the specified language |
699 | * @param Bcp47Code $lang |
700 | * @return bool |
701 | */ |
702 | abstract public function langConverterEnabledBcp47( Bcp47Code $lang ): bool; |
703 | |
704 | /** |
705 | * The URL path to index.php. |
706 | * @return string |
707 | */ |
708 | abstract public function script(): string; |
709 | |
710 | /** |
711 | * FIXME: This is only used to compute the modules path below |
712 | * and maybe shouldn't be exposed. |
713 | * |
714 | * The base wiki path |
715 | * @return string |
716 | */ |
717 | abstract public function scriptpath(): string; |
718 | |
719 | /** |
720 | * The base URL of the server. |
721 | * @return string |
722 | */ |
723 | abstract public function server(): string; |
724 | |
/**
 * Export content metadata via meta tags (and via a stylesheet
 * for now to aid some clients).
 *
 * NOTE(review): implementations presumably delegate to
 * exportMetadataHelper(), which appends the actual <head> elements —
 * confirm against the concrete subclasses.
 *
 * @param Document $document Document that receives the metadata
 * @param ContentMetadataCollector $metadata Collected metadata to export
 * @param string $defaultTitle The default title to display, as an
 *   unescaped string
 * @param Bcp47Code $lang a BCP-47 language code
 */
abstract public function exportMetadataToHeadBcp47(
	Document $document,
	ContentMetadataCollector $metadata,
	string $defaultTitle,
	Bcp47Code $lang
): void;
741 | |
/**
 * Helper function to create <head> elements from metadata.
 *
 * Appends, in this order and each only when there is something to emit:
 * a `mw:jsConfigVars` <meta>, a `mw:generalModules` <meta> and a
 * `mw:moduleStyles` <meta>; then always appends a stylesheet <link>
 * built on $modulesLoadURI.
 *
 * @param Document $document Document whose <head> is appended to
 * @param string $modulesLoadURI Base URI the stylesheet URL is built on
 * @param string[] $modules General module names
 * @param string[] $moduleStyles Style module names
 * @param array<string,mixed> $jsConfigVars
 * @param string $htmlTitle The display title, as escaped HTML
 *   (currently ignored, see the comment in the body)
 * @param Bcp47Code $lang a Bcp47Code object
 */
protected function exportMetadataHelper(
	Document $document,
	string $modulesLoadURI,
	array $modules,
	array $moduleStyles,
	array $jsConfigVars,
	string $htmlTitle,
	Bcp47Code $lang
): void {
	// $htmlTitle is deliberately unused. It carries the DISPLAYTITLE as
	// recorded in the ParserOutput, which is neither the final value
	// used for the <h1> nor the plaintext value used for the page
	// <title>: OutputPage validates/strips the displaytitle further
	// before using it. Rather than report an incorrect value in the
	// <head> we ignore it for now (T324431).

	// JsConfigVars
	$serializedVars = null;
	if ( $jsConfigVars ) {
		try {
			$serializedVars = PHPUtils::jsonEncode( $jsConfigVars );
		} catch ( \Exception $e ) {
			// Similar to ResourceLoader::makeConfigSetScript. See T289358
			$this->getLogger()->log(
				LogLevel::WARNING,
				'JSON serialization of config data failed. ' .
					'This usually means the config data is not valid UTF-8.'
			);
		}
	}
	if ( $serializedVars ) {
		DOMUtils::appendToHead( $document, 'meta', [
			'property' => 'mw:jsConfigVars',
			'content' => $serializedVars,
		] );
	}

	// Modules returned from preprocessor / parse requests:
	// - mw:generalModules can be processed via JS (and async) and are
	//   usually (but not always) JS scripts.
	// - mw:moduleStyles are CSS modules that are render-blocking.
	$moduleMeta = [
		'mw:generalModules' => $modules,
		'mw:moduleStyles' => $moduleStyles,
	];
	foreach ( $moduleMeta as $property => $names ) {
		if ( $names ) {
			DOMUtils::appendToHead( $document, 'meta', [
				'property' => $property,
				'content' => implode( '|', array_unique( $names ) )
			] );
		}
	}

	/*
	 * While unnecessary for Wikimedia clients, a stylesheet url in
	 * the <head> is useful for clients like Kiwix and others who
	 * might not want to process the meta tags to construct the
	 * resourceloader url.
	 *
	 * Given that these clients will be consuming Parsoid HTML outside
	 * a MediaWiki skin, the clients are effectively responsible for
	 * their own "skin". But, once again, as a courtesy, we are
	 * hardcoding the vector skin modules for them. But, note that
	 * this may cause page elements to render differently than how
	 * they render on Wikimedia sites with the vector skin since this
	 * is probably missing a number of other modules.
	 *
	 * All that said, note that JS-generated parts of the page will
	 * still require them to have more intimate knowledge of how to
	 * process the JS modules. Except for <graph>s, page content
	 * doesn't require JS modules at this point. So, where these
	 * clients want to invest in the necessary logic to construct a
	 * better resourceloader url, they could simply delete / ignore
	 * this stylesheet.
	 */
	$stylesheetModules = array_merge( $moduleStyles, [
		'mediawiki.skinning.content.parsoid',
		// Use the base styles that API output and fallback skin use.
		'mediawiki.skinning.interface',
		// Make sure to include contents of user generated styles
		// e.g. MediaWiki:Common.css / MediaWiki:Mobile.css
		'site.styles'
	] );
	// The resource loader path needs the MW-internal language code,
	// not the BCP-47 one.
	$langMw = Utils::bcp47ToMwCode( $lang );
	$encodedModules = PHPUtils::encodeURIComponent(
		implode( '|', array_unique( $stylesheetModules ) )
	);
	$styleURI = "{$modulesLoadURI}?lang={$langMw}&modules={$encodedModules}&only=styles&skin=vector";
	DOMUtils::appendToHead( $document, 'link', [
		'rel' => 'stylesheet',
		'href' => $styleURI,
	] );
}
844 | |
/**
 * A regexp matching the localized 'REDIRECT' marker for this wiki.
 * The regexp should be delimited, but should not have boundary anchors
 * or capture groups. (Within this class the delimiters are stripped
 * again with PHPUtils::reStrip() before the pattern is embedded in the
 * sol-transparent regexps.)
 * @return string
 */
abstract public function redirectRegexp(): string;
852 | |
/**
 * A regexp matching the localized 'Category' prefix for this wiki.
 * The regexp should be delimited, but should not have boundary anchors
 * or capture groups. (Within this class the delimiters are stripped
 * again with PHPUtils::reStrip() before the pattern is embedded in the
 * sol-transparent regexps.)
 * @return string
 */
abstract public function categoryRegexp(): string;
860 | |
/**
 * A regexp matching localized behavior switches for this wiki.
 * The regexp should be delimited, but should not have boundary anchors
 * or capture groups. The sol-transparent regexps in this class wrap it
 * as `__(?:...)__`, so it is expected to match the switch name without
 * the surrounding double underscores.
 * @return string
 */
abstract public function bswRegexp(): string;
868 | |
/**
 * A regex matching a line containing just whitespace, comments, and
 * sol transparent links and behavior switches.
 *
 * The pattern is assembled once and memoized in a function-static
 * variable.
 *
 * NOTE(review): a function-static is not per-instance, so a second
 * SiteConfig with different localization in the same process would get
 * the first one's regexp — confirm that this cannot happen in practice.
 *
 * @return string '@'-delimited, fully anchored regexp
 */
public function solTransparentWikitextRegexp(): string {
	// cscott sadly says: Note that this depends on the precise
	// localization of the magic words of this particular wiki.
	static $cachedRegexp = null;
	if ( $cachedRegexp !== null ) {
		return $cachedRegexp;
	}

	// Strip the delimiters so the pieces can be embedded in a larger
	// '@'-delimited pattern.
	$redirectRe = PHPUtils::reStrip( $this->redirectRegexp(), '@' );
	$categoryRe = PHPUtils::reStrip( $this->categoryRegexp(), '@' );
	$switchRe = PHPUtils::reStrip( $this->bswRegexp(), '@' );
	$commentRe = PHPUtils::reStrip( Utils::COMMENT_REGEXP, '@' );

	// Optional leading "<redirect marker> [[target]]" clause.
	$redirectClause = '(?:(?:' . $redirectRe . ')' .
		'[ \t\n\r\x0c]*(?::[ \t\n\r\x0c]*)?\[\[[^\]]+\]\]' .
		')?';

	// Anything that may follow: category links, behavior switches,
	// comments and whitespace, in any number and order.
	$transparentItems = implode( '|', [
		'\[\[' . $categoryRe . '\:[^\]]*?\]\]',
		'__(?:' . $switchRe . ')__',
		$commentRe,
		'[ \t\n\r\0\x0b]',
	] );

	$cachedRegexp = '@^[ \t\n\r\0\x0b]*' .
		$redirectClause .
		'(?:' . $transparentItems . ')*$@';
	return $cachedRegexp;
}
898 | |
/**
 * A regex matching a line containing just comments and
 * sol transparent links and behavior switches.
 *
 * The compiled pattern is memoized separately for each value of
 * $addIncludes. (Previously a single function-static slot was used, so
 * whichever variant was requested first was returned for every later
 * call regardless of $addIncludes.)
 *
 * NOTE(review): the memo is still function-static rather than
 * per-instance, so it is shared by every SiteConfig in the process —
 * confirm that only one localization is ever live at a time.
 *
 * @param bool $addIncludes Whether `<includeonly>...</includeonly>`
 *   blocks should also be treated as sol-transparent
 * @return string '@'-delimited regexp with a single capture group
 *   around the whole match
 */
public function solTransparentWikitextNoWsRegexp(
	bool $addIncludes = false
): string {
	// cscott sadly says: Note that this depends on the precise
	// localization of the magic words of this particular wiki.
	static $regexps = [];
	$variant = (int)$addIncludes;
	if ( !isset( $regexps[$variant] ) ) {
		$redirect = PHPUtils::reStrip( $this->redirectRegexp(), '@' );
		$category = PHPUtils::reStrip( $this->categoryRegexp(), '@' );
		$bswRegexp = PHPUtils::reStrip( $this->bswRegexp(), '@' );
		$comment = PHPUtils::reStrip( Utils::COMMENT_REGEXP, '@' );
		$regexps[$variant] = '@' .
			'((?:' .
				'(?:' . $redirect . ')' .
				'[ \t\n\r\x0c]*(?::[ \t\n\r\x0c]*)?\[\[[^\]]+\]\]' .
			')?' .
			'(?:' .
				'\[\[' . $category . '\:[^\]]*?\]\]|' .
				'__(?:' . $bswRegexp . ')__|' .
				$comment .
				// FIXME(SSS): What about onlyinclude and noinclude?
				( $addIncludes ? '|<includeonly>[\S\s]*?</includeonly>' : '' ) .
			')*)@';
	}
	return $regexps[$variant];
}
932 | |
/**
 * The wiki's time zone offset.
 * @return int Minutes east of UTC
 */
abstract public function timezoneOffset(): int;
938 | |
/**
 * Language variant information for the given language (or null if
 * unknown).
 * @param Bcp47Code $lang The language for which you want variant information
 * @return ?array{base:Bcp47Code,fallbacks:Bcp47Code[]} null if unknown,
 *   otherwise an array with two fields:
 *   - base: (Bcp47Code) Base BCP-47 language code (e.g. "zh")
 *   - fallbacks: (Bcp47Code[]) Fallback variants, as BCP-47 codes
 */
abstract public function variantsFor( Bcp47Code $lang ): ?array;
949 | |
/**
 * Default thumbnail width.
 * @return int NOTE(review): presumably in pixels — confirm
 */
abstract public function widthOption(): int;
954 | |
/**
 * List the magic word ids that are variables.
 *
 * populateMagicWords() turns the result into a lookup set and treats a
 * magic word as a variable when its id is present in it.
 *
 * @return array Magic word ids
 */
abstract protected function getVariableIDs(): array;
956 | |
/**
 * Return the magic word definitions for this wiki.
 *
 * As consumed by populateMagicWords(): keys are magic word ids and each
 * value is an array whose first element is the case-sensitivity flag
 * and whose remaining elements are the aliases.
 *
 * @return array
 */
abstract protected function getMagicWords(): array;
958 | |
/**
 * Does the SiteConfig provide precomputed function synonyms?
 * If no, the SiteConfig is expected to provide an implementation
 * for updateFunctionSynonym, which populateMagicWords() then calls
 * for every alias.
 * @return bool True in this default implementation
 */
protected function haveComputedFunctionSynonyms(): bool {
	return true;
}
967 | |
/**
 * Get a list of precomputed function synonyms.
 *
 * getMagicWordForFunctionHook() reads the result as two maps from
 * synonym to canonical magic word: index 1 is looked up with the
 * string as given, index 0 with the lowercased string.
 *
 * @return array Empty in this default implementation
 */
protected function getFunctionSynonyms(): array {
	return [];
}
974 | |
/**
 * Record a function synonym for one magic word alias.
 *
 * populateMagicWords() calls this for every alias when
 * haveComputedFunctionSynonyms() returns false, so a SiteConfig that
 * overrides that method to return false must override this one too.
 *
 * @param string $func The alias (already lowercased when the magic
 *   word is case-insensitive)
 * @param string $magicword Canonical magic word id
 * @param bool $caseSensitive Whether the magic word is case-sensitive
 * @throws \RuntimeException Always, in this default implementation
 */
protected function updateFunctionSynonym( string $func, string $magicword, bool $caseSensitive ): void {
	throw new \RuntimeException( "Unexpected code path!" );
}
978 | |
/**
 * Lazily build the magic word lookup tables: mwAliases,
 * behaviorSwitches, variables, mediaOptions and functionSynonyms.
 *
 * Does nothing when mwAliases is already non-empty.
 */
private function populateMagicWords() {
	if ( !empty( $this->mwAliases ) ) {
		// Already populated.
		return;
	}

	$this->mediaOptions = [];
	$this->variables = [];
	$this->behaviorSwitches = [];
	$this->mwAliases = [];

	$variableIds = PHPUtils::makeSet( $this->getVariableIDs() );
	$this->functionSynonyms = $this->getFunctionSynonyms();
	$synonymsPrecomputed = $this->haveComputedFunctionSynonyms();

	foreach ( $this->getMagicWords() as $magicword => $definition ) {
		// The first element is the case-sensitivity flag; the rest
		// are the aliases.
		$caseSensitive = array_shift( $definition );
		$isVariable = isset( $variablesMap[$magicword] );
		$isMediaOption = preg_match( '/^(img|timedmedia)_/', $magicword );

		foreach ( $definition as $alias ) {
			$this->mwAliases[$magicword][] = $alias;
			// Case-insensitive words are additionally listed, and
			// indexed below, in lowercased form.
			$lookupKey = $alias;
			if ( !$caseSensitive ) {
				$lookupKey = mb_strtolower( $alias );
				$this->mwAliases[$magicword][] = $lookupKey;
			}

			if ( substr( $lookupKey, 0, 2 ) === '__' ) {
				$this->behaviorSwitches[$lookupKey] = [ $caseSensitive, $magicword ];
			}
			if ( $isVariable ) {
				$this->variables[$lookupKey] = $magicword;
			}
			if ( $isMediaOption ) {
				$this->mediaOptions[$lookupKey] = [ $caseSensitive, $magicword ];
			}
			if ( !$synonymsPrecomputed ) {
				$this->updateFunctionSynonym( $lookupKey, $magicword, (bool)$caseSensitive );
			}
		}
	}
}
1013 | |
/**
 * List all magic words by canonical name.
 *
 * Triggers the lazy population of the magic word tables on first use.
 * For case-insensitive magic words each alias is listed twice: as
 * defined, and lowercased.
 *
 * @return string[][] Keys are canonical names, values are arrays of aliases.
 */
public function mwAliases(): array {
	$this->populateMagicWords();
	return $this->mwAliases;
}
1022 | |
/**
 * Return canonical magic word for a function hook.
 *
 * The case-sensitive synonyms (index 1) are consulted first with $str
 * as given; failing that, the case-insensitive synonyms (index 0) are
 * consulted with the lowercased $str.
 *
 * @param string $str
 * @return string|null Canonical magic word, or null if $str is not a
 *   known function synonym
 */
public function getMagicWordForFunctionHook( string $str ): ?string {
	$this->populateMagicWords();

	$exactMatch = $this->functionSynonyms[1][$str] ?? null;
	if ( $exactMatch !== null ) {
		return $exactMatch;
	}

	# Case insensitive functions
	return $this->functionSynonyms[0][mb_strtolower( $str )] ?? null;
}
1034 | |
/**
 * Return canonical magic word for a variable.
 *
 * The lookup uses $str exactly as given; aliases of case-insensitive
 * variables are stored lowercased by populateMagicWords().
 *
 * @param string $str
 * @return string|null Canonical magic word, or null if $str is not a
 *   known variable alias
 */
public function getMagicWordForVariable( string $str ): ?string {
	$this->populateMagicWords();
	return $this->variables[$str] ?? null;
}
1044 | |
/**
 * Resolve an alias to its canonical magic word name.
 *
 * @param array $mws Map of alias to [ caseSensitive, canonicalName ]
 * @param string $word Alias to resolve
 * @return string|null Canonical name; null when $word is unknown, or
 *   when it only matches after lowercasing and the matched entry is
 *   case-sensitive
 */
private static function getMagicWordCanonicalName( array $mws, string $word ): ?string {
	// An exact hit always wins.
	$exact = $mws[$word] ?? null;
	if ( $exact !== null ) {
		return $exact[1];
	}

	// Otherwise retry lowercased; only case-insensitive entries qualify.
	$folded = $mws[mb_strtolower( $word )] ?? null;
	if ( $folded && !$folded[0] ) {
		return $folded[1];
	}
	return null;
}
1052 | |
/**
 * Return canonical magic word for a media option.
 *
 * Media options are the magic words whose id starts with `img_` or
 * `timedmedia_` (see populateMagicWords()).
 *
 * @param string $word
 * @return string|null Canonical magic word, or null if $word is not a
 *   recognized media option alias
 */
public function getMagicWordForMediaOption( string $word ): ?string {
	$this->populateMagicWords();
	return self::getMagicWordCanonicalName( $this->mediaOptions, $word );
}
1062 | |
/**
 * Return canonical magic word for a behavior switch.
 *
 * Behavior switches are the aliases that start with `__` (see
 * populateMagicWords()), so $word is expected to include the
 * underscores.
 *
 * @param string $word
 * @return string|null Canonical magic word, or null if $word is not a
 *   recognized behavior switch
 */
public function getMagicWordForBehaviorSwitch( string $word ): ?string {
	$this->populateMagicWords();
	return self::getMagicWordCanonicalName( $this->behaviorSwitches, $word );
}
1072 | |
/**
 * Check if a string is a recognized behavior switch.
 *
 * @param string $word
 * @return bool True when $word resolves to a canonical behavior switch
 */
public function isBehaviorSwitch( string $word ): bool {
	$canonicalName = $this->getMagicWordForBehaviorSwitch( $word );
	return $canonicalName !== null;
}
1082 | |
/**
 * Convert the internal canonical magic word name to the wikitext alias.
 *
 * When $suggest is itself one of the known aliases it is returned;
 * otherwise the first known alias is used. When the magic word has no
 * known aliases at all, $suggest is returned unchanged.
 *
 * @param string $word Canonical magic word name
 * @param string $suggest Suggested alias (used as fallback and preferred choice)
 * @return string
 */
public function getMagicWordWT( string $word, string $suggest ): string {
	$knownAliases = $this->mwAliases()[$word] ?? null;
	if ( !$knownAliases ) {
		return $suggest;
	}

	// array_search() yields false when the suggestion is not an alias;
	// `?: 0` maps that (and position 0) to the first alias.
	$position = $suggest ? array_search( $suggest, $knownAliases, true ) : 0;
	return $knownAliases[$position ?: 0];
}
1100 | |
/**
 * Get a regexp matching a localized magic word, given its id.
 *
 * FIXME: misleading function name (it returns a regexp string, not a
 * matcher callable)
 *
 * @param string $id Magic word id
 * @return string
 */
abstract public function getMagicWordMatcher( string $id ): string;
1110 | |
/**
 * Get a matcher function for fetching values out of interpolated magic words,
 * i.e. those with `$1` in their aliases.
 *
 * The matcher takes a string and returns null if it doesn't match any of
 * the words, or an associative array if it did match:
 * - k: The magic word that matched
 * - v: The value of $1 that was matched
 * (the JS also returned 'a' with the specific alias that matched, but that
 * seems to be unused and so is omitted here)
 *
 * @param string[] $words Magic words to match
 * @return callable Matcher as described above
 */
abstract protected function getParameterizedAliasMatcher( array $words ): callable;
1126 | |
/**
 * Get a matcher function for fetching values out of interpolated magic words
 * which are media prefix options.
 *
 * The set of words is the key list of Consts::$Media['PrefixOptions'].
 *
 * The matcher takes a string and returns null if it doesn't match any of
 * the words, or an associative array if it did match:
 * - k: The magic word that matched
 * - v: The value of $1 that was matched
 * (the JS also returned 'a' with the specific alias that matched, but that
 * seems to be unused and so is omitted here)
 *
 * @return callable
 */
final public function getMediaPrefixParameterizedAliasMatcher(): callable {
	// PORT-FIXME: this shouldn't be a constant, we should fetch these
	// from the SiteConfig. Further, we probably need a hook here so
	// Parsoid can handle media options defined in extensions... in
	// particular timedmedia_* magic words from Extension:TimedMediaHandler
	return $this->getParameterizedAliasMatcher(
		array_keys( Consts::$Media['PrefixOptions'] )
	);
}
1148 | |
/**
 * Get the maximum template depth.
 *
 * @return int
 */
abstract public function getMaxTemplateDepth(): int;
1155 | |
/**
 * Return namespace aliases for the NS_SPECIAL namespace.
 *
 * NOTE(review): getExtResourceURLPatternMatcher() joins these into a
 * '!'-delimited regexp without quoting them, so implementations
 * presumably must return regexp-safe strings — confirm.
 *
 * @return array
 */
abstract protected function getSpecialNSAliases(): array;
1161 | |
/**
 * Return Special Page aliases for a special page name.
 *
 * The aliases are passed through quoteTitleRe() by
 * getExtResourceURLPatternMatcher() before being used in a regexp.
 *
 * @param string $specialPage Special page name, e.g. 'Booksources'
 * @return array
 */
abstract protected function getSpecialPageAliases( string $specialPage ): array;
1168 | |
/**
 * Quote a title for use inside a regex.
 *
 * Regex metacharacters (and $delimiter) are escaped with preg_quote(),
 * after which every space or underscore is replaced with `[ _]` so
 * that either will be matched.
 *
 * @param string $s Title text
 * @param string $delimiter Regex delimiter to escape. Defaults to '/'
 * @return string Regex fragment
 */
protected static function quoteTitleRe( string $s, string $delimiter = '/' ): string {
	$quoted = preg_quote( $s, $delimiter );
	// Single pass, so the `[ _]` just inserted is never rewritten.
	return preg_replace( '/[ _]/', '[ _]', $quoted );
}
1187 | |
/**
 * Matcher for ISBN/RFC/PMID URL patterns, returning the type and number.
 *
 * The returned callable takes a string and returns false on no match or
 * a tuple like this on match: [ 'RFC', '12345' ]
 *
 * Patterns for magic link types that are disabled (see
 * magicLinkEnabled()) are left out.
 *
 * @return callable
 */
public function getExtResourceURLPatternMatcher(): callable {
	$specialNsRe = implode( '|', array_unique( $this->getSpecialNSAliases() ) );
	$bookSourcesRe = implode( '|', array_map(
		[ $this, 'quoteTitleRe' ],
		$this->getSpecialPageAliases( 'Booksources' )
	) );

	// Each pattern captures its number in a group named after its type.
	$candidates = [
		'ISBN' => '(?:\.\.?/)*(?i:' . $specialNsRe . ')(?:%3[Aa]|:)'
			. '(?i:' . $bookSourcesRe . ')(?:%2[Ff]|/)(?P<ISBN>\d+[Xx]?)',
		'RFC' => '[^/]*//tools\.ietf\.org/html/rfc(?P<RFC>\w+)',
		'PMID' => '[^/]*//www\.ncbi\.nlm\.nih\.gov/pubmed/(?P<PMID>\w+)\?dopt=Abstract',
	];

	// T145590: remove patterns for disabled magic links
	$pats = [];
	foreach ( $candidates as $type => $pattern ) {
		if ( $this->magicLinkEnabled( $type ) ) {
			$pats[$type] = $pattern;
		}
	}

	$regex = '!^(?:' . implode( '|', $pats ) . ')$!';
	return static function ( $text ) use ( $pats, $regex ) {
		if ( !preg_match( $regex, $text, $m ) ) {
			return false;
		}
		// The alternative that matched is the one whose named group
		// is non-empty.
		foreach ( $pats as $type => $unused ) {
			if ( isset( $m[$type] ) && $m[$type] !== '' ) {
				return [ $type, $m[$type] ];
			}
		}
		return false;
	};
}
1226 | |
/**
 * Whether linting is enabled, as recorded in the linterEnabled property.
 * @return bool
 */
public function linterEnabled(): bool {
	return $this->linterEnabled;
}
1233 | |
/**
 * Return the desired linter configuration. These are heuristic values
 * which have hardcoded defaults but could be overridden on a per-wiki
 * basis.
 *
 * This default implementation sets neither an allow list nor a block
 * list ('enabled' and 'disabled' are both null).
 *
 * @return array{enabled?:string[]|null,disabled?:string[]|null,maxTableColumnHeuristic?:int,maxTableRowsToCheck?:int,tidyWhitespaceBugMaxLength?:int}
 */
public function getLinterSiteConfig(): array {
	return [
		// Allow list for specific lint types.
		// Takes precedence over block list.
		'enabled' => null,
		// Block list for specific lint types.
		// Not used if an allow list is set.
		'disabled' => null,
		// The maximum columns in a table before the table is considered
		// large
		'maxTableColumnHeuristic' => 5,
		// The maximum rows (header or data) to be checked for the large
		// table lint
		// - If we consider the first N rows to be representative of the
		//   table, and the table is well-formed and uniform, it is
		//   sufficient to check the first N rows to check if the table is
		//   "large".
		// - This heuristic is used together with the
		//   'maxTableColumnHeuristic' to identify "large tables".
		'maxTableRowsToCheck' => 10,
		// Max length of content covered by 'white-space:nowrap' CSS
		// that we consider "safe" when Tidy is replaced. Beyond that,
		// wikitext will have to be fixed up to manually insert whitespace
		// at the right places. Length in bytes.
		'tidyWhitespaceBugMaxLength' => 100,
	];
}
1267 | |
/**
 * Serialize ISBN/RFC/PMID URL patterns.
 *
 * When the link text, once normalized, is exactly the magic link form
 * (type followed by number), the text alone is returned since it will
 * be re-parsed as a magic link. Otherwise an explicit link is emitted:
 * a wikilink for ISBN, an external link for RFC and PMID.
 *
 * @param string[] $match As returned by the getExtResourceURLPatternMatcher() matcher
 * @param string $href Fallback link target, if $match is invalid.
 * @param string $content Link text
 * @return string
 * @throws \InvalidArgumentException If $match[0] is not ISBN, RFC or PMID
 */
public function makeExtResourceURL( array $match, string $href, string $content ): string {
	// Collapse each run of (Unicode) spaces into a single ASCII space.
	$linkText = preg_replace(
		'/[ \x{00A0}\x{1680}\x{2000}-\x{200A}\x{202F}\x{205F}\x{3000}]+/u', ' ',
		Utils::decodeWtEntities( $content )
	);

	// TODO: T145590 ("Update Parsoid to be compatible with magic links being disabled")
	switch ( $match[0] ) {
		case 'ISBN':
			$compact = strtoupper( preg_replace( '/[\- \t]/', '', $linkText ) );
			// validate ISBN length and format, so as not to produce magic links
			// which aren't actually magic
			$wellFormed = preg_match( '/^ISBN(97[89])?\d{9}(\d|X)$/D', $compact );
			$expected = implode( '', $match );
			if ( $expected === $compact && $wellFormed ) {
				return $content;
			}
			// strip "./" prefix. TODO: Use relativeLinkPrefix() instead?
			$target = PHPUtils::stripPrefix( $href, './' );
			return "[[$target|$content]]";

		case 'RFC':
		case 'PMID':
			$compact = preg_replace( '/[ \t]/', '', $linkText );
			if ( implode( '', $match ) === $compact ) {
				return $content;
			}
			return "[$href $content]";

		default:
			throw new \InvalidArgumentException( "Invalid match type '{$match[0]}'" );
	}
}
1305 | |
/**
 * Get the list of valid protocols.
 *
 * getProtocolsRegex() quotes each entry with preg_quote() and treats
 * the entry '//' as the protocol-relative prefix.
 *
 * @return array
 */
abstract protected function getProtocols(): array;
1311 | |
/**
 * Get a regex fragment matching URL protocols, quoted for an exclamation
 * mark delimiter. The case-insensitive option should be used.
 *
 * The fragment is a bare `|`-separated alternation (no grouping) and is
 * cached per value of $excludeProtRel.
 *
 * @param bool $excludeProtRel Whether to exclude protocol-relative URLs
 * @return string
 */
public function getProtocolsRegex( bool $excludeProtRel = false ) {
	$cacheKey = (int)$excludeProtRel;
	if ( isset( $this->protocolsRegexes[$cacheKey] ) ) {
		return $this->protocolsRegexes[$cacheKey];
	}

	$alternatives = [];
	foreach ( $this->getProtocols() as $protocol ) {
		if ( $excludeProtRel && $protocol === '//' ) {
			// Skip the protocol-relative prefix.
			continue;
		}
		$alternatives[] = preg_quote( $protocol, '!' );
	}

	$this->protocolsRegexes[$cacheKey] = implode( '|', $alternatives );
	return $this->protocolsRegexes[$cacheKey];
}
1332 | |
/**
 * Matcher for valid protocols, must be anchored at start of string.
 * Matching is case-insensitive.
 * @param string $potentialLink
 * @return bool Whether $potentialLink begins with a valid protocol
 */
public function hasValidProtocol( string $potentialLink ): bool {
	$protocols = $this->getProtocolsRegex();
	return preg_match( '!^(?:' . $protocols . ')!i', $potentialLink ) === 1;
}
1342 | |
/**
 * Matcher for valid protocols, may occur at any point within string.
 * The protocol must sit at the start of the string or directly after a
 * non-word character. Matching is case-insensitive.
 * @param string $potentialLink
 * @return bool Whether $potentialLink contains a valid protocol
 */
public function findValidProtocol( string $potentialLink ): bool {
	$protocols = $this->getProtocolsRegex();
	return preg_match( '!(?:\W|^)(?:' . $protocols . ')!i', $potentialLink ) === 1;
}
1352 | |
1353 | /** @} */ |
1354 | |
/**
 * Fake timestamp, for unit tests.
 * This default implementation never fakes the time.
 * @return int|null Unix timestamp, or null to not fake it
 */
public function fakeTimestamp(): ?int {
	return null;
}
1362 | |
/**
 * Get an array of defined extension tags, with the lower case name in the
 * key, the value arbitrary. This is the set of extension tags that are
 * configured in M/W core. $coreExtModules may already be part of it,
 * but eventually this distinction will disappear since all extension tags
 * have to be defined against Parsoid's extension API.
 *
 * constructExtConfig() uses the result as the initial 'allTags' set.
 *
 * @return array
 */
abstract protected function getNonNativeExtensionTags(): array;
1373 | |
/**
 * Return an object factory to use when instantiating extensions.
 * (This is assumed to be plumbed up to an appropriate service container.)
 *
 * The default implementation wraps a service container that holds no
 * services at all.
 *
 * @return ObjectFactory The object factory to use for extensions
 */
public function getObjectFactory(): ObjectFactory {
	$emptyContainer = new class() implements ContainerInterface {

		/**
		 * Always fails: this container holds no services.
		 * @param string $id
		 * @return never
		 */
		public function get( $id ) {
			throw new class( "Empty service container" ) extends \Error
				implements NotFoundExceptionInterface {
			};
		}

		/**
		 * No service id is ever known to this container.
		 * @param string $id
		 * @return false
		 */
		public function has( $id ): bool {
			return false;
		}
	};

	return new ObjectFactory( $emptyContainer );
}
1403 | |
/**
 * (Re)build $this->extConfig from the non-native extension tags and
 * every registered extension module.
 *
 * FIXME: might benefit from T250230 (caching) but see T270307 --
 * currently SiteConfig::unregisterExtensionModule() is called
 * during testing, which requires invalidating $this->extConfig.
 * (See also SiteConfig::fakeTimestamp() etc.) We'd probably need
 * to more fully separate/mock the "testing SiteConfig" as well
 * as provide a way for parser options to en/disable individual
 * registered modules before this class can be considered immutable
 * and cached.
 */
private function constructExtConfig() {
	// Start from a skeleton in which every section is empty.
	$this->extConfig = [];
	$sections = [
		'allTags',
		'parsoidExtTags',
		'annotationTags',
		'domProcessors',
		'annotationStrippers',
		'contentModels',
	];
	foreach ( $sections as $section ) {
		$this->extConfig[$section] = [];
	}

	// There may be some tags defined by the parent wiki which have no
	// associated parsoid modules; for now we handle these by invoking
	// the legacy parser.
	$this->extConfig['allTags'] = $this->getNonNativeExtensionTags();

	foreach ( $this->getExtensionModules() as $extensionModule ) {
		$this->processExtensionModule( $extensionModule );
	}
}
1433 | |
/**
 * Whether an extension tag was registered with the `stripNowiki`
 * option, i.e. needs nowikis stripped from its {{#tag:..}} arguments.
 *
 * The underlying set is filled by processExtensionModule().
 *
 * @param string $lowerTagName Lowercased extension tag name
 * @return bool
 */
public function tagNeedsNowikiStrippedInTagPF( string $lowerTagName ): bool {
	return isset( $this->t299103Tags[$lowerTagName] );
}
1441 | |
/**
 * Register a Parsoid-compatible extension.
 *
 * Merges the module's tags, annotations, DOM processors and content
 * models into $this->extConfig, which must already be initialized.
 *
 * @param ExtensionModule $ext
 */
protected function processExtensionModule( ExtensionModule $ext ): void {
	Assert::invariant( $this->extConfig !== null, "not yet inited!" );
	$moduleConfig = $ext->getConfig();
	Assert::invariant(
		isset( $moduleConfig['name'] ),
		"Every extension module must have a name."
	);
	$moduleName = $moduleConfig['name'];

	// These are extension tag handlers. They have
	// wt2html (sourceToDom), html2wt (domToWikitext), and
	// linter functionality.
	foreach ( $moduleConfig['tags'] ?? [] as $tagSpec ) {
		$tagKey = mb_strtolower( $tagSpec['name'] );
		$this->extConfig['allTags'][$tagKey] = true;
		$this->extConfig['parsoidExtTags'][$tagKey] = $tagSpec;
		// Deal with b/c nowiki stripping support needed by some extensions.
		// This registers this tag with the legacy parser for
		// implicit nowiki stripping in {{#tag:..}} args for this tag.
		if ( isset( $tagSpec['options']['stripNowiki'] ) ) {
			$this->t299103Tags[$tagKey] = true;
		}
	}

	$annotations = $moduleConfig['annotations'] ?? null;
	if ( $annotations !== null ) {
		// The tag names live under 'tagNames'; without that key the
		// annotations config itself is used as the list of names.
		$annotationNames = $annotations['tagNames'] ?? $annotations;
		foreach ( $annotationNames as $annotationName ) {
			$tagKey = mb_strtolower( $annotationName );
			$this->extConfig['allTags'][$tagKey] = true;
			$this->extConfig['annotationTags'][$tagKey] = true;
		}
		if ( isset( $annotations['annotationStripper'] ) ) {
			$stripper = $this->getObjectFactory()->createObject(
				$annotations['annotationStripper'],
				[
					'allowClassName' => true,
					'assertClass' => AnnotationStripper::class,
				]
			);
			$this->extConfig['annotationStrippers'][$moduleName] = $stripper;
		}
	}

	// Extension modules may also register dom processors.
	// This is for wt2htmlPostProcessor and html2wtPreProcessor
	// functionality.
	if ( isset( $moduleConfig['domProcessors'] ) ) {
		$this->extConfig['domProcessors'][$moduleName] = $moduleConfig['domProcessors'];
	}

	foreach ( $moduleConfig['contentModels'] ?? [] as $model => $handlerSpec ) {
		// For compatibility with mediawiki core, the first
		// registered extension wins.
		if ( !isset( $this->extConfig['contentModels'][$model] ) ) {
			$this->extConfig['contentModels'][$model] =
				$this->getObjectFactory()->createObject( $handlerSpec, [
					'allowClassName' => true,
					'assertClass' => ContentModelHandler::class,
				] );
		}
	}
}
1507 | |
1508 | protected function getExtConfig(): array { |
1509 | if ( !$this->extConfig ) { |
1510 |