Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
39.75% |
161 / 405 |
|
28.81% |
17 / 59 |
CRAP | |
0.00% |
0 / 1 |
SiteConfig | |
39.75% |
161 / 405 |
|
28.81% |
17 / 59 |
4551.42 | |
0.00% |
0 / 1 |
registerExtensionModule | |
0.00% |
0 / 14 |
|
0.00% |
0 / 1 |
20 | |||
unregisterExtensionModule | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
getExtensionModules | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
12 | |||
__construct | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getLogger | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
setLogger | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
galleryOptions | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
2 | |||
addHTMLTemplateParameters | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
metrics | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
scrubBidiChars | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
allowedExternalImagePrefixes | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
baseURI | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
relativeLinkPrefix | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
bswPagePropRegexp | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
6 | |||
canonicalNamespaceId | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
namespaceId | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
namespaceName | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
namespaceHasSubpages | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
namespaceCase | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
namespaceIsTalk | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
2 | |||
ucfirst | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
5 | |||
specialPageLocalName | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
interwikiMagic | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
interwikiMap | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
interwikiMapNoNamespaces | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
4 | |||
interwikiMatcher | |
100.00% |
52 / 52 |
|
100.00% |
1 / 1 |
15 | |||
iwp | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
legalTitleChars | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
linkPrefixRegex | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
linkTrail | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
linkTrailRegex | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
3 | |||
langBcp47 | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
mainPageLinkTarget | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
getMWConfigValue | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
rtl | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
langConverterEnabledBcp47 | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
script | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
scriptpath | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
server | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
exportMetadataToHeadBcp47 | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
exportMetadataHelper | |
0.00% |
0 / 39 |
|
0.00% |
0 / 1 |
56 | |||
redirectRegexp | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
categoryRegexp | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
bswRegexp | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
solTransparentWikitextRegexp | |
0.00% |
0 / 19 |
|
0.00% |
0 / 1 |
6 | |||
solTransparentWikitextNoWsRegexp | |
0.00% |
0 / 19 |
|
0.00% |
0 / 1 |
12 | |||
timezoneOffset | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
variantsFor | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
widthOption | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
getVariableIDs | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
getMagicWords | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
haveComputedFunctionSynonyms | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getFunctionSynonyms | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
updateFunctionSynonym | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
populateMagicWords | |
91.30% |
21 / 23 |
|
0.00% |
0 / 1 |
9.05 | |||
mwAliases | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
getMagicWordForFunctionHook | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
getMagicWordForVariable | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
getMagicWordCanonicalName | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
20 | |||
getMagicWordForMediaOption | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
getMagicWordForBehaviorSwitch | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
isBehaviorSwitch | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getMagicWordWT | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
20 | |||
getMagicWordMatcher | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
getParameterizedAliasMatcher | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
getMediaPrefixParameterizedAliasMatcher | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
getMaxTemplateDepth | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
getSpecialNSAliases | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
getSpecialPageAliases | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
quoteTitleRe | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
1 | |||
getExtResourceURLPatternMatcher | |
100.00% |
18 / 18 |
|
100.00% |
1 / 1 |
5 | |||
linterEnabled | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getLinterSiteConfig | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
2 | |||
makeExtResourceURL | |
100.00% |
17 / 17 |
|
100.00% |
1 / 1 |
8 | |||
getProtocols | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
getProtocolsRegex | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
5 | |||
hasValidProtocol | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
findValidProtocol | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
fakeTimestamp | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getNonNativeExtensionTags | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
getObjectFactory | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
2 | |||
constructExtConfig | |
0.00% |
0 / 11 |
|
0.00% |
0 / 1 |
6 | |||
tagNeedsNowikiStrippedInTagPF | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
processExtensionModule | |
0.00% |
0 / 36 |
|
0.00% |
0 / 1 |
90 | |||
getExtConfig | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
getContentModelHandler | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getAnnotationStrippers | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
isExtensionTag | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
isAnnotationTag | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getAnnotationTags | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
getExtensionTagNameMap | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
getExtTagConfig | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
getExtTagImpl | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
12 | |||
getExtDOMProcessors | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
getWt2HtmlLimits | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getHtml2WtLimits | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
createLogger | |
0.00% |
0 / 11 |
|
0.00% |
0 / 1 |
12 | |||
getNoFollowConfig | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
getExternalLinkTarget | n/a |
0 / 0 |
n/a |
0 / 0 |
0 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace Wikimedia\Parsoid\Config; |
5 | |
6 | use Liuggio\StatsdClient\Factory\StatsdDataFactoryInterface; |
7 | use Monolog\Formatter\LineFormatter; |
8 | use Monolog\Handler\ErrorLogHandler; |
9 | use Monolog\Handler\StreamHandler; |
10 | use Monolog\Logger; |
11 | use Psr\Container\ContainerInterface; |
12 | use Psr\Container\NotFoundExceptionInterface; |
13 | use Psr\Log\LoggerInterface; |
14 | use Psr\Log\LogLevel; |
15 | use Psr\Log\NullLogger; |
16 | use Wikimedia\Assert\Assert; |
17 | use Wikimedia\Bcp47Code\Bcp47Code; |
18 | use Wikimedia\ObjectFactory\ObjectFactory; |
19 | use Wikimedia\Parsoid\Core\ContentMetadataCollector; |
20 | use Wikimedia\Parsoid\Core\ContentModelHandler; |
21 | use Wikimedia\Parsoid\Core\LinkTarget; |
22 | use Wikimedia\Parsoid\DOM\Document; |
23 | use Wikimedia\Parsoid\Ext\AnnotationStripper; |
24 | use Wikimedia\Parsoid\Ext\ExtensionModule; |
25 | use Wikimedia\Parsoid\Ext\ExtensionTagHandler; |
26 | use Wikimedia\Parsoid\Ext\Gallery\Gallery; |
27 | use Wikimedia\Parsoid\Ext\Indicator\Indicator; |
28 | use Wikimedia\Parsoid\Ext\JSON\JSON; |
29 | use Wikimedia\Parsoid\Ext\LST\LST; |
30 | use Wikimedia\Parsoid\Ext\Nowiki\Nowiki; |
31 | use Wikimedia\Parsoid\Ext\Pre\Pre; |
32 | use Wikimedia\Parsoid\Utils\DOMCompat; |
33 | use Wikimedia\Parsoid\Utils\DOMUtils; |
34 | use Wikimedia\Parsoid\Utils\PHPUtils; |
35 | use Wikimedia\Parsoid\Utils\Utils; |
36 | use Wikimedia\Parsoid\Wikitext\Consts; |
37 | |
38 | /** |
39 | * Site-level configuration interface for Parsoid |
40 | * |
41 | * This includes both global configuration and wiki-level configuration. |
42 | */ |
43 | abstract class SiteConfig { |
44 | /** |
45 | * FIXME: not private so that ParserTests can reset these variables |
46 | * since they reuse site config and other objects between tests for |
47 | * efficiency reasons. |
48 | * |
49 | * @var array|null |
50 | */ |
51 | protected $mwAliases; |
52 | |
53 | /** @var array|null */ |
54 | private $behaviorSwitches; |
55 | |
56 | /** @var array|null */ |
57 | private $variables; |
58 | |
59 | /** @var array|null */ |
60 | private $mediaOptions; |
61 | |
62 | /** @var array|null */ |
63 | protected $functionSynonyms; |
64 | |
65 | /** @var string[] */ |
66 | private $protocolsRegexes = []; |
67 | |
68 | /** |
69 | * FIXME: not private so that ParserTests can reset these variables |
70 | * since they reuse site config and other objects between tests for |
71 | * efficiency reasons. |
72 | * @var array|null |
73 | */ |
74 | protected $interwikiMapNoNamespaces; |
75 | |
76 | /** |
77 | * FIXME: not private so that ParserTests can reset these variables |
78 | * since they reuse site config and other objects between tests for |
79 | * efficiency reasons. |
80 | * @var string|null|bool |
81 | */ |
82 | protected $linkTrailRegex = false; |
83 | |
84 | /** |
85 | * These extension modules provide "core" functionality |
86 | * and their implementations live in the Parsoid repo. |
87 | * |
88 | * @var class-string<ExtensionModule>[] |
89 | */ |
90 | private static $coreExtModules = [ |
91 | // content modules |
92 | JSON::class, |
93 | // extension tags |
94 | Nowiki::class, |
95 | Pre::class, |
96 | Gallery::class, |
97 | Indicator::class, |
98 | // The following implementations will move to their own repositories |
99 | // soon, but for now are implemented in the Parsoid repo. |
100 | LST::class |
101 | ]; |
102 | |
103 | /** |
104 | * Array specifying fully qualified class name for Parsoid-compatible extensions |
105 | * @var ?array<int,ExtensionModule> |
106 | */ |
107 | private $extModules = null; |
108 | /** |
109 | * Private counter to assign IDs to $extModules |
110 | * @var int |
111 | */ |
112 | private $extModuleNextId = 0; |
113 | |
114 | // phpcs:disable Generic.Files.LineLength.TooLong |
115 | |
/**
 * Add an extension module to this SiteConfig.
 *
 * The core modules are always initialised first, so ids handed out here
 * never collide with theirs. Registering a module drops the cached
 * extension config so the new module is taken into account the next time
 * that config is needed.
 *
 * @param string|array{name:string}|array{factory:callable}|array{class:class-string<ExtensionModule>} $configOrSpec
 *  Either an object factory specification for an ExtensionModule object
 *  (a class name string, or an array with a 'class' or 'factory' key),
 *  or else the configuration array which ExtensionModule::getConfig()
 *  would return. (The latter is preferred, but our internal extensions
 *  use the former.)
 * @return int An integer identifier that can be passed to
 *  ::unregisterExtensionModule() to remove this extension again.
 */
final public function registerExtensionModule( $configOrSpec ): int {
    // Side effect only: make sure the core modules have been loaded.
    $this->getExtensionModules();

    $isFactorySpec = is_string( $configOrSpec )
        || isset( $configOrSpec['class'] )
        || isset( $configOrSpec['factory'] );

    if ( !$isFactorySpec ) {
        // A plain configuration array: wrap it in an anonymous
        // ExtensionModule that simply hands the array back.
        $module = new class( $configOrSpec ) implements ExtensionModule {
            private $config;

            /** @param array $config */
            public function __construct( $config ) {
                $this->config = $config;
            }

            /** @inheritDoc */
            public function getConfig(): array {
                return $this->config;
            }
        };
    } else {
        // An object factory spec describing how to build the module.
        $factoryOptions = [
            'allowClassName' => true,
            'assertClass' => ExtensionModule::class,
        ];
        // ObjectFactory::createObject accepts an array, not just a callable (phan bug)
        // @phan-suppress-next-line PhanTypeInvalidCallableArraySize
        $module = $this->getObjectFactory()->createObject( $configOrSpec, $factoryOptions );
    }

    $assignedId = $this->extModuleNextId++;
    $this->extModules[$assignedId] = $module;
    // The cached extConfig predates this module; discard it.
    $this->extConfig = null;

    return $assignedId;
}
159 | |
160 | // phpcs:enable Generic.Files.LineLength.TooLong |
161 | |
/**
 * Remove a previously registered extension module.
 *
 * Mostly useful in tests, to put a shared SiteConfig back into its
 * original state. Unknown ids are ignored.
 *
 * @param int $extId The identifier returned by the matching call to
 *  ::registerExtensionModule()
 */
final public function unregisterExtensionModule( int $extId ): void {
    // The cached extConfig may still describe the module being removed.
    $this->extConfig = null;
    unset( $this->extModules[$extId] );
}
173 | |
/**
 * List the extension modules currently attached to this SiteConfig.
 *
 * The first call instantiates every class in self::$coreExtModules and
 * assigns each one the next free module id; later calls reuse that set
 * plus whatever was registered since.
 *
 * @return ExtensionModule[] The modules, re-indexed from zero (the
 *  internal registration ids are not exposed).
 */
final public function getExtensionModules() {
    if ( $this->extModules !== null ) {
        return array_values( $this->extModules );
    }

    $this->extModules = [];
    foreach ( self::$coreExtModules as $moduleClass ) {
        $moduleId = $this->extModuleNextId++;
        $this->extModules[$moduleId] = new $moduleClass();
    }

    return array_values( $this->extModules );
}
189 | |
190 | /** @var LoggerInterface|null */ |
191 | protected $logger = null; |
192 | |
193 | /** @var int */ |
194 | protected $iwMatcherBatchSize = 4096; |
195 | |
196 | /** @var array|null */ |
197 | private $iwMatcher = null; |
198 | |
199 | /** @var bool */ |
200 | protected $addHTMLTemplateParameters = false; |
201 | |
202 | /** @var bool */ |
203 | protected $scrubBidiChars = false; |
204 | |
205 | /** @var bool */ |
206 | protected $linterEnabled = false; |
207 | |
208 | /** @var ?array */ |
209 | protected $extConfig = null; |
210 | |
211 | /** |
212 | * Tag handlers for some extensions currently explicitly call unstripNowiki
213 | * first thing in their handlers. They do this to strip <nowiki>..</nowiki> |
214 | * wrappers around args when encountered in the {{#tag:...}} parser function. |
215 | * However, this strategy won't work for Parsoid which calls the preprocessor |
216 | * to get expanded wikitext. In this mode, <nowiki> wrappers won't be stripped |
217 | * and this leads to functional differences in parsing and output. |
218 | * |
219 | * See T203293 and T299103 for more details. |
220 | * |
221 | * To get around this, T299103 proposes that extensions that require this support |
222 | * set a config flag in their Parsoid extension config. On the Parsoid end, we |
223 | * then let the legacy parser know of these tags. When such extension tags are |
224 | * encountered in the {{#tag:...}} parser function handler (see tagObj function |
225 | * in CoreParserFunctions.php), that handler can then automatically strip these
226 | * nowiki wrappers on behalf of the extension. |
227 | * |
228 | * This serves two purposes. For one, it lets Parsoid support these extensions |
229 | * in this nowiki use edge case. For another, extensions that register handlers |
230 | * with Parsoid can get rid of explicit calls to unstripNowiki() in the |
231 | * tag handlers for the legacy parser. |
232 | * |
233 | * This property maintains an array of tags that need this support. |
234 | * |
235 | * @var array an associative array of tag names |
236 | */ |
237 | private $t299103Tags = []; |
238 | |
/**
 * Base constructor; intentionally does nothing.
 *
 * It is declared public so that the test suite can create mock
 * SiteConfig objects.
 */
public function __construct() {
}
247 | |
248 | /************************************************************************//** |
249 | * @name Global config |
250 | * @{ |
251 | */ |
252 | |
/**
 * General log channel.
 *
 * When no logger has been set, a NullLogger is created on demand and
 * kept for subsequent calls.
 *
 * @return LoggerInterface
 */
public function getLogger(): LoggerInterface {
    $this->logger = $this->logger ?? new NullLogger;
    return $this->logger;
}
263 | |
/**
 * Replace the log channel, e.g. for debugging.
 *
 * @param ?LoggerInterface $logger The logger to use from now on. Passing
 *  null clears the current logger; getLogger() will then fall back to a
 *  NullLogger.
 */
public function setLogger( ?LoggerInterface $logger ): void {
    $this->logger = $logger;
}
271 | |
/**
 * Default gallery options for this wiki.
 *
 * This base implementation returns a fixed set of defaults.
 *
 * @return array<string,string|int|bool> Option name => default value
 */
public function galleryOptions(): array {
    $defaults = [
        'imagesPerRow' => 0,
        'imageWidth' => 120,
        'imageHeight' => 120,
        'captionLength' => true,
        'showBytes' => true,
        'showDimensions' => true,
        'mode' => 'traditional',
    ];

    return $defaults;
}
287 | |
/**
 * Whether template parameters should additionally be parsed to HTML,
 * with the result added to the template parameters data.
 *
 * @return bool The value of the $addHTMLTemplateParameters flag
 *  (false unless changed).
 */
public function addHTMLTemplateParameters(): bool {
    return $this->addHTMLTemplateParameters;
}
296 | |
/**
 * Statistics aggregator, for counting and timing.
 *
 * This base implementation provides none.
 *
 * @return StatsdDataFactoryInterface|null Always null here.
 */
public function metrics(): ?StatsdDataFactoryInterface {
    return null;
}
305 | |
/**
 * Whether bidi characters adjacent to category links should be stripped
 * during the html -> wt serialization pass.
 *
 * @return bool The value of the $scrubBidiChars flag (false unless
 *  changed).
 */
public function scrubBidiChars(): bool {
    return $this->scrubBidiChars;
}
314 | |
315 | /** @} */ |
316 | |
317 | /************************************************************************//** |
318 | * @name Wiki config |
319 | * @{ |
320 | */ |
321 | |
322 | /** |
323 | * Allowed external image URL prefixes. |
324 | * |
325 | * @return string[] The empty array matches no URLs. The empty string matches |
326 | * all URLs. |
327 | */ |
328 | abstract public function allowedExternalImagePrefixes(): array; |
329 | |
330 | /** |
331 | * Site base URI |
332 | * |
333 | * This would be the URI found in `<base href="..." />`. |
334 | * |
335 | * @return string |
336 | */ |
337 | abstract public function baseURI(): string; |
338 | |
/**
 * Prefix for relative links.
 *
 * This is prepended to a page title to form a link to that page, and is
 * meant to be resolved against the URI returned by baseURI().
 *
 * Keep the default "./" where possible, so that clients can extract
 * titles from link hrefs without having to know this value.
 *
 * @return string
 */
public function relativeLinkPrefix(): string {
    return './';
}
353 | |
/**
 * Per-instance cache for bswPagePropRegexp().
 * @var string|null
 */
private $bswPagePropRegexp = null;

/**
 * Regex matching a whitespace-delimited "mw:PageProp/<name>" token, where
 * <name> is any double-underscore magic word accepted by bswRegexp().
 *
 * The result is cached on this object. It must not be cached in a
 * function-level static: that storage is shared by every SiteConfig
 * instance in the process, so the regex built for the first wiki would
 * be handed out for all others as well.
 *
 * @return string A complete PCRE pattern using '@' as its delimiter
 */
public function bswPagePropRegexp(): string {
    if ( $this->bswPagePropRegexp === null ) {
        // reStrip() is given '@', the delimiter used below; presumably it
        // removes the delimiters/flags from bswRegexp() so that its body
        // can be embedded here -- confirm against PHPUtils::reStrip().
        $this->bswPagePropRegexp =
            '@(?:^|\\s)mw:PageProp/(?:' .
            PHPUtils::reStrip( $this->bswRegexp(), '@' ) .
            ')(?=$|\\s)@uDS';
    }
    return $this->bswPagePropRegexp;
}
369 | |
370 | /** |
371 | * Map a canonical namespace name to its index |
372 | * |
373 | * @note This replaces canonicalNamespaces |
374 | * @param string $name all-lowercase and with underscores rather than spaces. |
375 | * @return int|null |
376 | */ |
377 | abstract public function canonicalNamespaceId( string $name ): ?int; |
378 | |
379 | /** |
380 | * Map a namespace name to its index |
381 | * |
382 | * @note This replaces canonicalNamespaces |
383 | * @param string $name |
384 | * @return int|null |
385 | */ |
386 | abstract public function namespaceId( string $name ): ?int; |
387 | |
388 | /** |
389 | * Map a namespace index to its preferred name |
390 | * (with spaces, not underscores). |
391 | * |
392 | * @note This replaces namespaceNames |
393 | * @param int $ns |
394 | * @return string|null |
395 | */ |
396 | abstract public function namespaceName( int $ns ): ?string; |
397 | |
398 | /** |
399 | * Test if a namespace has subpages |
400 | * |
401 | * @note This replaces namespacesWithSubpages |
402 | * @param int $ns |
403 | * @return bool |
404 | */ |
405 | abstract public function namespaceHasSubpages( int $ns ): bool; |
406 | |
407 | /** |
408 | * Return namespace case setting |
409 | * @param int $ns |
410 | * @return string 'first-letter' or 'case-sensitive' |
411 | */ |
412 | abstract public function namespaceCase( int $ns ): string; |
413 | |
/**
 * Test if a namespace is a talk namespace.
 *
 * Talk namespaces are the positive, odd-numbered ones.
 *
 * @note This replaces title.getNamespace().isATalkNamespace()
 * @param int $ns Namespace index
 * @return bool
 */
public function namespaceIsTalk( int $ns ): bool {
    if ( $ns <= 0 ) {
        return false;
    }
    return ( $ns % 2 ) !== 0;
}
424 | |
/**
 * Uppercasing method for titles: uppercase the first character of $str.
 *
 * @param string $str
 * @return string $str with its first character uppercased; returned
 *  unchanged when the first byte is below 96 (this includes the empty
 *  string).
 */
public function ucfirst( string $str ): string {
    $o = ord( $str );
    if ( $o < 96 ) {
        // First byte is an ASCII control character, digit, uppercase
        // letter or low punctuation: nothing to uppercase.
        return $str;
    }

    if ( $o < 128 ) {
        // Single-byte (ASCII) first character.
        if ( $str[0] === 'i' &&
            in_array( $this->langBcp47()->toBcp47Code(), [ 'az', 'tr', 'kaa', 'kk' ], true )
        ) {
            // In these languages a lowercase "i" uppercases to U+0130
            // (LATIN CAPITAL LETTER I WITH DOT ABOVE). It is written as
            // an escape sequence so the character cannot be corrupted if
            // this file is ever re-encoded.
            return "\u{0130}" . mb_substr( $str, 1 );
        }
        return ucfirst( $str ); // use PHP's ucfirst()
    }

    // fall back to more complex logic in case of multibyte strings
    $char = mb_substr( $str, 0, 1 );
    return mb_strtoupper( $char ) . mb_substr( $str, 1 );
}
447 | |
448 | /** |
449 | * Get the default local name for a special page |
450 | * @param string $alias Special page alias |
451 | * @return string|null |
452 | */ |
453 | abstract public function specialPageLocalName( string $alias ): ?string; |
454 | |
455 | /** |
456 | * Treat language links as magic connectors, not inline links |
457 | * @return bool |
458 | */ |
459 | abstract public function interwikiMagic(): bool; |
460 | |
461 | /** |
462 | * Interwiki link data |
463 | * @return array<string,array> Keys are interwiki prefixes, values are arrays with the following keys: |
464 | * - prefix: (string) The interwiki prefix, same as the key. |
465 | * - url: (string) Target URL, containing a '$1' to be replaced by the interwiki target. |
466 | * - protorel: (bool, optional) Whether the url may be accessed by both http:// and https://. |
467 | * - local: (bool, optional) Whether the interwiki link is considered local (to the wikifarm). |
468 | * - localinterwiki: (bool, optional) Whether the interwiki link points to the current wiki. |
469 | * - language: (bool, optional) Whether the interwiki link is a language link. |
470 | * - extralanglink: (bool, optional) Whether the interwiki link is an "extra language link". |
471 | * - linktext: (string, optional) For "extra language links", the link text. |
472 | * (booleans marked "optional" must be omitted if false) |
473 | */ |
474 | abstract public function interwikiMap(): array; |
475 | |
/**
 * Interwiki link data, after removing items that conflict with namespace names.
 * (In case of such conflict, namespace wins, interwiki is ignored.)
 *
 * The filtered map is computed once and cached in
 * $this->interwikiMapNoNamespaces.
 *
 * @return array<string,array> See interwikiMap()
 */
public function interwikiMapNoNamespaces(): array {
    if ( $this->interwikiMapNoNamespaces === null ) {
        $this->interwikiMapNoNamespaces = [];
        foreach ( $this->interwikiMap() as $key => $value ) {
            // PHP turns all-digit array keys into ints; with strict_types
            // enabled an int cannot be passed to namespaceId( string ),
            // so convert the key back explicitly.
            if ( $this->namespaceId( (string)$key ) === null ) {
                $this->interwikiMapNoNamespaces[$key] = $value;
            }
        }
    }
    return $this->interwikiMapNoNamespaces;
}
492 | |
/**
 * Match an href against the known interwiki URLs.
 *
 * On first use, the interwiki map (without prefixes that clash with
 * namespace names) is compiled into a list of regular expressions which
 * is cached in $this->iwMatcher. Every pattern is expected to contribute
 * exactly one capture group, so the N-th key of a batch corresponds to
 * capture group N of that batch's regex. (This relies on each interwiki
 * URL containing the '$1' placeholder exactly once.)
 *
 * @param string $href Link to match against
 * @return string[]|null Two values [ string $key, string $target ] on success, null on no match.
 *  $key is prefixed with ':' for language interwikis.
 */
public function interwikiMatcher( string $href ): ?array {
    if ( $this->iwMatcher === null ) {
        // Index 0 collects ordinary interwikis, index 1 language ones.
        $keys = [ [], [] ];
        $patterns = [ [], [] ];
        foreach ( $this->interwikiMapNoNamespaces() as $key => $iw ) {
            $lang = (int)( !empty( $iw['language'] ) );

            $url = $iw['url'];
            $protocolRelative = substr( $url, 0, 2 ) === '//';
            if ( !empty( $iw['protorel'] ) ) {
                $url = preg_replace( '/^https?:/', '', $url );
                $protocolRelative = true;
            }

            // full-url match pattern
            $keys[$lang][] = $key;
            $patterns[$lang][] =
                // Support protocol-relative URLs
                ( $protocolRelative ? '(?:https?:)?' : '' )
                // Convert placeholder to group match
                . strtr( preg_quote( $url, '/' ), [ '\\$1' => '(.*?)' ] );

            if ( !empty( $iw['local'] ) ) {
                // ./$interwikiPrefix:$title and
                // $interwikiPrefix%3A$title shortcuts
                // are recognized and the local wiki forwards
                // these shortcuts to the remote wiki

                // Quote the prefix: it is data, not a regex. Left
                // unquoted, metacharacters in it would alter the match,
                // and a parenthesis would shift the capture-group
                // numbering that the lookup below depends on.
                $quotedPrefix = preg_quote( (string)$iw['prefix'], '/' );

                $keys[$lang][] = $key;
                $patterns[$lang][] = '^\\.\\/' . $quotedPrefix . ':(.*?)';

                $keys[$lang][] = $key;
                $patterns[$lang][] = '^' . $quotedPrefix . '%3A(.*?)';
            }
        }

        // Prefer language matches over non-language matches
        $numLangs = count( $keys[1] );
        $keys = array_merge( $keys[1], $keys[0] );
        $patterns = array_merge( $patterns[1], $patterns[0] );

        // Builds the matcher entry for the patterns in [ $start, $end ):
        // the keys, the combined regex, and how many entries at the front
        // of this batch are language interwikis (may be zero or negative).
        $makeBatch = static function ( int $start, int $end )
            use ( $keys, $patterns, $numLangs ): array {
            $length = $end - $start;
            return [
                array_slice( $keys, $start, $length ),
                '/^(?:' . implode( '|', array_slice( $patterns, $start, $length ) ) . ')$/Di',
                $numLangs - $start,
            ];
        };

        // Chunk patterns into reasonably sized regexes
        $batches = [];
        $batchStart = 0;
        $batchLen = 0;
        foreach ( $patterns as $i => $pat ) {
            $len = strlen( $pat );
            if ( $i !== $batchStart && $batchLen + $len > $this->iwMatcherBatchSize ) {
                // Adding this pattern would exceed the size budget:
                // close the current batch and start a new one with it.
                $batches[] = $makeBatch( $batchStart, $i );
                $batchStart = $i;
                $batchLen = $len;
            } else {
                $batchLen += $len;
            }
        }
        $total = count( $patterns );
        if ( $total > $batchStart ) {
            // Flush the final, still open batch.
            $batches[] = $makeBatch( $batchStart, $total );
        }
        $this->iwMatcher = $batches;
    }

    foreach ( $this->iwMatcher as [ $batchKeys, $regex, $langCount ] ) {
        if ( !preg_match( $regex, $href, $m, PREG_UNMATCHED_AS_NULL ) ) {
            continue;
        }
        // Only the alternative that matched has a non-null group.
        foreach ( $batchKeys as $i => $key ) {
            if ( isset( $m[$i + 1] ) ) {
                if ( $i < $langCount ) {
                    // Escape language interwikis with a colon
                    $key = ':' . $key;
                }
                return [ $key, $m[$i + 1] ];
            }
        }
    }
    return null;
}
582 | |
583 | /** |
584 | * Wiki identifier, for cache keys. |
585 | * Should match a key in mwApiMap()? |
586 | * @return string |
587 | */ |
588 | abstract public function iwp(): string; |
589 | |
590 | /** |
591 | * Legal title characters |
592 | * |
593 | * Regex is intended to match bytes, not Unicode characters. |
594 | * |
595 | * @return string Regex character class (i.e. the bit that goes inside `[]`) |
596 | */ |
597 | abstract public function legalTitleChars(): string; |
598 | |
599 | /** |
600 | * Link prefix regular expression. |
601 | * @return string|null |
602 | */ |
603 | abstract public function linkPrefixRegex(): ?string; |
604 | |
605 | /** |
606 | * Return raw link trail regexp from config |
607 | * @return string |
608 | */ |
609 | abstract protected function linkTrail(): string; |
610 | |
/**
 * Link trail regular expression.
 *
 * Derived from linkTrail() on first use and cached afterwards; the
 * property holds false until then, because null is itself a valid
 * cached result.
 *
 * @return string|null The raw link trail with any '(.*)$' removed, or
 *  null when what remains contains an empty group '()'.
 */
public function linkTrailRegex(): ?string {
    if ( $this->linkTrailRegex !== false ) {
        return $this->linkTrailRegex;
    }

    $stripped = str_replace( '(.*)$', '', $this->linkTrail() );
    // Empty regex from zh-hans
    $hasEmptyGroup = strpos( $stripped, '()' ) !== false;
    $this->linkTrailRegex = $hasEmptyGroup ? null : $stripped;

    return $this->linkTrailRegex;
}
628 | |
629 | /** |
630 | * Wiki language code. |
631 | * @return Bcp47Code BCP-47 language code |
632 | */ |
633 | abstract public function langBcp47(): Bcp47Code; |
634 | |
635 | /** |
636 | * Main page title, as LinkTarget |
637 | * @return LinkTarget |
638 | */ |
639 | abstract public function mainPageLinkTarget(): LinkTarget; |
640 | |
641 | /** |
642 | * Lookup config |
643 | * @param string $key |
644 | * @return mixed|null config value for $key, if present or null, if not. |
645 | */ |
646 | abstract public function getMWConfigValue( string $key ); |
647 | |
648 | /** |
649 | * Whether the wiki language is right-to-left |
650 | * @return bool |
651 | */ |
652 | abstract public function rtl(): bool; |
653 | |
654 | /** |
655 | * Whether language converter is enabled for the specified language |
656 | * @param Bcp47Code $lang |
657 | * @return bool |
658 | */ |
659 | abstract public function langConverterEnabledBcp47( Bcp47Code $lang ): bool; |
660 | |
661 | /** |
662 | * The URL path to index.php. |
663 | * @return string |
664 | */ |
665 | abstract public function script(): string; |
666 | |
667 | /** |
668 | * FIXME: This is only used to compute the modules path below |
669 | * and maybe shouldn't be exposed. |
670 | * |
671 | * The base wiki path |
672 | * @return string |
673 | */ |
674 | abstract public function scriptpath(): string; |
675 | |
676 | /** |
677 | * The base URL of the server. |
678 | * @return string |
679 | */ |
680 | abstract public function server(): string; |
681 | |
682 | /** |
683 | * Export content metadata via meta tags (and via a stylesheet |
684 | * for now to aid some clients). |
685 | * |
686 | * @param Document $document |
687 | * @param ContentMetadataCollector $metadata |
688 | * @param string $defaultTitle The default title to display, as an |
689 | * unescaped string |
690 | * @param Bcp47Code $lang a BCP-47 language code |
691 | */ |
692 | abstract public function exportMetadataToHeadBcp47( |
693 | Document $document, |
694 | ContentMetadataCollector $metadata, |
695 | string $defaultTitle, |
696 | Bcp47Code $lang |
697 | ): void; |
698 | |
/**
 * Write the metadata-derived elements into the document's <head>.
 *
 * In order, this sets the <title> (creating it if the document has none),
 * then appends a `mw:jsConfigVars` <meta> (only if there are config vars
 * and they could be JSON-encoded), a `mw:generalModules` <meta> and a
 * `mw:moduleStyles` <meta> (each only if its list is non-empty), and
 * finally a stylesheet <link> whose href is built from $modulesLoadURI.
 *
 * @param Document $document
 * @param string $modulesLoadURI Module loader URI; a query string is appended to it
 * @param string[] $modules
 * @param string[] $moduleStyles
 * @param array<string,mixed> $jsConfigVars
 * @param string $htmlTitle The display title, as escaped HTML
 * @param string|Bcp47Code $lang a MediaWiki-internal language code string,
 *   or a Bcp47Code object (latter is preferred)
 */
protected function exportMetadataHelper(
	Document $document,
	string $modulesLoadURI,
	array $modules,
	array $moduleStyles,
	array $jsConfigVars,
	string $htmlTitle,
	$lang
): void {
	$lang = Utils::mwCodeToBcp47( $lang, true, $this->getLogger() );

	// Display title: reuse the existing <title> when the document has one.
	$titleElement = DOMCompat::querySelector( $document, 'title' )
		?: DOMUtils::appendToHead( $document, 'title' );
	DOMCompat::setInnerHTML( $titleElement, $htmlTitle );

	// JsConfigVars: only exported when non-empty and serializable.
	$encodedVars = null;
	if ( $jsConfigVars ) {
		try {
			$encodedVars = PHPUtils::jsonEncode( $jsConfigVars );
		} catch ( \Exception $e ) {
			// Similar to ResourceLoader::makeConfigSetScript. See T289358
			$this->getLogger()->log(
				LogLevel::WARNING,
				'JSON serialization of config data failed. ' .
				'This usually means the config data is not valid UTF-8.'
			);
		}
	}
	if ( $encodedVars ) {
		DOMUtils::appendToHead( $document, 'meta', [
			'property' => 'mw:jsConfigVars',
			'content' => $encodedVars,
		] );
	}

	// Modules returned from preprocessor / parse requests.
	// - mw:generalModules can be processed via JS (and async) and are
	//   usually (but not always) JS scripts.
	// - mw:moduleStyles are CSS modules that are render-blocking.
	$moduleLists = [
		'mw:generalModules' => $modules,
		'mw:moduleStyles' => $moduleStyles,
	];
	foreach ( $moduleLists as $property => $names ) {
		if ( $names ) {
			DOMUtils::appendToHead( $document, 'meta', [
				'property' => $property,
				'content' => implode( '|', array_unique( $names ) )
			] );
		}
	}

	/*
	 * While unnecessary for Wikimedia clients, a stylesheet url in
	 * the <head> is useful for clients like Kiwix and others who
	 * might not want to process the meta tags to construct the
	 * resourceloader url.
	 *
	 * Since these clients consume Parsoid HTML outside a MediaWiki
	 * skin, they are effectively responsible for their own "skin".
	 * As a courtesy the vector skin modules are hardcoded for them,
	 * but page elements may render differently than on Wikimedia
	 * sites with the vector skin, since a number of other modules
	 * are probably missing.
	 *
	 * JS-generated parts of the page still require these clients to
	 * know how to process the JS modules. Except for <graph>s, page
	 * content doesn't require JS modules at this point. Clients that
	 * construct a better resourceloader url themselves can simply
	 * delete / ignore this stylesheet.
	 */
	$styleModules = array_unique( array_merge( $moduleStyles, [
		'mediawiki.skinning.content.parsoid',
		// Use the base styles that API output and fallback skin use.
		'mediawiki.skinning.interface',
		// Make sure to include contents of user generated styles
		// e.g. MediaWiki:Common.css / MediaWiki:Mobile.css
		'site.styles'
	] ) );
	// The resource loader path needs the MW-internal language code.
	$langMw = Utils::bcp47ToMwCode( $lang );
	$href = $modulesLoadURI
		. '?lang=' . $langMw
		. '&modules=' . PHPUtils::encodeURIComponent( implode( '|', $styleModules ) )
		. '&only=styles&skin=vector';
	DOMUtils::appendToHead( $document, 'link', [ 'rel' => 'stylesheet', 'href' => $href ] );
}
801 | |
802 | /** |
803 | * A regexp matching the localized 'REDIRECT' marker for this wiki. |
804 | * The regexp must be delimited, but must not contain boundary anchors |
805 | * or capture groups: the sol-transparent regexps strip its delimiters with PHPUtils::reStrip() and embed it in a larger pattern. |
806 | * @return string Delimited regexp |
807 | */ |
808 | abstract public function redirectRegexp(): string; |
809 | |
810 | /** |
811 | * A regexp matching the localized 'Category' prefix for this wiki. |
812 | * The regexp must be delimited, but must not contain boundary anchors |
813 | * or capture groups: the sol-transparent regexps strip its delimiters with PHPUtils::reStrip() and embed it between `\[\[` and `\:`. |
814 | * @return string Delimited regexp |
815 | */ |
816 | abstract public function categoryRegexp(): string; |
817 | |
818 | /** |
819 | * A regexp matching localized behavior switches for this wiki. |
820 | * The regexp must be delimited, but must not contain boundary anchors |
821 | * or capture groups: the sol-transparent regexps strip its delimiters with PHPUtils::reStrip() and wrap it as `__(?:...)__` (so it presumably matches the switch name without the double underscores -- confirm). |
822 | * @return string Delimited regexp |
823 | */ |
824 | abstract public function bswRegexp(): string; |
825 | |
/**
 * Per-instance memo for solTransparentWikitextRegexp().
 * @var string|null
 */
private $solTransparentWikitextRegexpCache = null;

/**
 * A regex matching a line containing just whitespace, comments, and
 * sol transparent links and behavior switches.
 *
 * The pattern is assembled from redirectRegexp(), categoryRegexp(),
 * bswRegexp() and Utils::COMMENT_REGEXP (each with its delimiters
 * stripped), is delimited with '@' and anchored at both ends.
 *
 * The result is memoized on this object rather than in a function-level
 * static: a static would be shared by every SiteConfig in the process,
 * while the pattern depends on the localization of this particular wiki.
 *
 * @return string
 */
public function solTransparentWikitextRegexp(): string {
	// cscott sadly says: Note that this depends on the precise
	// localization of the magic words of this particular wiki.
	if ( $this->solTransparentWikitextRegexpCache === null ) {
		$redirect = PHPUtils::reStrip( $this->redirectRegexp(), '@' );
		$category = PHPUtils::reStrip( $this->categoryRegexp(), '@' );
		$bswRegexp = PHPUtils::reStrip( $this->bswRegexp(), '@' );
		$comment = PHPUtils::reStrip( Utils::COMMENT_REGEXP, '@' );
		$this->solTransparentWikitextRegexpCache = '@' .
			'^[ \t\n\r\0\x0b]*' .
			// Optional leading redirect: marker, optional colon, link
			'(?:' .
			'(?:' . $redirect . ')' .
			'[ \t\n\r\x0c]*(?::[ \t\n\r\x0c]*)?\[\[[^\]]+\]\]' .
			')?' .
			// Any number of category links, behavior switches,
			// comments and whitespace characters
			'(?:' .
			'\[\[' . $category . '\:[^\]]*?\]\]|' .
			'__(?:' . $bswRegexp . ')__|' .
			$comment . '|' .
			'[ \t\n\r\0\x0b]' .
			')*$@';
	}
	return $this->solTransparentWikitextRegexpCache;
}
855 | |
/**
 * Per-instance memo for solTransparentWikitextNoWsRegexp(), keyed by
 * (int)$addIncludes.
 * @var array<int,string>
 */
private $solTransparentWikitextNoWsRegexpCache = [];

/**
 * A regex matching a line containing just comments and
 * sol transparent links and behavior switches.
 *
 * Unlike solTransparentWikitextRegexp(), the pattern is not anchored,
 * does not accept bare whitespace, and wraps everything in one capture
 * group.
 *
 * The result is memoized per instance and per value of $addIncludes.
 * A single function-level static would return whichever variant was
 * built first, regardless of the flag passed on later calls, and would
 * be shared between SiteConfig objects for different wikis.
 *
 * @param bool $addIncludes Whether <includeonly>...</includeonly> blocks
 *   are also accepted
 * @return string
 */
public function solTransparentWikitextNoWsRegexp(
	bool $addIncludes = false
): string {
	// cscott sadly says: Note that this depends on the precise
	// localization of the magic words of this particular wiki.
	$cacheKey = (int)$addIncludes;
	if ( !isset( $this->solTransparentWikitextNoWsRegexpCache[$cacheKey] ) ) {
		$redirect = PHPUtils::reStrip( $this->redirectRegexp(), '@' );
		$category = PHPUtils::reStrip( $this->categoryRegexp(), '@' );
		$bswRegexp = PHPUtils::reStrip( $this->bswRegexp(), '@' );
		$comment = PHPUtils::reStrip( Utils::COMMENT_REGEXP, '@' );
		$this->solTransparentWikitextNoWsRegexpCache[$cacheKey] = '@' .
			'((?:' .
			'(?:' . $redirect . ')' .
			'[ \t\n\r\x0c]*(?::[ \t\n\r\x0c]*)?\[\[[^\]]+\]\]' .
			')?' .
			'(?:' .
			'\[\[' . $category . '\:[^\]]*?\]\]|' .
			'__(?:' . $bswRegexp . ')__|' .
			$comment .
			// FIXME(SSS): What about onlyinclude and noinclude?
			( $addIncludes ? '|<includeonly>[\S\s]*?</includeonly>' : '' ) .
			')*)@';
	}
	return $this->solTransparentWikitextNoWsRegexpCache[$cacheKey];
}
889 | |
890 | /** |
891 | * The offset of the wiki's time zone from UTC. |
892 | * @return int Offset in minutes, counted east of UTC |
893 | */ |
894 | abstract public function timezoneOffset(): int; |
895 | |
896 | /** |
897 | * Language variant information for the given language. |
898 | * Returns null when nothing is known about the language. |
899 | * @param Bcp47Code $lang The language for which you want variant information |
900 | * @return ?array{base:Bcp47Code,fallbacks:Bcp47Code[]} null if unknown, otherwise an array with |
901 | * two fields: |
902 | * - base: (Bcp47Code) Base BCP-47 language code (e.g. "zh") |
903 | * - fallbacks: (Bcp47Code[]) Fallback variants, as BCP-47 codes |
904 | */ |
905 | abstract public function variantsFor( Bcp47Code $lang ): ?array; |
906 | |
907 | /** |
908 | * The wiki's default thumbnail width (presumably in pixels -- TODO confirm the unit). |
909 | */ |
910 | abstract public function widthOption(): int; |
911 | |
/**
 * Ids of the magic words that are variables.
 * populateMagicWords() turns the returned list into a set with
 * PHPUtils::makeSet() and registers the aliases of every magic word
 * whose id is in that set as variables.
 * @return array
 */
912 | abstract protected function getVariableIDs(): array; |
913 | |
/**
 * The wiki's magic words, keyed by magic word id.
 * As consumed by populateMagicWords(), each value is an array whose first
 * element is the case-sensitivity flag and whose remaining elements are
 * the aliases of that magic word.
 * @return array
 */
914 | abstract protected function getMagicWords(): array; |
915 | |
916 | /** |
917 | * Does the SiteConfig provide precomputed function synonyms? |
918 | * If not, populateMagicWords() calls updateFunctionSynonym() for every alias, |
919 | * so the SiteConfig is then expected to override updateFunctionSynonym. This default implementation answers yes. |
920 | */ |
921 | protected function haveComputedFunctionSynonyms(): bool { |
922 | return true; |
923 | } |
924 | |
925 | /** |
926 | * Get a list of precomputed function synonyms. populateMagicWords() stores the result in $this->functionSynonyms, which getMagicWordForFunctionHook() reads as [1][name] and [0][lowercased name]. This default implementation returns an empty list. |
927 | */ |
928 | protected function getFunctionSynonyms(): array { |
929 | return []; |
930 | } |
931 | |
/**
 * Record one function synonym. populateMagicWords() calls this for every
 * alias when haveComputedFunctionSynonyms() returns false; this default
 * implementation always throws, so such a SiteConfig has to override it.
 * @param string $func The alias (already lowercased by the caller for
 *   case-insensitive magic words)
 * @param string $magicword The magic word id the alias belongs to
 * @param bool $caseSensitive
 * @throws \RuntimeException Always, in this default implementation
 */
932 | protected function updateFunctionSynonym( string $func, string $magicword, bool $caseSensitive ): void { |
933 | throw new \RuntimeException( "Unexpected code path!" ); |
934 | } |
935 | |
/**
 * Lazily build the magic word lookup tables ($mwAliases,
 * $behaviorSwitches, $variables, $mediaOptions, $functionSynonyms)
 * from getMagicWords(). Does nothing once $mwAliases is non-empty.
 *
 * For case-insensitive magic words, the lowercased alias is recorded in
 * $mwAliases in addition to the original alias, and the lowercased form
 * is the key used in the other tables.
 */
private function populateMagicWords() {
	if ( !empty( $this->mwAliases ) ) {
		return;
	}

	$this->mediaOptions = [];
	$this->variables = [];
	$this->behaviorSwitches = [];
	$this->mwAliases = [];

	$variableIds = PHPUtils::makeSet( $this->getVariableIDs() );
	$this->functionSynonyms = $this->getFunctionSynonyms();
	$synonymsPrecomputed = $this->haveComputedFunctionSynonyms();

	foreach ( $this->getMagicWords() as $magicword => $aliases ) {
		// The first element is the case-sensitivity flag, the rest are aliases.
		$caseSensitive = array_shift( $aliases );
		$isVariable = isset( $variableIds[$magicword] );
		$isMediaOption = preg_match( '/^(img|timedmedia)_/', $magicword );

		foreach ( $aliases as $alias ) {
			$this->mwAliases[$magicword][] = $alias;
			$key = $alias;
			if ( !$caseSensitive ) {
				$key = mb_strtolower( $alias );
				$this->mwAliases[$magicword][] = $key;
			}

			if ( strncmp( $key, '__', 2 ) === 0 ) {
				$this->behaviorSwitches[$key] = [ $caseSensitive, $magicword ];
			}
			if ( $isVariable ) {
				$this->variables[$key] = $magicword;
			}
			if ( $isMediaOption ) {
				$this->mediaOptions[$key] = [ $caseSensitive, $magicword ];
			}
			if ( !$synonymsPrecomputed ) {
				$this->updateFunctionSynonym( $key, $magicword, (bool)$caseSensitive );
			}
		}
	}
}
970 | |
971 | /** |
972 | * List all magic words by canonical name. The table is built on first use by populateMagicWords(); for case-insensitive magic words it contains the lowercased form of each alias in addition to the alias itself. |
973 | * @return string[][] Keys are canonical names, values are arrays of aliases. |
974 | */ |
975 | public function mwAliases(): array { |
976 | $this->populateMagicWords(); |
977 | return $this->mwAliases; |
978 | } |
979 | |
/**
 * Return canonical magic word for a function hook.
 *
 * The exact spelling is looked up in $this->functionSynonyms[1] first;
 * failing that, the lowercased spelling is looked up in
 * $this->functionSynonyms[0], which holds the case-insensitive functions.
 *
 * @param string $str
 * @return string|null
 */
public function getMagicWordForFunctionHook( string $str ): ?string {
	$this->populateMagicWords();
	if ( isset( $this->functionSynonyms[1][$str] ) ) {
		return $this->functionSynonyms[1][$str];
	}
	# Case insensitive functions
	$lowered = mb_strtolower( $str );
	return $this->functionSynonyms[0][$lowered] ?? null;
}
991 | |
992 | /** |
993 | * Return canonical magic word for a variable. The lookup uses $str exactly as given (it is not lowercased here), against the table built by populateMagicWords(). |
994 | * @param string $str |
995 | * @return string|null The magic word id, or null if $str is not a known variable alias |
996 | */ |
997 | public function getMagicWordForVariable( string $str ): ?string { |
998 | $this->populateMagicWords(); |
999 | return $this->variables[$str] ?? null; |
1000 | } |
1001 | |
/**
 * Look a word up in an alias table whose values are
 * [ caseSensitive, canonicalName ] pairs.
 *
 * An exact key match always wins. Otherwise the lowercased word is tried,
 * and only accepted if that entry is flagged as case-insensitive.
 *
 * @param array $mws Alias table
 * @param string $word
 * @return string|null Canonical name, or null if there is no match
 */
private static function getMagicWordCanonicalName( array $mws, string $word ): ?string {
	$exact = $mws[$word] ?? null;
	if ( $exact !== null ) {
		return $exact[1];
	}
	$folded = $mws[mb_strtolower( $word )] ?? null;
	if ( !$folded || $folded[0] ) {
		return null;
	}
	return $folded[1];
}
1009 | |
1010 | /** |
1011 | * Return canonical magic word for a media option. An exact alias match is tried first, then a lowercased match against case-insensitive aliases (see getMagicWordCanonicalName()). |
1012 | * @param string $word |
1013 | * @return string|null The magic word id, or null if $word is not a known media option alias |
1014 | */ |
1015 | public function getMagicWordForMediaOption( string $word ): ?string { |
1016 | $this->populateMagicWords(); |
1017 | return self::getMagicWordCanonicalName( $this->mediaOptions, $word ); |
1018 | } |
1019 | |
1020 | /** |
1021 | * Return canonical magic word for a behavior switch. An exact alias match is tried first, then a lowercased match against case-insensitive aliases (see getMagicWordCanonicalName()). |
1022 | * @param string $word The switch as registered by populateMagicWords(), i.e. an alias starting with `__` |
1023 | * @return string|null The magic word id, or null if $word is not a known behavior switch |
1024 | */ |
1025 | public function getMagicWordForBehaviorSwitch( string $word ): ?string { |
1026 | $this->populateMagicWords(); |
1027 | return self::getMagicWordCanonicalName( $this->behaviorSwitches, $word ); |
1028 | } |
1029 | |
1030 | /** |
1031 | * Check if a string is a recognized behavior switch, i.e. whether |
1032 | * getMagicWordForBehaviorSwitch() finds a magic word for it. |
1033 | * @param string $word |
1034 | * @return bool |
1035 | */ |
1036 | public function isBehaviorSwitch( string $word ): bool { |
1037 | return $this->getMagicWordForBehaviorSwitch( $word ) !== null; |
1038 | } |
1039 | |
/**
 * Convert the internal canonical magic word name to the wikitext alias.
 *
 * If the magic word has no aliases, $suggest is returned unchanged.
 * Otherwise $suggest is returned when it is one of the aliases, and the
 * first alias is returned when it is not (or when $suggest is empty).
 *
 * @param string $word Canonical magic word name
 * @param string $suggest Suggested alias (used as fallback and preferred choice)
 * @return string
 */
public function getMagicWordWT( string $word, string $suggest ): string {
	$aliases = $this->mwAliases()[$word] ?? [];
	if ( !$aliases ) {
		return $suggest;
	}
	// array_search() yields false when the suggestion is not an alias;
	// both false and index 0 select the first alias.
	$position = $suggest ? array_search( $suggest, $aliases, true ) : 0;
	return $aliases[$position ?: 0];
}
1057 | |
1058 | /** |
1059 | * Get a regexp matching a localized magic word, given its id. |
1060 | * |
1061 | * FIXME: misleading function name (the declared return value is a regexp string, not a matcher callable) |
1062 | * |
1063 | * @param string $id Magic word id |
1064 | * @return string Regexp |
1065 | */ |
1066 | abstract public function getMagicWordMatcher( string $id ): string; |
1067 | |
1068 | /** |
1069 | * Get a matcher function for fetching values out of interpolated magic words, |
1070 | * i.e. those with `$1` in their aliases. |
1071 | * |
1072 | * The matcher takes a string and returns null if it doesn't match any of |
1073 | * the words, or an associative array if it did match: |
1074 | * - k: The magic word that matched |
1075 | * - v: The value of $1 that was matched |
1076 | * (the JS also returned 'a' with the specific alias that matched, but that |
1077 | * seems to be unused and so is omitted here) |
1078 | * |
1079 | * @param string[] $words Magic words to match |
1080 | * @return callable Matcher as described above |
1081 | */ |
1082 | abstract protected function getParameterizedAliasMatcher( array $words ): callable; |
1083 | |
/**
 * Get a matcher function for fetching values out of interpolated magic
 * words which are media prefix options. The words are the keys of
 * Consts::$Media['PrefixOptions'].
 *
 * The matcher takes a string and returns null if it doesn't match any of
 * the words, or an associative array if it did match:
 * - k: The magic word that matched
 * - v: The value of $1 that was matched
 * (the JS also returned 'a' with the specific alias that matched, but that
 * seems to be unused and so is omitted here)
 *
 * @return callable
 */
final public function getMediaPrefixParameterizedAliasMatcher(): callable {
	// PORT-FIXME: this shouldn't be a constant, we should fetch these
	// from the SiteConfig. Further, we probably need a hook here so
	// Parsoid can handle media options defined in extensions... in
	// particular timedmedia_* magic words from Extension:TimedMediaHandler
	return $this->getParameterizedAliasMatcher(
		array_keys( Consts::$Media['PrefixOptions'] )
	);
}
1105 | |
1106 | /** |
1107 | * Get the maximum template depth configured for this wiki |
1108 | * (presumably the deepest permitted nesting of template expansion -- confirm with implementers). |
1109 | * @return int |
1110 | */ |
1111 | abstract public function getMaxTemplateDepth(): int; |
1112 | |
1113 | /** |
1114 | * Return namespace aliases for the NS_SPECIAL namespace. getExtResourceURLPatternMatcher() joins them with `|` and inserts them into a regexp without further quoting, so they are presumably expected to be regexp-safe already -- confirm with implementers. |
1115 | * @return array |
1116 | */ |
1117 | abstract protected function getSpecialNSAliases(): array; |
1118 | |
1119 | /** |
1120 | * Return Special Page aliases for a special page name. getExtResourceURLPatternMatcher() passes each alias through quoteTitleRe() before using it in a regexp. |
1121 | * @param string $specialPage Special page name, e.g. 'Booksources' |
1122 | * @return array |
1123 | */ |
1124 | abstract protected function getSpecialPageAliases( string $specialPage ): array; |
1125 | |
/**
 * Quote a title for use inside a regexp.
 *
 * The title is escaped with preg_quote() for the given delimiter, then
 * every space and every underscore is replaced with `[ _]` so that
 * either will be matched.
 *
 * @param string $s
 * @param string $delimiter Defaults to '/'
 * @return string
 */
protected static function quoteTitleRe( string $s, string $delimiter = '/' ): string {
	$spaceOrUnderscore = '[ _]';
	return strtr( preg_quote( $s, $delimiter ), [
		' ' => $spaceOrUnderscore,
		'_' => $spaceOrUnderscore,
	] );
}
1144 | |
/**
 * Matcher for ISBN/RFC/PMID URL patterns, returning the type and number.
 *
 * The returned closure takes a string and returns false on no match or a
 * tuple like this on match: [ 'RFC', '12345' ]. The whole string has to
 * match one of the patterns.
 *
 * @return callable
 */
public function getExtResourceURLPatternMatcher(): callable {
	$specialNs = implode( '|', array_unique( $this->getSpecialNSAliases() ) );
	$bookSources = implode( '|', array_map(
		[ $this, 'quoteTitleRe' ],
		$this->getSpecialPageAliases( 'Booksources' )
	) );

	// cscott wants a mention of T145590 here ("Update Parsoid to be compatible with magic links
	// being disabled")
	$isbnPattern = '(?:\.\.?/)*(?i:' . $specialNs . ')(?:%3[Aa]|:)'
		. '(?i:' . $bookSources . ')(?:%2[Ff]|/)(?P<ISBN>\d+[Xx]?)';
	$pats = [
		'ISBN' => $isbnPattern,
		'RFC' => '[^/]*//tools\.ietf\.org/html/rfc(?P<RFC>\w+)',
		'PMID' => '[^/]*//www\.ncbi\.nlm\.nih\.gov/pubmed/(?P<PMID>\w+)\?dopt=Abstract',
	];
	// Each pattern captures its number in a group named after its type.
	$types = array_keys( $pats );
	$regex = '!^(?:' . implode( '|', $pats ) . ')$!';

	return static function ( $text ) use ( $types, $regex ) {
		if ( !preg_match( $regex, $text, $m ) ) {
			return false;
		}
		// The first type whose named group is non-empty is the match.
		foreach ( $types as $type ) {
			if ( isset( $m[$type] ) && $m[$type] !== '' ) {
				return [ $type, $m[$type] ];
			}
		}
		return false;
	};
}
1179 | |
1180 | /** |
1181 | * @return bool The value of the $linterEnabled property (presumably whether linting is turned on for this wiki -- confirm where it is set) |
1182 | */ |
1183 | public function linterEnabled(): bool { |
1184 | return $this->linterEnabled; |
1185 | } |
1186 | |
1187 | /** |
1188 | * Return the desired linter configuration. These are heuristic values |
1189 | * which have hardcoded defaults but could be overridden on a per-wiki |
1190 | * basis. In these defaults both the allow list and the block list are null. |
1191 | * @return array{enabled:?string[],disabled:?string[],maxTableColumnHeuristic:int,maxTableRowsToCheck:int,tidyWhitespaceBugMaxLength:int} |
1192 | */ |
1193 | public function getLinterSiteConfig(): array { |
1194 | return [ |
1195 | // Allow list for specific lint types. |
1196 | // Takes precedence over block list. |
1197 | 'enabled' => null, |
1198 | // Block list for specific lint types. |
1199 | // Not used if an allow list is set. |
1200 | 'disabled' => null, |
1201 | // The maximum columns in a table before the table is considered |
1202 | // large |
1203 | 'maxTableColumnHeuristic' => 5, |
1204 | // The maximum rows (header or data) to be checked for the large |
1205 | // table lint |
1206 | // - If we consider the first N rows to be representative of the |
1207 | // table, and the table is well-formed and uniform, it is |
1208 | // sufficient to check the first N rows to decide whether the table is |
1209 | // "large". |
1210 | // - This heuristic is used together with the |
1211 | // 'maxTableColumnHeuristic' to identify "large tables". |
1212 | 'maxTableRowsToCheck' => 10, |
1213 | // Max length of content covered by 'white-space:nowrap' CSS |
1214 | // that we consider "safe" when Tidy is replaced. Beyond that, |
1215 | // wikitext will have to be fixed up to manually insert whitespace |
1216 | // at the right places. Length in bytes. |
1217 | 'tidyWhitespaceBugMaxLength' => 100, |
1218 | ]; |
1219 | } |
1220 | |
/**
 * Serialize ISBN/RFC/PMID URL patterns
 *
 * When the link text, once normalized, reads exactly like the magic link
 * for $match, the text itself is returned. Otherwise an explicit link is
 * produced: a wikilink for ISBN, an external link for RFC and PMID.
 *
 * @param string[] $match As returned by the getExtResourceURLPatternMatcher() matcher
 * @param string $href Fallback link target, if $match is invalid.
 * @param string $content Link text
 * @return string
 * @throws \InvalidArgumentException If $match[0] is not ISBN, RFC or PMID
 */
public function makeExtResourceURL( array $match, string $href, string $content ): string {
	// Decode entities, then collapse each run of space-like characters
	// into a single ASCII space.
	$text = preg_replace(
		'/[ \x{00A0}\x{1680}\x{2000}-\x{200A}\x{202F}\x{205F}\x{3000}]+/u', ' ',
		Utils::decodeWtEntities( $content )
	);
	$magicLink = implode( '', $match );

	// TODO: T145590 ("Update Parsoid to be compatible with magic links being disabled")
	switch ( $match[0] ) {
		case 'ISBN':
			$text = strtoupper( preg_replace( '/[\- \t]/', '', $text ) );
			// validate ISBN length and format, so as not to produce magic links
			// which aren't actually magic
			$wellFormed = preg_match( '/^ISBN(97[89])?\d{9}(\d|X)$/D', $text );
			if ( $magicLink === $text && $wellFormed ) {
				return $content;
			}
			// strip "./" prefix. TODO: Use relativeLinkPrefix() instead?
			$target = PHPUtils::stripPrefix( $href, './' );
			return '[[' . $target . '|' . $content . ']]';

		case 'RFC':
		case 'PMID':
			$text = preg_replace( '/[ \t]/', '', $text );
			if ( $magicLink === $text ) {
				return $content;
			}
			return '[' . $href . ' ' . $content . ']';

		default:
			throw new \InvalidArgumentException( "Invalid match type '{$match[0]}'" );
	}
}
1258 | |
1259 | /** |
1260 | * Get the list of valid protocols. getProtocolsRegex() quotes each entry with preg_quote() and treats the entry '//' as the protocol-relative marker. |
1261 | * @return array |
1262 | */ |
1263 | abstract protected function getProtocols(): array; |
1264 | |
/**
 * Get a regex fragment matching URL protocols, quoted for an exclamation
 * mark delimiter. The case-insensitive option should be used.
 *
 * The fragment is an alternation of the quoted protocols without any
 * enclosing group. It is cached in $this->protocolsRegexes, separately
 * for each value of $excludeProtRel.
 *
 * @param bool $excludeProtRel Whether to exclude protocol-relative URLs
 * @return string
 */
public function getProtocolsRegex( bool $excludeProtRel = false ) {
	$cacheKey = (int)$excludeProtRel;
	if ( !isset( $this->protocolsRegexes[$cacheKey] ) ) {
		$protocols = $this->getProtocols();
		if ( $excludeProtRel ) {
			// '//' is the protocol-relative entry
			$protocols = array_filter( $protocols, static function ( $protocol ) {
				return $protocol !== '//';
			} );
		}
		$quoted = array_map( static function ( $protocol ) {
			return preg_quote( $protocol, '!' );
		}, $protocols );
		$this->protocolsRegexes[$cacheKey] = implode( '|', $quoted );
	}
	return $this->protocolsRegexes[$cacheKey];
}
1285 | |
/**
 * Matcher for valid protocols, must be anchored at start of string.
 * Matching is case-insensitive.
 * @param string $potentialLink
 * @return bool Whether $potentialLink begins with a valid protocol
 */
public function hasValidProtocol( string $potentialLink ): bool {
	$protocols = $this->getProtocolsRegex();
	return preg_match( '!^(?:' . $protocols . ')!i', $potentialLink ) === 1;
}
1295 | |
/**
 * Matcher for valid protocols, may occur at any point within string.
 * The protocol has to be at the start of the string or directly after a
 * non-word character. Matching is case-insensitive.
 * @param string $potentialLink
 * @return bool Whether $potentialLink contains a valid protocol
 */
public function findValidProtocol( string $potentialLink ): bool {
	$protocols = $this->getProtocolsRegex();
	return preg_match( '!(?:\W|^)(?:' . $protocols . ')!i', $potentialLink ) === 1;
}
1305 | |
1306 | /** @} */ |
1307 | |
1308 | /** |
1309 | * Fake timestamp, for unit tests. This default implementation never fakes the time. |
1310 | * @return int|null Unix timestamp, or null to not fake it |
1311 | */ |
1312 | public function fakeTimestamp(): ?int { |
1313 | return null; |
1314 | } |
1315 | |
1316 | /** |
1317 | * Get an array of defined extension tags, with the lower case name in the |
1318 | * key, the value arbitrary. This is the set of extension tags that are |
1319 | * configured in M/W core. $coreExtModules may already be part of it, |
1320 | * but eventually this distinction will disappear since all extension tags |
1321 | * have to be defined against Parsoid's extension API. constructExtConfig() uses the result as the initial 'allTags' map. |
1322 | * |
1323 | * @return array |
1324 | */ |
1325 | abstract protected function getNonNativeExtensionTags(): array; |
1326 | |
/**
 * Return an object factory to use when instantiating extensions.
 * (This is assumed to be plumbed up to an appropriate service container.)
 *
 * This default implementation builds a new factory on every call, backed
 * by a service container that holds no services at all.
 *
 * @return ObjectFactory The object factory to use for extensions
 */
public function getObjectFactory(): ObjectFactory {
	$emptyContainer = new class() implements ContainerInterface {

		/**
		 * There is nothing to fetch: always throws.
		 * @param string $id
		 * @return never
		 */
		public function get( $id ) {
			throw new class( "Empty service container" ) extends \Error
				implements NotFoundExceptionInterface {
			};
		}

		/**
		 * No service is ever available.
		 * @param string $id
		 * @return false
		 */
		public function has( $id ): bool {
			return false;
		}
	};

	return new ObjectFactory( $emptyContainer );
}
1356 | |
/**
 * (Re)build $this->extConfig from the non-native extension tags and the
 * registered extension modules.
 *
 * FIXME: might benefit from T250230 (caching) but see T270307 --
 * currently SiteConfig::unregisterExtensionModule() is called
 * during testing, which requires invalidating $this->extConfig.
 * (See also SiteConfig::fakeTimestamp() etc.) We'd probably need
 * to more fully separate/mock the "testing SiteConfig" as well
 * as provide a way for parser options to en/disable individual
 * registered modules before this class can be considered immutable
 * and cached.
 */
private function constructExtConfig() {
	// Start from empty sections so that processExtensionModule() finds
	// an initialized structure.
	$this->extConfig = array_fill_keys( [
		'allTags',
		'parsoidExtTags',
		'annotationTags',
		'domProcessors',
		'annotationStrippers',
		'contentModels',
	], [] );

	// There may be some tags defined by the parent wiki which have no
	// associated parsoid modules; for now we handle these by invoking
	// the legacy parser.
	$this->extConfig['allTags'] = $this->getNonNativeExtensionTags();

	$modules = $this->getExtensionModules();
	foreach ( $modules as $extensionModule ) {
		$this->processExtensionModule( $extensionModule );
	}
}
1386 | |
1387 | /** |
1388 | * Whether the tag was registered with the 'stripNowiki' option set (see processExtensionModule(), which records such tags in $this->t299103Tags). |
1389 | * @param string $lowerTagName Tag name, already lowercased |
1390 | * @return bool |
1391 | */ |
1392 | public function tagNeedsNowikiStrippedInTagPF( string $lowerTagName ): bool { |
1393 | return isset( $this->t299103Tags[$lowerTagName] ); |
1394 | } |
1394 | |
/**
 * Register a Parsoid-compatible extension
 *
 * Merges the module's tags, annotation tags, annotation stripper, DOM
 * processors and content model handlers into $this->extConfig, which
 * must already have been initialized.
 *
 * @param ExtensionModule $ext
 */
protected function processExtensionModule( ExtensionModule $ext ): void {
	Assert::invariant( $this->extConfig !== null, "not yet inited!" );
	$moduleConfig = $ext->getConfig();
	Assert::invariant(
		isset( $moduleConfig['name'] ),
		"Every extension module must have a name."
	);
	$name = $moduleConfig['name'];

	// These are extension tag handlers. They have
	// wt2html (sourceToDom), html2wt (domToWikitext), and
	// linter functionality.
	$tagConfigs = $moduleConfig['tags'] ?? [];
	foreach ( $tagConfigs as $tagConfig ) {
		$tag = mb_strtolower( $tagConfig['name'] );
		$this->extConfig['allTags'][$tag] = true;
		$this->extConfig['parsoidExtTags'][$tag] = $tagConfig;
		// Deal with b/c nowiki stripping support needed by some extensions.
		// This registers the tag with the legacy parser for
		// implicit nowiki stripping in {{#tag:..}} args for this tag.
		if ( isset( $tagConfig['options']['stripNowiki'] ) ) {
			$this->t299103Tags[$tag] = true;
		}
	}

	$annotations = $moduleConfig['annotations'] ?? null;
	if ( $annotations !== null ) {
		// Either the tag names sit under 'tagNames', or the annotation
		// config itself is the list of tag names.
		$tagNames = $annotations['tagNames'] ?? $annotations;
		foreach ( $tagNames as $tagName ) {
			$tag = mb_strtolower( $tagName );
			$this->extConfig['allTags'][$tag] = true;
			$this->extConfig['annotationTags'][$tag] = true;
		}
		if ( isset( $annotations['annotationStripper'] ) ) {
			$stripper = $this->getObjectFactory()->createObject(
				$annotations['annotationStripper'],
				[
					'allowClassName' => true,
					'assertClass' => AnnotationStripper::class,
				]
			);
			$this->extConfig['annotationStrippers'][$name] = $stripper;
		}
	}

	// Extension modules may also register dom processors.
	// This is for wt2htmlPostProcessor and html2wtPreProcessor
	// functionality.
	$domProcessors = $moduleConfig['domProcessors'] ?? null;
	if ( $domProcessors !== null ) {
		$this->extConfig['domProcessors'][$name] = $domProcessors;
	}

	$contentModels = $moduleConfig['contentModels'] ?? [];
	foreach ( $contentModels as $model => $spec ) {
		// For compatibility with mediawiki core, the first
		// registered extension wins.
		if ( !isset( $this->extConfig['contentModels'][$model] ) ) {
			$this->extConfig['contentModels'][$model] = $this->getObjectFactory()->createObject(
				$spec,
				[
					'allowClassName' => true,
					'assertClass' => ContentModelHandler::class,
				]
			);
		}
	}
}
1460 | |
/**
 * Return the extension configuration, building it with
 * constructExtConfig() first if $this->extConfig is still empty.
 * @return array The sections initialized by constructExtConfig()
 *   ('allTags', 'parsoidExtTags', 'annotationTags', 'domProcessors',
 *   'annotationStrippers', 'contentModels')
 */
1461 | protected function getExtConfig(): array { |
1462 | if ( !$this->extConfig ) { |
1463 | $this->constructExtConfig(); |
1464 | } |
1465 | return $this->extConfig; |
1466 | } |
1467 | |
1468 | /** |
1469 | * Return a ContentModelHandler for the specified $contentmodel, if one is registered. |
1470 | * If null is returned, the caller is expected to use the default wikitext content model handler. |
1471 | * |
1472 | * @param string $contentmodel Content model name, looked up exactly as given |
1473 | * @return ContentModelHandler|null |
1474 | */ |
1475 | public function getContentModelHandler( string $contentmodel ): ?ContentModelHandler { |
1476 | return ( $this->getExtConfig() )['contentModels'][$contentmodel] ?? null; |
1477 | } |
1478 | |
/**
 * Returns all the annotationStrippers that are defined as annotation configuration
 *
 * The strippers are ordered by the name of the extension module that
 * registered them, and returned as a plain list.
 *
 * @return array<AnnotationStripper>
 */
public function getAnnotationStrippers(): array {
	$strippersByModule = $this->getExtConfig()['annotationStrippers'] ?? [];
	// ensures stability of the method list order
	ksort( $strippersByModule );
	return array_values( $strippersByModule );
}
1489 | |
1490 | /** |
1491 | * Determine whether a given name, which must have already been converted |
1492 | * to lower case, is a valid extension tag name, i.e. a key of getExtensionTagNameMap(). |
1493 | * |
1494 | * @param string $name Lowercased tag name |
1495 | * @return bool |
1496 | */ |
1497 | public function isExtensionTag( string $name ): bool { |
1498 | return isset( $this->getExtensionTagNameMap()[$name] ); |
1499 | } |
1500 | |
1501 | /** |
1502 | * Is $tagName an annotation tag? The name is lowercased before the lookup. |
1503 | * @param string $tagName Tag name, in any case |
1504 | * @return bool |
1505 | */ |
1506 | public function isAnnotationTag( string $tagName ): bool { |
1507 | return $this->getExtConfig()['annotationTags'][mb_strtolower( $tagName )] ?? false; |
1508 | } |
1508 | |
/**
 * Get an array of defined annotation tags in lower case
 * @return array List of lowercased annotation tag names
 */
public function getAnnotationTags(): array {
	return array_keys( $this->getExtConfig()['annotationTags'] );
}
1517 | |
/**
 * Get an array of defined extension tags, with the lower case name
 * in the key, and the value being arbitrary.
 *
 * @return array
 */
public function getExtensionTagNameMap(): array {
	return $this->getExtConfig()['allTags'];
}
1528 | |
/**
 * Get the tag configuration registered by a Parsoid extension module.
 * @param string $tagName Extension tag name (lowercased before the lookup)
 * @return array|null The tag configuration, or null if no module
 *   registered this tag
 */
public function getExtTagConfig( string $tagName ): ?array {
	$lowerTagName = mb_strtolower( $tagName );
	return $this->getExtConfig()['parsoidExtTags'][$lowerTagName] ?? null;
}
1537 | |
/**
 * Handlers already looked up by getExtTagImpl(), keyed by the tag name
 * exactly as it was passed in; null records "no handler".
 */
private $tagHandlerCache = [];

/**
 * @param string $tagName Extension tag name
 * @return ExtensionTagHandler|null
 *   Returns the implementation of the named extension, if there is one.
 */
public function getExtTagImpl( string $tagName ): ?ExtensionTagHandler {
	// array_key_exists() rather than isset(): a cached null is a valid answer.
	if ( array_key_exists( $tagName, $this->tagHandlerCache ) ) {
		return $this->tagHandlerCache[$tagName];
	}

	$handlerSpec = $this->getExtTagConfig( $tagName )['handler'] ?? null;
	$handler = null;
	if ( $handlerSpec !== null ) {
		$handler = $this->getObjectFactory()->createObject( $handlerSpec, [
			'allowClassName' => true,
			'assertClass' => ExtensionTagHandler::class,
		] );
	}
	$this->tagHandlerCache[$tagName] = $handler;

	return $handler;
}
1557 | |
/**
 * Get the DOM processors registered by extensions: an array mapping
 * extension name to an array of object factory specs for
 * Ext\DOMProcessor objects.
 *
 * This is the 'domProcessors' entry of the extension config.
 *
 * @return array
 */
public function getExtDOMProcessors(): array {
	return $this->getExtConfig()['domProcessors'];
}
1567 | |
/**
 * Resource limits for the wikitext -> HTML direction.
 * @var array<string,int>
 */
protected $wt2htmlLimits = [
	// Pages larger than this are not handled.
	// Mirrors ParserOptions::maxIncludeSize.
	'wikitextSize' => 2048 * 1024,

	// Maximum number of list items per page
	'listItem' => 30000,

	// Maximum number of table cells per page
	'tableCell' => 30000,

	// Maximum number of transclusions per page
	'transclusion' => 10000,

	// Maximum number of images per page.
	// NOTE(review): the original comment marks this limit as
	// "DISABLED for now"; whether it is enforced is not visible here.
	'image' => 1000,

	// Maximum size of a top-level token (1M)
	'token' => 1000000,
];

/**
 * Get the limits applied in the wikitext -> HTML direction.
 *
 * @return array<string,int> Limit name => limit value
 */
public function getWt2HtmlLimits(): array {
	return $this->wt2htmlLimits;
}
1596 | |
/**
 * Resource limits for the HTML -> wikitext direction.
 * @var array<string,int>
 */
protected $html2wtLimits = [
	// HTML strings bigger than this (10M) are refused for serialization
	'htmlSize' => 10000000,
];

/**
 * Get the limits applied in the HTML -> wikitext direction.
 *
 * @return array<string,int> Limit name => limit value
 */
public function getHtml2WtLimits(): array {
	return $this->html2wtLimits;
}
1609 | |
/**
 * Build a Monolog logger named "Parsoid CLI" that emits bare messages.
 *
 * @param ?string $filePath File to log to. When null (or otherwise falsy,
 *   e.g. an empty string) an ErrorLogHandler is used instead of a file.
 * @return Logger
 */
public static function createLogger( ?string $filePath = null ): Logger {
	$logger = new Logger( "Parsoid CLI" );
	$logToFile = (bool)$filePath;

	$handler = $logToFile ? new StreamHandler( $filePath ) : new ErrorLogHandler();
	// Only file output gets an explicit newline appended to each record
	$format = '%message%' . ( $logToFile ? "\n" : '' );
	// Third argument: don't suppress inline newlines
	$handler->setFormatter( new LineFormatter( $format, null, true ) );
	$logger->pushHandler( $handler );

	if ( $logToFile ) {
		// StreamHandler appends to the file, so mark where this run starts
		$logger->log( Logger::INFO, "-------------- starting fresh log --------------" );
	}

	return $logger;
}
1635 | |
/**
 * Must be implemented by concrete subclasses.
 *
 * NOTE(review): presumably the settings controlling when links get
 * rel="nofollow"; the array's shape is not visible here. Confirm
 * against the implementations.
 *
 * @return array
 */
abstract public function getNoFollowConfig(): array;
1637 | |
/**
 * Must be implemented by concrete subclasses.
 *
 * NOTE(review): presumably the target attribute value to use for
 * external links, with false meaning none is set. Confirm against
 * the implementations.
 *
 * @return string|false
 */
abstract public function getExternalLinkTarget();
1640 | } |