Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
50.83% |
244 / 480 |
|
29.85% |
20 / 67 |
CRAP | |
0.00% |
0 / 1 |
| SiteConfig | |
50.83% |
244 / 480 |
|
29.85% |
20 / 67 |
3400.79 | |
0.00% |
0 / 1 |
| registerExtensionModule | |
0.00% |
0 / 19 |
|
0.00% |
0 / 1 |
30 | |||
| unregisterExtensionModule | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
| getExtensionModules | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
3 | |||
| __construct | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| getLogger | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
| setLogger | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| galleryOptions | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
2 | |||
| addHTMLTemplateParameters | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| metrics | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| incrementCounter | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
| observeTiming | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
| scrubBidiChars | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| allowedExternalImagePrefixes | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
| baseURI | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
| relativeLinkPrefix | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| bswPagePropRegexp | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
6 | |||
| canonicalNamespaceId | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
| namespaceId | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
| namespaceName | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
| namespaceHasSubpages | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
| namespaceCase | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
| namespaceIsTalk | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
2 | |||
| ucfirst | |
91.67% |
11 / 12 |
|
0.00% |
0 / 1 |
6.02 | |||
| specialPageLocalName | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
| interwikiMagic | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
| magicLinkEnabled | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| interwikiMap | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
| interwikiMapNoNamespaces | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
4 | |||
| interwikiMatcher | |
100.00% |
53 / 53 |
|
100.00% |
1 / 1 |
15 | |||
| iwp | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
| legalTitleChars | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
| linkPrefixRegex | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
| linkTrail | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
| linkTrailRegex | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
3 | |||
| langBcp47 | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
| mainPageLinkTarget | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
| getMWConfigValue | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
| rtl | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
| langConverterEnabledBcp47 | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
| script | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
| scriptpath | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
| server | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
| exportMetadataToHeadBcp47 | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
| exportMetadataHelper | |
0.00% |
0 / 34 |
|
0.00% |
0 / 1 |
42 | |||
| redirectRegexp | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
| categoryRegexp | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
| bswRegexp | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
| solTransparentWikitextRegexp | |
0.00% |
0 / 19 |
|
0.00% |
0 / 1 |
6 | |||
| solTransparentWikitextNoWsRegexp | |
0.00% |
0 / 19 |
|
0.00% |
0 / 1 |
12 | |||
| timezoneOffset | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
| variantsFor | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
| widthOption | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
| getVariableIDs | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
| getMagicWords | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
| haveComputedFunctionSynonyms | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| getFunctionSynonyms | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| updateFunctionSynonym | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| resetMagicWords | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| populateMagicWords | |
95.65% |
22 / 23 |
|
0.00% |
0 / 1 |
9 | |||
| mwAliases | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| getMagicWordForParserFunction | |
100.00% |
11 / 11 |
|
100.00% |
1 / 1 |
2 | |||
| getMagicWordForVariable | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| getMagicWordCanonicalName | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
20 | |||
| getMagicWordForMediaOption | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| getMagicWordForBehaviorSwitch | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
| isBehaviorSwitch | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| getMagicWordWT | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
20 | |||
| getMagicWordMatcher | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
| getParameterizedAliasMatcher | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
| getMediaPrefixParameterizedAliasMatcher | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
| getMaxTemplateDepth | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
| getSpecialNSAliases | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
| getSpecialPageAliases | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
| quoteTitleRe | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
1 | |||
| getExtResourceURLPatternMatcher | |
95.24% |
20 / 21 |
|
0.00% |
0 / 1 |
7 | |||
| linterEnabled | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| getLinterSiteConfig | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
2 | |||
| makeExtResourceURL | |
100.00% |
17 / 17 |
|
100.00% |
1 / 1 |
8 | |||
| getProtocols | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
| getProtocolsRegex | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
5 | |||
| hasValidProtocol | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| findValidProtocol | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| fakeTimestamp | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| getNonNativeExtensionTags | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
| getObjectFactory | |
33.33% |
2 / 6 |
|
0.00% |
0 / 1 |
1.30 | |||
| shouldValidateExtConfig | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| constructExtConfig | |
100.00% |
14 / 14 |
|
100.00% |
1 / 1 |
2 | |||
| tagNeedsNowikiStrippedInTagPF | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| getExtensionModuleSchema | |
60.00% |
3 / 5 |
|
0.00% |
0 / 1 |
2.26 | |||
| processExtensionModule | |
56.52% |
39 / 69 |
|
0.00% |
0 / 1 |
40.75 | |||
| getExtConfig | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
2 | |||
| getContentModelHandler | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| getAnnotationStrippers | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
| isExtensionTag | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| isAnnotationTag | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| getAnnotationTags | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
| getExtensionTagNameMap | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
| getExtTagConfig | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
| getExtTagImpl | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
12 | |||
| getPFragmentHandlerKeys | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
| getPFragmentHandlerConfig | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
| getPFragmentHandlerImpl | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
12 | |||
| getExtDOMProcessors | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
| getAsyncFallbackMessageKey | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| getWt2HtmlLimits | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| getHtml2WtLimits | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| createLogger | |
0.00% |
0 / 11 |
|
0.00% |
0 / 1 |
12 | |||
| getNoFollowConfig | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
| getExternalLinkTarget | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
| 1 | <?php |
| 2 | declare( strict_types = 1 ); |
| 3 | |
| 4 | namespace Wikimedia\Parsoid\Config; |
| 5 | |
| 6 | use JsonSchema\Constraints\Constraint; |
| 7 | use JsonSchema\Validator; |
| 8 | use Liuggio\StatsdClient\Factory\StatsdDataFactoryInterface; |
| 9 | use Monolog\Formatter\LineFormatter; |
| 10 | use Monolog\Handler\ErrorLogHandler; |
| 11 | use Monolog\Handler\StreamHandler; |
| 12 | use Monolog\Logger; |
| 13 | use Psr\Container\ContainerInterface; |
| 14 | use Psr\Container\NotFoundExceptionInterface; |
| 15 | use Psr\Log\LoggerInterface; |
| 16 | use Psr\Log\LogLevel; |
| 17 | use Psr\Log\NullLogger; |
| 18 | use Wikimedia\Assert\Assert; |
| 19 | use Wikimedia\Bcp47Code\Bcp47Code; |
| 20 | use Wikimedia\ObjectFactory\ObjectFactory; |
| 21 | use Wikimedia\Parsoid\Core\ContentMetadataCollector; |
| 22 | use Wikimedia\Parsoid\Core\ContentModelHandler; |
| 23 | use Wikimedia\Parsoid\Core\LinkTarget; |
| 24 | use Wikimedia\Parsoid\DOM\Document; |
| 25 | use Wikimedia\Parsoid\Ext\AnnotationStripper; |
| 26 | use Wikimedia\Parsoid\Ext\ExtensionModule; |
| 27 | use Wikimedia\Parsoid\Ext\ExtensionTagHandler; |
| 28 | use Wikimedia\Parsoid\Ext\Gallery\Gallery; |
| 29 | use Wikimedia\Parsoid\Ext\Indicator\Indicator; |
| 30 | use Wikimedia\Parsoid\Ext\JSON\JSON; |
| 31 | use Wikimedia\Parsoid\Ext\Nowiki\Nowiki; |
| 32 | use Wikimedia\Parsoid\Ext\PFragmentHandler; |
| 33 | use Wikimedia\Parsoid\Ext\Pre\Pre; |
| 34 | use Wikimedia\Parsoid\Fragments\PFragment; |
| 35 | use Wikimedia\Parsoid\Utils\DOMUtils; |
| 36 | use Wikimedia\Parsoid\Utils\PHPUtils; |
| 37 | use Wikimedia\Parsoid\Utils\Utils; |
| 38 | use Wikimedia\Parsoid\Wikitext\Consts; |
| 39 | |
| 40 | /** |
| 41 | * Site-level configuration interface for Parsoid |
| 42 | * |
| 43 | * This includes both global configuration and wiki-level configuration. |
| 44 | */ |
| 45 | abstract class SiteConfig { |
| 46 | /** |
| 47 | * FIXME: not private so that ParserTests can reset these variables |
| 48 | * since they reuse site config and other objects between tests for |
| 49 | * efficiency reasons. |
| 50 | * |
| 51 | * @var array|null |
| 52 | */ |
| 53 | protected $mwAliases; |
| 54 | |
| 55 | /** @var array|null */ |
| 56 | private $behaviorSwitches; |
| 57 | |
| 58 | /** @var array|null */ |
| 59 | private $variables; |
| 60 | |
| 61 | /** @var array|null */ |
| 62 | private $mediaOptions; |
| 63 | |
| 64 | /** |
| 65 | * @var array{0:array<string,string>,1:array<string,string>} |
| 66 | * Localized aliases for legacy parser functions. |
| 67 | */ |
| 68 | protected array $functionSynonyms = [ [], [], ]; |
| 69 | |
| 70 | /** |
| 71 | * @var array{0:array<string,string>,1:array<string,string>} |
| 72 | * Localized aliases for parser functions defined with PFragment handlers. |
| 73 | */ |
| 74 | protected array $pFragmentHandlerFuncSynonyms = [ [], [] ]; |
| 75 | |
| 76 | /** @var string[] */ |
| 77 | private $protocolsRegexes = []; |
| 78 | |
| 79 | /** |
| 80 | * FIXME: not private so that ParserTests can reset these variables |
| 81 | * since they reuse site config and other objects between tests for |
| 82 | * efficiency reasons. |
| 83 | * @var array|null |
| 84 | */ |
| 85 | protected $interwikiMapNoNamespaces; |
| 86 | |
| 87 | /** |
| 88 | * FIXME: not private so that ParserTests can reset these variables |
| 89 | * since they reuse site config and other objects between tests for |
| 90 | * efficiency reasons. |
| 91 | * @var string|null|bool |
| 92 | */ |
| 93 | protected $linkTrailRegex = false; |
| 94 | |
| 95 | /** |
| 96 | * These extension modules provide "core" functionality |
| 97 | * and their implementations live in the Parsoid repo. |
| 98 | * |
| 99 | * @var class-string<ExtensionModule>[] |
| 100 | */ |
| 101 | private static $coreExtModules = [ |
| 102 | // content modules |
| 103 | JSON::class, |
| 104 | // extension tags |
| 105 | Nowiki::class, |
| 106 | Pre::class, |
| 107 | Gallery::class, |
| 108 | Indicator::class, |
| 109 | ]; |
| 110 | |
| 111 | /** |
| 112 | * Array mapping parsoid internal extension ID to ExtensionModule object. |
| 113 | * @var ?array<int,ExtensionModule> |
| 114 | */ |
| 115 | private ?array $extModules = null; |
| 116 | |
| 117 | /** |
| 118 | * Array mapping parsoid internal extension ID to mediawiki Extension |
| 119 | * name. |
| 120 | * @var array<int,string> |
| 121 | */ |
| 122 | private array $extModuleOwner = []; |
| 123 | |
| 124 | /** |
| 125 | * Private counter to assign IDs to $extModules / $extModuleOwner |
| 126 | */ |
| 127 | private int $extModuleNextId = 0; |
| 128 | |
| 129 | // phpcs:disable Generic.Files.LineLength.TooLong |
| 130 | |
/**
 * Register a Parsoid extension module.
 *
 * @param string|array{name:string}|array{factory:callable}|array{class:class-string<ExtensionModule>} $configOrSpec
 *  Either an object factory specification for an ExtensionModule object,
 *  or else the configuration array which ExtensionModule::getConfig()
 *  would return. (The latter is preferred, but our internal extensions
 *  use the former.) A non-string value may also carry an
 *  'extension-name' entry; it is recorded as the owner of the module
 *  and removed before the rest of the value is interpreted.
 * @return int An integer identifier that can be passed to
 *  ::unregisterExtensionModule() to remove this extension module.
 */
final public function registerExtensionModule( $configOrSpec ): int {
	// Ensure the core modules have been instantiated (and have taken
	// their IDs) before this module is added.
	$this->getExtensionModules();

	// MediaWiki core passes along the name of the MediaWiki extension
	// which registered this extension module as 'extension-name'.
	$owner = null;
	$specIsString = is_string( $configOrSpec );
	if ( !$specIsString ) {
		$owner = $configOrSpec['extension-name'] ?? null;
		unset( $configOrSpec['extension-name'] );
	}

	$isFactorySpec = $specIsString
		|| isset( $configOrSpec['class'] )
		|| isset( $configOrSpec['factory'] );

	if ( !$isFactorySpec ) {
		// A plain configuration array: wrap it in an anonymous
		// ExtensionModule which simply hands the array back.
		$module = new class( $configOrSpec ) implements ExtensionModule {
			private $config;

			/** @param array $config */
			public function __construct( $config ) {
				$this->config = $config;
			}

			/** @inheritDoc */
			public function getConfig(): array {
				return $this->config;
			}
		};
	} else {
		// An object factory spec for an ExtensionModule.
		// ObjectFactory::createObject accepts an array, not just a callable (phan bug)
		// @phan-suppress-next-line PhanTypeInvalidCallableArraySize
		$module = $this->getObjectFactory()->createObject( $configOrSpec, [
			'allowClassName' => true,
			'assertClass' => ExtensionModule::class,
		] );
	}

	$newId = $this->extModuleNextId++;
	$this->extModules[$newId] = $module;
	$this->extModuleOwner[$newId] = $owner;
	// Drop the cached extConfig so that this registration is picked up.
	$this->extConfig = null;

	return $newId;
}
| 183 | |
| 184 | // phpcs:enable Generic.Files.LineLength.TooLong |
| 185 | |
/**
 * Unregister a Parsoid extension module. This is typically used
 * only for testing purposes in order to reset a shared SiteConfig
 * to its original configuration.
 *
 * Unknown identifiers are ignored.
 *
 * @param int $extId The value returned by the call to
 *  ::registerExtensionModule()
 */
final public function unregisterExtensionModule( int $extId ): void {
	unset( $this->extModules[$extId], $this->extModuleOwner[$extId] );
	// Drop the cached extConfig so it no longer reflects the removed module.
	$this->extConfig = null;
}
| 200 | |
/**
 * Return the set of Parsoid extension modules associated with this
 * SiteConfig.
 *
 * On first use the core modules listed in self::$coreExtModules are
 * instantiated and recorded with a null owner.
 *
 * @return ExtensionModule[] The modules as a list; the internal
 *  extension IDs are not exposed as keys.
 */
final public function getExtensionModules() {
	if ( $this->extModules !== null ) {
		return array_values( $this->extModules );
	}

	$this->extModules = [];
	foreach ( self::$coreExtModules as $moduleClass ) {
		$id = $this->extModuleNextId++;
		$this->extModules[$id] = new $moduleClass();
		// native module: no owning MediaWiki extension
		$this->extModuleOwner[$id] = null;
	}

	return array_values( $this->extModules );
}
| 218 | |
| 219 | /** @var LoggerInterface|null */ |
| 220 | protected $logger = null; |
| 221 | |
| 222 | /** @var int */ |
| 223 | protected $iwMatcherBatchSize = 4096; |
| 224 | |
| 225 | /** @var array|null */ |
| 226 | protected $iwMatcher = null; |
| 227 | |
| 228 | /** @var bool */ |
| 229 | protected $addHTMLTemplateParameters = false; |
| 230 | |
| 231 | /** @var bool */ |
| 232 | protected $scrubBidiChars = false; |
| 233 | |
| 234 | /** @var bool */ |
| 235 | protected $linterEnabled = false; |
| 236 | |
| 237 | /** @var ?array */ |
| 238 | protected $extConfig = null; |
| 239 | |
| 240 | /** |
| 241 | * Tag handlers for some extensions currently explicitly call unstripNowiki |
| 242 | * first thing in their handlers. They do this to strip <nowiki>..</nowiki> |
| 243 | * wrappers around args when encountered in the {{#tag:...}} parser function. |
| 244 | * However, this strategy won't work for Parsoid which calls the preprocessor |
| 245 | * to get expanded wikitext. In this mode, <nowiki> wrappers won't be stripped |
| 246 | * and this leads to functional differences in parsing and output. |
| 247 | * |
| 248 | * See T203293 and T299103 for more details. |
| 249 | * |
| 250 | * To get around this, T299103 proposes that extensions that require this support |
| 251 | * set a config flag in their Parsoid extension config. On the Parsoid end, we |
| 252 | * then let the legacy parser know of these tags. When such extension tags are |
| 253 | * encountered in the {{#tag:...}} parser function handler (see tagObj function |
| 254 | * in CoreParserFunctions.php), that handler can then automatically strip these |
| 255 | * nowiki wrappers on behalf of the extension. |
| 256 | * |
| 257 | * This serves two purposes. For one, it lets Parsoid support these extensions |
| 258 | * in this nowiki use edge case. For another, extensions that register handlers |
| 259 | * with Parsoid can get rid of explicit calls to unstripNowiki() in the |
| 260 | * tag handlers for the legacy parser. |
| 261 | * |
| 262 | * This property maintains an array of tags that need this support. |
| 263 | * |
| 264 | * @var array an associative array of tag names |
| 265 | */ |
| 266 | private $t299103Tags = []; |
| 267 | |
/**
 * Base constructor.
 *
 * This constructor is public because it is used to create mock objects
 * in our test suite. It performs no initialization of its own.
 */
public function __construct() {
}
| 276 | |
| 277 | /************************************************************************//** |
| 278 | * @name Global config |
| 279 | * @{ |
| 280 | */ |
| 281 | |
/**
 * General log channel.
 *
 * If no logger has been set, a NullLogger is created, stored, and
 * returned, so callers always receive a usable logger.
 *
 * @return LoggerInterface
 */
public function getLogger(): LoggerInterface {
	return $this->logger ??= new NullLogger;
}
| 292 | |
/**
 * Set the log channel, for debugging.
 *
 * @param ?LoggerInterface $logger The logger to use. Passing null clears
 *  the current logger; getLogger() then falls back to a NullLogger.
 */
public function setLogger( ?LoggerInterface $logger ): void {
	$this->logger = $logger;
}
| 300 | |
/**
 * Default gallery options for this wiki.
 *
 * @return array<string,string|int|bool> Option name => default value
 */
public function galleryOptions(): array {
	$defaults = [
		'imagesPerRow' => 0,
		'imageWidth' => 120,
		'imageHeight' => 120,
		'captionLength' => true,
		'showBytes' => true,
		'showDimensions' => true,
		'mode' => 'traditional',
	];

	return $defaults;
}
| 316 | |
/**
 * When processing template parameters, parse them to HTML and add it to the
 * template parameters data.
 *
 * @return bool The current value of the $addHTMLTemplateParameters property
 */
public function addHTMLTemplateParameters(): bool {
	return $this->addHTMLTemplateParameters;
}
| 325 | |
/**
 * Statistics aggregator, for counting and timing.
 *
 * This base implementation provides no aggregator and always returns null.
 *
 * @return StatsdDataFactoryInterface|null
 */
public function metrics(): ?StatsdDataFactoryInterface {
	return null;
}
| 334 | |
| 335 | /** |
| 336 | * Increment a counter metric |
| 337 | * @param string $name |
| 338 | * @param array $labels |
| 339 | * @param float $amount |
| 340 | * @return void |
| 341 | */ |
| 342 | abstract public function incrementCounter( string $name, array $labels, float $amount = 1 ); |
| 343 | |
| 344 | /** |
| 345 | * Record a timing metric. |
| 346 | * |
| 347 | * Note that the value should be provided in *milliseconds* even though |
| 348 | * the name of the metric may end (by convention) in `_seconds`. The |
| 349 | * metrics infrastructure will make the appropriate conversion. |
| 350 | * |
| 351 | * @param string $name |
| 352 | * @param float $value A timing value *in milliseconds* |
| 353 | * @param array $labels |
| 354 | * @return void |
| 355 | */ |
| 356 | abstract public function observeTiming( string $name, float $value, array $labels ); |
| 357 | |
/**
 * If enabled, bidi chars adjacent to category links will be stripped
 * in the html -> wt serialization pass.
 *
 * @return bool The current value of the $scrubBidiChars property
 */
public function scrubBidiChars(): bool {
	return $this->scrubBidiChars;
}
| 366 | |
| 367 | /** @} */ |
| 368 | |
| 369 | /************************************************************************//** |
| 370 | * @name Wiki config |
| 371 | * @{ |
| 372 | */ |
| 373 | |
| 374 | /** |
| 375 | * Allowed external image URL prefixes. |
| 376 | * |
| 377 | * @return string[] The empty array matches no URLs. The empty string matches |
| 378 | * all URLs. |
| 379 | */ |
| 380 | abstract public function allowedExternalImagePrefixes(): array; |
| 381 | |
| 382 | /** |
| 383 | * Site base URI |
| 384 | * |
| 385 | * This would be the URI found in `<base href="..." />`. |
| 386 | * |
| 387 | * @return string |
| 388 | */ |
| 389 | abstract public function baseURI(): string; |
| 390 | |
/**
 * Prefix for relative links
 *
 * Prefix to prepend to a page title to link to that page.
 * Intended to be relative to the URI returned by baseURI().
 *
 * If possible, keep the default "./" so clients need not know this value
 * to extract titles from link hrefs.
 *
 * @return string This base implementation returns "./"
 */
public function relativeLinkPrefix(): string {
	return './';
}
| 405 | |
/**
 * Regex matching all double-underscore magic words, in their
 * `mw:PageProp/...` form.
 *
 * The pattern wraps the alternatives from bswRegexp() so that a match
 * must start at the beginning of the subject or after whitespace, and
 * must end at the end of the subject or before whitespace.
 *
 * @return string
 */
public function bswPagePropRegexp(): string {
	// A function-level static is shared by every SiteConfig object in the
	// process. Memoizing in a single slot would hand the first object's
	// pattern to all others and never refresh it, so the memo is keyed on
	// the source regexp instead. This costs one bswRegexp() call per
	// invocation but keeps the result correct for each object.
	static $cache = [];

	$bswRegexp = $this->bswRegexp();
	if ( !isset( $cache[$bswRegexp] ) ) {
		$cache[$bswRegexp] =
			'@(?:^|\\s)mw:PageProp/(?:' .
			PHPUtils::reStrip( $bswRegexp, '@' ) .
			')(?=$|\\s)@uDS';
	}

	return $cache[$bswRegexp];
}
| 421 | |
| 422 | /** |
| 423 | * Map a canonical namespace name to its index |
| 424 | * |
| 425 | * @note This replaces canonicalNamespaces |
| 426 | * @param string $name all-lowercase and with underscores rather than spaces. |
| 427 | * @return int|null |
| 428 | */ |
| 429 | abstract public function canonicalNamespaceId( string $name ): ?int; |
| 430 | |
| 431 | /** |
| 432 | * Map a namespace name to its index |
| 433 | * |
| 434 | * @note This replaces canonicalNamespaces |
| 435 | * @param string $name all-lowercase and with underscores rather than spaces. |
| 436 | * @return int|null |
| 437 | */ |
| 438 | abstract public function namespaceId( string $name ): ?int; |
| 439 | |
| 440 | /** |
| 441 | * Map a namespace index to its preferred name |
| 442 | * (with spaces, not underscores). |
| 443 | * |
| 444 | * @note This replaces namespaceNames |
| 445 | * @param int $ns |
| 446 | * @return string|null |
| 447 | */ |
| 448 | abstract public function namespaceName( int $ns ): ?string; |
| 449 | |
| 450 | /** |
| 451 | * Test if a namespace has subpages |
| 452 | * |
| 453 | * @note This replaces namespacesWithSubpages |
| 454 | * @param int $ns |
| 455 | * @return bool |
| 456 | */ |
| 457 | abstract public function namespaceHasSubpages( int $ns ): bool; |
| 458 | |
| 459 | /** |
| 460 | * Return namespace case setting |
| 461 | * @param int $ns |
| 462 | * @return string 'first-letter' or 'case-sensitive' |
| 463 | */ |
| 464 | abstract public function namespaceCase( int $ns ): string; |
| 465 | |
/**
 * Test if a namespace is a talk namespace
 *
 * A namespace counts as a talk namespace when its index is positive
 * and odd.
 *
 * @note This replaces title.getNamespace().isATalkNamespace()
 * @param int $ns
 * @return bool
 */
public function namespaceIsTalk( int $ns ): bool {
	if ( $ns <= 0 ) {
		return false;
	}

	return ( $ns % 2 ) !== 0;
}
| 476 | |
/**
 * Uppercasing method for titles
 *
 * Only the first character is changed. A leading ASCII "i" becomes
 * "İ" when the wiki language code is one of az, tr, kaa or kk.
 *
 * @param string $str
 * @return string
 */
public function ucfirst( string $str ): string {
	if ( $str === '' ) {
		return '';
	}

	$firstByte = ord( $str[0] );

	if ( $firstByte >= 128 ) {
		// Multibyte first character: use the mb_* functions.
		return mb_strtoupper( mb_substr( $str, 0, 1 ) ) . mb_substr( $str, 1 );
	}

	if ( $firstByte < 96 ) {
		// Bytes below 96 include no lowercase ASCII letters,
		// so there is nothing to change.
		return $str;
	}

	$dottedCapitalILangs = [ 'az', 'tr', 'kaa', 'kk' ];
	if ( $str[0] === 'i' &&
		in_array( $this->langBcp47()->toBcp47Code(), $dottedCapitalILangs, true )
	) {
		return 'İ' . mb_substr( $str, 1 );
	}

	// Remaining single-byte case: use PHP's ucfirst()
	return ucfirst( $str );
}
| 502 | |
| 503 | /** |
| 504 | * Get the default local name for a special page |
| 505 | * @param string $alias Special page alias |
| 506 | * @return string|null |
| 507 | */ |
| 508 | abstract public function specialPageLocalName( string $alias ): ?string; |
| 509 | |
| 510 | /** |
| 511 | * Treat language links as magic connectors, not inline links |
| 512 | * @return bool |
| 513 | */ |
| 514 | abstract public function interwikiMagic(): bool; |
| 515 | |
/**
 * Return true if the specified magic link syntax is enabled on this
 * wiki.
 *
 * This base implementation ignores $which and reports every link type
 * as enabled.
 *
 * @param string $which One of "ISBN", "PMID", or "RFC"
 * @return bool True if the specified magic link type is enabled on this wiki
 */
public function magicLinkEnabled( string $which ): bool {
	// This should be an abstract method, but in order to provide
	// graceful upgrades, start by defaulting to true for all link types
	return true;
}
| 527 | |
| 528 | /** |
| 529 | * Interwiki link data. |
| 530 | * |
| 531 | * Note that the order of the keys in this array is significant: if more |
| 532 | * than one prefix matches a given URL during html2wt conversion, the |
| 533 | * *first* match is used. If you want `wikitech` to be used instead of |
| 534 | * `labsconsole`, for example, the `'wikitech'=>[....]` key needs to |
| 535 | * enumerate first. |
| 536 | * |
| 537 | * @return array<string,array> Keys are interwiki prefixes, values are arrays with the following keys: |
| 538 | * - prefix: (string) The interwiki prefix, same as the key. |
| 539 | * - url: (string) Target URL, containing a '$1' to be replaced by the interwiki target. |
| 540 | * - protorel: (bool, optional) Whether the url may be accessed by both http:// and https://. |
| 541 | * - local: (bool, optional) Whether the interwiki link is considered local (to the wikifarm). |
| 542 | * - localinterwiki: (bool, optional) Whether the interwiki link points to the current wiki. |
| 543 | * - language: (bool, optional) Whether the interwiki link is a language link. |
| 544 | * - extralanglink: (bool, optional) Whether the interwiki link is an "extra language link". |
| 545 | * - linktext: (string, optional) For "extra language links", the link text. |
| 546 | * (booleans marked "optional" must be omitted if false) |
| 547 | */ |
| 548 | abstract public function interwikiMap(): array; |
| 549 | |
/**
 * Interwiki link data, after removing items that conflict with namespace names.
 * (In case of such conflict, namespace wins, interwiki is ignored.)
 *
 * The filtered map is computed on first use and cached on this object.
 *
 * @return array<string,array> See interwikiMap()
 */
public function interwikiMapNoNamespaces(): array {
	if ( $this->interwikiMapNoNamespaces !== null ) {
		return $this->interwikiMapNoNamespaces;
	}

	$this->interwikiMapNoNamespaces = [];
	foreach ( $this->interwikiMap() as $prefix => $info ) {
		if ( $this->namespaceId( (string)$prefix ) !== null ) {
			// The prefix is also a namespace name: namespace wins.
			continue;
		}
		$this->interwikiMapNoNamespaces[$prefix] = $info;
	}

	return $this->interwikiMapNoNamespaces;
}
| 566 | |
/**
 * Match interwiki URLs
 *
 * The matcher is built on first use from interwikiMapNoNamespaces() and
 * cached in $this->iwMatcher as a list of batches, each of the form
 * [ string[] $keys, string $regex, int $numLangs ]. Language interwikis
 * are placed ahead of non-language ones, and patterns are split into
 * batches of roughly $this->iwMatcherBatchSize bytes.
 *
 * @param string $href Link to match against
 * @return string[]|null Two values [ string $key, string $target ] on success, null on no match.
 *  The key of a language interwiki is returned with a leading colon.
 */
public function interwikiMatcher( string $href ): ?array {
	if ( $this->iwMatcher === null ) {
		// Index 0 collects non-language interwikis, index 1 language ones.
		$keys = [ [], [] ];
		$patterns = [ [], [] ];
		foreach ( $this->interwikiMapNoNamespaces() as $key => $iw ) {
			$key = (string)$key;
			$lang = (int)( !empty( $iw['language'] ) );

			$url = $iw['url'];
			$protocolRelative = substr( $url, 0, 2 ) === '//';
			if ( !empty( $iw['protorel'] ) ) {
				$url = preg_replace( '/^https?:/', '', $url );
				$protocolRelative = true;
			}

			// NOTE: the lookup below pairs $keys[$i] with capture group
			// $i + 1, so every pattern added here must contribute exactly
			// one capture group.

			// full-url match pattern
			$keys[$lang][] = $key;
			$patterns[$lang][] =
				// Support protocol-relative URLs
				( $protocolRelative ? '(?:https?:)?' : '' )
				// Convert placeholder to group match
				. strtr( preg_quote( $url, '/' ), [ '\\$1' => '(.*?)' ] );

			if ( !empty( $iw['local'] ) ) {
				// ./$interwikiPrefix:$title and
				// $interwikiPrefix%3A$title shortcuts
				// are recognized and the local wiki forwards
				// these shortcuts to the remote wiki

				// Quote the prefix: it is literal text, and an unquoted
				// metacharacter could alter the pattern or add a capture
				// group and shift the key/group pairing.
				$prefix = preg_quote( (string)$iw['prefix'], '/' );

				$keys[$lang][] = $key;
				$patterns[$lang][] = '^\\.\\/' . $prefix . ':(.*?)';

				$keys[$lang][] = $key;
				$patterns[$lang][] = '^' . $prefix . '%3A(.*?)';
			}
		}

		// Prefer language matches over non-language matches
		$numLangs = count( $keys[1] );
		$keys = array_merge( $keys[1], $keys[0] );
		$patterns = array_merge( $patterns[1], $patterns[0] );

		// Chunk patterns into reasonably sized regexes
		$this->iwMatcher = [];
		$batchStart = 0;
		$batchLen = 0;
		foreach ( $patterns as $i => $pat ) {
			$len = strlen( $pat );
			if ( $i !== $batchStart && $batchLen + $len > $this->iwMatcherBatchSize ) {
				$this->iwMatcher[] = [
					array_slice( $keys, $batchStart, $i - $batchStart ),
					'/^(?:' . implode( '|', array_slice( $patterns, $batchStart, $i - $batchStart ) ) . ')$/Di',
					// Number of language entries that fall in this batch
					// (zero or negative once past the language block).
					$numLangs - $batchStart,
				];
				$batchStart = $i;
				$batchLen = $len;
			} else {
				$batchLen += $len;
			}
		}
		// Emit the final, partially filled batch.
		$i = count( $patterns );
		if ( $i > $batchStart ) {
			$this->iwMatcher[] = [
				array_slice( $keys, $batchStart, $i - $batchStart ),
				'/^(?:' . implode( '|', array_slice( $patterns, $batchStart, $i - $batchStart ) ) . ')$/Di',
				$numLangs - $batchStart,
			];
		}
	}

	foreach ( $this->iwMatcher as [ $keys, $regex, $numLangs ] ) {
		if ( preg_match( $regex, $href, $m, PREG_UNMATCHED_AS_NULL ) ) {
			// Find which alternative matched: its capture group is non-null.
			foreach ( $keys as $i => $key ) {
				if ( isset( $m[$i + 1] ) ) {
					if ( $i < $numLangs ) {
						// Escape language interwikis with a colon
						$key = ':' . $key;
					}
					return [ $key, $m[$i + 1] ];
				}
			}
		}
	}
	return null;
}
| 657 | |
| 658 | /** |
| 659 | * Wiki identifier, for cache keys. |
| 660 | * Should match a key in mwApiMap()? |
| 661 | * @return string |
| 662 | */ |
| 663 | abstract public function iwp(): string; |
| 664 | |
| 665 | /** |
| 666 | * Legal title characters |
| 667 | * |
| 668 | * Regex is intended to match bytes, not Unicode characters. |
| 669 | * |
| 670 | * @return string Regex character class (i.e. the bit that goes inside `[]`) |
| 671 | */ |
| 672 | abstract public function legalTitleChars(): string; |
| 673 | |
| 674 | /** |
| 675 | * Link prefix regular expression. |
| 676 | * @return string|null |
| 677 | */ |
| 678 | abstract public function linkPrefixRegex(): ?string; |
| 679 | |
| 680 | /** |
| 681 | * Return raw link trail regexp from config |
| 682 | * @return string |
| 683 | */ |
| 684 | abstract protected function linkTrail(): string; |
| 685 | |
| 686 | /** |
| 687 | * Link trail regular expression, computed once from linkTrail() and cached. |
| 688 | * @return string|null Null when the configured trail has an empty `()` group |
| 689 | */ |
| 690 | public function linkTrailRegex(): ?string { |
| 691 | if ( $this->linkTrailRegex !== false ) { |
| 692 | // Already computed (possibly as null) |
| 693 | return $this->linkTrailRegex; |
| 694 | } |
| 695 | // Drop the trailing "rest of text" capture from the raw config value |
| 696 | $stripped = str_replace( '(.*)$', '', $this->linkTrail() ); |
| 697 | // An empty group means there is no usable trail (e.g. zh-hans) |
| 698 | $hasEmptyGroup = strpos( $stripped, '()' ) !== false; |
| 699 | $this->linkTrailRegex = $hasEmptyGroup ? null : $stripped; |
| 700 | |
| 701 | return $this->linkTrailRegex; |
| 702 | } |
| 703 | |
| 704 | /** |
| 705 | * Wiki language code. |
| 706 | * @return Bcp47Code BCP-47 language code |
| 707 | */ |
| 708 | abstract public function langBcp47(): Bcp47Code; |
| 709 | |
| 710 | /** |
| 711 | * Main page title, as LinkTarget |
| 712 | * @return LinkTarget |
| 713 | */ |
| 714 | abstract public function mainPageLinkTarget(): LinkTarget; |
| 715 | |
| 716 | /** |
| 717 | * Lookup config |
| 718 | * @param string $key |
| 719 | * @return mixed|null config value for $key, if present or null, if not. |
| 720 | * @deprecated This very broad interface is no longer needed. |
| 721 | */ |
| 722 | abstract public function getMWConfigValue( string $key ); |
| 723 | |
| 724 | /** |
| 725 | * Whether the wiki language is right-to-left |
| 726 | * @return bool |
| 727 | */ |
| 728 | abstract public function rtl(): bool; |
| 729 | |
| 730 | /** |
| 731 | * Whether language converter is enabled for the specified language |
| 732 | * @param Bcp47Code $lang |
| 733 | * @return bool |
| 734 | */ |
| 735 | abstract public function langConverterEnabledBcp47( Bcp47Code $lang ): bool; |
| 736 | |
| 737 | /** |
| 738 | * The URL path to index.php. |
| 739 | * @return string |
| 740 | */ |
| 741 | abstract public function script(): string; |
| 742 | |
| 743 | /** |
| 744 | * FIXME: This is only used to compute the modules path below |
| 745 | * and maybe shouldn't be exposed. |
| 746 | * |
| 747 | * The base wiki path |
| 748 | * @return string |
| 749 | */ |
| 750 | abstract public function scriptpath(): string; |
| 751 | |
| 752 | /** |
| 753 | * The base URL of the server. |
| 754 | * @return string |
| 755 | */ |
| 756 | abstract public function server(): string; |
| 757 | |
| 758 | /** |
| 759 | * Export content metadata via meta tags (and via a stylesheet |
| 760 | * for now to aid some clients). |
| 761 | * |
| 762 | * @param Document $document |
| 763 | * @param ContentMetadataCollector $metadata |
| 764 | * @param string $defaultTitle The default title to display, as an |
| 765 | * unescaped string |
| 766 | * @param Bcp47Code $lang a BCP-47 language code |
| 767 | */ |
| 768 | abstract public function exportMetadataToHeadBcp47( |
| 769 | Document $document, |
| 770 | ContentMetadataCollector $metadata, |
| 771 | string $defaultTitle, |
| 772 | Bcp47Code $lang |
| 773 | ): void; |
| 774 | |
| 775 | /** |
| 776 | * Helper function to create <head> elements (meta tags and a stylesheet link) from metadata. |
| 777 | * @param Document $document |
| 778 | * @param string $modulesLoadURI |
| 779 | * @param string[] $modules |
| 780 | * @param string[] $moduleStyles |
| 781 | * @param array<string,mixed> $jsConfigVars |
| 782 | * @param string $htmlTitle The display title, as escaped HTML (currently unused; see the T324431 note in the body) |
| 783 | * @param Bcp47Code $lang a Bcp47Code object |
| 784 | */ |
| 785 | protected function exportMetadataHelper( |
| 786 | Document $document, |
| 787 | string $modulesLoadURI, |
| 788 | array $modules, |
| 789 | array $moduleStyles, |
| 790 | array $jsConfigVars, |
| 791 | string $htmlTitle, |
| 792 | Bcp47Code $lang |
| 793 | ): void { |
| 794 | // $htmlTitle contains the DISPLAYTITLE but it corresponds to the |
| 795 | // value of the ParserOutput *not* the ultimate value which would |
| 796 | // be used in the <h1> tag *nor* the plaintext value which would |
| 797 | // be used for the page <title>. OutputPage does additional |
| 798 | // validation/stripping on the displaytitle value before using it. |
| 799 | // As such we're going to just ignore $htmlTitle for now rather |
| 800 | // than report an incorrect value in the <head> (T324431). |
| 801 | |
| 802 | // JsConfigVars |
| 803 | $content = null; |
| 804 | try { |
| 805 | if ( $jsConfigVars ) { |
| 806 | $content = PHPUtils::jsonEncode( $jsConfigVars ); |
| 807 | } |
| 808 | } catch ( \Exception $e ) { |
| 809 | // Similar to ResourceLoader::makeConfigSetScript. See T289358 |
| 810 | $this->getLogger()->log( |
| 811 | LogLevel::WARNING, |
| 812 | 'JSON serialization of config data failed. ' . |
| 813 | 'This usually means the config data is not valid UTF-8.' |
| 814 | ); |
| 815 | } |
| 816 | if ( $content ) { |
| 817 | DOMUtils::appendToHead( $document, 'meta', [ |
| 818 | 'property' => 'mw:jsConfigVars', |
| 819 | 'content' => $content, |
| 820 | ] ); |
| 821 | } |
| 822 | // General modules returned from preprocessor / parse requests |
| 823 | if ( $modules ) { |
| 824 | // mw:generalModules can be processed via JS (and async) and are usually (but |
| 825 | // not always) JS scripts. |
| 826 | DOMUtils::appendToHead( $document, 'meta', [ |
| 827 | 'property' => 'mw:generalModules', |
| 828 | 'content' => implode( '|', array_unique( $modules ) ) |
| 829 | ] ); |
| 830 | } |
| 831 | // Styles from modules returned from preprocessor / parse requests |
| 832 | if ( $moduleStyles ) { |
| 833 | // mw:moduleStyles are CSS modules that are render-blocking. |
| 834 | DOMUtils::appendToHead( $document, 'meta', [ |
| 835 | 'property' => 'mw:moduleStyles', |
| 836 | 'content' => implode( '|', array_unique( $moduleStyles ) ) |
| 837 | ] ); |
| 838 | } |
| 839 | /* |
| 840 | * While unnecessary for Wikimedia clients, a stylesheet url in |
| 841 | * the <head> is useful for clients like Kiwix and others who |
| 842 | * might not want to process the meta tags to construct the |
| 843 | * resourceloader url. |
| 844 | * |
| 845 | * Given that these clients will be consuming Parsoid HTML outside |
| 846 | * a MediaWiki skin, the clients are effectively responsible for |
| 847 | * their own "skin". But, once again, as a courtesy, we are |
| 848 | * hardcoding the vector skin modules for them. But, note that |
| 849 | * this may cause page elements to render differently than how |
| 850 | * they render on Wikimedia sites with the vector skin since this |
| 851 | * is probably missing a number of other modules. |
| 852 | * |
| 853 | * All that said, note that JS-generated parts of the page will |
| 854 | * still require them to have more intimate knowledge of how to |
| 855 | * process the JS modules. Except for <graph>s, page content |
| 856 | * doesn't require JS modules at this point. So, where these |
| 857 | * clients want to invest in the necessary logic to construct a |
| 858 | * better resourceloader url, they could simply delete / ignore |
| 859 | * this stylesheet. |
| 860 | */ |
| 861 | $moreStyles = array_merge( $moduleStyles, [ |
| 862 | 'mediawiki.skinning.content.parsoid', |
| 863 | // Use the base styles that API output and fallback skin use. |
| 864 | 'mediawiki.skinning.interface', |
| 865 | // Make sure to include contents of user generated styles |
| 866 | // e.g. MediaWiki:Common.css / MediaWiki:Mobile.css |
| 867 | 'site.styles' |
| 868 | ] ); |
| 869 | # need to use MW-internal language code for constructing resource |
| 870 | # loader path. |
| 871 | $langMw = Utils::bcp47ToMwCode( $lang ); |
| 872 | $styleURI = $modulesLoadURI . '?lang=' . $langMw . '&modules=' . |
| 873 | PHPUtils::encodeURIComponent( implode( '|', array_unique( $moreStyles ) ) ) . |
| 874 | '&only=styles&skin=vector'; |
| 875 | DOMUtils::appendToHead( $document, 'link', [ 'rel' => 'stylesheet', 'href' => $styleURI ] ); |
| 876 | } |
| 877 | |
| 878 | /** |
| 879 | * A regexp matching the localized 'REDIRECT' marker for this wiki. |
| 880 | * The regexp should be delimited, but should not have boundary anchors |
| 881 | * or capture groups. |
| 882 | * @return string |
| 883 | */ |
| 884 | abstract public function redirectRegexp(): string; |
| 885 | |
| 886 | /** |
| 887 | * A regexp matching the localized 'Category' prefix for this wiki. |
| 888 | * The regexp should be delimited, but should not have boundary anchors |
| 889 | * or capture groups. |
| 890 | * @return string |
| 891 | */ |
| 892 | abstract public function categoryRegexp(): string; |
| 893 | |
| 894 | /** |
| 895 | * A regexp matching localized behavior switches for this wiki. |
| 896 | * The regexp should be delimited, but should not have boundary anchors |
| 897 | * or capture groups. |
| 898 | * @return string |
| 899 | */ |
| 900 | abstract public function bswRegexp(): string; |
| 901 | |
| 902 | /** |
| 903 | * Regex for a line made up solely of whitespace, comments, and sol-transparent |
| 904 | * constructs (a redirect, category links, behavior switches). |
| 905 | * @return string '@'-delimited regex, anchored at both ends |
| 906 | */ |
| 907 | public function solTransparentWikitextRegexp(): string { |
| 908 | // cscott sadly says: Note that this depends on the precise |
| 909 | // localization of the magic words of this particular wiki. |
| 910 | static $cachedRe = null; |
| 911 | if ( $cachedRe !== null ) { |
| 912 | return $cachedRe; |
| 913 | } |
| 914 | $ws = '[ \t\n\r\0\x0b]'; |
| 915 | $redirectRe = PHPUtils::reStrip( $this->redirectRegexp(), '@' ); |
| 916 | $categoryRe = PHPUtils::reStrip( $this->categoryRegexp(), '@' ); |
| 917 | $bswRe = PHPUtils::reStrip( $this->bswRegexp(), '@' ); |
| 918 | $commentRe = PHPUtils::reStrip( Utils::COMMENT_REGEXP, '@' ); |
| 919 | // Optional leading redirect: marker, optional colon, then a [[link]] |
| 920 | $redirectPart = '(?:(?:' . $redirectRe . ')' . |
| 921 | '[ \t\n\r\x0c]*(?::[ \t\n\r\x0c]*)?\[\[[^\]]+\]\])?'; |
| 922 | // Any run of category links, behavior switches, comments, whitespace |
| 923 | $restPart = '(?:' . |
| 924 | '\[\[' . $categoryRe . '\:[^\]]*?\]\]|' . |
| 925 | '__(?:' . $bswRe . ')__|' . |
| 926 | $commentRe . '|' . $ws . ')*'; |
| 927 | $cachedRe = '@^' . $ws . '*' . $redirectPart . $restPart . '$@'; |
| 928 | |
| 929 | return $cachedRe; |
| 930 | } |
| 931 | |
| 932 | /** |
| 933 | * A regex matching a line containing just comments and |
| 934 | * sol transparent links and behavior switches. |
| 935 | * NOTE(review): the cache is a function static, i.e. shared by every SiteConfig instance. |
| 936 | * @param bool $addIncludes Also accept <includeonly>...</includeonly> spans |
| 937 | * @return string '@'-delimited regex; one cached variant per $addIncludes value |
| 938 | */ |
| 939 | public function solTransparentWikitextNoWsRegexp( |
| 940 | bool $addIncludes = false |
| 941 | ): string { |
| 942 | // cscott sadly says: this depends on the precise localization of the magic |
| 943 | // words of this wiki. Cache one regex per $addIncludes value (not just the first). |
| 944 | static $cache = []; |
| 945 | if ( !isset( $cache[(int)$addIncludes] ) ) { |
| 946 | $redirect = PHPUtils::reStrip( $this->redirectRegexp(), '@' ); |
| 947 | $category = PHPUtils::reStrip( $this->categoryRegexp(), '@' ); |
| 948 | $bswRegexp = PHPUtils::reStrip( $this->bswRegexp(), '@' ); |
| 949 | $comment = PHPUtils::reStrip( Utils::COMMENT_REGEXP, '@' ); |
| 950 | $cache[(int)$addIncludes] = '@' . |
| 951 | '((?:' . |
| 952 | '(?:' . $redirect . ')' . |
| 953 | '[ \t\n\r\x0c]*(?::[ \t\n\r\x0c]*)?\[\[[^\]]+\]\]' . |
| 954 | ')?' . |
| 955 | '(?:' . |
| 956 | '\[\[' . $category . '\:[^\]]*?\]\]|' . |
| 957 | '__(?:' . $bswRegexp . ')__|' . |
| 958 | $comment . |
| 959 | // FIXME(SSS): What about onlyinclude and noinclude? |
| 960 | ( $addIncludes ? '|<includeonly>[\S\s]*?</includeonly>' : '' ) . |
| 961 | ')*)@'; |
| 962 | } |
| 963 | return $cache[(int)$addIncludes]; |
| 964 | } |
| 965 | |
| 966 | /** |
| 967 | * The wiki's time zone offset |
| 968 | * @return int Minutes east of UTC |
| 969 | */ |
| 970 | abstract public function timezoneOffset(): int; |
| 971 | |
| 972 | /** |
| 973 | * Language variant information for the given language (or null if |
| 974 | * unknown). |
| 975 | * @param Bcp47Code $lang The language for which you want variant information |
| 976 | * @return ?array{base:Bcp47Code,fallbacks:Bcp47Code[]} an array with |
| 977 | * two fields: |
| 978 | * - base: (Bcp47Code) Base BCP-47 language code (e.g. "zh") |
| 979 | * - fallbacks: (Bcp47Code[]) Fallback variants, as BCP-47 codes |
| 980 | */ |
| 981 | abstract public function variantsFor( Bcp47Code $lang ): ?array; |
| 982 | |
| 983 | /** |
| 984 | * Default thumbnail width |
| 985 | */ |
| 986 | abstract public function widthOption(): int; |
| 987 | |
| 988 | abstract protected function getVariableIDs(): array; |
| 989 | |
| 990 | abstract protected function getMagicWords(): array; |
| 991 | |
| 992 | /** |
| 993 | * Does the SiteConfig provide precomputed function synonyms? |
| 994 | * If not, the SiteConfig is expected to provide an implementation |
| 995 | * for updateFunctionSynonym(). This base implementation answers yes. |
| 996 | */ |
| 997 | protected function haveComputedFunctionSynonyms(): bool { |
| 998 | return true; |
| 999 | } |
| 1000 | |
| 1001 | /** |
| 1002 | * Get a list of precomputed synonyms for parser functions registered |
| 1003 | * with the legacy parser. Be aware that this is distinct from the |
| 1004 | * set of parser functions with Parsoid-native implementations! |
| 1005 | * @return array{0:array<string,string>,1:array<string,string>} [0] is looked up by lowercased name, [1] by exact name; empty here |
| 1006 | */ |
| 1007 | protected function getFunctionSynonyms(): array { |
| 1008 | return [ [], [], ]; |
| 1009 | } |
| 1010 | |
| 1011 | /** |
| 1012 | * If ::haveComputedFunctionSynonyms() returns false, this function is |
| 1013 | * called once on every magic word alias. This function is responsible |
| 1014 | * for determining if the magic word key ($magicword) corresponds to a |
| 1015 | * registered legacy parser function (list obtained via |
| 1016 | * `action=query&meta=siteinfo&siprop=functionhooks`) and setting |
| 1017 | * `$this->functionSynonyms[$case][$alias] = $magicword` if so. |
| 1018 | * This base implementation always throws \RuntimeException. |
| 1019 | * @param string $func A localized alias for this magic word |
| 1020 | * @param string $magicword The lookup key for this magic word |
| 1021 | * @param bool $caseSensitive If true, $func is to be treated as |
| 1022 | * case-sensitive. |
| 1023 | */ |
| 1024 | protected function updateFunctionSynonym( string $func, string $magicword, bool $caseSensitive ): void { |
| 1025 | throw new \RuntimeException( "Unexpected code path!" ); |
| 1026 | } |
| 1027 | |
| 1028 | /** |
| 1029 | * Reset our cached magic word lookup tables. |
| 1030 | * |
| 1031 | * Intended for parser tests: clearing $mwAliases makes the next |
| 1032 | * populateMagicWords() call rebuild magic words, behavior switches, |
| 1033 | * variables, etc. after test-specific settings are processed. |
| 1034 | * @internal |
| 1035 | */ |
| 1036 | public function resetMagicWords() { |
| 1037 | $this->mwAliases = null; |
| 1038 | } |
| 1039 | |
| 1040 | private function populateMagicWords() { |
| 1041 | if ( !empty( $this->mwAliases ) ) { |
| 1042 | return; |
| 1043 | } |
| 1044 | // Not populated yet (or reset): rebuild every lookup table from getMagicWords() in one pass |
| 1045 | $this->mwAliases = $this->behaviorSwitches = $this->variables = $this->mediaOptions = []; |
| 1046 | $variablesMap = PHPUtils::makeSet( $this->getVariableIDs() ); |
| 1047 | $this->functionSynonyms = $this->getFunctionSynonyms(); |
| 1048 | $haveSynonyms = $this->haveComputedFunctionSynonyms(); |
| 1049 | foreach ( $this->getMagicWords() as $magicword => $aliases ) { |
| 1050 | $caseSensitive = array_shift( $aliases ); |
| 1051 | $isVariable = isset( $variablesMap[$magicword] ); |
| 1052 | $isMediaOption = preg_match( '/^(img|timedmedia)_/', $magicword ); |
| 1053 | foreach ( $aliases as $alias ) { |
| 1054 | $this->mwAliases[$magicword][] = $alias; |
| 1055 | if ( !$caseSensitive ) { |
| 1056 | // T389029: strtolower is not the same as case-folding. From here on $alias is the lowercased form. |
| 1057 | $alias = mb_strtolower( $alias ); |
| 1058 | $this->mwAliases[$magicword][] = $alias; |
| 1059 | } |
| 1060 | if ( substr( $alias, 0, 2 ) === '__' ) { |
| 1061 | $this->behaviorSwitches[$alias] = [ $caseSensitive, $magicword ]; |
| 1062 | } |
| 1063 | if ( $isVariable ) { |
| 1064 | $this->variables[$alias] = $magicword; |
| 1065 | } |
| 1066 | if ( $isMediaOption ) { |
| 1067 | $this->mediaOptions[$alias] = [ $caseSensitive, $magicword ]; |
| 1068 | } |
| 1069 | if ( !$haveSynonyms ) { |
| 1070 | $this->updateFunctionSynonym( $alias, $magicword, (bool)$caseSensitive ); |
| 1071 | } |
| 1072 | } |
| 1073 | } |
| 1074 | } |
| 1075 | |
| 1076 | /** |
| 1077 | * List all magic words by canonical name (the tables are built on first use). |
| 1078 | * @return string[][] Keys are canonical names, values are arrays of aliases. |
| 1079 | */ |
| 1080 | public function mwAliases(): array { |
| 1081 | $this->populateMagicWords(); |
| 1082 | return $this->mwAliases; |
| 1083 | } |
| 1084 | |
| 1085 | /** |
| 1086 | * Resolve a parser function name to its canonical magic word. |
| 1087 | * @param string $str A localized potential parser function name, including |
| 1088 | * any leading `#` (but not a trailing colon or bar) |
| 1089 | * @return array{key:?string,isNative:bool} |
| 1090 | * `key` is the magic word for this parser function, or null when no |
| 1091 | * parser function matching $str is known. `isNative` is true when the |
| 1092 | * match came from a Parsoid-native PFragment handler and false when it |
| 1093 | * came from (or fell through to) the legacy parser function table. |
| 1094 | * Exact-case matches are tried before lowercased ones in each table. |
| 1095 | */ |
| 1096 | public function getMagicWordForParserFunction( string $str ): array { |
| 1097 | # Case insensitive functions: |
| 1098 | # Core uses $parser->contLang->lc($str) which is optimized but |
| 1099 | # equivalent to mb_strtolower; case-insensitivity for parser |
| 1100 | # function names should be deprecated, though, and converting |
| 1101 | # to lower case doesn't actually yield a case-insensitive match |
| 1102 | # (T389029) |
| 1103 | $folded = mb_strtolower( $str ); |
| 1104 | |
| 1105 | # Native implementations take precedence |
| 1106 | $this->getExtConfig(); |
| 1107 | $native = $this->pFragmentHandlerFuncSynonyms; |
| 1108 | $nativeKey = $native[1][$str] ?? $native[0][$folded] ?? null; |
| 1109 | if ( $nativeKey !== null ) { |
| 1110 | return [ 'key' => $nativeKey, 'isNative' => true ]; |
| 1111 | } |
| 1112 | |
| 1113 | # Legacy parser functions (key stays null when nothing matches) |
| 1114 | $this->populateMagicWords(); |
| 1115 | $legacy = $this->functionSynonyms; |
| 1116 | $legacyKey = $legacy[1][$str] ?? $legacy[0][$folded] ?? null; |
| 1117 | return [ 'key' => $legacyKey, 'isNative' => false ]; |
| 1118 | } |
| 1119 | |
| 1120 | /** |
| 1121 | * Return canonical magic word for a variable |
| 1122 | * @param string $str Alias, matched verbatim. NOTE(review): case-insensitive aliases are stored lowercased; confirm callers normalize. |
| 1123 | * @return string|null Null if $str is not a known variable alias |
| 1124 | */ |
| 1125 | public function getMagicWordForVariable( string $str ): ?string { |
| 1126 | $this->populateMagicWords(); |
| 1127 | return $this->variables[$str] ?? null; |
| 1128 | } |
| 1129 | |
| 1130 | private static function getMagicWordCanonicalName( array $mws, string $word ): ?string { |
| 1131 | // An exact-case entry always wins; a lowercased lookup counts only if that entry is case-insensitive |
| 1132 | $exact = $mws[$word] ?? null; |
| 1133 | $folded = $mws[mb_strtolower( $word )] ?? null; |
| 1134 | $usable = $exact ?? ( ( $folded && !$folded[0] ) ? $folded : null ); |
| 1135 | return $usable === null ? null : $usable[1]; |
| 1136 | } |
| 1137 | |
| 1138 | /** |
| 1139 | * Return canonical magic word for a media option |
| 1140 | * @param string $word Alias; tried verbatim first, then lowercased against case-insensitive entries |
| 1141 | * @return string|null Null if no media option matches |
| 1142 | */ |
| 1143 | public function getMagicWordForMediaOption( string $word ): ?string { |
| 1144 | $this->populateMagicWords(); |
| 1145 | return self::getMagicWordCanonicalName( $this->mediaOptions, $word ); |
| 1146 | } |
| 1147 | |
| 1148 | /** |
| 1149 | * Return canonical magic word for a behavior switch |
| 1150 | * @param string $word Alias; tried verbatim first, then lowercased against case-insensitive entries |
| 1151 | * @return string|null Null if no behavior switch matches |
| 1152 | */ |
| 1153 | public function getMagicWordForBehaviorSwitch( string $word ): ?string { |
| 1154 | $this->populateMagicWords(); |
| 1155 | return self::getMagicWordCanonicalName( $this->behaviorSwitches, $word ); |
| 1156 | } |
| 1157 | |
| 1158 | /** |
| 1159 | * Check if a string is a recognized behavior switch. |
| 1160 | * |
| 1161 | * @param string $word Candidate alias, as accepted by getMagicWordForBehaviorSwitch() |
| 1162 | * @return bool True when getMagicWordForBehaviorSwitch() finds a magic word for $word |
| 1163 | */ |
| 1164 | public function isBehaviorSwitch( string $word ): bool { |
| 1165 | return $this->getMagicWordForBehaviorSwitch( $word ) !== null; |
| 1166 | } |
| 1167 | |
| 1168 | /** |
| 1169 | * Map an internal canonical magic word name back to a wikitext alias. |
| 1170 | * @param string $word Canonical magic word name |
| 1171 | * @param string $suggest Preferred alias; also returned when $word has no aliases |
| 1172 | * @return string $suggest if it is a known alias (or nothing is known), else the first alias |
| 1173 | */ |
| 1174 | public function getMagicWordWT( string $word, string $suggest ): string { |
| 1175 | $known = $this->mwAliases()[$word] ?? null; |
| 1176 | if ( !$known ) { |
| 1177 | // Unknown magic word: the suggestion is all we have |
| 1178 | return $suggest; |
| 1179 | } |
| 1180 | // Use the suggestion only when it really is one of the aliases; |
| 1181 | // array_search() yields false otherwise, which selects the first alias. |
| 1182 | $pos = $suggest ? array_search( $suggest, $known, true ) : 0; |
| 1183 | return $known[$pos ?: 0]; |
| 1184 | } |
| 1185 | |
| 1186 | /** |
| 1187 | * Get a regexp matching a localized magic word, given its id. |
| 1188 | * |
| 1189 | * FIXME: misleading function name |
| 1190 | * |
| 1191 | * @param string $id |
| 1192 | * @return string |
| 1193 | */ |
| 1194 | abstract public function getMagicWordMatcher( string $id ): string; |
| 1195 | |
| 1196 | /** |
| 1197 | * Get a matcher function for fetching values out of interpolated magic words, |
| 1198 | * ie those with `$1` in their aliases. |
| 1199 | * |
| 1200 | * The matcher takes a string and returns null if it doesn't match any of |
| 1201 | * the words, or an associative array if it did match: |
| 1202 | * - k: The magic word that matched |
| 1203 | * - v: The value of $1 that was matched |
| 1204 | * (the JS also returned 'a' with the specific alias that matched, but that |
| 1205 | * seems to be unused and so is omitted here) |
| 1206 | * |
| 1207 | * @param string[] $words Magic words to match |
| 1208 | * @return callable |
| 1209 | */ |
| 1210 | abstract protected function getParameterizedAliasMatcher( array $words ): callable; |
| 1211 | |
| 1212 | /** |
| 1213 | * Build the parameterized-alias matcher restricted to the media options |
| 1214 | * listed as keys of Consts::$Media['PrefixOptions']. |
| 1215 | * |
| 1216 | * The matcher takes a string and returns null if it doesn't match any of |
| 1217 | * the words, or an associative array if it did match: |
| 1218 | * - k: The magic word that matched |
| 1219 | * - v: The value of $1 that was matched |
| 1220 | * (the JS also returned 'a' with the specific alias that matched, but that |
| 1221 | * seems to be unused and so is omitted here) |
| 1222 | * |
| 1223 | * @return callable |
| 1224 | */ |
| 1225 | final public function getMediaPrefixParameterizedAliasMatcher(): callable { |
| 1226 | // PORT-FIXME: this shouldn't be a constant, we should fetch these from the |
| 1227 | // SiteConfig. Further, we probably need a hook here so Parsoid can handle media options |
| 1228 | // defined in extensions... in particular timedmedia_* from Extension:TimedMediaHandler |
| 1229 | return $this->getParameterizedAliasMatcher( |
| 1230 | array_keys( Consts::$Media['PrefixOptions'] ) |
| 1231 | ); |
| 1232 | } |
| 1233 | |
| 1234 | /** |
| 1235 | * Get the maximum template depth |
| 1236 | * |
| 1237 | * @return int |
| 1238 | */ |
| 1239 | abstract public function getMaxTemplateDepth(): int; |
| 1240 | |
| 1241 | /** |
| 1242 | * Return namespace aliases for the NS_SPECIAL namespace |
| 1243 | * @return array |
| 1244 | */ |
| 1245 | abstract protected function getSpecialNSAliases(): array; |
| 1246 | |
| 1247 | /** |
| 1248 | * Return Special Page aliases for a special page name |
| 1249 | * @param string $specialPage |
| 1250 | * @return array |
| 1251 | */ |
| 1252 | abstract protected function getSpecialPageAliases( string $specialPage ): array; |
| 1253 | |
| 1254 | /** |
| 1255 | * Turn a title into a regex fragment. |
| 1256 | * |
| 1257 | * The text is quoted for the given delimiter, and every space or |
| 1258 | * underscore becomes `[ _]` so either will be matched. |
| 1259 | * |
| 1260 | * @param string $s |
| 1261 | * @param string $delimiter Defaults to '/' |
| 1262 | * @return string |
| 1263 | */ |
| 1264 | protected static function quoteTitleRe( string $s, string $delimiter = '/' ): string { |
| 1265 | // Space and underscore are interchangeable here: map both to the same class |
| 1266 | $eitherSeparator = array_fill_keys( [ ' ', '_' ], '[ _]' ); |
| 1267 | // strtr() substitutes in a single pass, so the underscore inside an |
| 1268 | // inserted `[ _]` is not replaced a second time. |
| 1269 | $quoted = preg_quote( $s, $delimiter ); |
| 1270 | return strtr( $quoted, $eitherSeparator ); |
| 1271 | } |
| 1272 | |
| 1273 | /** |
| 1274 | * Matcher for ISBN/RFC/PMID URL patterns, returning the type and number. |
| 1275 | * |
| 1276 | * The match method takes a string and returns false on no match or a tuple |
| 1277 | * like this on match: [ 'RFC', '12345' ] |
| 1278 | * NOTE(review): namespace aliases are used unquoted; confirm getSpecialNSAliases() returns regex-ready strings. |
| 1279 | * @return callable |
| 1280 | */ |
| 1281 | public function getExtResourceURLPatternMatcher(): callable { |
| 1282 | $nsAliases = implode( '|', array_unique( $this->getSpecialNSAliases() ) ); |
| 1283 | // Quote page aliases for '!', the delimiter of $regex below (the default would be '/') |
| 1284 | $pageAliases = implode( '|', array_map( function ( string $alias ): string { |
| 1285 | return static::quoteTitleRe( $alias, '!' ); |
| 1286 | }, $this->getSpecialPageAliases( 'Booksources' ) ) ); |
| 1287 | $pats = [ |
| 1288 | 'ISBN' => '(?:\.\.?/)*(?i:' . $nsAliases . ')(?:%3[Aa]|:)' |
| 1289 | . '(?i:' . $pageAliases . ')(?:%2[Ff]|/)(?P<ISBN>\d+[Xx]?)', |
| 1290 | // Recently the target url for RFCs changed from |
| 1291 | // tools.ietf.org to datatracker.ietf.org/docs. |
| 1292 | // Given edit stash storage on Wikimedia wikis, we need to retain the |
| 1293 | // old mapping to ensure html->wt can handle that HTML properly |
| 1294 | // But, 3rd party wikis with Parsoid HTML in their caches will also |
| 1295 | // need this b/c support for much longer. Once the MW LTS release with |
| 1296 | // tools.ietf.org EOLs, we can remove the tools.ietf.org string here. |
| 1297 | // T382963 tracks the eventual removal of this b/c. |
| 1298 | 'RFC' => '[^/]*//(?:datatracker\.ietf\.org/doc|tools\.ietf\.org)/html/rfc(?P<RFC>\w+)', |
| 1299 | 'PMID' => '[^/]*//www\.ncbi\.nlm\.nih\.gov/pubmed/(?P<PMID>\w+)\?dopt=Abstract', |
| 1300 | ]; |
| 1301 | // T145590: remove patterns for disabled magic links |
| 1302 | foreach ( array_keys( $pats ) as $v ) { |
| 1303 | if ( !$this->magicLinkEnabled( $v ) ) { |
| 1304 | unset( $pats[$v] ); |
| 1305 | } |
| 1306 | } |
| 1307 | $regex = '!^(?:' . implode( '|', $pats ) . ')$!'; |
| 1308 | return static function ( $text ) use ( $pats, $regex ) { |
| 1309 | if ( preg_match( $regex, $text, $m ) ) { |
| 1310 | foreach ( $pats as $k => $re ) { |
| 1311 | if ( isset( $m[$k] ) && $m[$k] !== '' ) { |
| 1312 | return [ $k, $m[$k] ]; |
| 1313 | } |
| 1314 | } |
| 1315 | } |
| 1316 | return false; |
| 1317 | }; |
| 1318 | } |
| 1319 | |
| 1320 | /** |
| 1321 | * @return bool Current value of $this->linterEnabled |
| 1322 | */ |
| 1323 | public function linterEnabled(): bool { |
| 1324 | return $this->linterEnabled; |
| 1325 | } |
| 1326 | |
| 1327 | /** |
| 1328 | * Return the desired linter configuration. These are heuristic values |
| 1329 | * which have hardcoded defaults but could be overridden on a per-wiki |
| 1330 | * basis. |
| 1331 | * @return array{enabled?:?string[],disabled?:?string[],maxTableColumnHeuristic?:int,maxTableRowsToCheck?:int,tidyWhitespaceBugMaxLength?:int} |
| 1332 | */ |
| 1333 | public function getLinterSiteConfig(): array { |
| 1334 | return [ |
| 1335 | // Allow list for specific lint types. |
| 1336 | // Takes precedence over block list. |
| 1337 | 'enabled' => null, |
| 1338 | // Block list for specific lint types. |
| 1339 | // Not used if an allow list is set. |
| 1340 | 'disabled' => null, |
| 1341 | // The maximum columns in a table before the table is considered |
| 1342 | // large |
| 1343 | 'maxTableColumnHeuristic' => 5, |
| 1344 | // The maximum rows (header or data) to be checked for the large |
| 1345 | // table lint |
| 1346 | // - If we consider the first N rows to be representative of the |
| 1347 | // table, and the table is well-formed and uniform, it is |
| 1348 | // sufficient to check the first N rows to check if the table is |
| 1349 | // "large". |
| 1350 | // - This heuristic is used together with the |
| 1351 | // 'maxTableColumnHeuristic' to identify "large tables". |
| 1352 | 'maxTableRowsToCheck' => 10, |
| 1353 | // Max length of content covered by 'white-space:nowrap' CSS |
| 1354 | // that we consider "safe" when Tidy is replaced. Beyond that, |
| 1355 | // wikitext will have to be fixed up to manually insert whitespace |
| 1356 | // at the right places. Length in bytes. |
| 1357 | 'tidyWhitespaceBugMaxLength' => 100, |
| 1358 | ]; |
| 1359 | } |
| 1360 | |
| 1361 | /** |
| 1362 | * Serialize ISBN/RFC/PMID URL patterns |
| 1363 | * Returns $content unchanged when it already reads as the magic link, otherwise an explicit link. |
| 1364 | * @param string[] $match As returned by the getExtResourceURLPatternMatcher() matcher |
| 1365 | * @param string $href Fallback link target, if $match is invalid. |
| 1366 | * @param string $content Link text |
| 1367 | * @return string Wikitext; \InvalidArgumentException is thrown when $match[0] is not ISBN, RFC or PMID |
| 1368 | */ |
| 1369 | public function makeExtResourceURL( array $match, string $href, string $content ): string { |
| 1370 | $normalized = preg_replace( |
| 1371 | '/[ \x{00A0}\x{1680}\x{2000}-\x{200A}\x{202F}\x{205F}\x{3000}]+/u', ' ', |
| 1372 | Utils::decodeWtEntities( $content ) |
| 1373 | ); |
| 1374 | |
| 1375 | // TODO: T145590 ("Update Parsoid to be compatible with magic links being disabled") |
| 1376 | switch ( $match[0] ) { |
| 1377 | case 'ISBN': |
| 1378 | $normalized = strtoupper( preg_replace( '/[\- \t]/', '', $normalized ) ); |
| 1379 | // validate ISBN length and format, so as not to produce magic links |
| 1380 | // which aren't actually magic |
| 1381 | $valid = preg_match( '/^ISBN(97[89])?\d{9}(\d|X)$/D', $normalized ); |
| 1382 | if ( implode( '', $match ) === $normalized && $valid ) { |
| 1383 | return $content; |
| 1384 | } |
| 1385 | // strip "./" prefix. TODO: Use relativeLinkPrefix() instead? |
| 1386 | $href = PHPUtils::stripPrefix( $href, './' ); |
| 1387 | return "[[$href|$content]]"; |
| 1388 | |
| 1389 | case 'RFC': |
| 1390 | case 'PMID': |
| 1391 | $normalized = preg_replace( '/[ \t]/', '', $normalized ); |
| 1392 | return implode( '', $match ) === $normalized ? $content : "[$href $content]"; |
| 1393 | |
| 1394 | default: |
| 1395 | throw new \InvalidArgumentException( "Invalid match type '{$match[0]}'" ); |
| 1396 | } |
| 1397 | } |
| 1398 | |
| 1399 | /** |
| 1400 | * Get the list of valid protocols |
| 1401 | * @return array |
| 1402 | */ |
| 1403 | abstract protected function getProtocols(): array; |
| 1404 | |
| 1405 | /** |
| 1406 | * Build (and cache) an alternation of the wiki's URL protocols, quoted for an |
| 1407 | * exclamation mark delimiter. The case-insensitive option should be used. |
| 1408 | * |
| 1409 | * @param bool $excludeProtRel Whether to exclude protocol-relative URLs |
| 1410 | * @return string |
| 1411 | */ |
| 1412 | public function getProtocolsRegex( bool $excludeProtRel = false ) { |
| 1413 | $slot = (int)$excludeProtRel; |
| 1414 | if ( isset( $this->protocolsRegexes[$slot] ) ) { |
| 1415 | return $this->protocolsRegexes[$slot]; |
| 1416 | } |
| 1417 | $protocols = $this->getProtocols(); |
| 1418 | if ( $excludeProtRel ) { |
| 1419 | $protocols = array_filter( $protocols, static function ( $p ) { return $p !== '//'; } ); |
| 1420 | } |
| 1421 | $quoted = array_map( static function ( $p ) { return preg_quote( $p, '!' ); }, $protocols ); |
| 1422 | $this->protocolsRegexes[$slot] = implode( '|', $quoted ); |
| 1423 | return $this->protocolsRegexes[$slot]; |
| 1424 | } |
| 1425 | |
/**
 * Matcher for valid protocols, must be anchored at start of string.
 *
 * When no protocols are configured this returns false: an empty
 * alternation would otherwise match every input.
 *
 * @param string $potentialLink
 * @return bool Whether $potentialLink begins with a valid protocol
 */
public function hasValidProtocol( string $potentialLink ): bool {
	$protocols = $this->getProtocolsRegex();
	if ( $protocols === '' ) {
		// '!^(?:)!i' matches any string, so bail out explicitly.
		return false;
	}
	$re = '!^(?:' . $protocols . ')!i';
	return (bool)preg_match( $re, $potentialLink );
}
| 1435 | |
/**
 * Matcher for valid protocols, may occur at any point within string.
 *
 * A protocol only counts when it sits at the start of the string or
 * directly after a non-word character. When no protocols are
 * configured this returns false: an empty alternation would otherwise
 * match every input.
 *
 * @param string $potentialLink
 * @return bool Whether $potentialLink contains a valid protocol
 */
public function findValidProtocol( string $potentialLink ): bool {
	$protocols = $this->getProtocolsRegex();
	if ( $protocols === '' ) {
		// '!(?:\W|^)(?:)!i' matches any string, so bail out explicitly.
		return false;
	}
	$re = '!(?:\W|^)(?:' . $protocols . ')!i';
	return (bool)preg_match( $re, $potentialLink );
}
| 1445 | |
| 1446 | /** @} */ |
| 1447 | |
/**
 * Hook for unit tests that need a fixed clock.
 *
 * This base implementation never fakes the time.
 *
 * @return int|null Unix timestamp to pretend it is, or null to not fake it
 */
public function fakeTimestamp(): ?int {
	return null;
}
| 1455 | |
/**
 * Get the extension tags configured in M/W core, as an array keyed by
 * lower-case tag name (the values are arbitrary).
 *
 * Tags from $coreExtModules may already be included. Eventually the
 * distinction will disappear, since all extension tags will have to
 * be defined against Parsoid's extension API.
 *
 * @return array
 */
abstract protected function getNonNativeExtensionTags(): array;
| 1466 | |
/**
 * Return an object factory to use when instantiating extensions.
 * (This is assumed to be plumbed up to an appropriate service container.)
 *
 * The default factory is backed by a container that holds no services:
 * has() always answers false and get() always throws.
 *
 * @return ObjectFactory The object factory to use for extensions
 */
public function getObjectFactory(): ObjectFactory {
	$emptyContainer = new class() implements ContainerInterface {

		/**
		 * @param string $id
		 * @return never
		 */
		public function get( $id ) {
			throw new class( "Empty service container" ) extends \Error
				implements NotFoundExceptionInterface {
			};
		}

		/**
		 * @param string $id
		 * @return false
		 */
		public function has( $id ): bool {
			return false;
		}
	};

	return new ObjectFactory( $emptyContainer );
}
| 1496 | |
/**
 * Decide whether processExtensionModule() checks each extension
 * module's configuration array against the JSON schema.
 *
 * Validation is on by default. Subclasses should keep it on when
 * running tests, but may turn it off in production.
 *
 * @return bool
 */
protected function shouldValidateExtConfig(): bool {
	return true;
}
| 1506 | |
/**
 * (Re)build $this->extConfig from the registered extension modules.
 *
 * FIXME: might benefit from T250230 (caching) but see T270307 --
 * currently SiteConfig::unregisterExtensionModule() is called
 * during testing, which requires invalidating $this->extConfig.
 * (See also SiteConfig::fakeTimestamp() etc.) We'd probably need
 * to more fully separate/mock the "testing SiteConfig" as well
 * as provide a way for parser options to en/disable individual
 * registered modules before this class can be considered immutable
 * and cached.
 *
 * NOTE(review): $this->t299103Tags is filled by
 * processExtensionModule() but is not cleared here, unlike the
 * PFragment synonym table -- confirm whether entries can go stale
 * after a module is unregistered.
 */
private function constructExtConfig() {
	// Start every section out empty.
	$this->extConfig = array_fill_keys( [
		'allTags',
		'parsoidExtTags',
		'annotationTags',
		'domProcessors',
		'annotationStrippers',
		'contentModels',
		'pFragmentHandlers',
	], [] );

	// The parent wiki may define tags which have no associated
	// parsoid module; for now those are handled by invoking the
	// legacy parser.
	$this->extConfig['allTags'] = $this->getNonNativeExtensionTags();

	// The PFragment handler synonyms are rebuilt from scratch while
	// the extension modules are processed below.
	$this->pFragmentHandlerFuncSynonyms = [ [], [] ];

	// Return value unused; the loop below reads $this->extModules
	$this->getExtensionModules();
	foreach ( $this->extModules as $extId => $module ) {
		$this->processExtensionModule( $module, $this->extModuleOwner[$extId] );
	}
}
| 1542 | |
/**
 * Report whether a tag was registered with the 'stripNowiki' option,
 * i.e. whether it needs implicit nowiki stripping in {{#tag:..}} args.
 *
 * @param string $lowerTagName Tag name, already lower-cased
 * @return bool
 */
public function tagNeedsNowikiStrippedInTagPF( string $lowerTagName ): bool {
	$flagged = isset( $this->t299103Tags[$lowerTagName] );
	return $flagged;
}
| 1550 | |
/**
 * Return the JSON Schema for Extension Modules.
 *
 * The schema is read from Ext/moduleconfig.schema.json (relative to
 * this file) and decoded at most once per process; the decoded object
 * is kept in a static local.
 *
 * @return object The decoded schema
 * @throws \RuntimeException If the schema file cannot be read, or its
 *   contents do not decode to a JSON object
 */
private static function getExtensionModuleSchema(): object {
	static $schema = null;
	if ( $schema === null ) {
		$schemaPath = __DIR__ . '/../Ext/moduleconfig.schema.json';
		$json = file_get_contents( $schemaPath );
		if ( $json === false ) {
			throw new \RuntimeException(
				"Unable to read extension module schema: $schemaPath"
			);
		}
		$decoded = json_decode( $json );
		// Fail with a clear message rather than an opaque return-type
		// error when the file is malformed.
		if ( !is_object( $decoded ) ) {
			throw new \RuntimeException(
				"Invalid extension module schema in $schemaPath: " .
				json_last_error_msg()
			);
		}
		$schema = $decoded;
	}
	return $schema;
}
| 1562 | |
/**
 * Register a Parsoid-compatible extension: merge everything the module
 * declares (tags, annotations, PFragment handlers, DOM processors,
 * content models, PFragment types) into $this->extConfig.
 *
 * $this->extConfig must already have been initialised.
 *
 * @param ExtensionModule $ext
 * @param string|null $extensionOwner the MediaWiki extension which registered
 *   this extension module.
 */
protected function processExtensionModule( ExtensionModule $ext, ?string $extensionOwner = null ): void {
	Assert::invariant( $this->extConfig !== null, "not yet inited!" );
	$moduleConfig = $ext->getConfig();
	$name = $moduleConfig['name'] ?? null;
	Assert::invariant(
		$name !== null,
		"Every extension module must have a name."
	);

	if ( $this->shouldValidateExtConfig() ) {
		$validator = new Validator;
		$validator->validate(
			$moduleConfig,
			self::getExtensionModuleSchema(),
			Constraint::CHECK_MODE_TYPE_CAST // allow associative arrays
		);
		Assert::invariant(
			$validator->isValid(),
			"Found errors when validating " .
			$moduleConfig['name'] . " ExtensionModule config: " .
			json_encode( $validator->getErrors(), JSON_PRETTY_PRINT )
		);
	}

	// Extension tag handlers: wt2html (sourceToDom), html2wt
	// (domToWikitext), and linter functionality.
	foreach ( $moduleConfig['tags'] ?? [] as $tagConfig ) {
		$tagKey = mb_strtolower( $tagConfig['name'] );
		$this->extConfig['allTags'][$tagKey] = true;
		$this->extConfig['parsoidExtTags'][$tagKey] = $tagConfig;
		// b/c nowiki stripping support needed by some extensions:
		// this registers the tag with the legacy parser for implicit
		// nowiki stripping in {{#tag:..}} args for this tag.
		if ( isset( $tagConfig['options']['stripNowiki'] ) ) {
			$this->t299103Tags[$tagKey] = true;
		}
	}

	$annotationConfig = $moduleConfig['annotations'] ?? null;
	if ( $annotationConfig !== null ) {
		foreach ( $annotationConfig['tagNames'] ?? [] as $annotationTag ) {
			$tagKey = mb_strtolower( $annotationTag );
			$this->extConfig['allTags'][$tagKey] = true;
			$this->extConfig['annotationTags'][$tagKey] = true;
		}
		if ( isset( $annotationConfig['annotationStripper'] ) ) {
			$stripper = $this->getObjectFactory()->createObject(
				$annotationConfig['annotationStripper'],
				[
					'allowClassName' => true,
					'assertClass' => AnnotationStripper::class,
				]
			);
			$this->extConfig['annotationStrippers'][$name] = $stripper;
		}
	}

	$this->populateMagicWords();
	$magicWordMap = $this->getMagicWords();
	// PFragment handlers are named using magic words
	$handlerSpecs = $moduleConfig['pFragmentHandlers'] ?? $moduleConfig['fragmentHandlers'] ?? [];
	foreach ( $handlerSpecs as $handlerSpec ) {
		$key = $handlerSpec['key'] ?? null; # A magic word
		if ( !$key ) {
			continue;
		}
		# record which extension and parsoid module define this
		# fragment handler
		$handlerSpec['module-name'] = $name;
		$handlerSpec['extension-name'] = $extensionOwner;
		$this->extConfig['pFragmentHandlers'][$key] = $handlerSpec;
		if ( !array_key_exists( $key, $magicWordMap ) ) {
			continue;
		}
		// Case-insensitive is deprecated! T389029
		$caseSensitive = $magicWordMap[$key][0] ?? 0;
		$isParserFunction = isset( $handlerSpec['options']['parserFunction'] );
		# 'hash' is the default; for legacy compatibility a few
		# parser functions are defined without a hash or have
		# the hash already prepended to the magic word alias
		$hashPrefix = isset( $handlerSpec['options']['nohash'] ) ? '' : '#';
		foreach ( $this->mwAliases[$key] as $alias ) {
			if ( $isParserFunction ) {
				$this->pFragmentHandlerFuncSynonyms[$caseSensitive][$hashPrefix . $alias] = $key;
			}
			// TODO (T390342): ['options']['extensionTag'] can also be set,
			// and we would register this PFragment handler as a
			// localizable (!) extension tag.
			// $this->pFragmentHandlerTagSynonyms[$case][$alias]=$key;
		}
	}

	// Extension modules may also register dom processors, for
	// wt2htmlPostProcessor and html2wtPreProcessor functionality.
	if ( isset( $moduleConfig['domProcessors'] ) ) {
		$this->extConfig['domProcessors'][$name] = $moduleConfig['domProcessors'];
	}

	foreach ( $moduleConfig['contentModels'] ?? [] as $model => $spec ) {
		// For compatibility with mediawiki core, the first
		// registered extension wins.
		if ( !isset( $this->extConfig['contentModels'][$model] ) ) {
			$modelHandler = $this->getObjectFactory()->createObject( $spec, [
				'allowClassName' => true,
				'assertClass' => ContentModelHandler::class,
			] );
			$this->extConfig['contentModels'][$model] = $modelHandler;
		}
	}

	// Extension modules can register new PFragment types
	foreach ( $moduleConfig['PFragmentTypes'] ?? [] as $fragmentClass ) {
		PFragment::registerFragmentClass( $fragmentClass );
	}
}
| 1686 | |
/**
 * Return the extension configuration tables, building them on first
 * use via constructExtConfig().
 *
 * @return array
 */
protected function getExtConfig(): array {
	if ( $this->extConfig ) {
		return $this->extConfig;
	}
	$this->constructExtConfig();
	return $this->extConfig;
}
| 1693 | |
/**
 * Look up the ContentModelHandler registered for $contentmodel.
 *
 * A null result means no handler is registered; the default wikitext
 * content model handler will be used in that case.
 *
 * @param string $contentmodel
 * @return ContentModelHandler|null
 */
public function getContentModelHandler( string $contentmodel ): ?ContentModelHandler {
	$handlers = $this->getExtConfig()['contentModels'] ?? [];
	return $handlers[$contentmodel] ?? null;
}
| 1704 | |
/**
 * Return every annotationStripper defined in an annotation
 * configuration, sorted by the name of the module that registered it
 * so the order of the list is stable.
 *
 * @return list<AnnotationStripper>
 */
public function getAnnotationStrippers(): array {
	$byModuleName = $this->getExtConfig()['annotationStrippers'] ?? [];
	ksort( $byModuleName );
	return array_values( $byModuleName );
}
| 1715 | |
/**
 * Check whether $name is a valid extension tag name.
 *
 * @param string $name Tag name; must already be lower-cased
 * @return bool
 */
public function isExtensionTag( string $name ): bool {
	$knownTags = $this->getExtensionTagNameMap();
	return isset( $knownTags[$name] );
}
| 1726 | |
/**
 * Check whether $tagName is an annotation tag. The name is lower-cased
 * before the lookup.
 *
 * @param string $tagName
 * @return bool
 */
public function isAnnotationTag( string $tagName ): bool {
	$tagKey = mb_strtolower( $tagName );
	return $this->getExtConfig()['annotationTags'][$tagKey] ?? false;
}
| 1734 | |
/**
 * List the names of all defined annotation tags, in lower case.
 *
 * @return array
 */
public function getAnnotationTags(): array {
	return array_keys( $this->getExtConfig()['annotationTags'] );
}
| 1743 | |
/**
 * Get the defined extension tags as a map keyed by lower-case tag
 * name; the values are arbitrary.
 *
 * @return array<string,true>
 */
public function getExtensionTagNameMap(): array {
	return $this->getExtConfig()['allTags'];
}
| 1754 | |
/**
 * Fetch the tag configuration a Parsoid extension module registered
 * for $tagName. The name is lower-cased before the lookup.
 *
 * @param string $tagName Extension tag name
 * @return array|null Null when no module registered this tag
 */
public function getExtTagConfig( string $tagName ): ?array {
	$tagKey = mb_strtolower( $tagName );
	return $this->getExtConfig()['parsoidExtTags'][$tagKey] ?? null;
}
| 1763 | |
| 1764 | /** @var array<string,?ExtensionTagHandler> */ |
| 1765 | private array $tagHandlerCache = []; |
| 1766 | /** @var array<string,?PFragmentHandler> */ |
| 1767 | private array $pFragmentHandlerCache = []; |
| 1768 | |
/**
 * Returns the implementation of the named extension, if there is one.
 *
 * The handler is created through the object factory on first request
 * and cached per lower-cased tag name; a missing handler is cached as
 * null as well.
 *
 * @param string $tagName Extension tag name
 * @return ExtensionTagHandler|null
 */
public function getExtTagImpl( string $tagName ): ?ExtensionTagHandler {
	$tagKey = mb_strtolower( $tagName );
	if ( array_key_exists( $tagKey, $this->tagHandlerCache ) ) {
		return $this->tagHandlerCache[$tagKey];
	}

	$handlerSpec = $this->getExtTagConfig( $tagKey )['handler'] ?? null;
	$handler = null;
	if ( $handlerSpec !== null ) {
		$handler = $this->getObjectFactory()->createObject( $handlerSpec, [
			'allowClassName' => true,
			'assertClass' => ExtensionTagHandler::class,
		] );
	}

	$this->tagHandlerCache[$tagKey] = $handler;
	return $this->tagHandlerCache[$tagKey];
}
| 1787 | |
/**
 * List the keys of all PFragment handlers registered with Parsoid.
 *
 * @return list<string> Magic word IDs naming the PFragment handlers
 */
public function getPFragmentHandlerKeys() {
	$handlers = $this->getExtConfig()['pFragmentHandlers'] ?? [];
	return array_keys( $handlers );
}
| 1796 | |
/**
 * Fetch the registered configuration for one PFragment handler.
 *
 * @param string $key Magic word ID naming this PFragment handler
 * @return array{handler?:string|array}|null Configuration for the
 *   fragment handler, including a 'handler' property which contains
 *   an object factory specification for a PFragmentHandler; null when
 *   nothing is registered under $key.
 */
public function getPFragmentHandlerConfig( string $key ) {
	return $this->getExtConfig()['pFragmentHandlers'][$key] ?? null;
}
| 1807 | |
/**
 * Return the PFragmentHandler registered under $key, if there is one.
 *
 * The handler is created through the object factory on first request
 * and cached per key; a missing handler is cached as null as well.
 *
 * @param string $key Magic word ID naming this PFragment handler
 *
 * @return ?PFragmentHandler
 */
public function getPFragmentHandlerImpl( string $key ): ?PFragmentHandler {
	if ( array_key_exists( $key, $this->pFragmentHandlerCache ) ) {
		return $this->pFragmentHandlerCache[$key];
	}

	$handlerSpec = $this->getPFragmentHandlerConfig( $key )['handler'] ?? null;
	$handler = null;
	if ( $handlerSpec !== null ) {
		$handler = $this->getObjectFactory()->createObject( $handlerSpec, [
			'allowClassName' => true,
			'assertClass' => PFragmentHandler::class,
		] );
	}

	$this->pFragmentHandlerCache[$key] = $handler;
	return $this->pFragmentHandlerCache[$key];
}
| 1825 | |
/**
 * Return the registered DOM processors: a map from extension module
 * name to that module's array of object factory specs for
 * Ext\DOMProcessor objects.
 *
 * @return array<string,list<string|array>>
 */
public function getExtDOMProcessors(): array {
	return $this->getExtConfig()['domProcessors'];
}
| 1835 | |
/**
 * Name the localization message to use for asynchronous fallback
 * content.
 *
 * @return string Message key
 */
public function getAsyncFallbackMessageKey(): string {
	return 'parsoid-async-not-ready-fallback';
}
| 1843 | |
| 1844 | /** @var array<string,int> */ |
| 1845 | protected $wt2htmlLimits = [ |
| 1846 | // We won't handle pages beyond this size |
| 1847 | 'wikitextSize' => 2048 * 1024, // ParserOptions::maxIncludeSize |
| 1848 | |
| 1849 | // Max list items per page |
| 1850 | 'listItem' => 30000, |
| 1851 | |
| 1852 | // Max table cells per page |
| 1853 | 'tableCell' => 30000, |
| 1854 | |
| 1855 | // Max transclusions per page |
| 1856 | 'transclusion' => 10000, |
| 1857 | |
| 1858 | // DISABLED for now |
| 1859 | // Max images per page |
| 1860 | 'image' => 1000, |
| 1861 | |
| 1862 | // Max top-level token size |
| 1863 | 'token' => 1000000, // 1M |
| 1864 | ]; |
| 1865 | |
/**
 * Return the resource limits applied when converting wikitext to HTML
 * (the contents of $this->wt2htmlLimits).
 *
 * @return array<string,int> Limit name => maximum value
 */
public function getWt2HtmlLimits(): array {
	return $this->wt2htmlLimits;
}
| 1872 | |
| 1873 | /** @var array<string,int> */ |
| 1874 | protected $html2wtLimits = [ |
| 1875 | // We refuse to serialize HTML strings bigger than this |
| 1876 | 'htmlSize' => 10000000, // 10M |
| 1877 | ]; |
| 1878 | |
/**
 * Return the resource limits applied when converting HTML to wikitext
 * (the contents of $this->html2wtLimits).
 *
 * @return array<string,int> Limit name => maximum value
 */
public function getHtml2WtLimits(): array {
	return $this->html2wtLimits;
}
| 1885 | |
/**
 * Create a "Parsoid CLI" Monolog logger.
 *
 * With a file path, messages go through a StreamHandler, one per
 * line, and a separator line is logged immediately because the
 * stream handler appends. Without one, messages go through an
 * ErrorLogHandler.
 *
 * @param ?string $filePath File to log to (if null, logs to console)
 * @return Logger
 */
public static function createLogger( ?string $filePath = null ): Logger {
	$logToFile = (bool)$filePath;
	$logger = new Logger( "Parsoid CLI" );

	if ( $logToFile ) {
		$handler = new StreamHandler( $filePath );
		$format = "%message%\n";
	} else {
		// Use Monolog's PHP console handler
		$handler = new ErrorLogHandler();
		$format = '%message%';
	}
	// Third argument: don't suppress inline newlines
	$handler->setFormatter( new LineFormatter( $format, null, true ) );
	$logger->pushHandler( $handler );

	if ( $logToFile ) {
		// Separator between logs since StreamHandler appends
		$logger->log( Logger::INFO, "-------------- starting fresh log --------------" );
	}

	return $logger;
}
| 1911 | |
/**
 * NOTE(review): undocumented in this file. Presumably returns the
 * wiki's rel="nofollow" settings for external links -- confirm the
 * expected array shape against the implementing subclasses.
 *
 * @return array
 */
abstract public function getNoFollowConfig(): array;
| 1913 | |
/**
 * NOTE(review): undocumented in this file. Presumably the target
 * attribute value to apply to external links, with false meaning
 * "none" -- confirm against the implementing subclasses.
 *
 * @return string|false
 */
abstract public function getExternalLinkTarget();
| 1916 | } |