Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
36.26% |
95 / 262 |
|
7.14% |
5 / 70 |
CRAP | |
0.00% |
0 / 1 |
Env | |
36.26% |
95 / 262 |
|
7.14% |
5 / 70 |
3912.55 | |
0.00% |
0 / 1 |
__construct | |
88.00% |
44 / 50 |
|
0.00% |
0 / 1 |
6.06 | |||
checkPlatform | |
12.50% |
2 / 16 |
|
0.00% |
0 / 1 |
9.03 | |||
profiling | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getCurrentProfile | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
pushNewProfile | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
12 | |||
popProfile | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
hasTraceFlags | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
hasTraceFlag | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
hasDumpFlags | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
hasDumpFlag | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
writeDump | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getSiteConfig | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getPageConfig | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getDataAccess | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getMetadata | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getTOCData | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
nativeTemplateExpansionEnabled | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getUID | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getFID | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getWrapSections | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getPipelineFactory | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getRequestOffsetType | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getCurrentOffsetType | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
setCurrentOffsetType | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getContextTitle | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
2 | |||
resolveTitle | |
96.67% |
29 / 30 |
|
0.00% |
0 / 1 |
15 | |||
normalizedTitleKey | |
66.67% |
4 / 6 |
|
0.00% |
0 / 1 |
3.33 | |||
makeTitle | |
37.50% |
3 / 8 |
|
0.00% |
0 / 1 |
7.91 | |||
makeTitleFromText | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
makeTitleFromURLDecodedStr | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
makeLink | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
2 | |||
isValidLinkTarget | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
generateUID | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
generateAnnotationUID | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
newAnnotationId | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
newAboutId | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
setDOMDiff | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getDOMDiff | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
newFragmentId | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
setupTopLevelDoc | |
66.67% |
4 / 6 |
|
0.00% |
0 / 1 |
2.15 | |||
getTopLevelDoc | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
setTopLevelDoc | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
fetchRemexPipeline | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
6 | |||
setBehaviorSwitch | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getBehaviorSwitch | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getDOMFragmentMap | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getDOMFragment | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
setDOMFragment | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
removeDOMFragment | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
2 | |||
getPFragment | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
addToPFragmentMap | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
recordLint | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
12 | |||
getLints | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
setLints | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
log | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
bumpWt2HtmlResourceUse | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
6 | |||
compareWt2HtmlLimit | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
bumpHtml2WtResourceUse | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
12 | |||
getContentHandler | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
12 | |||
langConverterEnabled | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
getInputContentVersion | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getOutputContentVersion | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getHtmlVariantLanguageBcp47 | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getWtVariantLanguageBcp47 | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getSkipLanguageConversionPass | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
htmlVary | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
6 | |||
htmlContentLanguageBcp47 | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
getExternalLinkAttribs | |
0.00% |
0 / 19 |
|
0.00% |
0 / 1 |
56 | |||
getLinterConfig | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
linting | |
0.00% |
0 / 14 |
|
0.00% |
0 / 1 |
42 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace Wikimedia\Parsoid\Config; |
5 | |
6 | use Wikimedia\Assert\Assert; |
7 | use Wikimedia\Bcp47Code\Bcp47Code; |
8 | use Wikimedia\Parsoid\Core\ContentMetadataCollector; |
9 | use Wikimedia\Parsoid\Core\ContentModelHandler; |
10 | use Wikimedia\Parsoid\Core\DomPageBundle; |
11 | use Wikimedia\Parsoid\Core\ResourceLimitExceededException; |
12 | use Wikimedia\Parsoid\Core\Sanitizer; |
13 | use Wikimedia\Parsoid\Core\TOCData; |
14 | use Wikimedia\Parsoid\DOM\Document; |
15 | use Wikimedia\Parsoid\DOM\DocumentFragment; |
16 | use Wikimedia\Parsoid\Fragments\PFragment; |
17 | use Wikimedia\Parsoid\Logger\ParsoidLogger; |
18 | use Wikimedia\Parsoid\Parsoid; |
19 | use Wikimedia\Parsoid\Tokens\Token; |
20 | use Wikimedia\Parsoid\Utils\DOMDataUtils; |
21 | use Wikimedia\Parsoid\Utils\PHPUtils; |
22 | use Wikimedia\Parsoid\Utils\Title; |
23 | use Wikimedia\Parsoid\Utils\TitleException; |
24 | use Wikimedia\Parsoid\Utils\TokenUtils; |
25 | use Wikimedia\Parsoid\Utils\UrlUtils; |
26 | use Wikimedia\Parsoid\Utils\Utils; |
27 | use Wikimedia\Parsoid\Wikitext\ContentModelHandler as WikitextContentModelHandler; |
28 | use Wikimedia\Parsoid\Wt2Html\Frame; |
29 | use Wikimedia\Parsoid\Wt2Html\PageConfigFrame; |
30 | use Wikimedia\Parsoid\Wt2Html\ParserPipelineFactory; |
31 | use Wikimedia\Parsoid\Wt2Html\TreeBuilder\RemexPipeline; |
32 | |
33 | /** |
34 | * Environment/Envelope class for Parsoid |
35 | * |
36 | * Carries around the SiteConfig and PageConfig during an operation |
37 | * and provides certain other services. |
38 | */ |
39 | class Env { |
40 | |
41 | /** @var SiteConfig */ |
42 | private $siteConfig; |
43 | |
44 | /** @var PageConfig */ |
45 | private $pageConfig; |
46 | |
47 | /** @var DataAccess */ |
48 | private $dataAccess; |
49 | |
50 | /** @var ContentMetadataCollector */ |
51 | private $metadata; |
52 | |
53 | /** @var TOCData Table of Contents metadata for the article */ |
54 | private $tocData; |
55 | |
56 | /** |
57 | * The top-level frame for this conversion. This largely wraps the |
58 | * PageConfig. |
59 | * |
60 | * In the future we may replace PageConfig with the Frame, and add |
61 | * a $this->currentFrame property (relocated from TokenTransformManager). |
62 | * @var Frame |
63 | */ |
64 | public $topFrame; |
65 | // XXX In the future, perhaps replace PageConfig with the Frame, and |
66 | // add $this->currentFrame (relocated from TokenTransformManager) if/when |
67 | // we've removed async parsing. |
68 | |
69 | /** |
70 | * @var bool Are we using native template expansion? |
71 | * |
72 | * Parsoid implements native template expansion, which is currently |
73 | * only used during parser tests; in production, template expansion |
74 | * is done via MediaWiki's legacy preprocessor. |
75 | * |
76 | * FIXME: Hopefully this distinction can be removed when we're entirely |
77 | * in PHP land. |
78 | */ |
79 | private $nativeTemplateExpansion; |
80 | |
81 | /** @var array<string,int> */ |
82 | private $wt2htmlUsage = []; |
83 | |
84 | /** @var array<string,int> */ |
85 | private $html2wtUsage = []; |
86 | |
87 | /** @var bool */ |
88 | private $profiling = false; |
89 | |
90 | /** @var array<Profile> */ |
91 | private $profileStack = []; |
92 | |
93 | /** @var bool */ |
94 | private $wrapSections; |
95 | |
96 | /** @var ('byte'|'ucs2'|'char') */ |
97 | private $requestOffsetType = 'byte'; |
98 | |
99 | /** @var ('byte'|'ucs2'|'char') */ |
100 | private $currentOffsetType = 'byte'; |
101 | |
102 | /** @var bool */ |
103 | private $skipLanguageConversionPass = false; |
104 | |
105 | /** @var array<string,mixed> */ |
106 | private $behaviorSwitches = []; |
107 | |
108 | /** |
109 | * Maps fragment id to the fragment forest (array of Nodes). |
110 | * @var array<string,DocumentFragment> |
111 | */ |
112 | private $fragmentMap = []; |
113 | |
114 | /** |
115 | * Maps pfragment id to a PFragment. |
116 | * @var array<string,PFragment> |
117 | */ |
118 | private array $pFragmentMap = []; |
119 | |
120 | /** |
121 | * @var int used to generate fragment ids as needed during parse |
122 | */ |
123 | private $fid = 1; |
124 | |
125 | /** @var int used to generate uids as needed during this parse */ |
126 | private $uid = 1; |
127 | |
128 | /** @var int used to generate annotation uids as needed during this parse */ |
129 | private $annUid = 0; |
130 | |
131 | /** @var array[] Lints recorded */ |
132 | private $lints = []; |
133 | |
134 | /** @var bool logLinterData */ |
135 | public $logLinterData = false; |
136 | |
137 | /** @var array linterOverrides */ |
138 | private $linterOverrides = []; |
139 | |
140 | /** @var bool[] */ |
141 | private $traceFlags; |
142 | |
143 | /** @var bool[] */ |
144 | private $dumpFlags; |
145 | |
146 | /** @var bool[] */ |
147 | private $debugFlags; |
148 | |
149 | /** @var ParsoidLogger */ |
150 | private $parsoidLogger; |
151 | |
152 | /** |
153 | * The default content version that Parsoid assumes it's serializing or |
154 | * updating in the pb2pb endpoints |
155 | * |
156 | * @var string |
157 | */ |
158 | private $inputContentVersion; |
159 | |
160 | /** |
161 | * The default content version that Parsoid will generate. |
162 | * |
163 | * @var string |
164 | */ |
165 | private $outputContentVersion; |
166 | |
167 | /** |
168 | * If non-null, the language variant used for Parsoid HTML; |
169 | * we convert to this if wt2html, or from this if html2wt. |
170 | * @var ?Bcp47Code |
171 | */ |
172 | private $htmlVariantLanguage; |
173 | |
174 | /** |
175 | * If non-null, the language variant to be used for wikitext. |
176 | * If null, heuristics will be used to identify the original wikitext variant |
177 | * in wt2html mode, and in html2wt mode new or edited HTML will be left unconverted. |
178 | * @var ?Bcp47Code |
179 | */ |
180 | private $wtVariantLanguage; |
181 | |
182 | /** @var ParserPipelineFactory */ |
183 | private $pipelineFactory; |
184 | |
185 | /** |
186 | * FIXME Used in DedupeStyles::dedupe() |
187 | * @var array |
188 | */ |
189 | public $styleTagKeys = []; |
190 | |
191 | /** |
192 | * The DomPageBundle holding the JSON data for data-parsoid and data-mw |
193 | * attributes, or `null` if these are to be encoded as inline HTML |
194 | * attributes. |
195 | */ |
196 | public ?DomPageBundle $pageBundle = null; |
197 | |
198 | /** @var Document */ |
199 | private $domDiff; |
200 | |
201 | /** @var bool */ |
202 | public $hasAnnotations; |
203 | |
204 | /** |
205 | * PORT-FIXME: public currently |
206 | * Cache of wikitext source for a title |
207 | * @var array |
208 | */ |
209 | public $pageCache = []; |
210 | |
211 | /** |
212 | * PORT-FIXME: public currently |
213 | * HTML Cache of expanded transclusions to support |
214 | * reusing expansions from HTML of previous revision. |
215 | * @var array |
216 | */ |
217 | public $transclusionCache = []; |
218 | |
219 | /** |
220 | * PORT-FIXME: public currently |
221 | * HTML Cache of expanded media wikitext to support |
222 | * reusing expansions from HTML of previous revision. |
223 | * @var array |
224 | */ |
225 | public $mediaCache = []; |
226 | |
227 | /** |
228 | * PORT-FIXME: public currently |
229 | * HTML Cache of expanded extension tags to support |
230 | * reusing expansions from HTML of previous revision. |
231 | * @var array |
232 | */ |
233 | public $extensionCache = []; |
234 | |
235 | /** |
236 | * The current top-level document. During wt2html, this will be the document |
237 | * associated with the RemexPipeline. During html2wt, this will be the |
238 | * input document, typically passed as a constructor option. |
239 | * |
240 | * @var Document |
241 | */ |
242 | private $topLevelDoc; |
243 | |
244 | /** |
245 | * The RemexPipeline used during a wt2html operation. |
246 | * |
247 | * @var RemexPipeline|null |
248 | */ |
249 | private $remexPipeline; |
250 | |
251 | /** |
252 | * @var WikitextContentModelHandler |
253 | */ |
254 | private $wikitextContentModelHandler; |
255 | |
256 | private ?Title $cachedContextTitle = null; |
257 | |
/**
 * Build the environment for a single Parsoid operation.
 *
 * Besides storing the supplied configuration objects, this creates the
 * TOCData, the top-level PageConfigFrame, the ParserPipelineFactory, the
 * ParsoidLogger and the wikitext content model handler, sets up the
 * top-level document (see setupTopLevelDoc()) and registers the TOCData
 * with the metadata collector.
 *
 * @param SiteConfig $siteConfig
 * @param PageConfig $pageConfig
 * @param DataAccess $dataAccess
 * @param ContentMetadataCollector $metadata
 * @param ?array $options A null value is treated as an empty array.
 *  - wrapSections: (bool) Whether `<section>` wrappers should be added.
 *      Defaults to true.
 *  - pageBundle: (bool) When true, sets ids on nodes and stores
 *      data-* attributes in a JSON blob in Env::$pageBundle
 *  - traceFlags: (array) Flags indicating which components need to be traced.
 *      The presence of a 'time' key enables profiling.
 *  - dumpFlags: (bool[]) Dump flags
 *  - debugFlags: (bool[]) Debug flags
 *  - nativeTemplateExpansion: boolean
 *  - offsetType: 'byte' (default), 'ucs2', 'char'
 *      See `Parsoid\Wt2Html\DOM\Processors\ConvertOffsets`.
 *  - logLinterData: (bool) Should we log linter data if linting is enabled?
 *  - linterOverrides: (array) Override the site linting configs.
 *  - skipLanguageConversionPass: (bool) Should we skip the language
 *      conversion pass? (defaults to false)
 *  - htmlVariantLanguage: Bcp47Code|null
 *      If non-null, the language variant used for Parsoid HTML
 *      as a BCP 47 object.
 *      We convert to this if wt2html, or from this if html2wt.
 *      The value is passed through Utils::mwCodeToBcp47() in strict mode.
 *  - wtVariantLanguage: Bcp47Code|null
 *      If non-null, the language variant to be used for wikitext
 *      as a BCP 47 object.
 *      If null, heuristics will be used to identify the original
 *      wikitext variant in wt2html mode, and in html2wt mode new
 *      or edited HTML will be left unconverted.
 *      The value is passed through Utils::mwCodeToBcp47() in strict mode.
 *  - inputContentVersion: (string) Content version assumed for input.
 *      Defaults to Parsoid::defaultHTMLVersion(). Not validated here.
 *  - outputContentVersion: (string) Content version to generate.
 *      Defaults to Parsoid::defaultHTMLVersion() and must be one of
 *      Parsoid::AVAILABLE_VERSIONS.
 *  - logLevels: (string[]) Levels to log.
 *      Defaults to 'fatal', 'error', 'warn' and 'info'.
 *  - topLevelDoc: (Document) Set explicitly when serializing otherwise
 *      it gets initialized for parsing.
 * @throws \UnexpectedValueException If the output content version is not
 *   listed in Parsoid::AVAILABLE_VERSIONS.
 * @throws \RuntimeException If the platform check in checkPlatform() fails.
 */
public function __construct(
	SiteConfig $siteConfig,
	PageConfig $pageConfig,
	DataAccess $dataAccess,
	ContentMetadataCollector $metadata,
	?array $options = null
) {
	self::checkPlatform();
	// Normalize null to an empty array so every lookup below can use `??`.
	$options ??= [];
	$this->siteConfig = $siteConfig;
	$this->pageConfig = $pageConfig;
	$this->dataAccess = $dataAccess;
	$this->metadata = $metadata;
	$this->tocData = new TOCData();
	// Note: $this is handed out here (and to the pipeline factory below)
	// before the rest of the constructor has run.
	$this->topFrame = new PageConfigFrame( $this, $pageConfig, $siteConfig );
	$this->wrapSections = (bool)( $options['wrapSections'] ?? true );
	$this->pipelineFactory = new ParserPipelineFactory( $this );
	$defaultContentVersion = Parsoid::defaultHTMLVersion();
	$this->inputContentVersion = $options['inputContentVersion'] ?? $defaultContentVersion;
	// FIXME: We should have a check for the supported input content versions as well.
	// That will require a separate constant.
	$this->outputContentVersion = $options['outputContentVersion'] ?? $defaultContentVersion;
	if ( !in_array( $this->outputContentVersion, Parsoid::AVAILABLE_VERSIONS, true ) ) {
		throw new \UnexpectedValueException(
			$this->outputContentVersion . ' is not an available content version.' );
	}
	$this->skipLanguageConversionPass =
		$options['skipLanguageConversionPass'] ?? false;
	// Empty/missing variant options leave the corresponding property null.
	$this->htmlVariantLanguage = !empty( $options['htmlVariantLanguage'] ) ?
		Utils::mwCodeToBcp47(
			$options['htmlVariantLanguage'],
			// Be strict in what we accept here.
			true, $this->siteConfig->getLogger()
		) : null;
	$this->wtVariantLanguage = !empty( $options['wtVariantLanguage'] ) ?
		Utils::mwCodeToBcp47(
			$options['wtVariantLanguage'],
			// Be strict in what we accept here.
			true, $this->siteConfig->getLogger()
		) : null;
	$this->nativeTemplateExpansion = !empty( $options['nativeTemplateExpansion'] );
	// The offset type is stored as given; no validation happens here.
	$this->requestOffsetType = $options['offsetType'] ?? 'byte';
	$this->logLinterData = !empty( $options['logLinterData'] );
	$this->linterOverrides = $options['linterOverrides'] ?? [];
	$this->traceFlags = $options['traceFlags'] ?? [];
	$this->dumpFlags = $options['dumpFlags'] ?? [];
	$this->debugFlags = $options['debugFlags'] ?? [];
	$this->parsoidLogger = new ParsoidLogger( $this->siteConfig->getLogger(), [
		'logLevels' => $options['logLevels'] ?? [ 'fatal', 'error', 'warn', 'info' ],
		'debugFlags' => $this->debugFlags,
		'dumpFlags' => $this->dumpFlags,
		'traceFlags' => $this->traceFlags
	] );
	// The 'time' trace flag doubles as the switch that turns profiling on.
	if ( $this->hasTraceFlag( 'time' ) ) {
		$this->profiling = true;
	}
	$this->setupTopLevelDoc( $options['topLevelDoc'] ?? null );
	// The page bundle needs the top-level document, so it is created only
	// after setupTopLevelDoc() has run.
	if ( $options['pageBundle'] ?? false ) {
		$this->pageBundle = DomPageBundle::newEmpty(
			$this->topLevelDoc
		);
	}
	// NOTE:
	// Don't try to do this in setupTopLevelDoc since it is called on existing Env objects
	// in a couple of places. That then leads to a multiple-write to tocdata property on
	// the metadata object.
	//
	// setupTopLevelDoc is called outside Env in these couple cases:
	// 1. html2wt in ContentModelHandler for dealing with
	//    missing original HTML.
	// 2. ParserTestRunner's html2html tests
	//
	// That is done to either reuse an existing Env object (as in 1.)
	// OR to refresh the attached DOC (html2html as in 2.).
	// Constructing a new Env in both cases could eliminate this issue.
	$this->metadata->setTOCData( $this->tocData );

	$this->wikitextContentModelHandler = new WikitextContentModelHandler( $this );
}
370 | |
/**
 * Sanity-check the PHP platform: strtolower() must lower-case the ASCII
 * letters A-Z while leaving every byte in the range 0x80-0xFF untouched.
 *
 * The outcome is remembered in a static local, so once the check has
 * passed it is not repeated on later calls.
 *
 * @throws \RuntimeException If strtolower() alters any high byte (or
 *   fails to lower-case ASCII).
 */
private static function checkPlatform() {
	static $alreadyVerified;
	if ( $alreadyVerified ) {
		return;
	}

	// Every byte value from 0x80 through 0xFF, in order.
	$highBytes =
		"\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" .
		"\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" .
		"\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf" .
		"\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" .
		"\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" .
		"\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" .
		"\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" .
		"\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff";

	$probe = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' . $highBytes;
	$expected = 'abcdefghijklmnopqrstuvwxyz' . $highBytes;
	if ( strtolower( $probe ) !== $expected ) {
		throw new \RuntimeException( 'strtolower() doesn\'t work -- ' .
			'please set the locale to C or a UTF-8 variant such as C.UTF-8' );
	}

	// Only reached when the check passed; a failure is re-checked next time.
	$alreadyVerified = true;
}
395 | |
/**
 * Report whether profiling is switched on for this environment.
 *
 * The constructor turns this on when the 'time' trace flag is present.
 *
 * @return bool
 */
public function profiling(): bool {
	return $this->profiling;
}
403 | |
/**
 * Return the most recently pushed profile, i.e. the last entry of the
 * profile stack.
 *
 * FIXME: This implicitly assumes sequential in-order processing
 * This wouldn't have worked in Parsoid/JS and may not work in the future
 * depending on how / if we restructure the pipeline for concurrency, etc.
 *
 * @return Profile
 */
public function getCurrentProfile(): Profile {
	$stack = $this->profileStack;
	return PHPUtils::lastItem( $stack );
}
416 | |
/**
 * A new pipeline has started: create a fresh Profile, push it onto the
 * profile stack and, when a profile was already active, register the new
 * one as a nested profile of that previous top-of-stack profile.
 *
 * @return Profile The newly created profile.
 */
public function pushNewProfile(): Profile {
	// Capture the enclosing profile (if any) before the stack changes.
	$parentProfile = null;
	if ( count( $this->profileStack ) > 0 ) {
		$parentProfile = $this->getCurrentProfile();
	}

	$newProfile = new Profile();
	$this->profileStack[] = $newProfile;

	if ( $parentProfile !== null ) {
		$parentProfile->pushNestedProfile( $newProfile );
	}
	return $newProfile;
}
430 | |
/**
 * A pipeline has ended: remove the top entry of the profile stack and
 * hand it back.
 *
 * NOTE(review): array_pop() yields null on an empty stack, which would
 * conflict with the declared return type; presumably calls are always
 * paired with pushNewProfile() -- confirm against callers.
 *
 * @return Profile The profile that was on top of the stack.
 */
public function popProfile(): Profile {
	$finished = array_pop( $this->profileStack );
	return $finished;
}
438 | |
/**
 * Is at least one trace flag configured?
 *
 * @return bool True when the trace flag array is non-empty.
 */
public function hasTraceFlags(): bool {
	return !empty( $this->traceFlags );
}
442 | |
/**
 * Check whether tracing was requested for a particular component.
 *
 * The test is isset()-based: the flag counts as present when the key
 * exists in the trace flag array with a non-null value.
 *
 * @param string $flag Flag name.
 * @return bool
 */
public function hasTraceFlag( string $flag ): bool {
	return isset( $this->traceFlags[$flag] );
}
452 | |
/**
 * Is at least one dump flag configured?
 *
 * @return bool True when the dump flag array is non-empty.
 */
public function hasDumpFlags(): bool {
	return !empty( $this->dumpFlags );
}
456 | |
/**
 * Check whether dumping was requested for a particular piece of state.
 *
 * The test is isset()-based: the flag counts as present when the key
 * exists in the dump flag array with a non-null value.
 *
 * @param string $flag Flag name.
 * @return bool
 */
public function hasDumpFlag( string $flag ): bool {
	return isset( $this->dumpFlags[$flag] );
}
466 | |
/**
 * Emit a dump string (requested through dumpFlags) by forwarding it to
 * log() under the 'dump' prefix.
 *
 * @param string $str Text to write out.
 */
public function writeDump( string $str ) {
	$this->log( 'dump', $str );
}
474 | |
/**
 * Accessor for the SiteConfig this environment was constructed with.
 *
 * @return SiteConfig
 */
public function getSiteConfig(): SiteConfig {
	return $this->siteConfig;
}
482 | |
/**
 * Accessor for the PageConfig this environment was constructed with.
 *
 * @return PageConfig
 */
public function getPageConfig(): PageConfig {
	return $this->pageConfig;
}
490 | |
/**
 * Accessor for the DataAccess object this environment was constructed with.
 *
 * @return DataAccess
 */
public function getDataAccess(): DataAccess {
	return $this->dataAccess;
}
498 | |
/**
 * Accessor for the ContentMetadataCollector this environment was
 * constructed with.
 *
 * @return ContentMetadataCollector
 */
public function getMetadata(): ContentMetadataCollector {
	return $this->metadata;
}
506 | |
/**
 * Accessor for the article's Table of Contents data. The TOCData object
 * is created by the constructor and registered with the metadata collector.
 *
 * @return TOCData
 */
public function getTOCData(): TOCData {
	return $this->tocData;
}
514 | |
/**
 * Is Parsoid's native template expansion in use?
 *
 * Reflects the `nativeTemplateExpansion` constructor option.
 *
 * @return bool
 */
public function nativeTemplateExpansionEnabled(): bool {
	return $this->nativeTemplateExpansion;
}
518 | |
/**
 * Peek at the uid counter without advancing it. This is the value the
 * next generateUID() call will hand out.
 *
 * @return int
 */
public function getUID(): int {
	return $this->uid;
}
526 | |
/**
 * Peek at the fragment id counter without advancing it. This is the
 * number the next newFragmentId() call will use.
 *
 * @return int
 */
public function getFID(): int {
	return $this->fid;
}
534 | |
/**
 * Should `<section>` wrappers be added? Reflects the `wrapSections`
 * constructor option (true unless overridden).
 *
 * @todo Does this actually belong here? Should it be a behavior switch?
 * @return bool
 */
public function getWrapSections(): bool {
	return $this->wrapSections;
}
543 | |
/**
 * Accessor for the ParserPipelineFactory created for this environment
 * by the constructor.
 *
 * @return ParserPipelineFactory
 */
public function getPipelineFactory(): ParserPipelineFactory {
	return $this->pipelineFactory;
}
551 | |
/**
 * Which external format was requested for character offsets in source
 * ranges (the `offsetType` constructor option, 'byte' by default).
 *
 * DomSourceRange and SourceRange data is always held internally as UTF-8
 * byte offsets for efficiency (it matches the native string
 * representation); the offsets can be converted to another format for
 * external use when emitting wt2html output or reading html2wt input.
 *
 * @see Parsoid\Wt2Html\DOM\Processors\ConvertOffsets
 * @return ('byte'|'ucs2'|'char')
 */
public function getRequestOffsetType(): string {
	return $this->requestOffsetType;
}
565 | |
/**
 * Which format the character offsets in source ranges are in right now.
 *
 * Used to track whether the internal byte offsets have already been
 * converted to the external format reported by `getRequestOffsetType`.
 *
 * @see Parsoid\Wt2Html\DOM\Processors\ConvertOffsets
 * @return ('byte'|'ucs2'|'char')
 */
public function getCurrentOffsetType(): string {
	return $this->currentOffsetType;
}
578 | |
/**
 * Record the format the source-range offsets are now in. The value is
 * stored as given, without validation. Intended to be called only by
 * Parsoid\Wt2Html\DOM\Processors\ConvertOffsets.
 *
 * @param ('byte'|'ucs2'|'char') $offsetType 'byte', 'ucs2', or 'char'
 */
public function setCurrentOffsetType( string $offsetType ) {
	$this->currentOffsetType = $offsetType;
}
587 | |
/**
 * Return the page's title (taken from the PageConfig link target) as a
 * Parsoid Title. The Title is built on first use and cached afterwards.
 *
 * @return Title
 */
public function getContextTitle(): Title {
	// Right-hand side is evaluated only while the cache is still null.
	$this->cachedContextTitle ??= Title::newFromLinkTarget(
		$this->pageConfig->getLinkTarget(), $this->siteConfig
	);
	return $this->cachedContextTitle;
}
600 | |
/**
 * Resolve strings that are page-fragments or subpage references with
 * respect to the current page name.
 *
 * Behavior, in order:
 *  - A lone fragment ("#foo") is returned with the context title's
 *    prefixed text prepended.
 *  - If the context title's namespace has subpages, leading "../"
 *    sequences and a leading "/" are resolved against the context title.
 *    Relative links that climb too many levels, or that are relative to a
 *    title starting with "/", are returned as the original
 *    (untrimmed) input.
 *  - Unless $resolveOnly is set, a resolved subpage reference has its
 *    trailing slashes removed and is normalized via normalizedTitleKey(),
 *    and a leading ':' is stripped from the result.
 *
 * @param string $str Page fragment or subpage reference. Not URL encoded.
 * @param bool $resolveOnly If true, only trim and add the current title to
 *  lone fragments. TODO: This parameter seems poorly named.
 * @return string Resolved title
 * @throws TitleException If normalizing a resolved subpage reference
 *  fails (normalizedTitleKey() is called with exceptions enabled).
 */
public function resolveTitle( string $str, bool $resolveOnly = false ): string {
	$origName = $str;
	$str = trim( $str );

	$title = $this->getContextTitle();

	// Resolve lonely fragments (important if the current page is a subpage,
	// otherwise the relative link will be wrong)
	if ( $str !== '' && $str[0] === '#' ) {
		return $title->getPrefixedText() . $str;
	}

	// Default return value
	$titleKey = $str;
	if ( $this->getSiteConfig()->namespaceHasSubpages( $title->getNamespace() ) ) {
		// Resolve subpages
		$reNormalize = false;
		if ( preg_match( '!^(?:\.\./)+!', $str, $relUp ) ) {
			// Levels are indicated by '../'. The match is a whole number of
			// these 3-byte groups, so the integer division is exact and
			// $levels is guaranteed to be an int for array_slice()/substr().
			$levels = intdiv( strlen( $relUp[0] ), 3 );
			$titleBits = explode( '/', $title->getPrefixedText() );
			if ( $titleBits[0] === '' ) {
				// FIXME: Punt on subpages of titles starting with "/" for now
				return $origName;
			}
			if ( count( $titleBits ) <= $levels ) {
				// Too many levels -- invalid relative link
				return $origName;
			}
			$newBits = array_slice( $titleBits, 0, -$levels );
			if ( $str !== $relUp[0] ) {
				// Something follows the "../" run: append it as the last segment.
				$newBits[] = substr( $str, $levels * 3 );
			}
			$str = implode( '/', $newBits );
			$reNormalize = true;
		} elseif ( $str !== '' && $str[0] === '/' ) {
			// Resolve absolute subpage links
			$str = $title->getPrefixedText() . $str;
			$reNormalize = true;
		}

		if ( $reNormalize && !$resolveOnly ) {
			// Remove final slashes if present.
			// See https://gerrit.wikimedia.org/r/173431
			$str = rtrim( $str, '/' );
			$titleKey = (string)$this->normalizedTitleKey( $str );
		}
	}

	// Strip leading ':'
	if ( $titleKey !== '' && $titleKey[0] === ':' && !$resolveOnly ) {
		$titleKey = substr( $titleKey, 1 );
	}
	return $titleKey;
}
665 | |
/**
 * Compute the normalized key of a title string.
 *
 * The string is turned into a Title (default namespace 0) via
 * makeTitleFromURLDecodedStr(); the key is the Title's prefixed DB key
 * when the fragment is to be ignored, and its full DB key otherwise.
 *
 * @param string $str Should be in url-decoded format.
 * @param bool $noExceptions Return null instead of throwing exceptions.
 * @param bool $ignoreFragment Ignore the fragment, if any.
 * @return string|null Normalized title key for a title string (or null for invalid titles).
 */
public function normalizedTitleKey(
	string $str, bool $noExceptions = false, bool $ignoreFragment = false
): ?string {
	$title = $this->makeTitleFromURLDecodedStr( $str, 0, $noExceptions );
	if ( !$title ) {
		return null;
	}
	if ( $ignoreFragment ) {
		return $title->getPrefixedDBKey();
	}
	return $title->getFullDBKey();
}
685 | |
/**
 * Build a Title object from URL-decoded text.
 *
 * Text starting with '#', '/' or '../' is relative to the current page,
 * so the context title's namespace replaces $defaultNs in that case. The
 * text is then run through resolveTitle() before the Title is created.
 *
 * @param string $text URL-decoded text
 * @param ?int $defaultNs
 * @param bool $noExceptions When true, a TitleException results in a
 *  null return value instead of being propagated.
 * @return Title|null
 */
private function makeTitle( string $text, ?int $defaultNs = null, bool $noExceptions = false ): ?Title {
	try {
		$isRelative = preg_match( '!^(?:[#/]|\.\./)!', $text ) === 1;
		if ( $isRelative ) {
			$defaultNs = $this->getContextTitle()->getNamespace();
		}
		return Title::newFromText(
			$this->resolveTitle( $text ), $this->getSiteConfig(), $defaultNs
		);
	} catch ( TitleException $e ) {
		if ( !$noExceptions ) {
			throw $e;
		}
		return null;
	}
}
707 | |
/**
 * Build a Title object from URL-encoded text: the input is
 * percent-decoded with Utils::decodeURIComponent() and then handed to
 * makeTitle().
 *
 * @see Title::newFromURL in MediaWiki
 * @param string $str URL-encoded text
 * @param ?int $defaultNs
 * @param bool $noExceptions
 * @return Title|null
 */
public function makeTitleFromText(
	string $str, ?int $defaultNs = null, bool $noExceptions = false
): ?Title {
	$decoded = Utils::decodeURIComponent( $str );
	return $this->makeTitle( $decoded, $defaultNs, $noExceptions );
}
721 | |
/**
 * Build a Title object from text that is already URL-decoded. This is a
 * public pass-through to makeTitle().
 *
 * @see Title::newFromText in MediaWiki
 * @param string $str URL-decoded text
 * @param ?int $defaultNs
 * @param bool $noExceptions
 * @return Title|null
 */
public function makeTitleFromURLDecodedStr(
	string $str, ?int $defaultNs = null, bool $noExceptions = false
): ?Title {
	$title = $this->makeTitle( $str, $defaultNs, $noExceptions );
	return $title;
}
735 | |
/**
 * Build a link to a local Title: the site's relative link prefix
 * followed by the sanitized full DB key of the title.
 *
 * @param Title $title
 * @return string
 */
public function makeLink( Title $title ): string {
	// T380676: This method *should* be used only for local titles,
	// (ie $title->getInterwiki() should be '') but apparently we
	// are using it for interwiki/interlanguage links as well.
	$prefix = $this->getSiteConfig()->relativeLinkPrefix();
	$target = Sanitizer::sanitizeTitleURI( $title->getFullDBKey(), false );
	return $prefix . $target;
}
750 | |
/**
 * Decide whether an href attribute value could be a valid link target.
 *
 * The value is flattened to a string, percent-decoded, resolved against
 * the current page (resolve-only mode) and then normalized with
 * exceptions suppressed; it is valid when normalization yields a key.
 *
 * @param string|(Token|string)[] $href
 * @return bool
 */
public function isValidLinkTarget( $href ): bool {
	$hrefString = TokenUtils::tokensToString( $href );

	// decode percent-encoding so that we can reliably detect
	// bad page title characters
	$decodedHref = Utils::decodeURIComponent( $hrefString );
	$resolved = $this->resolveTitle( $decodedHref, true );
	$key = $this->normalizedTitleKey( $resolved, true );
	return $key !== null;
}
764 | |
/**
 * Hand out the current uid and advance the counter by one.
 *
 * @return int The uid value before the increment.
 */
public function generateUID(): int {
	$issued = $this->uid;
	$this->uid++;
	return $issued;
}
772 | |
/**
 * Hand out the current annotation uid and advance the counter by one.
 *
 * @return int The annotation uid value before the increment.
 */
public function generateAnnotationUID(): int {
	$issued = $this->annUid;
	$this->annUid++;
	return $issued;
}
780 | |
/**
 * Generate a new annotation id.
 *
 * @return string "mwa" followed by a freshly generated annotation uid
 */
public function newAnnotationId(): string {
	$uid = $this->generateAnnotationUID();
	return "mwa" . $uid;
}
788 | |
/**
 * Generate a new about id.
 *
 * @return string '#mwt' followed by a freshly generated uid
 */
public function newAboutId(): string {
	$uid = $this->generateUID();
	return '#mwt' . $uid;
}
796 | |
/**
 * Remember the DOM diff document on this environment so that it can
 * later be retrieved with ::getDOMDiff().
 *
 * @param Document $doc
 */
public function setDOMDiff( $doc ): void {
	$this->domDiff = $doc;
}
804 | |
/**
 * Return the DOM diff document stored on this environment.
 *
 * @return Document|null The stored document; the signature allows null
 */
public function getDOMDiff(): ?Document {
	return $this->domDiff;
}
812 | |
/**
 * Generate a new fragment id.
 *
 * Uses the current value of the fragment counter and then advances
 * the counter by one.
 *
 * @return string "mwf" followed by the counter value
 */
public function newFragmentId(): string {
	$current = $this->fid;
	$this->fid++;
	return "mwf" . (string)$current;
}
820 | |
/**
 * When an environment is constructed, we initialize a document (and
 * RemexPipeline) to be used throughout the parse.
 *
 * If a document is supplied it becomes the top-level document and the
 * stored RemexPipeline is cleared; otherwise a fresh RemexPipeline is
 * created and its document is used. In both cases the resulting
 * top-level document is passed to DOMDataUtils::prepareDoc().
 *
 * @param ?Document $topLevelDoc
 */
public function setupTopLevelDoc( ?Document $topLevelDoc = null ) {
	if ( !$topLevelDoc ) {
		// No document given: build a pipeline and adopt its document.
		$pipeline = new RemexPipeline( $this );
		$this->remexPipeline = $pipeline;
		$this->topLevelDoc = $pipeline->doc;
	} else {
		// Caller-provided document: there is no pipeline backing it.
		$this->remexPipeline = null;
		$this->topLevelDoc = $topLevelDoc;
	}
	DOMDataUtils::prepareDoc( $this->topLevelDoc );
}
837 | |
/**
 * Return the top-level document stored on this environment.
 *
 * @return Document
 */
public function getTopLevelDoc(): Document {
	return $this->topLevelDoc;
}
841 | |
/**
 * Replace the stored top-level document.
 *
 * Unlike ::setupTopLevelDoc(), this only swaps the stored reference:
 * it neither touches the stored RemexPipeline nor calls
 * DOMDataUtils::prepareDoc().
 *
 * FIXME: Callers should use ::setupTopLevelDoc instead
 *
 * @param Document $doc
 */
public function setTopLevelDoc( Document $doc ): void {
	$this->topLevelDoc = $doc;
}
846 | |
/**
 * Fetch a RemexPipeline.
 *
 * For fragment parsing a brand new pipeline is created on every call;
 * otherwise the pipeline stored on this environment is returned.
 *
 * NOTE(review): ::setupTopLevelDoc() sets the stored pipeline to null
 * when it is handed a document; returning it here would then violate
 * the non-nullable return type. Confirm callers never hit that path.
 *
 * @param bool $toFragment Whether a fresh pipeline for a fragment is wanted
 * @return RemexPipeline
 */
public function fetchRemexPipeline( bool $toFragment ): RemexPipeline {
	if ( $toFragment ) {
		$fragmentPipeline = new RemexPipeline( $this );
		// Attach the top-level bag to the document, for the convenience
		// of code that modifies the data within the RemexHtml TreeBuilder
		// pipeline, prior to the migration of nodes to the top-level
		// document.
		DOMDataUtils::prepareChildDoc( $this->topLevelDoc, $fragmentPipeline->doc );
		return $fragmentPipeline;
	}
	return $this->remexPipeline;
}
860 | |
/**
 * Record a behavior switch, overwriting any state previously stored
 * under the same switch name.
 *
 * @param string $switch Switch name
 * @param mixed $state Relevant state data to record
 */
public function setBehaviorSwitch( string $switch, $state ): void {
	$this->behaviorSwitches[$switch] = $state;
}
870 | |
/**
 * Fetch the state of a previously-recorded behavior switch.
 *
 * Note that a switch whose recorded state is null is indistinguishable
 * from one that was never set: $default is returned in both cases.
 *
 * @param string $switch Switch name
 * @param mixed $default Default value if the switch was never set
 * @return mixed State data that was previously passed to setBehaviorSwitch(), or $default
 */
public function getBehaviorSwitch( string $switch, $default = null ) {
	$state = $this->behaviorSwitches[$switch] ?? $default;
	return $state;
}
881 | |
/**
 * Return the complete map of stored DOM fragments.
 *
 * @return array<string,DocumentFragment> Fragments keyed by fragment id
 */
public function getDOMFragmentMap(): array {
	return $this->fragmentMap;
}
888 | |
/**
 * Look up the DOM fragment stored under the given id.
 *
 * The id is not checked for existence before the lookup.
 *
 * @param string $id Fragment id
 * @return DocumentFragment
 */
public function getDOMFragment( string $id ): DocumentFragment {
	return $this->fragmentMap[$id];
}
896 | |
/**
 * Store a DOM forest under a fragment id, replacing any fragment
 * previously stored under that id.
 *
 * @param string $id Fragment id
 * @param DocumentFragment $forest DOM forest
 *   to store against the fragment id
 */
public function setDOMFragment( string $id, DocumentFragment $forest ): void {
	$this->fragmentMap[$id] = $forest;
}
907 | |
/**
 * Drop a DOM fragment from the fragment map.
 *
 * The fragment is expected to have no child nodes left by the time it
 * is removed; this is enforced with an invariant assertion.
 *
 * @param string $id Fragment id
 */
public function removeDOMFragment( string $id ): void {
	$fragment = $this->fragmentMap[$id];
	$isEmpty = !$fragment->hasChildNodes();
	Assert::invariant( $isEmpty, 'Fragment should be empty.' );
	unset( $this->fragmentMap[$id] );
}
915 | |
/**
 * Look up the PFragment stored under the given id.
 *
 * The id is not checked for existence before the lookup.
 *
 * @param string $id
 * @return PFragment
 */
public function getPFragment( string $id ): PFragment {
	return $this->pFragmentMap[$id];
}
919 | |
/**
 * Merge additional entries into the PFragment map.
 *
 * Array union is used, so entries already present in the map win over
 * entries in $mapping that share the same key.
 *
 * @param array<string,PFragment> $mapping
 */
public function addToPFragmentMap( array $mapping ): void {
	$this->pFragmentMap = $this->pFragmentMap + $mapping;
}
924 | |
/**
 * Record a lint.
 *
 * Nothing is recorded when linting is not enabled for $type. A lint
 * without DSR information is logged as an error and dropped.
 *
 * @param string $type Lint type key
 * @param array $lintData Data for the lint.
 *  - dsr: (SourceRange)
 *  - params: (array)
 *  - templateInfo: (array|null)
 */
public function recordLint( string $type, array $lintData ): void {
	if ( !$this->linting( $type ) ) {
		return;
	}

	if ( empty( $lintData['dsr'] ) ) {
		$this->log( 'error/lint', "Missing DSR; msg=", $lintData );
		return;
	}

	// This will always be recorded as a native 'byte' offset
	$dsrJson = $lintData['dsr']->toJsonArray();
	$lintData['dsr'] = $dsrJson;
	if ( !isset( $lintData['params'] ) ) {
		$lintData['params'] = [];
	}

	// 'type' goes first; with array union it also takes precedence
	// over any 'type' key present in $lintData.
	$entry = [ 'type' => $type ] + $lintData;
	$this->lints[] = $entry;
}
949 | |
/**
 * Retrieve the lints recorded so far.
 *
 * @return array[] The stored lint records
 */
public function getLints(): array {
	return $this->lints;
}
957 | |
/**
 * Replace the recorded lints with the passed array.
 *
 * FIXME: This is currently needed to reset lints after converting
 * DSR offsets because of ordering of DOM passes. So, in reality,
 * there should be no real use case for setting this anywhere else
 * but from that single callsite.
 *
 * @param array $lints Lint records that replace the current list wholesale
 */
public function setLints( array $lints ): void {
	$this->lints = $lints;
}
971 | |
/**
 * Forward a log call to the Parsoid logger held by this environment.
 *
 * @param string $prefix Passed through as the logger's first argument
 * @param mixed ...$args Passed through to the logger unchanged
 */
public function log( string $prefix, ...$args ): void {
	$logger = $this->parsoidLogger;
	$logger->log( $prefix, ...$args );
}
979 | |
/**
 * Bump usage of some limited parser resource
 * (ex: tokens, # transclusions, # list items, etc.)
 *
 * The usage recorded so far is checked first: if it already fails
 * ::compareWt2HtmlLimit(), nothing is updated and null is returned.
 * Otherwise the usage is increased by $count, stored, and checked
 * against the limit again.
 *
 * @param string $resource
 * @param int $count How much of the resource is used?
 * @return ?bool Returns `null` if the limit was already reached, `false` when exceeded
 */
public function bumpWt2HtmlResourceUse( string $resource, int $count = 1 ): ?bool {
	$used = $this->wt2htmlUsage[$resource] ?? 0;
	if ( $this->compareWt2HtmlLimit( $resource, $used ) ) {
		$used += $count;
		$this->wt2htmlUsage[$resource] = $used;
		return $this->compareWt2HtmlLimit( $resource, $used );
	}
	return null;
}
997 | |
/**
 * Check a usage count against the configured wt2html limit for a
 * resource. Resources without a configured limit never exceed.
 *
 * @param string $resource
 * @param int $n
 * @return bool Return `false` when exceeded
 */
public function compareWt2HtmlLimit( string $resource, int $n ): bool {
	$limits = $this->siteConfig->getWt2HtmlLimits();
	if ( !isset( $limits[$resource] ) ) {
		// No limit configured for this resource
		return true;
	}
	return !( $n > $limits[$resource] );
}
1007 | |
/**
 * Bump usage of some limited serializer resource
 * (ex: html size)
 *
 * The updated usage is stored before the limit check, so it is kept
 * even when the exception is thrown.
 *
 * @param string $resource
 * @param int $count How much of the resource is used? (defaults to 1)
 * @throws ResourceLimitExceededException When a limit is configured for
 *   $resource and the updated usage is greater than it
 */
public function bumpHtml2WtResourceUse( string $resource, int $count = 1 ): void {
	$n = ( $this->html2wtUsage[$resource] ?? 0 ) + $count;
	$this->html2wtUsage[$resource] = $n;

	$limits = $this->siteConfig->getHtml2WtLimits();
	$limit = $limits[$resource] ?? null;
	if ( $limit === null ) {
		// No limit configured for this resource
		return;
	}
	if ( $n > $limit ) {
		throw new ResourceLimitExceededException( "html2wt: $resource limit exceeded: $n" );
	}
}
1028 | |
/**
 * Get an appropriate content handler, given a contentmodel.
 *
 * @param ?string &$contentmodel An optional content model which
 *   will override whatever the source specifies. When null is passed,
 *   it is filled in (by reference) with the page's content model. It is
 *   left untouched when the wikitext fallback handler is returned.
 * @return ContentModelHandler An appropriate content handler
 */
public function getContentHandler(
	?string &$contentmodel = null
): ContentModelHandler {
	if ( $contentmodel === null ) {
		$contentmodel = $this->pageConfig->getContentModel();
	}
	$handler = $this->siteConfig->getContentModelHandler( $contentmodel );
	if ( $handler !== null ) {
		return $handler;
	}
	// For now, fallback to 'wikitext' as the default handler
	// FIXME: This is bogus, but this is just so suppress noise in our
	// logs till we get around to handling all these other content models.
	// (A 'warn' log for unknown, non-wikitext content models is
	// deliberately not emitted here.)
	return $this->wikitextContentModelHandler;
}
1050 | |
/**
 * Is the language converter enabled on this page?
 *
 * Asks the site config, passing it the page language (BCP-47) taken
 * from the page config.
 *
 * @return bool
 */
public function langConverterEnabled(): bool {
	$pageLang = $this->pageConfig->getPageLanguageBcp47();
	return $this->siteConfig->langConverterEnabledBcp47( $pageLang );
}
1061 | |
/**
 * The HTML content version of the input document (for html2wt and
 * html2html conversions).
 *
 * @see https://www.mediawiki.org/wiki/Parsoid/API#Content_Negotiation
 * @see https://www.mediawiki.org/wiki/Specs/HTML#Versioning
 * @return string A semver version number
 */
public function getInputContentVersion(): string {
	return $this->inputContentVersion;
}
1071 | |
/**
 * The HTML content version of the output document.
 *
 * NOTE(review): presumably relevant for conversions that produce HTML
 * (wt2html, html2html) -- confirm against the callers.
 *
 * @see https://www.mediawiki.org/wiki/Parsoid/API#Content_Negotiation
 * @see https://www.mediawiki.org/wiki/Specs/HTML#Versioning
 * @return string A semver version number
 */
public function getOutputContentVersion(): string {
	return $this->outputContentVersion;
}
1081 | |
/**
 * If non-null, the language variant used for Parsoid HTML; we convert
 * to this if wt2html, or from this (if html2wt).
 *
 * @return ?Bcp47Code a BCP-47 language code
 */
public function getHtmlVariantLanguageBcp47(): ?Bcp47Code {
	// Stored as BCP-47
	return $this->htmlVariantLanguage;
}
1091 | |
/**
 * If non-null, the language variant to be used for wikitext. If null,
 * heuristics will be used to identify the original wikitext variant
 * in wt2html mode, and in html2wt mode new or edited HTML will be left
 * unconverted.
 *
 * @return ?Bcp47Code a BCP-47 language code
 */
public function getWtVariantLanguageBcp47(): ?Bcp47Code {
	return $this->wtVariantLanguage;
}
1103 | |
/**
 * Return the stored "skip language conversion pass" flag.
 *
 * @return bool
 */
public function getSkipLanguageConversionPass(): bool {
	return $this->skipLanguageConversionPass;
}
1107 | |
/**
 * Determine appropriate vary headers for the HTML form of this page.
 *
 * 'Accept' is always included; 'Accept-Language' is added when the
 * language converter is enabled.
 *
 * @return string Sorted header names joined with ', '
 */
public function htmlVary(): string {
	// 'Accept': varies on Content-Type
	$headers = $this->langConverterEnabled()
		? [ 'Accept', 'Accept-Language' ]
		: [ 'Accept' ];

	sort( $headers );
	return implode( ', ', $headers );
}
1121 | |
/**
 * Determine an appropriate content-language for the HTML form of this page.
 *
 * The page config's variant is used when it is non-null; otherwise the
 * page language is used.
 *
 * @return Bcp47Code a BCP-47 language code.
 */
public function htmlContentLanguageBcp47(): Bcp47Code {
	// PageConfig::htmlVariant is set iff we do variant conversion on the
	// HTML
	$variant = $this->pageConfig->getVariantBcp47();
	if ( $variant !== null ) {
		return $variant;
	}
	return $this->pageConfig->getPageLanguageBcp47();
}
1132 | |
/**
 * Get an array of attributes to apply to an anchor linking to $url.
 *
 * 'rel' receives 'nofollow' when the site's nofollow config asks for
 * it and neither the context title's namespace nor the URL's domain is
 * listed as an exception. When the site configures an external link
 * target, 'target' is set, and unless that target is one of '_self',
 * '_parent' or '_top', 'noreferrer' and 'noopener' are appended to 'rel'.
 *
 * @param string $url
 * @return array Attribute map; may contain 'rel' (list of strings) and 'target'
 */
public function getExternalLinkAttribs( string $url ): array {
	$siteConfig = $this->getSiteConfig();
	$noFollowConfig = $siteConfig->getNoFollowConfig();
	$attribs = [];
	$ns = $this->getContextTitle()->getNamespace();

	if (
		$noFollowConfig['nofollow'] &&
		!in_array( $ns, $noFollowConfig['nsexceptions'], true )
	) {
		// Cast to an array because parserTests sets it as a string
		$domainExceptions = (array)$noFollowConfig['domainexceptions'];
		if ( !UrlUtils::matchesDomainList( $url, $domainExceptions ) ) {
			$attribs['rel'] = [ 'nofollow' ];
		}
	}

	$target = $siteConfig->getExternalLinkTarget();
	if ( !$target ) {
		return $attribs;
	}

	$attribs['target'] = $target;
	$sameContextTargets = [ '_self', '_parent', '_top' ];
	if ( !in_array( $target, $sameContextTargets, true ) ) {
		// T133507. New windows can navigate parent cross-origin.
		// Including noreferrer due to lacking browser
		// support of noopener. Eventually noreferrer should be removed.
		$attribs['rel'] ??= [];
		$attribs['rel'][] = 'noreferrer';
		$attribs['rel'][] = 'noopener';
	}
	return $attribs;
}
1167 | |
/**
 * Effective linter configuration: the overrides held by this
 * environment layered on top of the site's linter config. Array union
 * is used, so an override wins over a site entry with the same key.
 *
 * @return array
 */
public function getLinterConfig(): array {
	$overrides = $this->linterOverrides;
	$siteDefaults = $this->getSiteConfig()->getLinterSiteConfig();
	return $overrides + $siteDefaults;
}
1174 | |
/**
 * Whether to enable linter Backend.
 * Consults the allow list and block list from ::getLinterConfig().
 *
 * Order of precedence:
 *  1. If the site config has the linter disabled, nothing is linted.
 *  2. If 'enabled' is an array, it acts as an allow list and the
 *     'disabled' entry is not consulted at all.
 *  3. Otherwise, if 'disabled' is an array, it acts as a block list.
 *  4. Otherwise every lint type is enabled.
 *
 * @param ?string $type If $type is null or omitted, returns true if *any* linting
 *   type is enabled; otherwise returns true only if the specified
 *   linting type is enabled.
 * @return bool If $type is null or omitted, returns true if *any* linting
 *   type is enabled; otherwise returns true only if the specified
 *   linting type is enabled.
 */
public function linting( ?string $type = null ) {
	if ( !$this->getSiteConfig()->linterEnabled() ) {
		return false;
	}
	$lintConfig = $this->getLinterConfig();
	// Allow list
	$allowList = $lintConfig['enabled'] ?? null;
	if ( is_array( $allowList ) ) {
		if ( $type === null ) {
			// "Any type enabled?" is true only for a non-empty allow list
			return count( $allowList ) > 0;
		}
		return in_array( $type, $allowList, true );
	}
	// Block list
	if ( $type === null ) {
		// Without an allow list, "any type enabled?" is answered with
		// true without looking at the block list.
		return true;
	}
	$blockList = $lintConfig['disabled'] ?? null;
	if ( is_array( $blockList ) ) {
		return !in_array( $type, $blockList, true );
	}
	// No specific configuration
	return true;
}
1210 | } |