Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
37.18% |
103 / 277 |
|
7.25% |
5 / 69 |
CRAP | |
0.00% |
0 / 1 |
Env | |
37.18% |
103 / 277 |
|
7.25% |
5 / 69 |
3689.20 | |
0.00% |
0 / 1 |
__construct | |
88.00% |
44 / 50 |
|
0.00% |
0 / 1 |
6.06 | |||
checkPlatform | |
12.50% |
2 / 16 |
|
0.00% |
0 / 1 |
9.03 | |||
profiling | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getCurrentProfile | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
pushNewProfile | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
12 | |||
popProfile | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
hasTraceFlags | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
hasTraceFlag | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
hasDumpFlags | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
hasDumpFlag | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
writeDump | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getSiteConfig | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getPageConfig | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getDataAccess | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getMetadata | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getTOCData | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
nativeTemplateExpansionEnabled | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getUID | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getFID | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getWrapSections | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getPipelineFactory | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getRequestOffsetType | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getCurrentOffsetType | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
setCurrentOffsetType | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getContextTitle | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
2 | |||
resolveTitle | |
96.67% |
29 / 30 |
|
0.00% |
0 / 1 |
15 | |||
normalizedTitleKey | |
66.67% |
4 / 6 |
|
0.00% |
0 / 1 |
3.33 | |||
makeTitle | |
37.50% |
3 / 8 |
|
0.00% |
0 / 1 |
7.91 | |||
makeTitleFromText | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
makeTitleFromURLDecodedStr | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
makeLink | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
2 | |||
isValidLinkTarget | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
generateUID | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
generateAnnotationUID | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
newAnnotationId | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
newAboutId | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
setDOMDiff | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getDOMDiff | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
newFragmentId | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
setupTopLevelDoc | |
66.67% |
12 / 18 |
|
0.00% |
0 / 1 |
2.15 | |||
getTopLevelDoc | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
fetchRemexPipeline | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
6 | |||
setBehaviorSwitch | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getBehaviorSwitch | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getDOMFragmentMap | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getDOMFragment | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
setDOMFragment | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
2 | |||
removeDOMFragment | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
2 | |||
getPFragment | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
addToPFragmentMap | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
recordLint | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
12 | |||
getLints | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
setLints | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
log | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
bumpWt2HtmlResourceUse | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
6 | |||
compareWt2HtmlLimit | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
bumpHtml2WtResourceUse | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
12 | |||
getContentHandler | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
12 | |||
langConverterEnabled | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
getInputContentVersion | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getOutputContentVersion | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getHtmlVariantLanguageBcp47 | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getWtVariantLanguageBcp47 | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getSkipLanguageConversionPass | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
htmlVary | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
6 | |||
htmlContentLanguageBcp47 | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
getExternalLinkAttribs | |
0.00% |
0 / 19 |
|
0.00% |
0 / 1 |
56 | |||
getLinterConfig | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
linting | |
0.00% |
0 / 14 |
|
0.00% |
0 / 1 |
42 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace Wikimedia\Parsoid\Config; |
5 | |
6 | use Wikimedia\Assert\Assert; |
7 | use Wikimedia\Bcp47Code\Bcp47Code; |
8 | use Wikimedia\Parsoid\Core\ContentMetadataCollector; |
9 | use Wikimedia\Parsoid\Core\ContentModelHandler; |
10 | use Wikimedia\Parsoid\Core\DomPageBundle; |
11 | use Wikimedia\Parsoid\Core\ResourceLimitExceededException; |
12 | use Wikimedia\Parsoid\Core\Sanitizer; |
13 | use Wikimedia\Parsoid\Core\TOCData; |
14 | use Wikimedia\Parsoid\DOM\Document; |
15 | use Wikimedia\Parsoid\DOM\DocumentFragment; |
16 | use Wikimedia\Parsoid\Fragments\PFragment; |
17 | use Wikimedia\Parsoid\Logger\ParsoidLogger; |
18 | use Wikimedia\Parsoid\Parsoid; |
19 | use Wikimedia\Parsoid\Tokens\Token; |
20 | use Wikimedia\Parsoid\Utils\DOMCompat; |
21 | use Wikimedia\Parsoid\Utils\DOMDataUtils; |
22 | use Wikimedia\Parsoid\Utils\PHPUtils; |
23 | use Wikimedia\Parsoid\Utils\Title; |
24 | use Wikimedia\Parsoid\Utils\TitleException; |
25 | use Wikimedia\Parsoid\Utils\TokenUtils; |
26 | use Wikimedia\Parsoid\Utils\UrlUtils; |
27 | use Wikimedia\Parsoid\Utils\Utils; |
28 | use Wikimedia\Parsoid\Wikitext\ContentModelHandler as WikitextContentModelHandler; |
29 | use Wikimedia\Parsoid\Wt2Html\Frame; |
30 | use Wikimedia\Parsoid\Wt2Html\PageConfigFrame; |
31 | use Wikimedia\Parsoid\Wt2Html\ParserPipelineFactory; |
32 | use Wikimedia\Parsoid\Wt2Html\TreeBuilder\RemexPipeline; |
33 | |
34 | /** |
35 | * Environment/Envelope class for Parsoid |
36 | * |
37 | * Carries around the SiteConfig and PageConfig during an operation |
38 | * and provides certain other services. |
39 | */ |
40 | class Env { |
41 | |
42 | /** @var SiteConfig */ |
43 | private $siteConfig; |
44 | |
45 | /** @var PageConfig */ |
46 | private $pageConfig; |
47 | |
48 | /** @var DataAccess */ |
49 | private $dataAccess; |
50 | |
51 | /** @var ContentMetadataCollector */ |
52 | private $metadata; |
53 | |
54 | /** @var TOCData Table of Contents metadata for the article */ |
55 | private $tocData; |
56 | |
57 | /** |
58 | * The top-level frame for this conversion. This largely wraps the |
59 | * PageConfig. |
60 | * |
61 | * In the future we may replace PageConfig with the Frame, and add
62 | * a currentFrame property (relocated from TokenHandlerPipeline).
63 | * @var Frame |
64 | */ |
65 | public $topFrame; |
66 | // XXX In the future, perhaps replace PageConfig with the Frame, and |
67 | // add $this->currentFrame (relocated from TokenHandlerPipeline) if/when |
68 | // we've removed async parsing. |
69 | |
70 | /** |
71 | * @var bool Are we using native template expansion? |
72 | * |
73 | * Parsoid implements native template expansion, which is currently |
74 | * only used during parser tests; in production, template expansion |
75 | * is done via MediaWiki's legacy preprocessor. |
76 | * |
77 | * FIXME: Hopefully this distinction can be removed when we're entirely |
78 | * in PHP land. |
79 | */ |
80 | private $nativeTemplateExpansion; |
81 | |
82 | /** @var array<string,int> */ |
83 | private $wt2htmlUsage = []; |
84 | |
85 | /** @var array<string,int> */ |
86 | private $html2wtUsage = []; |
87 | |
88 | /** @var bool */ |
89 | private $profiling = false; |
90 | |
91 | /** @var array<Profile> */ |
92 | private $profileStack = []; |
93 | |
94 | /** @var bool */ |
95 | private $wrapSections; |
96 | |
97 | /** @var ('byte'|'ucs2'|'char') */ |
98 | private $requestOffsetType = 'byte'; |
99 | |
100 | /** @var ('byte'|'ucs2'|'char') */ |
101 | private $currentOffsetType = 'byte'; |
102 | |
103 | /** @var bool */ |
104 | private $skipLanguageConversionPass = false; |
105 | |
106 | /** @var array<string,mixed> */ |
107 | private $behaviorSwitches = []; |
108 | |
109 | /** |
110 | * Maps fragment id to the fragment forest (array of Nodes). |
111 | * @var array<string,DocumentFragment> |
112 | */ |
113 | private $fragmentMap = []; |
114 | |
115 | /** |
116 | * Maps pfragment id to a PFragment. |
117 | * @var array<string,PFragment> |
118 | */ |
119 | private array $pFragmentMap = []; |
120 | |
121 | /** |
122 | * @var int used to generate fragment ids as needed during parse |
123 | */ |
124 | private $fid = 1; |
125 | |
126 | /** @var int used to generate uids as needed during this parse */ |
127 | private $uid = 1; |
128 | |
129 | /** @var int used to generate annotation uids as needed during this parse */ |
130 | private $annUid = 0; |
131 | |
132 | /** @var array[] Lints recorded */ |
133 | private $lints = []; |
134 | |
135 | /** @var bool logLinterData */ |
136 | public $logLinterData = false; |
137 | |
138 | /** @var array linterOverrides */ |
139 | private $linterOverrides = []; |
140 | |
141 | /** @var bool[] */ |
142 | private $traceFlags; |
143 | |
144 | /** @var bool[] */ |
145 | private $dumpFlags; |
146 | |
147 | /** @var bool[] */ |
148 | private $debugFlags; |
149 | |
150 | /** @var ParsoidLogger */ |
151 | private $parsoidLogger; |
152 | |
153 | /** |
154 | * The default content version that Parsoid assumes it's serializing or |
155 | * updating in the pb2pb endpoints |
156 | * |
157 | * @var string |
158 | */ |
159 | private $inputContentVersion; |
160 | |
161 | /** |
162 | * The default content version that Parsoid will generate. |
163 | * |
164 | * @var string |
165 | */ |
166 | private $outputContentVersion; |
167 | |
168 | /** |
169 | * If non-null, the language variant used for Parsoid HTML; |
170 | * we convert to this if wt2html, or from this if html2wt. |
171 | * @var ?Bcp47Code |
172 | */ |
173 | private $htmlVariantLanguage; |
174 | |
175 | /** |
176 | * If non-null, the language variant to be used for wikitext. |
177 | * If null, heuristics will be used to identify the original wikitext variant |
178 | * in wt2html mode, and in html2wt mode new or edited HTML will be left unconverted. |
179 | * @var ?Bcp47Code |
180 | */ |
181 | private $wtVariantLanguage; |
182 | |
183 | /** @var ParserPipelineFactory */ |
184 | private $pipelineFactory; |
185 | |
186 | /** |
187 | * FIXME Used in DedupeStyles::dedupe() |
188 | * @var array |
189 | */ |
190 | public $styleTagKeys = []; |
191 | |
192 | /** |
193 | * The DomPageBundle holding the JSON data for data-parsoid and data-mw |
194 | * attributes, or `null` if these are to be encoded as inline HTML |
195 | * attributes. |
196 | */ |
197 | public ?DomPageBundle $pageBundle = null; |
198 | |
199 | /** @var Document */ |
200 | private $domDiff; |
201 | |
202 | /** @var bool */ |
203 | public $hasAnnotations; |
204 | |
205 | /** |
206 | * Cache of wikitext source for a title; only used for ParserTests. |
207 | * @var array |
208 | */ |
209 | public $pageCache = []; |
210 | |
211 | /** |
212 | * The current top-level document. During wt2html, this will be the document |
213 | * associated with the RemexPipeline. During html2wt, this will be the |
214 | * input document, typically passed as a constructor option. |
215 | * |
216 | * This document should be prepared and loaded; see |
217 | * ContentUtils::createAndLoadDocument(). |
218 | * |
219 | * @var Document |
220 | */ |
221 | private $topLevelDoc; |
222 | |
223 | /** |
224 | * The RemexPipeline used during a wt2html operation. |
225 | * |
226 | * @var RemexPipeline|null |
227 | */ |
228 | private $remexPipeline; |
229 | |
230 | /** |
231 | * @var WikitextContentModelHandler |
232 | */ |
233 | private $wikitextContentModelHandler; |
234 | |
235 | private ?Title $cachedContextTitle = null; |
236 | |
/**
 * Build the environment for a single Parsoid operation.
 *
 * @param SiteConfig $siteConfig
 * @param PageConfig $pageConfig
 * @param DataAccess $dataAccess
 * @param ContentMetadataCollector $metadata
 * @param ?array $options
 *  - wrapSections: (bool) Whether `<section>` wrappers should be added.
 *      (defaults to true)
 *  - pageBundle: (bool) When true, sets ids on nodes and stores
 *      data-* attributes in a JSON blob in Env::$pageBundle
 *  - traceFlags: (array) Flags indicating which components need to be traced
 *  - dumpFlags: (bool[]) Dump flags
 *  - debugFlags: (bool[]) Debug flags
 *  - nativeTemplateExpansion: boolean
 *  - offsetType: 'byte' (default), 'ucs2', 'char'
 *      See `Parsoid\Wt2Html\DOM\Processors\ConvertOffsets`.
 *  - logLinterData: (bool) Should we log linter data if linting is enabled?
 *  - linterOverrides: (array) Override the site linting configs.
 *  - skipLanguageConversionPass: (bool) Should we skip the language
 *      conversion pass? (defaults to false)
 *  - inputContentVersion: (string) Content version assumed for input.
 *      Defaults to Parsoid::defaultHTMLVersion(). Not validated here.
 *  - outputContentVersion: (string) Content version to generate.
 *      Defaults to Parsoid::defaultHTMLVersion(); must be listed in
 *      Parsoid::AVAILABLE_VERSIONS.
 *  - htmlVariantLanguage: Bcp47Code|null
 *      If non-null, the language variant used for Parsoid HTML
 *      as a BCP 47 object.
 *      We convert to this if wt2html, or from this if html2wt.
 *  - wtVariantLanguage: Bcp47Code|null
 *      If non-null, the language variant to be used for wikitext
 *      as a BCP 47 object.
 *      If null, heuristics will be used to identify the original
 *      wikitext variant in wt2html mode, and in html2wt mode new
 *      or edited HTML will be left unconverted.
 *  - logLevels: (string[]) Levels to log
 *      (defaults to fatal, error, warn, info)
 *  - topLevelDoc: Document Set explicitly
 *      when serializing otherwise it gets initialized for parsing.
 *      This should be a "prepared & loaded" document.
 * @throws \UnexpectedValueException if the output content version is not
 *   one of Parsoid::AVAILABLE_VERSIONS
 * @throws \RuntimeException if the platform check on strtolower() fails
 */
public function __construct(
	SiteConfig $siteConfig,
	PageConfig $pageConfig,
	DataAccess $dataAccess,
	ContentMetadataCollector $metadata,
	?array $options = null
) {
	self::checkPlatform();
	$options ??= [];
	$this->siteConfig = $siteConfig;
	$this->pageConfig = $pageConfig;
	$this->dataAccess = $dataAccess;
	$this->metadata = $metadata;
	$this->tocData = new TOCData();
	$this->topFrame = new PageConfigFrame( $this, $pageConfig, $siteConfig );
	$this->wrapSections = (bool)( $options['wrapSections'] ?? true );
	$this->pipelineFactory = new ParserPipelineFactory( $this );
	$defaultContentVersion = Parsoid::defaultHTMLVersion();
	$this->inputContentVersion = $options['inputContentVersion'] ?? $defaultContentVersion;
	// FIXME: We should have a check for the supported input content versions as well.
	// That will require a separate constant.
	$this->outputContentVersion = $options['outputContentVersion'] ?? $defaultContentVersion;
	if ( !in_array( $this->outputContentVersion, Parsoid::AVAILABLE_VERSIONS, true ) ) {
		throw new \UnexpectedValueException(
			$this->outputContentVersion . ' is not an available content version.' );
	}
	// Normalise to a real bool, as is done for the other boolean options
	// (wrapSections, nativeTemplateExpansion, logLinterData), so that a
	// truthy non-bool option value cannot leak into the bool property.
	$this->skipLanguageConversionPass =
		(bool)( $options['skipLanguageConversionPass'] ?? false );
	$this->htmlVariantLanguage = !empty( $options['htmlVariantLanguage'] ) ?
		Utils::mwCodeToBcp47(
			$options['htmlVariantLanguage'],
			// Be strict in what we accept here.
			true, $this->siteConfig->getLogger()
		) : null;
	$this->wtVariantLanguage = !empty( $options['wtVariantLanguage'] ) ?
		Utils::mwCodeToBcp47(
			$options['wtVariantLanguage'],
			// Be strict in what we accept here.
			true, $this->siteConfig->getLogger()
		) : null;
	$this->nativeTemplateExpansion = !empty( $options['nativeTemplateExpansion'] );
	$this->requestOffsetType = $options['offsetType'] ?? 'byte';
	$this->logLinterData = !empty( $options['logLinterData'] );
	$this->linterOverrides = $options['linterOverrides'] ?? [];
	$this->traceFlags = $options['traceFlags'] ?? [];
	$this->dumpFlags = $options['dumpFlags'] ?? [];
	$this->debugFlags = $options['debugFlags'] ?? [];
	$this->parsoidLogger = new ParsoidLogger( $this->siteConfig->getLogger(), [
		'logLevels' => $options['logLevels'] ?? [ 'fatal', 'error', 'warn', 'info' ],
		'debugFlags' => $this->debugFlags,
		'dumpFlags' => $this->dumpFlags,
		'traceFlags' => $this->traceFlags
	] );
	// The 'time' trace flag doubles as the switch for profiling.
	if ( $this->hasTraceFlag( 'time' ) ) {
		$this->profiling = true;
	}
	$this->setupTopLevelDoc( $options['topLevelDoc'] ?? null );
	if ( $options['pageBundle'] ?? false ) {
		$this->pageBundle = DomPageBundle::newEmpty(
			$this->topLevelDoc
		);
	}
	// NOTE:
	// Don't try to do this in setupTopLevelDoc since it is called on existing Env objects
	// in a couple of places. That then leads to a multiple-write to tocdata property on
	// the metadata object.
	//
	// setupTopLevelDoc is called outside Env in these couple cases:
	// 1. html2wt in ContentModelHandler for dealing with
	//    missing original HTML.
	// 2. ParserTestRunner's html2html tests
	//
	// That is done to either reuse an existing Env object (as in 1.)
	// OR to refresh the attached DOC (html2html as in 2.).
	// Constructing a new Env in both cases could eliminate this issue.
	$this->metadata->setTOCData( $this->tocData );

	$this->wikitextContentModelHandler = new WikitextContentModelHandler( $this );
}
350 | |
/**
 * Check to see if the PHP platform is sensible.
 *
 * Verifies that strtolower() lowercases ASCII letters and leaves the
 * bytes 0x80-0xFF untouched. The check runs at most once per process;
 * after the first success it is skipped.
 *
 * @throws \RuntimeException if strtolower() alters the high bytes or
 *   fails to lowercase the ASCII alphabet
 */
private static function checkPlatform(): void {
	static $checked = false;
	if ( $checked ) {
		return;
	}
	$highBytes =
		"\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" .
		"\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" .
		"\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf" .
		"\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" .
		"\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" .
		"\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" .
		"\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" .
		"\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff";
	if ( strtolower( 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' . $highBytes )
		!== 'abcdefghijklmnopqrstuvwxyz' . $highBytes
	) {
		throw new \RuntimeException( 'strtolower() doesn\'t work -- ' .
			'please set the locale to C or a UTF-8 variant such as C.UTF-8' );
	}
	// Only remember the result on success, so a failure is reported on
	// every attempt rather than once.
	$checked = true;
}
375 | |
/**
 * Report whether profiling is switched on for this environment.
 * The constructor enables it when the 'time' trace flag is present.
 * @return bool
 */
public function profiling(): bool {
	return $this->profiling;
}
383 | |
/**
 * Return the most recently pushed profile (the top of the profile stack).
 *
 * FIXME: This implicitly assumes sequential in-order processing
 * This wouldn't have worked in Parsoid/JS and may not work in the future
 * depending on how / if we restructure the pipeline for concurrency, etc.
 *
 * @return Profile
 */
public function getCurrentProfile(): Profile {
	$top = PHPUtils::lastItem( $this->profileStack );
	return $top;
}
396 | |
/**
 * A new pipeline has started: create a Profile, push it on the stack and,
 * if another profile was already active, register the new one as nested
 * inside it.
 * @return Profile The newly created profile
 */
public function pushNewProfile(): Profile {
	// Look up the enclosing profile before the stack is modified.
	$parent = $this->profileStack !== [] ? $this->getCurrentProfile() : null;
	$fresh = new Profile();
	$this->profileStack[] = $fresh;
	if ( $parent !== null ) {
		$parent->pushNestedProfile( $fresh );
	}
	return $fresh;
}
410 | |
/**
 * A pipeline has ended: remove the top profile from the stack.
 * @return Profile The profile that was removed
 */
public function popProfile(): Profile {
	$finished = array_pop( $this->profileStack );
	return $finished;
}
418 | |
/**
 * Were any trace flags supplied for this environment?
 * @return bool
 */
public function hasTraceFlags(): bool {
	return (bool)$this->traceFlags;
}
422 | |
/**
 * Check whether a particular trace flag is set.
 *
 * @param string $flag Flag name.
 * @return bool True when $flag is a key of the trace flags with a
 *   non-null value.
 */
public function hasTraceFlag( string $flag ): bool {
	return ( $this->traceFlags[$flag] ?? null ) !== null;
}
432 | |
/**
 * Were any dump flags supplied for this environment?
 * @return bool
 */
public function hasDumpFlags(): bool {
	return (bool)$this->dumpFlags;
}
436 | |
/**
 * Check whether a particular dump flag is set.
 *
 * @param string $flag Flag name.
 * @return bool True when $flag is a key of the dump flags with a
 *   non-null value.
 */
public function hasDumpFlag( string $flag ): bool {
	return ( $this->dumpFlags[$flag] ?? null ) !== null;
}
446 | |
/**
 * Emit a string that was requested via dumpFlags.
 * The string is forwarded to log() under the 'dump' prefix.
 * @param string $str
 */
public function writeDump( string $str ) {
	$this->log( 'dump', $str );
}
454 | |
/**
 * Return the SiteConfig this environment was constructed with.
 * @return SiteConfig
 */
public function getSiteConfig(): SiteConfig {
	return $this->siteConfig;
}
462 | |
/**
 * Return the PageConfig this environment was constructed with.
 * @return PageConfig
 */
public function getPageConfig(): PageConfig {
	return $this->pageConfig;
}
470 | |
/**
 * Return the DataAccess object this environment was constructed with.
 * @return DataAccess
 */
public function getDataAccess(): DataAccess {
	return $this->dataAccess;
}
478 | |
/**
 * Return the ContentMetadataCollector this environment was constructed with.
 * @return ContentMetadataCollector
 */
public function getMetadata(): ContentMetadataCollector {
	return $this->metadata;
}
486 | |
/**
 * Return the Table of Contents data for the article. This is the TOCData
 * object created by the constructor and handed to the metadata collector.
 * @return TOCData
 */
public function getTOCData(): TOCData {
	return $this->tocData;
}
494 | |
/**
 * Are we using Parsoid's native template expansion?
 * Reflects the `nativeTemplateExpansion` constructor option.
 * @return bool
 */
public function nativeTemplateExpansionEnabled(): bool {
	return $this->nativeTemplateExpansion;
}
498 | |
/**
 * Peek at the uid counter without advancing it.
 * @return int
 */
public function getUID(): int {
	return $this->uid;
}
506 | |
/**
 * Peek at the fragment id counter without advancing it.
 * @return int
 */
public function getFID(): int {
	return $this->fid;
}
514 | |
/**
 * Should `<section>` wrappers be added?
 * Reflects the `wrapSections` constructor option.
 * @todo Does this actually belong here? Should it be a behavior switch?
 * @return bool
 */
public function getWrapSections(): bool {
	return $this->wrapSections;
}
523 | |
/**
 * Return the ParserPipelineFactory created for this environment.
 * @return ParserPipelineFactory
 */
public function getPipelineFactory(): ParserPipelineFactory {
	return $this->pipelineFactory;
}
531 | |
/**
 * Return the external format requested for character offsets in source
 * ranges (the `offsetType` constructor option, 'byte' by default).
 *
 * Internally DomSourceRange and SourceRange information is always kept
 * as UTF-8 byte offsets for efficiency (matching the native string
 * representation); for external use these can be converted to other
 * formats when we output wt2html or take input for html2wt.
 *
 * @see Parsoid\Wt2Html\DOM\Processors\ConvertOffsets
 * @return ('byte'|'ucs2'|'char')
 */
public function getRequestOffsetType(): string {
	return $this->requestOffsetType;
}
545 | |
/**
 * Return the format that character offsets in source ranges are
 * currently in. Comparing this with `getRequestOffsetType` tells us
 * whether the internal byte offsets have been converted to the external
 * format yet.
 *
 * @see Parsoid\Wt2Html\DOM\Processors\ConvertOffsets
 * @return ('byte'|'ucs2'|'char')
 */
public function getCurrentOffsetType(): string {
	return $this->currentOffsetType;
}
558 | |
/**
 * Record the format that offsets are now in. Only
 * Parsoid\Wt2Html\DOM\Processors\ConvertOffsets should be doing this.
 * The value is stored as given; it is not validated here.
 * @param ('byte'|'ucs2'|'char') $offsetType 'byte', 'ucs2', or 'char'
 */
public function setCurrentOffsetType( string $offsetType ) {
	$this->currentOffsetType = $offsetType;
}
567 | |
/**
 * Return the PageConfig's link target as a Parsoid Title.
 * The Title is built on first use and cached for later calls.
 * @return Title
 */
public function getContextTitle(): Title {
	$this->cachedContextTitle ??= Title::newFromLinkTarget(
		$this->pageConfig->getLinkTarget(), $this->siteConfig
	);
	return $this->cachedContextTitle;
}
580 | |
/**
 * Resolve strings that are page-fragments or subpage references with
 * respect to the current page name.
 *
 * Summary of the handling, in order:
 *  - a lone fragment (`#foo`) is prefixed with the context title;
 *  - if the context title's namespace has subpages, `../` prefixes and a
 *    leading `/` are resolved against the context title, and (unless
 *    $resolveOnly) the result is normalized to a title key;
 *  - unless $resolveOnly, a leading ':' is stripped.
 *
 * @param string $str Page fragment or subpage reference. Not URL encoded.
 * @param bool $resolveOnly If true, only trim and add the current title to
 *  lone fragments. TODO: This parameter seems poorly named.
 * @return string Resolved title. The untrimmed input is returned unchanged
 *  when a `../` reference cannot be resolved.
 */
public function resolveTitle( string $str, bool $resolveOnly = false ): string {
	$origName = $str;
	$str = trim( $str );

	$title = $this->getContextTitle();

	// Resolve lonely fragments (important if the current page is a subpage,
	// otherwise the relative link will be wrong)
	if ( $str !== '' && $str[0] === '#' ) {
		return $title->getPrefixedText() . $str;
	}

	// Default return value
	$titleKey = $str;
	if ( $this->getSiteConfig()->namespaceHasSubpages( $title->getNamespace() ) ) {
		// Resolve subpages
		$reNormalize = false;
		if ( preg_match( '!^(?:\.\./)+!', $str, $relUp ) ) {
			// Levels are indicated by '../', so the matched prefix length is
			// always a multiple of 3; intdiv() keeps the count an int.
			$levels = intdiv( strlen( $relUp[0] ), 3 );
			$titleBits = explode( '/', $title->getPrefixedText() );
			if ( $titleBits[0] === '' ) {
				// FIXME: Punt on subpages of titles starting with "/" for now
				return $origName;
			}
			if ( count( $titleBits ) <= $levels ) {
				// Too many levels -- invalid relative link
				return $origName;
			}
			$newBits = array_slice( $titleBits, 0, -$levels );
			if ( $str !== $relUp[0] ) {
				// Append whatever follows the run of '../' segments.
				$newBits[] = substr( $str, $levels * 3 );
			}
			$str = implode( '/', $newBits );
			$reNormalize = true;
		} elseif ( $str !== '' && $str[0] === '/' ) {
			// Resolve absolute subpage links
			$str = $title->getPrefixedText() . $str;
			$reNormalize = true;
		}

		if ( $reNormalize && !$resolveOnly ) {
			// Remove final slashes if present.
			// See https://gerrit.wikimedia.org/r/173431
			$str = rtrim( $str, '/' );
			$titleKey = (string)$this->normalizedTitleKey( $str );
		}
	}

	// Strip leading ':'
	if ( $titleKey !== '' && $titleKey[0] === ':' && !$resolveOnly ) {
		$titleKey = substr( $titleKey, 1 );
	}
	return $titleKey;
}
645 | |
/**
 * Compute the normalized title key for a title string.
 *
 * The title is built with a default namespace of 0.
 *
 * @param string $str Should be in url-decoded format.
 * @param bool $noExceptions Return null instead of throwing exceptions.
 * @param bool $ignoreFragment Ignore the fragment, if any.
 * @return string|null Normalized title key for a title string (or null for invalid titles).
 */
public function normalizedTitleKey(
	string $str, bool $noExceptions = false, bool $ignoreFragment = false
): ?string {
	$title = $this->makeTitleFromURLDecodedStr( $str, 0, $noExceptions );
	if ( $title === null ) {
		return null;
	}
	if ( $ignoreFragment ) {
		return $title->getPrefixedDBKey();
	}
	return $title->getFullDBKey();
}
665 | |
/**
 * Build a Title from URL-decoded text, resolving it against the current
 * page first.
 *
 * Text starting with '#', '/' or '../' is relative to the current page,
 * so the context title's namespace replaces $defaultNs for it.
 *
 * @param string $text URL-decoded text
 * @param ?int $defaultNs
 * @param bool $noExceptions If true, a TitleException yields null instead
 *   of propagating.
 * @return Title|null
 */
private function makeTitle( string $text, ?int $defaultNs = null, bool $noExceptions = false ): ?Title {
	try {
		$isRelative = preg_match( '!^(?:[#/]|\.\./)!', $text );
		if ( $isRelative ) {
			$defaultNs = $this->getContextTitle()->getNamespace();
		}
		return Title::newFromText(
			$this->resolveTitle( $text ), $this->getSiteConfig(), $defaultNs
		);
	} catch ( TitleException $e ) {
		if ( !$noExceptions ) {
			throw $e;
		}
		return null;
	}
}
687 | |
/**
 * Build a Title from URL-encoded text; the text is URI-decoded before
 * the title is made.
 * @see Title::newFromURL in MediaWiki
 * @param string $str URL-encoded text
 * @param ?int $defaultNs
 * @param bool $noExceptions
 * @return Title|null
 */
public function makeTitleFromText(
	string $str, ?int $defaultNs = null, bool $noExceptions = false
): ?Title {
	$decoded = Utils::decodeURIComponent( $str );
	return $this->makeTitle( $decoded, $defaultNs, $noExceptions );
}
701 | |
/**
 * Build a Title from text that is already URL-decoded; the arguments are
 * passed straight through to makeTitle().
 * @see Title::newFromText in MediaWiki
 * @param string $str URL-decoded text
 * @param ?int $defaultNs
 * @param bool $noExceptions
 * @return Title|null
 */
public function makeTitleFromURLDecodedStr(
	string $str, ?int $defaultNs = null, bool $noExceptions = false
): ?Title {
	return $this->makeTitle( $str, $defaultNs, $noExceptions );
}
715 | |
/**
 * Make a link to a local Title: the site's relative link prefix followed
 * by the sanitized full DB key of the title.
 * @param Title $title
 * @return string
 */
public function makeLink( Title $title ): string {
	// T380676: This method *should* be used only for local titles,
	// (ie $title->getInterwiki() should be '') but apparently we
	// are using it for interwiki/interlanguage links as well.
	$prefix = $this->getSiteConfig()->relativeLinkPrefix();
	$target = Sanitizer::sanitizeTitleURI( $title->getFullDBKey(), false );
	return $prefix . $target;
}
730 | |
/**
 * Test if an href attribute value could be a valid link target, i.e.
 * whether it resolves to a title with a non-null normalized title key.
 * @param string|(Token|string)[] $href
 * @return bool
 */
public function isValidLinkTarget( $href ): bool {
	$hrefStr = TokenUtils::tokensToString( $href );

	// decode percent-encoding so that we can reliably detect
	// bad page title characters
	$decoded = Utils::decodeURIComponent( $hrefStr );
	$resolved = $this->resolveTitle( $decoded, true );
	// Second argument: ask for null instead of an exception.
	return $this->normalizedTitleKey( $resolved, true ) !== null;
}
744 | |
/**
 * Hand out the current uid and advance the counter by one.
 * @return int The uid value before the increment
 */
public function generateUID(): int {
	$current = $this->uid;
	$this->uid++;
	return $current;
}
752 | |
/**
 * Hand out the current annotation uid and advance the counter by one.
 * @return int The annotation uid value before the increment
 */
public function generateAnnotationUID(): int {
	$current = $this->annUid;
	$this->annUid++;
	return $current;
}
760 | |
/**
 * Generate a new annotation id: "mwa" followed by a freshly generated
 * annotation uid.
 * @return string
 */
public function newAnnotationId(): string {
	$annUid = $this->generateAnnotationUID();
	return "mwa" . $annUid;
}
768 | |
/**
 * Generate a new about id: '#mwt' followed by a freshly generated uid.
 * @return string
 */
public function newAboutId(): string {
	$uid = $this->generateUID();
	return '#mwt' . $uid;
}
776 | |
/**
 * Remember the DOM diff document on this environment.
 * @param Document $doc
 */
public function setDOMDiff( $doc ): void {
	$this->domDiff = $doc;
}
784 | |
/**
 * Fetch the stored DOM diff document.
 *
 * @return Document|null The stored document; the return type allows null
 */
public function getDOMDiff(): ?Document {
	return $this->domDiff;
}
792 | |
/**
 * Produce a new fragment id: "mwf" followed by the current fragment
 * counter, which is then advanced by one.
 *
 * @return string
 */
public function newFragmentId(): string {
	$current = $this->fid++;
	return "mwf{$current}";
}
800 | |
/**
 * Establish the top-level document used throughout the parse.
 *
 * With a caller-supplied document, that document is adopted as-is (it must
 * already be prepared and loaded) and no RemexPipeline is kept. Without
 * one, a new RemexPipeline is created and its document is prepared,
 * loaded and flagged as loaded.
 *
 * @param ?Document $topLevelDoc if non-null,
 *  the document should be prepared and loaded.
 */
public function setupTopLevelDoc( ?Document $topLevelDoc = null ): void {
	if ( $topLevelDoc !== null ) {
		$this->remexPipeline = null;
		// The supplied document must be a prepared & loaded Document.
		Assert::invariant(
			DOMDataUtils::isPreparedAndLoaded( $topLevelDoc ),
			"toplevelDoc should be prepared and loaded already"
		);
		$this->topLevelDoc = $topLevelDoc;
		return;
	}

	$pipeline = new RemexPipeline( $this );
	$this->remexPipeline = $pipeline;
	$this->topLevelDoc = $pipeline->doc;
	$doc = $this->topLevelDoc;

	// Prepare and load.
	// (Loading should be easy since the doc is expected to be empty.)
	DOMDataUtils::prepareDoc( $doc );
	DOMDataUtils::visitAndLoadDataAttribs(
		DOMCompat::getBody( $doc ),
		[
			'validateXMLNames' => true,
			// Don't mark the <body> tag as new!
			'markNew' => false,
		]
	);

	// Flag the document as loaded so that a later attempt to reload it
	// can be caught as an error.
	DOMDataUtils::getBag( $doc )->loaded = true;
}
836 | |
/**
 * Fetch the current top-level document.
 *
 * During wt2html this is the document associated with the RemexPipeline;
 * during html2wt it is the input document, typically passed as a
 * constructor option.
 *
 * The document is prepared and loaded; see
 * ContentUtils::createAndLoadDocument().
 *
 * @return Document
 */
public function getTopLevelDoc(): Document {
	return $this->topLevelDoc;
}
849 | |
/**
 * Obtain a RemexPipeline.
 *
 * NOTE(review): the stored pipeline is set to null when setupTopLevelDoc()
 * is given a document; confirm callers never ask for the non-fragment
 * pipeline in that situation.
 *
 * @param bool $toFragment If false, the pipeline stored on this object is
 *  returned. If true, a brand-new pipeline is created and its document is
 *  registered as a child of the top-level document.
 * @return RemexPipeline
 */
public function fetchRemexPipeline( bool $toFragment ): RemexPipeline {
	if ( !$toFragment ) {
		return $this->remexPipeline;
	}

	$fragmentPipeline = new RemexPipeline( $this );
	// Attach the top-level bag to the document, for the convenience
	// of code that modifies the data within the RemexHtml TreeBuilder
	// pipeline, prior to the migration of nodes to the top-level
	// document.
	DOMDataUtils::prepareChildDoc( $this->topLevelDoc, $fragmentPipeline->doc );
	return $fragmentPipeline;
}
863 | |
/**
 * Store state for a behavior switch, replacing any state previously
 * stored under the same name.
 *
 * @param string $switch Switch name
 * @param mixed $state Relevant state data to record
 */
public function setBehaviorSwitch( string $switch, $state ): void {
	$this->behaviorSwitches[$switch] = $state;
}
873 | |
/**
 * Look up the state recorded for a behavior switch.
 *
 * @param string $switch Switch name
 * @param mixed $default Value to return when nothing (or null) is recorded
 *  for the switch
 * @return mixed State data that was previously passed to setBehaviorSwitch(), or $default
 */
public function getBehaviorSwitch( string $switch, $default = null ) {
	if ( isset( $this->behaviorSwitches[$switch] ) ) {
		return $this->behaviorSwitches[$switch];
	}
	return $default;
}
884 | |
/**
 * Fetch the whole map of stored DOM fragments, keyed by fragment id.
 *
 * @return array<string,DocumentFragment>
 */
public function getDOMFragmentMap(): array {
	return $this->fragmentMap;
}
891 | |
/**
 * Fetch the DOM fragment stored under the given id.
 *
 * No existence check is performed here; the id is expected to be present
 * in the fragment map.
 *
 * @param string $id Fragment id
 * @return DocumentFragment
 */
public function getDOMFragment( string $id ): DocumentFragment {
	$fragment = $this->fragmentMap[$id];
	return $fragment;
}
899 | |
/**
 * Store a DOM forest under a fragment id.
 *
 * The forest must be owned by the top-level document; this is asserted
 * before the forest is stored.
 *
 * @param string $id Fragment id
 * @param DocumentFragment $forest DOM forest
 *  to store against the fragment id
 */
public function setDOMFragment(
	string $id, DocumentFragment $forest
): void {
	$ownedByTopLevel = $forest->ownerDocument === $this->topLevelDoc;
	Assert::invariant(
		$ownedByTopLevel,
		"fragment should belong to the top level document"
	);
	$this->fragmentMap[$id] = $forest;
}
914 | |
/**
 * Drop a stored DOM fragment from the fragment map.
 *
 * The fragment is asserted to have no child nodes before its entry
 * is removed.
 *
 * @param string $id Fragment id
 */
public function removeDOMFragment( string $id ): void {
	$fragment = $this->fragmentMap[$id];
	$isEmpty = !$fragment->hasChildNodes();
	Assert::invariant( $isEmpty, 'Fragment should be empty.' );
	unset( $this->fragmentMap[$id] );
}
922 | |
/**
 * Fetch the PFragment stored under the given id.
 *
 * No existence check is performed here; the id is expected to be present
 * in the PFragment map.
 *
 * @param string $id
 * @return PFragment
 */
public function getPFragment( string $id ): PFragment {
	$fragment = $this->pFragmentMap[$id];
	return $fragment;
}
926 | |
/**
 * Merge additional entries into the PFragment map.
 *
 * Array union is used, so an id that is already present keeps its
 * existing PFragment.
 *
 * @param array<string,PFragment> $mapping
 */
public function addToPFragmentMap( array $mapping ): void {
	$this->pFragmentMap = $this->pFragmentMap + $mapping;
}
931 | |
/**
 * Append a lint to the list of recorded lints.
 *
 * Nothing is recorded when linting is not enabled for $type. When the
 * lint data has no (or an empty) 'dsr' entry, an 'error/lint' message is
 * logged instead of recording the lint.
 *
 * @param string $type Lint type key
 * @param array $lintData Data for the lint.
 *  - dsr: (SourceRange)
 *  - params: (array)
 *  - templateInfo: (array|null)
 */
public function recordLint( string $type, array $lintData ): void {
	if ( !$this->linting( $type ) ) {
		return;
	}

	if ( empty( $lintData['dsr'] ) ) {
		$this->log( 'error/lint', "Missing DSR; msg=", $lintData );
		return;
	}

	$record = $lintData;
	// This will always be recorded as a native 'byte' offset
	$record['dsr'] = $lintData['dsr']->toJsonArray();
	// Default to an empty params array when none (or null) was supplied.
	$record['params'] ??= [];

	// 'type' comes first; array union keeps it over any 'type' key in the data.
	$this->lints[] = [ 'type' => $type ] + $record;
}
956 | |
/**
 * Fetch all lints recorded so far.
 *
 * @return array[]
 */
public function getLints(): array {
	return $this->lints;
}
964 | |
/**
 * Replace the recorded lints with the given array.
 *
 * FIXME: This exists only so that lints can be reset after DSR offsets
 * are converted, which is needed because of the ordering of DOM passes.
 * There should be no real use case for calling this from anywhere other
 * than that single callsite.
 *
 * @param array $lints
 */
public function setLints( array $lints ): void {
	$this->lints = $lints;
}
978 | |
/**
 * Forward a log call to the Parsoid logger unchanged.
 *
 * @param string $prefix
 * @param mixed ...$args
 */
public function log( string $prefix, ...$args ): void {
	$logger = $this->parsoidLogger;
	$logger->log( $prefix, ...$args );
}
986 | |
/**
 * Bump usage of some limited parser resource
 * (ex: tokens, # transclusions, # list items, etc.)
 *
 * If the usage recorded before this call already fails the limit check,
 * nothing is recorded and null is returned. Otherwise the usage is
 * increased by $count and the limit check is repeated on the new total.
 *
 * @param string $resource
 * @param int $count How much of the resource is used?
 * @return ?bool `null` if the limit had been exceeded before this call,
 *  `false` if this bump exceeds it, `true` otherwise
 */
public function bumpWt2HtmlResourceUse( string $resource, int $count = 1 ): ?bool {
	$before = $this->wt2htmlUsage[$resource] ?? 0;
	$alreadyExceeded = !$this->compareWt2HtmlLimit( $resource, $before );
	if ( $alreadyExceeded ) {
		return null;
	}

	$after = $before + $count;
	$this->wt2htmlUsage[$resource] = $after;
	return $this->compareWt2HtmlLimit( $resource, $after );
}
1004 | |
/**
 * Check a usage count against the configured wt2html limit for a resource.
 *
 * A resource with no configured limit never counts as exceeded.
 *
 * @param string $resource
 * @param int $n
 * @return bool Return `false` when exceeded
 */
public function compareWt2HtmlLimit( string $resource, int $n ): bool {
	$limits = $this->siteConfig->getWt2HtmlLimits();
	if ( !isset( $limits[$resource] ) ) {
		return true;
	}
	return !( $n > $limits[$resource] );
}
1014 | |
/**
 * Bump usage of some limited serializer resource
 * (ex: html size)
 *
 * The new total is recorded before the limit is checked, so the usage
 * stays bumped even when the exception is thrown.
 *
 * @param string $resource
 * @param int $count How much of the resource is used? (defaults to 1)
 * @throws ResourceLimitExceededException when a limit is configured for
 *  the resource and the new total is greater than it
 */
public function bumpHtml2WtResourceUse( string $resource, int $count = 1 ): void {
	$n = ( $this->html2wtUsage[$resource] ?? 0 ) + $count;
	$this->html2wtUsage[$resource] = $n;

	$limits = $this->siteConfig->getHtml2WtLimits();
	if ( !isset( $limits[$resource] ) ) {
		return;
	}
	if ( $n > $limits[$resource] ) {
		throw new ResourceLimitExceededException( "html2wt: $resource limit exceeded: $n" );
	}
}
1035 | |
/**
 * Get an appropriate content handler, given a contentmodel.
 *
 * @param ?string &$contentmodel An optional content model which
 *  will override whatever the source specifies. When null on entry it is
 *  set to the page's content model; it is not changed when the wikitext
 *  handler is used as a fallback.
 * @return ContentModelHandler The handler registered for the content
 *  model, or the wikitext handler if there is none
 */
public function getContentHandler(
	?string &$contentmodel = null
): ContentModelHandler {
	$contentmodel ??= $this->pageConfig->getContentModel();
	$handler = $this->siteConfig->getContentModelHandler( $contentmodel );
	if ( $handler !== null ) {
		return $handler;
	}

	// For now, fallback to 'wikitext' as the default handler
	// FIXME: This is bogus, but this is just so suppress noise in our
	// logs till we get around to handling all these other content models.
	// (A 'warn' log for unknown non-wikitext content models is
	// intentionally not emitted here.)
	return $this->wikitextContentModelHandler;
}
1057 | |
/**
 * Ask the site configuration whether the language converter is enabled
 * for this page's language.
 *
 * @return bool
 */
public function langConverterEnabled(): bool {
	$pageLanguage = $this->pageConfig->getPageLanguageBcp47();
	return $this->siteConfig->langConverterEnabledBcp47( $pageLanguage );
}
1068 | |
/**
 * Fetch the HTML content version of the input document (used for html2wt
 * and html2html conversions).
 *
 * @see https://www.mediawiki.org/wiki/Parsoid/API#Content_Negotiation
 * @see https://www.mediawiki.org/wiki/Specs/HTML#Versioning
 * @return string A semver version number
 */
public function getInputContentVersion(): string {
	return $this->inputContentVersion;
}
1078 | |
/**
 * The HTML content version of the output document.
 *
 * @see https://www.mediawiki.org/wiki/Parsoid/API#Content_Negotiation
 * @see https://www.mediawiki.org/wiki/Specs/HTML#Versioning
 * @return string A semver version number
 */
public function getOutputContentVersion(): string {
	return $this->outputContentVersion;
}
1088 | |
/**
 * Fetch the language variant used for Parsoid HTML, if any. When
 * non-null, wt2html converts to this variant and html2wt converts
 * from it.
 *
 * @return ?Bcp47Code a BCP-47 language code
 */
public function getHtmlVariantLanguageBcp47(): ?Bcp47Code {
	// The stored value is already a BCP-47 code.
	return $this->htmlVariantLanguage;
}
1098 | |
/**
 * Fetch the language variant to be used for wikitext, if any.
 *
 * When this is null, wt2html mode uses heuristics to identify the
 * original wikitext variant, and html2wt mode leaves new or edited HTML
 * unconverted.
 *
 * @return ?Bcp47Code a BCP-47 language code
 */
public function getWtVariantLanguageBcp47(): ?Bcp47Code {
	return $this->wtVariantLanguage;
}
1110 | |
/**
 * Fetch the stored "skip language conversion pass" flag.
 *
 * @return bool
 */
public function getSkipLanguageConversionPass(): bool {
	return $this->skipLanguageConversionPass;
}
1114 | |
/**
 * Compute the Vary header value for the HTML form of this page.
 *
 * The value always includes 'Accept' (the output varies on Content-Type)
 * and additionally 'Accept-Language' when the language converter is
 * enabled. Header names are sorted and joined with ', '.
 *
 * @return string
 */
public function htmlVary(): string {
	$headers = $this->langConverterEnabled()
		? [ 'Accept', 'Accept-Language' ]
		: [ 'Accept' ];

	sort( $headers );
	return implode( ', ', $headers );
}
1128 | |
/**
 * Determine an appropriate content-language for the HTML form of this page:
 * the page config's variant when it is non-null, the page language otherwise.
 *
 * @return Bcp47Code a BCP-47 language code.
 */
public function htmlContentLanguageBcp47(): Bcp47Code {
	// PageConfig::htmlVariant is set iff we do variant conversion on the
	// HTML
	$variant = $this->pageConfig->getVariantBcp47();
	return $variant ?? $this->pageConfig->getPageLanguageBcp47();
}
1139 | |
/**
 * Compute the attributes to apply to an anchor linking to $url.
 *
 * 'rel' gets 'nofollow' when nofollow is configured and neither the
 * context title's namespace nor the URL's domain is excepted. When an
 * external link target is configured it is set as 'target'; unless that
 * target is '_self', '_parent' or '_top', 'noreferrer' and 'noopener' are
 * appended to 'rel' as well.
 *
 * @param string $url
 * @return array
 */
public function getExternalLinkAttribs( string $url ): array {
	$siteConfig = $this->getSiteConfig();
	$noFollow = $siteConfig->getNoFollowConfig();
	$attribs = [];
	$ns = $this->getContextTitle()->getNamespace();

	$addNoFollow = $noFollow['nofollow'] &&
		!in_array( $ns, $noFollow['nsexceptions'], true ) &&
		!UrlUtils::matchesDomainList(
			$url,
			// Cast to an array because parserTests sets it as a string
			(array)$noFollow['domainexceptions']
		);
	if ( $addNoFollow ) {
		$attribs['rel'] = [ 'nofollow' ];
	}

	$target = $siteConfig->getExternalLinkTarget();
	if ( !$target ) {
		return $attribs;
	}

	$attribs['target'] = $target;
	$staysInContext = in_array( $target, [ '_self', '_parent', '_top' ], true );
	if ( !$staysInContext ) {
		// T133507. New windows can navigate parent cross-origin.
		// Including noreferrer due to lacking browser
		// support of noopener. Eventually noreferrer should be removed.
		$attribs['rel'] ??= [];
		$attribs['rel'][] = 'noreferrer';
		$attribs['rel'][] = 'noopener';
	}
	return $attribs;
}
1174 | |
/**
 * Compute the effective linter configuration: the site's linter config
 * with this object's linter overrides taking precedence key by key.
 *
 * @return array
 */
public function getLinterConfig(): array {
	$siteDefaults = $this->getSiteConfig()->getLinterSiteConfig();
	return $this->linterOverrides + $siteDefaults;
}
1181 | |
/**
 * Whether to enable linter Backend.
 * Consults the allow list and block list from ::getLinterConfig().
 *
 * Always false when the site config reports the linter as disabled.
 * Otherwise an array-valued 'enabled' entry acts as an allow list and
 * takes precedence; failing that, an array-valued 'disabled' entry acts
 * as a block list; with neither, every lint type is enabled.
 *
 * @param ?string $type Lint type key to check, or null to ask whether
 *  *any* linting type is enabled.
 * @return bool If $type is null or omitted, returns true if *any* linting
 *  type is enabled; otherwise returns true only if the specified
 *  linting type is enabled.
 */
public function linting( ?string $type = null ) {
	if ( !$this->getSiteConfig()->linterEnabled() ) {
		return false;
	}
	$lintConfig = $this->getLinterConfig();
	// Allow list
	$allowList = $lintConfig['enabled'] ?? null;
	if ( is_array( $allowList ) ) {
		if ( $type === null ) {
			// "Any type enabled?" is true only for a non-empty allow list.
			return count( $allowList ) > 0;
		}
		return in_array( $type, $allowList, true );
	}
	// Block list
	if ( $type === null ) {
		// Without an allow list, a block list is never treated as
		// disabling every lint type.
		return true;
	}
	$blockList = $lintConfig['disabled'] ?? null;
	if ( is_array( $blockList ) ) {
		return !in_array( $type, $blockList, true );
	}
	// No specific configuration
	return true;
}
1217 | } |