Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
38.99% |
85 / 218 |
|
5.97% |
4 / 67 |
CRAP | |
0.00% |
0 / 1 |
Env | |
38.99% |
85 / 218 |
|
5.97% |
4 / 67 |
2756.70 | |
0.00% |
0 / 1 |
__construct | |
87.80% |
36 / 41 |
|
0.00% |
0 / 1 |
7.09 | |||
checkPlatform | |
12.50% |
2 / 16 |
|
0.00% |
0 / 1 |
9.03 | |||
profiling | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getCurrentProfile | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
pushNewProfile | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
12 | |||
popProfile | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
hasTraceFlags | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
hasTraceFlag | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
hasDumpFlags | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
hasDumpFlag | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
writeDump | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getSiteConfig | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getPageConfig | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getDataAccess | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getMetadata | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getTOCData | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
nativeTemplateExpansionEnabled | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getUID | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getFID | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getWrapSections | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getPipelineFactory | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getRequestOffsetType | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getCurrentOffsetType | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
setCurrentOffsetType | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
resolveTitle | |
96.55% |
28 / 29 |
|
0.00% |
0 / 1 |
15 | |||
titleToString | |
83.33% |
5 / 6 |
|
0.00% |
0 / 1 |
3.04 | |||
normalizedTitleKey | |
75.00% |
3 / 4 |
|
0.00% |
0 / 1 |
2.06 | |||
makeTitle | |
37.50% |
3 / 8 |
|
0.00% |
0 / 1 |
7.91 | |||
makeTitleFromText | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
makeTitleFromURLDecodedStr | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
makeLink | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
2 | |||
isValidLinkTarget | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
generateUID | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
newObjectId | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
generateAnnotationUID | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
newAnnotationId | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
newAboutId | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
setDOMDiff | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getDOMDiff | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
newFragmentId | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
setupTopLevelDoc | |
80.00% |
4 / 5 |
|
0.00% |
0 / 1 |
2.03 | |||
fetchRemexPipeline | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
6 | |||
setVariable | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
setBehaviorSwitch | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getBehaviorSwitch | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getDOMFragmentMap | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getDOMFragment | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
setDOMFragment | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
removeDOMFragment | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
2 | |||
recordLint | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
12 | |||
getLints | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
setLints | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
log | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
bumpWt2HtmlResourceUse | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
6 | |||
compareWt2HtmlLimit | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
bumpHtml2WtResourceUse | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
12 | |||
getContentHandler | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
12 | |||
langConverterEnabled | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
getInputContentVersion | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getOutputContentVersion | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getHtmlVariantLanguage | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getHtmlVariantLanguageBcp47 | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getWtVariantLanguage | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getWtVariantLanguageBcp47 | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
htmlVary | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
6 | |||
htmlContentLanguage | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
htmlContentLanguageBcp47 | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace Wikimedia\Parsoid\Config; |
5 | |
6 | use Wikimedia\Assert\Assert; |
7 | use Wikimedia\Bcp47Code\Bcp47Code; |
8 | use Wikimedia\Parsoid\Core\ContentMetadataCollector; |
9 | use Wikimedia\Parsoid\Core\ContentModelHandler; |
10 | use Wikimedia\Parsoid\Core\ResourceLimitExceededException; |
11 | use Wikimedia\Parsoid\Core\Sanitizer; |
12 | use Wikimedia\Parsoid\Core\TOCData; |
13 | use Wikimedia\Parsoid\DOM\Document; |
14 | use Wikimedia\Parsoid\DOM\DocumentFragment; |
15 | use Wikimedia\Parsoid\Logger\ParsoidLogger; |
16 | use Wikimedia\Parsoid\Parsoid; |
17 | use Wikimedia\Parsoid\Tokens\Token; |
18 | use Wikimedia\Parsoid\Utils\DOMDataUtils; |
19 | use Wikimedia\Parsoid\Utils\PHPUtils; |
20 | use Wikimedia\Parsoid\Utils\Title; |
21 | use Wikimedia\Parsoid\Utils\TitleException; |
22 | use Wikimedia\Parsoid\Utils\TitleNamespace; |
23 | use Wikimedia\Parsoid\Utils\TokenUtils; |
24 | use Wikimedia\Parsoid\Utils\Utils; |
25 | use Wikimedia\Parsoid\Wikitext\ContentModelHandler as WikitextContentModelHandler; |
26 | use Wikimedia\Parsoid\Wt2Html\Frame; |
27 | use Wikimedia\Parsoid\Wt2Html\PageConfigFrame; |
28 | use Wikimedia\Parsoid\Wt2Html\ParserPipelineFactory; |
29 | use Wikimedia\Parsoid\Wt2Html\TreeBuilder\RemexPipeline; |
30 | |
31 | /** |
32 | * Environment/Envelope class for Parsoid |
33 | * |
34 | * Carries around the SiteConfig and PageConfig during an operation |
35 | * and provides certain other services. |
36 | */ |
37 | class Env { |
38 | |
39 | /** @var SiteConfig */ |
40 | private $siteConfig; |
41 | |
42 | /** @var PageConfig */ |
43 | private $pageConfig; |
44 | |
45 | /** @var DataAccess */ |
46 | private $dataAccess; |
47 | |
48 | /** @var ContentMetadataCollector */ |
49 | private $metadata; |
50 | |
51 | /** @var TOCData Table of Contents metadata for the article */ |
52 | private $tocData; |
53 | |
54 | /** |
55 | * The top-level frame for this conversion. This largely wraps the |
56 | * PageConfig. |
57 | * |
58 | * In the future we may replace PageConfig with the Frame, and add
59 | * a $currentFrame property (relocated from TokenTransformManager).
60 | * @var Frame |
61 | */ |
62 | public $topFrame; |
63 | // XXX In the future, perhaps replace PageConfig with the Frame, and |
64 | // add $this->currentFrame (relocated from TokenTransformManager) if/when |
65 | // we've removed async parsing. |
66 | |
67 | /** |
68 | * @var bool Are we using native template expansion? |
69 | * |
70 | * Parsoid implements native template expansion, which is currently |
71 | * only used during parser tests; in production, template expansion |
72 | * is done via MediaWiki's legacy preprocessor. |
73 | * |
74 | * FIXME: Hopefully this distinction can be removed when we're entirely |
75 | * in PHP land. |
76 | */ |
77 | private $nativeTemplateExpansion; |
78 | |
79 | /** @phan-var array<string,int> */ |
80 | private $wt2htmlUsage = []; |
81 | |
82 | /** @phan-var array<string,int> */ |
83 | private $html2wtUsage = []; |
84 | |
85 | /** @var bool */ |
86 | private $profiling = false; |
87 | |
88 | /** @var array<Profile> */ |
89 | private $profileStack = []; |
90 | |
91 | /** @var bool */ |
92 | private $wrapSections = true; |
93 | |
94 | /** @var string */ |
95 | private $requestOffsetType = 'byte'; |
96 | |
97 | /** @var string */ |
98 | private $currentOffsetType = 'byte'; |
99 | |
100 | /** @var array<string,mixed> */ |
101 | private $behaviorSwitches = []; |
102 | |
103 | /** |
104 | * Maps fragment id to the fragment forest (array of Nodes). |
105 | * @var array<string,DocumentFragment> |
106 | */ |
107 | private $fragmentMap = []; |
108 | |
109 | /** |
110 | * @var int used to generate fragment ids as needed during parse |
111 | */ |
112 | private $fid = 1; |
113 | |
114 | /** @var int used to generate uids as needed during this parse */ |
115 | private $uid = 1; |
116 | |
117 | /** @var int used to generate annotation uids as needed during this parse */ |
118 | private $annUid = 0; |
119 | |
120 | /** @var array[] Lints recorded */ |
121 | private $lints = []; |
122 | |
123 | /** @var bool logLinterData */ |
124 | public $logLinterData = false; |
125 | |
126 | /** @var bool[] */ |
127 | private $traceFlags; |
128 | |
129 | /** @var bool[] */ |
130 | private $dumpFlags; |
131 | |
132 | /** @var bool[] */ |
133 | private $debugFlags; |
134 | |
135 | /** @var ParsoidLogger */ |
136 | private $parsoidLogger; |
137 | |
138 | /** |
139 | * The default content version that Parsoid assumes it's serializing or |
140 | * updating in the pb2pb endpoints |
141 | * |
142 | * @var string |
143 | */ |
144 | private $inputContentVersion; |
145 | |
146 | /** |
147 | * The default content version that Parsoid will generate. |
148 | * |
149 | * @var string |
150 | */ |
151 | private $outputContentVersion; |
152 | |
153 | /** |
154 | * If non-null, the language variant used for Parsoid HTML; |
155 | * we convert to this if wt2html, or from this if html2wt. |
156 | * @var ?Bcp47Code |
157 | */ |
158 | private $htmlVariantLanguage; |
159 | |
160 | /** |
161 | * If non-null, the language variant to be used for wikitext. |
162 | * If null, heuristics will be used to identify the original wikitext variant |
163 | * in wt2html mode, and in html2wt mode new or edited HTML will be left unconverted. |
164 | * @var ?Bcp47Code |
165 | */ |
166 | private $wtVariantLanguage; |
167 | |
168 | /** @var ParserPipelineFactory */ |
169 | private $pipelineFactory; |
170 | |
171 | /** |
172 | * FIXME Used in DedupeStyles::dedupe() |
173 | * @var array |
174 | */ |
175 | public $styleTagKeys = []; |
176 | |
177 | /** @var bool */ |
178 | public $pageBundle = false; |
179 | |
180 | /** @var bool */ |
181 | public $discardDataParsoid = false; |
182 | |
183 | /** @var ?Document */
184 | private $domDiff; |
185 | |
186 | /** @var bool */ |
187 | public $hasAnnotations; |
188 | |
189 | /** |
190 | * PORT-FIXME: public currently |
191 | * Cache of wikitext source for a title |
192 | * @var array |
193 | */ |
194 | public $pageCache = []; |
195 | |
196 | /** |
197 | * PORT-FIXME: public currently |
198 | * HTML Cache of expanded transclusions to support |
199 | * reusing expansions from HTML of previous revision. |
200 | * @var array |
201 | */ |
202 | public $transclusionCache = []; |
203 | |
204 | /** |
205 | * PORT-FIXME: public currently |
206 | * HTML Cache of expanded media wikitext to support
207 | * reusing expansions from HTML of previous revision. |
208 | * @var array |
209 | */ |
210 | public $mediaCache = []; |
211 | |
212 | /** |
213 | * PORT-FIXME: public currently |
214 | * HTML Cache of expanded extension tags to support |
215 | * reusing expansions from HTML of previous revision. |
216 | * @var array |
217 | */ |
218 | public $extensionCache = []; |
219 | |
220 | /** |
221 | * The current top-level document. During wt2html, this will be the document |
222 | * associated with the RemexPipeline. During html2wt, this will be the |
223 | * input document, typically passed as a constructor option. |
224 | * |
225 | * @var Document |
226 | */ |
227 | public $topLevelDoc; |
228 | |
229 | /** |
230 | * The RemexPipeline used during a wt2html operation. |
231 | * |
232 | * @var RemexPipeline|null |
233 | */ |
234 | private $remexPipeline; |
235 | |
236 | /** |
237 | * @var WikitextContentModelHandler |
238 | */ |
239 | private $wikitextContentModelHandler; |
240 | |
/**
 * Build the environment for one Parsoid operation.
 *
 * @param SiteConfig $siteConfig
 * @param PageConfig $pageConfig
 * @param DataAccess $dataAccess
 * @param ContentMetadataCollector $metadata
 * @param ?array $options Optional settings; null is treated as an empty array.
 *  - wrapSections: (bool) Whether `<section>` wrappers should be added.
 *    Left at the property default when absent.
 *  - pageBundle: (bool) Sets ids on nodes and stores data-* attributes in a JSON blob.
 *  - inputContentVersion: (string) Falls back to Parsoid::defaultHTMLVersion().
 *  - outputContentVersion: (string) Falls back to Parsoid::defaultHTMLVersion();
 *    must be one of Parsoid::AVAILABLE_VERSIONS.
 *  - traceFlags: (array) Flags indicating which components need to be traced
 *  - dumpFlags: (bool[]) Dump flags
 *  - debugFlags: (bool[]) Debug flags
 *  - nativeTemplateExpansion: boolean
 *  - discardDataParsoid: boolean
 *  - offsetType: 'byte' (default), 'ucs2', 'char'
 *    See `Parsoid\Wt2Html\PP\Processors\ConvertOffsets`.
 *  - logLinterData: (bool) Should we log linter data if linting is enabled?
 *  - htmlVariantLanguage: string|Bcp47Code|null
 *    If non-null, the language variant used for Parsoid HTML
 *    as a MediaWiki-internal language code string or BCP 47 object;
 *    we convert to this if wt2html, or from this if html2wt.
 *  - wtVariantLanguage: string|Bcp47Code|null
 *    If non-null, the language variant to be used for wikitext
 *    as a MediaWiki-internal language code string or BCP 47 object.
 *    If null, heuristics will be used to identify the original
 *    wikitext variant in wt2html mode, and in html2wt mode new
 *    or edited HTML will be left unconverted.
 *  - logLevels: (string[]) Levels to log; defaults to fatal, error, warn, info.
 *  - topLevelDoc: (Document) Set explicitly when serializing otherwise
 *    it gets initialized for parsing.
 * @throws \UnexpectedValueException When the output content version is not
 *  listed in Parsoid::AVAILABLE_VERSIONS.
 */
public function __construct(
	SiteConfig $siteConfig,
	PageConfig $pageConfig,
	DataAccess $dataAccess,
	ContentMetadataCollector $metadata,
	?array $options = null
) {
	self::checkPlatform();
	$opts = $options ?? [];

	$this->siteConfig = $siteConfig;
	$this->pageConfig = $pageConfig;
	$this->dataAccess = $dataAccess;
	$this->metadata = $metadata;
	$this->tocData = new TOCData();
	$this->topFrame = new PageConfigFrame( $this, $pageConfig, $siteConfig );

	// These two keep their property defaults unless the caller supplied a value.
	if ( isset( $opts['wrapSections'] ) ) {
		$this->wrapSections = (bool)$opts['wrapSections'];
	}
	if ( isset( $opts['pageBundle'] ) ) {
		$this->pageBundle = (bool)$opts['pageBundle'];
	}

	$this->pipelineFactory = new ParserPipelineFactory( $this );

	$fallbackVersion = Parsoid::defaultHTMLVersion();
	$this->inputContentVersion = $opts['inputContentVersion'] ?? $fallbackVersion;
	// FIXME: We should have a check for the supported input content versions as well.
	// That will require a separate constant.
	$this->outputContentVersion = $opts['outputContentVersion'] ?? $fallbackVersion;
	$versionAvailable = in_array(
		$this->outputContentVersion, Parsoid::AVAILABLE_VERSIONS, true
	);
	if ( !$versionAvailable ) {
		throw new \UnexpectedValueException(
			$this->outputContentVersion . ' is not an available content version.' );
	}

	// Empty/absent variant codes mean "no variant"; anything else is converted.
	$htmlVariant = $opts['htmlVariantLanguage'] ?? null;
	$this->htmlVariantLanguage = $htmlVariant ? Utils::mwCodeToBcp47( $htmlVariant ) : null;
	$wtVariant = $opts['wtVariantLanguage'] ?? null;
	$this->wtVariantLanguage = $wtVariant ? Utils::mwCodeToBcp47( $wtVariant ) : null;

	$this->nativeTemplateExpansion = !empty( $opts['nativeTemplateExpansion'] );
	$this->discardDataParsoid = !empty( $opts['discardDataParsoid'] );
	$this->requestOffsetType = $opts['offsetType'] ?? 'byte';
	$this->logLinterData = !empty( $opts['logLinterData'] );

	$this->traceFlags = $opts['traceFlags'] ?? [];
	$this->dumpFlags = $opts['dumpFlags'] ?? [];
	$this->debugFlags = $opts['debugFlags'] ?? [];
	$loggerOptions = [
		'logLevels' => $opts['logLevels'] ?? [ 'fatal', 'error', 'warn', 'info' ],
		'debugFlags' => $this->debugFlags,
		'dumpFlags' => $this->dumpFlags,
		'traceFlags' => $this->traceFlags
	];
	$this->parsoidLogger = new ParsoidLogger( $this->siteConfig->getLogger(), $loggerOptions );

	// The 'time' trace flag switches profiling on.
	if ( $this->hasTraceFlag( 'time' ) ) {
		$this->profiling = true;
	}

	$this->setupTopLevelDoc( $opts['topLevelDoc'] ?? null );
	// NOTE:
	// Don't try to do this in setupTopLevelDoc since it is called on existing Env objects
	// in a couple of places. That then leads to a multiple-write to tocdata property on
	// the metadata object.
	//
	// setupTopLevelDoc is called outside Env in these couple cases:
	// 1. html2wt in ContentModelHandler for dealing with
	//    missing original HTML.
	// 2. ParserTestRunner's html2html tests
	//
	// That is done to either reuse an existing Env object (as in 1.)
	// OR to refresh the attached DOC (html2html as in 2.).
	// Constructing a new Env in both cases could eliminate this issue.
	$this->metadata->setTOCData( $this->tocData );

	$this->wikitextContentModelHandler = new WikitextContentModelHandler( $this );
}
340 | |
/**
 * Sanity-check the PHP platform: strtolower() must lowercase the ASCII
 * letters A-Z and leave every byte from 0x80 to 0xff untouched.
 *
 * Once the check has passed, later calls return immediately.
 *
 * @throws \RuntimeException When strtolower() alters the test string in any other way.
 */
private static function checkPlatform() {
	static $verified;
	if ( $verified ) {
		return;
	}

	// All byte values 0x80..0xff, in ascending order.
	$highBytes =
		"\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" .
		"\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" .
		"\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf" .
		"\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" .
		"\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" .
		"\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" .
		"\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" .
		"\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff";

	$actual = strtolower( 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' . $highBytes );
	$expected = 'abcdefghijklmnopqrstuvwxyz' . $highBytes;
	if ( $actual !== $expected ) {
		throw new \RuntimeException( 'strtolower() doesn\'t work -- ' .
			'please set the locale to C or a UTF-8 variant such as C.UTF-8' );
	}

	$verified = true;
}
365 | |
/**
 * Report whether profiling is switched on for this environment.
 *
 * The constructor enables it when the 'time' trace flag is present.
 *
 * @return bool
 */
public function profiling(): bool {
	return $this->profiling;
}
373 | |
/**
 * Return the most recently pushed profile, i.e. the last entry of the
 * profile stack.
 *
 * FIXME: This implicitly assumes sequential in-order processing.
 * This wouldn't have worked in Parsoid/JS and may not work in the future
 * depending on how / if we restructure the pipeline for concurrency, etc.
 *
 * @return Profile
 */
public function getCurrentProfile(): Profile {
	$top = PHPUtils::lastItem( $this->profileStack );
	return $top;
}
386 | |
/**
 * A new pipeline has started: create a Profile for it and push it on the
 * profile stack.
 *
 * If a profile was already on the stack, the new one is also registered
 * with it as a nested profile.
 *
 * @return Profile The newly created profile.
 */
public function pushNewProfile(): Profile {
	$parent = $this->profileStack !== [] ? $this->getCurrentProfile() : null;

	$child = new Profile();
	$this->profileStack[] = $child;

	if ( $parent !== null ) {
		$parent->pushNestedProfile( $child );
	}
	return $child;
}
400 | |
/**
 * A pipeline has ended: remove its profile from the top of the stack.
 *
 * @return Profile The profile that was removed.
 */
public function popProfile(): Profile {
	$finished = array_pop( $this->profileStack );
	return $finished;
}
408 | |
/**
 * Were any trace flags supplied?
 *
 * @return bool True when the trace flag array is non-empty.
 */
public function hasTraceFlags(): bool {
	return (bool)$this->traceFlags;
}
415 | |
/**
 * Check whether a particular kind of trace information should be logged.
 *
 * @param string $flag Flag name.
 * @return bool True when the flag is present with a non-null value.
 */
public function hasTraceFlag( string $flag ): bool {
	return ( $this->traceFlags[$flag] ?? null ) !== null;
}
425 | |
/**
 * Were any dump flags supplied?
 *
 * @return bool True when the dump flag array is non-empty.
 */
public function hasDumpFlags(): bool {
	return (bool)$this->dumpFlags;
}
432 | |
/**
 * Check whether a particular piece of state should be dumped.
 *
 * @param string $flag Flag name.
 * @return bool True when the flag is present with a non-null value.
 */
public function hasDumpFlag( string $flag ): bool {
	return ( $this->dumpFlags[$flag] ?? null ) !== null;
}
442 | |
/**
 * Emit a dump string (requested via dumpFlags) by logging it under the
 * 'dump' log type.
 *
 * @param string $str Text to write out.
 */
public function writeDump( string $str ) {
	$logType = 'dump';
	$this->log( $logType, $str );
}
450 | |
/**
 * Accessor for the SiteConfig this environment was constructed with.
 *
 * @return SiteConfig
 */
public function getSiteConfig(): SiteConfig {
	return $this->siteConfig;
}
458 | |
/**
 * Accessor for the PageConfig this environment was constructed with.
 *
 * @return PageConfig
 */
public function getPageConfig(): PageConfig {
	return $this->pageConfig;
}
466 | |
/**
 * Accessor for the DataAccess object this environment was constructed with.
 *
 * @return DataAccess
 */
public function getDataAccess(): DataAccess {
	return $this->dataAccess;
}
474 | |
/**
 * Accessor for the ContentMetadataCollector this environment was
 * constructed with.
 *
 * @return ContentMetadataCollector
 */
public function getMetadata(): ContentMetadataCollector {
	return $this->metadata;
}
482 | |
/**
 * Accessor for the article's Table of Contents data.
 *
 * This is the TOCData instance created in the constructor and handed to
 * the metadata collector there.
 *
 * @return TOCData
 */
public function getTOCData(): TOCData {
	return $this->tocData;
}
490 | |
/**
 * Are we using Parsoid's native template expansion?
 *
 * Reflects the 'nativeTemplateExpansion' constructor option.
 *
 * @return bool
 */
public function nativeTemplateExpansionEnabled(): bool {
	return $this->nativeTemplateExpansion;
}
494 | |
/**
 * Peek at the uid counter without advancing it.
 *
 * The value returned is the one the next generateUID() call will hand out.
 *
 * @return int
 */
public function getUID(): int {
	return $this->uid;
}
502 | |
/**
 * Peek at the fragment id counter without advancing it.
 *
 * The value returned is the number the next newFragmentId() call will use.
 *
 * @return int
 */
public function getFID(): int {
	return $this->fid;
}
510 | |
/**
 * Should `<section>` wrappers be added?
 *
 * @todo Does this actually belong here? Should it be a behavior switch?
 * @return bool
 */
public function getWrapSections(): bool {
	return $this->wrapSections;
}
519 | |
/**
 * Accessor for the ParserPipelineFactory created in the constructor.
 *
 * @return ParserPipelineFactory
 */
public function getPipelineFactory(): ParserPipelineFactory {
	return $this->pipelineFactory;
}
527 | |
/**
 * Return the offset format requested for external use (the 'offsetType'
 * constructor option, 'byte' when not given).
 *
 * Internally DomSourceRange and SourceRange information is always kept
 * as UTF-8 byte offsets for efficiency (it matches the native string
 * representation); for external use these can be converted to other
 * formats when outputting wt2html or taking input for html2wt.
 *
 * @see Parsoid\Wt2Html\PP\Processors\ConvertOffsets
 * @return string 'byte', 'ucs2', or 'char'
 */
public function getRequestOffsetType(): string {
	return $this->requestOffsetType;
}
541 | |
/**
 * Return the format that source-range offsets are currently in.
 *
 * This lets callers tell whether the internal byte offsets have already
 * been converted to the external format reported by
 * `getRequestOffsetType`.
 *
 * @see Parsoid\Wt2Html\PP\Processors\ConvertOffsets
 * @return string 'byte', 'ucs2', or 'char'
 */
public function getCurrentOffsetType(): string {
	return $this->currentOffsetType;
}
554 | |
/**
 * Record the format that source-range offsets are now in.
 *
 * Only Parsoid\Wt2Html\PP\Processors\ConvertOffsets should be calling this.
 *
 * @param string $offsetType 'byte', 'ucs2', or 'char'
 */
public function setCurrentOffsetType( string $offsetType ) {
	$this->currentOffsetType = $offsetType;
}
563 | |
/**
 * Resolve a page-fragment or subpage reference against the current page
 * name.
 *
 * - A lone fragment ("#foo") gets the current page title prepended.
 * - If the current namespace has subpages, "../"-style and "/"-prefixed
 *   references are expanded relative to the current title and, unless
 *   $resolveOnly is set, normalized via normalizedTitleKey().
 * - Unless $resolveOnly is set, a single leading ':' is stripped.
 *
 * A "../" reference that climbs too far, or that is used on a page whose
 * title starts with "/", is returned exactly as passed in.
 *
 * @param string $str Page fragment or subpage reference. Not URL encoded.
 * @param bool $resolveOnly If true, only trim and add the current title to
 *  lone fragments; the subpage expansion is not reflected in the result.
 *  TODO: This parameter seems poorly named.
 * @return string Resolved title
 */
public function resolveTitle( string $str, bool $resolveOnly = false ): string {
	$untouched = $str;
	$ref = trim( $str );
	$page = $this->getPageConfig();

	// Resolve lonely fragments (important if the current page is a subpage,
	// otherwise the relative link will be wrong)
	if ( $ref !== '' && $ref[0] === '#' ) {
		return $page->getTitle() . $ref;
	}

	// Default return value
	$result = $ref;

	if ( $this->getSiteConfig()->namespaceHasSubpages( $page->getNs() ) ) {
		// Stays null unless the reference turned out to be a subpage reference.
		$expanded = null;

		if ( preg_match( '!^(?:\.\./)+!', $ref, $upMatch ) ) {
			$prefixLen = strlen( $upMatch[0] );
			// Levels are indicated by '../'.
			$levels = $prefixLen / 3;
			$segments = explode( '/', $page->getTitle() );
			// FIXME: Punt on subpages of titles starting with "/" for now.
			// Also bail out on too many levels -- invalid relative link.
			if ( $segments[0] === '' || count( $segments ) <= $levels ) {
				return $untouched;
			}
			$kept = array_slice( $segments, 0, -$levels );
			if ( $ref !== $upMatch[0] ) {
				// Append whatever follows the run of '../'.
				$kept[] = substr( $ref, $prefixLen );
			}
			$expanded = implode( '/', $kept );
		} elseif ( $ref !== '' && $ref[0] === '/' ) {
			// Resolve absolute subpage links
			$expanded = $page->getTitle() . $ref;
		}

		if ( $expanded !== null && !$resolveOnly ) {
			// Remove final slashes if present.
			// See https://gerrit.wikimedia.org/r/173431
			$result = (string)$this->normalizedTitleKey( rtrim( $expanded, '/' ) );
		}
	}

	// Strip leading ':'
	if ( !$resolveOnly && $result !== '' && $result[0] === ':' ) {
		$result = substr( $result, 1 );
	}
	return $result;
}
627 | |
/**
 * Render a Title as its prefixed DB key, followed by '#' and the fragment
 * when the title has a non-empty one.
 *
 * @param Title $title
 * @param bool $ignoreFragment If true, never append the fragment.
 * @return string
 */
private function titleToString( Title $title, bool $ignoreFragment = false ): string {
	$key = $title->getPrefixedDBKey();
	if ( $ignoreFragment ) {
		return $key;
	}
	$fragment = $title->getFragment() ?? '';
	return $fragment === '' ? $key : $key . '#' . $fragment;
}
644 | |
/**
 * Compute the normalized title key for a title string.
 *
 * The string is turned into a Title (default namespace 0) and rendered
 * back to a string.
 *
 * @param string $str Should be in url-decoded format.
 * @param bool $noExceptions Return null instead of throwing exceptions.
 * @param bool $ignoreFragment Ignore the fragment, if any.
 * @return string|null Normalized title key for a title string (or null for invalid titles).
 */
public function normalizedTitleKey(
	string $str, bool $noExceptions = false, bool $ignoreFragment = false
): ?string {
	$title = $this->makeTitleFromURLDecodedStr( $str, 0, $noExceptions );
	return $title ? $this->titleToString( $title, $ignoreFragment ) : null;
}
662 | |
/**
 * Create a Title object from URL-decoded text.
 *
 * Text that starts with '#', '/' or '../' is relative to the current page,
 * so the current page's namespace replaces $defaultNs for it. The text is
 * passed through resolveTitle() before the Title is built.
 *
 * @param string $text URL-decoded text
 * @param int|TitleNamespace $defaultNs
 * @param bool $noExceptions If true, a TitleException yields null instead
 *  of propagating.
 * @return Title|null
 */
private function makeTitle( string $text, $defaultNs = 0, bool $noExceptions = false ): ?Title {
	try {
		$isRelative = preg_match( '!^(?:[#/]|\.\./)!', $text );
		$ns = $isRelative ? $this->getPageConfig()->getNs() : $defaultNs;
		$resolved = $this->resolveTitle( $text );
		return Title::newFromText( $resolved, $this->getSiteConfig(), $ns );
	} catch ( TitleException $e ) {
		if ( !$noExceptions ) {
			throw $e;
		}
		return null;
	}
}
684 | |
/**
 * Create a Title object from URL-encoded text: the string is
 * percent-decoded first and then handled like URL-decoded text.
 *
 * @see Title::newFromURL in MediaWiki
 * @param string $str URL-encoded text
 * @param int|TitleNamespace $defaultNs
 * @param bool $noExceptions
 * @return Title|null
 */
public function makeTitleFromText(
	string $str, $defaultNs = 0, bool $noExceptions = false
): ?Title {
	$decoded = Utils::decodeURIComponent( $str );
	return $this->makeTitle( $decoded, $defaultNs, $noExceptions );
}
698 | |
/**
 * Create a Title object from text that is already URL-decoded.
 *
 * @see Title::newFromText in MediaWiki
 * @param string $str URL-decoded text
 * @param int|TitleNamespace $defaultNs
 * @param bool $noExceptions
 * @return Title|null
 */
public function makeTitleFromURLDecodedStr(
	string $str, $defaultNs = 0, bool $noExceptions = false
): ?Title {
	$title = $this->makeTitle( $str, $defaultNs, $noExceptions );
	return $title;
}
712 | |
/**
 * Build a link to a Title: the site's relative link prefix followed by
 * the sanitized string form of the title.
 *
 * @param Title $title
 * @return string
 */
public function makeLink( Title $title ): string {
	$prefix = $this->getSiteConfig()->relativeLinkPrefix();
	$target = Sanitizer::sanitizeTitleURI( $this->titleToString( $title ), false );
	return $prefix . $target;
}
724 | |
/**
 * Test if an href attribute value could be a valid link target.
 *
 * @param string|(Token|string)[] $href
 * @return bool True when the resolved target yields a non-null normalized title key.
 */
public function isValidLinkTarget( $href ): bool {
	$hrefString = TokenUtils::tokensToString( $href );

	// decode percent-encoding so that we can reliably detect
	// bad page title characters
	$decoded = Utils::decodeURIComponent( $hrefString );

	$resolved = $this->resolveTitle( $decoded, true );
	$key = $this->normalizedTitleKey( $resolved, true );
	return $key !== null;
}
738 | |
/**
 * Hand out the current uid and advance the counter by one.
 *
 * @return int
 */
public function generateUID(): int {
	$issued = $this->uid;
	$this->uid++;
	return $issued;
}
746 | |
/**
 * Generate a new object id: "mwt" followed by a freshly generated uid.
 *
 * @return string
 */
public function newObjectId(): string {
	$uid = $this->generateUID();
	return 'mwt' . $uid;
}
754 | |
/**
 * Hand out the current annotation uid and advance the counter by one.
 *
 * @return int
 */
public function generateAnnotationUID(): int {
	$issued = $this->annUid;
	$this->annUid++;
	return $issued;
}
762 | |
/**
 * Generate a new annotation id: "mwa" followed by a freshly generated
 * annotation uid.
 *
 * @return string
 */
public function newAnnotationId(): string {
	$uid = $this->generateAnnotationUID();
	return 'mwa' . $uid;
}
770 | |
/**
 * Generate a new about id: "#" followed by a new object id.
 *
 * @return string
 */
public function newAboutId(): string {
	$objectId = $this->newObjectId();
	return '#' . $objectId;
}
778 | |
/**
 * Remember the DOM diff document so it can be fetched later with
 * getDOMDiff().
 *
 * @param Document $doc
 */
public function setDOMDiff( $doc ): void {
	$this->domDiff = $doc;
}
786 | |
/**
 * Fetch the DOM diff document stored with setDOMDiff().
 *
 * @return Document|null Null when no document has been stored.
 */
public function getDOMDiff(): ?Document {
	return $this->domDiff;
}
794 | |
/**
 * Generate a new fragment id: "mwf" followed by the current fragment
 * counter, which is then advanced by one.
 *
 * @return string
 */
public function newFragmentId(): string {
	$id = 'mwf' . (string)$this->fid;
	$this->fid++;
	return $id;
}
802 | |
803 | /** |
804 | * Set up the document (and, when needed, the RemexPipeline) that is |
805 | * used throughout the parse, and prepare it via DOMDataUtils. |
806 | * |
807 | * @param ?Document $topLevelDoc Document to adopt; when omitted, a new RemexPipeline is created and its document is used |
808 | */ |
809 | public function setupTopLevelDoc( ?Document $topLevelDoc = null ) { |
810 | if ( !$topLevelDoc ) { |
811 | // No document supplied: build a pipeline and adopt its document. |
812 | $this->remexPipeline = new RemexPipeline( $this ); |
813 | $topLevelDoc = $this->remexPipeline->doc; |
814 | } |
815 | $this->topLevelDoc = $topLevelDoc; |
816 | DOMDataUtils::prepareDoc( $this->topLevelDoc ); |
817 | } |
818 | |
819 | /** |
820 | * @param bool $atTopLevel True to get the pipeline created by setupTopLevelDoc(); false to get a brand-new one |
821 | * @return RemexPipeline |
822 | */ |
823 | public function fetchRemexPipeline( bool $atTopLevel ): RemexPipeline { |
824 | if ( $atTopLevel ) { |
825 | return $this->remexPipeline; |
826 | } |
827 | |
828 | // Attach the top-level bag to the document, for the convenience |
829 | // of code that modifies the data within the RemexHtml TreeBuilder |
830 | // pipeline, prior to the migration of nodes to the top-level |
831 | // document. |
832 | $nestedPipeline = new RemexPipeline( $this ); |
833 | DOMDataUtils::prepareChildDoc( $this->topLevelDoc, $nestedPipeline->doc ); |
834 | return $nestedPipeline; |
835 | } |
836 | |
837 | /** |
838 | * BehaviorSwitchHandler support function that adds a property named by |
839 | * $variable and sets it to $state. It is a thin alias that forwards to setBehaviorSwitch(). |
840 | * |
841 | * @deprecated Use setBehaviorSwitch() instead. |
842 | * @param string $variable Switch name, passed through unchanged |
843 | * @param mixed $state State data, passed through unchanged |
844 | */ |
845 | public function setVariable( string $variable, $state ): void { |
846 | $this->setBehaviorSwitch( $variable, $state ); |
847 | } |
848 | |
849 | /** |
850 | * Record a behavior switch. Recording the same switch again overwrites the earlier state. |
851 | * |
852 | * @todo Does this belong here, or on some equivalent to MediaWiki's ParserOutput? |
853 | * @param string $switch Switch name |
854 | * @param mixed $state Relevant state data to record; readable later through getBehaviorSwitch() |
855 | */ |
856 | public function setBehaviorSwitch( string $switch, $state ): void { |
857 | $this->behaviorSwitches[$switch] = $state; |
858 | } |
859 | |
860 | /** |
861 | * Fetch the state of a previously-recorded behavior switch. |
862 | * |
863 | * @todo Does this belong here, or on some equivalent to MediaWiki's ParserOutput? |
864 | * @param string $switch Switch name |
865 | * @param mixed $default Default value if the switch was never set (also used when the recorded state is null) |
866 | * @return mixed State data that was previously passed to setBehaviorSwitch(), or $default |
867 | */ |
868 | public function getBehaviorSwitch( string $switch, $default = null ) { |
869 | return $this->behaviorSwitches[$switch] ?? $default; |
870 | } |
871 | |
872 | /** |
873 | * @return array<string,DocumentFragment> The whole fragment map, keyed by fragment id as filled in by setDOMFragment() |
874 | */ |
875 | public function getDOMFragmentMap(): array { |
876 | return $this->fragmentMap; |
877 | } |
878 | |
879 | /** |
880 | * @param string $id Fragment id; NOTE(review): an id that is missing from the map is not guarded against here |
881 | * @return DocumentFragment The forest stored under $id by setDOMFragment() |
882 | */ |
883 | public function getDOMFragment( string $id ): DocumentFragment { |
884 | return $this->fragmentMap[$id]; |
885 | } |
886 | |
887 | /** |
888 | * @param string $id Fragment id |
889 | * @param DocumentFragment $forest DOM forest |
890 | * to store against the fragment id (replaces any forest already stored under that id) |
891 | */ |
892 | public function setDOMFragment( |
893 | string $id, DocumentFragment $forest |
894 | ): void { |
895 | $this->fragmentMap[$id] = $forest; |
896 | } |
897 | |
898 | /** |
899 | * @param string $id Id of the fragment to drop from the map; the fragment is asserted to have no child nodes first |
900 | */ |
901 | public function removeDOMFragment( string $id ): void { |
902 | $domFragment = $this->fragmentMap[$id]; |
903 | Assert::invariant( |
904 | !$domFragment->hasChildNodes(), 'Fragment should be empty.' |
905 | ); |
906 | unset( $this->fragmentMap[$id] ); |
907 | } |
908 | |
909 | /** |
910 | * Record a lint. Lints without a usable 'dsr' are logged and dropped. |
911 | * @param string $type Lint type key |
912 | * @param array $lintData Data for the lint. |
913 | * - dsr: (SourceRange) |
914 | * - params: (array) |
915 | * - templateInfo: (array|null) |
916 | */ |
917 | public function recordLint( string $type, array $lintData ): void { |
918 | // Parsoid-JS tests don't like getting null properties where JS had undefined. |
919 | $lintData = array_filter( |
920 | $lintData, static fn ( $value ) => $value !== null |
921 | ); |
922 | |
923 | if ( empty( $lintData['dsr'] ) ) { |
924 | $this->log( 'error/lint', "Missing DSR; msg=", $lintData ); |
925 | return; |
926 | } |
927 | |
928 | // This will always be recorded as a native 'byte' offset |
929 | $lintData['dsr'] = $lintData['dsr']->jsonSerialize(); |
930 | |
931 | // Ensure a "params" array; null entries were already filtered out above, |
932 | // so the null-coalescing assignment only fires when the key is absent. |
933 | $lintData['params'] ??= []; |
934 | |
935 | // Array union: the 'type' key given here wins over any 'type' in $lintData. |
936 | $this->lints[] = [ 'type' => $type ] + $lintData; |
937 | } |
938 | |
939 | /** |
940 | * Retrieve recorded lints |
941 | * @return array[] One array per lint, as appended by recordLint() or replaced wholesale by setLints() |
942 | */ |
943 | public function getLints(): array { |
944 | return $this->lints; |
945 | } |
946 | |
947 | /** |
948 | * Init lints to the passed array, replacing whatever was recorded so far. |
949 | * |
950 | * FIXME: This is currently needed to reset lints after converting |
951 | * DSR offsets because of ordering of DOM passes. So, in reality, |
952 | * there should be no real use case for setting this anywhere else |
953 | * but from that single callsite. |
954 | * |
955 | * @param array $lints The new list of lints; stored as-is |
956 | */ |
957 | public function setLints( array $lints ): void { |
958 | $this->lints = $lints; |
959 | } |
960 | |
961 | /** |
962 | * @param string $prefix Passed to the Parsoid logger as its first argument (recordLint() uses 'error/lint') |
963 | * @param mixed ...$args Forwarded unchanged to the logger |
964 | */ |
965 | public function log( string $prefix, ...$args ): void { |
966 | $this->parsoidLogger->log( $prefix, ...$args ); |
967 | } |
968 | |
969 | /** |
970 | * Bump usage of some limited parser resource |
971 | * (ex: tokens, # transclusions, # list items, etc.) |
972 | * |
973 | * @param string $resource |
974 | * @param int $count How much of the resource is used? |
975 | * @return ?bool `null` if usage was already over the limit before this call (nothing is recorded), `false` if this bump pushes usage over the limit, `true` otherwise |
976 | */ |
977 | public function bumpWt2HtmlResourceUse( string $resource, int $count = 1 ): ?bool { |
978 | $used = $this->wt2htmlUsage[$resource] ?? 0; |
979 | if ( !$this->compareWt2HtmlLimit( $resource, $used ) ) { |
980 | // Already over the limit before this bump: usage is left untouched. |
981 | return null; |
982 | } |
983 | $this->wt2htmlUsage[$resource] = $used + $count; |
984 | return $this->compareWt2HtmlLimit( $resource, $used + $count ); |
985 | } |
986 | |
987 | /** |
988 | * @param string $resource Resource name to look up in the site config's wt2html limits |
989 | * @param int $n Usage to compare against the limit |
990 | * @return bool `false` when $n exceeds the configured limit; `true` otherwise, including when no limit is configured |
991 | */ |
992 | public function compareWt2HtmlLimit( string $resource, int $n ): bool { |
993 | $limits = $this->siteConfig->getWt2HtmlLimits(); |
994 | return !isset( $limits[$resource] ) || $n <= $limits[$resource]; |
995 | } |
996 | |
997 | /** |
998 | * Bump usage of some limited serializer resource |
999 | * (ex: html size) |
1000 | * |
1001 | * @param string $resource |
1002 | * @param int $count How much of the resource is used? (defaults to 1) |
1003 | * @throws ResourceLimitExceededException When the new total exceeds the configured limit for $resource |
1004 | */ |
1005 | public function bumpHtml2WtResourceUse( string $resource, int $count = 1 ): void { |
1006 | $n = ( $this->html2wtUsage[$resource] ?? 0 ) + $count; |
1007 | // Store the new total before checking, so usage is recorded even |
1008 | // when the limit check below throws. |
1009 | $this->html2wtUsage[$resource] = $n; |
1010 | |
1011 | // A resource with no configured limit is never rejected. |
1012 | $limit = $this->siteConfig->getHtml2WtLimits()[$resource] ?? null; |
1013 | if ( $limit !== null && $n > $limit ) { |
1014 | throw new ResourceLimitExceededException( "html2wt: $resource limit exceeded: $n" ); |
1015 | } |
1016 | } |
1017 | |
1018 | /** |
1019 | * Get an appropriate content handler, given a contentmodel. |
1020 | * |
1021 | * @param ?string &$contentmodel An optional content model which |
1022 | * will override whatever the source specifies. When passed as null it is set (by reference) |
1023 | * to the page's content model; it is not rewritten when the wikitext fallback handler is used. |
1024 | * @return ContentModelHandler An appropriate content handler (the wikitext handler when the site config provides none) |
1025 | */ |
1026 | public function getContentHandler( |
1027 | ?string &$contentmodel = null |
1028 | ): ContentModelHandler { |
1029 | $contentmodel ??= $this->pageConfig->getContentModel(); |
1030 | $handler = $this->siteConfig->getContentModelHandler( $contentmodel ); |
1031 | if ( !$handler && $contentmodel !== 'wikitext' ) { |
1032 | // For now, fallback to 'wikitext' as the default handler |
1033 | // FIXME: This is bogus, but this is just so suppress noise in our |
1034 | // logs till we get around to handling all these other content models. |
1035 | // $this->log( 'warn', "Unknown contentmodel $contentmodel" ); |
1036 | } |
1037 | return $handler ?? $this->wikitextContentModelHandler; |
1038 | } |
1039 | |
1040 | /** |
1041 | * Is the language converter enabled on this page? |
1042 | * Asks the site config about the page language reported by the page config. |
1043 | * @return bool |
1044 | */ |
1045 | public function langConverterEnabled(): bool { |
1046 | return $this->siteConfig->langConverterEnabledBcp47( |
1047 | $this->pageConfig->getPageLanguageBcp47() |
1048 | ); |
1049 | } |
1050 | |
1051 | /** |
1052 | * The HTML content version of the input document (for html2wt and html2html conversions). |
1053 | * @see https://www.mediawiki.org/wiki/Parsoid/API#Content_Negotiation |
1054 | * @see https://www.mediawiki.org/wiki/Specs/HTML#Versioning |
1055 | * @return string A semver version number, as stored on this environment |
1056 | */ |
1057 | public function getInputContentVersion(): string { |
1058 | return $this->inputContentVersion; |
1059 | } |
1060 | |
1061 | /** |
1062 | * The HTML content version of the output document (presumably relevant to conversions that produce HTML, i.e. wt2html and html2html). |
1063 | * @see https://www.mediawiki.org/wiki/Parsoid/API#Content_Negotiation |
1064 | * @see https://www.mediawiki.org/wiki/Specs/HTML#Versioning |
1065 | * @return string A semver version number, as stored on this environment |
1066 | */ |
1067 | public function getOutputContentVersion(): string { |
1068 | return $this->outputContentVersion; |
1069 | } |
1070 | |
1071 | /** |
1072 | * If non-null, the language variant used for Parsoid HTML; we convert |
1073 | * to this if wt2html, or from this (if html2wt). |
1074 | * Legacy accessor: the stored value is passed through Utils::bcp47ToMwCode() before being returned. |
1075 | * @return string|null a MediaWiki-internal language code |
1076 | * @deprecated Use ::getHtmlVariantLanguageBcp47() (T320662) |
1077 | */ |
1078 | public function getHtmlVariantLanguage(): ?string { |
1079 | return Utils::bcp47ToMwCode( $this->htmlVariantLanguage ); |
1080 | } |
1081 | |
1082 | /** |
1083 | * If non-null, the language variant used for Parsoid HTML; we convert |
1084 | * to this if wt2html, or from this (if html2wt). |
1085 | * |
1086 | * @return ?Bcp47Code a BCP-47 language code, returned exactly as stored (no conversion) |
1087 | */ |
1088 | public function getHtmlVariantLanguageBcp47(): ?Bcp47Code { |
1089 | return $this->htmlVariantLanguage; // Stored as BCP-47 |
1090 | } |
1091 | |
1092 | /** |
1093 | * If non-null, the language variant to be used for wikitext. If null, |
1094 | * heuristics will be used to identify the original wikitext variant |
1095 | * in wt2html mode, and in html2wt mode new or edited HTML will be left |
1096 | * unconverted. |
1097 | * Legacy accessor: the stored value is passed through Utils::bcp47ToMwCode() before being returned. |
1098 | * @return string|null a MediaWiki-internal language code |
1099 | * @deprecated Use ::getWtVariantLanguageBcp47() (T320662) |
1100 | */ |
1101 | public function getWtVariantLanguage(): ?string { |
1102 | return Utils::bcp47ToMwCode( $this->wtVariantLanguage ); |
1103 | } |
1104 | |
1105 | /** |
1106 | * If non-null, the language variant to be used for wikitext. If null, |
1107 | * heuristics will be used to identify the original wikitext variant |
1108 | * in wt2html mode, and in html2wt mode new or edited HTML will be left |
1109 | * unconverted. |
1110 | * |
1111 | * @return ?Bcp47Code a BCP-47 language code, returned exactly as stored (no conversion) |
1112 | */ |
1113 | public function getWtVariantLanguageBcp47(): ?Bcp47Code { |
1114 | return $this->wtVariantLanguage; |
1115 | } |
1116 | |
1117 | /** |
1118 | * Determine appropriate vary headers for the HTML form of this page. |
1119 | * @return string Header names, sorted and joined with ", " |
1120 | */ |
1121 | public function htmlVary(): string { |
1122 | // 'Accept' is always present: the response varies on Content-Type. |
1123 | $headers = $this->langConverterEnabled() |
1124 | ? [ 'Accept', 'Accept-Language' ] |
1125 | : [ 'Accept' ]; |
1126 | |
1127 | sort( $headers ); |
1128 | return implode( ', ', $headers ); |
1129 | } |
1130 | |
1131 | /** |
1132 | * Determine an appropriate content-language for the HTML form of this page. |
1133 | * @return string a MediaWiki-internal language code, converted from htmlContentLanguageBcp47() via Utils::bcp47ToMwCode() |
1134 | * @deprecated Use ::htmlContentLanguageBcp47() (T320662) |
1135 | */ |
1136 | public function htmlContentLanguage(): string { |
1137 | return Utils::bcp47ToMwCode( $this->htmlContentLanguageBcp47() ); |
1138 | } |
1139 | |
1140 | /** |
1141 | * Determine an appropriate content-language for the HTML form of this page. |
1142 | * @return Bcp47Code a BCP-47 language code: the page config's variant when non-null, else the page language. |
1143 | */ |
1144 | public function htmlContentLanguageBcp47(): Bcp47Code { |
1145 | // The PageConfig variant is set iff we do variant conversion on the |
1146 | // HTML; when it is null, fall back to the page language. |
1147 | return $this->pageConfig->getVariantBcp47() ?? |
1148 | $this->pageConfig->getPageLanguageBcp47(); |
1149 | } |
1150 | } |