Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 187 |
|
0.00% |
0 / 11 |
CRAP | |
0.00% |
0 / 1 |
DataAccess | |
0.00% |
0 / 187 |
|
0.00% |
0 / 11 |
2070 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 11 |
|
0.00% |
0 / 1 |
2 | |||
makeTransformOptions | |
0.00% |
0 / 15 |
|
0.00% |
0 / 1 |
56 | |||
getPageInfo | |
0.00% |
0 / 47 |
|
0.00% |
0 / 1 |
90 | |||
getFileInfo | |
0.00% |
0 / 50 |
|
0.00% |
0 / 1 |
182 | |||
prepareParser | |
0.00% |
0 / 11 |
|
0.00% |
0 / 1 |
6 | |||
doPst | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
2 | |||
parseWikitext | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
2 | |||
preprocessWikitext | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
2 | |||
fetchTemplateSource | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
12 | |||
fetchTemplateData | |
0.00% |
0 / 12 |
|
0.00% |
0 / 1 |
12 | |||
logLinterData | |
0.00% |
0 / 12 |
|
0.00% |
0 / 1 |
20 |
1 | <?php |
2 | /** |
3 | * Copyright (C) 2011-2022 Wikimedia Foundation and others. |
4 | * |
5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by |
7 | * the Free Software Foundation; either version 2 of the License, or |
8 | * (at your option) any later version. |
9 | * |
10 | * This program is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | * GNU General Public License for more details. |
14 | * |
15 | * You should have received a copy of the GNU General Public License along |
16 | * with this program; if not, write to the Free Software Foundation, Inc., |
17 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
18 | */ |
19 | |
20 | namespace MediaWiki\Parser\Parsoid\Config; |
21 | |
22 | use ContentHandler; |
23 | use File; |
24 | use LanguageCode; |
25 | use MediaTransformError; |
26 | use MediaWiki\Cache\LinkBatchFactory; |
27 | use MediaWiki\Config\ServiceOptions; |
28 | use MediaWiki\Content\Transform\ContentTransformer; |
29 | use MediaWiki\HookContainer\HookContainer; |
30 | use MediaWiki\HookContainer\HookRunner; |
31 | use MediaWiki\Linker\Linker; |
32 | use MediaWiki\MainConfigNames; |
33 | use MediaWiki\Page\File\BadFileLookup; |
34 | use MediaWiki\Parser\Parser; |
35 | use MediaWiki\Title\Title; |
36 | use ParserFactory; |
37 | use PPFrame; |
38 | use RepoGroup; |
39 | use Wikimedia\Assert\UnreachableException; |
40 | use Wikimedia\Parsoid\Config\DataAccess as IDataAccess; |
41 | use Wikimedia\Parsoid\Config\PageConfig as IPageConfig; |
42 | use Wikimedia\Parsoid\Config\PageContent as IPageContent; |
43 | use Wikimedia\Parsoid\Core\ContentMetadataCollector; |
44 | use Wikimedia\Parsoid\Core\LinkTarget as ParsoidLinkTarget; |
45 | use Wikimedia\Rdbms\ReadOnlyMode; |
46 | |
47 | /** |
48 | * Implement Parsoid's abstract class for data access. |
49 | * |
50 | * @since 1.39 |
51 | * @internal |
52 | */ |
53 | class DataAccess extends IDataAccess { |
/**
 * Main-config option names that the ServiceOptions object handed to the
 * constructor is asserted to contain.
 */
public const CONSTRUCTOR_OPTIONS = [ MainConfigNames::SVGMaxSize ];

/** Options object; read for SVGMaxSize in makeTransformOptions(). */
private ServiceOptions $config;
/** Used by getFileInfo() to look up File objects by name. */
private RepoGroup $repoGroup;
/** Used by getFileInfo() to fill in the 'badFile' flag. */
private BadFileLookup $badFileLookup;
/** Stored as passed to the constructor; $hookRunner is built from it. */
private HookContainer $hookContainer;
/** Hook runner built from $hookContainer in the constructor. */
private HookRunner $hookRunner;
/** Performs the pre-save transform in doPst(). */
private ContentTransformer $contentTransformer;
/** Checked by logLinterData(), which does nothing while read-only. */
private ReadOnlyMode $readOnlyMode;
/** Creates the link batch executed by getPageInfo(). */
private LinkBatchFactory $linkBatchFactory;

/** Factory for the legacy parser; see self::prepareParser(). */
private ParserFactory $parserFactory;
/** Lazy-created via self::prepareParser() */
private ?Parser $parser = null;
/** Assigned in self::prepareParser() whenever the parser state is cleared. */
private PPFrame $ppFrame;
/** PageConfig seen by the most recent self::prepareParser() call, if any. */
private ?PageConfig $previousPageConfig = null;
71 | |
/**
 * Store the injected services and build a HookRunner from the hook container.
 *
 * @param ServiceOptions $config MediaWiki main configuration object; it is
 *   asserted to contain every option in self::CONSTRUCTOR_OPTIONS.
 * @param RepoGroup $repoGroup
 * @param BadFileLookup $badFileLookup
 * @param HookContainer $hookContainer
 * @param ContentTransformer $contentTransformer
 * @param ReadOnlyMode $readOnlyMode used to disable linting when the
 *   database is read-only.
 * @param ParserFactory $parserFactory A legacy parser factory,
 *   for PST/preprocessing/extension handling
 * @param LinkBatchFactory $linkBatchFactory
 */
public function __construct(
	ServiceOptions $config,
	RepoGroup $repoGroup,
	BadFileLookup $badFileLookup,
	HookContainer $hookContainer,
	ContentTransformer $contentTransformer,
	ReadOnlyMode $readOnlyMode,
	ParserFactory $parserFactory,
	LinkBatchFactory $linkBatchFactory
) {
	// Validate the options before storing anything.
	$config->assertRequiredOptions( self::CONSTRUCTOR_OPTIONS );

	$this->config = $config;
	$this->repoGroup = $repoGroup;
	$this->badFileLookup = $badFileLookup;
	$this->contentTransformer = $contentTransformer;
	$this->readOnlyMode = $readOnlyMode;
	$this->linkBatchFactory = $linkBatchFactory;
	$this->parserFactory = $parserFactory;

	$this->hookContainer = $hookContainer;
	$this->hookRunner = new HookRunner( $hookContainer );

	// With no previous PageConfig recorded, the first prepareParser() call
	// sees a changed page and therefore clears/initializes the parser state.
	$this->previousPageConfig = null;
}
108 | |
/**
 * Build the parameter array handed to File::transform() for one file.
 *
 * Parameters rejected by the file's media handler are dropped, a width is
 * filled in when none was given, and 'targetlang' is always set from the
 * page language. An empty array is returned when the file has no handler.
 *
 * @param IPageConfig $pageConfig
 * @param File $file
 * @param array $hp Requested handler parameters
 * @return array
 */
private function makeTransformOptions( IPageConfig $pageConfig, $file, array $hp ): array {
	$mediaHandler = $file->getHandler();
	if ( !$mediaHandler ) {
		// Without a handler the caller will get iconThumb()
		return [];
	}

	// Like Parser::makeImage(): keep only the parameters the handler accepts.
	$hp = array_filter(
		$hp,
		static fn ( $value, $name ) => $mediaHandler->validateParam( $name, $value ),
		ARRAY_FILTER_USE_BOTH
	);

	// Similar to Linker::makeImageLink(): derive a width when none is given.
	// No default thumbnail width is needed here, because Parsoid always sets
	// the width parameter for thumbnails itself.
	if ( !isset( $hp['width'] ) ) {
		// A vector image with only a height specified should not be limited
		// by its "normal" width, so the configured SVG maximum is used.
		$heightOnlyVector = isset( $hp['height'] ) && $file->isVectorized();
		$hp['width'] = $heightOnlyVector
			? $this->config->get( MainConfigNames::SVGMaxSize )
			: $file->getWidth( $hp['page'] ?? 0 );
	}

	// Parser::makeImage() always sets this
	$pageLanguage = $pageConfig->getPageLanguageBcp47();
	$hp['targetlang'] = LanguageCode::bcp47ToInternal( $pageLanguage );

	return $hp;
}
151 | |
/**
 * @inheritDoc
 *
 * The result is keyed by the strings given in $titles. Titles that core
 * cannot parse get an "invalid" stub entry; all others report page ID,
 * latest revision ID, existence, redirect status and the link classes
 * left in place by the GetLinkColours hook.
 */
public function getPageInfo( $pageConfigOrTitle, array $titles ): array {
	// Resolve the context title that is passed to the GetLinkColours hook.
	if ( is_string( $pageConfigOrTitle ) ) {
		// Temporary, deprecated.
		$contextTitle = Title::newFromTextThrow( $pageConfigOrTitle );
	} else {
		if ( $pageConfigOrTitle instanceof IPageConfig ) {
			$contextTarget = $pageConfigOrTitle->getLinkTarget();
		} elseif ( $pageConfigOrTitle instanceof ParsoidLinkTarget ) {
			$contextTarget = $pageConfigOrTitle;
		} else {
			throw new UnreachableException( "Bad type for argument 1" );
		}
		$contextTitle = Title::newFromLinkTarget( $contextTarget );
	}

	$info = [];
	$validTitles = [];
	foreach ( $titles as $name ) {
		$parsed = Title::newFromText( $name );
		if ( $parsed ) {
			$validTitles[$name] = $parsed;
			continue;
		}
		// Title::newFromText in core (not our bespoke version in
		// src/Utils/Title.php) can return null for invalid titles.
		// FIXME: This is a bandaid to patch up the fact that Env::makeTitle
		// treats this as a valid title, but Title::newFromText treats it as
		// invalid. T237535
		// The stub matches what ApiQuery::outputGeneralPageInfo() would
		// return for an invalid title.
		$info[$name] = [
			'pageId' => -1,
			'revId' => -1,
			'invalid' => true,
			'invalidreason' => 'The requested page title is invalid',
		];
	}

	// Run one link batch over all valid titles before querying them.
	$batch = $this->linkBatchFactory->newLinkBatch( $validTitles );
	$batch->setCaller( __METHOD__ );
	$batch->execute();

	// Assemble the arguments for the GetLinkColours hook.
	$idToDbKey = [];
	$classesByDbKey = [];
	foreach ( $validTitles as $titleObj ) {
		$dbKey = $titleObj->getPrefixedDBkey();
		$idToDbKey[$titleObj->getArticleID()] = $dbKey;
		$classesByDbKey[$dbKey] = $titleObj->isRedirect() ? 'mw-redirect' : '';
	}
	# $classesByDbKey is passed by reference and mutated
	$this->hookRunner->onGetLinkColours( $idToDbKey, $classesByDbKey, $contextTitle );

	foreach ( $validTitles as $name => $titleObj ) {
		/** @var Title $titleObj */
		$classString = $classesByDbKey[$titleObj->getPrefixedDBkey()] ?? '';
		# See ApiQueryInfo::getLinkClasses() in core
		$linkClasses = preg_split( '/\s+/', $classString, -1, PREG_SPLIT_NO_EMPTY );
		$info[$name] = [
			'pageId' => $titleObj->getArticleID(),
			'revId' => $titleObj->getLatestRevID(),
			'missing' => !$titleObj->exists(),
			'known' => $titleObj->isKnown(),
			'redirect' => $titleObj->isRedirect(),
			'linkclasses' => $linkClasses,
		];
	}

	return $info;
}
221 | |
/**
 * @inheritDoc
 *
 * Each entry of $files is read as [ file name, requested dimensions ]. The
 * returned list has one entry per input, in the same order; an entry is
 * null when the file was not found.
 */
public function getFileInfo( IPageConfig $pageConfig, array $files ): array {
	$contextTitle = Title::newFromLinkTarget( $pageConfig->getLinkTarget() );

	// Look up all requested files in a single call.
	$names = [];
	foreach ( $files as [ $requestedName ] ) {
		$names[] = $requestedName;
	}
	$found = $this->repoGroup->findFiles( $names );

	$infos = [];
	foreach ( $files as [ $filename, $dims ] ) {
		/** @var File $file */
		$file = $found[$filename] ?? null;
		if ( !$file ) {
			$infos[] = null;
			continue;
		}

		// See Linker::makeImageLink; 'page' is a key in $handlerParams
		// core uses 'false' as the default then casts to (int) => 0
		$pageNum = $dims['page'] ?? 0;

		$info = [
			'width' => $file->getWidth( $pageNum ),
			'height' => $file->getHeight( $pageNum ),
			'size' => $file->getSize(),
			'mediatype' => $file->getMediaType(),
			'mime' => $file->getMimeType(),
			'url' => $file->getFullUrl(),
			'mustRender' => $file->mustRender(),
			'badFile' => $this->badFileLookup->isBadFile( $filename, $contextTitle ),
			'timestamp' => $file->getTimestamp(),
			'sha1' => $file->getSha1(),
		];

		// Only report a duration when the file has a non-zero length.
		$length = $file->getLength();
		if ( $length ) {
			$info['duration'] = (float)$length;
		}

		// A requested 'seek' is passed on under the 'thumbtime' key.
		if ( isset( $dims['seek'] ) ) {
			$dims['thumbtime'] = $dims['seek'];
		}

		$txopts = $this->makeTransformOptions( $pageConfig, $file, $dims );
		$mto = $file->transform( $txopts );

		if ( !$mto ) {
			$info['thumberror'] = "Presumably, invalid parameters, despite validation.";
		} elseif ( $mto->isError() && $mto instanceof MediaTransformError ) {
			$info['thumberror'] = $mto->toText();
		} else {
			if ( $txopts ) {
				// Do srcset scaling
				Linker::processResponsiveImages( $file, $mto, $txopts );
				if ( count( $mto->responsiveUrls ) ) {
					$info['responsiveUrls'] = [];
					foreach ( $mto->responsiveUrls as $density => $responsiveUrl ) {
						$info['responsiveUrls'][$density] = $responsiveUrl;
					}
				}
			}

			// Proposed MediaTransformOutput serialization method for T51896 etc.
			// Note that getAPIData(['fullurl']) would return
			// wfExpandUrl(), which wouldn't respect the wiki's
			// protocol preferences -- instead it would use the
			// protocol used for the API request.
			if ( is_callable( [ $mto, 'getAPIData' ] ) ) {
				$info['thumbdata'] = $mto->getAPIData( [ 'withhash' ] );
			}

			$info['thumburl'] = $mto->getUrl();
			$info['thumbwidth'] = $mto->getWidth();
			$info['thumbheight'] = $mto->getHeight();
		}

		$infos[] = $info;
	}

	return $infos;
}
309 | |
/**
 * Get the shared legacy parser ready for preprocessing or extension tag
 * parsing on the given page, clearing its state if necessary.
 *
 * The parser is created on first use and then reused. Its state is cleared,
 * and a new PPFrame is created, only when $pageConfig is not the same
 * object as in the previous call.
 *
 * @param IPageConfig $pageConfig
 * @param int $outputType
 * @return Parser
 */
private function prepareParser( IPageConfig $pageConfig, int $outputType ) {
	'@phan-var PageConfig $pageConfig'; // @var PageConfig $pageConfig
	// Keeping the state while the PageConfig is unchanged lets the Parser
	// retain its internal caches, and should also provide better
	// compatibility with extension tags.
	$pageChanged = ( $pageConfig !== $this->previousPageConfig );
	$this->previousPageConfig = $pageConfig;

	// Use the same legacy parser object for all calls to extension tag
	// processing, for greater compatibility.
	if ( $this->parser === null ) {
		$this->parser = $this->parserFactory->create();
	}
	$legacyParser = $this->parser;

	$pageTitle = Title::newFromLinkTarget( $pageConfig->getLinkTarget() );
	$parserOptions = $pageConfig->getParserOptions();
	$revisionId = $pageConfig->getRevisionId();
	$legacyParser->startExternalParse(
		$pageTitle, $parserOptions, $outputType, $pageChanged, $revisionId
	);
	$legacyParser->resetOutput();

	// Retain a PPFrame object between preprocess requests since it contains
	// some useful caches.
	if ( $pageChanged ) {
		$this->ppFrame = $legacyParser->getPreprocessor()->newFrame();
	}

	return $legacyParser;
}
340 | |
/**
 * @inheritDoc
 *
 * Wraps the wikitext in a wikitext Content object, runs the pre-save
 * transform on it for the page's title and the parser options' user, and
 * returns the serialized result.
 */
public function doPst( IPageConfig $pageConfig, string $wikitext ): string {
	'@phan-var PageConfig $pageConfig'; // @var PageConfig $pageConfig
	// This could use prepareParser(), but it's only called once per page,
	// so it's not essential.
	$pageTitle = Title::newFromLinkTarget( $pageConfig->getLinkTarget() );
	$userIdentity = $pageConfig->getParserOptions()->getUserIdentity();
	$original = ContentHandler::makeContent( $wikitext, $pageTitle, CONTENT_MODEL_WIKITEXT );

	$transformed = $this->contentTransformer->preSaveTransform(
		$original,
		$pageTitle,
		$userIdentity,
		$pageConfig->getParserOptions()
	);

	return $transformed->serialize();
}
356 | |
/**
 * @inheritDoc
 *
 * Parses the wikitext with the legacy parser in OT_HTML mode, merges the
 * resulting ParserOutput into $metadata, and returns the extracted body.
 */
public function parseWikitext(
	IPageConfig $pageConfig,
	ContentMetadataCollector $metadata,
	string $wikitext
): string {
	$legacyParser = $this->prepareParser( $pageConfig, Parser::OT_HTML );
	$html = $legacyParser->parseExtensionTagAsTopLevelDoc( $wikitext );

	// XXX: Ideally we will eventually have the legacy parser use our
	// ContentMetadataCollector instead of having a new ParserOutput
	// created (implicitly in ::prepareParser()/Parser::resetOutput() )
	// which we then have to manually merge.
	$parserOutput = $legacyParser->getOutput();
	$parserOutput->setRawText( $html );
	# merges $parserOutput into $metadata
	$parserOutput->collectMetadata( $metadata );

	$rawText = $parserOutput->getRawText();
	return Parser::extractBody( $rawText );
}
374 | |
/**
 * @inheritDoc
 *
 * Runs the ParserBeforePreprocess hook, expands the wikitext with the
 * legacy parser using the retained PPFrame, unstrips the result, and
 * merges the resulting ParserOutput into $metadata.
 */
public function preprocessWikitext(
	IPageConfig $pageConfig,
	ContentMetadataCollector $metadata,
	string $wikitext
): string {
	$legacyParser = $this->prepareParser( $pageConfig, Parser::OT_PREPROCESS );

	# $wikitext is passed by reference and mutated
	$this->hookRunner->onParserBeforePreprocess(
		$legacyParser, $wikitext, $legacyParser->getStripState()
	);

	$expanded = $legacyParser->replaceVariables( $wikitext, $this->ppFrame );

	// FIXME (T289545): StripState markers protect content that need to be protected from further
	// "wikitext processing". So, where the result has strip state markers, we actually
	// need to tunnel this content through rather than unwrap and let it go through the
	// rest of the parsoid pipeline. For example, some parser functions might return HTML
	// not wikitext, and where the content might contain wikitext characters, we are now
	// going to potentially mangle that output.
	$expanded = $legacyParser->getStripState()->unstripBoth( $expanded );

	// XXX: Ideally we will eventually have the legacy parser use our
	// ContentMetadataCollector instead of having a new ParserOutput
	// created (implicitly in ::prepareParser()/Parser::resetOutput() )
	// which we then have to manually merge.
	# merges the parser's output into $metadata
	$legacyParser->getOutput()->collectMetadata( $metadata );

	return $expanded;
}
403 | |
/**
 * @inheritDoc
 *
 * Returns null when the PageConfig yields no revision record for the
 * template title.
 */
public function fetchTemplateSource(
	IPageConfig $pageConfig, $title
): ?IPageContent {
	'@phan-var PageConfig $pageConfig'; // @var PageConfig $pageConfig
	$templateTitle = is_string( $title )
		? Title::newFromTextThrow( $title )
		: Title::newFromLinkTarget( $title );

	// Use the PageConfig to take advantage of custom template
	// fetch hooks like FlaggedRevisions, etc.
	$revRecord = $pageConfig->fetchRevisionRecordOfTemplate( $templateTitle );
	if ( !$revRecord ) {
		return null;
	}

	return new PageContent( $revRecord );
}
421 | |
/**
 * @inheritDoc
 *
 * Asks the ParserFetchTemplateData hook for the template's data and
 * returns it converted to nested associative arrays, or the falsy lookup
 * result (normally null) when the hook supplied nothing for the title.
 */
public function fetchTemplateData( IPageConfig $pageConfig, $title ): ?array {
	// The hook is keyed by prefixed title text, so normalize non-strings.
	$titleText = is_string( $title )
		? $title
		: Title::newFromLinkTarget( $title )->getPrefixedText();

	// @todo: This hook needs some clean up: T304899
	$dataByTitle = [];
	$this->hookRunner->onParserFetchTemplateData(
		[ $titleText ],
		$dataByTitle # value returned by reference
	);

	$tplData = $dataByTitle[$titleText] ?? null;
	if ( !$tplData ) {
		return $tplData;
	}

	// The hook returns this as a stdclass; a JSON round trip deep-converts
	// it to an associative array.
	return json_decode( json_encode( $tplData ), true );
}
443 | |
/**
 * @inheritDoc
 *
 * Does nothing while the wiki is read-only. Otherwise the lints are passed
 * to the ParserLogLinterData hook, but only when the PageConfig's revision
 * is the page's latest revision.
 */
public function logLinterData( IPageConfig $pageConfig, array $lints ): void {
	if ( $this->readOnlyMode->isReadOnly() ) {
		return;
	}

	$revId = $pageConfig->getRevisionId();
	$pageTitle = Title::newFromLinkTarget( $pageConfig->getLinkTarget() );
	$title = $pageTitle->getPrefixedText();

	$pageInfo = $this->getPageInfo( $pageConfig, [ $title ] );
	$latest = $pageInfo[$title]['revId'];

	// Only send the request if this is the latest revision
	if ( $revId === null || $revId !== $latest ) {
		return;
	}

	$this->hookRunner->onParserLogLinterData( $title, $revId, $lints );
}
464 | |
465 | } |