Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
81.93% |
195 / 238 |
|
57.14% |
8 / 14 |
CRAP | |
0.00% |
0 / 1 |
DataAccess | |
81.93% |
195 / 238 |
|
57.14% |
8 / 14 |
97.08 | |
0.00% |
0 / 1 |
getCache | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
2 | |||
setCache | |
42.86% |
3 / 7 |
|
0.00% |
0 / 1 |
4.68 | |||
__construct | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
getPageInfo | |
95.12% |
39 / 41 |
|
0.00% |
0 / 1 |
12 | |||
getFileInfo | |
65.28% |
47 / 72 |
|
0.00% |
0 / 1 |
45.15 | |||
stripProto | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
4 | |||
doPst | |
100.00% |
14 / 14 |
|
100.00% |
1 / 1 |
2 | |||
mergeMetadata | |
47.06% |
8 / 17 |
|
0.00% |
0 / 1 |
17.50 | |||
parseWikitext | |
100.00% |
21 / 21 |
|
100.00% |
1 / 1 |
3 | |||
preprocessWikitext | |
100.00% |
18 / 18 |
|
100.00% |
1 / 1 |
3 | |||
fetchTemplateSource | |
94.74% |
18 / 19 |
|
0.00% |
0 / 1 |
3.00 | |||
fetchTemplateData | |
100.00% |
13 / 13 |
|
100.00% |
1 / 1 |
2 | |||
logLinterData | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
toPrefixedText | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 |
1 | <?php |
2 | |
3 | declare( strict_types = 1 ); |
4 | |
5 | namespace Wikimedia\Parsoid\Config\Api; |
6 | |
7 | use Wikimedia\Parsoid\Config\Api\SiteConfig as ApiSiteConfig; |
8 | use Wikimedia\Parsoid\Config\DataAccess as IDataAccess; |
9 | use Wikimedia\Parsoid\Config\PageConfig; |
10 | use Wikimedia\Parsoid\Config\PageContent; |
11 | use Wikimedia\Parsoid\Config\SiteConfig as ISiteConfig; |
12 | use Wikimedia\Parsoid\Core\ContentMetadataCollector; |
13 | use Wikimedia\Parsoid\Core\LinkTarget; |
14 | use Wikimedia\Parsoid\Mocks\MockPageContent; |
15 | use Wikimedia\Parsoid\Utils\PHPUtils; |
16 | use Wikimedia\Parsoid\Utils\Title; |
17 | |
18 | /** |
19 | * DataAccess via MediaWiki's Action API |
20 | * |
21 | * Note this is intended for testing, not performance. |
22 | */ |
23 | class DataAccess extends IDataAccess { |
24 | |
25 | /** @var ApiHelper */ |
26 | private $api; |
27 | |
28 | /** |
29 | * @var bool Should we strip the protocol from returned URLs? |
30 | * Generally this should be true, since the protocol of the API |
31 | * request doesn't necessarily match the protocol of article |
32 | * access; ie, we could be using https to access the API but emit |
33 | * article content which can be read with http. But for running |
34 | * parserTests, we need to include the protocol in order to match |
35 | * the parserTest configuration in core. |
36 | */ |
37 | private $stripProto; |
38 | |
39 | /** |
40 | * @name Caching |
41 | * @todo Someone should librarize MediaWiki core's MapCacheLRU so we can |
42 | * pull it in via composer and use it here. |
43 | * @{ |
44 | */ |
45 | |
46 | private const MAX_CACHE_LEN = 100; |
47 | |
48 | /** |
49 | * @var array |
50 | */ |
51 | private $cache = []; |
52 | |
53 | private ISiteConfig $siteConfig; |
54 | |
/**
 * Look up a cached value and mark it as the most recently used entry.
 *
 * @param string $key Cache key
 * @return mixed The cached value, or null if nothing is cached under $key
 */
private function getCache( string $key ) {
	if ( !isset( $this->cache[$key] ) ) {
		return null;
	}
	$value = $this->cache[$key];
	// Recency is encoded by position in the array, so drop the entry and
	// re-insert it to move it to the end.
	unset( $this->cache[$key] );
	$this->cache[$key] = $value;
	return $value;
}
71 | |
/**
 * Store a value in the cache as the most recently used entry.
 *
 * When the key is new and the cache already holds MAX_CACHE_LEN entries,
 * the entry at the front of the array is evicted to make room.
 *
 * @param string $key Cache key
 * @param mixed $value Value to store. Must not be null: a null entry is
 *  not seen by the isset() check below.
 */
private function setCache( string $key, $value ): void {
	$alreadyCached = isset( $this->cache[$key] );
	if ( $alreadyCached ) {
		// Recency is encoded by position in the array, so remove the
		// stale entry and let the new value be appended at the end.
		unset( $this->cache[$key] );
	} elseif ( count( $this->cache ) >= self::MAX_CACHE_LEN ) {
		// Cache is full: evict whatever sits at the front of the array.
		$oldestKey = array_key_first( $this->cache );
		unset( $this->cache[$oldestKey] );
	}
	$this->cache[$key] = $value;
}
89 | |
90 | /** @} */ |
91 | |
/**
 * @param ApiHelper $api Helper used to issue the API requests
 * @param ISiteConfig $siteConfig Site configuration
 * @param array $opts Options. Recognised key:
 *  - stripProto: whether to strip the protocol from returned URLs;
 *    defaults to true when absent or null.
 */
public function __construct( ApiHelper $api, ISiteConfig $siteConfig, array $opts ) {
	$this->stripProto = $opts['stripProto'] ?? true;
	$this->siteConfig = $siteConfig;
	$this->api = $api;
}
102 | |
/** @inheritDoc */
public function getPageInfo( $pageConfigOrTitle, array $titles ): array {
	if ( $pageConfigOrTitle instanceof PageConfig ) {
		$contextTitle = $pageConfigOrTitle->getLinkTarget();
	} else {
		$contextTitle = $pageConfigOrTitle;
	}

	if ( !$titles ) {
		return [];
	}

	$contextText = $this->toPrefixedText( $contextTitle );
	$result = [];
	// Titles are looked up in batches of 50 per request.
	foreach ( array_chunk( $titles, 50 ) as $chunk ) {
		$response = $this->api->makeRequest( [
			'action' => 'query',
			'prop' => 'info',
			'inprop' => 'linkclasses',
			'inlinkcontext' => $contextText,
			'titles' => implode( '|', $chunk ),
		] );
		$query = $response['query'];

		// Map each title as requested to the form the API normalized it to.
		$normalized = [];
		foreach ( $query['normalized'] ?? [] as $entry ) {
			$source = $entry['from'];
			if ( $entry['fromencoded'] ) {
				$source = rawurldecode( $source );
			}
			$normalized[$source] = $entry['to'];
		}

		// Index the returned page records by their title.
		$pagesByTitle = [];
		foreach ( $query['pages'] as $record ) {
			$pagesByTitle[$record['title']] = $record;
		}

		foreach ( $chunk as $requested ) {
			// Follow the normalization chain to the title the API reports.
			$resolved = $requested;
			while ( isset( $normalized[$resolved] ) ) {
				$resolved = $normalized[$resolved];
			}
			$page = $pagesByTitle[$resolved] ?? [];
			$missing = $page['missing'] ?? false;
			$invalid = $page['invalid'] ?? false;
			// A page that is neither missing nor invalid is always
			// reported as known, whatever the API's own flag says.
			$known = ( !$missing && !$invalid ) ? true : ( $page['known'] ?? false );
			$result[$requested] = [
				'pageId' => $page['pageid'] ?? null,
				'revId' => $page['lastrevid'] ?? null,
				'missing' => $missing,
				'known' => $known,
				'redirect' => $page['redirect'] ?? false,
				'linkclasses' => $page['linkclasses'] ?? [],
				'invalid' => $invalid,
			];
		}
	}

	return $result;
}
159 | |
/** @inheritDoc */
public function getFileInfo( PageConfig $pageConfig, array $files ): array {
	// Nothing to look up: skip building request parameters entirely,
	// mirroring the early return in getPageInfo().
	if ( !$files ) {
		return [];
	}

	$pageConfigTitle = $this->toPrefixedText( $pageConfig->getLinkTarget() );
	$sc = $this->siteConfig;
	// Query the videoinfo module when the site config reports it;
	// otherwise use imageinfo.
	if ( $sc instanceof ApiSiteConfig && $sc->hasVideoInfo() ) {
		$prefix = "vi";
		$propName = "videoinfo";
	} else {
		$prefix = "ii";
		$propName = "imageinfo";
	}
	// Request parameters shared by every file; per-file parameters are
	// added to a copy of this array inside the loop.
	$apiArgs2 = [
		'action' => 'query',
		'format' => 'json',
		'formatversion' => 2,
		'rawcontinue' => 1,
		'prop' => $propName,
		"{$prefix}badfilecontexttitle" => $pageConfigTitle,
		"{$prefix}prop" => implode( '|', [
			'mediatype', 'mime', 'size', 'url', 'badfile', 'sha1', 'timestamp'
		] )
	];
	if ( $prefix === 'vi' ) {
		$apiArgs2["viprop"] .= '|derivatives|timedtext';
	}
	// The file namespace name is the same for every file, so resolve it
	// once instead of on every loop iteration.
	$imgNS = $sc->namespaceName( $sc->canonicalNamespaceId( 'file' ) );

	$ret = [];
	foreach ( $files as $file ) {
		$apiArgs = $apiArgs2; // Copy since we modify it
		$name = $file[0];
		$dims = $file[1];

		$apiArgs['titles'] = "$imgNS:$name";
		// A page (PDF) or lang (SVG) parameter has to be sent together
		// with a width; track whether a width still has to be supplied.
		$needsWidth = isset( $dims['page'] ) || isset( $dims['lang'] );
		if ( isset( $dims['width'] ) ) {
			$apiArgs["{$prefix}urlwidth"] = $dims['width'];
			if ( $needsWidth ) {
				if ( isset( $dims['page'] ) ) { // PDF
					$apiArgs["{$prefix}urlparam"] = "page{$dims['page']}-{$dims['width']}px";
				} elseif ( isset( $dims['lang'] ) ) { // SVG
					$apiArgs["{$prefix}urlparam"] = "lang{$dims['lang']}-{$dims['width']}px";
				}
				$needsWidth = false;
			}
		}
		if ( isset( $dims['height'] ) ) {
			$apiArgs["{$prefix}urlheight"] = $dims['height'];
		}
		if ( isset( $dims['seek'] ) ) {
			// Note: this replaces any page/lang urlparam set above.
			$apiArgs["{$prefix}urlparam"] = "seek={$dims['seek']}";
		}

		do {
			$data = $this->api->makeRequest( $apiArgs );
			// Expect exactly 1 row
			$fileinfo = $data['query']['pages'][0][$propName][0];
			// Corner case: if page is set, the core ImageInfo API doesn't
			// respect it *unless* width is set as well. So repeat the
			// request if necessary.
			if ( isset( $fileinfo['pagecount'] ) && !isset( $dims['page'] ) ) {
				$dims['page'] = 1; # also ensures we won't get here again
				$needsWidth = true;
			}
			if ( $needsWidth && !isset( $fileinfo['filemissing'] ) ) {
				$needsWidth = false; # ensure we won't get here again
				// Use the width reported by the first response for the
				// follow-up request.
				$width = $fileinfo['width'];
				$apiArgs["{$prefix}urlwidth"] = $width;
				if ( isset( $dims['page'] ) ) { // PDF
					$apiArgs["{$prefix}urlparam"] = "page{$dims['page']}-{$width}px";
				} elseif ( isset( $dims['lang'] ) ) { // SVG
					$apiArgs["{$prefix}urlparam"] = "lang{$dims['lang']}-{$width}px";
				}
				continue;
			}
			break;
		} while ( true );

		if ( isset( $fileinfo['filemissing'] ) ) {
			// Missing files are reported to the caller as null.
			$fileinfo = null;
		} else {
			// The bad-file flag is reported on the page record, not on the
			// info row, so copy it across.
			$fileinfo['badFile'] = $data['query']['pages'][0]['badfile'];
			$this->stripProto( $fileinfo, 'url' );
			$this->stripProto( $fileinfo, 'thumburl' );
			$this->stripProto( $fileinfo, 'descriptionurl' );
			$this->stripProto( $fileinfo, 'descriptionshorturl' );
			foreach ( $fileinfo['responsiveUrls'] ?? [] as $density => $url ) {
				$this->stripProto( $fileinfo['responsiveUrls'], (string)$density );
			}
			if ( $prefix === 'vi' ) {
				foreach ( $fileinfo['thumbdata']['derivatives'] ?? [] as $j => $d ) {
					$this->stripProto( $fileinfo['thumbdata']['derivatives'][$j], 'src' );
				}
				foreach ( $fileinfo['thumbdata']['timedtext'] ?? [] as $j => $d ) {
					$this->stripProto( $fileinfo['thumbdata']['timedtext'][$j], 'src' );
				}
			}
		}
		// Results are appended in the same order as $files.
		$ret[] = $fileinfo;
	}
	return $ret;
}
261 | |
/**
 * Rewrite the URL stored at $obj[$key] into protocol-relative form, in place.
 *
 * Nothing happens when protocol stripping is disabled for this instance,
 * when $obj is null, or when the entry is absent or empty.
 *
 * @param ?array &$obj Array holding the URL; modified in place
 * @param string $key Key of the URL within $obj
 */
private function stripProto( ?array &$obj, string $key ): void {
	if ( !$this->stripProto || $obj === null || empty( $obj[$key] ) ) {
		return;
	}
	// Replace a leading "http://" or "https://" with "//".
	$obj[$key] = preg_replace( '#^https?://#', '//', $obj[$key] );
}
273 | |
/** @inheritDoc */
public function doPst( PageConfig $pageConfig, string $wikitext ): string {
	$titleText = $this->toPrefixedText( $pageConfig->getLinkTarget() );
	// Results are cached per (title, wikitext) pair.
	$cacheKey = implode( ':', [ 'pst', md5( $titleText ), md5( $wikitext ) ] );
	$cached = $this->getCache( $cacheKey );
	if ( $cached !== null ) {
		return $cached;
	}

	$response = $this->api->makeRequest( [
		'action' => 'parse',
		'title' => $titleText,
		'text' => $wikitext,
		'contentmodel' => 'wikitext',
		'onlypst' => 1,
	] );
	$transformed = $response['parse']['text'];
	$this->setCache( $cacheKey, $transformed );
	return $transformed;
}
292 | |
/**
 * Copy the metadata found in an API result into the given
 * ContentMetadataCollector: categories, modules, module styles,
 * JS config vars, external links and page properties.
 *
 * @param array $data API result; every metadata key is optional
 * @param ContentMetadataCollector $metadata Collector to fill
 */
private function mergeMetadata( array $data, ContentMetadataCollector $metadata ): void {
	foreach ( $data['categories'] ?? [] as $cat ) {
		$metadata->addCategory( $cat['category'], $cat['sortkey'] );
	}

	$metadata->addModules( $data['modules'] ?? [] );
	$metadata->addModuleStyles( $data['modulestyles'] ?? [] );

	foreach ( $data['jsconfigvars'] ?? [] as $varName => $varValue ) {
		$isUnion = false;
		if ( is_array( $varValue ) ) {
			// Strategy value will be exposed by change
			// I974d9ecfb4ca8b22361d25c4c70fc5e55c39d5ed in core.
			$isUnion = ( $varValue['_mw-strategy'] ?? 'write-once' ) === 'union';
			// The strategy marker is bookkeeping, not part of the value.
			unset( $varValue['_mw-strategy'] );
		}
		if ( !$isUnion ) {
			$metadata->setJsConfigVar( $varName, $varValue );
			continue;
		}
		// Union strategy: append each item individually.
		foreach ( $varValue as $item ) {
			$metadata->appendJsConfigVar( $varName, $item );
		}
	}

	foreach ( $data['externallinks'] ?? [] as $link ) {
		$metadata->addExternalLink( $link );
	}

	foreach ( $data['properties'] ?? [] as $propName => $propValue ) {
		$metadata->setPageProperty( $propName, $propValue );
	}
}
328 | |
/** @inheritDoc */
public function parseWikitext(
	PageConfig $pageConfig,
	ContentMetadataCollector $metadata,
	string $wikitext
): string {
	$revid = $pageConfig->getRevisionId();
	$titleText = $this->toPrefixedText( $pageConfig->getLinkTarget() );
	// Results are cached per (title, wikitext, revision id).
	$cacheKey = implode( ':', [ 'parse', md5( $titleText ), md5( $wikitext ), $revid ] );

	$parsed = $this->getCache( $cacheKey );
	if ( $parsed === null ) {
		$request = [
			'action' => 'parse',
			'title' => $titleText,
			'text' => $wikitext,
			'contentmodel' => 'wikitext',
			'prop' => 'text|modules|jsconfigvars|categories|properties|externallinks',
			'disablelimitreport' => 1,
			'wrapoutputclass' => '',
			'showstrategykeys' => 1,
		];
		// Only pass a revision id along when the page config has one.
		if ( $revid !== null ) {
			$request['revid'] = $revid;
		}
		$response = $this->api->makeRequest( $request );
		$parsed = $response['parse'];
		$this->setCache( $cacheKey, $parsed );
	}

	// Metadata is merged on every call, cached or not.
	$this->mergeMetadata( $parsed, $metadata );

	return $parsed['text']; # HTML
}
359 | |
/** @inheritDoc */
public function preprocessWikitext(
	PageConfig $pageConfig,
	ContentMetadataCollector $metadata,
	string $wikitext
): string {
	$revid = $pageConfig->getRevisionId();
	$titleText = $this->toPrefixedText( $pageConfig->getLinkTarget() );
	// Results are cached per (title, wikitext, revision id).
	$cacheKey = implode( ':', [ 'preprocess', md5( $titleText ), md5( $wikitext ), $revid ] );

	$expanded = $this->getCache( $cacheKey );
	if ( $expanded === null ) {
		$request = [
			'action' => 'expandtemplates',
			'title' => $titleText,
			'text' => $wikitext,
			'prop' => 'wikitext|modules|jsconfigvars|categories|properties',
			'showstrategykeys' => 1,
		];
		// Only pass a revision id along when the page config has one.
		if ( $revid !== null ) {
			$request['revid'] = $revid;
		}
		$response = $this->api->makeRequest( $request );
		$expanded = $response['expandtemplates'];
		$this->setCache( $cacheKey, $expanded );
	}

	// Metadata is merged on every call, cached or not.
	$this->mergeMetadata( $expanded, $metadata );

	return $expanded['wikitext'];
}
389 | |
/** @inheritDoc */
public function fetchTemplateSource(
	PageConfig $pageConfig, LinkTarget $title
): ?PageContent {
	$titleText = $this->toPrefixedText( $title );
	$cacheKey = implode( ':', [ 'content', md5( $titleText ) ] );

	$slots = $this->getCache( $cacheKey );
	if ( $slots === null ) {
		$response = $this->api->makeRequest( [
			'action' => 'query',
			'prop' => 'revisions',
			'rvprop' => 'content',
			'rvslots' => '*',
			'titles' => $titleText,
			'rvlimit' => 1,
		] );
		$page = $response['query']['pages'][0];

		// Missing pages are reported as null and are not cached.
		if ( isset( $page['missing'] ) ) {
			return null;
		}

		$slots = $page['revisions'][0]['slots'];
		// PORT-FIXME set the redirect field if needed
		$this->setCache( $cacheKey, $slots );
	}

	// Only the slot data is cached; a new wrapper object is built per call.
	return new MockPageContent( $slots );
}
419 | |
/** @inheritDoc */
public function fetchTemplateData( PageConfig $pageConfig, LinkTarget $title ): ?array {
	$title = $this->toPrefixedText( $title );
	$key = implode( ':', [ 'templatedata', md5( $title ) ] );
	$ret = $this->getCache( $key );
	if ( $ret === null ) {
		$response = $this->api->makeRequest( [
			'action' => 'templatedata',
			'includeMissingTitles' => 1,
			'titles' => $title,
			'redirects' => 1,
		] );
		$pages = $response['pages'] ?? null;
		// Only the first page entry is used. reset() returns false for an
		// empty array; caching and returning that would violate the ?array
		// return type, so an absent or empty result is reported as null
		// and not cached.
		$first = is_array( $pages ) ? reset( $pages ) : false;
		if ( !is_array( $first ) ) {
			return null;
		}
		$ret = $first;
		$this->setCache( $key, $ret );
	}
	return $ret;
}
437 | |
/** @inheritDoc */
public function logLinterData( PageConfig $pageConfig, array $lints ): void {
	// Each lint is written to the PHP error log as its own JSON line.
	foreach ( $lints as $lint ) {
		$encoded = PHPUtils::jsonEncode( $lint );
		error_log( $encoded );
	}
}
444 | |
/**
 * Render a LinkTarget as "prefixed text", the title form the MediaWiki
 * action API expects.
 *
 * @param LinkTarget $linkTarget Target to convert
 * @return string The title, as prefixed text
 */
private function toPrefixedText( LinkTarget $linkTarget ): string {
	$title = Title::newFromLinkTarget( $linkTarget, $this->siteConfig );
	return $title->getPrefixedText();
}
456 | } |