Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
81.03% |
205 / 253 |
|
50.00% |
7 / 14 |
CRAP | |
0.00% |
0 / 1 |
DataAccess | |
81.03% |
205 / 253 |
|
50.00% |
7 / 14 |
109.39 | |
0.00% |
0 / 1 |
getCache | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
2 | |||
setCache | |
42.86% |
3 / 7 |
|
0.00% |
0 / 1 |
4.68 | |||
__construct | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
getPageInfo | |
95.12% |
39 / 41 |
|
0.00% |
0 / 1 |
12 | |||
getFileInfo | |
65.28% |
47 / 72 |
|
0.00% |
0 / 1 |
45.15 | |||
stripProto | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
4 | |||
mergeMetadata | |
50.00% |
13 / 26 |
|
0.00% |
0 / 1 |
26.12 | |||
parseWikitext | |
100.00% |
21 / 21 |
|
100.00% |
1 / 1 |
3 | |||
preprocessWikitext | |
100.00% |
18 / 18 |
|
100.00% |
1 / 1 |
3 | |||
fetchTemplateSource | |
94.74% |
18 / 19 |
|
0.00% |
0 / 1 |
3.00 | |||
fetchTemplateData | |
100.00% |
13 / 13 |
|
100.00% |
1 / 1 |
2 | |||
logLinterData | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
toPrefixedText | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
addTrackingCategory | |
95.00% |
19 / 20 |
|
0.00% |
0 / 1 |
3 |
1 | <?php |
2 | |
3 | declare( strict_types = 1 ); |
4 | |
5 | namespace Wikimedia\Parsoid\Config\Api; |
6 | |
7 | use Wikimedia\Parsoid\Config\Api\SiteConfig as ApiSiteConfig; |
8 | use Wikimedia\Parsoid\Config\DataAccess as IDataAccess; |
9 | use Wikimedia\Parsoid\Config\PageConfig; |
10 | use Wikimedia\Parsoid\Config\PageContent; |
11 | use Wikimedia\Parsoid\Config\SiteConfig as ISiteConfig; |
12 | use Wikimedia\Parsoid\Core\ContentMetadataCollector; |
13 | use Wikimedia\Parsoid\Core\ContentMetadataCollectorStringSets as CMCSS; |
14 | use Wikimedia\Parsoid\Core\LinkTarget; |
15 | use Wikimedia\Parsoid\Mocks\MockPageContent; |
16 | use Wikimedia\Parsoid\Utils\PHPUtils; |
17 | use Wikimedia\Parsoid\Utils\Title; |
18 | use Wikimedia\Parsoid\Utils\TitleValue; |
19 | |
20 | /** |
21 | * DataAccess via MediaWiki's Action API |
22 | * |
23 | * Note this is intended for testing, not performance. |
24 | */ |
25 | class DataAccess extends IDataAccess { |
26 | |
27 | /** @var ApiHelper */ |
28 | private $api; |
29 | |
30 | /** |
31 | * @var bool Should we strip the protocol from returned URLs? |
32 | * Generally this should be true, since the protocol of the API |
33 | * request doesn't necessarily match the protocol of article |
34 | * access; ie, we could be using https to access the API but emit |
35 | * article content which can be read with http. But for running |
36 | * parserTests, we need to include the protocol in order to match |
37 | * the parserTest configuration in core. |
38 | */ |
39 | private $stripProto; |
40 | |
41 | /** |
42 | * @name Caching |
43 | * @todo Someone should librarize MediaWiki core's MapCacheLRU so we can |
44 | * pull it in via composer and use it here. |
45 | * @{ |
46 | */ |
47 | |
48 | private const MAX_CACHE_LEN = 100; |
49 | |
50 | /** |
51 | * @var array |
52 | */ |
53 | private $cache = []; |
54 | |
55 | private ISiteConfig $siteConfig; |
56 | |
/**
 * Look up a cached value, marking it as the most recently used entry.
 *
 * @param string $key
 * @return mixed The cached value, or null when nothing is cached under $key.
 */
private function getCache( string $key ) {
	if ( !isset( $this->cache[$key] ) ) {
		return null;
	}
	// Recency is encoded by array order, so re-insert the entry to move
	// it to the end (the most-recently-used position).
	$value = $this->cache[$key];
	unset( $this->cache[$key] );
	$this->cache[$key] = $value;
	return $value;
}
73 | |
/**
 * Store a value in the cache as the most recently used entry, evicting
 * the least recently used entry when the cache is already full.
 *
 * @param string $key
 * @param mixed $value Not null.
 */
private function setCache( string $key, $value ): void {
	$alreadyCached = isset( $this->cache[$key] );
	if ( $alreadyCached ) {
		// Recency is encoded by array order: drop the stale entry so the
		// replacement below is appended at the end.
		unset( $this->cache[$key] );
	}
	if ( !$alreadyCached && count( $this->cache ) >= self::MAX_CACHE_LEN ) {
		// Cache is full; the first array entry is the least recently used.
		$oldestKey = array_key_first( $this->cache );
		unset( $this->cache[$oldestKey] );
	}
	$this->cache[$key] = $value;
}
91 | |
92 | /** @} */ |
93 | |
/**
 * @param ApiHelper $api Helper used to issue the Action API requests
 * @param ISiteConfig $siteConfig
 * @param array $opts Recognised keys:
 *  - stripProto: whether to strip the protocol from returned URLs
 *    (defaults to true when absent).
 */
public function __construct( ApiHelper $api, ISiteConfig $siteConfig, array $opts ) {
	$this->stripProto = $opts['stripProto'] ?? true;
	$this->siteConfig = $siteConfig;
	$this->api = $api;
}
104 | |
/** @inheritDoc */
public function getPageInfo( $pageConfigOrTitle, array $titles ): array {
	if ( $pageConfigOrTitle instanceof PageConfig ) {
		$contextTitle = $pageConfigOrTitle->getLinkTarget();
	} else {
		$contextTitle = $pageConfigOrTitle;
	}

	if ( !$titles ) {
		return [];
	}

	$linkContext = $this->toPrefixedText( $contextTitle );
	$result = [];
	// Titles are sent to the API in batches of 50.
	foreach ( array_chunk( $titles, 50 ) as $chunk ) {
		$response = $this->api->makeRequest( [
			'action' => 'query',
			'prop' => 'info',
			'inprop' => 'linkclasses',
			'inlinkcontext' => $linkContext,
			'titles' => implode( '|', $chunk ),
		] );
		$query = $response['query'];

		// Map each spelling the API normalized to the form it was
		// normalized to.
		$normalized = [];
		foreach ( $query['normalized'] ?? [] as $entry ) {
			$source = $entry['from'];
			if ( $entry['fromencoded'] ) {
				$source = rawurldecode( $source );
			}
			$normalized[$source] = $entry['to'];
		}

		// Index the returned page records by their title.
		$pagesByTitle = [];
		foreach ( $query['pages'] as $record ) {
			$pagesByTitle[$record['title']] = $record;
		}

		foreach ( $chunk as $requested ) {
			// Follow the normalization map until no further entry applies.
			$resolved = $requested;
			while ( isset( $normalized[$resolved] ) ) {
				$resolved = $normalized[$resolved];
			}
			$info = $pagesByTitle[$resolved] ?? [];
			$missing = $info['missing'] ?? false;
			$invalid = $info['invalid'] ?? false;
			$result[$requested] = [
				'pageId' => $info['pageid'] ?? null,
				'revId' => $info['lastrevid'] ?? null,
				'missing' => $missing,
				// Anything neither missing nor invalid counts as known,
				// regardless of what the API reported.
				'known' => ( !$missing && !$invalid ) ? true : ( $info['known'] ?? false ),
				'redirect' => $info['redirect'] ?? false,
				'linkclasses' => $info['linkclasses'] ?? [],
				'invalid' => $invalid,
			];
		}
	}

	return $result;
}
161 | |
/** @inheritDoc */
public function getFileInfo( PageConfig $pageConfig, array $files ): array {
	$pageConfigTitle = $this->toPrefixedText( $pageConfig->getLinkTarget() );
	$sc = $this->siteConfig;
	// Use the videoinfo module when the site config reports it, and the
	// plain imageinfo module otherwise.
	if ( $sc instanceof ApiSiteConfig && $sc->hasVideoInfo() ) {
		$prefix = 'vi';
		$propName = 'videoinfo';
	} else {
		$prefix = 'ii';
		$propName = 'imageinfo';
	}
	$infoProps = [ 'mediatype', 'mime', 'size', 'url', 'badfile', 'sha1', 'timestamp' ];
	if ( $prefix === 'vi' ) {
		$infoProps[] = 'derivatives';
		$infoProps[] = 'timedtext';
	}
	// Request parameters shared by every file; per-file parameters are
	// added to a copy inside the loop.
	$baseArgs = [
		'action' => 'query',
		'format' => 'json',
		'formatversion' => 2,
		'rawcontinue' => 1,
		'prop' => $propName,
		"{$prefix}badfilecontexttitle" => $pageConfigTitle,
		"{$prefix}prop" => implode( '|', $infoProps ),
	];

	if ( !$files ) {
		return [];
	}

	// The File namespace name is the same for every file, so look it up once.
	$imgNS = $sc->namespaceName( $sc->canonicalNamespaceId( 'file' ) );

	$ret = [];
	foreach ( $files as $file ) {
		$name = $file[0];
		$dims = $file[1];

		$apiArgs = $baseArgs;
		$apiArgs['titles'] = "$imgNS:$name";
		$needsWidth = isset( $dims['page'] ) || isset( $dims['lang'] );
		if ( isset( $dims['width'] ) ) {
			$apiArgs["{$prefix}urlwidth"] = $dims['width'];
			if ( $needsWidth ) {
				if ( isset( $dims['page'] ) ) { // PDF
					$apiArgs["{$prefix}urlparam"] = "page{$dims['page']}-{$dims['width']}px";
				} elseif ( isset( $dims['lang'] ) ) { // SVG
					$apiArgs["{$prefix}urlparam"] = "lang{$dims['lang']}-{$dims['width']}px";
				}
				$needsWidth = false;
			}
		}
		if ( isset( $dims['height'] ) ) {
			$apiArgs["{$prefix}urlheight"] = $dims['height'];
		}
		if ( isset( $dims['seek'] ) ) {
			$apiArgs["{$prefix}urlparam"] = "seek={$dims['seek']}";
		}

		while ( true ) {
			$data = $this->api->makeRequest( $apiArgs );
			// Expect exactly 1 row. A page entry without an info row
			// (e.g. for a title the API rejects) is treated like a
			// missing file instead of being dereferenced.
			$page = $data['query']['pages'][0] ?? [];
			$fileinfo = $page[$propName][0] ?? null;
			if ( !is_array( $fileinfo ) ) {
				$fileinfo = null;
				break;
			}
			// Corner case: if page is set, the core ImageInfo API doesn't
			// respect it *unless* width is set as well. So repeat the
			// request if necessary.
			if ( isset( $fileinfo['pagecount'] ) && !isset( $dims['page'] ) ) {
				$dims['page'] = 1; // also ensures we won't get here again
				$needsWidth = true;
			}
			if ( !$needsWidth || isset( $fileinfo['filemissing'] ) ) {
				break;
			}
			$needsWidth = false; // ensure we won't repeat the request twice
			$width = $fileinfo['width'];
			$apiArgs["{$prefix}urlwidth"] = $width;
			if ( isset( $dims['page'] ) ) { // PDF
				$apiArgs["{$prefix}urlparam"] = "page{$dims['page']}-{$width}px";
			} elseif ( isset( $dims['lang'] ) ) { // SVG
				$apiArgs["{$prefix}urlparam"] = "lang{$dims['lang']}-{$width}px";
			}
		}

		if ( $fileinfo === null || isset( $fileinfo['filemissing'] ) ) {
			$ret[] = null;
			continue;
		}

		$fileinfo['badFile'] = $page['badfile'] ?? false;
		$this->stripProto( $fileinfo, 'url' );
		$this->stripProto( $fileinfo, 'thumburl' );
		$this->stripProto( $fileinfo, 'descriptionurl' );
		$this->stripProto( $fileinfo, 'descriptionshorturl' );
		foreach ( $fileinfo['responsiveUrls'] ?? [] as $density => $unused ) {
			$this->stripProto( $fileinfo['responsiveUrls'], (string)$density );
		}
		if ( $prefix === 'vi' ) {
			foreach ( $fileinfo['thumbdata']['derivatives'] ?? [] as $j => $unused ) {
				$this->stripProto( $fileinfo['thumbdata']['derivatives'][$j], 'src' );
			}
			foreach ( $fileinfo['thumbdata']['timedtext'] ?? [] as $j => $unused ) {
				$this->stripProto( $fileinfo['thumbdata']['timedtext'][$j], 'src' );
			}
		}
		$ret[] = $fileinfo;
	}
	return $ret;
}
263 | |
/**
 * Rewrite the URL stored at $obj[$key] into protocol-relative form.
 *
 * Does nothing when protocol stripping is disabled, when $obj is null,
 * or when the entry is absent or empty.
 *
 * @param ?array &$obj Array holding the URL; modified in place
 * @param string $key Key of the URL within $obj
 */
private function stripProto( ?array &$obj, string $key ): void {
	if ( !$this->stripProto || $obj === null || empty( $obj[$key] ) ) {
		return;
	}
	$obj[$key] = preg_replace( '#^https?://#', '//', $obj[$key] );
}
275 | |
/**
 * Transfer the metadata returned in an API result into our
 * ContentMetadataCollector.
 *
 * Copies categories, modules, module styles, JS config vars, external
 * links and page properties; keys absent from $data are skipped.
 *
 * @param array $data
 * @param ContentMetadataCollector $metadata
 */
private function mergeMetadata( array $data, ContentMetadataCollector $metadata ): void {
	foreach ( ( $data['categories'] ?? [] ) as $c ) {
		$tv = TitleValue::tryNew(
			14, // NS_CATEGORY,
			$c['category']
		);
		if ( $tv === null ) {
			// NOTE(review): tryNew() is assumed to return null when the
			// name cannot form a valid title; there is nothing to record
			// in that case.
			continue;
		}
		$metadata->addCategory( $tv, $c['sortkey'] );
	}
	$metadata->appendOutputStrings( CMCSS::MODULE, $data['modules'] ?? [] );
	$metadata->appendOutputStrings( CMCSS::MODULE_STYLE, $data['modulestyles'] ?? [] );
	foreach ( ( $data['jsconfigvars'] ?? [] ) as $key => $value ) {
		$strategy = 'write-once';
		if ( is_array( $value ) ) {
			// Strategy value will be exposed by change
			// I974d9ecfb4ca8b22361d25c4c70fc5e55c39d5ed in core.
			$strategy = $value['_mw-strategy'] ?? 'write-once';
			unset( $value['_mw-strategy'] );
		}
		if ( $strategy === 'union' ) {
			// For the union strategy the array keys are the set members.
			foreach ( $value as $item => $ignore ) {
				$metadata->appendJsConfigVar( $key, $item );
			}
		} else {
			$metadata->setJsConfigVar( $key, $value );
		}
	}
	foreach ( ( $data['externallinks'] ?? [] ) as $url ) {
		$metadata->addExternalLink( $url );
	}
	foreach ( ( $data['properties'] ?? [] ) as $name => $value ) {
		if ( is_string( $value ) ) {
			$metadata->setUnsortedPageProperty( $name, $value );
		} elseif ( is_numeric( $value ) ) {
			$metadata->setNumericPageProperty( $name, $value );
		} elseif ( is_bool( $value ) ) {
			// Deprecated back-compat
			$metadata->setNumericPageProperty( $name, (int)$value );
		}
		// Non-scalar values deprecated in 1.42; drop them.
	}
}
324 | |
/** @inheritDoc */
public function parseWikitext(
	PageConfig $pageConfig,
	ContentMetadataCollector $metadata,
	string $wikitext
): string {
	$revid = $pageConfig->getRevisionId();
	$contextTitle = $this->toPrefixedText( $pageConfig->getLinkTarget() );
	// Results are cached per (page title, wikitext, revision).
	$cacheKey = implode( ':', [ 'parse', md5( $contextTitle ), md5( $wikitext ), $revid ] );

	$parsed = $this->getCache( $cacheKey );
	if ( $parsed === null ) {
		$request = [
			'action' => 'parse',
			'title' => $contextTitle,
			'text' => $wikitext,
			'contentmodel' => 'wikitext',
			'prop' => 'text|modules|jsconfigvars|categories|properties|externallinks',
			'disablelimitreport' => 1,
			'wrapoutputclass' => '',
			'showstrategykeys' => 1,
		];
		if ( $revid !== null ) {
			$request['revid'] = $revid;
		}
		$response = $this->api->makeRequest( $request );
		$parsed = $response['parse'];
		$this->setCache( $cacheKey, $parsed );
	}

	// Metadata is merged on cache hits as well as on fresh requests.
	$this->mergeMetadata( $parsed, $metadata );

	return $parsed['text']; // HTML
}
355 | |
/** @inheritDoc */
public function preprocessWikitext(
	PageConfig $pageConfig,
	ContentMetadataCollector $metadata,
	string $wikitext
): string {
	$revid = $pageConfig->getRevisionId();
	$contextTitle = $this->toPrefixedText( $pageConfig->getLinkTarget() );
	// Results are cached per (page title, wikitext, revision).
	$cacheKey = implode( ':', [ 'preprocess', md5( $contextTitle ), md5( $wikitext ), $revid ] );

	$expanded = $this->getCache( $cacheKey );
	if ( $expanded === null ) {
		$request = [
			'action' => 'expandtemplates',
			'title' => $contextTitle,
			'text' => $wikitext,
			'prop' => 'wikitext|modules|jsconfigvars|categories|properties',
			'showstrategykeys' => 1,
		];
		if ( $revid !== null ) {
			$request['revid'] = $revid;
		}
		$response = $this->api->makeRequest( $request );
		$expanded = $response['expandtemplates'];
		$this->setCache( $cacheKey, $expanded );
	}

	// Metadata is merged on cache hits as well as on fresh requests.
	$this->mergeMetadata( $expanded, $metadata );

	return $expanded['wikitext'];
}
385 | |
/** @inheritDoc */
public function fetchTemplateSource(
	PageConfig $pageConfig, LinkTarget $title
): ?PageContent {
	$prefixedTitle = $this->toPrefixedText( $title );
	$key = implode( ':', [ 'content', md5( $prefixedTitle ) ] );
	$slots = $this->getCache( $key );
	if ( $slots === null ) {
		$data = $this->api->makeRequest( [
			'action' => 'query',
			'prop' => 'revisions',
			'rvprop' => 'content',
			'rvslots' => '*',
			'titles' => $prefixedTitle,
			'rvlimit' => 1,
		] );
		$pageData = $data['query']['pages'][0] ?? [];
		$slots = $pageData['revisions'][0]['slots'] ?? null;
		// A page entry flagged as missing, or one that carries no
		// revision slots at all, has no source to return. Such results
		// are not cached.
		if ( isset( $pageData['missing'] ) || !is_array( $slots ) ) {
			return null;
		}
		// PORT-FIXME set the redirect field if needed
		$this->setCache( $key, $slots );
	}
	return new MockPageContent( $slots );
}
415 | |
/** @inheritDoc */
public function fetchTemplateData( PageConfig $pageConfig, LinkTarget $title ): ?array {
	$prefixedTitle = $this->toPrefixedText( $title );
	$key = implode( ':', [ 'templatedata', md5( $prefixedTitle ) ] );
	$ret = $this->getCache( $key );
	if ( $ret === null ) {
		$pages = $this->api->makeRequest( [
			'action' => 'templatedata',
			'includeMissingTitles' => 1,
			'titles' => $prefixedTitle,
			'redirects' => 1,
		] )['pages'] ?? [];
		// Only one title is requested, so the first entry is the answer.
		// reset() yields false for an empty array, which must not be
		// cached or returned through the ?array return type.
		$first = reset( $pages );
		if ( !is_array( $first ) ) {
			return null;
		}
		$ret = $first;
		$this->setCache( $key, $ret );
	}
	return $ret;
}
433 | |
/** @inheritDoc */
public function logLinterData( PageConfig $pageConfig, array $lints ): void {
	// Each lint is JSON-encoded and written to the PHP error log.
	foreach ( $lints as $lint ) {
		$encoded = PHPUtils::jsonEncode( $lint );
		error_log( $encoded );
	}
}
440 | |
/**
 * Convert a LinkTarget into the "prefixed text" title string that the
 * MediaWiki action API expects.
 *
 * @param LinkTarget $linkTarget
 * @return string The title, as prefixed text
 */
private function toPrefixedText( LinkTarget $linkTarget ): string {
	$title = Title::newFromLinkTarget( $linkTarget, $this->siteConfig );
	return $title->getPrefixedText();
}
452 | |
/** @inheritDoc */
public function addTrackingCategory(
	PageConfig $pageConfig,
	ContentMetadataCollector $metadata,
	string $key
): void {
	$pageConfigTitle = $this->toPrefixedText( $pageConfig->getLinkTarget() );
	// The message is parsed in the context of the page, so the cache key
	// includes both the page title and the message key.
	$cacheKey = implode( ':', [ 'allmessages', md5( $pageConfigTitle ), md5( $key ) ] );
	$data = $this->getCache( $cacheKey );
	if ( $data === null ) {
		$params = [
			'action' => 'query',
			'meta' => 'allmessages',
			'amtitle' => $pageConfigTitle,
			'ammessages' => $key,
			'amenableparser' => 1,
		];
		$data = $this->api->makeRequest( $params )['query']['allmessages'][0];
		$this->setCache( $cacheKey, $data );
	}
	$content = $data['content'] ?? null;
	// Nothing to add when the message is missing or has no text content.
	if ( isset( $data['missing'] ) || !is_string( $content ) ) {
		return;
	}
	$tv = TitleValue::tryNew(
		14, // NS_CATEGORY,
		$content
	);
	if ( $tv === null ) {
		// NOTE(review): tryNew() is assumed to return null when the
		// message text cannot form a valid title; skip the category.
		return;
	}
	$metadata->addCategory( $tv );
}
482 | } |