Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
87.77% |
201 / 229 |
|
42.86% |
6 / 14 |
CRAP | |
0.00% |
0 / 1 |
ApiDetailRetriever | |
87.77% |
201 / 229 |
|
42.86% |
6 / 14 |
65.36 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
13 / 13 |
|
100.00% |
1 / 1 |
2 | |||
sendApiRequest | |
57.14% |
4 / 7 |
|
0.00% |
0 / 1 |
2.31 | |||
getImportDetails | |
98.21% |
55 / 56 |
|
0.00% |
0 / 1 |
8 | |||
reduceTitleList | |
81.82% |
9 / 11 |
|
0.00% |
0 / 1 |
1.01 | |||
getMoreRevisions | |
79.31% |
23 / 29 |
|
0.00% |
0 / 1 |
9.72 | |||
checkRevisionCount | |
100.00% |
11 / 11 |
|
100.00% |
1 / 1 |
5 | |||
checkMaxRevisionAggregatedBytes | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
4 | |||
getFileRevisionsFromImageInfo | |
65.00% |
13 / 20 |
|
0.00% |
0 / 1 |
10.74 | |||
getTextRevisionsFromRevisionsInfo | |
61.11% |
11 / 18 |
|
0.00% |
0 / 1 |
9.88 | |||
getBaseParams | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
1 | |||
addTextRevisionsToParams | |
100.00% |
20 / 20 |
|
100.00% |
1 / 1 |
3 | |||
addFileRevisionsToParams | |
100.00% |
22 / 22 |
|
100.00% |
1 / 1 |
3 | |||
addTemplatesToParams | |
75.00% |
3 / 4 |
|
0.00% |
0 / 1 |
3.14 | |||
addCategoriesToParams | |
75.00% |
3 / 4 |
|
0.00% |
0 / 1 |
3.14 |
1 | <?php |
2 | |
3 | namespace FileImporter\Remote\MediaWiki; |
4 | |
5 | use ConfigException; |
6 | use FileImporter\Data\FileRevision; |
7 | use FileImporter\Data\FileRevisions; |
8 | use FileImporter\Data\ImportDetails; |
9 | use FileImporter\Data\SourceUrl; |
10 | use FileImporter\Data\TextRevision; |
11 | use FileImporter\Data\TextRevisions; |
12 | use FileImporter\Exceptions\HttpRequestException; |
13 | use FileImporter\Exceptions\ImportException; |
14 | use FileImporter\Exceptions\LocalizedImportException; |
15 | use FileImporter\Interfaces\DetailRetriever; |
16 | use FileImporter\Services\Http\HttpRequestExecutor; |
17 | use MediaWiki\MediaWikiServices; |
18 | use Psr\Log\LoggerInterface; |
19 | use Psr\Log\NullLogger; |
20 | use TitleValue; |
21 | |
22 | /** |
23 | * @license GPL-2.0-or-later |
24 | * @author Addshore |
25 | */ |
26 | class ApiDetailRetriever implements DetailRetriever { |
27 | use MediaWikiSourceUrlParser; |
28 | |
29 | /** @var HttpApiLookup */ |
30 | private $httpApiLookup; |
31 | /** @var HttpRequestExecutor */ |
32 | private $httpRequestExecutor; |
33 | /** @var int */ |
34 | private $maxBytes; |
35 | /** @var LoggerInterface */ |
36 | private $logger; |
37 | /** |
38 | * @var string Placeholder name replacing usernames that have been suppressed as part of |
39 | * a steward action on the source site. |
40 | */ |
41 | private $suppressedUsername; |
42 | /** @var int */ |
43 | private $maxRevisions; |
44 | /** @var int */ |
45 | private $maxAggregatedBytes; |
46 | |
47 | private const API_RESULT_LIMIT = 500; |
48 | private const MAX_REVISIONS = 100; |
49 | private const MAX_AGGREGATED_BYTES = 250000000; |
50 | |
51 | /** |
52 | * @param HttpApiLookup $httpApiLookup |
53 | * @param HttpRequestExecutor $httpRequestExecutor |
54 | * @param int $maxBytes |
55 | * @param LoggerInterface|null $logger |
56 | * |
57 | * @throws ConfigException when $wgFileImporterAccountForSuppressedUsername is invalid |
58 | */ |
59 | public function __construct( |
60 | HttpApiLookup $httpApiLookup, |
61 | HttpRequestExecutor $httpRequestExecutor, |
62 | $maxBytes, |
63 | LoggerInterface $logger = null |
64 | ) { |
65 | $this->httpApiLookup = $httpApiLookup; |
66 | $this->httpRequestExecutor = $httpRequestExecutor; |
67 | $this->maxBytes = $maxBytes; |
68 | $this->logger = $logger ?? new NullLogger(); |
69 | |
70 | $config = MediaWikiServices::getInstance()->getMainConfig(); |
71 | |
72 | $this->maxRevisions = (int)$config->get( 'FileImporterMaxRevisions' ); |
73 | $this->maxAggregatedBytes = (int)$config->get( 'FileImporterMaxAggregatedBytes' ); |
74 | $this->suppressedUsername = $config->get( 'FileImporterAccountForSuppressedUsername' ); |
75 | if ( !MediaWikiServices::getInstance()->getUserNameUtils()->isValid( $this->suppressedUsername ) ) { |
76 | throw new ConfigException( |
77 | 'Invalid username configured in wgFileImporterAccountForSuppressedUsername: "' . |
78 | $this->suppressedUsername . '"' |
79 | ); |
80 | } |
81 | } |
82 | |
83 | /** |
84 | * @param SourceUrl $sourceUrl |
85 | * @param array $apiParameters |
86 | * |
87 | * @return array[] |
88 | * @throws ImportException when the request failed |
89 | */ |
90 | private function sendApiRequest( SourceUrl $sourceUrl, array $apiParameters ) { |
91 | $apiUrl = $this->httpApiLookup->getApiUrl( $sourceUrl ); |
92 | |
93 | try { |
94 | $imageInfoRequest = $this->httpRequestExecutor->execute( $apiUrl, $apiParameters ); |
95 | } catch ( HttpRequestException $e ) { |
96 | throw new LocalizedImportException( [ 'fileimporter-api-failedtogetinfo', |
97 | $apiUrl ], $e ); |
98 | } |
99 | $requestData = json_decode( $imageInfoRequest->getContent(), true ); |
100 | return $requestData; |
101 | } |
102 | |
103 | /** |
104 | * @param SourceUrl $sourceUrl |
105 | * |
106 | * @return ImportDetails |
107 | * @throws ImportException e.g. when the file couldn't be found |
108 | */ |
109 | public function getImportDetails( SourceUrl $sourceUrl ): ImportDetails { |
110 | $params = $this->getBaseParams( $sourceUrl ); |
111 | $params = $this->addFileRevisionsToParams( $params ); |
112 | $params = $this->addTextRevisionsToParams( $params ); |
113 | $params = $this->addTemplatesToParams( $params ); |
114 | $params = $this->addCategoriesToParams( $params ); |
115 | |
116 | $requestData = $this->sendApiRequest( $sourceUrl, $params ); |
117 | |
118 | if ( count( $requestData['query']['pages'] ?? [] ) !== 1 ) { |
119 | $this->logger->warning( |
120 | 'No pages returned by the API', |
121 | [ |
122 | 'sourceUrl' => $sourceUrl->getUrl(), |
123 | 'apiParameters' => $params, |
124 | ] |
125 | ); |
126 | throw new LocalizedImportException( 'fileimporter-api-nopagesreturned' ); |
127 | } |
128 | |
129 | /** @var array $pageInfoData */ |
130 | $pageInfoData = end( $requestData['query']['pages'] ); |
131 | '@phan-var array $pageInfoData'; |
132 | |
133 | if ( array_key_exists( 'missing', $pageInfoData ) ) { |
134 | if ( |
135 | array_key_exists( 'imagerepository', $pageInfoData ) && |
136 | $pageInfoData['imagerepository'] == 'shared' |
137 | ) { |
138 | throw new LocalizedImportException( |
139 | [ 'fileimporter-cantimportfromsharedrepo', $sourceUrl->getHost() ] |
140 | ); |
141 | } |
142 | throw new LocalizedImportException( 'fileimporter-cantimportmissingfile' ); |
143 | } |
144 | |
145 | if ( empty( $pageInfoData['imageinfo'] ) || empty( $pageInfoData['revisions'] ) ) { |
146 | $this->logger->warning( |
147 | 'Bad image or revision info returned by the API', |
148 | [ |
149 | 'sourceUrl' => $sourceUrl->getUrl(), |
150 | 'apiParameters' => $params, |
151 | ] |
152 | ); |
153 | throw new LocalizedImportException( 'fileimporter-api-badinfo' ); |
154 | } |
155 | |
156 | // FIXME: Isn't this misplaced here, *before* more revisions are fetched? |
157 | $this->checkRevisionCount( $sourceUrl, $pageInfoData ); |
158 | $this->checkMaxRevisionAggregatedBytes( $pageInfoData ); |
159 | |
160 | while ( array_key_exists( 'continue', $requestData ) ) { |
161 | $this->getMoreRevisions( $sourceUrl, $requestData, $pageInfoData ); |
162 | } |
163 | |
164 | $pageTitle = $pageInfoData['title']; |
165 | $pageLanguage = $pageInfoData['pagelanguagehtmlcode'] ?? null; |
166 | |
167 | // @phan-suppress-next-line PhanTypePossiblyInvalidDimOffset |
168 | $imageInfoData = $pageInfoData['imageinfo']; |
169 | $revisionsData = $pageInfoData['revisions']; |
170 | $fileRevisions = $this->getFileRevisionsFromImageInfo( $imageInfoData, $pageTitle ); |
171 | $textRevisions = $this->getTextRevisionsFromRevisionsInfo( $revisionsData, $pageTitle ); |
172 | $templates = $this->reduceTitleList( $pageInfoData['templates'] ?? [], NS_TEMPLATE ); |
173 | $categories = $this->reduceTitleList( $pageInfoData['categories'] ?? [], NS_CATEGORY ); |
174 | |
175 | $splitTitle = explode( ':', $pageInfoData['title'] ); |
176 | $titleAfterColon = end( $splitTitle ); |
177 | |
178 | $importDetails = new ImportDetails( |
179 | $sourceUrl, |
180 | new TitleValue( NS_FILE, $titleAfterColon ), |
181 | $textRevisions, |
182 | $fileRevisions |
183 | ); |
184 | // FIXME: Better use constructor parameters instead of setters? |
185 | $importDetails->setPageLanguage( $pageLanguage ); |
186 | $importDetails->setTemplates( $templates ); |
187 | $importDetails->setCategories( $categories ); |
188 | |
189 | return $importDetails; |
190 | } |
191 | |
192 | /** |
193 | * @param array[] $titles |
194 | * @param int $namespace |
195 | * |
196 | * @return string[] |
197 | */ |
198 | private function reduceTitleList( array $titles, int $namespace ): array { |
199 | return array_map( |
200 | static function ( array $title ): string { |
201 | return $title['title']; |
202 | }, |
203 | array_filter( |
204 | $titles, |
205 | static function ( array $title ) use ( $namespace ): bool { |
206 | return $title['ns'] === $namespace; |
207 | } |
208 | ) |
209 | ); |
210 | } |
211 | |
212 | /** |
213 | * Fetches the next set of revisions unless the number of revisions |
214 | * exceeds the max revisions limit |
215 | * |
216 | * @param SourceUrl $sourceUrl |
217 | * @param array[] &$requestData |
218 | * @param array[] &$pageInfoData |
219 | * |
220 | * @throws ImportException |
221 | */ |
222 | private function getMoreRevisions( |
223 | SourceUrl $sourceUrl, |
224 | array &$requestData, |
225 | array &$pageInfoData |
226 | ): void { |
227 | $rvContinue = $requestData['continue']['rvcontinue'] ?? null; |
228 | $iiStart = $requestData['continue']['iistart'] ?? null; |
229 | $tlContinue = $requestData['continue']['tlcontinue'] ?? null; |
230 | $clContinue = $requestData['continue']['clcontinue'] ?? null; |
231 | |
232 | $params = $this->getBaseParams( $sourceUrl ); |
233 | |
234 | if ( $iiStart ) { |
235 | $params = $this->addFileRevisionsToParams( $params, $iiStart ); |
236 | } |
237 | |
238 | if ( $rvContinue ) { |
239 | $params = $this->addTextRevisionsToParams( $params, $rvContinue ); |
240 | } |
241 | |
242 | if ( $tlContinue ) { |
243 | $params = $this->addTemplatesToParams( $params, $tlContinue ); |
244 | } |
245 | |
246 | if ( $clContinue ) { |
247 | $params = $this->addCategoriesToParams( $params, $clContinue ); |
248 | } |
249 | |
250 | $requestData = $this->sendApiRequest( $sourceUrl, $params ); |
251 | |
252 | $newPageInfoData = end( $requestData['query']['pages'] ); |
253 | |
254 | if ( array_key_exists( 'revisions', $newPageInfoData ) ) { |
255 | $pageInfoData['revisions'] = |
256 | array_merge( $pageInfoData['revisions'], $newPageInfoData['revisions'] ); |
257 | } |
258 | |
259 | if ( array_key_exists( 'imageinfo', $newPageInfoData ) ) { |
260 | $pageInfoData['imageinfo'] = |
261 | array_merge( $pageInfoData['imageinfo'], $newPageInfoData['imageinfo'] ); |
262 | } |
263 | |
264 | if ( array_key_exists( 'templates', $newPageInfoData ) ) { |
265 | $pageInfoData['templates'] = |
266 | array_merge( $pageInfoData['templates'], $newPageInfoData['templates'] ); |
267 | } |
268 | |
269 | if ( array_key_exists( 'categories', $newPageInfoData ) ) { |
270 | $pageInfoData['categories'] = |
271 | array_merge( $pageInfoData['categories'], $newPageInfoData['categories'] ); |
272 | } |
273 | |
274 | $this->checkRevisionCount( $sourceUrl, $pageInfoData ); |
275 | $this->checkMaxRevisionAggregatedBytes( $pageInfoData ); |
276 | } |
277 | |
278 | /** |
279 | * Throws an exception if the number of revisions to be imported exceeds |
280 | * the maximum revision limit |
281 | * |
282 | * @param SourceUrl $sourceUrl |
283 | * @param array[] $pageInfoData |
284 | * |
285 | * @throws ImportException when exceeding the acceptable maximum |
286 | */ |
287 | private function checkRevisionCount( SourceUrl $sourceUrl, array $pageInfoData ): void { |
288 | if ( count( $pageInfoData['revisions'] ) > $this->maxRevisions || |
289 | count( $pageInfoData['imageinfo'] ) > $this->maxRevisions || |
290 | count( $pageInfoData['revisions'] ) > static::MAX_REVISIONS || |
291 | count( $pageInfoData['imageinfo'] ) > static::MAX_REVISIONS ) { |
292 | $this->logger->warning( |
293 | 'Too many revisions were being fetched', |
294 | [ |
295 | 'sourceUrl' => $sourceUrl->getUrl(), |
296 | ] |
297 | ); |
298 | |
299 | throw new LocalizedImportException( 'fileimporter-api-toomanyrevisions' ); |
300 | } |
301 | } |
302 | |
303 | /** |
304 | * @param array[] $pageInfoData |
305 | * @phan-param array{imageinfo:array{size:int}[]} $pageInfoData |
306 | * |
307 | * @throws ImportException when exceeding the maximum file size |
308 | */ |
309 | private function checkMaxRevisionAggregatedBytes( array $pageInfoData ): void { |
310 | $aggregatedFileBytes = 0; |
311 | foreach ( $pageInfoData['imageinfo'] as $fileVersion ) { |
312 | $aggregatedFileBytes += $fileVersion['size'] ?? 0; |
313 | if ( $aggregatedFileBytes > $this->maxAggregatedBytes || |
314 | $aggregatedFileBytes > static::MAX_AGGREGATED_BYTES ) { |
315 | $versions = count( $pageInfoData['imageinfo'] ); |
316 | throw new LocalizedImportException( [ 'fileimporter-filetoolarge', $versions ] ); |
317 | } |
318 | } |
319 | } |
320 | |
321 | /** |
322 | * @param array[] $imageInfo |
323 | * @param string $pageTitle |
324 | * |
325 | * @return FileRevisions |
326 | * @throws ImportException when the file is not acceptable, e.g. hidden or to big |
327 | */ |
328 | private function getFileRevisionsFromImageInfo( array $imageInfo, string $pageTitle ): FileRevisions { |
329 | $revisions = []; |
330 | foreach ( $imageInfo as $revisionInfo ) { |
331 | if ( array_key_exists( 'filehidden', $revisionInfo ) ) { |
332 | throw new LocalizedImportException( 'fileimporter-cantimportfilehidden' ); |
333 | } |
334 | |
335 | if ( array_key_exists( 'filemissing', $revisionInfo ) ) { |
336 | throw new LocalizedImportException( 'fileimporter-filemissinginrevision' ); |
337 | } |
338 | |
339 | if ( array_key_exists( 'userhidden', $revisionInfo ) ) { |
340 | $revisionInfo['user'] = $this->suppressedUsername; |
341 | } |
342 | |
343 | if ( ( $revisionInfo['size'] ?? 0 ) > $this->maxBytes ) { |
344 | $versions = count( $imageInfo ); |
345 | throw new LocalizedImportException( [ 'fileimporter-filetoolarge', $versions ] ); |
346 | } |
347 | |
348 | if ( isset( $revisionInfo['sha1'] ) ) { |
349 | // Convert from API sha1 format to DB sha1 format. The conversion can be se inside |
350 | // ApiQueryImageInfo. |
351 | // * API sha1 format is base 16 padded to 40 chars |
352 | // * DB sha1 format is base 36 padded to 31 chars |
353 | $revisionInfo['sha1'] = \Wikimedia\base_convert( $revisionInfo['sha1'], 16, 36, 31 ); |
354 | } |
355 | |
356 | if ( array_key_exists( 'commenthidden', $revisionInfo ) ) { |
357 | $revisionInfo['comment'] = wfMessage( 'fileimporter-revision-removed-comment' ) |
358 | ->plain(); |
359 | } |
360 | |
361 | $revisionInfo['name'] = $pageTitle; |
362 | $revisionInfo['description'] = $revisionInfo['comment'] ?? null; |
363 | |
364 | $revisions[] = new FileRevision( $revisionInfo ); |
365 | } |
366 | return new FileRevisions( $revisions ); |
367 | } |
368 | |
369 | /** |
370 | * @param array[] $revisionsInfo |
371 | * @param string $pageTitle |
372 | * |
373 | * @return TextRevisions |
374 | */ |
375 | private function getTextRevisionsFromRevisionsInfo( array $revisionsInfo, string $pageTitle ): TextRevisions { |
376 | $revisions = []; |
377 | foreach ( $revisionsInfo as $revisionInfo ) { |
378 | if ( array_key_exists( 'userhidden', $revisionInfo ) ) { |
379 | $revisionInfo['user'] = $this->suppressedUsername; |
380 | } |
381 | |
382 | if ( array_key_exists( 'texthidden', $revisionInfo ) ) { |
383 | $revisionInfo['*'] = wfMessage( 'fileimporter-revision-removed-text' ) |
384 | ->plain(); |
385 | } |
386 | |
387 | if ( array_key_exists( 'commenthidden', $revisionInfo ) ) { |
388 | $revisionInfo['comment'] = wfMessage( 'fileimporter-revision-removed-comment' ) |
389 | ->plain(); |
390 | } |
391 | |
392 | if ( !array_key_exists( 'contentmodel', $revisionInfo ) ) { |
393 | $revisionInfo['contentmodel'] = CONTENT_MODEL_WIKITEXT; |
394 | } |
395 | |
396 | if ( !array_key_exists( 'contentformat', $revisionInfo ) ) { |
397 | $revisionInfo['contentformat'] = CONTENT_FORMAT_WIKITEXT; |
398 | } |
399 | |
400 | $revisionInfo['minor'] = array_key_exists( 'minor', $revisionInfo ); |
401 | $revisionInfo['title'] = $pageTitle; |
402 | $revisions[] = new TextRevision( $revisionInfo ); |
403 | } |
404 | return new TextRevisions( $revisions ); |
405 | } |
406 | |
407 | /** |
408 | * @param SourceUrl $sourceUrl |
409 | * @return string[] |
410 | */ |
411 | private function getBaseParams( SourceUrl $sourceUrl ): array { |
412 | return [ |
413 | 'action' => 'query', |
414 | 'errorformat' => 'plaintext', |
415 | 'format' => 'json', |
416 | 'titles' => $this->parseTitleFromSourceUrl( $sourceUrl ), |
417 | 'prop' => 'info' |
418 | ]; |
419 | } |
420 | |
421 | /** |
422 | * Adds to params base the properties for getting Text Revisions |
423 | * |
424 | * @param array $params |
425 | * @param string|null $rvContinue |
426 | * |
427 | * @return array |
428 | */ |
429 | private function addTextRevisionsToParams( array $params, string $rvContinue = null ): array { |
430 | $params['prop'] .= ( $params['prop'] ) ? '|revisions' : 'revisions'; |
431 | |
432 | if ( $rvContinue ) { |
433 | $params['rvcontinue'] = $rvContinue; |
434 | } |
435 | |
436 | return $params + [ |
437 | 'rvlimit' => static::API_RESULT_LIMIT, |
438 | 'rvdir' => 'newer', |
439 | 'rvprop' => implode( |
440 | '|', |
441 | [ |
442 | 'flags', |
443 | 'timestamp', |
444 | 'user', |
445 | 'sha1', |
446 | 'contentmodel', |
447 | 'comment', |
448 | 'content', |
449 | 'tags', |
450 | ] |
451 | ) |
452 | ]; |
453 | } |
454 | |
455 | /** |
456 | * Adds to params base the properties for getting File Revisions |
457 | * |
458 | * @param array $params |
459 | * @param string|null $iiStart |
460 | * |
461 | * @return array |
462 | */ |
463 | private function addFileRevisionsToParams( array $params, string $iiStart = null ): array { |
464 | $params['prop'] .= ( $params['prop'] ) ? '|imageinfo' : 'imageinfo'; |
465 | |
466 | if ( $iiStart ) { |
467 | $params['iistart'] = $iiStart; |
468 | } |
469 | |
470 | return $params + [ |
471 | 'iilimit' => static::API_RESULT_LIMIT, |
472 | 'iiurlwidth' => 800, |
473 | 'iiurlheight' => 400, |
474 | 'iiprop' => implode( |
475 | '|', |
476 | [ |
477 | 'timestamp', |
478 | 'user', |
479 | 'userid', |
480 | 'comment', |
481 | 'canonicaltitle', |
482 | 'url', |
483 | 'size', |
484 | 'sha1', |
485 | 'archivename', |
486 | ] |
487 | ) |
488 | ]; |
489 | } |
490 | |
491 | /** |
492 | * Adds to params base the properties for getting Templates |
493 | * |
494 | * @param array $params |
495 | * @param string|null $tlContinue |
496 | * |
497 | * @return array |
498 | */ |
499 | private function addTemplatesToParams( array $params, string $tlContinue = null ): array { |
500 | $params['prop'] .= ( $params['prop'] ) ? '|templates' : 'templates'; |
501 | |
502 | if ( $tlContinue ) { |
503 | $params['tlcontinue'] = $tlContinue; |
504 | } |
505 | |
506 | return $params + [ 'tlnamespace' => NS_TEMPLATE, 'tllimit' => static::API_RESULT_LIMIT ]; |
507 | } |
508 | |
509 | /** |
510 | * Adds to params base the properties for getting Categories |
511 | * |
512 | * @param array $params |
513 | * @param string|null $clContinue |
514 | * |
515 | * @return array |
516 | */ |
517 | private function addCategoriesToParams( array $params, string $clContinue = null ): array { |
518 | $params['prop'] .= ( $params['prop'] ) ? '|categories' : 'categories'; |
519 | |
520 | if ( $clContinue ) { |
521 | $params['clcontinue'] = $clContinue; |
522 | } |
523 | |
524 | return $params + [ 'cllimit' => static::API_RESULT_LIMIT ]; |
525 | } |
526 | |
527 | } |