Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
88.55% |
201 / 227 |
|
42.86% |
6 / 14 |
CRAP | |
0.00% |
0 / 1 |
ApiDetailRetriever | |
88.55% |
201 / 227 |
|
42.86% |
6 / 14 |
61.88 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
13 / 13 |
|
100.00% |
1 / 1 |
2 | |||
sendApiRequest | |
57.14% |
4 / 7 |
|
0.00% |
0 / 1 |
2.31 | |||
getImportDetails | |
98.21% |
55 / 56 |
|
0.00% |
0 / 1 |
8 | |||
reduceTitleList | |
81.82% |
9 / 11 |
|
0.00% |
0 / 1 |
1.01 | |||
getMoreRevisions | |
79.31% |
23 / 29 |
|
0.00% |
0 / 1 |
9.72 | |||
checkRevisionCount | |
100.00% |
11 / 11 |
|
100.00% |
1 / 1 |
5 | |||
checkMaxRevisionAggregatedBytes | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
4 | |||
getFileRevisionsFromImageInfo | |
65.00% |
13 / 20 |
|
0.00% |
0 / 1 |
10.74 | |||
getTextRevisionsFromRevisionsInfo | |
64.29% |
9 / 14 |
|
0.00% |
0 / 1 |
6.14 | |||
getBaseParams | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
1 | |||
addTextRevisionsToParams | |
100.00% |
21 / 21 |
|
100.00% |
1 / 1 |
3 | |||
addFileRevisionsToParams | |
100.00% |
22 / 22 |
|
100.00% |
1 / 1 |
3 | |||
addTemplatesToParams | |
75.00% |
3 / 4 |
|
0.00% |
0 / 1 |
3.14 | |||
addCategoriesToParams | |
75.00% |
3 / 4 |
|
0.00% |
0 / 1 |
3.14 |
1 | <?php |
2 | |
3 | namespace FileImporter\Remote\MediaWiki; |
4 | |
5 | use FileImporter\Data\FileRevision; |
6 | use FileImporter\Data\FileRevisions; |
7 | use FileImporter\Data\ImportDetails; |
8 | use FileImporter\Data\SourceUrl; |
9 | use FileImporter\Data\TextRevision; |
10 | use FileImporter\Data\TextRevisions; |
11 | use FileImporter\Exceptions\HttpRequestException; |
12 | use FileImporter\Exceptions\ImportException; |
13 | use FileImporter\Exceptions\LocalizedImportException; |
14 | use FileImporter\Interfaces\DetailRetriever; |
15 | use FileImporter\Services\Http\HttpRequestExecutor; |
16 | use MediaWiki\Config\ConfigException; |
17 | use MediaWiki\MediaWikiServices; |
18 | use MediaWiki\Revision\SlotRecord; |
19 | use MediaWiki\Title\TitleValue; |
20 | use Psr\Log\LoggerInterface; |
21 | use Psr\Log\NullLogger; |
22 | |
/**
 * Retrieves the details needed to import a file page from a remote MediaWiki site by querying
 * the action API of that site: file revisions (imageinfo), text revisions, templates and
 * categories. Continuation requests are followed until the API reports no more data or one of
 * the revision count / size limits is exceeded.
 *
 * @license GPL-2.0-or-later
 * @author Addshore
 */
class ApiDetailRetriever implements DetailRetriever {
	use MediaWikiSourceUrlParser;

	/** @var HttpApiLookup */
	private $httpApiLookup;
	/** @var HttpRequestExecutor */
	private $httpRequestExecutor;
	/** @var int Maximum size in bytes a single file revision may have */
	private $maxBytes;
	/** @var LoggerInterface */
	private $logger;
	/**
	 * @var string Placeholder name replacing usernames that have been suppressed as part of
	 * a steward action on the source site.
	 */
	private $suppressedUsername;
	/** @var int Configured maximum number of revisions ($wgFileImporterMaxRevisions) */
	private $maxRevisions;
	/** @var int Configured maximum for the summed-up size of all file revisions */
	private $maxAggregatedBytes;

	/** Limit requested from the API for every list-like property (rv, ii, tl, cl) */
	private const API_RESULT_LIMIT = 500;
	/** Hard upper bound for the revision count, applied in addition to the configured one */
	private const MAX_REVISIONS = 100;
	/** Hard upper bound for the aggregated file size, applied in addition to the configured one */
	private const MAX_AGGREGATED_BYTES = 250000000;

	/**
	 * @param HttpApiLookup $httpApiLookup
	 * @param HttpRequestExecutor $httpRequestExecutor
	 * @param int $maxBytes
	 * @param LoggerInterface|null $logger
	 *
	 * @throws ConfigException when $wgFileImporterAccountForSuppressedUsername is invalid
	 */
	public function __construct(
		HttpApiLookup $httpApiLookup,
		HttpRequestExecutor $httpRequestExecutor,
		$maxBytes,
		?LoggerInterface $logger = null
	) {
		$this->httpApiLookup = $httpApiLookup;
		$this->httpRequestExecutor = $httpRequestExecutor;
		$this->maxBytes = $maxBytes;
		$this->logger = $logger ?? new NullLogger();

		$services = MediaWikiServices::getInstance();
		$config = $services->getMainConfig();

		$this->maxRevisions = (int)$config->get( 'FileImporterMaxRevisions' );
		$this->maxAggregatedBytes = (int)$config->get( 'FileImporterMaxAggregatedBytes' );
		$this->suppressedUsername = $config->get( 'FileImporterAccountForSuppressedUsername' );
		if ( !$services->getUserNameUtils()->isValid( $this->suppressedUsername ) ) {
			throw new ConfigException(
				'Invalid username configured in wgFileImporterAccountForSuppressedUsername: "' .
				$this->suppressedUsername . '"'
			);
		}
	}

	/**
	 * Sends a single request to the API of the source site and decodes the JSON response.
	 *
	 * @param SourceUrl $sourceUrl
	 * @param array $apiParameters
	 *
	 * @return array[] Decoded response. An empty array is returned when the response body
	 *  does not decode to an array (e.g. invalid JSON), so callers can rely on array access.
	 * @throws ImportException when the request failed
	 */
	private function sendApiRequest( SourceUrl $sourceUrl, array $apiParameters ): array {
		$apiUrl = $this->httpApiLookup->getApiUrl( $sourceUrl );

		try {
			$imageInfoRequest = $this->httpRequestExecutor->execute( $apiUrl, $apiParameters );
		} catch ( HttpRequestException $e ) {
			throw new LocalizedImportException( [ 'fileimporter-api-failedtogetinfo',
				$apiUrl ], $e );
		}

		$requestData = json_decode( $imageInfoRequest->getContent(), true );
		// json_decode() yields null for invalid JSON and scalars for scalar documents
		return is_array( $requestData ) ? $requestData : [];
	}

	/**
	 * Fetches file revisions, text revisions, templates and categories of the page behind the
	 * given URL and wraps them in an ImportDetails object.
	 *
	 * @param SourceUrl $sourceUrl
	 *
	 * @return ImportDetails
	 * @throws ImportException e.g. when the file couldn't be found
	 */
	public function getImportDetails( SourceUrl $sourceUrl ): ImportDetails {
		$params = $this->getBaseParams( $sourceUrl );
		$params = $this->addFileRevisionsToParams( $params );
		$params = $this->addTextRevisionsToParams( $params );
		$params = $this->addTemplatesToParams( $params );
		$params = $this->addCategoriesToParams( $params );

		$requestData = $this->sendApiRequest( $sourceUrl, $params );

		if ( count( $requestData['query']['pages'] ?? [] ) !== 1 ) {
			$this->logger->warning(
				'No pages returned by the API',
				[
					'sourceUrl' => $sourceUrl->getUrl(),
					'apiParameters' => $params,
				]
			);
			throw new LocalizedImportException( 'fileimporter-api-nopagesreturned' );
		}

		/** @var array $pageInfoData */
		$pageInfoData = end( $requestData['query']['pages'] );
		'@phan-var array $pageInfoData';

		if ( array_key_exists( 'missing', $pageInfoData ) ) {
			// The page doesn't exist locally on the source site, but might come from a
			// shared repository, which deserves a more specific error message
			if ( ( $pageInfoData['imagerepository'] ?? null ) === 'shared' ) {
				throw new LocalizedImportException(
					[ 'fileimporter-cantimportfromsharedrepo', $sourceUrl->getHost() ]
				);
			}
			throw new LocalizedImportException( 'fileimporter-cantimportmissingfile' );
		}

		if ( empty( $pageInfoData['imageinfo'] ) || empty( $pageInfoData['revisions'] ) ) {
			$this->logger->warning(
				'Bad image or revision info returned by the API',
				[
					'sourceUrl' => $sourceUrl->getUrl(),
					'apiParameters' => $params,
				]
			);
			throw new LocalizedImportException( 'fileimporter-api-badinfo' );
		}

		// Checked here to fail early on the first batch. The same checks run again after
		// every continuation batch, see getMoreRevisions().
		$this->checkRevisionCount( $sourceUrl, $pageInfoData );
		$this->checkMaxRevisionAggregatedBytes( $pageInfoData );

		// getMoreRevisions() replaces $requestData with the most recent response, so this
		// loop ends as soon as the API stops returning a "continue" element
		while ( array_key_exists( 'continue', $requestData ) ) {
			$this->getMoreRevisions( $sourceUrl, $requestData, $pageInfoData );
		}

		$pageTitle = $pageInfoData['title'];
		$pageLanguage = $pageInfoData['pagelanguagehtmlcode'] ?? null;

		// @phan-suppress-next-line PhanTypePossiblyInvalidDimOffset
		$imageInfoData = $pageInfoData['imageinfo'];
		$revisionsData = $pageInfoData['revisions'];
		$fileRevisions = $this->getFileRevisionsFromImageInfo( $imageInfoData, $pageTitle );
		$textRevisions = $this->getTextRevisionsFromRevisionsInfo( $revisionsData, $pageTitle );
		$templates = $this->reduceTitleList( $pageInfoData['templates'] ?? [], NS_TEMPLATE );
		$categories = $this->reduceTitleList( $pageInfoData['categories'] ?? [], NS_CATEGORY );

		// Only the part after the last colon is used as the title text in the file namespace
		$splitTitle = explode( ':', $pageInfoData['title'] );
		$titleAfterColon = end( $splitTitle );

		$importDetails = new ImportDetails(
			$sourceUrl,
			new TitleValue( NS_FILE, $titleAfterColon ),
			$textRevisions,
			$fileRevisions
		);
		// FIXME: Better use constructor parameters instead of setters?
		$importDetails->setPageLanguage( $pageLanguage );
		$importDetails->setTemplates( $templates );
		$importDetails->setCategories( $categories );

		return $importDetails;
	}

	/**
	 * Reduces a list of title arrays as returned by the API to the plain title strings of the
	 * entries in the given namespace. Array keys of the input are preserved.
	 *
	 * @param array[] $titles
	 * @param int $namespace
	 *
	 * @return string[]
	 */
	private function reduceTitleList( array $titles, int $namespace ): array {
		return array_map(
			static function ( array $title ): string {
				return $title['title'];
			},
			array_filter(
				$titles,
				static function ( array $title ) use ( $namespace ): bool {
					return $title['ns'] === $namespace;
				}
			)
		);
	}

	/**
	 * Fetches the next set of revisions unless the number of revisions
	 * exceeds the max revisions limit
	 *
	 * @param SourceUrl $sourceUrl
	 * @param array[] &$requestData Previous response, replaced with the new response
	 * @param array[] &$pageInfoData Accumulated page info the new batch is merged into
	 *
	 * @throws ImportException
	 */
	private function getMoreRevisions(
		SourceUrl $sourceUrl,
		array &$requestData,
		array &$pageInfoData
	): void {
		$rvContinue = $requestData['continue']['rvcontinue'] ?? null;
		$iiStart = $requestData['continue']['iistart'] ?? null;
		$tlContinue = $requestData['continue']['tlcontinue'] ?? null;
		$clContinue = $requestData['continue']['clcontinue'] ?? null;

		$params = $this->getBaseParams( $sourceUrl );

		// Only request the properties the API reported as incomplete
		if ( $iiStart ) {
			$params = $this->addFileRevisionsToParams( $params, $iiStart );
		}

		if ( $rvContinue ) {
			$params = $this->addTextRevisionsToParams( $params, $rvContinue );
		}

		if ( $tlContinue ) {
			$params = $this->addTemplatesToParams( $params, $tlContinue );
		}

		if ( $clContinue ) {
			$params = $this->addCategoriesToParams( $params, $clContinue );
		}

		$requestData = $this->sendApiRequest( $sourceUrl, $params );

		$pages = $requestData['query']['pages'] ?? null;
		if ( !is_array( $pages ) || !$pages ) {
			// Fail with an import error instead of a TypeError on a malformed response
			$this->logger->warning(
				'No pages returned by the API',
				[
					'sourceUrl' => $sourceUrl->getUrl(),
					'apiParameters' => $params,
				]
			);
			throw new LocalizedImportException( 'fileimporter-api-nopagesreturned' );
		}

		$newPageInfoData = end( $pages );

		foreach ( [ 'revisions', 'imageinfo', 'templates', 'categories' ] as $prop ) {
			if ( is_array( $newPageInfoData[$prop] ?? null ) ) {
				// Earlier batches are not guaranteed to contain every property, e.g. the
				// first response might not have included any templates or categories yet
				$pageInfoData[$prop] = array_merge(
					$pageInfoData[$prop] ?? [],
					$newPageInfoData[$prop]
				);
			}
		}

		$this->checkRevisionCount( $sourceUrl, $pageInfoData );
		$this->checkMaxRevisionAggregatedBytes( $pageInfoData );
	}

	/**
	 * Throws an exception if the number of revisions to be imported exceeds
	 * the maximum revision limit
	 *
	 * @param SourceUrl $sourceUrl
	 * @param array[] $pageInfoData
	 *
	 * @throws ImportException when exceeding the acceptable maximum
	 */
	private function checkRevisionCount( SourceUrl $sourceUrl, array $pageInfoData ): void {
		// Both the configured and the hard-coded limit apply, i.e. the smaller one wins
		$limit = min( $this->maxRevisions, self::MAX_REVISIONS );

		if ( count( $pageInfoData['revisions'] ) > $limit ||
			count( $pageInfoData['imageinfo'] ) > $limit
		) {
			$this->logger->warning(
				'Too many revisions were being fetched',
				[
					'sourceUrl' => $sourceUrl->getUrl(),
				]
			);

			throw new LocalizedImportException( 'fileimporter-api-toomanyrevisions' );
		}
	}

	/**
	 * Throws an exception if the summed-up size of all file revisions exceeds the maximum.
	 * Revisions without a known size count as 0 bytes.
	 *
	 * @param array[] $pageInfoData
	 * @phan-param array{imageinfo:array{size:int}[]} $pageInfoData
	 *
	 * @throws ImportException when exceeding the maximum file size
	 */
	private function checkMaxRevisionAggregatedBytes( array $pageInfoData ): void {
		// Both the configured and the hard-coded limit apply, i.e. the smaller one wins
		$limit = min( $this->maxAggregatedBytes, self::MAX_AGGREGATED_BYTES );

		$aggregatedFileBytes = 0;
		foreach ( $pageInfoData['imageinfo'] as $fileVersion ) {
			$aggregatedFileBytes += $fileVersion['size'] ?? 0;
			if ( $aggregatedFileBytes > $limit ) {
				$versions = count( $pageInfoData['imageinfo'] );
				throw new LocalizedImportException( [ 'fileimporter-filetoolarge', $versions ] );
			}
		}
	}

	/**
	 * Converts the imageinfo entries returned by the API to FileRevision objects.
	 *
	 * @param array[] $imageInfo
	 * @param string $pageTitle
	 *
	 * @return FileRevisions
	 * @throws ImportException when the file is not acceptable, e.g. hidden or too big
	 */
	private function getFileRevisionsFromImageInfo( array $imageInfo, string $pageTitle ): FileRevisions {
		$revisions = [];
		foreach ( $imageInfo as $revisionInfo ) {
			if ( array_key_exists( 'filehidden', $revisionInfo ) ) {
				throw new LocalizedImportException( 'fileimporter-cantimportfilehidden' );
			}

			if ( array_key_exists( 'filemissing', $revisionInfo ) ) {
				throw new LocalizedImportException( 'fileimporter-filemissinginrevision' );
			}

			if ( array_key_exists( 'userhidden', $revisionInfo ) ) {
				$revisionInfo['user'] ??= $this->suppressedUsername;
			}

			if ( ( $revisionInfo['size'] ?? 0 ) > $this->maxBytes ) {
				$versions = count( $imageInfo );
				throw new LocalizedImportException( [ 'fileimporter-filetoolarge', $versions ] );
			}

			if ( isset( $revisionInfo['sha1'] ) ) {
				// Convert from API sha1 format to DB sha1 format. The conversion can be seen
				// inside ApiQueryImageInfo.
				// * API sha1 format is base 16 padded to 40 chars
				// * DB sha1 format is base 36 padded to 31 chars
				$revisionInfo['sha1'] = \Wikimedia\base_convert( $revisionInfo['sha1'], 16, 36, 31 );
			}

			if ( array_key_exists( 'commenthidden', $revisionInfo ) ) {
				$revisionInfo['comment'] ??=
					wfMessage( 'fileimporter-revision-removed-comment' )->plain();
			}

			$revisionInfo['name'] = $pageTitle;
			$revisionInfo['description'] = $revisionInfo['comment'] ?? null;

			$revisions[] = new FileRevision( $revisionInfo );
		}
		return new FileRevisions( $revisions );
	}

	/**
	 * Converts the revision entries returned by the API to TextRevision objects. Hidden
	 * users, texts and comments are replaced with placeholders.
	 *
	 * @param array[] $revisionsInfo
	 * @param string $pageTitle
	 *
	 * @return TextRevisions
	 */
	private function getTextRevisionsFromRevisionsInfo( array $revisionsInfo, string $pageTitle ): TextRevisions {
		$revisions = [];
		foreach ( $revisionsInfo as $revisionInfo ) {
			if ( array_key_exists( 'userhidden', $revisionInfo ) ) {
				$revisionInfo['user'] ??= $this->suppressedUsername;
			}

			if ( array_key_exists( 'texthidden', $revisionInfo ) ) {
				$revisionInfo['slots'][SlotRecord::MAIN]['content'] ??=
					wfMessage( 'fileimporter-revision-removed-text' )->plain();
			}

			if ( array_key_exists( 'commenthidden', $revisionInfo ) ) {
				$revisionInfo['comment'] ??=
					wfMessage( 'fileimporter-revision-removed-comment' )->plain();
			}

			// The mere presence of the "minor" key marks a minor edit
			$revisionInfo['minor'] = array_key_exists( 'minor', $revisionInfo );
			$revisionInfo['title'] = $pageTitle;
			$revisions[] = new TextRevision( $revisionInfo );
		}
		return new TextRevisions( $revisions );
	}

	/**
	 * @param SourceUrl $sourceUrl
	 *
	 * @return array Query parameters shared by all requests this class sends
	 */
	private function getBaseParams( SourceUrl $sourceUrl ): array {
		return [
			'action' => 'query',
			'errorformat' => 'plaintext',
			'format' => 'json',
			'formatversion' => '2',
			'titles' => $this->parseTitleFromSourceUrl( $sourceUrl ),
			'prop' => 'info'
		];
	}

	/**
	 * Appends a property name to the pipe-separated "prop" parameter.
	 *
	 * @param array $params
	 * @param string $prop
	 *
	 * @return array
	 */
	private function appendProp( array $params, string $prop ): array {
		$existing = $params['prop'] ?? '';
		$params['prop'] = $existing ? $existing . '|' . $prop : $prop;
		return $params;
	}

	/**
	 * Adds to params base the properties for getting Text Revisions
	 *
	 * @param array $params
	 * @param string|null $rvContinue Continuation value from a previous response, if any
	 *
	 * @return array
	 */
	private function addTextRevisionsToParams( array $params, ?string $rvContinue = null ): array {
		$params = $this->appendProp( $params, 'revisions' );

		if ( $rvContinue ) {
			$params['rvcontinue'] = $rvContinue;
		}

		// The + operator never overwrites parameters that are already set
		return $params + [
			'rvlimit' => self::API_RESULT_LIMIT,
			'rvdir' => 'newer',
			'rvslots' => SlotRecord::MAIN,
			'rvprop' => implode(
				'|',
				[
					'flags',
					'timestamp',
					'user',
					'sha1',
					'contentmodel',
					'comment',
					'content',
					'tags',
				]
			)
		];
	}

	/**
	 * Adds to params base the properties for getting File Revisions
	 *
	 * @param array $params
	 * @param string|null $iiStart Continuation value from a previous response, if any
	 *
	 * @return array
	 */
	private function addFileRevisionsToParams( array $params, ?string $iiStart = null ): array {
		$params = $this->appendProp( $params, 'imageinfo' );

		if ( $iiStart ) {
			$params['iistart'] = $iiStart;
		}

		// The + operator never overwrites parameters that are already set
		return $params + [
			'iilimit' => self::API_RESULT_LIMIT,
			'iiurlwidth' => 800,
			'iiurlheight' => 400,
			'iiprop' => implode(
				'|',
				[
					'timestamp',
					'user',
					'userid',
					'comment',
					'canonicaltitle',
					'url',
					'size',
					'sha1',
					'archivename',
				]
			)
		];
	}

	/**
	 * Adds to params base the properties for getting Templates
	 *
	 * @param array $params
	 * @param string|null $tlContinue Continuation value from a previous response, if any
	 *
	 * @return array
	 */
	private function addTemplatesToParams( array $params, ?string $tlContinue = null ): array {
		$params = $this->appendProp( $params, 'templates' );

		if ( $tlContinue ) {
			$params['tlcontinue'] = $tlContinue;
		}

		return $params + [ 'tlnamespace' => NS_TEMPLATE, 'tllimit' => self::API_RESULT_LIMIT ];
	}

	/**
	 * Adds to params base the properties for getting Categories
	 *
	 * @param array $params
	 * @param string|null $clContinue Continuation value from a previous response, if any
	 *
	 * @return array
	 */
	private function addCategoriesToParams( array $params, ?string $clContinue = null ): array {
		$params = $this->appendProp( $params, 'categories' );

		if ( $clContinue ) {
			$params['clcontinue'] = $clContinue;
		}

		return $params + [ 'cllimit' => self::API_RESULT_LIMIT ];
	}

}