Code Coverage

| | Lines | Lines (covered / total) | Functions and Methods | Functions and Methods (covered / total) | CRAP | Classes and Traits | Classes and Traits (covered / total) |
|---|---|---|---|---|---|---|---|
| Total | 88.24% | 195 / 221 | 42.86% | 6 / 14 | | 0.00% | 0 / 1 |
| ApiDetailRetriever | 88.24% | 195 / 221 | 42.86% | 6 / 14 | 64.67 | 0.00% | 0 / 1 |
| __construct | 100.00% | 13 / 13 | 100.00% | 1 / 1 | 2 | | |
| sendApiRequest | 57.14% | 4 / 7 | 0.00% | 0 / 1 | 2.31 | | |
| getImportDetails | 98.21% | 55 / 56 | 0.00% | 0 / 1 | 8 | | |
| reduceTitleList | 60.00% | 3 / 5 | 0.00% | 0 / 1 | 3.58 | | |
| getMoreRevisions | 79.31% | 23 / 29 | 0.00% | 0 / 1 | 9.72 | | |
| checkRevisionCount | 100.00% | 11 / 11 | 100.00% | 1 / 1 | 5 | | |
| checkMaxRevisionAggregatedBytes | 100.00% | 7 / 7 | 100.00% | 1 / 1 | 4 | | |
| getFileRevisionsFromImageInfo | 65.00% | 13 / 20 | 0.00% | 0 / 1 | 10.74 | | |
| getTextRevisionsFromRevisionsInfo | 64.29% | 9 / 14 | 0.00% | 0 / 1 | 6.14 | | |
| getBaseParams | 100.00% | 8 / 8 | 100.00% | 1 / 1 | 1 | | |
| addTextRevisionsToParams | 100.00% | 21 / 21 | 100.00% | 1 / 1 | 3 | | |
| addFileRevisionsToParams | 100.00% | 22 / 22 | 100.00% | 1 / 1 | 3 | | |
| addTemplatesToParams | 75.00% | 3 / 4 | 0.00% | 0 / 1 | 3.14 | | |
| addCategoriesToParams | 75.00% | 3 / 4 | 0.00% | 0 / 1 | 3.14 | | |
1 | <?php |
2 | |
3 | namespace FileImporter\Remote\MediaWiki; |
4 | |
5 | use FileImporter\Data\FileRevision; |
6 | use FileImporter\Data\FileRevisions; |
7 | use FileImporter\Data\ImportDetails; |
8 | use FileImporter\Data\SourceUrl; |
9 | use FileImporter\Data\TextRevision; |
10 | use FileImporter\Data\TextRevisions; |
11 | use FileImporter\Exceptions\HttpRequestException; |
12 | use FileImporter\Exceptions\ImportException; |
13 | use FileImporter\Exceptions\LocalizedImportException; |
14 | use FileImporter\Interfaces\DetailRetriever; |
15 | use FileImporter\Services\Http\HttpRequestExecutor; |
16 | use MediaWiki\Config\ConfigException; |
17 | use MediaWiki\MediaWikiServices; |
18 | use MediaWiki\Revision\SlotRecord; |
19 | use MediaWiki\Title\TitleValue; |
20 | use Psr\Log\LoggerInterface; |
21 | use Psr\Log\NullLogger; |
22 | |
23 | /** |
24 | * @license GPL-2.0-or-later |
25 | * @author Addshore |
26 | */ |
27 | class ApiDetailRetriever implements DetailRetriever { |
28 | use MediaWikiSourceUrlParser; |
29 | |
/** Value sent for every "*limit" parameter of the API requests built by this class. */
private const API_RESULT_LIMIT = 500;
/** Built-in cap on the number of text or file revisions, applied in addition to the config. */
private const MAX_REVISIONS = 100;
/** Built-in cap on the summed up size of all file revisions, applied in addition to the config. */
private const MAX_AGGREGATED_BYTES = 250000000;

/** Resolves the API endpoint that belongs to a source URL. */
private HttpApiLookup $httpApiLookup;
/** Performs the actual HTTP requests. */
private HttpRequestExecutor $httpRequestExecutor;
private LoggerInterface $logger;

/** @var int Maximum size a single file revision may have */
private $maxBytes;
/** @var int Configured maximum number of revisions (FileImporterMaxRevisions) */
private $maxRevisions;
/** @var int Configured maximum for the summed up file sizes (FileImporterMaxAggregatedBytes) */
private $maxAggregatedBytes;
/**
 * @var string Placeholder name replacing usernames that have been suppressed as part of
 * a steward action on the source site.
 */
private $suppressedUsername;
48 | |
/**
 * Stores the collaborators and reads the import limits from the main configuration.
 *
 * @param HttpApiLookup $httpApiLookup
 * @param HttpRequestExecutor $httpRequestExecutor
 * @param int $maxBytes Maximum size a single file revision may have
 * @param LoggerInterface|null $logger Defaults to a NullLogger when omitted
 *
 * @throws ConfigException when $wgFileImporterAccountForSuppressedUsername is invalid
 */
public function __construct(
	HttpApiLookup $httpApiLookup,
	HttpRequestExecutor $httpRequestExecutor,
	$maxBytes,
	?LoggerInterface $logger = null
) {
	$this->httpApiLookup = $httpApiLookup;
	$this->httpRequestExecutor = $httpRequestExecutor;
	$this->maxBytes = $maxBytes;
	$this->logger = $logger ?? new NullLogger();

	$services = MediaWikiServices::getInstance();
	$config = $services->getMainConfig();

	$this->maxRevisions = (int)$config->get( 'FileImporterMaxRevisions' );
	$this->maxAggregatedBytes = (int)$config->get( 'FileImporterMaxAggregatedBytes' );
	$this->suppressedUsername = $config->get( 'FileImporterAccountForSuppressedUsername' );
	// Fail early: the placeholder is later used as the user name of imported revisions.
	if ( !$services->getUserNameUtils()->isValid( $this->suppressedUsername ) ) {
		throw new ConfigException(
			'Invalid username configured in wgFileImporterAccountForSuppressedUsername: "' .
			$this->suppressedUsername . '"'
		);
	}
}
80 | |
/**
 * Sends a request to the API that belongs to the given source URL and decodes the JSON
 * response.
 *
 * @param SourceUrl $sourceUrl Used to look up the API endpoint
 * @param array $apiParameters Parameters handed over to the HTTP request executor
 *
 * @return array[] Decoded response. Empty when the response body could not be decoded to an
 *  array, so callers can always treat the result as an array.
 * @throws ImportException when the request failed
 */
private function sendApiRequest( SourceUrl $sourceUrl, array $apiParameters ): array {
	$apiUrl = $this->httpApiLookup->getApiUrl( $sourceUrl );

	try {
		$response = $this->httpRequestExecutor->execute( $apiUrl, $apiParameters );
	} catch ( HttpRequestException $e ) {
		throw new LocalizedImportException( [ 'fileimporter-api-failedtogetinfo',
			$apiUrl ], $e );
	}

	$decoded = json_decode( $response->getContent(), true );
	// json_decode() returns null for malformed JSON and a scalar for scalar documents. Both
	// would break callers that use array functions on the result.
	return is_array( $decoded ) ? $decoded : [];
}
97 | |
/**
 * Requests file revisions, text revisions, templates and categories of the page behind the
 * given source URL, follows API continuation, and wraps everything in an ImportDetails object.
 *
 * @param SourceUrl $sourceUrl
 *
 * @return ImportDetails
 * @throws ImportException e.g. when the file couldn't be found
 */
public function getImportDetails( SourceUrl $sourceUrl ): ImportDetails {
	$params = $this->getBaseParams( $sourceUrl );
	$params = $this->addFileRevisionsToParams( $params );
	$params = $this->addTextRevisionsToParams( $params );
	$params = $this->addTemplatesToParams( $params );
	$params = $this->addCategoriesToParams( $params );

	$requestData = $this->sendApiRequest( $sourceUrl, $params );

	// Exactly one page is expected
	if ( count( $requestData['query']['pages'] ?? [] ) !== 1 ) {
		$this->logger->warning(
			'No pages returned by the API',
			[
				'sourceUrl' => $sourceUrl->getUrl(),
				'apiParameters' => $params,
			]
		);
		throw new LocalizedImportException( 'fileimporter-api-nopagesreturned' );
	}

	/** @var array $pageInfoData */
	$pageInfoData = end( $requestData['query']['pages'] );
	'@phan-var array $pageInfoData';

	if ( array_key_exists( 'missing', $pageInfoData ) ) {
		if (
			array_key_exists( 'imagerepository', $pageInfoData ) &&
			$pageInfoData['imagerepository'] === 'shared'
		) {
			throw new LocalizedImportException(
				[ 'fileimporter-cantimportfromsharedrepo', $sourceUrl->getHost() ]
			);
		}
		throw new LocalizedImportException( 'fileimporter-cantimportmissingfile' );
	}

	if ( empty( $pageInfoData['imageinfo'] ) || empty( $pageInfoData['revisions'] ) ) {
		$this->logger->warning(
			'Bad image or revision info returned by the API',
			[
				'sourceUrl' => $sourceUrl->getUrl(),
				'apiParameters' => $params,
			]
		);
		throw new LocalizedImportException( 'fileimporter-api-badinfo' );
	}

	// FIXME: Isn't this misplaced here, *before* more revisions are fetched?
	$this->checkRevisionCount( $sourceUrl, $pageInfoData );
	$this->checkMaxRevisionAggregatedBytes( $pageInfoData );

	// Both arguments are passed by reference: each round replaces $requestData with the
	// latest response and appends the new batch to $pageInfoData.
	while ( array_key_exists( 'continue', $requestData ) ) {
		$this->getMoreRevisions( $sourceUrl, $requestData, $pageInfoData );
	}

	$pageTitle = $pageInfoData['title'];
	$pageLanguage = $pageInfoData['pagelanguagehtmlcode'] ?? null;

	// @phan-suppress-next-line PhanTypePossiblyInvalidDimOffset
	$imageInfoData = $pageInfoData['imageinfo'];
	$revisionsData = $pageInfoData['revisions'];
	$fileRevisions = $this->getFileRevisionsFromImageInfo( $imageInfoData, $pageTitle );
	$textRevisions = $this->getTextRevisionsFromRevisionsInfo( $revisionsData, $pageTitle );
	$templates = $this->reduceTitleList( $pageInfoData['templates'] ?? [], NS_TEMPLATE );
	$categories = $this->reduceTitleList( $pageInfoData['categories'] ?? [], NS_CATEGORY );

	// Strip the namespace prefix only. Splitting on every colon and keeping the last part
	// would truncate file names that contain a colon themselves. A title without any colon
	// is used as is.
	$splitTitle = explode( ':', $pageTitle, 2 );
	$titleAfterColon = end( $splitTitle );

	$importDetails = new ImportDetails(
		$sourceUrl,
		new TitleValue( NS_FILE, $titleAfterColon ),
		$textRevisions,
		$fileRevisions
	);
	// FIXME: Better use constructor parameters instead of setters?
	$importDetails->setPageLanguage( $pageLanguage );
	$importDetails->setTemplates( $templates );
	$importDetails->setCategories( $categories );

	return $importDetails;
}
183 | |
/**
 * Extracts the titles of all entries that belong to the given namespace.
 *
 * @param array[] $results Result set as returned by the API, each entry with "ns" and "title"
 * @param int $namespace Only entries whose "ns" is identical to this number are kept
 *
 * @return string[] Matching titles in their original order
 */
private function reduceTitleList( array $results, int $namespace ): array {
	$inNamespace = array_filter(
		$results,
		static fn ( $entry ) => $entry['ns'] === $namespace
	);
	// array_filter() preserves keys, but callers get a plain list
	return array_values( array_map( static fn ( $entry ) => $entry['title'], $inNamespace ) );
}
199 | |
/**
 * Fetches the next batch of revisions, file revisions, templates and categories, depending
 * on which continuation values the previous response contained, and appends them to the
 * already collected page info. Fails when the collected data exceeds the revision or size
 * limits.
 *
 * @param SourceUrl $sourceUrl
 * @param array[] &$requestData Previous response, replaced with the new response
 * @param array[] &$pageInfoData Collected page info, extended with the new batch
 *
 * @throws ImportException when the request failed, the response contains no page, or a
 *  limit is exceeded
 */
private function getMoreRevisions(
	SourceUrl $sourceUrl,
	array &$requestData,
	array &$pageInfoData
): void {
	$rvContinue = $requestData['continue']['rvcontinue'] ?? null;
	$iiStart = $requestData['continue']['iistart'] ?? null;
	$tlContinue = $requestData['continue']['tlcontinue'] ?? null;
	$clContinue = $requestData['continue']['clcontinue'] ?? null;

	// Only the modules that reported a continuation value are requested again
	$params = $this->getBaseParams( $sourceUrl );

	if ( $iiStart ) {
		$params = $this->addFileRevisionsToParams( $params, $iiStart );
	}

	if ( $rvContinue ) {
		$params = $this->addTextRevisionsToParams( $params, $rvContinue );
	}

	if ( $tlContinue ) {
		$params = $this->addTemplatesToParams( $params, $tlContinue );
	}

	if ( $clContinue ) {
		$params = $this->addCategoriesToParams( $params, $clContinue );
	}

	$requestData = $this->sendApiRequest( $sourceUrl, $params );

	// A response without page data must end in the documented exception, not in a TypeError
	// from end() or array_key_exists().
	$pages = $requestData['query']['pages'] ?? null;
	$newPageInfoData = is_array( $pages ) ? end( $pages ) : false;
	if ( !is_array( $newPageInfoData ) ) {
		$this->logger->warning(
			'No pages returned by the API',
			[
				'sourceUrl' => $sourceUrl->getUrl(),
				'apiParameters' => $params,
			]
		);
		throw new LocalizedImportException( 'fileimporter-api-nopagesreturned' );
	}

	foreach ( [ 'revisions', 'imageinfo', 'templates', 'categories' ] as $key ) {
		if ( array_key_exists( $key, $newPageInfoData ) ) {
			// The accumulator might not have this key yet, e.g. when an earlier batch didn't
			// contain any templates or categories.
			$pageInfoData[$key] = array_merge( $pageInfoData[$key] ?? [], $newPageInfoData[$key] );
		}
	}

	$this->checkRevisionCount( $sourceUrl, $pageInfoData );
	$this->checkMaxRevisionAggregatedBytes( $pageInfoData );
}
265 | |
/**
 * Makes sure neither the number of text revisions nor the number of file revisions exceeds
 * the configured maximum or the built-in MAX_REVISIONS cap.
 *
 * @param SourceUrl $sourceUrl Only used for logging
 * @param array[] $pageInfoData Must contain the "revisions" and "imageinfo" lists
 *
 * @throws ImportException when exceeding the acceptable maximum
 */
private function checkRevisionCount( SourceUrl $sourceUrl, array $pageInfoData ): void {
	// The stricter one of the two limits is the effective one
	$limit = min( $this->maxRevisions, static::MAX_REVISIONS );
	$largestCount = max(
		count( $pageInfoData['revisions'] ),
		count( $pageInfoData['imageinfo'] )
	);

	if ( $largestCount <= $limit ) {
		return;
	}

	$this->logger->warning(
		'Too many revisions were being fetched',
		[
			'sourceUrl' => $sourceUrl->getUrl(),
		]
	);

	throw new LocalizedImportException( 'fileimporter-api-toomanyrevisions' );
}
290 | |
/**
 * Sums up the sizes of all file revisions and fails as soon as the running total exceeds
 * the configured maximum or the built-in MAX_AGGREGATED_BYTES cap.
 *
 * @param array[] $pageInfoData
 * @phan-param array{imageinfo:array{size:int}[]} $pageInfoData
 *
 * @throws ImportException when exceeding the maximum file size
 */
private function checkMaxRevisionAggregatedBytes( array $pageInfoData ): void {
	// The stricter one of the two limits is the effective one
	$limit = min( $this->maxAggregatedBytes, static::MAX_AGGREGATED_BYTES );

	$runningTotal = 0;
	foreach ( $pageInfoData['imageinfo'] as $fileVersion ) {
		// Revisions without a reported size count as empty
		$runningTotal += $fileVersion['size'] ?? 0;
		if ( $runningTotal > $limit ) {
			throw new LocalizedImportException(
				[ 'fileimporter-filetoolarge', count( $pageInfoData['imageinfo'] ) ]
			);
		}
	}
}
308 | |
/**
 * Turns the "imageinfo" entries of an API response into FileRevision objects.
 *
 * @param array[] $imageInfo One entry per file revision
 * @param string $pageTitle Stored as the "name" of every revision
 *
 * @return FileRevisions
 * @throws ImportException when the file is not acceptable, e.g. hidden, missing or too big
 */
private function getFileRevisionsFromImageInfo( array $imageInfo, string $pageTitle ): FileRevisions {
	$fileRevisions = [];

	foreach ( $imageInfo as $info ) {
		if ( array_key_exists( 'filehidden', $info ) ) {
			throw new LocalizedImportException( 'fileimporter-cantimportfilehidden' );
		}

		if ( array_key_exists( 'filemissing', $info ) ) {
			throw new LocalizedImportException( 'fileimporter-filemissinginrevision' );
		}

		if ( ( $info['size'] ?? 0 ) > $this->maxBytes ) {
			throw new LocalizedImportException(
				[ 'fileimporter-filetoolarge', count( $imageInfo ) ]
			);
		}

		// Suppressed user names are replaced with the configured placeholder account
		if ( array_key_exists( 'userhidden', $info ) && !isset( $info['user'] ) ) {
			$info['user'] = $this->suppressedUsername;
		}

		// Hidden comments are replaced with a placeholder message
		if ( array_key_exists( 'commenthidden', $info ) && !isset( $info['comment'] ) ) {
			$info['comment'] = wfMessage( 'fileimporter-revision-removed-comment' )->plain();
		}

		if ( isset( $info['sha1'] ) ) {
			// Convert from the API to the DB sha1 format. The opposite conversion can be seen
			// in ApiQueryImageInfo.
			// * API sha1 format is base 16 padded to 40 chars
			// * DB sha1 format is base 36 padded to 31 chars
			$info['sha1'] = \Wikimedia\base_convert( $info['sha1'], 16, 36, 31 );
		}

		$info['name'] = $pageTitle;
		$info['description'] = $info['comment'] ?? null;

		$fileRevisions[] = new FileRevision( $info );
	}

	return new FileRevisions( $fileRevisions );
}
355 | |
/**
 * Turns the "revisions" entries of an API response into TextRevision objects. Hidden user
 * names, texts and comments are replaced with placeholders.
 *
 * @param array[] $revisionsInfo One entry per text revision
 * @param string $pageTitle Stored as the "title" of every revision
 *
 * @return TextRevisions
 */
private function getTextRevisionsFromRevisionsInfo( array $revisionsInfo, string $pageTitle ): TextRevisions {
	$textRevisions = [];

	foreach ( $revisionsInfo as $info ) {
		if ( array_key_exists( 'userhidden', $info ) && !isset( $info['user'] ) ) {
			$info['user'] = $this->suppressedUsername;
		}

		if (
			array_key_exists( 'texthidden', $info ) &&
			!isset( $info['slots'][SlotRecord::MAIN]['content'] )
		) {
			$info['slots'][SlotRecord::MAIN]['content'] =
				wfMessage( 'fileimporter-revision-removed-text' )->plain();
		}

		if ( array_key_exists( 'commenthidden', $info ) && !isset( $info['comment'] ) ) {
			$info['comment'] = wfMessage( 'fileimporter-revision-removed-comment' )->plain();
		}

		// The mere presence of the key marks a minor edit, whatever its value is
		$info['minor'] = array_key_exists( 'minor', $info );
		$info['title'] = $pageTitle;

		$textRevisions[] = new TextRevision( $info );
	}

	return new TextRevisions( $textRevisions );
}
383 | |
/**
 * Builds the parameters every API request of this class starts with: a JSON "query"
 * request (format version 2) for the "info" of the title parsed from the source URL.
 *
 * @param SourceUrl $sourceUrl
 *
 * @return array
 */
private function getBaseParams( SourceUrl $sourceUrl ): array {
	$title = $this->parseTitleFromSourceUrl( $sourceUrl );

	$baseParams = [
		'action' => 'query',
		'errorformat' => 'plaintext',
		'format' => 'json',
		'formatversion' => '2',
		'titles' => $title,
		'prop' => 'info'
	];
	return $baseParams;
}
394 | |
/**
 * Adds the properties for getting text revisions to the given parameters.
 *
 * @param array $params Existing parameters. "revisions" is appended to the "prop" element.
 * @param string|null $rvContinue Continuation value from a previous response, if any
 *
 * @return array Parameters already present in $params win over the defaults added here
 */
private function addTextRevisionsToParams( array $params, ?string $rvContinue = null ): array {
	$prop = $params['prop'] ?? '';
	$params['prop'] = $prop ? $prop . '|revisions' : 'revisions';

	if ( $rvContinue ) {
		$params['rvcontinue'] = $rvContinue;
	}

	return $params + [
		'rvlimit' => self::API_RESULT_LIMIT,
		// Oldest revision first
		'rvdir' => 'newer',
		'rvslots' => SlotRecord::MAIN,
		'rvprop' => implode(
			'|',
			[
				'flags',
				'timestamp',
				'user',
				'sha1',
				'contentmodel',
				'comment',
				'content',
				'tags',
			]
		)
	];
}
424 | |
/**
 * Adds the properties for getting file revisions to the given parameters.
 *
 * @param array $params Existing parameters. "imageinfo" is appended to the "prop" element.
 * @param string|null $iiStart Continuation value from a previous response, if any
 *
 * @return array Parameters already present in $params win over the defaults added here
 */
private function addFileRevisionsToParams( array $params, ?string $iiStart = null ): array {
	$prop = $params['prop'] ?? '';
	$params['prop'] = $prop ? $prop . '|imageinfo' : 'imageinfo';

	if ( $iiStart ) {
		$params['iistart'] = $iiStart;
	}

	return $params + [
		'iilimit' => self::API_RESULT_LIMIT,
		'iiurlwidth' => 800,
		'iiurlheight' => 400,
		'iiprop' => implode(
			'|',
			[
				'timestamp',
				'user',
				'userid',
				'comment',
				'canonicaltitle',
				'url',
				'size',
				'sha1',
				'archivename',
			]
		)
	];
}
455 | |
/**
 * Adds the properties for getting templates to the given parameters.
 *
 * @param array $params Existing parameters. "templates" is appended to the "prop" element.
 * @param string|null $tlContinue Continuation value from a previous response, if any
 *
 * @return array Parameters already present in $params win over the defaults added here
 */
private function addTemplatesToParams( array $params, ?string $tlContinue = null ): array {
	$prop = $params['prop'] ?? '';
	$params['prop'] = $prop ? $prop . '|templates' : 'templates';

	if ( $tlContinue ) {
		$params['tlcontinue'] = $tlContinue;
	}

	return $params + [ 'tlnamespace' => NS_TEMPLATE, 'tllimit' => self::API_RESULT_LIMIT ];
}
468 | |
/**
 * Adds the properties for getting categories to the given parameters.
 *
 * @param array $params Existing parameters. "categories" is appended to the "prop" element.
 * @param string|null $clContinue Continuation value from a previous response, if any
 *
 * @return array Parameters already present in $params win over the defaults added here
 */
private function addCategoriesToParams( array $params, ?string $clContinue = null ): array {
	$prop = $params['prop'] ?? '';
	$params['prop'] = $prop ? $prop . '|categories' : 'categories';

	if ( $clContinue ) {
		$params['clcontinue'] = $clContinue;
	}

	return $params + [ 'cllimit' => self::API_RESULT_LIMIT ];
}
481 | |
482 | } |