Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
88.24% |
195 / 221 |
|
42.86% |
6 / 14 |
CRAP | |
0.00% |
0 / 1 |
ApiDetailRetriever | |
88.24% |
195 / 221 |
|
42.86% |
6 / 14 |
64.67 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
13 / 13 |
|
100.00% |
1 / 1 |
2 | |||
sendApiRequest | |
57.14% |
4 / 7 |
|
0.00% |
0 / 1 |
2.31 | |||
getImportDetails | |
98.21% |
55 / 56 |
|
0.00% |
0 / 1 |
8 | |||
reduceTitleList | |
60.00% |
3 / 5 |
|
0.00% |
0 / 1 |
3.58 | |||
getMoreRevisions | |
79.31% |
23 / 29 |
|
0.00% |
0 / 1 |
9.72 | |||
checkRevisionCount | |
100.00% |
11 / 11 |
|
100.00% |
1 / 1 |
5 | |||
checkMaxRevisionAggregatedBytes | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
4 | |||
getFileRevisionsFromImageInfo | |
65.00% |
13 / 20 |
|
0.00% |
0 / 1 |
10.74 | |||
getTextRevisionsFromRevisionsInfo | |
64.29% |
9 / 14 |
|
0.00% |
0 / 1 |
6.14 | |||
getBaseParams | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
1 | |||
addTextRevisionsToParams | |
100.00% |
21 / 21 |
|
100.00% |
1 / 1 |
3 | |||
addFileRevisionsToParams | |
100.00% |
22 / 22 |
|
100.00% |
1 / 1 |
3 | |||
addTemplatesToParams | |
75.00% |
3 / 4 |
|
0.00% |
0 / 1 |
3.14 | |||
addCategoriesToParams | |
75.00% |
3 / 4 |
|
0.00% |
0 / 1 |
3.14 |
1 | <?php |
2 | |
3 | namespace FileImporter\Remote\MediaWiki; |
4 | |
5 | use FileImporter\Data\FileRevision; |
6 | use FileImporter\Data\FileRevisions; |
7 | use FileImporter\Data\ImportDetails; |
8 | use FileImporter\Data\SourceUrl; |
9 | use FileImporter\Data\TextRevision; |
10 | use FileImporter\Data\TextRevisions; |
11 | use FileImporter\Exceptions\HttpRequestException; |
12 | use FileImporter\Exceptions\ImportException; |
13 | use FileImporter\Exceptions\LocalizedImportException; |
14 | use FileImporter\Interfaces\DetailRetriever; |
15 | use FileImporter\Services\Http\HttpRequestExecutor; |
16 | use MediaWiki\Config\ConfigException; |
17 | use MediaWiki\MediaWikiServices; |
18 | use MediaWiki\Revision\SlotRecord; |
19 | use MediaWiki\Title\TitleValue; |
20 | use Psr\Log\LoggerInterface; |
21 | use Psr\Log\NullLogger; |
22 | |
23 | /** |
24 | * @license GPL-2.0-or-later |
25 | * @author Addshore |
26 | */ |
27 | class ApiDetailRetriever implements DetailRetriever { |
28 | use MediaWikiSourceUrlParser; |
29 | |
/** @var HttpApiLookup Resolves the API endpoint URL that belongs to a source URL */
private HttpApiLookup $httpApiLookup;
/** @var HttpRequestExecutor Performs the actual HTTP requests against that endpoint */
private HttpRequestExecutor $httpRequestExecutor;
/** @var int Upper size limit in bytes that applies to every single file revision */
private int $maxBytes;
/** @var LoggerInterface Receives warnings about unusable API responses */
private LoggerInterface $logger;
/**
 * @var string Placeholder name replacing usernames that have been suppressed as part of
 * a steward action on the source site.
 */
private $suppressedUsername;
/** @var int Revision limit read from the "FileImporterMaxRevisions" setting */
private int $maxRevisions;
/** @var int Total size limit read from the "FileImporterMaxAggregatedBytes" setting */
private int $maxAggregatedBytes;

/** Value sent as rvlimit, iilimit, tllimit and cllimit with every API request */
private const API_RESULT_LIMIT = 500;
/** Hard cap for the number of revisions that applies in addition to the configured limit */
private const MAX_REVISIONS = 100;
/** Hard cap in bytes for all file revisions combined, in addition to the configured limit */
private const MAX_AGGREGATED_BYTES = 250_000_000;
45 | |
/**
 * Stores the injected collaborators and reads the revision limit, the aggregated size
 * limit and the placeholder username for suppressed users from the main config.
 *
 * @param HttpApiLookup $httpApiLookup
 * @param HttpRequestExecutor $httpRequestExecutor
 * @param int $maxBytes Maximum size in bytes accepted for a single file revision
 * @param LoggerInterface|null $logger Replaced with a NullLogger when omitted
 *
 * @throws ConfigException when $wgFileImporterAccountForSuppressedUsername is invalid
 */
public function __construct(
	HttpApiLookup $httpApiLookup,
	HttpRequestExecutor $httpRequestExecutor,
	int $maxBytes,
	?LoggerInterface $logger = null
) {
	$this->httpApiLookup = $httpApiLookup;
	$this->httpRequestExecutor = $httpRequestExecutor;
	$this->maxBytes = $maxBytes;
	$this->logger = $logger ?? new NullLogger();

	$services = MediaWikiServices::getInstance();
	$mainConfig = $services->getMainConfig();

	$this->maxRevisions = (int)$mainConfig->get( 'FileImporterMaxRevisions' );
	$this->maxAggregatedBytes = (int)$mainConfig->get( 'FileImporterMaxAggregatedBytes' );
	$this->suppressedUsername = $mainConfig->get( 'FileImporterAccountForSuppressedUsername' );

	// A usable placeholder account name is a hard requirement; bail out on anything else.
	if ( $services->getUserNameUtils()->isValid( $this->suppressedUsername ) ) {
		return;
	}

	throw new ConfigException(
		'Invalid username configured in wgFileImporterAccountForSuppressedUsername: "' .
		$this->suppressedUsername . '"'
	);
}
72 | |
/**
 * Sends one request to the API endpoint that belongs to the given source URL and decodes
 * the JSON body of the response.
 *
 * @param SourceUrl $sourceUrl URL the API endpoint is looked up for
 * @param array $apiParameters Parameters handed to the request executor
 *
 * @return array[] Decoded response. An empty array when the body could not be decoded into
 *  an array, so callers can rely on the documented type and run their own "nothing
 *  returned" checks.
 * @throws ImportException when the request failed
 */
private function sendApiRequest( SourceUrl $sourceUrl, array $apiParameters ): array {
	$apiUrl = $this->httpApiLookup->getApiUrl( $sourceUrl );

	try {
		$response = $this->httpRequestExecutor->execute( $apiUrl, $apiParameters );
	} catch ( HttpRequestException $e ) {
		throw new LocalizedImportException(
			[ 'fileimporter-api-failedtogetinfo', $apiUrl ],
			$e
		);
	}

	// json_decode() yields null for malformed JSON and a scalar for e.g. "true" or "1".
	// Neither is something the callers can index into.
	$decoded = json_decode( $response->getContent(), true );
	if ( !is_array( $decoded ) ) {
		$this->logger->warning(
			'API response could not be decoded into an array',
			[
				'apiUrl' => $apiUrl,
				'apiParameters' => $apiParameters,
			]
		);
		return [];
	}

	return $decoded;
}
89 | |
/**
 * Requests file revisions, text revisions, templates and categories for the page behind
 * the given source URL, follows the API continuation until everything is fetched, and
 * wraps the result in an ImportDetails object.
 *
 * @param SourceUrl $sourceUrl
 *
 * @return ImportDetails
 * @throws ImportException e.g. when the file couldn't be found
 */
public function getImportDetails( SourceUrl $sourceUrl ): ImportDetails {
	$params = $this->getBaseParams( $sourceUrl );
	$params = $this->addFileRevisionsToParams( $params );
	$params = $this->addTextRevisionsToParams( $params );
	$params = $this->addTemplatesToParams( $params );
	$params = $this->addCategoriesToParams( $params );

	$requestData = $this->sendApiRequest( $sourceUrl, $params );

	// Exactly one page is expected, because exactly one title was requested.
	if ( count( $requestData['query']['pages'] ?? [] ) !== 1 ) {
		$this->logger->warning(
			'No pages returned by the API',
			[
				'sourceUrl' => $sourceUrl->getUrl(),
				'apiParameters' => $params,
			]
		);
		throw new LocalizedImportException( 'fileimporter-api-nopagesreturned' );
	}

	/** @var array $pageInfoData */
	$pageInfoData = end( $requestData['query']['pages'] );
	'@phan-var array $pageInfoData';

	if ( array_key_exists( 'missing', $pageInfoData ) ) {
		// Strict comparison, so only the literal string "shared" selects this message.
		if ( ( $pageInfoData['imagerepository'] ?? null ) === 'shared' ) {
			throw new LocalizedImportException(
				[ 'fileimporter-cantimportfromsharedrepo', $sourceUrl->getHost() ]
			);
		}
		throw new LocalizedImportException( 'fileimporter-cantimportmissingfile' );
	}

	if ( empty( $pageInfoData['imageinfo'] ) || empty( $pageInfoData['revisions'] ) ) {
		$this->logger->warning(
			'Bad image or revision info returned by the API',
			[
				'sourceUrl' => $sourceUrl->getUrl(),
				'apiParameters' => $params,
			]
		);
		throw new LocalizedImportException( 'fileimporter-api-badinfo' );
	}

	// FIXME: Isn't this misplaced here, *before* more revisions are fetched?
	$this->checkRevisionCount( $sourceUrl, $pageInfoData );
	$this->checkMaxRevisionAggregatedBytes( $pageInfoData );

	// Both arrays are passed by reference: $requestData is replaced with the latest
	// response, $pageInfoData accumulates what the additional responses contain.
	while ( array_key_exists( 'continue', $requestData ) ) {
		$this->getMoreRevisions( $sourceUrl, $requestData, $pageInfoData );
	}

	$pageTitle = $pageInfoData['title'];
	$pageLanguage = $pageInfoData['pagelanguagehtmlcode'] ?? null;

	// @phan-suppress-next-line PhanTypePossiblyInvalidDimOffset
	$imageInfoData = $pageInfoData['imageinfo'];
	$revisionsData = $pageInfoData['revisions'];
	$fileRevisions = $this->getFileRevisionsFromImageInfo( $imageInfoData, $pageTitle );
	$textRevisions = $this->getTextRevisionsFromRevisionsInfo( $revisionsData, $pageTitle );
	$templates = $this->reduceTitleList( $pageInfoData['templates'] ?? [], NS_TEMPLATE );
	$categories = $this->reduceTitleList( $pageInfoData['categories'] ?? [], NS_CATEGORY );

	// Strip the namespace prefix only. Splitting at the first colon keeps any further
	// colon that is part of the file name itself, instead of cutting the name short.
	$splitTitle = explode( ':', $pageTitle, 2 );
	$titleAfterColon = end( $splitTitle );

	$importDetails = new ImportDetails(
		$sourceUrl,
		new TitleValue( NS_FILE, $titleAfterColon ),
		$textRevisions,
		$fileRevisions
	);
	// FIXME: Better use constructor parameters instead of setters?
	$importDetails->setPageLanguage( $pageLanguage );
	$importDetails->setTemplates( $templates );
	$importDetails->setCategories( $categories );

	return $importDetails;
}
175 | |
/**
 * Picks the "title" of every entry whose "ns" is identical to the requested namespace.
 *
 * @param array[] $results Result set as returned by the API
 * @param int $namespace
 *
 * @return string[] Matching titles in their original order, indexed from 0
 */
private function reduceTitleList( array $results, int $namespace ): array {
	$inNamespace = array_filter(
		$results,
		static fn ( $entry ) => $entry['ns'] === $namespace
	);

	// array_filter() keeps the original keys; the caller gets a plain list instead.
	return array_values( array_map(
		static fn ( $entry ) => $entry['title'],
		$inNamespace
	) );
}
191 | |
/**
 * Fetches the next batch of revisions, file versions, templates and categories, based on
 * the continuation values found in the previous response, and appends them to the data
 * collected so far. Both limit checks run again on the merged data.
 *
 * @param SourceUrl $sourceUrl
 * @param array[] &$requestData Previous API response; replaced with the new response
 * @param array[] &$pageInfoData Accumulated page data; extended with the new batch
 *
 * @throws ImportException when the request fails, the response contains no page, or one
 *  of the limits is exceeded
 */
private function getMoreRevisions(
	SourceUrl $sourceUrl,
	array &$requestData,
	array &$pageInfoData
): void {
	$rvContinue = $requestData['continue']['rvcontinue'] ?? null;
	$iiStart = $requestData['continue']['iistart'] ?? null;
	$tlContinue = $requestData['continue']['tlcontinue'] ?? null;
	$clContinue = $requestData['continue']['clcontinue'] ?? null;

	// Only the properties that still have something to continue are requested again.
	$params = $this->getBaseParams( $sourceUrl );

	if ( $iiStart ) {
		$params = $this->addFileRevisionsToParams( $params, $iiStart );
	}

	if ( $rvContinue ) {
		$params = $this->addTextRevisionsToParams( $params, $rvContinue );
	}

	if ( $tlContinue ) {
		$params = $this->addTemplatesToParams( $params, $tlContinue );
	}

	if ( $clContinue ) {
		$params = $this->addCategoriesToParams( $params, $clContinue );
	}

	$requestData = $this->sendApiRequest( $sourceUrl, $params );

	// A continuation response without a usable page entry must not reach end() or
	// array_key_exists(); both raise a TypeError on anything but an array.
	$pages = $requestData['query']['pages'] ?? null;
	$newPageInfoData = is_array( $pages ) ? end( $pages ) : false;
	if ( !is_array( $newPageInfoData ) ) {
		$this->logger->warning(
			'No pages returned by the API',
			[
				'sourceUrl' => $sourceUrl->getUrl(),
				'apiParameters' => $params,
			]
		);
		throw new LocalizedImportException( 'fileimporter-api-nopagesreturned' );
	}

	// An earlier batch may not have carried a given key at all; "templates" and
	// "categories" in particular are optional. Start from an empty list in that case
	// instead of handing null to array_merge().
	foreach ( [ 'revisions', 'imageinfo', 'templates', 'categories' ] as $key ) {
		if ( is_array( $newPageInfoData[$key] ?? null ) ) {
			$pageInfoData[$key] = array_merge(
				$pageInfoData[$key] ?? [],
				$newPageInfoData[$key]
			);
		}
	}

	$this->checkRevisionCount( $sourceUrl, $pageInfoData );
	$this->checkMaxRevisionAggregatedBytes( $pageInfoData );
}
257 | |
/**
 * Rejects the import when either the text revisions or the file revisions outnumber the
 * configured limit or the hard-coded MAX_REVISIONS cap.
 *
 * @param SourceUrl $sourceUrl Only used as context for the log entry
 * @param array[] $pageInfoData Page data with a "revisions" and an "imageinfo" list
 *
 * @throws ImportException when exceeding the acceptable maximum
 */
private function checkRevisionCount( SourceUrl $sourceUrl, array $pageInfoData ): void {
	$tooMany = false;
	// Configured limit first, hard cap second; each is compared against both lists.
	foreach ( [ $this->maxRevisions, static::MAX_REVISIONS ] as $limit ) {
		if ( count( $pageInfoData['revisions'] ) > $limit ||
			count( $pageInfoData['imageinfo'] ) > $limit
		) {
			$tooMany = true;
			break;
		}
	}

	if ( !$tooMany ) {
		return;
	}

	$logContext = [ 'sourceUrl' => $sourceUrl->getUrl() ];
	$this->logger->warning( 'Too many revisions were being fetched', $logContext );

	throw new LocalizedImportException( 'fileimporter-api-toomanyrevisions' );
}
282 | |
/**
 * Sums up the "size" of all file versions and rejects the import as soon as the running
 * total is above the configured limit or the hard-coded MAX_AGGREGATED_BYTES cap.
 * Versions without a "size" count as 0 bytes.
 *
 * @param array[] $pageInfoData
 * @phan-param array{imageinfo:array{size:int}[]} $pageInfoData
 *
 * @throws ImportException when exceeding the maximum file size
 */
private function checkMaxRevisionAggregatedBytes( array $pageInfoData ): void {
	// Being above either limit is the same as being above the smaller one.
	$effectiveLimit = min( $this->maxAggregatedBytes, static::MAX_AGGREGATED_BYTES );
	$runningTotal = 0;

	foreach ( $pageInfoData['imageinfo'] as $fileVersion ) {
		$runningTotal += $fileVersion['size'] ?? 0;
		if ( $runningTotal > $effectiveLimit ) {
			throw new LocalizedImportException( [
				'fileimporter-filetoolarge',
				count( $pageInfoData['imageinfo'] ),
			] );
		}
	}
}
300 | |
/**
 * Converts the "imageinfo" entries of an API response into FileRevision objects.
 *
 * @param array[] $imageInfo One entry per file version
 * @param string $pageTitle Stored as "name" in every revision
 *
 * @return FileRevisions
 * @throws ImportException when the file is not acceptable, e.g. hidden or too big
 */
private function getFileRevisionsFromImageInfo( array $imageInfo, string $pageTitle ): FileRevisions {
	$fileRevisions = [];

	foreach ( $imageInfo as $fileVersion ) {
		// Hidden or missing files cannot be imported at all.
		if ( array_key_exists( 'filehidden', $fileVersion ) ) {
			throw new LocalizedImportException( 'fileimporter-cantimportfilehidden' );
		}
		if ( array_key_exists( 'filemissing', $fileVersion ) ) {
			throw new LocalizedImportException( 'fileimporter-filemissinginrevision' );
		}

		// A hidden uploader is replaced with the configured placeholder account.
		if ( array_key_exists( 'userhidden', $fileVersion ) ) {
			$fileVersion['user'] ??= $this->suppressedUsername;
		}

		// Per-version size limit; a version without "size" counts as 0 bytes.
		$sizeInBytes = $fileVersion['size'] ?? 0;
		if ( $sizeInBytes > $this->maxBytes ) {
			throw new LocalizedImportException( [
				'fileimporter-filetoolarge',
				count( $imageInfo ),
			] );
		}

		if ( isset( $fileVersion['sha1'] ) ) {
			// Convert from API sha1 format to DB sha1 format. The conversion can be seen
			// inside ApiQueryImageInfo.
			// * API sha1 format is base 16 padded to 40 chars
			// * DB sha1 format is base 36 padded to 31 chars
			$fileVersion['sha1'] = \Wikimedia\base_convert( $fileVersion['sha1'], 16, 36, 31 );
		}

		// A hidden upload comment is replaced with a fixed message.
		if ( array_key_exists( 'commenthidden', $fileVersion ) ) {
			$fileVersion['comment'] ??=
				wfMessage( 'fileimporter-revision-removed-comment' )->plain();
		}

		$fileVersion['name'] = $pageTitle;
		$fileVersion['description'] = $fileVersion['comment'] ?? null;

		$fileRevisions[] = new FileRevision( $fileVersion );
	}

	return new FileRevisions( $fileRevisions );
}
347 | |
/**
 * Converts the "revisions" entries of an API response into TextRevision objects.
 * Hidden users, texts and comments that come without a value are filled with
 * placeholders.
 *
 * @param array[] $revisionsInfo One entry per text revision
 * @param string $pageTitle Stored as "title" in every revision
 *
 * @return TextRevisions
 */
private function getTextRevisionsFromRevisionsInfo( array $revisionsInfo, string $pageTitle ): TextRevisions {
	$textRevisions = [];

	foreach ( $revisionsInfo as $revision ) {
		if ( array_key_exists( 'userhidden', $revision ) ) {
			$revision['user'] ??= $this->suppressedUsername;
		}

		if ( array_key_exists( 'texthidden', $revision ) ) {
			$revision['slots'][SlotRecord::MAIN]['content'] ??=
				wfMessage( 'fileimporter-revision-removed-text' )->plain();
		}

		if ( array_key_exists( 'commenthidden', $revision ) ) {
			$revision['comment'] ??=
				wfMessage( 'fileimporter-revision-removed-comment' )->plain();
		}

		// The mere presence of the "minor" key marks a minor edit.
		$isMinor = array_key_exists( 'minor', $revision );
		$revision['minor'] = $isMinor;
		$revision['title'] = $pageTitle;

		$textRevisions[] = new TextRevision( $revision );
	}

	return new TextRevisions( $textRevisions );
}
375 | |
/**
 * Builds the parameters every API query starts with: a JSON "query" request in format
 * version 2 for the title parsed from the source URL, asking for the "info" property.
 *
 * @param SourceUrl $sourceUrl
 *
 * @return array
 */
private function getBaseParams( SourceUrl $sourceUrl ): array {
	$title = $this->parseTitleFromSourceUrl( $sourceUrl );

	$baseParams = [
		'action' => 'query',
		'errorformat' => 'plaintext',
		'format' => 'json',
		'formatversion' => '2',
	];
	$baseParams['titles'] = $title;
	$baseParams['prop'] = 'info';

	return $baseParams;
}
386 | |
/**
 * Extends the given parameters so that the query also returns text revisions.
 *
 * @param array $params Parameters to extend; values that are already set win over the
 *  defaults added here
 * @param string|null $rvContinue Continuation value from a previous response, if any
 *
 * @return array
 */
private function addTextRevisionsToParams( array $params, ?string $rvContinue = null ): array {
	// Only separate with a pipe when there is something to separate from.
	$separator = $params['prop'] ? '|' : '';
	$params['prop'] .= $separator . 'revisions';

	if ( $rvContinue ) {
		$params['rvcontinue'] = $rvContinue;
	}

	$revisionProps = [
		'flags',
		'timestamp',
		'user',
		'sha1',
		'contentmodel',
		'comment',
		'content',
		'tags',
	];

	$params += [
		'rvlimit' => static::API_RESULT_LIMIT,
		'rvdir' => 'newer',
		'rvslots' => SlotRecord::MAIN,
		'rvprop' => implode( '|', $revisionProps ),
	];

	return $params;
}
416 | |
/**
 * Extends the given parameters so that the query also returns file revisions.
 *
 * @param array $params Parameters to extend; values that are already set win over the
 *  defaults added here
 * @param string|null $iiStart Continuation value from a previous response, if any
 *
 * @return array
 */
private function addFileRevisionsToParams( array $params, ?string $iiStart = null ): array {
	// Only separate with a pipe when there is something to separate from.
	$separator = $params['prop'] ? '|' : '';
	$params['prop'] .= $separator . 'imageinfo';

	if ( $iiStart ) {
		$params['iistart'] = $iiStart;
	}

	$imageInfoProps = [
		'timestamp',
		'user',
		'userid',
		'comment',
		'canonicaltitle',
		'url',
		'size',
		'sha1',
		'archivename',
	];

	$params += [
		'iilimit' => static::API_RESULT_LIMIT,
		'iiurlwidth' => 800,
		'iiurlheight' => 400,
		'iiprop' => implode( '|', $imageInfoProps ),
	];

	return $params;
}
447 | |
/**
 * Extends the given parameters so that the query also returns the templates used on the
 * page, restricted to the template namespace.
 *
 * @param array $params Parameters to extend; values that are already set win over the
 *  defaults added here
 * @param string|null $tlContinue Continuation value from a previous response, if any
 *
 * @return array
 */
private function addTemplatesToParams( array $params, ?string $tlContinue = null ): array {
	// Only separate with a pipe when there is something to separate from.
	$separator = $params['prop'] ? '|' : '';
	$params['prop'] .= $separator . 'templates';

	if ( $tlContinue ) {
		$params['tlcontinue'] = $tlContinue;
	}

	$params += [
		'tlnamespace' => NS_TEMPLATE,
		'tllimit' => static::API_RESULT_LIMIT,
	];

	return $params;
}
460 | |
/**
 * Extends the given parameters so that the query also returns the categories of the page.
 *
 * @param array $params Parameters to extend; values that are already set win over the
 *  defaults added here
 * @param string|null $clContinue Continuation value from a previous response, if any
 *
 * @return array
 */
private function addCategoriesToParams( array $params, ?string $clContinue = null ): array {
	// Only separate with a pipe when there is something to separate from.
	$separator = $params['prop'] ? '|' : '';
	$params['prop'] .= $separator . 'categories';

	if ( $clContinue ) {
		$params['clcontinue'] = $clContinue;
	}

	$params += [ 'cllimit' => static::API_RESULT_LIMIT ];

	return $params;
}
473 | |
474 | } |