Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
95.61% |
196 / 205 |
|
33.33% |
2 / 6 |
CRAP | |
0.00% |
0 / 1 |
MediaModerationImageContentsLookup | |
95.61% |
196 / 205 |
|
33.33% |
2 / 6 |
42 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
1 | |||
getImageContents | |
100.00% |
36 / 36 |
|
100.00% |
1 / 1 |
9 | |||
getThumbnailMimeType | |
95.45% |
21 / 22 |
|
0.00% |
0 / 1 |
5 | |||
getThumbnailForFile | |
92.31% |
60 / 65 |
|
0.00% |
0 / 1 |
9.04 | |||
getThumbnailContents | |
97.22% |
35 / 36 |
|
0.00% |
0 / 1 |
8 | |||
getFileContents | |
94.87% |
37 / 39 |
|
0.00% |
0 / 1 |
10.01 |
1 | <?php |
2 | |
3 | namespace MediaWiki\Extension\MediaModeration\Services; |
4 | |
5 | use ArchivedFile; |
6 | use File; |
7 | use FileBackend; |
8 | use Liuggio\StatsdClient\Factory\StatsdDataFactoryInterface; |
9 | use LocalRepo; |
10 | use MediaTransformError; |
11 | use MediaWiki\Config\ServiceOptions; |
12 | use MediaWiki\Extension\MediaModeration\Media\ThumborThumbnailImage; |
13 | use MediaWiki\Extension\MediaModeration\Status\ImageContentsLookupStatus; |
14 | use MediaWiki\Http\HttpRequestFactory; |
15 | use MediaWiki\Language\RawMessage; |
16 | use MimeAnalyzer; |
17 | use StatusValue; |
18 | use ThumbnailImage; |
19 | |
20 | /** |
21 | * This service looks up the contents of the given $file, either by getting a thumbnail |
22 | * with width as specified in the wgMediaModerationThumbnailWidth config or if this fails |
23 | * using the original source file. |
24 | */ |
class MediaModerationImageContentsLookup {

	public const CONSTRUCTOR_OPTIONS = [
		'MediaModerationThumbnailWidth',
	];

	/**
	 * Largest number of bytes of image contents that will be returned for sending to PhotoDNA.
	 * PhotoDNA returns an error for anything larger than 4MB (treated here as 4,000,000 bytes).
	 */
	private const MAX_CONTENT_SIZE_BYTES = 4000000;

	/**
	 * Smallest width and height, in pixels, that PhotoDNA accepts for an image.
	 */
	private const MIN_DIMENSION_PIXELS = 160;

	private FileBackend $fileBackend;
	private StatsdDataFactoryInterface $perDbNameStatsdDataFactory;
	private MimeAnalyzer $mimeAnalyzer;
	private LocalRepo $localRepo;

	private HttpRequestFactory $httpRequestFactory;

	/** Width requested for generated thumbnails, read from the MediaModerationThumbnailWidth option. */
	private int $thumbnailWidth;

	/**
	 * @param ServiceOptions $options Must provide every option listed in self::CONSTRUCTOR_OPTIONS
	 * @param FileBackend $fileBackend Used to read thumbnail and original file contents
	 * @param StatsdDataFactoryInterface $perDbNameStatsdDataFactory Used for counters and timings
	 * @param MimeAnalyzer $mimeAnalyzer Used to work out the mime type of a thumbnail
	 * @param LocalRepo $localRepo Used to build the storage path of deleted (archived) files
	 * @param HttpRequestFactory $httpRequestFactory Used for requests to the thumbnail proxy
	 */
	public function __construct(
		ServiceOptions $options,
		FileBackend $fileBackend,
		StatsdDataFactoryInterface $perDbNameStatsdDataFactory,
		MimeAnalyzer $mimeAnalyzer,
		LocalRepo $localRepo,
		HttpRequestFactory $httpRequestFactory
	) {
		$options->assertRequiredOptions( self::CONSTRUCTOR_OPTIONS );
		$this->fileBackend = $fileBackend;
		$this->perDbNameStatsdDataFactory = $perDbNameStatsdDataFactory;
		$this->mimeAnalyzer = $mimeAnalyzer;
		$this->localRepo = $localRepo;
		$this->thumbnailWidth = $options->get( 'MediaModerationThumbnailWidth' );
		$this->httpRequestFactory = $httpRequestFactory;
	}

	/**
	 * Gets the image contents as a string for the given $file to be sent to PhotoDNA.
	 *
	 * This function first tries to get a thumbnail for the $file and return the contents of the
	 * thumbnail along with the mime type of that thumbnail. If this fails, then the function tries
	 * to return the contents of the $file if the $file is in a format supported by PhotoDNA.
	 *
	 * @param File|ArchivedFile $file
	 * @return ImageContentsLookupStatus Holds the contents and their mime type on success. When the
	 *   original file is used as the fallback, the status is OK but still carries the errors from
	 *   the thumbnail attempt. If no contents could be found the status is fatal.
	 */
	public function getImageContents( $file ): ImageContentsLookupStatus {
		// Create a status that will be returned, and if it is good will contain the
		// thumbnail/original file contents and mime type.
		$returnStatus = new ImageContentsLookupStatus();
		if ( $file instanceof File ) {
			// Only try to use the thumbnail if the $file is an instance of the File class,
			// as support for generating a thumbnail for deleted files is not implemented.
			$thumbnailStatus = $this->getThumbnailForFile( $file );
			if ( $thumbnailStatus->isGood() ) {
				// If we could get the ThumbnailImage object for the $file, then
				// try to get the contents of the thumbnail along with the mime type.
				$thumbnail = $thumbnailStatus->getValue();
				$thumbnailContentsStatus = $this->getThumbnailContents( $thumbnail );
				$thumbnailMimeTypeStatus = $this->getThumbnailMimeType( $thumbnail );
				if ( $thumbnailContentsStatus->isGood() && $thumbnailMimeTypeStatus->isGood() ) {
					// The contents being returned are those of the thumbnail, so the mime type must be
					// the one that was determined (and checked against the allowed types) for the
					// thumbnail. The mime type of the source file need not match it.
					return $returnStatus
						->setImageContents( $thumbnailContentsStatus->getValue() )
						->setMimeType( $thumbnailMimeTypeStatus->getValue() );
				}
				// Add the failures to the return status for the caller to see.
				$returnStatus
					->merge( $thumbnailMimeTypeStatus )
					->merge( $thumbnailContentsStatus );
			}
			// Add the failures to the return status for the caller to see.
			$returnStatus->merge( $thumbnailStatus );
		}
		// If no thumbnail could be generated for the $file or the $file is an ArchivedFile instance, then we will
		// reach here. Now try to get the contents of the $file if the mime type is supported by PhotoDNA.
		if ( in_array( $file->getMimeType(), MediaModerationFileProcessor::ALLOWED_MIME_TYPES, true ) ) {
			$fileContentsStatus = $this->getFileContents( $file );
			if ( $fileContentsStatus->isGood() ) {
				// We were able to get the contents of the $file
				if ( $file instanceof File ) {
					// Count each time the source file had to be used for a File object, as we
					// should have been able to generate a thumbnail for it.
					$this->perDbNameStatsdDataFactory->increment(
						'MediaModeration.PhotoDNAServiceProvider.Execute.SourceFileUsedForFileObject'
					);
				}
				// Set the result as OK as we got the original file, but still include the
				// errors from the thumbnail generation for tracking.
				return $returnStatus
					->setOK( true )
					->setImageContents( $fileContentsStatus->getValue() )
					->setMimeType( $file->getMimeType() );
			}
			// If we were unable to get the contents of the $file, then add the errors from
			// this to the return status.
			$returnStatus->merge( $fileContentsStatus );
		}
		// If we get here, then we have failed to get any image contents and so should return a fatal status.
		if ( $returnStatus->isOK() ) {
			// The $returnStatus can be good and have no message if the image was deleted and the source image is
			// not supported by PhotoDNA (such as a deleted SVG).
			$returnStatus->fatal( new RawMessage(
				'Failed to get image contents for $1',
				[ $file->getName() ]
			) );
		}
		// Increment the RuntimeException statsd counter, as we have reached a point where
		// we could not get any image contents to send.
		$this->perDbNameStatsdDataFactory->increment(
			'MediaModeration.PhotoDNAServiceProvider.Execute.RuntimeException'
		);
		return $returnStatus;
	}

	/**
	 * Gets the mime type (or best guess for it) of the given $thumbnail.
	 *
	 * @param ThumbnailImage $thumbnail
	 * @return StatusValue A good status with the mime type string as the value, or a fatal status if
	 *   the mime type could not be determined or is not one of the types allowed for PhotoDNA.
	 */
	protected function getThumbnailMimeType( ThumbnailImage $thumbnail ): StatusValue {
		// For a thumbnail fetched via the thumbnail proxy use the content type stored on it. Otherwise
		// attempt to work out what the mime type is based on the extension, and if either of those
		// gives null then try based on the contents of the thumbnail.
		$thumbnailMimeType = $thumbnail instanceof ThumborThumbnailImage ?
			$thumbnail->getContentType() :
			$this->mimeAnalyzer->getMimeTypeFromExtensionOrNull( $thumbnail->getExtension() );
		if ( $thumbnailMimeType === null ) {
			$thumbnailMimeType = $this->mimeAnalyzer->guessMimeType( $thumbnail->getLocalCopyPath() );
		}
		if ( !$thumbnailMimeType ) {
			// We cannot send a request to PhotoDNA without knowing what the mime type is.
			$this->perDbNameStatsdDataFactory->increment(
				'MediaModeration.ImageContentsLookup.Thumbnail.MimeType.LookupFailed'
			);
			return StatusValue::newFatal( new RawMessage(
				'Could not get mime type of thumbnail for $1',
				[ $thumbnail->getFile()->getName() ]
			) );
		}
		if ( !in_array( $thumbnailMimeType, MediaModerationFileProcessor::ALLOWED_MIME_TYPES, true ) ) {
			// We cannot send a request to PhotoDNA with a thumbnail type that is unsupported by the API.
			$this->perDbNameStatsdDataFactory->increment(
				'MediaModeration.ImageContentsLookup.Thumbnail.MimeType.Unsupported'
			);
			return StatusValue::newFatal( new RawMessage(
				'Mime type of thumbnail for $1 is not supported by PhotoDNA.',
				[ $thumbnail->getFile()->getName() ]
			) );
		}
		return StatusValue::newGood( $thumbnailMimeType );
	}

	/**
	 * Gets a thumbnail of $file at the configured thumbnail width.
	 *
	 * If the repo of the $file has both a thumbnail proxy URL and secret, the thumbnail is first
	 * requested over HTTP from that proxy. If that does not produce a usable image, or no proxy is
	 * configured, File::transform is used instead.
	 *
	 * @param File $file
	 * @return StatusValue<ThumbnailImage|ThumborThumbnailImage> A StatusValue with a ThumbnailImage object as the value
	 *   if it is a good status.
	 */
	protected function getThumbnailForFile( File $file ): StatusValue {
		$genericErrorMessage = 'Could not transform file ' . $file->getName();
		$start = microtime( true );
		$thumbName = $file->thumbName( [ 'width' => $this->thumbnailWidth ] );
		$thumbProxyUrl = $file->getRepo()->getThumbProxyUrl();
		$secret = $file->getRepo()->getThumbProxySecret();
		if ( $thumbProxyUrl && $secret ) {
			// Specific to Wikimedia setup only: proxy the request to Thumbor,
			// which should result in the thumbnail being generated on disk
			// @see wfProxyThumbnailRequest()
			$req = $this->httpRequestFactory->create(
				$thumbProxyUrl . $file->getThumbRel( $thumbName )
			);
			$req->setHeader( 'X-Swift-Secret', $secret );
			$result = $req->execute();
			if ( $result->isGood() ) {
				$imageContent = $req->getContent();
				// getimagesizefromstring() can return a PHP Notice if
				// the contents are invalid. Suppress the notice, and check
				// instead if the result is truthy.
				// phpcs:ignore Generic.PHP.NoSilencedErrors.Discouraged
				$imageMetadata = @getimagesizefromstring( $imageContent );
				if ( $imageMetadata ) {
					$thumbnail = new ThumborThumbnailImage(
						$file,
						$file->getThumbUrl( $thumbName ),
						[
							'width' => $imageMetadata[0],
							'height' => $imageMetadata[1]
						],
						$imageContent,
						$req->getResponseHeader( 'content-type' )
					);
					$this->perDbNameStatsdDataFactory->timing(
						'MediaModeration.PhotoDNAServiceProviderThumbnailTransformThumborRequest',
						1000 * ( microtime( true ) - $start )
					);
					return StatusValue::newGood( $thumbnail );
				}
			}
			// The request failed, so increment the failure counter and use regular ::transform
			// for checks done farther on.
			$this->perDbNameStatsdDataFactory->increment(
				'MediaModeration.ImageContentsLookup.Thumbnail.ThumborTransform.Failed'
			);
			$thumbnail = $file->transform( [ 'width' => $this->thumbnailWidth ] );
		} else {
			// For non Wikimedia setups, use RENDER_NOW to ensure we have
			// a file to work with.
			$thumbnail = $file->transform( [ 'width' => $this->thumbnailWidth ], File::RENDER_NOW );
		}
		$delay = microtime( true ) - $start;
		$this->perDbNameStatsdDataFactory->timing(
			'MediaModeration.PhotoDNAServiceProviderThumbnailTransform',
			1000 * $delay
		);
		if ( !$thumbnail ) {
			return $this->newTransformFailedStatus( $genericErrorMessage );
		}
		if ( $thumbnail instanceof MediaTransformError ) {
			return $this->newTransformFailedStatus( $genericErrorMessage . ': ' . $thumbnail->toText() );
		}
		if ( !( $thumbnail instanceof ThumbnailImage ) ) {
			return $this->newTransformFailedStatus(
				$genericErrorMessage . ': not an instance of ThumbnailImage, got ' . get_class( $thumbnail )
			);
		}
		if ( !$thumbnail->hasFile() ) {
			return $this->newTransformFailedStatus(
				$genericErrorMessage . ', got a ' . get_class( $thumbnail ) . ' but ::hasFile() returns false.'
			);
		}
		return StatusValue::newGood( $thumbnail );
	}

	/**
	 * Increments the counter for failed thumbnail transforms and creates the fatal status
	 * that describes the failure.
	 *
	 * @param string $message The text of the error to add to the returned status
	 * @return StatusValue A fatal status
	 */
	private function newTransformFailedStatus( string $message ): StatusValue {
		$this->perDbNameStatsdDataFactory->increment(
			'MediaModeration.ImageContentsLookup.Thumbnail.Transform.Failed'
		);
		return StatusValue::newFatal( new RawMessage( $message ) );
	}

	/**
	 * Gets the contents of the given $thumbnail as a string.
	 *
	 * @param ThumbnailImage $thumbnail
	 * @return StatusValue A good status with the contents string as the value, or a fatal status if
	 *   the thumbnail is too small in either dimension, the contents could not be read, or the
	 *   contents are larger than the size limit.
	 */
	protected function getThumbnailContents( ThumbnailImage $thumbnail ): StatusValue {
		if (
			$thumbnail->getHeight() < self::MIN_DIMENSION_PIXELS ||
			$thumbnail->getWidth() < self::MIN_DIMENSION_PIXELS
		) {
			$this->perDbNameStatsdDataFactory->increment(
				'MediaModeration.ImageContentsLookup.Thumbnail.Contents.TooSmall'
			);
			// PhotoDNA requires that images be at least 160px by 160px, so don't use the
			// thumbnail if either dimension is too small.
			return StatusValue::newFatal( new RawMessage(
				'Thumbnail does not meet dimension requirements for $1',
				[ $thumbnail->getFile()->getName() ]
			) );
		}
		// A ThumborThumbnailImage carries its own contents, so only other thumbnails need a storage path.
		if ( !( $thumbnail instanceof ThumborThumbnailImage ) && !$thumbnail->getStoragePath() ) {
			$this->perDbNameStatsdDataFactory->increment(
				'MediaModeration.ImageContentsLookup.Thumbnail.Contents.LookupFailed'
			);
			return StatusValue::newFatal( new RawMessage(
				'Could not get storage path of thumbnail for $1',
				[ $thumbnail->getFile()->getName() ]
			) );
		}
		$fileContents = $thumbnail instanceof ThumborThumbnailImage ?
			$thumbnail->getContent() :
			$this->fileBackend->getFileContents( [ 'src' => $thumbnail->getStoragePath() ] );
		if ( !$fileContents ) {
			$this->perDbNameStatsdDataFactory->increment(
				'MediaModeration.ImageContentsLookup.Thumbnail.Contents.LookupFailed'
			);
			return StatusValue::newFatal( new RawMessage(
				'Could not get thumbnail contents for $1',
				[ $thumbnail->getFile()->getName() ]
			) );
		}
		if ( strlen( $fileContents ) > self::MAX_CONTENT_SIZE_BYTES ) {
			$this->perDbNameStatsdDataFactory->increment(
				'MediaModeration.ImageContentsLookup.Thumbnail.Contents.TooLarge'
			);
			// Check that the size of the thumbnail does not exceed 4MB, as PhotoDNA returns an
			// error for files that are any larger.
			// strlen returns the size of the string in bytes and 4MB is 4,000,000 bytes.
			return StatusValue::newFatal( new RawMessage(
				'Thumbnail contents exceeds 4MB for $1',
				[ $thumbnail->getFile()->getName() ]
			) );
		}
		return StatusValue::newGood( $fileContents );
	}

	/**
	 * Gets the contents of the originally uploaded file referenced by $file.
	 *
	 * @param File|ArchivedFile $file
	 * @return StatusValue A good status with the contents string as the value, or a fatal status if
	 *   the file is known to be too large or too small, or the contents could not be read.
	 */
	protected function getFileContents( $file ): StatusValue {
		if ( $file->getSize() && $file->getSize() > self::MAX_CONTENT_SIZE_BYTES ) {
			$this->perDbNameStatsdDataFactory->increment(
				'MediaModeration.ImageContentsLookup.File.Contents.TooLarge'
			);
			// Check that the size of the file does not exceed 4MB, as PhotoDNA returns an
			// error for files that are any larger.
			return StatusValue::newFatal( new RawMessage(
				'Original file contents exceeds 4MB for $1',
				[ $file->getName() ]
			) );
		}
		if (
			( $file->getHeight() && $file->getHeight() < self::MIN_DIMENSION_PIXELS ) ||
			( $file->getWidth() && $file->getWidth() < self::MIN_DIMENSION_PIXELS )
		) {
			$this->perDbNameStatsdDataFactory->increment(
				'MediaModeration.ImageContentsLookup.File.Contents.TooSmall'
			);
			// Check that the height and width is at least 160px by 160px
			// as PhotoDNA requires that the file be at least that size.
			// If the height or width is false, then just ignore this check
			// as PhotoDNA will verify this for us.
			return StatusValue::newFatal( new RawMessage(
				'Original file does not meet dimension requirements for $1',
				[ $file->getName() ]
			) );
		}
		if ( $file instanceof ArchivedFile ) {
			// Format for the URL is copied from SpecialUndelete::showFile
			$filePath = $this->localRepo->getZonePath( 'deleted' ) . '/' .
				$this->localRepo->getDeletedHashPath( $file->getStorageKey() ) . $file->getStorageKey();
		} else {
			$filePath = $file->getPath();
		}
		if ( !$filePath ) {
			$this->perDbNameStatsdDataFactory->increment(
				'MediaModeration.ImageContentsLookup.File.Contents.LookupFailed'
			);
			return StatusValue::newFatal( new RawMessage(
				'Could not get storage path of original file for $1',
				[ $file->getName() ]
			) );
		}
		$fileContents = $this->fileBackend->getFileContents( [ 'src' => $filePath ] );
		if ( !$fileContents ) {
			$this->perDbNameStatsdDataFactory->increment(
				'MediaModeration.ImageContentsLookup.File.Contents.LookupFailed'
			);
			return StatusValue::newFatal( new RawMessage(
				'Could not get original file contents for $1',
				[ $file->getName() ]
			) );
		}
		return StatusValue::newGood( $fileContents );
	}
}