Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
95.61% covered (success)
95.61%
196 / 205
33.33% covered (danger)
33.33%
2 / 6
CRAP
0.00% covered (danger)
0.00%
0 / 1
MediaModerationImageContentsLookup
95.61% covered (success)
95.61%
196 / 205
33.33% covered (danger)
33.33%
2 / 6
42
0.00% covered (danger)
0.00%
0 / 1
 __construct
100.00% covered (success)
100.00%
7 / 7
100.00% covered (success)
100.00%
1 / 1
1
 getImageContents
100.00% covered (success)
100.00%
36 / 36
100.00% covered (success)
100.00%
1 / 1
9
 getThumbnailMimeType
95.45% covered (success)
95.45%
21 / 22
0.00% covered (danger)
0.00%
0 / 1
5
 getThumbnailForFile
92.31% covered (success)
92.31%
60 / 65
0.00% covered (danger)
0.00%
0 / 1
9.04
 getThumbnailContents
97.22% covered (success)
97.22%
35 / 36
0.00% covered (danger)
0.00%
0 / 1
8
 getFileContents
94.87% covered (success)
94.87%
37 / 39
0.00% covered (danger)
0.00%
0 / 1
10.01
1<?php
2
3namespace MediaWiki\Extension\MediaModeration\Services;
4
5use ArchivedFile;
6use File;
7use FileBackend;
8use Liuggio\StatsdClient\Factory\StatsdDataFactoryInterface;
9use LocalRepo;
10use MediaTransformError;
11use MediaWiki\Config\ServiceOptions;
12use MediaWiki\Extension\MediaModeration\Media\ThumborThumbnailImage;
13use MediaWiki\Extension\MediaModeration\Status\ImageContentsLookupStatus;
14use MediaWiki\Http\HttpRequestFactory;
15use MediaWiki\Language\RawMessage;
16use MimeAnalyzer;
17use StatusValue;
18use ThumbnailImage;
19
20/**
21 * This service looks up the contents of the given $file, either by getting a thumbnail
22 * with width as specified in the wgMediaModerationThumbnailWidth config or if this fails
23 * using the original source file.
24 */
25class MediaModerationImageContentsLookup {
26
27    public const CONSTRUCTOR_OPTIONS = [
28        'MediaModerationThumbnailWidth',
29    ];
30
31    private FileBackend $fileBackend;
32    private StatsdDataFactoryInterface $perDbNameStatsdDataFactory;
33    private MimeAnalyzer $mimeAnalyzer;
34    private LocalRepo $localRepo;
35
36    private HttpRequestFactory $httpRequestFactory;
37
38    private int $thumbnailWidth;
39
40    public function __construct(
41        ServiceOptions $options,
42        FileBackend $fileBackend,
43        StatsdDataFactoryInterface $perDbNameStatsdDataFactory,
44        MimeAnalyzer $mimeAnalyzer,
45        LocalRepo $localRepo,
46        HttpRequestFactory $httpRequestFactory
47    ) {
48        $options->assertRequiredOptions( self::CONSTRUCTOR_OPTIONS );
49        $this->fileBackend = $fileBackend;
50        $this->perDbNameStatsdDataFactory = $perDbNameStatsdDataFactory;
51        $this->mimeAnalyzer = $mimeAnalyzer;
52        $this->localRepo = $localRepo;
53        $this->thumbnailWidth = $options->get( 'MediaModerationThumbnailWidth' );
54        $this->httpRequestFactory = $httpRequestFactory;
55    }
56
57    /**
58     * Gets the image contents as a string for the given $file to be sent to PhotoDNA.
59     *
60     * This function first tries to get a thumbnail for the $file and return the contents of the
61     * thumbnail. If this fails, then the function tries to return the contents of the $file
62     * if the $file is in a format supported by PhotoDNA.
63     *
64     * @param File|ArchivedFile $file
65     * @return ImageContentsLookupStatus
66     */
67    public function getImageContents( $file ): ImageContentsLookupStatus {
68        // Create a status that will be returned, and if it is good will contain the
69        // thumbnail/original file contents and mime type.
70        $returnStatus = new ImageContentsLookupStatus();
71        if ( $file instanceof File ) {
72            // Only try to use the thumbnail if the $file is an instance of the File class,
73            // as support for generating a thumbnail for deleted files is not implemented.
74            $thumbnailStatus = $this->getThumbnailForFile( $file );
75            if ( $thumbnailStatus->isGood() ) {
76                // If we could get the ThumbnailImage object for the $file, then
77                // try to get the contents of the thumbnail along with the mime type.
78                $thumbnail = $thumbnailStatus->getValue();
79                $thumbnailContentsStatus = $this->getThumbnailContents( $thumbnail );
80                $thumbnailMimeTypeStatus = $this->getThumbnailMimeType( $thumbnail );
81                if ( $thumbnailContentsStatus->isGood() && $thumbnailMimeTypeStatus->isGood() ) {
82                    // If we were able to get the thumbnail contents and mime type, then return with them.
83                    return $returnStatus
84                        ->setImageContents( $thumbnailContentsStatus->getValue() )
85                        ->setMimeType( $file->getMimeType() );
86                }
87                // Add the failures to the return status for the caller to see.
88                $returnStatus
89                    ->merge( $thumbnailMimeTypeStatus )
90                    ->merge( $thumbnailContentsStatus );
91            }
92            // Add the failures to the return status for the caller to see.
93            $returnStatus->merge( $thumbnailStatus );
94        }
95        // If no thumbnail could be generated for the $file or the $file is an ArchivedFile instance, then we will
96        // reach here. Now try to get the contents of the $file if the mime type type is supported by PhotoDNA.
97        if ( in_array( $file->getMimeType(), MediaModerationFileProcessor::ALLOWED_MIME_TYPES, true ) ) {
98            $fileContentsStatus = $this->getFileContents( $file );
99            if ( $fileContentsStatus->isGood() ) {
100                // We were able to get the contents of the $file
101                if ( $file instanceof File ) {
102                    // Add to the RuntimeException count if $file was a File, as we should
103                    // have been able to generate a thumbnail for it.
104                    $this->perDbNameStatsdDataFactory->increment(
105                        'MediaModeration.PhotoDNAServiceProvider.Execute.SourceFileUsedForFileObject'
106                    );
107                }
108                // Set the result as OK as we got the original file, but still include the
109                // errors from the thumbnail generation for tracking.
110                return $returnStatus
111                    ->setOK( true )
112                    ->setImageContents( $fileContentsStatus->getValue() )
113                    ->setMimeType( $file->getMimeType() );
114            }
115            // If we were unable to get the contents of the $file, then add the errors from
116            // this to the return status.
117            $returnStatus->merge( $fileContentsStatus );
118        }
119        // If we get here, then we have failed to get any image contents and so should return a fatal status.
120        if ( $returnStatus->isOK() ) {
121            // The $returnStatus can be good and have no message if the image was deleted and the source image is
122            // not supported by PhotoDNA (such as a deleted SVG).
123            $returnStatus->fatal( new RawMessage(
124                'Failed to get image contents for $1',
125                [ $file->getName() ]
126            ) );
127        }
128        // Increment the RuntimeException statsd counter, as we have reached a point where
129        // we could not generate a thumbnail where we should have been able to.
130        $this->perDbNameStatsdDataFactory->increment(
131            'MediaModeration.PhotoDNAServiceProvider.Execute.RuntimeException'
132        );
133        return $returnStatus;
134    }
135
136    /**
137     * Gets the mime type (or best guess for it) of the given $thumbnail.
138     *
139     * @param ThumbnailImage $thumbnail
140     * @return StatusValue
141     */
142    protected function getThumbnailMimeType( ThumbnailImage $thumbnail ): StatusValue {
143        // Attempt to work out what the mime type of the file is based on the extension, and if that
144        // fails then try based on the contents of the thumbnail.
145        $thumbnailMimeType = $thumbnail instanceof ThumborThumbnailImage ?
146            $thumbnail->getContentType() :
147            $this->mimeAnalyzer->getMimeTypeFromExtensionOrNull( $thumbnail->getExtension() );
148        if ( $thumbnailMimeType === null ) {
149            $thumbnailMimeType = $this->mimeAnalyzer->guessMimeType( $thumbnail->getLocalCopyPath() );
150        }
151        if ( !$thumbnailMimeType ) {
152            // We cannot send a request to PhotoDNA without knowing what the mime type is.
153            $this->perDbNameStatsdDataFactory->increment(
154                'MediaModeration.ImageContentsLookup.Thumbnail.MimeType.LookupFailed'
155            );
156            return StatusValue::newFatal( new RawMessage(
157                'Could not get mime type of thumbnail for $1',
158                [ $thumbnail->getFile()->getName() ]
159            ) );
160        }
161        if ( !in_array( $thumbnailMimeType, MediaModerationFileProcessor::ALLOWED_MIME_TYPES, true ) ) {
162            // We cannot send a request to PhotoDNA with a thumbnail type that is unsupported by the API.
163            $this->perDbNameStatsdDataFactory->increment(
164                'MediaModeration.ImageContentsLookup.Thumbnail.MimeType.Unsupported'
165            );
166            return StatusValue::newFatal( new RawMessage(
167                'Mime type of thumbnail for $1 is not supported by PhotoDNA.',
168                [ $thumbnail->getFile()->getName() ]
169            ) );
170        }
171        return StatusValue::newGood( $thumbnailMimeType );
172    }
173
174    /**
175     * @param File $file
176     * @return StatusValue<ThumbnailImage|ThumborThumbnailImage> A StatusValue with a ThumbnailImage object as the value
177     *   if it is a good status.
178     */
179    protected function getThumbnailForFile( File $file ): StatusValue {
180        $genericErrorMessage = 'Could not transform file ' . $file->getName();
181        $start = microtime( true );
182        $thumbName = $file->thumbName( [ 'width' => $this->thumbnailWidth ] );
183        $thumbProxyUrl = $file->getRepo()->getThumbProxyUrl();
184        $secret = $file->getRepo()->getThumbProxySecret();
185        if ( $thumbProxyUrl && $secret ) {
186            // Specific to Wikimedia setup only: proxy the request to Thumbor,
187            // which should result in the thumbnail being generated on disk
188            // @see wfProxyThumbnailRequest()
189            $req = $this->httpRequestFactory->create(
190                $thumbProxyUrl . $file->getThumbRel( $thumbName )
191            );
192            $req->setHeader( 'X-Swift-Secret', $secret );
193            $result = $req->execute();
194            if ( $result->isGood() ) {
195                $imageContent = $req->getContent();
196                // getimagesizefromstring() can return a PHP Notice if
197                // the contents are invalid. Suppress the notice, and check
198                // instead of the result is truthy.
199                // phpcs:ignore Generic.PHP.NoSilencedErrors.Discouraged
200                $imageMetadata = @getimagesizefromstring( $imageContent );
201                if ( $imageMetadata ) {
202                    $thumbnail = new ThumborThumbnailImage(
203                        $file,
204                        $file->getThumbUrl( $thumbName ),
205                        [
206                            'width' => $imageMetadata[0],
207                            'height' => $imageMetadata[1]
208                        ],
209                        $imageContent,
210                        $req->getResponseHeader( 'content-type' )
211                    );
212                    $this->perDbNameStatsdDataFactory->timing(
213                        'MediaModeration.PhotoDNAServiceProviderThumbnailTransformThumborRequest',
214                        1000 * ( microtime( true ) - $start )
215                    );
216                    return StatusValue::newGood( $thumbnail );
217                }
218            }
219            // The request failed, so increment the failure counter and use regular ::transform
220            // for checks done farther on.
221            $this->perDbNameStatsdDataFactory->increment(
222                'MediaModeration.ImageContentsLookup.Thumbnail.ThumborTransform.Failed'
223            );
224            $thumbnail = $file->transform( [ 'width' => $this->thumbnailWidth ] );
225        } else {
226            // For non Wikimedia setups, use RENDER_NOW to ensure we have
227            // a file to work with.
228            $thumbnail = $file->transform( [ 'width' => $this->thumbnailWidth ], File::RENDER_NOW );
229        }
230        $delay = microtime( true ) - $start;
231        $this->perDbNameStatsdDataFactory->timing(
232            'MediaModeration.PhotoDNAServiceProviderThumbnailTransform',
233            1000 * $delay
234        );
235        if ( !$thumbnail ) {
236            $this->perDbNameStatsdDataFactory->increment(
237                'MediaModeration.ImageContentsLookup.Thumbnail.Transform.Failed'
238            );
239            return StatusValue::newFatal( new RawMessage( $genericErrorMessage ) );
240        }
241        if ( $thumbnail instanceof MediaTransformError ) {
242            $this->perDbNameStatsdDataFactory->increment(
243                'MediaModeration.ImageContentsLookup.Thumbnail.Transform.Failed'
244            );
245            return StatusValue::newFatal( new RawMessage( $genericErrorMessage . ': ' . $thumbnail->toText() ) );
246        }
247        if ( !( $thumbnail instanceof ThumbnailImage ) ) {
248            $this->perDbNameStatsdDataFactory->increment(
249                'MediaModeration.ImageContentsLookup.Thumbnail.Transform.Failed'
250            );
251            return StatusValue::newFatal( new RawMessage(
252                $genericErrorMessage . ': not an instance of ThumbnailImage, got ' . get_class( $thumbnail )
253            ) );
254        }
255        if ( !$thumbnail->hasFile() ) {
256            $this->perDbNameStatsdDataFactory->increment(
257                'MediaModeration.ImageContentsLookup.Thumbnail.Transform.Failed'
258            );
259            return StatusValue::newFatal( new RawMessage(
260                $genericErrorMessage . ', got a ' . get_class( $thumbnail ) . ' but ::hasFile() returns false.'
261            ) );
262        }
263        return StatusValue::newGood( $thumbnail );
264    }
265
266    protected function getThumbnailContents( ThumbnailImage $thumbnail ): StatusValue {
267        if ( $thumbnail->getHeight() < 160 || $thumbnail->getWidth() < 160 ) {
268            $this->perDbNameStatsdDataFactory->increment(
269                'MediaModeration.ImageContentsLookup.Thumbnail.Contents.TooSmall'
270            );
271            // PhotoDNA requires that images be at least 160px by 160px, so don't use the
272            // thumbnail if either dimension is too small.
273            return StatusValue::newFatal( new RawMessage(
274                'Thumbnail does not meet dimension requirements for $1',
275                [ $thumbnail->getFile()->getName() ]
276            ) );
277        }
278        if ( !( $thumbnail instanceof ThumborThumbnailImage ) && !$thumbnail->getStoragePath() ) {
279            $this->perDbNameStatsdDataFactory->increment(
280                'MediaModeration.ImageContentsLookup.Thumbnail.Contents.LookupFailed'
281            );
282            return StatusValue::newFatal( new RawMessage(
283                'Could not get storage path of thumbnail for $1',
284                [ $thumbnail->getFile()->getName() ]
285            ) );
286        }
287        $fileContents = $thumbnail instanceof ThumborThumbnailImage ?
288            $thumbnail->getContent() :
289            $this->fileBackend->getFileContents( [ 'src' => $thumbnail->getStoragePath() ] );
290        if ( !$fileContents ) {
291            $this->perDbNameStatsdDataFactory->increment(
292                'MediaModeration.ImageContentsLookup.Thumbnail.Contents.LookupFailed'
293            );
294            return StatusValue::newFatal( new RawMessage(
295                'Could not get thumbnail contents for $1',
296                [ $thumbnail->getFile()->getName() ]
297            ) );
298        }
299        if ( strlen( $fileContents ) > 4000000 ) {
300            $this->perDbNameStatsdDataFactory->increment(
301                'MediaModeration.ImageContentsLookup.Thumbnail.Contents.TooLarge'
302            );
303            // Check that the size of the file does not exceed 4MB, as PhotoDNA returns an
304            // error for files that are any larger.
305            // strlen returns the size of the string in bytes and 4MB is 4,000,000 bytes.
306            return StatusValue::newFatal( new RawMessage(
307                'Original file contents exceeds 4MB for $1',
308                [ $thumbnail->getFile()->getName() ]
309            ) );
310        }
311        return StatusValue::newGood( $fileContents );
312    }
313
314    /**
315     * Gets the contents of the originally uploaded file referenced by $file.
316     *
317     * @param File|ArchivedFile $file
318     * @return StatusValue
319     */
320    protected function getFileContents( $file ): StatusValue {
321        if ( $file->getSize() && $file->getSize() > 4000000 ) {
322            $this->perDbNameStatsdDataFactory->increment(
323                'MediaModeration.ImageContentsLookup.File.Contents.TooLarge'
324            );
325            // Check that the size of the file does not exceed 4MB, as PhotoDNA returns an
326            // error for files that are any larger.
327            return StatusValue::newFatal( new RawMessage(
328                'Original file contents exceeds 4MB for $1',
329                [ $file->getName() ]
330            ) );
331        }
332        if (
333            ( $file->getHeight() && $file->getHeight() < 160 ) ||
334            ( $file->getWidth() && $file->getWidth() < 160 )
335        ) {
336            $this->perDbNameStatsdDataFactory->increment(
337                'MediaModeration.ImageContentsLookup.File.Contents.TooSmall'
338            );
339            // Check that the height and width is at least 160px by 160px
340            // as PhotoDNA requires that the file be at least that size.
341            // If the height or width is false, then just ignore this check
342            // as PhotoDNA will verify this for us.
343            return StatusValue::newFatal( new RawMessage(
344                'Original file does not meet dimension requirements for $1',
345                [ $file->getName() ]
346            ) );
347        }
348        if ( $file instanceof ArchivedFile ) {
349            // Format for the URL is copied from SpecialUndelete::showFile
350            $filePath = $this->localRepo->getZonePath( 'deleted' ) . '/' .
351                $this->localRepo->getDeletedHashPath( $file->getStorageKey() ) . $file->getStorageKey();
352        } else {
353            $filePath = $file->getPath();
354        }
355        if ( !$filePath ) {
356            $this->perDbNameStatsdDataFactory->increment(
357                'MediaModeration.ImageContentsLookup.File.Contents.LookupFailed'
358            );
359            return StatusValue::newFatal( new RawMessage(
360                'Could not get storage path of original file for $1',
361                [ $file->getName() ]
362            ) );
363        }
364        $fileContents = $this->fileBackend->getFileContents( [ 'src' => $filePath ] );
365        if ( !$fileContents ) {
366            $this->perDbNameStatsdDataFactory->increment(
367                'MediaModeration.ImageContentsLookup.File.Contents.LookupFailed'
368            );
369            return StatusValue::newFatal( new RawMessage(
370                'Could not get original file contents for $1',
371                [ $file->getName() ]
372            ) );
373        }
374        return StatusValue::newGood( $fileContents );
375    }
376}