MediaWiki master
WebPHandler.php
Go to the documentation of this file.
1<?php
10namespace MediaWiki\Media;
11
16use Wikimedia\XMPReader\Reader as XMPReader;
17
/** Value stored under the '_error' metadata key when a file could not be parsed. */
private const BROKEN_FILE = '0';

/** Number of bytes read from the start of a chunk when decoding its header. */
private const MINIMUM_CHUNK_HEADER_LENGTH = 18;

/** Largest EXIF/XMP chunk payload, in bytes, that extractChunk() will read (2 MiB). */
private const MAX_METADATA_CHUNK_SIZE = 2 * 1024 * 1024;

/** Version stamp written into the metadata and compared again when validating it. */
private const _MW_WEBP_VERSION = 2;

// Bit masks for the flag byte of a VP8X chunk header.
// decodeExtendedChunkHeader() tests the ANIM and ALPHA bits.
private const VP8X_ICC = 0x20;
private const VP8X_ALPHA = 0x10;
private const VP8X_EXIF = 0x08;
private const VP8X_XMP = 0x04;
private const VP8X_ANIM = 0x02;
46
/**
 * Parse a WebP file and report its dimensions together with the parsed metadata.
 *
 * @param mixed $state Not used by this implementation
 * @param string $filename Path of the file to parse
 * @return array On success an array with 'width', 'height' and 'metadata' keys,
 *  where 'metadata' is the parsed data stamped with the current metadata
 *  version. When the file cannot be parsed, only a 'metadata' key holding the
 *  broken-file error marker.
 */
public function getSizeAndMetadata( $state, $filename ) {
	$parsed = self::extractMetadata( $filename );
	if ( $parsed ) {
		// Stamp the result so isFileMetadataValid() can recognise its version later.
		$parsed['metadata']['_MW_WEBP_VERSION'] = self::_MW_WEBP_VERSION;

		return [
			'width' => $parsed['width'],
			'height' => $parsed['height'],
			'metadata' => $parsed,
		];
	}

	return [ 'metadata' => [ '_error' => self::BROKEN_FILE ] ];
}
62
/**
 * Name the kind of metadata this handler produces.
 *
 * @param mixed $image Not used by this implementation
 * @return string Always 'parsed-webp'
 */
public function getMetadataType( $image ) {
	$metadataType = 'parsed-webp';

	return $metadataType;
}
67
/**
 * Decide whether the metadata stored for a file can still be used.
 *
 * @param File $image File whose stored metadata array is inspected
 * @return bool|int One of the self::METADATA_* constants:
 *  METADATA_GOOD when the metadata is current (or is the known broken-file
 *  marker), METADATA_COMPATIBLE when it carries a different metadata version,
 *  METADATA_BAD when it is missing or carries an error marker.
 */
public function isFileMetadataValid( $image ) {
	$data = $image->getMetadataArray();
	if ( $data === [ '_error' => self::BROKEN_FILE ] ) {
		// Do not repetitively regenerate metadata on a broken file.
		return self::METADATA_GOOD;
	}

	// Missing metadata, or an error marker other than the broken-file one
	// handled above, cannot be used. A healthy metadata array has no '_error'
	// key and must not be rejected here.
	if ( !$data || isset( $data['_error'] ) ) {
		wfDebug( __METHOD__ . " invalid WebP metadata" );

		return self::METADATA_BAD;
	}

	// Loose comparison is kept on purpose: the stored version may not come
	// back with the same scalar type it was written with.
	if ( !isset( $data['metadata']['_MW_WEBP_VERSION'] )
		|| $data['metadata']['_MW_WEBP_VERSION'] != self::_MW_WEBP_VERSION
	) {
		wfDebug( __METHOD__ . " old but compatible WebP metadata" );

		return self::METADATA_COMPATIBLE;
	}

	return self::METADATA_GOOD;
}
91
/**
 * Read the RIFF chunk list of a file and extract WebP metadata from it.
 *
 * @param string $filename Path of the file to inspect
 * @return array|false The extracted metadata, or false when the file is not a
 *  valid RIFF file, is not tagged 'WEBP', or yields no metadata at all
 */
public static function extractMetadata( $filename ) {
	wfDebugLog( 'WebP', __METHOD__ . ": Extracting metadata from $filename" );

	// Look at no more than the first 100 chunks.
	$riff = RiffExtractor::findChunksFromFile( $filename, 100 );
	if ( $riff === false ) {
		wfDebugLog( 'WebP', __METHOD__ . ": Not a valid RIFF file" );
		return false;
	}

	$containerType = $riff['fourCC'];
	if ( $containerType !== 'WEBP' ) {
		wfDebugLog( 'WebP', __METHOD__ . ': FourCC was not WEBP: ' .
			bin2hex( $containerType ) );
		return false;
	}

	$extracted = self::extractMetadataFromChunks( $riff['chunks'], $filename );
	if ( $extracted ) {
		return $extracted;
	}

	wfDebugLog( 'WebP', __METHOD__ . ": No VP8 chunks found" );
	return false;
}
122
/**
 * Build the metadata array for a WebP file from its RIFF chunk list.
 *
 * Image information is decoded from the headers of 'VP8 ', 'VP8L' and 'VP8X'
 * chunks and merged in chunk order, so later chunks override keys set by
 * earlier ones. The first EXIF chunk and the first XMP chunk are read and
 * passed on for media-metadata decoding.
 *
 * @param array[] $chunks Chunk descriptors; each needs 'fourCC', 'start' and 'size'
 * @param string $filename Path of the file the chunks belong to
 * @return array Merged metadata; empty when no usable chunk was found
 */
public static function extractMetadataFromChunks( $chunks, $filename ) {
	$vp8Info = [];
	$exifData = null;
	$xmpData = null;

	foreach ( $chunks as $chunk ) {
		$fourCC = $chunk['fourCC'];
		// Note, spec says it should be 'XMP ' but some real life files use "XMP\0"
		if ( !in_array( $fourCC, [ 'VP8 ', 'VP8L', 'VP8X', 'EXIF', 'XMP ', "XMP\0" ], true ) ) {
			// Not a chunk containing interesting metadata
			continue;
		}
		wfDebugLog( 'WebP', __METHOD__ . ": {$fourCC}" );

		// Metadata chunks are read in full by extractChunk(); they do not need
		// the fixed-size header read below.
		if ( $fourCC === 'EXIF' ) {
			// Spec says ignore all but first one
			$exifData ??= self::extractChunk( $chunk, $filename );
			continue;
		}
		if ( $fourCC === 'XMP ' || $fourCC === "XMP\0" ) {
			$xmpData ??= self::extractChunk( $chunk, $filename );
			continue;
		}

		$chunkHeader = file_get_contents( $filename, false, null,
			$chunk['start'], self::MINIMUM_CHUNK_HEADER_LENGTH );
		if ( $chunkHeader === false ) {
			// Never hand a failed read to the decoders; they expect a string.
			wfDebugLog( 'WebP', __METHOD__ . ": Could not read {$fourCC} chunk header" );
			continue;
		}

		switch ( $fourCC ) {
			case 'VP8 ':
				$vp8Info = array_merge( $vp8Info,
					self::decodeLossyChunkHeader( $chunkHeader ) );
				break;
			case 'VP8L':
				$vp8Info = array_merge( $vp8Info,
					self::decodeLosslessChunkHeader( $chunkHeader ) );
				break;
			case 'VP8X':
				$vp8Info = array_merge( $vp8Info,
					self::decodeExtendedChunkHeader( $chunkHeader ) );
				// Continue looking for other chunks to improve the metadata
				break;
		}
	}

	return array_merge( $vp8Info,
		self::decodeMediaMetadata( $exifData, $xmpData, $filename ) );
}
174
/**
 * Turn raw XMP and Exif chunk payloads into a media-metadata array.
 *
 * @param string|null|false $exifData Raw EXIF chunk payload, if one was read
 * @param string|null|false $xmpData Raw XMP chunk payload, if one was read
 * @param string $filename Path of the source file, handed to the XMP reader
 * @return array Empty when both payloads are null, otherwise an array with a
 *  single 'media-metadata' key
 */
private static function decodeMediaMetadata( $exifData, $xmpData, $filename ) {
	if ( $exifData === null && $xmpData === null ) {
		// Nothing to do
		return [];
	}

	$collector = new BitmapMetadataHandler;

	// XMP is fed to the collector before Exif; keep that order.
	if ( $xmpData && XMPReader::isSupported() ) {
		$reader = new XMPReader( LoggerFactory::getInstance( 'XMP' ), $filename );
		$reader->parse( $xmpData );
		foreach ( $reader->getResults() as $type => $values ) {
			$collector->addMetadata( $values, $type );
		}
	}

	if ( $exifData ) {
		// The Exif section of a webp file is basically a tiff file without an image.
		// Some files start with an Exif\0\0. This is wrong according to standard and
		// will prevent us from reading file, so remove for compatibility.
		$bogusPrefix = "Exif\x00\x00";
		if ( str_starts_with( $exifData, $bogusPrefix ) ) {
			$exifData = substr( $exifData, strlen( $bogusPrefix ) );
		}

		// The Exif reader works on files, so spill the payload to a temporary
		// .tiff file. $tempFile stays referenced until the read below is done.
		$tempFile = MediaWikiServices::getInstance()
			->getTempFSFileFactory()
			->newTempFSFile( 'webp-exif_', 'tiff' );
		$tempPath = $tempFile->getPath();
		file_put_contents( $tempPath, $exifData );

		$collector->getExif( $tempPath, BitmapMetadataHandler::getTiffByteOrder( $tempPath ) );
	}

	return [ 'media-metadata' => $collector->getMetadataArray() ];
}
217
/**
 * Read the payload of a single chunk from the file.
 *
 * @param array $chunk Chunk descriptor with 'start' and 'size' keys
 * @param string $filename Path of the file to read from
 * @return string|false|null The payload as returned by file_get_contents(), or
 *  null when the chunk is empty or larger than MAX_METADATA_CHUNK_SIZE
 */
private static function extractChunk( $chunk, $filename ) {
	$payloadSize = $chunk['size'];
	$sizeIsAcceptable = $payloadSize >= 1 && $payloadSize <= self::MAX_METADATA_CHUNK_SIZE;
	if ( !$sizeIsAcceptable ) {
		return null;
	}

	// Skip first 8 bytes as that is the fourCC header followed by size of chunk.
	$payloadStart = $chunk['start'] + 8;

	return file_get_contents( $filename, false, null, $payloadStart, $payloadSize );
}
231
/**
 * Decode the header of a lossy ('VP8 ') chunk.
 *
 * @param string $header The first bytes of the chunk, starting at its fourCC;
 *  MINIMUM_CHUNK_HEADER_LENGTH bytes are needed
 * @return array Empty when the header is truncated or its sync code is wrong,
 *  otherwise an array with 'compression', 'width' and 'height' keys
 */
protected static function decodeLossyChunkHeader( $header ) {
	// The fields read below end at byte 17. Without this guard a header with a
	// valid sync code but a cut-off size field would decode as a 0x0 image.
	if ( !is_string( $header ) || strlen( $header ) < self::MINIMUM_CHUNK_HEADER_LENGTH ) {
		wfDebugLog( 'WebP', __METHOD__ . ': Truncated chunk header' );
		return [];
	}

	// Bytes 0-3 are 'VP8 '
	// Bytes 4-7 are the VP8 stream size
	// Bytes 8-10 are the frame tag
	// Bytes 11-13 are 0x9D 0x01 0x2A called the sync code
	$syncCode = substr( $header, 11, 3 );
	if ( $syncCode !== "\x9D\x01\x2A" ) {
		wfDebugLog( 'WebP', __METHOD__ . ': Invalid sync code: ' .
			bin2hex( $syncCode ) );
		return [];
	}

	// Bytes 14-17 are image size: two little-endian 16-bit values
	$imageSize = unpack( 'v2', substr( $header, 14, 4 ) );

	// Image sizes are 14 bit, 2 MSB are scaling parameters which are ignored here
	return [
		'compression' => 'lossy',
		'width' => $imageSize[1] & 0x3FFF,
		'height' => $imageSize[2] & 0x3FFF
	];
}
257
/**
 * Decode the header of a lossless ('VP8L') chunk.
 *
 * @param string $header The first bytes of the chunk, starting at its fourCC;
 *  at least 13 bytes are needed
 * @return array Empty when the header is truncated or its signature byte is
 *  wrong, otherwise an array with 'compression', 'width' and 'height' keys
 */
public static function decodeLosslessChunkHeader( $header ) {
	// The fields read below end at byte 12. Guarding here avoids indexing past
	// the end of a short (or failed) read.
	if ( !is_string( $header ) || strlen( $header ) < 13 ) {
		wfDebugLog( 'WebP', __METHOD__ . ': Truncated chunk header' );
		return [];
	}

	// Bytes 0-3 are 'VP8L'
	// Bytes 4-7 are chunk stream size
	// Byte 8 is 0x2F called the signature
	if ( $header[8] !== "\x2F" ) {
		wfDebugLog( 'WebP', __METHOD__ . ': Invalid signature: ' .
			bin2hex( $header[8] ) );
		return [];
	}

	// Bytes 9-12 contain the image size
	// Bits 0-13 are width-1; bits 14-27 are height-1
	$imageSize = unpack( 'C4', substr( $header, 9, 4 ) );

	return [
		'compression' => 'lossless',
		// Low 8 bits from byte 9, high 6 bits from the low bits of byte 10
		'width' => ( $imageSize[1] | ( ( $imageSize[2] & 0x3F ) << 8 ) ) + 1,
		// 2 bits from the top of byte 10, 8 bits from byte 11, 4 bits from byte 12
		'height' => ( ( ( $imageSize[2] & 0xC0 ) >> 6 ) |
			( $imageSize[3] << 2 ) | ( ( $imageSize[4] & 0x0F ) << 10 ) ) + 1
	];
}
282
/**
 * Decode the header of an extended ('VP8X') chunk.
 *
 * @param string $header The first bytes of the chunk, starting at its fourCC;
 *  MINIMUM_CHUNK_HEADER_LENGTH bytes are needed
 * @return array Empty when the header is truncated, otherwise an array with
 *  'compression', 'animated', 'transparency', 'width' and 'height' keys
 */
public static function decodeExtendedChunkHeader( $header ) {
	// The fields read below end at byte 17. Without this guard a short (or
	// failed) read would be reported as a 1x1 image.
	if ( !is_string( $header ) || strlen( $header ) < self::MINIMUM_CHUNK_HEADER_LENGTH ) {
		wfDebugLog( 'WebP', __METHOD__ . ': Truncated chunk header' );
		return [];
	}

	// Bytes 0-3 are 'VP8X'
	// Byte 4-7 are chunk length
	// Byte 8-11 are a flag bytes
	$flags = unpack( 'c', substr( $header, 8, 1 ) );

	// Byte 12-17 are image size (24 bits)
	// Each value is padded with a zero byte so it can be read as a 32-bit integer.
	$width = unpack( 'V', substr( $header, 12, 3 ) . "\x00" );
	$height = unpack( 'V', substr( $header, 15, 3 ) . "\x00" );

	return [
		'compression' => 'unknown',
		'animated' => ( $flags[1] & self::VP8X_ANIM ) === self::VP8X_ANIM,
		'transparency' => ( $flags[1] & self::VP8X_ALPHA ) === self::VP8X_ALPHA,
		// One is added to each stored value to obtain the actual size
		'width' => ( $width[1] & 0xFFFFFF ) + 1,
		'height' => ( $height[1] & 0xFFFFFF ) + 1
	];
}
306
/**
 * Report whether files of this type have to be rendered before display.
 *
 * @param mixed $file Not used by this implementation
 * @return bool Always true
 */
public function mustRender( $file ) {
	$renderingRequired = true;

	return $renderingRequired;
}
314
/**
 * Report whether this handler can render the given file.
 *
 * @param File $file File to check
 * @return bool False for animated images, true for everything else
 */
public function canRender( $file ) {
	return !$this->isAnimatedImage( $file );
}
325
/**
 * Report whether the stored metadata marks the image as animated.
 *
 * @param File $image File whose metadata array is consulted
 * @return bool True only when the 'animated' metadata entry is exactly true
 */
public function isAnimatedImage( $image ) {
	$animatedFlag = $image->getMetadataArray()['animated'] ?? null;

	return $animatedFlag === true;
}
338
/**
 * Report whether thumbnails of this file may be animated.
 *
 * @param mixed $file Not used by this implementation
 * @return bool Always false
 */
public function canAnimateThumbnail( $file ) {
	$animatedThumbnailsAllowed = false;

	return $animatedThumbnailsAllowed;
}
343
/**
 * Name the file type used for thumbnails: WebP files are rendered as PNG.
 *
 * @param string $ext Not used by this implementation
 * @param string $mime Not used by this implementation
 * @param array|null $params Not used by this implementation
 * @return string[] Two-element list: thumbnail extension, then its MIME type
 */
public function getThumbType( $ext, $mime, $params = null ) {
	$thumbExtension = 'png';
	$thumbMime = 'image/png';

	return [ $thumbExtension, $thumbMime ];
}
355
/**
 * Check whether the php-gd extension is present and reports WebP support.
 *
 * @return bool
 */
protected function hasGDSupport() {
	if ( !function_exists( 'gd_info' ) ) {
		return false;
	}

	// A missing 'WebP Support' entry counts as unsupported.
	return (bool)( gd_info()['WebP Support'] ?? false );
}
360
/**
 * Fetch the 'media-metadata' part of a file's stored metadata.
 *
 * @param File $image File whose metadata array is consulted
 * @return array The 'media-metadata' entry, or an empty array when there is none
 */
public function getCommonMetaArray( File $image ) {
	return $image->getMetadataArray()['media-metadata'] ?? [];
}
366
/**
 * Format the file's common metadata for display.
 *
 * @param File $image File whose metadata is formatted
 * @param mixed $context Passed through unchanged to formatMetadataHelper()
 * @return array|false Result of formatMetadataHelper(), or false when the file
 *  has no common metadata
 */
public function formatMetadata( $image, $context = false ) {
	$commonMeta = $this->getCommonMetaArray( $image );

	return $commonMeta
		? $this->formatMetadataHelper( $commonMeta, $context )
		: false;
}
376}
377
// Register the un-namespaced name 'WebPHandler' as an alias of the namespaced class.
379class_alias( WebPHandler::class, 'WebPHandler' );
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
wfDebugLog( $logGroup, $text, $dest='all', array $context=[])
Send a line to a supplementary debug log file, if configured, or main debug log if not.
Implements some public methods and some protected utility functions which are required by multiple ch...
Definition File.php:79
getMetadataArray()
Get the unserialized handler-specific metadata STUB.
Definition File.php:778
Create PSR-3 logger objects.
Service locator for MediaWiki core services.
static getInstance()
Returns the global default instance of the top level service locator.
Generic handler for bitmap images.
static getTiffByteOrder( $filename)
Read the first 2 bytes of a tiff file to figure out Little Endian or Big Endian.
formatMetadataHelper( $metadataArray, $context=false)
sorts the visible/invisible field.
Handler for Google's WebP format https://developers.google.com/speed/webp/
isFileMetadataValid( $image)
Check if the metadata is valid for this handler. If it returns MediaHandler::METADATA_BAD (or false),...
getThumbType( $ext, $mime, $params=null)
Render files as PNG.
hasGDSupport()
Whether the php-gd extension supports this type of file. May be overridden. Returns bool.
formatMetadata( $image, $context=false)
Get an array structure that looks like this: [ 'visible' => [ 'Human-readable name' => 'Human readable...
getMetadataType( $image)
Get a string describing the type of metadata, for display purposes. May be overridden. This method is currentl...
static extractMetadata( $filename)
Extracts the image size and WebP type from a file.
static decodeLossyChunkHeader( $header)
Decodes a lossy chunk header.
getCommonMetaArray(File $image)
Get an array of standard (FormatMetadata type) metadata values. The returned data is largely the same ...
static extractMetadataFromChunks( $chunks, $filename)
Extracts the image size and WebP type from a file based on the chunk list.
getSizeAndMetadata( $state, $filename)
Get image size information and metadata array. If this returns null, the caller will fall back to getI...
canAnimateThumbnail( $file)
If the material is animated, we can animate the thumbnail. Since 1.20. May be overridden. Returns bool: If material is not anim...
static decodeExtendedChunkHeader( $header)
Decodes an extended chunk header.
static decodeLosslessChunkHeader( $header)
Decodes a lossless chunk header.
Extractor for the Resource Interchange File Format.
static findChunksFromFile( $filename, $maxChunks=-1)