MediaWiki master
WebPHandler.php
Go to the documentation of this file.
1<?php
26use Wikimedia\XMPReader\Reader as XMPReader;
27
/** Marker stored under '_error' in the metadata of files that could not be parsed. */
private const BROKEN_FILE = '0';

/**
 * Number of bytes read from the start of an image chunk. The header
 * decoders in this class look at offsets 0 through 17.
 */
private const MINIMUM_CHUNK_HEADER_LENGTH = 18;

/** Largest EXIF/XMP chunk, in bytes, that is read into memory (2 MiB). */
private const MAX_METADATA_CHUNK_SIZE = 2 * 1024 * 1024;

/** Version stamp written into, and checked against, the stored metadata. */
private const _MW_WEBP_VERSION = 2;

/** Bit masks for the flags byte of a VP8X chunk header. */
private const VP8X_ICC = 0x20;
private const VP8X_ALPHA = 0x10;
private const VP8X_EXIF = 0x08;
private const VP8X_XMP = 0x04;
private const VP8X_ANIM = 0x02;
56
/**
 * Parse a WebP file and build its size and metadata structure.
 *
 * @param mixed $state Not used by this handler
 * @param string $filename Path of the file to inspect
 * @return array [ 'metadata' => [ '_error' => ... ] ] when the file cannot be
 *  parsed, otherwise an array with 'width', 'height' and 'metadata' keys
 */
public function getSizeAndMetadata( $state, $filename ) {
	$parsed = self::extractMetadata( $filename );
	if ( !$parsed ) {
		// Store the broken-file marker instead of real metadata.
		return [ 'metadata' => [ '_error' => self::BROKEN_FILE ] ];
	}

	// Stamp the data with the current metadata format version.
	$parsed['metadata']['_MW_WEBP_VERSION'] = self::_MW_WEBP_VERSION;

	return [
		'width' => $parsed['width'],
		'height' => $parsed['height'],
		'metadata' => $parsed,
	];
}
71
/**
 * Name the kind of metadata this handler stores.
 *
 * @param mixed $image Not used
 * @return string Always 'parsed-webp'
 */
public function getMetadataType( $image ) {
	return 'parsed-webp';
}
75
/**
 * Check whether the stored metadata of a file is still usable.
 *
 * @param mixed $image Object exposing getMetadataArray()
 * @return mixed self::METADATA_GOOD, self::METADATA_COMPATIBLE or self::METADATA_BAD
 */
public function isFileMetadataValid( $image ) {
	$data = $image->getMetadataArray();
	if ( $data === [ '_error' => self::BROKEN_FILE ] ) {
		// Do not repetitively regenerate metadata on broken file.
		return self::METADATA_GOOD;
	}

	// Metadata produced by getSizeAndMetadata() for a parseable file never
	// contains an '_error' key, so finding one here (in any shape other than
	// the exact broken-file marker handled above) means the data is unusable.
	if ( !$data || isset( $data['_error'] ) ) {
		wfDebug( __METHOD__ . " invalid WebP metadata" );

		return self::METADATA_BAD;
	}

	// Loose comparison on purpose: the stored version is not guaranteed to
	// come back with the same scalar type it was written with.
	if ( !isset( $data['metadata']['_MW_WEBP_VERSION'] )
		|| $data['metadata']['_MW_WEBP_VERSION'] != self::_MW_WEBP_VERSION
	) {
		wfDebug( __METHOD__ . " old but compatible WebP metadata" );

		return self::METADATA_COMPATIBLE;
	}

	return self::METADATA_GOOD;
}
98
/**
 * Extracts the image size and WebP type from a file.
 *
 * @param string $filename Path of the file to inspect
 * @return array|false Result of extractMetadataFromChunks(), or false when the
 *  file is not a RIFF/WEBP container or nothing could be extracted from it
 */
public static function extractMetadata( $filename ) {
	wfDebugLog( 'WebP', __METHOD__ . ": Extracting metadata from $filename" );

	// The second argument caps the number of chunks that are looked up.
	$riff = RiffExtractor::findChunksFromFile( $filename, 100 );
	if ( $riff === false ) {
		wfDebugLog( 'WebP', __METHOD__ . ": Not a valid RIFF file" );
		return false;
	}

	$containerType = $riff['fourCC'];
	if ( $containerType !== 'WEBP' ) {
		wfDebugLog( 'WebP', __METHOD__ . ': FourCC was not WEBP: ' .
			bin2hex( $containerType ) );
		return false;
	}

	$parsed = self::extractMetadataFromChunks( $riff['chunks'], $filename );
	if ( $parsed ) {
		return $parsed;
	}

	wfDebugLog( 'WebP', __METHOD__ . ": No VP8 chunks found" );
	return false;
}
129
/**
 * Extracts the image size and WebP type from a file based on the chunk list.
 *
 * Image chunks (VP8, VP8L, VP8X) have their header decoded and merged in the
 * order they appear, so values from a later chunk override earlier ones. The
 * first readable EXIF and XMP chunks are decoded into 'media-metadata'.
 *
 * @param array[] $chunks Chunk descriptors, each with 'fourCC', 'start' and 'size'
 * @param string $filename Path of the file the chunks were found in
 * @return array Merged header information; empty when nothing usable was found
 */
public static function extractMetadataFromChunks( $chunks, $filename ) {
	$vp8Info = [];
	$exifData = null;
	$xmpData = null;

	foreach ( $chunks as $chunk ) {
		$fourCC = $chunk['fourCC'];
		$isImageChunk = in_array( $fourCC, [ 'VP8 ', 'VP8L', 'VP8X' ], true );
		// Note, spec says it should be 'XMP ' but some real life files use "XMP\0"
		$isXmpChunk = $fourCC === 'XMP ' || $fourCC === "XMP\0";
		$isExifChunk = $fourCC === 'EXIF';

		if ( !$isImageChunk && !$isXmpChunk && !$isExifChunk ) {
			// Not a chunk containing interesting metadata
			continue;
		}
		wfDebugLog( 'WebP', __METHOD__ . ": {$fourCC}" );

		if ( $isExifChunk ) {
			// Spec says ignore all but first one
			$exifData ??= self::extractChunk( $chunk, $filename );
			continue;
		}
		if ( $isXmpChunk ) {
			$xmpData ??= self::extractChunk( $chunk, $filename );
			continue;
		}

		// Only image chunks need their header bytes; metadata chunks are read
		// in full by extractChunk() above.
		$chunkHeader = file_get_contents( $filename, false, null,
			$chunk['start'], self::MINIMUM_CHUNK_HEADER_LENGTH );
		if ( $chunkHeader === false ) {
			// Unreadable header: skip instead of decoding a non-string.
			wfDebugLog( 'WebP', __METHOD__ . ": Could not read {$fourCC} chunk header" );
			continue;
		}

		switch ( $fourCC ) {
			case 'VP8 ':
				$decoded = self::decodeLossyChunkHeader( $chunkHeader );
				break;
			case 'VP8L':
				$decoded = self::decodeLosslessChunkHeader( $chunkHeader );
				break;
			default:
				// 'VP8X'. Keep looking at other chunks to improve the metadata.
				$decoded = self::decodeExtendedChunkHeader( $chunkHeader );
				break;
		}
		$vp8Info = array_merge( $vp8Info, $decoded );
	}

	return array_merge( $vp8Info,
		self::decodeMediaMetadata( $exifData, $xmpData, $filename ) );
}
181
/**
 * Decode raw EXIF and XMP payloads into a 'media-metadata' entry.
 *
 * @param string|null|false $exifData Raw content of the EXIF chunk, if any
 * @param string|null|false $xmpData Raw content of the XMP chunk, if any
 * @param string $filename Path of the source file, handed to the XMP reader
 * @return array Empty when both inputs are null, otherwise
 *  [ 'media-metadata' => array ]
 */
private static function decodeMediaMetadata( $exifData, $xmpData, $filename ) {
	if ( $exifData === null && $xmpData === null ) {
		// Nothing to do
		return [];
	}

	$collector = new BitmapMetadataHandler;

	if ( $xmpData && XMPReader::isSupported() ) {
		$reader = new XMPReader( LoggerFactory::getInstance( 'XMP' ), $filename );
		$reader->parse( $xmpData );
		foreach ( $reader->getResults() as $type => $values ) {
			$collector->addMetadata( $values, $type );
		}
	}

	if ( $exifData ) {
		// The Exif section of a webp file is basically a tiff file without an image.
		// Some files start with an Exif\0\0. This is wrong according to standard and
		// will prevent us from reading file, so remove for compatibility.
		$bogusPrefix = "Exif\x00\x00";
		if ( substr( $exifData, 0, 6 ) === $bogusPrefix ) {
			$exifData = substr( $exifData, 6 );
		}

		// The Exif reader works on a file path, so write the payload to a temp file.
		// NOTE(review): $tmpFile is kept in a local for the rest of the method;
		// presumably the temp file lives as long as this object - confirm.
		$tempFactory = MediaWikiServices::getInstance()->getTempFSFileFactory();
		$tmpFile = $tempFactory->newTempFSFile( 'webp-exif_', 'tiff' );
		$tiffPath = $tmpFile->getPath();
		file_put_contents( $tiffPath, $exifData );

		$collector->getExif( $tiffPath, BitmapMetadataHandler::getTiffByteOrder( $tiffPath ) );
	}

	return [ 'media-metadata' => $collector->getMetadataArray() ];
}
224
/**
 * Read the payload of a single chunk from the file.
 *
 * @param array $chunk Chunk descriptor with 'start' and 'size'
 * @param string $filename Path of the file to read from
 * @return string|false|null Null when the chunk is empty or larger than
 *  MAX_METADATA_CHUNK_SIZE, otherwise whatever file_get_contents() returns
 */
private static function extractChunk( $chunk, $filename ) {
	$payloadSize = $chunk['size'];
	if ( $payloadSize < 1 || $payloadSize > self::MAX_METADATA_CHUNK_SIZE ) {
		return null;
	}

	// Skip first 8 bytes as that is the fourCC header followed by size of chunk.
	$payloadStart = $chunk['start'] + 8;

	return file_get_contents( $filename, false, null, $payloadStart, $payloadSize );
}
238
/**
 * Decodes a lossy chunk header.
 *
 * @param string $header First 18 bytes of the chunk, starting at its fourCC
 * @return array Empty when the header is truncated or has a bad sync code,
 *  otherwise 'compression', 'width' and 'height'
 */
protected static function decodeLossyChunkHeader( $header ) {
	// Bytes 0-3 are 'VP8 '
	// Bytes 4-7 are the VP8 stream size
	// Bytes 8-10 are the frame tag
	// Bytes 11-13 are 0x9D 0x01 0x2A called the sync code
	// Bytes 14-17 are image size
	if ( !is_string( $header ) || strlen( $header ) < 18 ) {
		// A short read with a valid sync code would otherwise make unpack()
		// fail and be reported as a 0x0 image.
		wfDebugLog( 'WebP', __METHOD__ . ': Truncated chunk header' );
		return [];
	}

	$syncCode = substr( $header, 11, 3 );
	if ( $syncCode !== "\x9D\x01\x2A" ) {
		wfDebugLog( 'WebP', __METHOD__ . ': Invalid sync code: ' .
			bin2hex( $syncCode ) );
		return [];
	}

	// Two little endian 16 bit values: width, then height.
	$imageSize = unpack( 'v2', substr( $header, 14, 4 ) );

	// Image sizes are 14 bit, 2 MSB are scaling parameters which are ignored here
	return [
		'compression' => 'lossy',
		'width' => $imageSize[1] & 0x3FFF,
		'height' => $imageSize[2] & 0x3FFF
	];
}
264
/**
 * Decodes a lossless chunk header.
 *
 * @param string $header First bytes of the chunk, starting at its fourCC;
 *  at least 13 bytes are needed
 * @return array Empty when the header is truncated or has a bad signature,
 *  otherwise 'compression', 'width' and 'height'
 */
public static function decodeLosslessChunkHeader( $header ) {
	// Bytes 0-3 are 'VP8L'
	// Bytes 4-7 are chunk stream size
	// Byte 8 is 0x2F called the signature
	// Bytes 9-12 contain the image size
	if ( !is_string( $header ) || strlen( $header ) < 13 ) {
		// Reading offsets 8-12 of a shorter string would raise warnings and
		// end up reporting a bogus 1x1 image.
		wfDebugLog( 'WebP', __METHOD__ . ': Truncated chunk header' );
		return [];
	}

	if ( $header[8] !== "\x2F" ) {
		wfDebugLog( 'WebP', __METHOD__ . ': Invalid signature: ' .
			bin2hex( $header[8] ) );
		return [];
	}

	// The four size bytes form a little endian bit field:
	// bits 0-13 are width-1; bits 14-27 are height-1
	$imageSize = unpack( 'C4', substr( $header, 9, 4 ) );
	$widthMinusOne = $imageSize[1] | ( ( $imageSize[2] & 0x3F ) << 8 );
	$heightMinusOne = ( ( $imageSize[2] & 0xC0 ) >> 6 ) |
		( $imageSize[3] << 2 ) | ( ( $imageSize[4] & 0x03 ) << 10 );

	return [
		'compression' => 'lossless',
		'width' => $widthMinusOne + 1,
		'height' => $heightMinusOne + 1
	];
}
289
/**
 * Decodes an extended chunk header.
 *
 * @param string $header First 18 bytes of the chunk, starting at its fourCC
 * @return array Empty when the header is truncated, otherwise 'compression',
 *  'animated', 'transparency', 'width' and 'height'
 */
public static function decodeExtendedChunkHeader( $header ) {
	// Bytes 0-3 are 'VP8X'
	// Bytes 4-7 are chunk length
	// Byte 8 holds the feature flags; bytes 9-11 are not read here
	// Bytes 12-14 are width-1 and bytes 15-17 are height-1 (24 bits each)
	if ( !is_string( $header ) || strlen( $header ) < self::MINIMUM_CHUNK_HEADER_LENGTH ) {
		// A short read would make unpack() fail and be reported as a
		// non-animated 1x1 image.
		wfDebugLog( 'WebP', __METHOD__ . ': Truncated chunk header' );
		return [];
	}

	$flags = ord( $header[8] );

	// unpack() has no 24 bit format: pad each little endian value to 32 bits.
	$width = unpack( 'V', substr( $header, 12, 3 ) . "\x00" );
	$height = unpack( 'V', substr( $header, 15, 3 ) . "\x00" );

	return [
		'compression' => 'unknown',
		'animated' => ( $flags & self::VP8X_ANIM ) === self::VP8X_ANIM,
		'transparency' => ( $flags & self::VP8X_ALPHA ) === self::VP8X_ALPHA,
		'width' => ( $width[1] & 0xFFFFFF ) + 1,
		'height' => ( $height[1] & 0xFFFFFF ) + 1
	];
}
313
/**
 * Report that files of this type always have to be rendered.
 *
 * @param mixed $file Not used
 * @return bool Always true
 */
public function mustRender( $file ) {
	return true;
}
321
/**
 * Tell whether this handler can render the file: anything that is not
 * animated, according to isAnimatedImage().
 *
 * @param mixed $file File object, passed on to isAnimatedImage()
 * @return bool False for animated images, true otherwise
 */
public function canRender( $file ) {
	return !$this->isAnimatedImage( $file );
}
332
/**
 * Tell whether the stored metadata marks the image as animated.
 *
 * @param mixed $image Object exposing getMetadataArray()
 * @return bool True only when the 'animated' entry is exactly true
 */
public function isAnimatedImage( $image ) {
	$stored = $image->getMetadataArray();

	// A missing or null entry counts as "not animated".
	return ( $stored['animated'] ?? null ) === true;
}
345
/**
 * Report that thumbnails produced by this handler are never animated.
 *
 * @param mixed $file Not used
 * @return bool Always false
 */
public function canAnimateThumbnail( $file ) {
	return false;
}
349
/**
 * Render files as PNG.
 *
 * @param string $ext Not used
 * @param string $mime Not used
 * @param array|null $params Not used
 * @return array Thumbnail extension and MIME type: [ 'png', 'image/png' ]
 */
public function getThumbType( $ext, $mime, $params = null ) {
	$thumbExtension = 'png';
	$thumbMime = 'image/png';

	return [ $thumbExtension, $thumbMime ];
}
361
/**
 * Whether the php-gd extension is loaded and reports WebP support.
 *
 * @return bool
 */
protected function hasGDSupport() {
	if ( !function_exists( 'gd_info' ) ) {
		return false;
	}

	// The key is absent on builds of GD without WebP support.
	return (bool)( gd_info()['WebP Support'] ?? false );
}
365
/**
 * Return the 'media-metadata' part of the stored metadata.
 *
 * @param File $image
 * @return array The stored 'media-metadata' entry, or an empty array when it
 *  is missing or null
 */
public function getCommonMetaArray( File $image ) {
	$stored = $image->getMetadataArray();
	if ( isset( $stored['media-metadata'] ) ) {
		return $stored['media-metadata'];
	}

	return [];
}
370
/**
 * Format the common metadata of an image for display.
 *
 * @param File $image
 * @param mixed $context Passed through to formatMetadataHelper()
 * @return array|false False when there is no metadata to show, otherwise the
 *  result of formatMetadataHelper()
 */
public function formatMetadata( $image, $context = false ) {
	$common = $this->getCommonMetaArray( $image );

	return $common ? $this->formatMetadataHelper( $common, $context ) : false;
}
379}
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
wfDebugLog( $logGroup, $text, $dest='all', array $context=[])
Send a line to a supplementary debug log file, if configured, or main debug log if not.
array $params
The job parameters.
Generic handler for bitmap images.
Class to deal with reconciling and extracting metadata from bitmap images.
static getTiffByteOrder( $filename)
Read the first 2 bytes of a tiff file to figure out Little Endian or Big Endian.
Implements some public methods and some protected utility functions which are required by multiple ch...
Definition File.php:79
getMetadataArray()
Get the unserialized handler-specific metadata STUB.
Definition File.php:777
const METADATA_COMPATIBLE
formatMetadataHelper( $metadataArray, $context=false)
sorts the visible/invisible field.
const METADATA_GOOD
Create PSR-3 logger objects.
Service locator for MediaWiki core services.
static findChunksFromFile( $filename, $maxChunks=-1)
Handler for Google's WebP format https://developers.google.com/speed/webp/
mustRender( $file)
getCommonMetaArray(File $image)
Get an array of standard (FormatMetadata type) metadata values.
canAnimateThumbnail( $file)
If the material is animated, we can animate the thumbnail.
isFileMetadataValid( $image)
Check if the metadata is valid for this handler.
static extractMetadataFromChunks( $chunks, $filename)
Extracts the image size and WebP type from a file based on the chunk list.
isAnimatedImage( $image)
static decodeLossyChunkHeader( $header)
Decodes a lossy chunk header.
getSizeAndMetadata( $state, $filename)
Get image size information and metadata array.
canRender( $file)
formatMetadata( $image, $context=false)
Get an array structure that looks like this:
getThumbType( $ext, $mime, $params=null)
Render files as PNG.
static decodeLosslessChunkHeader( $header)
Decodes a lossless chunk header.
getMetadataType( $image)
Get a string describing the type of metadata, for display purposes.
hasGDSupport()
Whether the php-gd extension supports this type of file.
static extractMetadata( $filename)
Extracts the image size and WebP type from a file.
static decodeExtendedChunkHeader( $header)
Decodes an extended chunk header.
$header