MediaWiki master
WebPHandler.php
Go to the documentation of this file.
1<?php
26use Wikimedia\XMPReader\Reader as XMPReader;
27
/** Value stored under the '_error' metadata key when extraction fails. */
private const BROKEN_FILE = '0';

/** Number of bytes read from the start of a chunk in order to decode its header. */
private const MINIMUM_CHUNK_HEADER_LENGTH = 18;

/** Largest EXIF/XMP chunk payload, in bytes, that extractChunk() will read (2 MiB). */
private const MAX_METADATA_CHUNK_SIZE = 2 * 1024 * 1024;

/** Version stamp written into the stored metadata and checked when validating it. */
private const _MW_WEBP_VERSION = 2;

// Bit masks applied to the flag byte of a VP8X chunk header.
private const VP8X_ICC = 0x20;
private const VP8X_ALPHA = 0x10;
private const VP8X_EXIF = 0x08;
private const VP8X_XMP = 0x04;
private const VP8X_ANIM = 0x02;
56
/**
 * Build the size/metadata record for a WebP file.
 *
 * @param mixed $state Not used by this implementation
 * @param string $filename Path of the file to inspect
 * @return array Either [ 'metadata' => [ '_error' => BROKEN_FILE ] ] when the
 *  file could not be parsed, or an array with 'width', 'height' and 'metadata'
 *  (the parsed data, stamped with the current metadata version).
 */
public function getSizeAndMetadata( $state, $filename ) {
	$webpInfo = self::extractMetadata( $filename );
	if ( !$webpInfo ) {
		return [ 'metadata' => [ '_error' => self::BROKEN_FILE ] ];
	}

	// Stamp the version so isFileMetadataValid() can recognise outdated records.
	$webpInfo['metadata']['_MW_WEBP_VERSION'] = self::_MW_WEBP_VERSION;

	return [
		'width' => $webpInfo['width'],
		'height' => $webpInfo['height'],
		'metadata' => $webpInfo,
	];
}
71
/**
 * Name of the metadata format produced by this handler.
 *
 * @param File $image Not used; the answer is the same for every file
 * @return string Always 'parsed-webp'
 */
public function getMetadataType( $image ) {
	$metadataType = 'parsed-webp';

	return $metadataType;
}
75
/**
 * Check whether the metadata stored for a file is usable by this handler.
 *
 * @param File $image File whose stored metadata array is inspected
 * @return int One of the METADATA_GOOD, METADATA_BAD or METADATA_COMPATIBLE
 *  constants:
 *  - METADATA_GOOD for the broken-file marker (so extraction is not retried)
 *    and for metadata carrying the current version stamp,
 *  - METADATA_BAD for empty metadata or metadata carrying any other '_error',
 *  - METADATA_COMPATIBLE for metadata with a missing or different version stamp.
 */
public function isFileMetadataValid( $image ) {
	$data = $image->getMetadataArray();
	if ( $data === [ '_error' => self::BROKEN_FILE ] ) {
		// Do not repetitively regenerate metadata on a broken file.
		return self::METADATA_GOOD;
	}

	// Successfully parsed metadata never contains an '_error' key, so its
	// presence (or an empty array) means the stored record is unusable.
	if ( !$data || isset( $data['_error'] ) ) {
		wfDebug( __METHOD__ . " invalid WebP metadata" );

		return self::METADATA_BAD;
	}

	if ( !isset( $data['metadata']['_MW_WEBP_VERSION'] )
		|| $data['metadata']['_MW_WEBP_VERSION'] != self::_MW_WEBP_VERSION
	) {
		wfDebug( __METHOD__ . " old but compatible WebP metadata" );

		return self::METADATA_COMPATIBLE;
	}

	return self::METADATA_GOOD;
}
98
/**
 * Extract the image size and WebP details from a file.
 *
 * @param string $filename Path of the file to read
 * @return array|false The data assembled by extractMetadataFromChunks(),
 *  guaranteed to contain 'width' and 'height'; false when the file is not a
 *  RIFF/WEBP container or no image dimensions could be decoded.
 */
public static function extractMetadata( $filename ) {
	wfDebugLog( 'WebP', __METHOD__ . ": Extracting metadata from $filename" );

	$info = RiffExtractor::findChunksFromFile( $filename, 100 );
	if ( $info === false ) {
		wfDebugLog( 'WebP', __METHOD__ . ": Not a valid RIFF file" );
		return false;
	}

	if ( $info['fourCC'] !== 'WEBP' ) {
		wfDebugLog( 'WebP', __METHOD__ . ': FourCC was not WEBP: ' .
			bin2hex( $info['fourCC'] ) );
		return false;
	}

	$metadata = self::extractMetadataFromChunks( $info['chunks'], $filename );
	// EXIF/XMP data alone makes the array non-empty, so test for the
	// dimensions that callers index rather than for emptiness.
	if ( !isset( $metadata['width'], $metadata['height'] ) ) {
		wfDebugLog( 'WebP', __METHOD__ . ": No VP8 chunks found" );
		return false;
	}

	return $metadata;
}
129
/**
 * Extract the image size, WebP type and embedded EXIF/XMP metadata from a
 * file, given its chunk list.
 *
 * @param array[] $chunks Chunk descriptors; each is read for its 'fourCC',
 *  'start' and 'size' entries
 * @param string $filename Path of the file the chunks belong to
 * @return array Merged results of the VP8/VP8L/VP8X header decoders plus,
 *  when EXIF or XMP data was found, a 'media-metadata' entry. Empty when no
 *  relevant chunk could be decoded.
 */
public static function extractMetadataFromChunks( $chunks, $filename ) {
	$vp8Info = [];
	$exifData = null;
	$xmpData = null;

	foreach ( $chunks as $chunk ) {
		$fourCC = $chunk['fourCC'];
		$isImageChunk = in_array( $fourCC, [ 'VP8 ', 'VP8L', 'VP8X' ], true );
		$isExifChunk = $fourCC === 'EXIF';
		// Note, spec says it should be 'XMP ' but some real life files use "XMP\0"
		$isXmpChunk = $fourCC === 'XMP ' || $fourCC === "XMP\0";

		if ( !$isImageChunk && !$isExifChunk && !$isXmpChunk ) {
			// Not a chunk containing interesting metadata
			continue;
		}
		wfDebugLog( 'WebP', __METHOD__ . ": {$fourCC}" );

		if ( $isExifChunk ) {
			// Spec says ignore all but first one
			if ( $exifData === null ) {
				$exifData = self::extractChunk( $chunk, $filename );
			}
			continue;
		}

		if ( $isXmpChunk ) {
			if ( $xmpData === null ) {
				$xmpData = self::extractChunk( $chunk, $filename );
			}
			continue;
		}

		// Only the image chunks need their header bytes; a short read is
		// still passed on because a lossless header needs fewer bytes than
		// MINIMUM_CHUNK_HEADER_LENGTH.
		$chunkHeader = file_get_contents( $filename, false, null,
			$chunk['start'], self::MINIMUM_CHUNK_HEADER_LENGTH );
		if ( $chunkHeader === false ) {
			wfDebugLog( 'WebP', __METHOD__ . ": Could not read header of {$fourCC} chunk" );
			continue;
		}

		switch ( $fourCC ) {
			case 'VP8 ':
				$decoded = self::decodeLossyChunkHeader( $chunkHeader );
				break;
			case 'VP8L':
				$decoded = self::decodeLosslessChunkHeader( $chunkHeader );
				break;
			default:
				// 'VP8X'; later chunks may still refine these values
				$decoded = self::decodeExtendedChunkHeader( $chunkHeader );
				break;
		}
		$vp8Info = array_merge( $vp8Info, $decoded );
	}

	return array_merge( $vp8Info,
		self::decodeMediaMetadata( $exifData, $xmpData, $filename ) );
}
185
/**
 * Turn raw EXIF and XMP chunk payloads into a media metadata array.
 *
 * @param string|null|false $exifData Payload of the EXIF chunk, if any
 * @param string|null|false $xmpData Payload of the XMP chunk, if any
 * @param string $filename Name of the WebP file; passed to the XMP reader
 * @return array Empty when both payloads are null, otherwise
 *  [ 'media-metadata' => array ] as collected by BitmapMetadataHandler
 */
private static function decodeMediaMetadata( $exifData, $xmpData, $filename ) {
	if ( $exifData === null && $xmpData === null ) {
		// Neither kind of chunk was present
		return [];
	}

	$handler = new BitmapMetadataHandler();

	// XMP is fed to the handler before Exif, as in the order below.
	if ( $xmpData && XMPReader::isSupported() ) {
		$reader = new XMPReader( LoggerFactory::getInstance( 'XMP' ), $filename );
		$reader->parse( $xmpData );
		foreach ( $reader->getResults() as $type => $values ) {
			$handler->addMetadata( $values, $type );
		}
	}

	if ( $exifData ) {
		// The Exif payload is essentially a TIFF file without an image. Some
		// writers prepend "Exif\0\0", which the standard does not allow and
		// which would stop it being read, so strip that prefix.
		if ( strncmp( $exifData, "Exif\x00\x00", 6 ) === 0 ) {
			$exifData = substr( $exifData, 6 );
		}

		// The Exif helpers take a file path, so spill the payload to a temp file.
		$tempFactory = MediaWikiServices::getInstance()->getTempFSFileFactory();
		$tmpFile = $tempFactory->newTempFSFile( 'webp-exif_', 'tiff' );
		$exifPath = $tmpFile->getPath();
		file_put_contents( $exifPath, $exifData );

		$handler->getExif( $exifPath, BitmapMetadataHandler::getTiffByteOrder( $exifPath ) );
	}

	return [ 'media-metadata' => $handler->getMetadataArray() ];
}
228
/**
 * Read the payload of one chunk from the file.
 *
 * @param array $chunk Chunk descriptor with 'start' and 'size' entries
 * @param string $filename Path of the file to read from
 * @return string|false|null Null when the chunk is empty or larger than
 *  MAX_METADATA_CHUNK_SIZE; otherwise whatever file_get_contents() returns
 */
private static function extractChunk( $chunk, $filename ) {
	$payloadLength = $chunk['size'];
	if ( $payloadLength < 1 || $payloadLength > self::MAX_METADATA_CHUNK_SIZE ) {
		return null;
	}

	// The payload follows the 8-byte chunk header (fourCC plus chunk size).
	$payloadOffset = $chunk['start'] + 8;

	return file_get_contents( $filename, false, null, $payloadOffset, $payloadLength );
}
242
/**
 * Decode the header of a lossy ('VP8 ') chunk.
 *
 * Layout of the bytes read here:
 * - bytes 0-3 are 'VP8 '
 * - bytes 4-7 are the VP8 stream size
 * - bytes 8-10 are the frame tag
 * - bytes 11-13 are the sync code 0x9D 0x01 0x2A
 * - bytes 14-17 are the image size
 *
 * @param string $header First bytes of the chunk, starting at its fourCC
 * @return array Empty when the header is truncated or the sync code is wrong,
 *  otherwise an array with 'compression' ('lossy'), 'width' and 'height'
 */
protected static function decodeLossyChunkHeader( $header ) {
	// Without all 18 bytes the size field cannot be unpacked.
	if ( !is_string( $header ) || strlen( $header ) < self::MINIMUM_CHUNK_HEADER_LENGTH ) {
		wfDebugLog( 'WebP', __METHOD__ . ': Truncated header' );
		return [];
	}

	$syncCode = substr( $header, 11, 3 );
	if ( $syncCode !== "\x9D\x01\x2A" ) {
		wfDebugLog( 'WebP', __METHOD__ . ': Invalid sync code: ' .
			bin2hex( $syncCode ) );
		return [];
	}

	$imageSize = unpack( 'v2', substr( $header, 14, 4 ) );
	// Image sizes are 14 bit, 2 MSB are scaling parameters which are ignored here
	return [
		'compression' => 'lossy',
		'width' => $imageSize[1] & 0x3FFF,
		'height' => $imageSize[2] & 0x3FFF
	];
}
268
/**
 * Decode the header of a lossless ('VP8L') chunk.
 *
 * Layout of the bytes read here:
 * - bytes 0-3 are 'VP8L'
 * - bytes 4-7 are the chunk stream size
 * - byte 8 is the signature 0x2F
 * - bytes 9-12 hold a little-endian bit field: bits 0-13 are width-1 and
 *   bits 14-27 are height-1
 *
 * @param string $header First bytes of the chunk, starting at its fourCC
 * @return array Empty when the header is truncated or the signature is wrong,
 *  otherwise an array with 'compression' ('lossless'), 'width' and 'height'
 */
public static function decodeLosslessChunkHeader( $header ) {
	// 13 bytes are needed to reach the end of the size field.
	if ( !is_string( $header ) || strlen( $header ) < 13 ) {
		wfDebugLog( 'WebP', __METHOD__ . ': Truncated header' );
		return [];
	}

	if ( $header[8] !== "\x2F" ) {
		wfDebugLog( 'WebP', __METHOD__ . ': Invalid signature: ' .
			bin2hex( $header[8] ) );
		return [];
	}

	// Both dimensions are 14 bits wide; masking after the shift keeps the
	// result correct even if the unpacked value is negative on 32-bit PHP.
	$sizeBits = unpack( 'V', substr( $header, 9, 4 ) )[1];

	return [
		'compression' => 'lossless',
		'width' => ( $sizeBits & 0x3FFF ) + 1,
		'height' => ( ( $sizeBits >> 14 ) & 0x3FFF ) + 1
	];
}
293
/**
 * Decode the header of an extended ('VP8X') chunk.
 *
 * Layout of the bytes read here:
 * - bytes 0-3 are 'VP8X'
 * - bytes 4-7 are the chunk length
 * - byte 8 holds the flag bits tested below (bytes 9-11 are not read)
 * - bytes 12-14 are width-1 and bytes 15-17 are height-1, each a 24-bit
 *   little-endian integer
 *
 * @param string $header First bytes of the chunk, starting at its fourCC
 * @return array Empty when the header is truncated, otherwise an array with
 *  'compression' ('unknown'), 'animated', 'transparency', 'width' and 'height'
 */
public static function decodeExtendedChunkHeader( $header ) {
	// All 18 bytes are required; decoding a short header would silently
	// report a 1x1 image instead of failing.
	if ( !is_string( $header ) || strlen( $header ) < self::MINIMUM_CHUNK_HEADER_LENGTH ) {
		wfDebugLog( 'WebP', __METHOD__ . ': Truncated header' );
		return [];
	}

	$flags = ord( $header[8] );

	// Pad each 24-bit value to 32 bits so it can be unpacked as one integer.
	$width = unpack( 'V', substr( $header, 12, 3 ) . "\x00" )[1];
	$height = unpack( 'V', substr( $header, 15, 3 ) . "\x00" )[1];

	return [
		'compression' => 'unknown',
		'animated' => ( $flags & self::VP8X_ANIM ) === self::VP8X_ANIM,
		'transparency' => ( $flags & self::VP8X_ALPHA ) === self::VP8X_ALPHA,
		'width' => ( $width & 0xFFFFFF ) + 1,
		'height' => ( $height & 0xFFFFFF ) + 1
	];
}
317
/**
 * Whether files of this type must be rendered before they can be displayed.
 *
 * @param File $file Not used; the answer is the same for every file
 * @return bool Always true
 */
public function mustRender( $file ) {
	$alwaysRender = true;

	return $alwaysRender;
}
325
/**
 * Whether the given file can be rendered by this handler.
 *
 * @param File $file File to check
 * @return bool False for animated images, true otherwise
 */
public function canRender( $file ) {
	return !$this->isAnimatedImage( $file );
}
336
/**
 * Whether the stored metadata marks the image as animated.
 *
 * @param File $image File whose metadata array is consulted
 * @return bool True only when the 'animated' entry is exactly true
 */
public function isAnimatedImage( $image ) {
	$stored = $image->getMetadataArray();

	return ( $stored['animated'] ?? null ) === true;
}
349
/**
 * Whether thumbnails of the given file can be animated.
 *
 * @param File $file Not used; the answer is the same for every file
 * @return bool Always false
 */
public function canAnimateThumbnail( $file ) {
	$animatable = false;

	return $animatable;
}
353
/**
 * Thumbnail format for WebP files: they are rendered as PNG.
 *
 * @param string $ext Not used
 * @param string $mime Not used
 * @param array|null $params Not used
 * @return string[] Pair of file extension and MIME type: [ 'png', 'image/png' ]
 */
public function getThumbType( $ext, $mime, $params = null ) {
	$thumbExtension = 'png';
	$thumbMime = 'image/png';

	return [ $thumbExtension, $thumbMime ];
}
365
/**
 * Whether the php-gd extension is available and reports WebP support.
 *
 * @return bool False when gd_info() does not exist or its 'WebP Support'
 *  entry is missing or falsy
 */
protected function hasGDSupport() {
	if ( !function_exists( 'gd_info' ) ) {
		return false;
	}

	$gdInfo = gd_info();

	return (bool)( $gdInfo['WebP Support'] ?? false );
}
369
/**
 * Return the 'media-metadata' part of the stored metadata.
 *
 * @param File $image File whose metadata array is consulted
 * @return array The 'media-metadata' entry, or an empty array when that
 *  entry is absent or null
 */
public function getCommonMetaArray( File $image ) {
	$stored = $image->getMetadataArray();
	if ( isset( $stored['media-metadata'] ) ) {
		return $stored['media-metadata'];
	}

	return [];
}
374
/**
 * Format the file's common metadata for display.
 *
 * @param File $image File whose metadata is formatted
 * @param mixed $context Passed through to formatMetadataHelper(); defaults to false
 * @return array|false False when there is no common metadata, otherwise the
 *  result of formatMetadataHelper()
 */
public function formatMetadata( $image, $context = false ) {
	$commonMeta = $this->getCommonMetaArray( $image );

	return $commonMeta
		? $this->formatMetadataHelper( $commonMeta, $context )
		: false;
}
383}
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
wfDebugLog( $logGroup, $text, $dest='all', array $context=[])
Send a line to a supplementary debug log file, if configured, or main debug log if not.
array $params
The job parameters.
Generic handler for bitmap images.
Class to deal with reconciling and extracting metadata from bitmap images.
static getTiffByteOrder( $filename)
Read the first 2 bytes of a tiff file to figure out Little Endian or Big Endian.
Implements some public methods and some protected utility functions which are required by multiple ch...
Definition File.php:74
getMetadataArray()
Get the unserialized handler-specific metadata STUB.
Definition File.php:760
const METADATA_COMPATIBLE
formatMetadataHelper( $metadataArray, $context=false)
sorts the visible/invisible field.
const METADATA_GOOD
Create PSR-3 logger objects.
Service locator for MediaWiki core services.
static findChunksFromFile( $filename, $maxChunks=-1)
Handler for Google's WebP format https://developers.google.com/speed/webp/
mustRender( $file)
getCommonMetaArray(File $image)
Get an array of standard (FormatMetadata type) metadata values.
canAnimateThumbnail( $file)
If the material is animated, we can animate the thumbnail.
isFileMetadataValid( $image)
Check if the metadata is valid for this handler.
static extractMetadataFromChunks( $chunks, $filename)
Extracts the image size and WebP type from a file based on the chunk list.
isAnimatedImage( $image)
static decodeLossyChunkHeader( $header)
Decodes a lossy chunk header.
getSizeAndMetadata( $state, $filename)
Get image size information and metadata array.
canRender( $file)
formatMetadata( $image, $context=false)
Get an array structure that looks like this:
getThumbType( $ext, $mime, $params=null)
Render files as PNG.
static decodeLosslessChunkHeader( $header)
Decodes a lossless chunk header.
getMetadataType( $image)
Get a string describing the type of metadata, for display purposes.
hasGDSupport()
Whether the php-gd extension supports this type of file.
static extractMetadata( $filename)
Extracts the image size and WebP type from a file.
static decodeExtendedChunkHeader( $header)
Decodes an extended chunk header.
$header