MediaWiki master
BitmapMetadataHandler.php
Go to the documentation of this file.
1<?php
10namespace MediaWiki\Media;
11
15use Wikimedia\XMPReader\Reader as XMPReader;
16
/**
 * Collected metadata, keyed by metadata type (for example 'exif' or 'native').
 * Each value is the array of items added for that type via addMetadata().
 */
33 private $metadata = [];
34
/**
 * Merge priority => list of metadata types at that priority.
 * getMetadataArray() walks this from the highest key to the lowest, and an
 * item that was merged earlier is not overwritten by a later type.
 */
36 private $metaPriority = [
37 20 => [ 'other' ],
38 40 => [ 'native' ],
39 60 => [ 'iptc-good-hash', 'iptc-no-hash' ],
40 70 => [ 'xmp-deprecated' ],
41 80 => [ 'xmp-general' ],
42 90 => [ 'xmp-exif' ],
43 100 => [ 'iptc-bad-hash' ],
44 120 => [ 'exif' ],
45 ];
46
/**
 * Metadata type under which doApp13() stores parsed IPTC data.
 * Starts as 'iptc-no-hash' and is reset to it when the PSIR data cannot be read.
 */
48 private $iptcType = 'iptc-no-hash';
49
/**
 * Process one APP13 (Photoshop image resource) segment.
 *
 * The IPTC type is taken from JpegMetadataExtractor::doPSIR(); if that throws
 * InvalidPSIRException the segment is still parsed, but treated as if no hash
 * were present. The parsed IPTC data is then stored under that type.
 *
 * @param string $app13 Contents of the APP13 segment
 */
private function doApp13( $app13 ) {
	try {
		$hashStatus = JpegMetadataExtractor::doPSIR( $app13 );
	} catch ( InvalidPSIRException $psirError ) {
		// The hash information could not be read, so the segment is probably
		// not what we expect. Log it and carry on as though there were no hash.
		wfDebug( "Error parsing iptc data of file: " . $psirError->getMessage() );
		$hashStatus = 'iptc-no-hash';
	}
	$this->iptcType = $hashStatus;

	$this->addMetadata( IPTC::parse( $app13 ), $hashStatus );
}
72
/**
 * Read Exif data from a file and add it under the 'exif' type.
 *
 * Nothing is added when the file does not exist, when the ShowEXIF setting
 * is off, or when the Exif reader yields no filtered data.
 *
 * @param string $filename Path of the file to read
 * @param string $byteOrder Byte order passed through to the Exif reader
 */
public function getExif( $filename, $byteOrder ) {
	$showEXIF = MediaWikiServices::getInstance()->getMainConfig()->get( MainConfigNames::ShowEXIF );
	if ( !file_exists( $filename ) || !$showEXIF ) {
		return;
	}

	$filtered = ( new Exif( $filename, $byteOrder ) )->getFilteredData();
	if ( $filtered ) {
		$this->addMetadata( $filtered, 'exif' );
	}
}
93
/**
 * Store metadata items under the given type.
 *
 * If items were already stored for this type, the two sets are combined;
 * on a key clash the newly supplied value wins.
 *
 * @param array $metaArray Items to add, keyed by item name
 * @param string $type Metadata type to file the items under
 */
public function addMetadata( $metaArray, $type = 'other' ) {
	// Array union keeps the left-hand value on duplicate keys, so the new
	// items take precedence over what was stored before.
	$this->metadata[$type] = isset( $this->metadata[$type] )
		? $metaArray + $this->metadata[$type]
		: $metaArray;
}
108
/**
 * Merge the collected metadata of all types into a single array.
 *
 * Types are visited from the highest priority key in $metaPriority to the
 * lowest, and an item that has already been merged is not overwritten by a
 * lower-priority type. The one exception is a multilingual value
 * ('_type' => 'lang') arriving where a simple (non-array) value is already
 * present: the translations are kept and the simple value becomes their
 * 'x-default' entry.
 *
 * This method does not modify the instance, so calling it repeatedly
 * returns the same result.
 *
 * @return array Merged metadata, keyed by item name
 */
public function getMetadataArray() {
	// Sort a copy so that reading the result has no side effects.
	$priorities = $this->metaPriority;
	krsort( $priorities );

	$merged = [];
	foreach ( $priorities as $types ) {
		foreach ( $types as $type ) {
			if ( !isset( $this->metadata[$type] ) ) {
				continue;
			}

			// Work on a copy: removing items from the stored metadata would
			// make a second call lose the multilingual values handled below.
			$section = $this->metadata[$type];

			// Special casing for multilingual values: don't discard
			// translations if the item also exists as a simple value.
			foreach ( $section as $itemName => $item ) {
				if ( is_array( $item ) && isset( $item['_type'] ) && $item['_type'] === 'lang' &&
					isset( $merged[$itemName] ) && !is_array( $merged[$itemName] )
				) {
					$default = $merged[$itemName];
					$merged[$itemName] = $item;
					$merged[$itemName]['x-default'] = $default;
					unset( $section[$itemName] );
				}
			}

			// Array union: keys already present in $merged are kept.
			$merged += $section;
		}
	}

	return $merged;
}
146
/**
 * Main entry point for JPEG files.
 *
 * Splits the file into segments and feeds the SOF, comment, PSIR (IPTC) and
 * XMP segments plus the Exif data into a handler, then returns the merged
 * result.
 *
 * @param string $filename Path of the JPEG file
 * @return array Merged metadata as produced by getMetadataArray()
 */
public static function Jpeg( $filename ) {
	$showXMP = XMPReader::isSupported();
	$meta = new self();

	$seg = JpegMetadataExtractor::segmentSplitter( $filename );

	if ( isset( $seg['SOF'] ) ) {
		$meta->addMetadata( [ 'SOF' => $seg['SOF'] ] );
	}

	// Only record file comments when at least one is present.
	if ( isset( $seg['COM'][0] ) ) {
		$meta->addMetadata( [ 'JPEGFileComment' => $seg['COM'] ], 'native' );
	}

	if ( isset( $seg['PSIR'] ) && count( $seg['PSIR'] ) !== 0 ) {
		foreach ( $seg['PSIR'] as $psirSegment ) {
			$meta->doApp13( $psirSegment );
		}
	}

	if ( isset( $seg['XMP'] ) && $showXMP ) {
		$reader = new XMPReader( LoggerFactory::getInstance( 'XMP' ), $filename );
		$reader->parse( $seg['XMP'] );
		// Support for extended XMP in JPEG files is not well tested
		// and a bit fragile.
		foreach ( $seg['XMP_ext'] as $extendedPacket ) {
			$reader->parseExtended( $extendedPacket );
		}
		$sections = $reader->getResults();
		foreach ( $sections as $type => $section ) {
			$meta->addMetadata( $section, $type );
		}
	}

	// Fall back to big endian when the splitter reported no byte order.
	$meta->getExif( $filename, $seg['byteOrder'] ?? 'BE' );

	return $meta->getMetadataArray();
}
189
/**
 * Entry point for PNG files.
 *
 * Takes the array returned by PNGMetadataExtractor::getMetadata(), moves the
 * XMP packet, the embedded Exif data and the text chunks into a merged
 * 'metadata' entry, and returns the remaining array with that entry added.
 *
 * @param string $filename Path of the PNG file
 * @return array Extractor output without 'exif' and 'text', plus 'metadata'
 */
public static function PNG( $filename ) {
	$showXMP = XMPReader::isSupported();

	$meta = new self();
	$array = PNGMetadataExtractor::getMetadata( $filename );

	$xmpPacket = $array['text']['xmp']['x-default'] ?? null;
	if ( $xmpPacket !== null && $xmpPacket !== '' && $showXMP ) {
		$reader = new XMPReader( LoggerFactory::getInstance( 'XMP' ), $filename );
		$reader->parse( $xmpPacket );
		$sections = $reader->getResults();
		foreach ( $sections as $type => $section ) {
			$meta->addMetadata( $section, $type );
		}
	}

	if ( $array['exif'] ) {
		// The Exif section is essentially an embedded tiff file,
		// so write it out to a temporary file and read that.
		$tempFactory = MediaWikiServices::getInstance()->getTempFSFileFactory();
		// Keep the temp file object in a variable for the rest of this block.
		$tmpFile = $tempFactory->newTempFSFile( 'png-exif_', 'tiff' );
		$exifPath = $tmpFile->getPath();
		file_put_contents( $exifPath, $array['exif'] );
		$meta->getExif( $exifPath, self::getTiffByteOrder( $exifPath ) );
	}

	unset( $array['exif'], $array['text']['xmp'] );
	$meta->addMetadata( $array['text'], 'native' );
	unset( $array['text'] );

	$merged = $meta->getMetadataArray();
	$merged['_MW_PNG_VERSION'] = PNGMetadataExtractor::VERSION;
	$array['metadata'] = $merged;

	return $array;
}
234
/**
 * Entry point for GIF files.
 *
 * Takes the array returned by GIFMetadataExtractor::getMetadata(), moves the
 * comments and the XMP packet into a merged 'metadata' entry, and returns
 * the remaining array with that entry added.
 *
 * @param string $filename Path of the GIF file
 * @return array Extractor output without 'comment' and 'xmp', plus 'metadata'
 */
public static function GIF( $filename ) {
	$meta = new self();
	$info = GIFMetadataExtractor::getMetadata( $filename );

	if ( count( $info['comment'] ) > 0 ) {
		$meta->addMetadata( [ 'GIFFileComment' => $info['comment'] ], 'native' );
	}

	if ( $info['xmp'] !== '' && XMPReader::isSupported() ) {
		$reader = new XMPReader( LoggerFactory::getInstance( 'XMP' ), $filename );
		$reader->parse( $info['xmp'] );
		$sections = $reader->getResults();
		foreach ( $sections as $type => $section ) {
			$meta->addMetadata( $section, $type );
		}
	}

	// These have been folded into the merged metadata; drop the raw copies.
	unset( $info['comment'], $info['xmp'] );

	$merged = $meta->getMetadataArray();
	$merged['_MW_GIF_VERSION'] = GIFMetadataExtractor::VERSION;
	$info['metadata'] = $merged;

	return $info;
}
268
/**
 * Entry point for TIFF files: read the Exif data of the file.
 *
 * @param string $filename Path of the TIFF file
 * @return array Filtered Exif data with 'MEDIAWIKI_EXIF_VERSION' added
 * @throws InvalidTiffException If the file does not exist, its byte order
 *  cannot be determined, or no data could be extracted
 */
public static function Tiff( $filename ) {
	if ( !file_exists( $filename ) ) {
		throw new InvalidTiffException(
			"File {filename} doesn't exist",
			[ 'filename' => $filename ]
		);
	}

	$byteOrder = self::getTiffByteOrder( $filename );
	if ( !$byteOrder ) {
		throw new InvalidTiffException(
			'Error determining byte order of {filename}',
			[ 'filename' => $filename ]
		);
	}

	$data = ( new Exif( $filename, $byteOrder ) )->getFilteredData();
	if ( !$data ) {
		throw new InvalidTiffException(
			'Could not extract data from tiff file {filename}',
			[ 'filename' => $filename ]
		);
	}

	$data['MEDIAWIKI_EXIF_VERSION'] = Exif::version();

	return $data;
}
310
/**
 * Read the first two bytes of a file to find out its TIFF byte order.
 *
 * @param string $filename Path of the file to inspect
 * @return string|false 'LE' for an 'II' header, 'BE' for an 'MM' header,
 *  false if the file cannot be opened or starts with anything else
 */
public static function getTiffByteOrder( $filename ) {
	$handle = fopen( $filename, 'rb' );
	if ( !$handle ) {
		return false;
	}
	$marker = fread( $handle, 2 );
	fclose( $handle );

	if ( $marker === 'II' ) {
		// "II" stands for Intel: little endian.
		return 'LE';
	}
	if ( $marker === 'MM' ) {
		// "MM" stands for Motorola: big endian.
		return 'BE';
	}

	// Not a recognised TIFF header.
	return false;
}
336}
337
// Also expose the namespaced class under the global name 'BitmapMetadataHandler'.
339class_alias( BitmapMetadataHandler::class, 'BitmapMetadataHandler' );
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
Create PSR-3 logger objects.
A class containing constants representing the names of configuration variables.
const ShowEXIF
Name constant for the ShowEXIF setting, for use with Config::get()
Service locator for MediaWiki core services.
static getInstance()
Returns the global default instance of the top level service locator.
Class to deal with reconciling and extracting metadata from bitmap images.
static Jpeg( $filename)
Main entry point for jpeg's.
getExif( $filename, $byteOrder)
Get exif info using exif class.
getMetadataArray()
Merge together the various types of metadata; the different types have different priorities,...
addMetadata( $metaArray, $type='other')
Add misc metadata.
static PNG( $filename)
Entry point for png. At some point in the future this might merge the png various tEXt chunks to that ...
static Tiff( $filename)
This doesn't do much yet, but eventually I plan to add XMP support for Tiff.
static GIF( $filename)
Function for gif images.
static getTiffByteOrder( $filename)
Read the first 2 bytes of a tiff file to figure out Little Endian or Big Endian.
Class to extract and validate Exif data from jpeg (and possibly tiff) files.
Definition Exif.php:22
static version()
The version of the output format.
Definition Exif.php:691
static parse( $rawData)
This takes the results of iptcparse() and puts it into a form that can be handled by mediawiki.
Definition IPTC.php:30
static segmentSplitter( $filename)
Function to extract metadata segments of interest from jpeg files based on GIFMetadataExtractor.
static doPSIR( $app13)
This reads the Photoshop image resource.