MediaWiki  master
BitmapMetadataHandler.php
Go to the documentation of this file.
1 <?php
27 use Wikimedia\RequestTimeout\TimeoutException;
28 use Wikimedia\XMPReader\Reader as XMPReader;
29 
46  private $metadata = [];
47 
49  private $metaPriority = [
50  20 => [ 'other' ],
51  40 => [ 'native' ],
52  60 => [ 'iptc-good-hash', 'iptc-no-hash' ],
53  70 => [ 'xmp-deprecated' ],
54  80 => [ 'xmp-general' ],
55  90 => [ 'xmp-exif' ],
56  100 => [ 'iptc-bad-hash' ],
57  120 => [ 'exif' ],
58  ];
59 
61  private $iptcType = 'iptc-no-hash';
62 
/**
 * Handle one App13 segment: decide which IPTC type it is, then parse and store its IPTC data.
 *
 * The value returned by JpegMetadataExtractor::doPSIR() is remembered in $this->iptcType
 * and used as the metadata type under which the parsed IPTC data is stored.
 *
 * @param string $app13 The App13 segment (presumably its raw bytes - confirm against caller)
 * @throws TimeoutException Re-thrown untouched so request timeouts are never swallowed
 */
private function doApp13( $app13 ) {
	try {
		$hashStatus = JpegMetadataExtractor::doPSIR( $app13 );
	} catch ( TimeoutException $timeout ) {
		// Timeouts must propagate; only other failures are treated as "no hash".
		throw $timeout;
	} catch ( Exception $failure ) {
		// The hash information could not be read, so the segment is probably not in
		// the expected format. Still attempt to parse it as if no hash were present.
		wfDebug( "Error parsing iptc data of file: " . $failure->getMessage() );
		$hashStatus = 'iptc-no-hash';
	}
	$this->iptcType = $hashStatus;

	$this->addMetadata( IPTC::parse( $app13 ), $this->iptcType );
}
87 
/**
 * Get exif info using the Exif class and store it under the 'exif' type.
 *
 * Nothing is stored when the file does not exist, when the ShowEXIF
 * configuration setting is falsy, or when the filtered Exif data is empty.
 *
 * @param string $filename Path of the file to read
 * @param string $byteOrder Byte order passed through to the Exif constructor
 */
public function getExif( $filename, $byteOrder ) {
	$config = MediaWikiServices::getInstance()->getMainConfig();
	$showEXIF = $config->get( MainConfigNames::ShowEXIF );
	if ( !file_exists( $filename ) || !$showEXIF ) {
		return;
	}

	$filtered = ( new Exif( $filename, $byteOrder ) )->getFilteredData();
	if ( $filtered ) {
		$this->addMetadata( $filtered, 'exif' );
	}
}
108 
/**
 * Add misc metadata under the given type.
 *
 * If data of that type was stored before, the two arrays are combined with the
 * array union operator: keys in $metaArray win, and previously stored keys are
 * kept only where $metaArray does not define them.
 *
 * @param array $metaArray Metadata items keyed by item name
 * @param string $type Metadata type the items belong to (defaults to 'other')
 */
public function addMetadata( $metaArray, $type = 'other' ) {
	$this->metadata[$type] = isset( $this->metadata[$type] )
		// New values take precedence; older ones only fill the gaps.
		? $metaArray + $this->metadata[$type]
		: $metaArray;
}
123 
/**
 * Merge together the various types of stored metadata into one array.
 *
 * Types are visited from the highest priority number in $this->metaPriority to the
 * lowest; an item that has already been merged is never overwritten by a type
 * visited later. The one exception is a multilingual value (an array whose '_type'
 * is 'lang') arriving after a plain, non-array value of the same name: the
 * multilingual value replaces it and the plain value is kept as its 'x-default'.
 *
 * This method does not modify the stored metadata or the priority table, so calling
 * it repeatedly returns the same result.
 *
 * @return array Merged metadata keyed by item name
 */
public function getMetadataArray() {
	$merged = [];

	// Sort a copy so the getter leaves the priority table untouched.
	$priorities = $this->metaPriority;
	krsort( $priorities );

	foreach ( $priorities as $types ) {
		foreach ( $types as $type ) {
			if ( !isset( $this->metadata[$type] ) ) {
				continue;
			}

			// Special casing for multilingual values:
			// don't discard translations if the item also exists as a simple value.
			foreach ( $this->metadata[$type] as $itemName => $item ) {
				if ( is_array( $item ) && isset( $item['_type'] ) && $item['_type'] === 'lang' &&
					isset( $merged[$itemName] ) && !is_array( $merged[$itemName] )
				) {
					$item['x-default'] = $merged[$itemName];
					$merged[$itemName] = $item;
				}
			}

			// Array union only adds missing keys, so items merged above (or by a
			// higher-priority type) are kept as they are. There is therefore no
			// need to remove anything from the stored metadata.
			$merged += $this->metadata[$type];
		}
	}

	return $merged;
}
161 
/**
 * Main entry point for JPEG files.
 *
 * Splits the file into segments with JpegMetadataExtractor::segmentSplitter(), feeds
 * the SOF, COM, PSIR and XMP segments that are present into a handler instance, adds
 * Exif data, and returns the merged result.
 *
 * @param string $filename Path of the JPEG file
 * @return array Merged metadata as produced by getMetadataArray()
 */
public static function Jpeg( $filename ) {
	$xmpSupported = XMPReader::isSupported();
	$handler = new self();

	$segments = JpegMetadataExtractor::segmentSplitter( $filename );

	if ( isset( $segments['SOF'] ) ) {
		$handler->addMetadata( [ 'SOF' => $segments['SOF'] ] );
	}

	// Only record the comments when at least one is present.
	if ( isset( $segments['COM'], $segments['COM'][0] ) ) {
		$handler->addMetadata( [ 'JPEGFileComment' => $segments['COM'] ], 'native' );
	}

	if ( isset( $segments['PSIR'] ) && count( $segments['PSIR'] ) > 0 ) {
		foreach ( $segments['PSIR'] as $psirSegment ) {
			$handler->doApp13( $psirSegment );
		}
	}

	if ( $xmpSupported && isset( $segments['XMP'] ) ) {
		$reader = new XMPReader( LoggerFactory::getInstance( 'XMP' ), $filename );
		$reader->parse( $segments['XMP'] );
		// Support for extended XMP in JPEG files is not well tested and a bit fragile.
		foreach ( $segments['XMP_ext'] as $extendedPacket ) {
			$reader->parseExtended( $extendedPacket );
		}
		foreach ( $reader->getResults() as $type => $items ) {
			$handler->addMetadata( $items, $type );
		}
	}

	// Fall back to 'BE' when the splitter did not report a byte order.
	$handler->getExif( $filename, $segments['byteOrder'] ?? 'BE' );

	return $handler->getMetadataArray();
}
204 
/**
 * Entry point for PNG files.
 *
 * Takes the array returned by PNGMetadataExtractor::getMetadata(), moves its 'text'
 * entry (and any XMP packet found at text/xmp/x-default) into a handler instance,
 * and puts the merged result back under the 'metadata' key.
 *
 * @param string $filename Path of the PNG file
 * @return array The extractor's array without 'text', plus a 'metadata' entry that
 *  also carries '_MW_PNG_VERSION'
 */
public static function PNG( $filename ) {
	$xmpSupported = XMPReader::isSupported();

	$handler = new self();
	$result = PNGMetadataExtractor::getMetadata( $filename );

	// A missing packet is treated the same as an empty one.
	$xmpPacket = $result['text']['xmp']['x-default'] ?? '';
	if ( $xmpPacket !== '' && $xmpSupported ) {
		$reader = new XMPReader( LoggerFactory::getInstance( 'XMP' ), $filename );
		$reader->parse( $xmpPacket );
		foreach ( $reader->getResults() as $type => $xmpSection ) {
			$handler->addMetadata( $xmpSection, $type );
		}
	}

	// The XMP packet has been handled above; the remaining text goes in as 'native'.
	unset( $result['text']['xmp'] );
	// @phan-suppress-next-line PhanTypePossiblyInvalidDimOffset xmp is not alone in text, makes text always set
	$handler->addMetadata( $result['text'], 'native' );
	unset( $result['text'] );

	$result['metadata'] = $handler->getMetadataArray();
	$result['metadata']['_MW_PNG_VERSION'] = PNGMetadataExtractor::VERSION;

	return $result;
}
237 
/**
 * Entry point for GIF files.
 *
 * Takes the array returned by GIFMetadataExtractor::getMetadata(), moves its
 * 'comment' and 'xmp' entries into a handler instance, and puts the merged result
 * back under the 'metadata' key.
 *
 * @param string $filename Path of the GIF file
 * @return array The extractor's array without 'comment' and 'xmp', plus a 'metadata'
 *  entry that also carries '_MW_GIF_VERSION'
 */
public static function GIF( $filename ) {
	$handler = new self();
	$info = GIFMetadataExtractor::getMetadata( $filename );

	$comments = $info['comment'];
	if ( count( $comments ) > 0 ) {
		$handler->addMetadata( [ 'GIFFileComment' => $comments ], 'native' );
	}

	$xmpPacket = $info['xmp'];
	// XMP support is only checked when there is a packet to parse.
	if ( $xmpPacket !== '' && XMPReader::isSupported() ) {
		$reader = new XMPReader( LoggerFactory::getInstance( 'XMP' ), $filename );
		$reader->parse( $xmpPacket );
		foreach ( $reader->getResults() as $type => $xmpSection ) {
			$handler->addMetadata( $xmpSection, $type );
		}
	}

	unset( $info['comment'], $info['xmp'] );

	$info['metadata'] = $handler->getMetadataArray();
	$info['metadata']['_MW_GIF_VERSION'] = GIFMetadataExtractor::VERSION;

	return $info;
}
271 
/**
 * Entry point for TIFF files: returns the filtered Exif data of the file.
 *
 * The byte order is detected with getTiffByteOrder() and handed to the Exif class.
 *
 * @param string $filename Path of the TIFF file
 * @return array Filtered Exif data, with 'MEDIAWIKI_EXIF_VERSION' added
 * @throws MWException If the file does not exist, its byte order cannot be
 *  determined, or no data could be extracted
 */
public static function Tiff( $filename ) {
	if ( !file_exists( $filename ) ) {
		throw new MWException( "File doesn't exist - $filename" );
	}

	$byteOrder = self::getTiffByteOrder( $filename );
	if ( !$byteOrder ) {
		throw new MWException( "Error determining byte order of $filename" );
	}

	$data = ( new Exif( $filename, $byteOrder ) )->getFilteredData();
	if ( !$data ) {
		throw new MWException( "Could not extract data from tiff file $filename" );
	}

	$data['MEDIAWIKI_EXIF_VERSION'] = Exif::version();

	return $data;
}
304 
/**
 * Read the first 2 bytes of a TIFF file to figure out Little Endian or Big Endian.
 *
 * @param string $filename Path of the file to inspect
 * @return string|false 'LE' when the file starts with 'II', 'BE' when it starts
 *  with 'MM', false when the file cannot be opened or starts with anything else
 */
public static function getTiffByteOrder( $filename ) {
	$handle = fopen( $filename, 'rb' );
	if ( !$handle ) {
		return false;
	}
	$marker = fread( $handle, 2 );
	fclose( $handle );

	if ( $marker === 'II' ) {
		// II for Intel.
		return 'LE';
	}
	if ( $marker === 'MM' ) {
		// MM for Motorola.
		return 'BE';
	}

	// Something went wrong.
	return false;
}
330 }
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
Class to deal with reconciling and extracting metadata from bitmap images.
getExif( $filename, $byteOrder)
Get exif info using exif class.
addMetadata( $metaArray, $type='other')
Add misc metadata.
static getTiffByteOrder( $filename)
Read the first 2 bytes of a tiff file to figure out Little Endian or Big Endian.
static Tiff( $filename)
This doesn't do much yet, but eventually I plan to add XMP support for Tiff.
static Jpeg( $filename)
Main entry point for JPEG files.
getMetadataArray()
Merge together the various types of metadata; the different types have different priorities,...
static PNG( $filename)
Entry point for PNG files. At some point in the future this might merge the various PNG tEXt chunks to that ...
static GIF( $filename)
function for gif images.
Class to extract and validate Exif data from jpeg (and possibly tiff) files.
Definition: Exif.php:34
static version()
#-
Definition: Exif.php:714
static getMetadata( $filename)
static parse( $rawData)
This takes the results of iptcparse() and puts it into a form that can be handled by mediawiki.
Definition: IPTC.php:42
static doPSIR( $app13)
This reads the photoshop image resource.
static segmentSplitter( $filename)
Function to extract metadata segments of interest from jpeg files based on GIFMetadataExtractor.
MediaWiki exception.
Definition: MWException.php:32
PSR-3 logger instance factory.
A class containing constants representing the names of configuration variables.
Service locator for MediaWiki core services.
static getMetadata( $filename)