MediaWiki REL1_30
XMP.php
Go to the documentation of this file.
1<?php
24use Psr\Log\LoggerAwareInterface;
25use Psr\Log\LoggerInterface;
26use Psr\Log\NullLogger;
27use Wikimedia\ScopedCallback;
28
53class XMPReader implements LoggerAwareInterface {
/** @var array Item definitions from XMPInfo::getItems(), looked up as $items[namespace][tag] */
protected $items;

/** @var array Stack of the current element (and previous element, and so on); index 0 is the innermost */
private $curItem = [];

/** @var bool|string Final name of the struct currently being filled in, or false when not inside one */
private $ancestorStruct = false;

/** @var bool|string Temporary holder for character data collected by char(); false when nothing is pending */
private $charContent = false;

/** @var array Stack of MODE_* values; index 0 is the current mode */
private $mode = [];

/** @var array Extracted values, grouped under 'xmp-<map_group>' keys */
private $results = [];

/** @var bool True while handling the <rdf:li> members of a list */
private $processingArray = false;

/** @var bool|string Lower-cased xml:lang of the current list item (lang alts only), else false */
private $itemLang = false;

/** @var resource|null Parser created by xml_parser_create_ns(), or null once freed */
private $xmlParser;

/** @var bool|string Character set of the input, like 'UTF-8'; false until detected by parse() */
private $charset = false;

/** @var int Offset at which the next XMPExtended block is expected to start */
private $extendedXMPOffset = 0;

/** @var int One of the PARSABLE_* states */
private $parsable = 0;

/** @var string Content held back while no root element or doctype has been seen yet */
private $xmlParsableBuffer = '';

const MODE_INITIAL = 0;
const MODE_IGNORE = 1;
const MODE_LI = 2;
const MODE_LI_LANG = 3;
const MODE_QDESC = 4;

// The following MODE constants are also used in the
// $items array to denote what type of property the item is.
const MODE_SIMPLE = 10;
const MODE_STRUCT = 11; // structure (associative array)
const MODE_SEQ = 12; // ordered list
const MODE_BAG = 13; // unordered list
const MODE_LANG = 14;
const MODE_ALT = 15; // non-language alt. Currently not implemented, and not needed atm.
const MODE_BAGSTRUCT = 16; // A BAG of Structs.

const NS_RDF = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#';
const NS_XML = 'http://www.w3.org/XML/1998/namespace';

// States used while determining if XML is safe to parse.
// PARSABLE_UNKNOWN and PARSABLE_BUFFERING are used by resetXMLParser() and
// checkParseSafety(); their values sit between/around OK (1) and NO (3).
const PARSABLE_UNKNOWN = 0;
const PARSABLE_OK = 1;
const PARSABLE_BUFFERING = 2;
const PARSABLE_NO = 3;

/** @var LoggerInterface Destination for diagnostic messages */
private $logger;
131
/**
 * Set up the reader: install a logger, load the item definitions and
 * create a fresh XML parser.
 *
 * @param LoggerInterface|null $logger Logger to use; a NullLogger is used when omitted
 * @throws RuntimeException If xml_parser_create_ns() is not available
 */
function __construct( LoggerInterface $logger = null ) {
	// this should already be checked by this point
	if ( !function_exists( 'xml_parser_create_ns' ) ) {
		throw new RuntimeException( 'XMP support requires XML Parser' );
	}

	$this->setLogger( $logger ?: new NullLogger() );
	$this->items = XMPInfo::getItems();
	$this->resetXMLParser();
}
151
/**
 * Replace the logger that receives this reader's diagnostic messages.
 *
 * @param LoggerInterface $logger
 */
public function setLogger( LoggerInterface $logger ) {
	$this->logger = $logger;
}
155
/**
 * Free the XML parser, if one is currently allocated, and forget it.
 */
private function destroyXMLParser() {
	if ( !$this->xmlParser ) {
		// Nothing to free.
		return;
	}

	xml_parser_free( $this->xmlParser );
	$this->xmlParser = null;
}
169
/**
 * Throw away any existing parser and build a new namespace-aware one.
 *
 * The new parser reports element names as "<namespace> <tag>" (space
 * separator), keeps case, skips whitespace, and is wired to this object's
 * startElement(), endElement() and char() handlers. The doctype safety
 * state and its buffer are reset as well.
 */
private function resetXMLParser() {
	$this->destroyXMLParser();

	$parser = xml_parser_create_ns( 'UTF-8', ' ' );
	xml_parser_set_option( $parser, XML_OPTION_CASE_FOLDING, 0 );
	xml_parser_set_option( $parser, XML_OPTION_SKIP_WHITE, 1 );
	xml_set_element_handler( $parser, [ $this, 'startElement' ], [ $this, 'endElement' ] );
	xml_set_character_data_handler( $parser, [ $this, 'char' ] );
	$this->xmlParser = $parser;

	$this->xmlParsableBuffer = '';
	$this->parsable = self::PARSABLE_UNKNOWN;
}
190
/**
 * Check whether the PHP features this class relies on are present.
 *
 * @return bool True when both xml_parser_create_ns() and the XMLReader class exist
 */
public static function isSupported() {
	$hasXmlParser = function_exists( 'xml_parser_create_ns' );

	return $hasXmlParser && class_exists( 'XMLReader' );
}
198
/**
 * Get the result array, post-processed for consumers.
 *
 * The 'xmp-special' group holds metadata that affects how other values
 * are extracted (for example xmpNote:HasExtendedXMP). It is folded into
 * the other groups here and then removed from the returned array.
 *
 * @return array Copy of the results without the 'xmp-special' group
 */
public function getResults() {
	$data = $this->results;

	// photoshop:AuthorsPosition is really part of another property
	// (see 2:85 in IPTC, and pg 21 of the IPTC4XMP standard), so prepend
	// it to the first creator. This only applies to the first creator and
	// only to the dc:Creator prop, not the exif:Artist prop.
	if ( isset( $data['xmp-special']['AuthorsPosition'] )
		&& is_string( $data['xmp-special']['AuthorsPosition'] )
		&& isset( $data['xmp-general']['Artist'][0] )
	) {
		$position = $data['xmp-special']['AuthorsPosition'];
		$firstArtist = $data['xmp-general']['Artist'][0];
		$data['xmp-general']['Artist'][0] = $position . ', ' . $firstArtist;
	}

	// Flatten LocationShown and LocationCreated into the non-hierarchal
	// form used by the other location fields. The value is the suffix
	// appended to each field name.
	$locationSuffixes = [
		'LocationShown' => 'Dest',
		'LocationCreated' => 'Created',
	];
	foreach ( $locationSuffixes as $property => $suffix ) {
		// the is_array is just paranoia. It should always be an array.
		if ( !isset( $data['xmp-special'][$property][0] )
			|| !is_array( $data['xmp-special'][$property][0] )
		) {
			continue;
		}
		foreach ( $data['xmp-special'][$property] as $location ) {
			if ( !is_array( $location ) ) {
				// To avoid copying over the _type meta-fields.
				continue;
			}
			foreach ( $location as $field => $value ) {
				$data['xmp-general'][$field . $suffix][] = $value;
			}
		}
	}

	// The special values are not info to be stored about the file.
	unset( $data['xmp-special'] );

	// Convert GPSAltitude to negative if below sea level.
	if ( isset( $data['xmp-exif']['GPSAltitudeRef'] )
		&& isset( $data['xmp-exif']['GPSAltitude'] )
	) {
		// Must convert to a real before multiplying by -1
		// XMPValidate guarantees there will always be a '/' in this value.
		list( $numerator, $denominator ) = explode( '/', $data['xmp-exif']['GPSAltitude'] );
		$data['xmp-exif']['GPSAltitude'] = $numerator / $denominator;

		if ( $data['xmp-exif']['GPSAltitudeRef'] == '1' ) {
			$data['xmp-exif']['GPSAltitude'] *= -1;
		}
		unset( $data['xmp-exif']['GPSAltitudeRef'] );
	}

	return $data;
}
286
/**
 * Main entry point: feed a piece of XMP to the parser.
 *
 * On the first call the character set is detected from a byte order mark
 * (UTF-8 is assumed when there is none) and non-UTF-8 input is converted.
 * Before anything reaches xml_parse(), the content is checked for a doctype
 * declaration; content seen before the first element is buffered.
 *
 * @param string $content XMP data
 * @param bool $allOfIt Passed to xml_parse() as the "final" flag; when true
 *   the parser is freed after a successful parse
 * @return bool False if an error or exception occurred (results are blanked),
 *   true otherwise
 */
public function parse( $content, $allOfIt = true ) {
	if ( !$this->xmlParser ) {
		$this->resetXMLParser();
	}

	try {
		// detect encoding by looking for BOM which is supposed to be in processing instruction.
		// see page 12 of http://www.adobe.com/devnet/xmp/pdfs/XMPSpecificationPart3.pdf
		if ( !$this->charset ) {
			$charsetByBom = [
				"\xFE\xFF" => 'UTF-16BE',
				"\xFF\xFE" => 'UTF-16LE',
				"\x00\x00\xFE\xFF" => 'UTF-32BE',
				"\xFF\xFE\x00\x00" => 'UTF-32LE',
				"\xEF\xBB\xBF" => 'UTF-8',
			];
			$found = [];
			$hasBom = preg_match( '/\xEF\xBB\xBF|\xFE\xFF|\x00\x00\xFE\xFF|\xFF\xFE\x00\x00|\xFF\xFE/',
				$content, $found );

			if ( !$hasBom ) {
				// standard specifically says, if no bom assume utf-8
				$this->charset = 'UTF-8';
			} elseif ( isset( $charsetByBom[$found[0]] ) ) {
				$this->charset = $charsetByBom[$found[0]];
			} else {
				// this should be impossible to get to
				throw new RuntimeException( "Invalid BOM" );
			}
		}

		// don't convert if already utf-8
		if ( $this->charset !== 'UTF-8' ) {
			MediaWiki\suppressWarnings();
			$content = iconv( $this->charset, 'UTF-8//IGNORE', $content );
			MediaWiki\restoreWarnings();
		}

		// Ensure the XMP block does not have an xml doctype declaration, which
		// could declare entities unsafe to parse with xml_parse (T85848/T71210).
		if ( $this->parsable !== self::PARSABLE_OK ) {
			if ( $this->parsable === self::PARSABLE_NO ) {
				throw new RuntimeException( 'Unsafe doctype declaration in XML.' );
			}

			$content = $this->xmlParsableBuffer . $content;
			if ( !$this->checkParseSafety( $content ) ) {
				$sawDoctype = ( $this->parsable === self::PARSABLE_NO );
				if ( !$allOfIt && !$sawDoctype ) {
					// Nothing has failed yet; more content may still arrive.
					return true;
				}
				throw new RuntimeException(
					$sawDoctype
						? 'Unsafe doctype declaration in XML.'
						: 'No root element found in XML.'
				);
			}
		}

		if ( !xml_parse( $this->xmlParser, $content, $allOfIt ) ) {
			$errorCode = xml_get_error_code( $this->xmlParser );

			$this->logger->warning(
				'{method} : Error reading XMP content: {error} ' .
				'(line: {line} column: {column} byte offset: {offset})',
				[
					'method' => __METHOD__,
					'error_code' => $errorCode,
					'error' => xml_error_string( $errorCode ),
					'line' => xml_get_current_line_number( $this->xmlParser ),
					'column' => xml_get_current_column_number( $this->xmlParser ),
					'offset' => xml_get_current_byte_index( $this->xmlParser ),
					'content' => $content,
				] );
			$this->results = []; // blank if error.
			$this->destroyXMLParser();

			return false;
		}
	} catch ( Exception $e ) {
		$this->logger->warning(
			'{method} Exception caught while parsing: ' . $e->getMessage(),
			[
				'method' => __METHOD__,
				'exception' => $e,
				'content' => $content,
			]
		);
		$this->results = [];

		return false;
	}

	if ( $allOfIt ) {
		$this->destroyXMLParser();
	}

	return true;
}
407
/**
 * Entry point for XMPExtended blocks in jpeg files.
 *
 * The block layout read here is: a 32 byte GUID, a 4 byte big-endian
 * "length", a 4 byte big-endian "offset", then the XMP portion itself.
 * Per the XMP specification (part 3) "length" is the full length of the
 * whole ExtendedXMP serialization and "offset" is where this portion
 * starts within it. The running offset therefore advances by the size of
 * the portion actually received, and parsing is finished once the full
 * length has been consumed.
 *
 * Blocks are only accepted in order; an out-of-order block is ignored.
 *
 * @todo FIXME: This is untested. Hard to find example files
 *   or programs that make such files..
 * @param string $content XMPExtended block minus the namespace signature
 * @return bool False if the block was ignored or could not be parsed,
 *   otherwise the result of parse()
 */
public function parseExtended( $content ) {
	$guid = substr( $content, 0, 32 );
	if ( !isset( $this->results['xmp-special']['HasExtendedXMP'] )
		|| $this->results['xmp-special']['HasExtendedXMP'] !== $guid
	) {
		$this->logger->info( __METHOD__ .
			" Ignoring XMPExtended block due to wrong guid (guid= '$guid')" );

		return false;
	}

	$len = unpack( 'Nlength/Noffset', substr( $content, 32, 8 ) );
	if ( !$len ||
		$len['length'] < 4 ||
		$len['offset'] < 0 ||
		$len['offset'] > $len['length']
	) {
		$this->logger->info(
			__METHOD__ . 'Error reading extended XMP block, invalid length or offset.'
		);

		return false;
	}

	// we're not very robust here. we should accept it in the wrong order.
	// To quote the XMP standard:
	// "A JPEG writer should write the ExtendedXMP marker segments in order,
	// immediately following the StandardXMP. However, the JPEG standard
	// does not require preservation of marker segment order. A robust JPEG
	// reader should tolerate the marker segments in any order."
	// On the other hand, the probability that an image will have more than
	// 128k of metadata is rather low... so the probability that it will have
	// > 128k, and be in the wrong order is very low...
	if ( $len['offset'] !== $this->extendedXMPOffset ) {
		$this->logger->info( __METHOD__ . 'Ignoring XMPExtended block due to wrong order. (Offset was '
			. $len['offset'] . ' but expected ' . $this->extendedXMPOffset . ')' );

		return false;
	}

	if ( $len['offset'] === 0 ) {
		// if we're starting the extended block, we've probably already
		// done the XMPStandard block, so reset.
		$this->resetXMLParser();
	}

	$actualContent = (string)substr( $content, 40 );

	// Advance by the bytes of this portion, not by the total length:
	// the next block's offset is measured from the start of the whole
	// ExtendedXMP serialization.
	$this->extendedXMPOffset += strlen( $actualContent );

	// Done once every byte of the declared full length has been seen.
	$atEnd = $this->extendedXMPOffset >= $len['length'];

	$this->logger->debug( __METHOD__ . 'Parsing a XMPExtended block' );

	return $this->parse( $actualContent, $atEnd );
}
478
/**
 * Character data handler, registered with the XML parser.
 *
 * Whitespace-only data is dropped. Data is silently skipped in
 * MODE_IGNORE, accumulated into $charContent in MODE_SIMPLE and
 * MODE_QDESC, and rejected in every other mode.
 *
 * @param resource $parser XML parser (unused here)
 * @param string $data Character data
 * @throws RuntimeException On character data with no mode set, or in a
 *   mode that does not accept it
 */
function char( $parser, $data ) {
	$data = trim( $data );
	if ( $data === "" ) {
		return;
	}

	if ( !isset( $this->mode[0] ) ) {
		throw new RuntimeException( 'Unexpected character data before first rdf:Description element' );
	}

	$currentMode = $this->mode[0];
	if ( $currentMode === self::MODE_IGNORE ) {
		return;
	}

	$acceptsText = ( $currentMode === self::MODE_SIMPLE || $currentMode === self::MODE_QDESC );
	if ( !$acceptsText ) {
		throw new RuntimeException( 'character data where not expected. (mode ' . $currentMode . ')' );
	}

	// to check, how does this handle w.s.
	$this->charContent = ( $this->charContent === false )
		? $data
		: $this->charContent . $data;
}
522
/**
 * Check whether the given XML is safe to hand to xml_parse().
 *
 * Reads nodes with XMLReader until the first element (safe) or a doctype
 * declaration (unsafe) is reached, and records the outcome in $parsable.
 * If neither is reached, the content is stored in $xmlParsableBuffer and
 * the state becomes PARSABLE_BUFFERING.
 *
 * @param string $content XML seen so far
 * @return bool True only when the first element was reached without a doctype
 */
private function checkParseSafety( $content ) {
	$reader = new XMLReader();

	// For XMLReader to parse incomplete/invalid XML, it has to be open()'ed
	// instead of using XML().
	$reader->open(
		'data://text/plain,' . urlencode( $content ),
		null,
		LIBXML_NOERROR | LIBXML_NOWARNING | LIBXML_NONET
	);

	// Restore the previous entity loader setting when $restoreLoader goes
	// out of scope; the variable is otherwise unused on purpose.
	$previousSetting = libxml_disable_entity_loader( true );
	$restoreLoader = new ScopedCallback(
		'libxml_disable_entity_loader',
		[ $previousSetting ]
	);
	$reader->setParserProperty( XMLReader::SUBST_ENTITIES, false );

	$verdict = null;

	// Even with LIBXML_NOWARNING set, XMLReader::read gives a warning
	// when parsing truncated XML, which causes unit tests to fail.
	MediaWiki\suppressWarnings();
	while ( $verdict === null && $reader->read() ) {
		if ( $reader->nodeType === XMLReader::ELEMENT ) {
			// Reached the first element without hitting a doctype declaration
			$this->parsable = self::PARSABLE_OK;
			$verdict = true;
		} elseif ( $reader->nodeType === XMLReader::DOC_TYPE ) {
			$this->parsable = self::PARSABLE_NO;
			$verdict = false;
		}
	}
	MediaWiki\restoreWarnings();

	if ( $verdict === null ) {
		// Reached the end of the parsable xml without finding an element
		// or doctype. Buffer and try again.
		$this->parsable = self::PARSABLE_BUFFERING;
		$this->xmlParsableBuffer = $content;

		return false;
	}

	return $verdict;
}
579
/**
 * Closing element while in MODE_IGNORE.
 *
 * Only the element on top of the $curItem stack ends the ignored region;
 * any other closing element is left alone.
 *
 * @param string $elm Namespace and tag of the element, separated by a space
 */
private function endElementModeIgnore( $elm ) {
	if ( $this->curItem[0] !== $elm ) {
		return;
	}

	array_shift( $this->curItem );
	array_shift( $this->mode );
}
592
/**
 * Closing element while in MODE_SIMPLE.
 *
 * Saves any pending character data as the property's value, then pops
 * both the element and mode stacks.
 *
 * @param string $elm Namespace and tag of the element, separated by a space
 */
private function endElementModeSimple( $elm ) {
	if ( $this->charContent !== false ) {
		// if we're processing an array, use the original element
		// name instead of rdf:li.
		$property = $this->processingArray ? $this->curItem[0] : $elm;
		list( $ns, $tag ) = explode( ' ', $property, 2 );

		$this->saveValue( $ns, $tag, $this->charContent );
		$this->charContent = false; // reset
	}

	array_shift( $this->curItem );
	array_shift( $this->mode );
}
624
/**
 * Closing element of a nested property (struct, list or lang alt).
 *
 * Checks that the closing element matches the one on top of the $curItem
 * stack, runs the property's validation callback (if any) over the value
 * collected in $results, then pops the stacks and clears the per-property
 * state ($ancestorStruct, $processingArray, $itemLang).
 *
 * @param string $elm Namespace and tag of the element, separated by a space
 * @throws RuntimeException On a nesting mismatch
 */
private function endElementNested( $elm ) {
	/* cur item must be the same as $elm, unless if in MODE_STRUCT
	 * in which case it could also be rdf:Description */
	if ( $this->curItem[0] !== $elm
		&& !( $elm === self::NS_RDF . ' Description'
			&& $this->mode[0] === self::MODE_STRUCT )
	) {
		throw new RuntimeException( "nesting mismatch. got a </$elm> but expected a </" .
			$this->curItem[0] . '>' );
	}

	// Validate structures.
	list( $ns, $tag ) = explode( ' ', $elm, 2 );
	if ( isset( $this->items[$ns][$tag]['validate'] ) ) {
		$info =& $this->items[$ns][$tag];
		$finalName = isset( $info['map_name'] )
			? $info['map_name'] : $tag;

		if ( is_array( $info['validate'] ) ) {
			$validate = $info['validate'];
		} else {
			$validator = new XMPValidate( $this->logger );
			$validate = [ $validator, $info['validate'] ];
		}

		if ( !isset( $this->results['xmp-' . $info['map_group']][$finalName] ) ) {
			// This can happen if all the members of the struct failed validation.
			$this->logger->debug( __METHOD__ . " <$ns:$tag> has no valid members." );
		} elseif ( is_callable( $validate ) ) {
			$val =& $this->results['xmp-' . $info['map_group']][$finalName];
			call_user_func_array( $validate, [ $info, &$val, false ] );
			if ( is_null( $val ) ) {
				// the idea being the validation function will unset the variable if
				// its invalid.
				$this->logger->info( __METHOD__ . " <$ns:$tag> failed validation." );
				unset( $this->results['xmp-' . $info['map_group']][$finalName] );
			}
		} else {
			// $validate[0] is an XMPValidate object when built above, and an
			// object cannot be concatenated into a string; name its class instead.
			$owner = is_object( $validate[0] ) ? get_class( $validate[0] ) : $validate[0];
			$this->logger->warning( __METHOD__ . " Validation function for $finalName ("
				. $owner . '::' . $validate[1] . '()) is not callable.' );
		}
	}

	array_shift( $this->curItem );
	array_shift( $this->mode );
	$this->ancestorStruct = false;
	$this->processingArray = false;
	$this->itemLang = false;
}
692
/**
 * Closing element while in MODE_LI or MODE_LI_LANG, i.e. the end of the
 * <rdf:Seq>, <rdf:Bag> or <rdf:Alt> container.
 *
 * Pops the mode stack and, if the list collected any values, tags the
 * result with a '_type' of 'ol', 'ul' or 'lang'.
 *
 * @param string $elm Namespace and tag of the element, separated by a space
 * @throws RuntimeException If $elm is not one of the three container elements
 */
private function endElementModeLi( $elm ) {
	list( $ns, $tag ) = explode( ' ', $this->curItem[0], 2 );
	$info = $this->items[$ns][$tag];
	$finalName = isset( $info['map_name'] ) ? $info['map_name'] : $tag;
	$group = 'xmp-' . $info['map_group'];

	array_shift( $this->mode );

	if ( !isset( $this->results[$group][$finalName] ) ) {
		$this->logger->debug( __METHOD__ . " Empty compund element $finalName." );

		return;
	}

	switch ( $elm ) {
		case self::NS_RDF . ' Seq':
			$this->results[$group][$finalName]['_type'] = 'ol';
			break;
		case self::NS_RDF . ' Bag':
			$this->results[$group][$finalName]['_type'] = 'ul';
			break;
		case self::NS_RDF . ' Alt':
			// extra if needed as you could theoretically have a non-language alt.
			if ( $info['mode'] === self::MODE_LANG ) {
				$this->results[$group][$finalName]['_type'] = 'lang';
			}
			break;
		default:
			throw new RuntimeException(
				__METHOD__ . " expected </rdf:seq> or </rdf:bag> but instead got $elm."
			);
	}
}
741
/**
 * Closing element while in MODE_QDESC.
 *
 * A closing <rdf:value> saves the pending character data as the value of
 * the current property; any other closing element leaves MODE_QDESC by
 * popping the stacks.
 *
 * @param string $elm Namespace and tag of the element, separated by a space
 */
private function endElementModeQDesc( $elm ) {
	if ( $elm !== self::NS_RDF . ' value' ) {
		array_shift( $this->mode );
		array_shift( $this->curItem );

		return;
	}

	list( $ns, $tag ) = explode( ' ', $this->curItem[0], 2 );
	$this->saveValue( $ns, $tag, $this->charContent );
}
763
/**
 * Handler for closing elements, registered with the XML parser.
 *
 * Skips the wrapper elements (rdf:RDF, xmpmeta, xapmeta) and elements
 * without a namespace, then dispatches on the current mode to the
 * matching endElement* helper.
 *
 * @param resource $parser XML parser (unused here)
 * @param string $elm Namespace and tag of the element, separated by a space
 * @throws RuntimeException If there is no current mode or current item, or
 *   an element other than rdf:Description ends in MODE_INITIAL
 */
function endElement( $parser, $elm ) {
	if ( $elm === ( self::NS_RDF . ' RDF' )
		|| $elm === 'adobe:ns:meta/ xmpmeta'
		|| $elm === 'adobe:ns:meta/ xapmeta'
	) {
		// ignore these.
		return;
	}

	if ( $elm === self::NS_RDF . ' type' ) {
		// these aren't really supported properly yet.
		// However, it appears they almost never used.
		$this->logger->info( __METHOD__ . ' encountered <rdf:type>' );
	}

	if ( strpos( $elm, ' ' ) === false ) {
		// This probably shouldn't happen.
		// However, there is a bug in an adobe product
		// that forgets the namespace on some things.
		// (Luckily they are unimportant things).
		$this->logger->info( __METHOD__ . " Encountered </$elm> which has no namespace. Skipping." );

		return;
	}

	// Count the mode stack itself; $this->mode[0] is an integer and is not
	// countable.
	if ( count( $this->mode ) === 0 ) {
		// This should never ever happen and means
		// there is a pretty major bug in this class.
		throw new RuntimeException( 'Encountered end element with no mode' );
	}

	if ( count( $this->curItem ) == 0 && $this->mode[0] !== self::MODE_INITIAL ) {
		// just to be paranoid. Should always have a curItem, except for initially
		// (aka during MODE_INITAL).
		throw new RuntimeException( "Hit end element </$elm> but no curItem" );
	}

	switch ( $this->mode[0] ) {
		case self::MODE_IGNORE:
			$this->endElementModeIgnore( $elm );
			break;
		case self::MODE_SIMPLE:
			$this->endElementModeSimple( $elm );
			break;
		case self::MODE_STRUCT:
		case self::MODE_SEQ:
		case self::MODE_BAG:
		case self::MODE_LANG:
		case self::MODE_BAGSTRUCT:
			$this->endElementNested( $elm );
			break;
		case self::MODE_INITIAL:
			if ( $elm === self::NS_RDF . ' Description' ) {
				array_shift( $this->mode );
			} else {
				throw new RuntimeException( 'Element ended unexpectedly while in MODE_INITIAL' );
			}
			break;
		case self::MODE_LI:
		case self::MODE_LI_LANG:
			$this->endElementModeLi( $elm );
			break;
		case self::MODE_QDESC:
			$this->endElementModeQDesc( $elm );
			break;
		default:
			$this->logger->warning( __METHOD__ . " no mode (elm = $elm)" );
			break;
	}
}
847
/**
 * Opening element while in MODE_IGNORE.
 *
 * Only an element with the same name as the one being ignored is tracked
 * (pushed again), so that its closing element does not end the ignored
 * region early. Every other element is skipped without any bookkeeping.
 *
 * @param string $elm Namespace and tag of the element, separated by a space
 */
private function startElementModeIgnore( $elm ) {
	if ( $elm !== $this->curItem[0] ) {
		return;
	}

	array_unshift( $this->curItem, $elm );
	array_unshift( $this->mode, self::MODE_IGNORE );
}
865
/**
 * Opening element while in MODE_BAG; it must be <rdf:Bag>.
 *
 * @param string $elm Namespace and tag of the element, separated by a space
 * @throws RuntimeException If the element is anything other than rdf:Bag
 */
private function startElementModeBag( $elm ) {
	if ( $elm !== self::NS_RDF . ' Bag' ) {
		throw new RuntimeException( "Expected <rdf:Bag> but got $elm." );
	}

	array_unshift( $this->mode, self::MODE_LI );
}
880
/**
 * Opening element while in MODE_SEQ; it should be <rdf:Seq>.
 *
 * An <rdf:Bag> is tolerated (and logged) because some software writes a
 * Bag where a Seq is required (T29105).
 *
 * @param string $elm Namespace and tag of the element, separated by a space
 * @throws RuntimeException If the element is neither rdf:Seq nor rdf:Bag
 */
private function startElementModeSeq( $elm ) {
	$isSeq = ( $elm === self::NS_RDF . ' Seq' );
	$isBag = ( $elm === self::NS_RDF . ' Bag' );

	if ( !$isSeq && !$isBag ) {
		throw new RuntimeException( "Expected <rdf:Seq> but got $elm." );
	}

	if ( $isBag ) {
		# T29105
		$this->logger->info( __METHOD__ . ' Expected an rdf:Seq, but got an rdf:Bag. Pretending'
			. ' it is a Seq, since some buggy software is known to screw this up.' );
	}

	array_unshift( $this->mode, self::MODE_LI );
}
900
/**
 * Opening element while in MODE_LANG (language alternative); it must be
 * <rdf:Alt>.
 *
 * @param string $elm Namespace and tag of the element, separated by a space
 * @throws RuntimeException If the element is anything other than rdf:Alt
 */
private function startElementModeLang( $elm ) {
	if ( $elm !== self::NS_RDF . ' Alt' ) {
		// Name the element that is actually required here.
		throw new RuntimeException( "Expected <rdf:Alt> but got $elm." );
	}

	array_unshift( $this->mode, self::MODE_LI_LANG );
}
922
/**
 * Opening element while in MODE_SIMPLE, where normally only character
 * data is expected.
 *
 * An <rdf:Description> means the value has qualifiers: switch to
 * MODE_QDESC and save an rdf:value attribute if present. A bare
 * <rdf:value> is an error. Anything else is logged and ignored.
 *
 * @param string $elm Namespace and tag of the element, separated by a space
 * @param array $attribs Attributes of the element
 * @throws RuntimeException On an unexpected <rdf:value>
 */
private function startElementModeSimple( $elm, $attribs ) {
	if ( $elm === self::NS_RDF . ' value' ) {
		// This should not be here.
		throw new RuntimeException( __METHOD__ . ' Encountered <rdf:value> where it was unexpected.' );
	}

	if ( $elm === self::NS_RDF . ' Description' ) {
		// If this value has qualifiers
		array_unshift( $this->mode, self::MODE_QDESC );
		array_unshift( $this->curItem, $this->curItem[0] );

		$valueAttr = self::NS_RDF . ' value';
		if ( isset( $attribs[$valueAttr] ) ) {
			list( $ns, $tag ) = explode( ' ', $this->curItem[0], 2 );
			$this->saveValue( $ns, $tag, $attribs[$valueAttr] );
		}

		return;
	}

	// something else we don't recognize, like a qualifier maybe.
	$this->logger->info( __METHOD__ .
		" Encountered element <$elm> where only expecting character data as value of " .
		$this->curItem[0] );
	array_unshift( $this->mode, self::MODE_IGNORE );
	array_unshift( $this->curItem, $elm );
}
963
/**
 * Opening element while in MODE_QDESC.
 *
 * <rdf:value> needs no action at this point; every other element is
 * treated as a qualifier and ignored.
 *
 * @param string $elm Namespace and tag of the element, separated by a space
 */
private function startElementModeQDesc( $elm ) {
	if ( $elm !== self::NS_RDF . ' value' ) {
		// a qualifier, which we ignore
		array_unshift( $this->mode, self::MODE_IGNORE );
		array_unshift( $this->curItem, $elm );
	}
}
986
/**
 * Opening element while in MODE_INITIAL, i.e. a property directly inside
 * the outer rdf:Description.
 *
 * Known properties push their configured mode; unknown ones, and ones
 * that may only appear inside a structure, are ignored together with
 * their children. Elements in the RDF namespace only have their
 * attributes processed.
 *
 * @param string $ns Namespace
 * @param string $tag Tag name (without namespace prefix)
 * @param array $attribs Attributes of the element
 * @throws RuntimeException If character data is pending when a known property opens
 */
private function startElementModeInitial( $ns, $tag, $attribs ) {
	if ( $ns !== self::NS_RDF ) {
		$item = $ns . ' ' . $tag;

		if ( !isset( $this->items[$ns][$tag] ) ) {
			// This element is not on our list of allowed elements so ignore.
			$this->logger->debug( __METHOD__ . " Ignoring unrecognized element <$ns:$tag>." );
			array_unshift( $this->mode, self::MODE_IGNORE );
			array_unshift( $this->curItem, $item );

			return;
		}

		if ( isset( $this->items[$ns][$tag]['structPart'] ) ) {
			// If this element is supposed to appear only as
			// a child of a structure, but appears here (not as
			// a child of a struct), then something weird is
			// happening, so ignore this element and its children.
			$this->logger->warning( "Encountered <$ns:$tag> outside"
				. " of its expected parent. Ignoring." );

			array_unshift( $this->mode, self::MODE_IGNORE );
			array_unshift( $this->curItem, $item );

			return;
		}

		$itemMode = $this->items[$ns][$tag]['mode'];
		array_unshift( $this->mode, $itemMode );
		array_unshift( $this->curItem, $item );

		if ( $itemMode === self::MODE_STRUCT ) {
			$this->ancestorStruct = isset( $this->items[$ns][$tag]['map_name'] )
				? $this->items[$ns][$tag]['map_name']
				: $tag;
		}

		if ( $this->charContent !== false ) {
			// Something weird.
			// Should not happen in valid XMP.
			throw new RuntimeException( 'tag nested in non-whitespace characters.' );
		}
	}

	// process attributes
	$this->doAttribs( $attribs );
}
1040
/**
 * Opening element while in MODE_STRUCT; generally a field of a compound
 * property.
 *
 * A known field pushes its configured mode, provided the enclosing
 * struct's 'children' list (when there is one) allows it. An unknown
 * field is ignored. An inner rdf:Description has its attributes processed
 * and pushes another MODE_STRUCT level; other RDF elements are left alone.
 *
 * @param string $ns Namespace
 * @param string $tag Tag name (without namespace prefix)
 * @param array $attribs Attributes of the element
 * @throws RuntimeException If the field is not allowed in the enclosing
 *   struct, or character data is pending
 */
private function startElementModeStruct( $ns, $tag, $attribs ) {
	if ( $ns === self::NS_RDF ) {
		if ( $tag === 'Description' ) {
			$this->doAttribs( $attribs );
			array_unshift( $this->mode, self::MODE_STRUCT );
			array_unshift( $this->curItem, $this->curItem[0] );
		}

		return;
	}

	if ( !isset( $this->items[$ns][$tag] ) ) {
		array_unshift( $this->mode, self::MODE_IGNORE );
		array_unshift( $this->curItem, $ns . ' ' . $tag );

		return;
	}

	if ( isset( $this->items[$ns][$this->ancestorStruct]['children'] )
		&& !isset( $this->items[$ns][$this->ancestorStruct]['children'][$tag] )
	) {
		// This assumes that we don't have inter-namespace nesting
		// which we don't in all the properties we're interested in.
		throw new RuntimeException( " <$tag> appeared nested in <" . $this->ancestorStruct
			. "> where it is not allowed." );
	}

	array_unshift( $this->mode, $this->items[$ns][$tag]['mode'] );
	array_unshift( $this->curItem, $ns . ' ' . $tag );

	if ( $this->charContent !== false ) {
		// Something weird.
		// Should not happen in valid XMP.
		throw new RuntimeException( "tag <$tag> nested in non-whitespace characters (" .
			$this->charContent . ")." );
	}
}
1093
/**
 * Opening element while in MODE_LI; it must be <rdf:li>.
 *
 * When the enclosing property is a MODE_BAGSTRUCT the list item holds a
 * struct, so MODE_STRUCT is pushed and $ancestorStruct is set from the
 * parent property. Otherwise the item holds a simple value and
 * MODE_SIMPLE is pushed.
 *
 * @param string $elm Namespace and tag of the element, separated by a space
 * @param array $attribs Attributes of the element
 * @throws RuntimeException If the element is not rdf:li, or the mode or
 *   item stacks are shallower than expected
 */
private function startElementModeLi( $elm, $attribs ) {
	if ( $elm !== self::NS_RDF . ' li' ) {
		throw new RuntimeException( "<rdf:li> expected but got $elm." );
	}

	if ( !isset( $this->mode[1] ) ) {
		// This should never ever ever happen. Checking for it
		// to be paranoid.
		throw new RuntimeException( 'In mode Li, but no 2xPrevious mode!' );
	}

	if ( $this->mode[1] !== self::MODE_BAGSTRUCT ) {
		// Normal BAG or SEQ containing simple values.
		array_unshift( $this->mode, self::MODE_SIMPLE );
		// need to add curItem[0] on again since one is for the specific item
		// and one is for the entire group.
		array_unshift( $this->curItem, $this->curItem[0] );
		$this->processingArray = true;

		return;
	}

	// This list item contains a compound (STRUCT) value.
	array_unshift( $this->mode, self::MODE_STRUCT );
	array_unshift( $this->curItem, $elm );
	$this->processingArray = true;

	if ( !isset( $this->curItem[1] ) ) {
		// be paranoid.
		throw new RuntimeException( 'Can not find parent of BAGSTRUCT.' );
	}

	list( $parentNs, $parentTag ) = explode( ' ', $this->curItem[1] );
	$this->ancestorStruct = isset( $this->items[$parentNs][$parentTag]['map_name'] )
		? $this->items[$parentNs][$parentTag]['map_name']
		: $parentTag;

	$this->doAttribs( $attribs );
}
1142
/**
 * Opening element while in MODE_LI_LANG; it must be an <rdf:li> carrying
 * a valid xml:lang attribute.
 *
 * The language code is stored lower-cased in $itemLang and MODE_SIMPLE is
 * pushed for the item's value.
 *
 * @param string $elm Namespace and tag of the element, separated by a space
 * @param array $attribs Attributes of the element
 * @throws RuntimeException If the element is not rdf:li, or xml:lang is
 *   missing or malformed
 */
private function startElementModeLiLang( $elm, $attribs ) {
	if ( $elm !== self::NS_RDF . ' li' ) {
		throw new RuntimeException( __METHOD__ . " <rdf:li> expected but got $elm." );
	}

	$langAttr = self::NS_XML . ' lang';
	$hasValidLang = isset( $attribs[$langAttr] )
		&& preg_match( '/^[-A-Za-z0-9]{2,}$/D', $attribs[$langAttr] );
	if ( !$hasValidLang ) {
		throw new RuntimeException( __METHOD__
			. " <rdf:li> did not contain, or has invalid xml:lang attribute in lang alternative" );
	}

	// Lang is case-insensitive.
	$this->itemLang = strtolower( $attribs[$langAttr] );

	// need to add curItem[0] on again since one is for the specific item
	// and one is for the entire group.
	array_unshift( $this->curItem, $this->curItem[0] );
	array_unshift( $this->mode, self::MODE_SIMPLE );
	$this->processingArray = true;
}
1177
/**
 * Handler for opening elements, registered with the XML parser.
 *
 * Skips the wrapper elements (rdf:RDF, xmpmeta, xapmeta) and elements
 * without a namespace, enters MODE_INITIAL on the outer rdf:Description,
 * then dispatches on the current mode to the matching startElement* helper.
 *
 * @param resource $parser XML parser (unused here)
 * @param string $elm Namespace and tag of the element, separated by a space
 * @param array $attribs Attributes of the element
 * @throws RuntimeException If there is no current mode, or the mode is unknown
 */
function startElement( $parser, $elm, $attribs ) {
	if ( $elm === self::NS_RDF . ' RDF'
		|| $elm === 'adobe:ns:meta/ xmpmeta'
		|| $elm === 'adobe:ns:meta/ xapmeta'
	) {
		/* ignore. */
		return;
	} elseif ( $elm === self::NS_RDF . ' Description' ) {
		if ( count( $this->mode ) === 0 ) {
			// outer rdf:desc
			array_unshift( $this->mode, self::MODE_INITIAL );
		}
	} elseif ( $elm === self::NS_RDF . ' type' ) {
		// This doesn't support rdf:type properly.
		// In practise I have yet to see a file that
		// uses this element, however it is mentioned
		// on page 25 of part 1 of the xmp standard.
		// Also it seems as if exiv2 and exiftool do not support
		// this either (That or I misunderstand the standard)
		$this->logger->info( __METHOD__ . ' Encountered <rdf:type> which isn\'t currently supported' );
	}

	if ( strpos( $elm, ' ' ) === false ) {
		// This probably shouldn't happen.
		$this->logger->info( __METHOD__ . " Encountered <$elm> which has no namespace. Skipping." );

		return;
	}

	list( $ns, $tag ) = explode( ' ', $elm, 2 );

	if ( count( $this->mode ) === 0 ) {
		// This should not happen.
		throw new RuntimeException( 'Error extracting XMP, '
			. "encountered <$elm> with no mode" );
	}

	switch ( $this->mode[0] ) {
		case self::MODE_IGNORE:
			$this->startElementModeIgnore( $elm );
			break;
		case self::MODE_SIMPLE:
			$this->startElementModeSimple( $elm, $attribs );
			break;
		case self::MODE_INITIAL:
			$this->startElementModeInitial( $ns, $tag, $attribs );
			break;
		case self::MODE_STRUCT:
			$this->startElementModeStruct( $ns, $tag, $attribs );
			break;
		case self::MODE_BAG:
		case self::MODE_BAGSTRUCT:
			// A bag of structs is opened by <rdf:Bag> just like a plain bag.
			$this->startElementModeBag( $elm );
			break;
		case self::MODE_SEQ:
			$this->startElementModeSeq( $elm );
			break;
		case self::MODE_LANG:
			$this->startElementModeLang( $elm );
			break;
		case self::MODE_LI_LANG:
			$this->startElementModeLiLang( $elm, $attribs );
			break;
		case self::MODE_LI:
			$this->startElementModeLi( $elm, $attribs );
			break;
		case self::MODE_QDESC:
			$this->startElementModeQDesc( $elm );
			break;
		default:
			throw new RuntimeException( 'StartElement in unknown mode: ' . $this->mode[0] );
	}
}
1261
1262 // @codingStandardsIgnoreStart Generic.Files.LineLength
1278 // @codingStandardsIgnoreEnd
/**
 * Process the attributes of an element.
 *
 * rdf:parseType="Resource" on a simple property switches the current mode
 * to MODE_QDESC. rdf:value and rdf:resource are fed to char() as if they
 * were the element's text. Attributes naming a known property are saved
 * as that property's value; everything else is logged and skipped.
 *
 * @param array $attribs Attribute name ("<namespace> <tag>") => value
 * @throws RuntimeException If a known property appears as an attribute
 *   while in MODE_SIMPLE
 */
private function doAttribs( $attribs ) {
	// first check for rdf:parseType attribute, as that can change
	// how the attributes are interperted.
	$parseTypeAttr = self::NS_RDF . ' parseType';
	if ( isset( $attribs[$parseTypeAttr] )
		&& $attribs[$parseTypeAttr] === 'Resource'
		&& $this->mode[0] === self::MODE_SIMPLE
	) {
		// this is equivalent to having an inner rdf:Description
		$this->mode[0] = self::MODE_QDESC;
	}

	foreach ( $attribs as $name => $val ) {
		if ( strpos( $name, ' ' ) === false ) {
			// This shouldn't happen, but so far some old software forgets namespace
			// on rdf:about.
			$this->logger->info( __METHOD__ . ' Encountered non-namespaced attribute: '
				. " $name=\"$val\". Skipping. " );
			continue;
		}

		list( $ns, $tag ) = explode( ' ', $name, 2 );

		if ( $ns === self::NS_RDF ) {
			// resource is for url.
			// value attribute is a weird way of just putting the contents.
			if ( in_array( $tag, [ 'value', 'resource' ], true ) ) {
				$this->char( $this->xmlParser, $val );
			}
			continue;
		}

		if ( !isset( $this->items[$ns][$tag] ) ) {
			$this->logger->debug( __METHOD__ . " Ignoring unrecognized element <$ns:$tag>." );
			continue;
		}

		if ( $this->mode[0] === self::MODE_SIMPLE ) {
			throw new RuntimeException( __METHOD__
				. " $ns:$tag found as attribute where not allowed" );
		}
		$this->saveValue( $ns, $tag, $val );
	}
}
1316
/**
 * Validate a value and store it in $results.
 *
 * The value is passed by reference to the property's validation callback
 * (if one is configured); a callback signals rejection by setting it to
 * null, in which case nothing is stored. Where the value lands depends on
 * whether a struct and/or a list is currently being processed.
 *
 * @param string $ns Namespace of the property
 * @param string $tag Tag name of the property (no namespace prefix)
 * @param string $val Value to save
 */
private function saveValue( $ns, $tag, $val ) {
	$info =& $this->items[$ns][$tag];
	$finalName = isset( $info['map_name'] )
		? $info['map_name'] : $tag;

	if ( isset( $info['validate'] ) ) {
		if ( is_array( $info['validate'] ) ) {
			$validate = $info['validate'];
		} else {
			$validator = new XMPValidate( $this->logger );
			$validate = [ $validator, $info['validate'] ];
		}

		if ( is_callable( $validate ) ) {
			call_user_func_array( $validate, [ $info, &$val, true ] );
			// the reasoning behind using &$val instead of using the return value
			// is to be consistent between here and validating structures.
			if ( is_null( $val ) ) {
				$this->logger->info( __METHOD__ . " <$ns:$tag> failed validation." );

				return;
			}
		} else {
			// $validate[0] is an XMPValidate object when built above, and an
			// object cannot be concatenated into a string; name its class instead.
			$owner = is_object( $validate[0] ) ? get_class( $validate[0] ) : $validate[0];
			$this->logger->warning( __METHOD__ . " Validation function for $finalName ("
				. $owner . '::' . $validate[1] . '()) is not callable.' );
		}
	}

	$group = 'xmp-' . $info['map_group'];

	if ( $this->ancestorStruct && $this->processingArray ) {
		// Aka both an array and a struct. ( self::MODE_BAGSTRUCT )
		$this->results[$group][$this->ancestorStruct][][$finalName] = $val;
	} elseif ( $this->ancestorStruct ) {
		$this->results[$group][$this->ancestorStruct][$finalName] = $val;
	} elseif ( $this->processingArray ) {
		if ( $this->itemLang === false ) {
			// normal array
			$this->results[$group][$finalName][] = $val;
		} else {
			// lang array.
			$this->results[$group][$finalName][$this->itemLang] = $val;
		}
	} else {
		$this->results[$group][$finalName] = $val;
	}
}
1372}
$line
Definition cdb.php:58
static getItems()
Get the items array.
Definition XMPInfo.php:33
Class for reading xmp data containing properties relevant to images, and spitting out an array that F...
Definition XMP.php:53
const NS_XML
Definition XMP.php:119
const NS_RDF
Definition XMP.php:118
bool string $charContent
Temporary holder for character data that appears in xmp doc.
Definition XMP.php:64
const MODE_SEQ
Definition XMP.php:112
const MODE_LANG
Definition XMP.php:114
bool string $itemLang
Used for lang alts only.
Definition XMP.php:76
destroyXMLParser()
Free the XML parser.
Definition XMP.php:163
const MODE_STRUCT
Definition XMP.php:111
const PARSABLE_BUFFERING
Definition XMP.php:124
LoggerInterface $logger
Definition XMP.php:130
const PARSABLE_NO
Definition XMP.php:125
const PARSABLE_OK
Definition XMP.php:123
endElementModeIgnore( $elm)
When we hit a closing element in MODE_IGNORE, check to see if this is the element we started to ignore...
Definition XMP.php:586
const MODE_LI
Definition XMP.php:104
char( $parser, $data)
Character data handler Called whenever character data is found in the xmp document.
Definition XMP.php:495
setLogger(LoggerInterface $logger)
Definition XMP.php:152
getResults()
Get the result array.
Definition XMP.php:205
startElementModeLang( $elm)
Start element in MODE_LANG (language alternative); this should always be <rdf:Alt>.
Definition XMP.php:915
array $results
Array to hold results.
Definition XMP.php:70
bool string $charset
Character set like 'UTF-8'.
Definition XMP.php:82
doAttribs( $attribs)
Process attributes.
Definition XMP.php:1279
parseExtended( $content)
Entry point for XMPExtended blocks in jpeg files.
Definition XMP.php:415
endElementModeSimple( $elm)
Hit a closing element when in MODE_SIMPLE.
Definition XMP.php:608
array $curItem
Array to hold the current element (and previous element, and so on)
Definition XMP.php:58
const PARSABLE_UNKNOWN
Definition XMP.php:122
startElementModeStruct( $ns, $tag, $attribs)
Hit an opening element when in a Struct (MODE_STRUCT). This is generally for fields of a compound prop...
Definition XMP.php:1060
static isSupported()
Check if this instance supports using this class.
Definition XMP.php:195
parse( $content, $allOfIt=true)
Main function to call to parse XMP.
Definition XMP.php:299
array $mode
Stores the state the xmpreader is in (see MODE_FOO constants)
Definition XMP.php:67
int $parsable
Flag determining if the XMP is safe to parse.
Definition XMP.php:88
resetXMLParser()
Main use is if a single item has multiple xmp documents describing it.
Definition XMP.php:174
startElement( $parser, $elm, $attribs)
Hits an opening element.
Definition XMP.php:1188
startElementModeBag( $elm)
Start element in MODE_BAG (unordered array); this should always be <rdf:Bag>.
Definition XMP.php:873
const MODE_INITIAL
These are various mode constants.
Definition XMP.php:102
const MODE_ALT
Definition XMP.php:115
startElementModeQDesc( $elm)
Start an element when in MODE_QDESC.
Definition XMP.php:977
__construct(LoggerInterface $logger=null)
Primary job is to initialize the XMLParser.
Definition XMP.php:136
startElementModeInitial( $ns, $tag, $attribs)
Starting an element when in MODE_INITIAL. This usually happens when we hit an element inside the outer...
Definition XMP.php:999
const MODE_LI_LANG
Definition XMP.php:105
saveValue( $ns, $tag, $val)
Given an extracted value, save it to results array.
Definition XMP.php:1328
endElement( $parser, $elm)
Handler for hitting a closing element.
Definition XMP.php:777
startElementModeSeq( $elm)
Start element in MODE_SEQ (ordered array); this should always be <rdf:Seq>.
Definition XMP.php:888
string $xmlParsableBuffer
Buffer of XML to parse.
Definition XMP.php:91
const MODE_QDESC
Definition XMP.php:106
const MODE_SIMPLE
Definition XMP.php:110
const MODE_BAG
Definition XMP.php:113
endElementModeLi( $elm)
Hit a closing element in MODE_LI (either rdf:Seq or rdf:Bag). Add information about what type of ele...
Definition XMP.php:712
array $items
XMP item configuration array.
Definition XMP.php:55
startElementModeSimple( $elm, $attribs)
Handle an opening element when in MODE_SIMPLE.
Definition XMP.php:941
const MODE_BAGSTRUCT
Definition XMP.php:116
resource $xmlParser
A resource handle for the XML parser.
Definition XMP.php:79
endElementNested( $elm)
Hit a closing element in MODE_STRUCT, MODE_SEQ, or MODE_BAG; generally means we've finished processing a ...
Definition XMP.php:643
startElementModeIgnore( $elm)
Hit an opening element while in MODE_IGNORE.
Definition XMP.php:859
startElementModeLi( $elm, $attribs)
Opening element in MODE_LI; process elements of arrays.
Definition XMP.php:1107
bool $processingArray
If we're doing a seq or bag.
Definition XMP.php:73
const MODE_IGNORE
Definition XMP.php:103
bool string $ancestorStruct
The structure name when processing nested structures.
Definition XMP.php:61
int $extendedXMPOffset
Definition XMP.php:85
endElementModeQDesc( $elm)
End element while in MODE_QDESC; mostly when ending an element when we have a simple value that has qu...
Definition XMP.php:752
checkParseSafety( $content)
Check if a block of XML is safe to pass to xml_parse, i.e.
Definition XMP.php:531
startElementModeLiLang( $elm, $attribs)
Opening element in MODE_LI_LANG.
Definition XMP.php:1157
This contains some static methods for validating XMP properties.
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global list
Definition deferred.txt:11
namespace being checked & $result
Definition hooks.txt:2293
do that in ParserLimitReportFormat instead $parser
Definition hooks.txt:2572
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that probably a stub it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output modifiable & $code
Definition hooks.txt:863
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses just before the function returns a value If you return an< a > element with HTML attributes $attribs and contents $html will be returned If you return $ret will be returned and may include noclasses after processing & $attribs
Definition hooks.txt:1984
returning false will NOT prevent logging $e
Definition hooks.txt:2146