108 const NS_RDF =
'http://www.w3.org/1999/02/22-rdf-syntax-ns#';
109 const NS_XML =
'http://www.w3.org/XML/1998/namespace';
118 if ( !function_exists(
'xml_parser_create_ns' ) ) {
120 throw new MWException(
'XMP support requires XML Parser' );
134 if ( $this->xmlParser ) {
136 xml_parser_free( $this->xmlParser );
139 $this->xmlParser = xml_parser_create_ns(
'UTF-8',
' ' );
140 xml_parser_set_option( $this->xmlParser, XML_OPTION_CASE_FOLDING, 0 );
141 xml_parser_set_option( $this->xmlParser, XML_OPTION_SKIP_WHITE, 1 );
143 xml_set_element_handler( $this->xmlParser,
144 array( $this,
'startElement' ),
145 array( $this,
'endElement' ) );
147 xml_set_character_data_handler( $this->xmlParser,
array( $this,
'char' ) );
156 xml_parser_free( $this->xmlParser );
178 if ( isset( $data[
'xmp-special'][
'AuthorsPosition'] )
179 && is_string( $data[
'xmp-special'][
'AuthorsPosition'] )
180 && isset( $data[
'xmp-general'][
'Artist'][0] )
187 $data[
'xmp-general'][
'Artist'][0] =
188 $data[
'xmp-special'][
'AuthorsPosition'] .
', '
189 . $data[
'xmp-general'][
'Artist'][0];
196 if ( isset( $data[
'xmp-special'][
'LocationShown'][0] )
197 && is_array( $data[
'xmp-special'][
'LocationShown'][0] )
201 foreach ( $data[
'xmp-special'][
'LocationShown']
as $loc ) {
202 if ( !is_array( $loc ) ) {
206 foreach ( $loc
as $field => $val ) {
207 $data[
'xmp-general'][$field .
'Dest'][] = $val;
211 if ( isset( $data[
'xmp-special'][
'LocationCreated'][0] )
212 && is_array( $data[
'xmp-special'][
'LocationCreated'][0] )
216 foreach ( $data[
'xmp-special'][
'LocationCreated']
as $loc ) {
217 if ( !is_array( $loc ) ) {
221 foreach ( $loc
as $field => $val ) {
222 $data[
'xmp-general'][$field .
'Created'][] = $val;
229 unset( $data[
'xmp-special'] );
232 if ( isset( $data[
'xmp-exif'][
'GPSAltitudeRef'] )
233 && isset( $data[
'xmp-exif'][
'GPSAltitude'] )
238 list( $nom, $denom ) = explode(
'/', $data[
'xmp-exif'][
'GPSAltitude'] );
239 $data[
'xmp-exif'][
'GPSAltitude'] = $nom / $denom;
241 if ( $data[
'xmp-exif'][
'GPSAltitudeRef'] ==
'1' ) {
242 $data[
'xmp-exif'][
'GPSAltitude'] *= -1;
244 unset( $data[
'xmp-exif'][
'GPSAltitudeRef'] );
263 public function parse( $content, $allOfIt =
true, $reset =
false ) {
271 if ( !$this->charset ) {
273 if ( preg_match(
'/\xEF\xBB\xBF|\xFE\xFF|\x00\x00\xFE\xFF|\xFF\xFE\x00\x00|\xFF\xFE/',
278 $this->charset =
'UTF-16BE';
281 $this->charset =
'UTF-16LE';
283 case "\x00\x00\xFE\xFF":
284 $this->charset =
'UTF-32BE';
286 case "\xFF\xFE\x00\x00":
287 $this->charset =
'UTF-32LE';
290 $this->charset =
'UTF-8';
298 $this->charset =
'UTF-8';
301 if ( $this->charset !==
'UTF-8' ) {
304 $content = iconv( $this->charset,
'UTF-8//IGNORE', $content );
308 $ok = xml_parse( $this->xmlParser, $content, $allOfIt );
310 $error = xml_error_string( xml_get_error_code( $this->xmlParser ) );
311 $where =
'line: ' . xml_get_current_line_number( $this->xmlParser )
312 .
' column: ' . xml_get_current_column_number( $this->xmlParser )
313 .
' byte offset: ' . xml_get_current_byte_index( $this->xmlParser );
315 wfDebugLog(
'XMP',
"XMPReader::parse : Error reading XMP content: $error ($where)" );
316 $this->results =
array();
321 $this->results =
array();
339 $guid = substr( $content, 0, 32 );
340 if ( !isset( $this->results[
'xmp-special'][
'HasExtendedXMP'] )
341 || $this->results[
'xmp-special'][
'HasExtendedXMP'] !== $guid
344 " Ignoring XMPExtended block due to wrong guid (guid= '$guid')" );
348 $len = unpack(
'Nlength/Noffset', substr( $content, 32, 8 ) );
350 if ( !$len || $len[
'length'] < 4 || $len[
'offset'] < 0 || $len[
'offset'] > $len[
'length'] ) {
351 wfDebugLog(
'XMP', __METHOD__ .
'Error reading extended XMP block, invalid length or offset.' );
367 if ( $len[
'offset'] !== $this->extendedXMPOffset ) {
368 wfDebugLog(
'XMP', __METHOD__ .
'Ignoring XMPExtended block due to wrong order. (Offset was '
369 . $len[
'offset'] .
' but expected ' . $this->extendedXMPOffset .
')' );
374 if ( $len[
'offset'] === 0 ) {
380 $this->extendedXMPOffset += $len[
'length'];
382 $actualContent = substr( $content, 40 );
384 if ( $this->extendedXMPOffset === strlen( $actualContent ) ) {
390 wfDebugLog(
'XMP', __METHOD__ .
'Parsing a XMPExtended block' );
392 return $this->
parse( $actualContent, $atEnd );
413 $data = trim( $data );
414 if ( trim( $data ) ===
"" ) {
418 if ( !isset( $this->mode[0] ) ) {
419 throw new MWException(
'Unexpected character data before first rdf:Description element' );
422 if ( $this->mode[0] === self::MODE_IGNORE ) {
426 if ( $this->mode[0] !== self::MODE_SIMPLE
427 && $this->mode[0] !== self::MODE_QDESC
429 throw new MWException(
'character data where not expected. (mode ' . $this->mode[0] .
')' );
433 if ( $this->charContent ===
false ) {
434 $this->charContent = $data;
436 $this->charContent .= $data;
447 if ( $this->curItem[0] === $elm ) {
448 array_shift( $this->curItem );
449 array_shift( $this->mode );
469 if ( $this->charContent !==
false ) {
470 if ( $this->processingArray ) {
473 list( $ns, $tag ) = explode(
' ', $this->curItem[0], 2 );
475 list( $ns, $tag ) = explode(
' ', $elm, 2 );
477 $this->
saveValue( $ns, $tag, $this->charContent );
479 $this->charContent =
false;
481 array_shift( $this->curItem );
482 array_shift( $this->mode );
507 if ( $this->curItem[0] !== $elm
508 && !( $elm === self::NS_RDF .
' Description'
509 && $this->mode[0] === self::MODE_STRUCT )
511 throw new MWException(
"nesting mismatch. got a </$elm> but expected a </" .
512 $this->curItem[0] .
'>' );
516 list( $ns, $tag ) = explode(
' ', $elm, 2 );
517 if ( isset( $this->items[$ns][$tag][
'validate'] ) ) {
519 $info =& $this->items[$ns][$tag];
520 $finalName = isset( $info[
'map_name'] )
521 ? $info[
'map_name'] : $tag;
523 $validate = is_array( $info[
'validate'] ) ? $info[
'validate']
524 :
array(
'XMPValidate', $info[
'validate'] );
526 if ( !isset( $this->results[
'xmp-' . $info[
'map_group']][$finalName] ) ) {
528 wfDebugLog(
'XMP', __METHOD__ .
" <$ns:$tag> has no valid members." );
529 } elseif ( is_callable( $validate ) ) {
530 $val =& $this->results[
'xmp-' . $info[
'map_group']][$finalName];
531 call_user_func_array( $validate,
array( $info, &$val,
false ) );
532 if ( is_null( $val ) ) {
535 wfDebugLog(
'XMP', __METHOD__ .
" <$ns:$tag> failed validation." );
536 unset( $this->results[
'xmp-' . $info[
'map_group']][$finalName] );
539 wfDebugLog(
'XMP', __METHOD__ .
" Validation function for $finalName ("
540 . $validate[0] .
'::' . $validate[1] .
'()) is not callable.' );
544 array_shift( $this->curItem );
545 array_shift( $this->mode );
546 $this->ancestorStruct =
false;
547 $this->processingArray =
false;
548 $this->itemLang =
false;
572 list( $ns, $tag ) = explode(
' ', $this->curItem[0], 2 );
573 $info = $this->items[$ns][$tag];
574 $finalName = isset( $info[
'map_name'] )
575 ? $info[
'map_name'] : $tag;
577 array_shift( $this->mode );
579 if ( !isset( $this->results[
'xmp-' . $info[
'map_group']][$finalName] ) ) {
580 wfDebugLog(
'XMP', __METHOD__ .
" Empty compund element $finalName." );
585 if ( $elm === self::NS_RDF .
' Seq' ) {
586 $this->results[
'xmp-' . $info[
'map_group']][$finalName][
'_type'] =
'ol';
587 } elseif ( $elm === self::NS_RDF .
' Bag' ) {
588 $this->results[
'xmp-' . $info[
'map_group']][$finalName][
'_type'] =
'ul';
589 } elseif ( $elm === self::NS_RDF .
' Alt' ) {
591 if ( $info[
'mode'] === self::MODE_LANG ) {
592 $this->results[
'xmp-' . $info[
'map_group']][$finalName][
'_type'] =
'lang';
595 throw new MWException( __METHOD__ .
" expected </rdf:seq> or </rdf:bag> but instead got $elm." );
611 if ( $elm === self::NS_RDF .
' value' ) {
612 list( $ns, $tag ) = explode(
' ', $this->curItem[0], 2 );
613 $this->
saveValue( $ns, $tag, $this->charContent );
617 array_shift( $this->mode );
618 array_shift( $this->curItem );
636 if ( $elm === ( self::NS_RDF .
' RDF' )
637 || $elm ===
'adobe:ns:meta/ xmpmeta'
638 || $elm ===
'adobe:ns:meta/ xapmeta'
644 if ( $elm === self::NS_RDF .
' type' ) {
647 wfDebugLog(
'XMP', __METHOD__ .
' encountered <rdf:type>' );
650 if ( strpos( $elm,
' ' ) ===
false ) {
655 wfDebugLog(
'XMP', __METHOD__ .
" Encountered </$elm> which has no namespace. Skipping." );
660 if ( count( $this->mode[0] ) === 0 ) {
663 throw new MWException(
'Encountered end element with no mode' );
666 if ( count( $this->curItem ) == 0 && $this->mode[0] !== self::MODE_INITIAL ) {
669 throw new MWException(
"Hit end element </$elm> but no curItem" );
672 switch ( $this->mode[0] ) {
687 if ( $elm === self::NS_RDF .
' Description' ) {
688 array_shift( $this->mode );
690 throw new MWException(
'Element ended unexpectedly while in MODE_INITIAL' );
701 wfDebugLog(
'XMP', __METHOD__ .
" no mode (elm = $elm)" );
718 if ( $elm === $this->curItem[0] ) {
719 array_unshift( $this->curItem, $elm );
720 array_unshift( $this->mode, self::MODE_IGNORE );
732 if ( $elm === self::NS_RDF .
' Bag' ) {
733 array_unshift( $this->mode, self::MODE_LI );
747 if ( $elm === self::NS_RDF .
' Seq' ) {
748 array_unshift( $this->mode, self::MODE_LI );
749 } elseif ( $elm === self::NS_RDF .
' Bag' ) {
751 wfDebugLog(
'XMP', __METHOD__ .
' Expected an rdf:Seq, but got an rdf:Bag. Pretending'
752 .
' it is a Seq, since some buggy software is known to screw this up.' );
753 array_unshift( $this->mode, self::MODE_LI );
755 throw new MWException(
"Expected <rdf:Seq> but got $elm." );
774 if ( $elm === self::NS_RDF .
' Alt' ) {
775 array_unshift( $this->mode, self::MODE_LI_LANG );
777 throw new MWException(
"Expected <rdf:Seq> but got $elm." );
800 if ( $elm === self::NS_RDF .
' Description' ) {
802 array_unshift( $this->mode, self::MODE_QDESC );
803 array_unshift( $this->curItem, $this->curItem[0] );
805 if ( isset(
$attribs[self::NS_RDF .
' value'] ) ) {
806 list( $ns, $tag ) = explode(
' ', $this->curItem[0], 2 );
809 } elseif ( $elm === self::NS_RDF .
' value' ) {
811 throw new MWException( __METHOD__ .
' Encountered <rdf:value> where it was unexpected.' );
815 " Encountered element <$elm> where only expecting character data as value of " .
817 array_unshift( $this->mode, self::MODE_IGNORE );
818 array_unshift( $this->curItem, $elm );
837 if ( $elm === self::NS_RDF .
' value' ) {
841 array_unshift( $this->mode, self::MODE_IGNORE );
842 array_unshift( $this->curItem, $elm );
859 if ( $ns !== self::NS_RDF ) {
861 if ( isset( $this->items[$ns][$tag] ) ) {
862 if ( isset( $this->items[$ns][$tag][
'structPart'] ) ) {
868 wfDebugLog(
'XMP',
"Encountered <$ns:$tag> outside"
869 .
" of its expected parent. Ignoring." );
871 array_unshift( $this->mode, self::MODE_IGNORE );
872 array_unshift( $this->curItem, $ns .
' ' . $tag );
876 $mode = $this->items[$ns][$tag][
'mode'];
877 array_unshift( $this->mode,
$mode );
878 array_unshift( $this->curItem, $ns .
' ' . $tag );
879 if (
$mode === self::MODE_STRUCT ) {
880 $this->ancestorStruct = isset( $this->items[$ns][$tag][
'map_name'] )
881 ? $this->items[$ns][$tag][
'map_name'] : $tag;
883 if ( $this->charContent !==
false ) {
886 throw new MWException(
'tag nested in non-whitespace characters.' );
890 wfDebugLog(
'XMP', __METHOD__ .
" Ignoring unrecognized element <$ns:$tag>." );
891 array_unshift( $this->mode, self::MODE_IGNORE );
892 array_unshift( $this->curItem, $ns .
' ' . $tag );
921 if ( $ns !== self::NS_RDF ) {
923 if ( isset( $this->items[$ns][$tag] ) ) {
924 if ( isset( $this->items[$ns][$this->ancestorStruct][
'children'] )
925 && !isset( $this->items[$ns][$this->ancestorStruct][
'children'][$tag] )
929 throw new MWException(
" <$tag> appeared nested in <" . $this->ancestorStruct
930 .
"> where it is not allowed." );
932 array_unshift( $this->mode, $this->items[$ns][$tag][
'mode'] );
933 array_unshift( $this->curItem, $ns .
' ' . $tag );
934 if ( $this->charContent !==
false ) {
937 throw new MWException(
"tag <$tag> nested in non-whitespace characters (" .
938 $this->charContent .
")." );
941 array_unshift( $this->mode, self::MODE_IGNORE );
942 array_unshift( $this->curItem, $elm );
948 if ( $ns === self::NS_RDF && $tag ===
'Description' ) {
950 array_unshift( $this->mode, self::MODE_STRUCT );
951 array_unshift( $this->curItem, $this->curItem[0] );
969 if ( ( $elm ) !== self::NS_RDF .
' li' ) {
970 throw new MWException(
"<rdf:li> expected but got $elm." );
973 if ( !isset( $this->mode[1] ) ) {
976 throw new MWException(
'In mode Li, but no 2xPrevious mode!' );
979 if ( $this->mode[1] === self::MODE_BAGSTRUCT ) {
981 array_unshift( $this->mode, self::MODE_STRUCT );
982 array_unshift( $this->curItem, $elm );
983 $this->processingArray =
true;
985 if ( !isset( $this->curItem[1] ) ) {
987 throw new MWException(
'Can not find parent of BAGSTRUCT.' );
989 list( $curNS, $curTag ) = explode(
' ', $this->curItem[1] );
990 $this->ancestorStruct = isset( $this->items[$curNS][$curTag][
'map_name'] )
991 ? $this->items[$curNS][$curTag][
'map_name'] : $curTag;
996 array_unshift( $this->mode, self::MODE_SIMPLE );
999 array_unshift( $this->curItem, $this->curItem[0] );
1000 $this->processingArray =
true;
1019 if ( $elm !== self::NS_RDF .
' li' ) {
1020 throw new MWException( __METHOD__ .
" <rdf:li> expected but got $elm." );
1022 if ( !isset(
$attribs[self::NS_XML .
' lang'] )
1023 || !preg_match(
'/^[-A-Za-z0-9]{2,}$/D',
$attribs[self::NS_XML .
' lang'] )
1026 .
" <rdf:li> did not contain, or has invalid xml:lang attribute in lang alternative" );
1030 $this->itemLang = strtolower(
$attribs[self::NS_XML .
' lang'] );
1034 array_unshift( $this->curItem, $this->curItem[0] );
1035 array_unshift( $this->mode, self::MODE_SIMPLE );
1036 $this->processingArray =
true;
1051 if ( $elm === self::NS_RDF .
' RDF'
1052 || $elm ===
'adobe:ns:meta/ xmpmeta'
1053 || $elm ===
'adobe:ns:meta/ xapmeta'
1057 } elseif ( $elm === self::NS_RDF .
' Description' ) {
1058 if ( count( $this->mode ) === 0 ) {
1060 array_unshift( $this->mode, self::MODE_INITIAL );
1062 } elseif ( $elm === self::NS_RDF .
' type' ) {
1070 wfDebugLog(
'XMP', __METHOD__ .
' Encountered <rdf:type> which isn\'t currently supported' );
1073 if ( strpos( $elm,
' ' ) ===
false ) {
1075 wfDebugLog(
'XMP', __METHOD__ .
" Encountered <$elm> which has no namespace. Skipping." );
1080 list( $ns, $tag ) = explode(
' ', $elm, 2 );
1082 if ( count( $this->mode ) === 0 ) {
1085 .
"encountered <$elm> with no mode" );
1088 switch ( $this->mode[0] ) {
1121 throw new MWException(
'StartElement in unknown mode: ' . $this->mode[0] );
1146 if ( isset(
$attribs[self::NS_RDF .
' parseType'] )
1147 &&
$attribs[self::NS_RDF .
' parseType'] ===
'Resource'
1148 && $this->mode[0] === self::MODE_SIMPLE
1154 if ( strpos(
$name,
' ' ) ===
false ) {
1157 wfDebugLog(
'XMP', __METHOD__ .
' Encountered non-namespaced attribute: '
1158 .
" $name=\"$val\". Skipping. " );
1161 list( $ns, $tag ) = explode(
' ',
$name, 2 );
1162 if ( $ns === self::NS_RDF ) {
1163 if ( $tag ===
'value' || $tag ===
'resource' ) {
1166 $this->
char( $this->xmlParser, $val );
1168 } elseif ( isset( $this->items[$ns][$tag] ) ) {
1169 if ( $this->mode[0] === self::MODE_SIMPLE ) {
1171 .
" $ns:$tag found as attribute where not allowed" );
1175 wfDebugLog(
'XMP', __METHOD__ .
" Ignoring unrecognized element <$ns:$tag>." );
1191 private function saveValue( $ns, $tag, $val ) {
1193 $info =& $this->items[$ns][$tag];
1194 $finalName = isset( $info[
'map_name'] )
1195 ? $info[
'map_name'] : $tag;
1196 if ( isset( $info[
'validate'] ) ) {
1197 $validate = is_array( $info[
'validate'] ) ? $info[
'validate']
1198 :
array(
'XMPValidate', $info[
'validate'] );
1200 if ( is_callable( $validate ) ) {
1201 call_user_func_array( $validate,
array( $info, &$val,
true ) );
1204 if ( is_null( $val ) ) {
1205 wfDebugLog(
'XMP', __METHOD__ .
" <$ns:$tag> failed validation." );
1210 wfDebugLog(
'XMP', __METHOD__ .
" Validation function for $finalName ("
1211 . $validate[0] .
'::' . $validate[1] .
'()) is not callable.' );
1215 if ( $this->ancestorStruct && $this->processingArray ) {
1218 } elseif ( $this->ancestorStruct ) {
1220 } elseif ( $this->processingArray ) {
1221 if ( $this->itemLang ===
false ) {
1223 $this->results[
'xmp-' . $info[
'map_group']][$finalName][] = $val;
1226 $this->results[
'xmp-' . $info[
'map_group']][$finalName][
$this->itemLang] = $val;
1229 $this->results[
'xmp-' . $info[
'map_group']][$finalName] = $val;