MediaWiki  1.23.0
XMP.php
Go to the documentation of this file.
1 <?php
49 class XMPReader {
// XMP item configuration array; filled from XMPInfo::getItems() in the constructor.
51  protected $items;
52 
// Stack of "namespace tag" element names being processed; index 0 is the innermost one.
54  private $curItem = array();
55 
// The structure name when processing nested structures, false otherwise.
57  private $ancestorStruct = false;
58 
// Character data collected for the current value, or false when nothing has been collected.
60  private $charContent = false;
61 
// Stack of MODE_* values; index 0 is the mode currently in effect.
63  private $mode = array();
64 
// Extracted values, keyed by 'xmp-' . map_group and then by property name.
66  private $results = array();
67 
// Set to true once an rdf:li item has been opened; cleared by endElementNested().
69  private $processingArray = false;
70 
// Used for lang alts only: lower-cased xml:lang of the current item, or false.
72  private $itemLang = false;
73 
// Parser handle created by xml_parser_create_ns() in resetXMLParser().
75  private $xmlParser;
76 
// Name of the input character set; false until parse() has detected it.
78  private $charset = false;
79 
// Offset that the next ExtendedXMP block is expected to carry (see parseExtended()).
81  private $extendedXMPOffset = 0;
82 
// Parser state constants. MODE_INITIAL is pushed for the outer rdf:Description,
// MODE_IGNORE for elements that are skipped, MODE_LI / MODE_LI_LANG after an
// rdf:Bag, rdf:Seq or rdf:Alt has been opened, and MODE_QDESC for an
// rdf:Description found inside a simple value.
92  const MODE_INITIAL = 0;
93  const MODE_IGNORE = 1;
94  const MODE_LI = 2;
95  const MODE_LI_LANG = 3;
96  const MODE_QDESC = 4;
97 
98  // The following MODE constants are also used in the
99  // $items array to denote what type of property the item is.
100  const MODE_SIMPLE = 10;
101  const MODE_STRUCT = 11; // structure (associative array)
102  const MODE_SEQ = 12; // ordered list
103  const MODE_BAG = 13; // unordered list
104  const MODE_LANG = 14;
105  const MODE_ALT = 15; // non-language alt. Currently not implemented, and not needed atm.
106  const MODE_BAGSTRUCT = 16; // A BAG of Structs.
107 
// Namespace URIs as reported by the namespace-aware XML parser.
108  const NS_RDF = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#';
109  const NS_XML = 'http://www.w3.org/XML/1998/namespace';
110 
/**
 * Prepare the reader: make sure XML parser support is present, load the
 * item configuration and create a fresh XML parser.
 *
 * @throws MWException if xml_parser_create_ns() does not exist
 */
function __construct() {
	$hasXmlSupport = function_exists( 'xml_parser_create_ns' );
	if ( !$hasXmlSupport ) {
		// Callers are expected to have verified this before getting here.
		throw new MWException( 'XMP support requires XML Parser' );
	}

	$this->items = XMPInfo::getItems();
	$this->resetXMLParser();
}
127 
/**
 * Throw away the current XML parser (if any) and create a new one.
 *
 * Main use is if a single item has multiple XMP documents describing it.
 * The new parser is namespace aware, joins namespace URI and local name
 * with a single space, and reports to startElement(), endElement() and char().
 */
private function resetXMLParser() {
	if ( $this->xmlParser ) {
		// Release the previous parser before replacing it.
		xml_parser_free( $this->xmlParser );
	}

	$parser = xml_parser_create_ns( 'UTF-8', ' ' );
	$this->xmlParser = $parser;

	xml_parser_set_option( $parser, XML_OPTION_CASE_FOLDING, 0 );
	xml_parser_set_option( $parser, XML_OPTION_SKIP_WHITE, 1 );

	$onOpen = array( $this, 'startElement' );
	$onClose = array( $this, 'endElement' );
	xml_set_element_handler( $parser, $onOpen, $onClose );

	xml_set_character_data_handler( $parser, array( $this, 'char' ) );
}
149 
/**
 * Release the XML parser held by this reader when the object goes away.
 */
function __destruct() {
	// Unclear whether an explicit free is required; kept to match creation in resetXMLParser().
	xml_parser_free( $this->xmlParser );
}
158 
/**
 * Return the extracted metadata after post-processing.
 *
 * The 'xmp-special' group holds values that only influence how other
 * values are produced (for example xmpNote:HasExtendedXMP,
 * photoshop:AuthorsPosition and the hierarchical location fields).
 * They are folded into the regular groups here and then dropped.
 *
 * @return array Results keyed by group name ('xmp-general', 'xmp-exif', ...)
 */
public function getResults() {
	$data = $this->results;

	wfRunHooks( 'XMPGetResults', array( &$data ) );

	// AuthorsPosition is really part of the creator (see 2:85 in IPTC and
	// pg 21 of the IPTC4XMP standard). It is only applied to the first
	// dc:Creator entry, and never to exif:Artist.
	if ( isset( $data['xmp-special']['AuthorsPosition'] )
		&& is_string( $data['xmp-special']['AuthorsPosition'] )
		&& isset( $data['xmp-general']['Artist'][0] )
	) {
		$position = $data['xmp-special']['AuthorsPosition'];
		$firstArtist = $data['xmp-general']['Artist'][0];
		$data['xmp-general']['Artist'][0] = $position . ', ' . $firstArtist;
	}

	// Flatten LocationShown / LocationCreated into the non-hierarchical
	// form used by the other location fields. The value is the suffix
	// appended to each member's field name.
	$locationSuffixes = array(
		'LocationShown' => 'Dest',
		'LocationCreated' => 'Created',
	);
	foreach ( $locationSuffixes as $specialName => $suffix ) {
		// The is_array check is paranoia; the first entry should always be an array.
		if ( !isset( $data['xmp-special'][$specialName][0] )
			|| !is_array( $data['xmp-special'][$specialName][0] )
		) {
			continue;
		}
		foreach ( $data['xmp-special'][$specialName] as $location ) {
			// Non-array entries are the _type meta-fields; do not copy them.
			if ( is_array( $location ) ) {
				foreach ( $location as $field => $val ) {
					$data['xmp-general'][$field . $suffix][] = $val;
				}
			}
		}
	}

	// The special values are not information to be stored about the file.
	unset( $data['xmp-special'] );

	// Turn the GPSAltitude rational into a real number, negative when the
	// reference says "below sea level".
	if ( isset( $data['xmp-exif']['GPSAltitudeRef'] )
		&& isset( $data['xmp-exif']['GPSAltitude'] )
	) {
		// XMPValidate guarantees there will always be a '/' in this value.
		list( $nom, $denom ) = explode( '/', $data['xmp-exif']['GPSAltitude'] );
		$altitude = $nom / $denom;

		if ( $data['xmp-exif']['GPSAltitudeRef'] == '1' ) {
			$altitude *= -1;
		}
		$data['xmp-exif']['GPSAltitude'] = $altitude;
		unset( $data['xmp-exif']['GPSAltitudeRef'] );
	}

	return $data;
}
249 
/**
 * Feed a piece of XMP data to the XML parser.
 *
 * On the first call the character set is detected from a byte order mark
 * found in the content (UTF-8 is assumed when there is none) and remembered
 * for later calls. Content that is not UTF-8 is converted before parsing.
 *
 * @param string $content XMP data
 * @param bool $allOfIt Passed to xml_parse() as the "final chunk" flag
 * @param bool $reset Create a fresh XML parser before parsing
 * @return bool True on success; false on failure, in which case the
 *   results collected so far are discarded
 */
public function parse( $content, $allOfIt = true, $reset = false ) {
	if ( $reset ) {
		$this->resetXMLParser();
	}
	try {

		// detect encoding by looking for BOM which is supposed to be in processing instruction.
		// see page 12 of http://www.adobe.com/devnet/xmp/pdfs/XMPSpecificationPart3.pdf
		if ( !$this->charset ) {
			$bom = array();
			if ( preg_match( '/\xEF\xBB\xBF|\xFE\xFF|\x00\x00\xFE\xFF|\xFF\xFE\x00\x00|\xFF\xFE/',
				$content, $bom )
			) {
				switch ( $bom[0] ) {
					case "\xFE\xFF":
						$this->charset = 'UTF-16BE';
						break;
					case "\xFF\xFE":
						$this->charset = 'UTF-16LE';
						break;
					case "\x00\x00\xFE\xFF":
						$this->charset = 'UTF-32BE';
						break;
					case "\xFF\xFE\x00\x00":
						$this->charset = 'UTF-32LE';
						break;
					case "\xEF\xBB\xBF":
						$this->charset = 'UTF-8';
						break;
					default:
						//this should be impossible to get to
						throw new MWException( "Invalid BOM" );
				}
			} else {
				// standard specifically says, if no bom assume utf-8
				$this->charset = 'UTF-8';
			}
		}
		if ( $this->charset !== 'UTF-8' ) {
			//don't convert if already utf-8
			// NOTE(review): the rendered listing skips a source line on either
			// side of this call (possibly warning suppression) - confirm
			// against the real file before applying.
			$converted = iconv( $this->charset, 'UTF-8//IGNORE', $content );
			if ( $converted === false ) {
				// iconv() signals failure with false. Handing that to
				// xml_parse() would silently drop a non-final chunk
				// while still reporting success.
				wfDebugLog( 'XMP', 'XMPReader::parse : Could not convert XMP content from '
					. $this->charset . ' to UTF-8' );
				$this->results = array(); // blank if error.
				return false;
			}
			$content = $converted;
		}

		$ok = xml_parse( $this->xmlParser, $content, $allOfIt );
		if ( !$ok ) {
			$error = xml_error_string( xml_get_error_code( $this->xmlParser ) );
			$where = 'line: ' . xml_get_current_line_number( $this->xmlParser )
				. ' column: ' . xml_get_current_column_number( $this->xmlParser )
				. ' byte offset: ' . xml_get_current_byte_index( $this->xmlParser );

			wfDebugLog( 'XMP', "XMPReader::parse : Error reading XMP content: $error ($where)" );
			$this->results = array(); // blank if error.
			return false;
		}
	} catch ( MWException $e ) {
		// Raised by the element/character handlers on malformed XMP.
		wfDebugLog( 'XMP', 'XMP parse error: ' . $e );
		$this->results = array();

		return false;
	}

	return true;
}
328 
/**
 * Parse one ExtendedXMP block.
 *
 * Layout of $content as read here: 32 bytes of GUID, a 4-byte big-endian
 * length, a 4-byte big-endian offset, then the data itself. The GUID must
 * match the HasExtendedXMP value found in the standard XMP, and blocks
 * must arrive in order.
 *
 * @todo FIXME: This is untested. Hard to find example files
 *   or programs that make such files..
 * @param string $content Raw block, starting with the GUID
 * @return bool True if the block was accepted and parsed, false otherwise
 */
public function parseExtended( $content ) {
	$guid = substr( $content, 0, 32 );
	if ( !isset( $this->results['xmp-special']['HasExtendedXMP'] )
		|| $this->results['xmp-special']['HasExtendedXMP'] !== $guid
	) {
		wfDebugLog( 'XMP', __METHOD__ .
			" Ignoring XMPExtended block due to wrong guid (guid= '$guid')" );

		return false;
	}
	$len = unpack( 'Nlength/Noffset', substr( $content, 32, 8 ) );

	if ( !$len || $len['length'] < 4 || $len['offset'] < 0 || $len['offset'] > $len['length'] ) {
		wfDebugLog( 'XMP', __METHOD__ . 'Error reading extended XMP block, invalid length or offset.' );

		return false;
	}

	// we're not very robust here. we should accept it in the wrong order.
	// To quote the XMP standard:
	// "A JPEG writer should write the ExtendedXMP marker segments in order,
	// immediately following the StandardXMP. However, the JPEG standard
	// does not require preservation of marker segment order. A robust JPEG
	// reader should tolerate the marker segments in any order."
	//
	// otoh the probability that an image will have more than 128k of
	// metadata is rather low... so the probability that it will have
	// > 128k, and be in the wrong order is very low...

	if ( $len['offset'] !== $this->extendedXMPOffset ) {
		wfDebugLog( 'XMP', __METHOD__ . 'Ignoring XMPExtended block due to wrong order. (Offset was '
			. $len['offset'] . ' but expected ' . $this->extendedXMPOffset . ')' );

		return false;
	}

	if ( $len['offset'] === 0 ) {
		// if we're starting the extended block, we've probably already
		// done the XMPStandard block, so reset.
		$this->resetXMLParser();
	}

	$actualContent = substr( $content, 40 );

	// 'length' is the size of the complete ExtendedXMP serialization and
	// 'offset' is where this portion starts within it. The next expected
	// offset therefore advances by the size of this portion, and we are
	// done once everything announced by 'length' has been consumed.
	// (Adding 'length' itself made every second block look out of order.)
	$this->extendedXMPOffset += strlen( $actualContent );
	$atEnd = ( $this->extendedXMPOffset >= $len['length'] );

	wfDebugLog( 'XMP', __METHOD__ . 'Parsing a XMPExtended block' );

	return $this->parse( $actualContent, $atEnd );
}
394 
/**
 * Character data handler for the XML parser.
 *
 * Collects the trimmed text into $this->charContent. Text that is only
 * whitespace is dropped, and text inside ignored elements is skipped.
 *
 * @param resource $parser XML parser handle (unused)
 * @param string $data Character data
 * @throws MWException on text outside any mode, or in a mode other than
 *   MODE_SIMPLE / MODE_QDESC
 */
function char( $parser, $data ) {
	$text = trim( $data );
	if ( $text === "" ) {
		return;
	}

	if ( !isset( $this->mode[0] ) ) {
		throw new MWException( 'Unexpected character data before first rdf:Description element' );
	}

	$currentMode = $this->mode[0];
	if ( $currentMode === self::MODE_IGNORE ) {
		return;
	}

	$acceptsText = ( $currentMode === self::MODE_SIMPLE || $currentMode === self::MODE_QDESC );
	if ( !$acceptsText ) {
		throw new MWException( 'character data where not expected. (mode ' . $this->mode[0] . ')' );
	}

	// The handler can be invoked several times for one value, so append.
	$this->charContent = ( $this->charContent === false )
		? $text
		: $this->charContent . $text;
}
439 
/**
 * Closing element while in MODE_IGNORE.
 *
 * Leaves ignore mode only when the element being closed is the one that
 * started it; closing tags of anything nested inside are skipped.
 *
 * @param string $elm Namespace and tag name, separated by a space
 */
private function endElementModeIgnore( $elm ) {
	if ( $this->curItem[0] !== $elm ) {
		return;
	}

	array_shift( $this->curItem );
	array_shift( $this->mode );
}
452 
/**
 * Closing element while in MODE_SIMPLE.
 *
 * Stores any collected character data via saveValue() and pops the
 * current item and mode.
 *
 * @param string $elm Namespace and tag name, separated by a space
 */
private function endElementModeSimple( $elm ) {
	if ( $this->charContent !== false ) {
		// Inside an array the closing tag is rdf:li, so the property
		// name has to come from the item stack instead.
		$source = $this->processingArray ? $this->curItem[0] : $elm;
		list( $ns, $tag ) = explode( ' ', $source, 2 );

		$this->saveValue( $ns, $tag, $this->charContent );
		$this->charContent = false; // reset
	}

	array_shift( $this->curItem );
	array_shift( $this->mode );
}
484 
/**
 * Closing element of a nested property (struct, list or language alt).
 *
 * Runs the property's validation callback over the collected value, if one
 * is configured, then pops the current item and mode and clears the
 * struct / array / language state.
 *
 * @param string $elm Namespace and tag name, separated by a space
 * @throws MWException if the element being closed is not the current one
 */
private function endElementNested( $elm ) {
	// The current item must match $elm, except in MODE_STRUCT where the
	// closing tag may also be rdf:Description.
	if ( $this->curItem[0] !== $elm ) {
		$closesStructDescription = ( $elm === self::NS_RDF . ' Description'
			&& $this->mode[0] === self::MODE_STRUCT );
		if ( !$closesStructDescription ) {
			throw new MWException( "nesting mismatch. got a </$elm> but expected a </" .
				$this->curItem[0] . '>' );
		}
	}

	// Validate structures.
	list( $ns, $tag ) = explode( ' ', $elm, 2 );
	if ( isset( $this->items[$ns][$tag]['validate'] ) ) {
		$info =& $this->items[$ns][$tag];
		$finalName = isset( $info['map_name'] ) ? $info['map_name'] : $tag;
		$group = 'xmp-' . $info['map_group'];

		// A bare method name refers to a static method of XMPValidate.
		$validate = $info['validate'];
		if ( !is_array( $validate ) ) {
			$validate = array( 'XMPValidate', $validate );
		}

		if ( !isset( $this->results[$group][$finalName] ) ) {
			// This can happen if all the members of the struct failed validation.
			wfDebugLog( 'XMP', __METHOD__ . " <$ns:$tag> has no valid members." );
		} elseif ( !is_callable( $validate ) ) {
			wfDebugLog( 'XMP', __METHOD__ . " Validation function for $finalName ("
				. $validate[0] . '::' . $validate[1] . '()) is not callable.' );
		} else {
			// The validator receives the value by reference and nulls
			// it when it is invalid.
			$val =& $this->results[$group][$finalName];
			call_user_func_array( $validate, array( $info, &$val, false ) );
			if ( $val === null ) {
				wfDebugLog( 'XMP', __METHOD__ . " <$ns:$tag> failed validation." );
				unset( $this->results[$group][$finalName] );
			}
		}
	}

	array_shift( $this->curItem );
	array_shift( $this->mode );
	$this->ancestorStruct = false;
	$this->processingArray = false;
	$this->itemLang = false;
}
550 
/**
 * Closing element while in MODE_LI or MODE_LI_LANG (rdf:Seq, rdf:Bag or
 * rdf:Alt).
 *
 * Pops the list mode and records, in the '_type' key of the collected
 * value, what kind of list it was.
 *
 * @param string $elm Namespace and tag name, separated by a space
 * @throws MWException if $elm is not one of the RDF container elements
 */
private function endElementModeLi( $elm ) {
	list( $ns, $tag ) = explode( ' ', $this->curItem[0], 2 );
	$info = $this->items[$ns][$tag];
	$finalName = isset( $info['map_name'] ) ? $info['map_name'] : $tag;
	$group = 'xmp-' . $info['map_group'];

	array_shift( $this->mode );

	if ( !isset( $this->results[$group][$finalName] ) ) {
		// Nothing was stored for this property, so there is nothing to tag.
		wfDebugLog( 'XMP', __METHOD__ . " Empty compund element $finalName." );

		return;
	}

	$listTypes = array(
		self::NS_RDF . ' Seq' => 'ol',
		self::NS_RDF . ' Bag' => 'ul',
	);

	if ( isset( $listTypes[$elm] ) ) {
		$this->results[$group][$finalName]['_type'] = $listTypes[$elm];
	} elseif ( $elm === self::NS_RDF . ' Alt' ) {
		// extra if needed as you could theoretically have a non-language alt.
		if ( $info['mode'] === self::MODE_LANG ) {
			$this->results[$group][$finalName]['_type'] = 'lang';
		}
	} else {
		throw new MWException( __METHOD__ . " expected </rdf:seq> or </rdf:bag> but instead got $elm." );
	}
}
598 
/**
 * Closing element while in MODE_QDESC, i.e. a simple value that has
 * qualifiers.
 *
 * A closing rdf:value stores the collected character data under the
 * current item; any other closing element ends the qualified description.
 *
 * @param string $elm Namespace and tag name, separated by a space
 */
private function endElementModeQDesc( $elm ) {
	if ( $elm !== self::NS_RDF . ' value' ) {
		array_shift( $this->mode );
		array_shift( $this->curItem );

		return;
	}

	list( $ns, $tag ) = explode( ' ', $this->curItem[0], 2 );
	$this->saveValue( $ns, $tag, $this->charContent );
}
621 
/**
 * Handler for hitting a closing element.
 *
 * Skips the wrapper elements (rdf:RDF, xmpmeta, xapmeta) and elements
 * without a namespace, then dispatches on the current mode.
 *
 * @param resource $parser XML parser handle (unused)
 * @param string $elm Namespace and tag name, separated by a space
 * @throws MWException on an empty mode stack, a missing current item, or
 *   an unexpected closing element in MODE_INITIAL
 */
function endElement( $parser, $elm ) {
	if ( $elm === ( self::NS_RDF . ' RDF' )
		|| $elm === 'adobe:ns:meta/ xmpmeta'
		|| $elm === 'adobe:ns:meta/ xapmeta'
	) {
		// ignore these.
		return;
	}

	if ( $elm === self::NS_RDF . ' type' ) {
		// these aren't really supported properly yet.
		// However, it appears they almost never used.
		wfDebugLog( 'XMP', __METHOD__ . ' encountered <rdf:type>' );
	}

	if ( strpos( $elm, ' ' ) === false ) {
		// This probably shouldn't happen.
		// However, there is a bug in an adobe product
		// that forgets the namespace on some things.
		// (Luckily they are unimportant things).
		wfDebugLog( 'XMP', __METHOD__ . " Encountered </$elm> which has no namespace. Skipping." );

		return;
	}

	// Check the mode stack itself. Counting $this->mode[0] would count an
	// integer mode value, which is not countable.
	if ( count( $this->mode ) === 0 ) {
		// This should never ever happen and means
		// there is a pretty major bug in this class.
		throw new MWException( 'Encountered end element with no mode' );
	}

	if ( count( $this->curItem ) == 0 && $this->mode[0] !== self::MODE_INITIAL ) {
		// just to be paranoid. Should always have a curItem, except for initially
		// (aka during MODE_INITAL).
		throw new MWException( "Hit end element </$elm> but no curItem" );
	}

	switch ( $this->mode[0] ) {
		case self::MODE_IGNORE:
			$this->endElementModeIgnore( $elm );
			break;
		case self::MODE_SIMPLE:
			$this->endElementModeSimple( $elm );
			break;
		case self::MODE_STRUCT:
		case self::MODE_SEQ:
		case self::MODE_BAG:
		case self::MODE_LANG:
		case self::MODE_BAGSTRUCT:
			// MODE_BAGSTRUCT is pushed like the other nested modes when
			// the property opens, so it has to be popped here as well.
			$this->endElementNested( $elm );
			break;
		case self::MODE_INITIAL:
			if ( $elm === self::NS_RDF . ' Description' ) {
				array_shift( $this->mode );
			} else {
				throw new MWException( 'Element ended unexpectedly while in MODE_INITIAL' );
			}
			break;
		case self::MODE_LI:
		case self::MODE_LI_LANG:
			$this->endElementModeLi( $elm );
			break;
		case self::MODE_QDESC:
			$this->endElementModeQDesc( $elm );
			break;
		default:
			wfDebugLog( 'XMP', __METHOD__ . " no mode (elm = $elm)" );
			break;
	}
}
705 
/**
 * Opening element while in MODE_IGNORE.
 *
 * Only an element with the same name as the one being ignored is tracked,
 * so that its matching closing tag does not end ignore mode too early.
 *
 * @param string $elm Namespace and tag name, separated by a space
 */
private function startElementModeIgnore( $elm ) {
	if ( $elm !== $this->curItem[0] ) {
		return;
	}

	array_unshift( $this->curItem, $elm );
	array_unshift( $this->mode, self::MODE_IGNORE );
}
723 
/**
 * Opening element while in MODE_BAG or MODE_BAGSTRUCT; this must be
 * <rdf:Bag>.
 *
 * @param string $elm Namespace and tag name, separated by a space
 * @throws MWException if the element is anything other than rdf:Bag
 */
private function startElementModeBag( $elm ) {
	if ( $elm !== self::NS_RDF . ' Bag' ) {
		throw new MWException( "Expected <rdf:Bag> but got $elm." );
	}

	array_unshift( $this->mode, self::MODE_LI );
}
738 
/**
 * Start element in MODE_SEQ (ordered array); this should always be
 * <rdf:Seq>. An <rdf:Bag> is tolerated and treated like a Seq.
 *
 * @param string $elm Namespace and tag name, separated by a space
 * @throws MWException if the element is neither rdf:Seq nor rdf:Bag
 */
private function startElementModeSeq( $elm ) {
	if ( $elm !== self::NS_RDF . ' Seq' ) {
		if ( $elm !== self::NS_RDF . ' Bag' ) {
			throw new MWException( "Expected <rdf:Seq> but got $elm." );
		}

		# bug 27105
		wfDebugLog( 'XMP', __METHOD__ . ' Expected an rdf:Seq, but got an rdf:Bag. Pretending'
			. ' it is a Seq, since some buggy software is known to screw this up.' );
	}

	array_unshift( $this->mode, self::MODE_LI );
}
758 
/**
 * Opening element while in MODE_LANG (language alternative); this must
 * be <rdf:Alt>.
 *
 * @param string $elm Namespace and tag name, separated by a space
 * @throws MWException if the element is anything other than rdf:Alt
 */
private function startElementModeLang( $elm ) {
	if ( $elm === self::NS_RDF . ' Alt' ) {
		array_unshift( $this->mode, self::MODE_LI_LANG );
	} else {
		// The message used to name rdf:Seq, which is not what is checked above.
		throw new MWException( "Expected <rdf:Alt> but got $elm." );
	}
}
780 
/**
 * Handle an opening element when in MODE_SIMPLE.
 *
 * Only character data is expected here. An rdf:Description means the
 * value has qualifiers and switches to MODE_QDESC; any other element is
 * ignored together with its children.
 *
 * @param string $elm Namespace and tag name, separated by a space
 * @param array $attribs Attributes of the element
 * @throws MWException on a bare <rdf:value>
 */
private function startElementModeSimple( $elm, $attribs ) {
	if ( $elm === self::NS_RDF . ' value' ) {
		// This should not be here.
		throw new MWException( __METHOD__ . ' Encountered <rdf:value> where it was unexpected.' );
	}

	if ( $elm !== self::NS_RDF . ' Description' ) {
		// something else we don't recognize, like a qualifier maybe.
		wfDebugLog( 'XMP', __METHOD__ .
			" Encountered element <$elm> where only expecting character data as value of " .
			$this->curItem[0] );
		array_unshift( $this->mode, self::MODE_IGNORE );
		array_unshift( $this->curItem, $elm );

		return;
	}

	// The value has qualifiers. Repeat the current item on the stack so
	// that closing the description pops this copy only.
	$current = $this->curItem[0];
	array_unshift( $this->mode, self::MODE_QDESC );
	array_unshift( $this->curItem, $current );

	$valueKey = self::NS_RDF . ' value';
	if ( isset( $attribs[$valueKey] ) ) {
		list( $ns, $tag ) = explode( ' ', $current, 2 );
		$this->saveValue( $ns, $tag, $attribs[$valueKey] );
	}
}
821 
/**
 * Opening element while in MODE_QDESC.
 *
 * <rdf:value> needs no action here; anything else is a qualifier and is
 * ignored.
 *
 * @param string $elm Namespace and tag name, separated by a space
 */
private function startElementModeQDesc( $elm ) {
	if ( $elm !== self::NS_RDF . ' value' ) {
		// otherwise its a qualifier, which we ignore
		array_unshift( $this->mode, self::MODE_IGNORE );
		array_unshift( $this->curItem, $elm );
	}
}
845 
/**
 * Starting an element when in MODE_INITIAL. This usually happens when we
 * hit an element inside the outer rdf:Description.
 *
 * Known properties push their configured mode; unknown ones, and struct
 * members found outside a struct, are ignored with their children.
 *
 * @param string $ns Namespace URI
 * @param string $tag Local tag name
 * @param array $attribs Attributes of the element
 * @throws MWException if character data was collected before this element
 */
private function startElementModeInitial( $ns, $tag, $attribs ) {
	if ( $ns === self::NS_RDF ) {
		// RDF elements only contribute their attributes here.
		$this->doAttribs( $attribs );

		return;
	}

	$fullName = $ns . ' ' . $tag;

	if ( !isset( $this->items[$ns][$tag] ) ) {
		// This element is not on our list of allowed elements so ignore.
		wfDebugLog( 'XMP', __METHOD__ . " Ignoring unrecognized element <$ns:$tag>." );
		array_unshift( $this->mode, self::MODE_IGNORE );
		array_unshift( $this->curItem, $fullName );

		return;
	}

	$info = $this->items[$ns][$tag];

	if ( isset( $info['structPart'] ) ) {
		// If this element is supposed to appear only as
		// a child of a structure, but appears here (not as
		// a child of a struct), then something weird is
		// happening, so ignore this element and its children.
		wfDebugLog( 'XMP', "Encountered <$ns:$tag> outside"
			. " of its expected parent. Ignoring." );

		array_unshift( $this->mode, self::MODE_IGNORE );
		array_unshift( $this->curItem, $fullName );

		return;
	}

	$mode = $info['mode'];
	array_unshift( $this->mode, $mode );
	array_unshift( $this->curItem, $fullName );

	if ( $mode === self::MODE_STRUCT ) {
		$this->ancestorStruct = isset( $info['map_name'] ) ? $info['map_name'] : $tag;
	}

	if ( $this->charContent !== false ) {
		// Something weird.
		// Should not happen in valid XMP.
		throw new MWException( 'tag nested in non-whitespace characters.' );
	}

	// process attributes
	$this->doAttribs( $attribs );
}
900 
/**
 * Opening element while in MODE_STRUCT.
 *
 * A known property is pushed with its configured mode, after checking
 * that the enclosing structure allows it as a child. An unknown property
 * is ignored together with its children. An rdf:Description has its
 * attributes processed and repeats the struct on both stacks.
 *
 * @param string $ns Namespace URI
 * @param string $tag Local tag name
 * @param array $attribs Attributes of the element
 * @throws MWException if the property is not allowed inside the current
 *   structure, or if character data was collected before this element
 */
private function startElementModeStruct( $ns, $tag, $attribs ) {
	if ( $ns !== self::NS_RDF ) {

		if ( isset( $this->items[$ns][$tag] ) ) {
			if ( isset( $this->items[$ns][$this->ancestorStruct]['children'] )
				&& !isset( $this->items[$ns][$this->ancestorStruct]['children'][$tag] )
			) {
				// This assumes that we don't have inter-namespace nesting
				// which we don't in all the properties we're interested in.
				throw new MWException( " <$tag> appeared nested in <" . $this->ancestorStruct
					. "> where it is not allowed." );
			}
			array_unshift( $this->mode, $this->items[$ns][$tag]['mode'] );
			array_unshift( $this->curItem, $ns . ' ' . $tag );
			if ( $this->charContent !== false ) {
				// Something weird.
				// Should not happen in valid XMP.
				throw new MWException( "tag <$tag> nested in non-whitespace characters (" .
					$this->charContent . ")." );
			}
		} else {
			// Unknown member: ignore it and its children. The full name
			// must be pushed so that endElementModeIgnore() can recognise
			// the matching closing tag. This method has no $elm parameter.
			array_unshift( $this->mode, self::MODE_IGNORE );
			array_unshift( $this->curItem, $ns . ' ' . $tag );

			return;
		}
	}

	if ( $ns === self::NS_RDF && $tag === 'Description' ) {
		$this->doAttribs( $attribs );
		array_unshift( $this->mode, self::MODE_STRUCT );
		array_unshift( $this->curItem, $this->curItem[0] );
	}
}
954 
/**
 * Opening element while in MODE_LI; this must be <rdf:li>.
 *
 * Depending on the mode of the enclosing property, the item is either a
 * simple value or, for MODE_BAGSTRUCT, a structure.
 *
 * @param string $elm Namespace and tag name, separated by a space
 * @param array $attribs Attributes of the element
 * @throws MWException if the element is not rdf:li or the stacks are
 *   shorter than expected
 */
private function startElementModeLi( $elm, $attribs ) {
	if ( $elm !== self::NS_RDF . ' li' ) {
		throw new MWException( "<rdf:li> expected but got $elm." );
	}

	if ( !isset( $this->mode[1] ) ) {
		// This should never ever ever happen. Checking for it
		// to be paranoid.
		throw new MWException( 'In mode Li, but no 2xPrevious mode!' );
	}

	if ( $this->mode[1] !== self::MODE_BAGSTRUCT ) {
		// Normal BAG or SEQ containing simple values.
		array_unshift( $this->mode, self::MODE_SIMPLE );
		// need to add curItem[0] on again since one is for the specific item
		// and one is for the entire group.
		array_unshift( $this->curItem, $this->curItem[0] );
		$this->processingArray = true;

		return;
	}

	// This list item contains a compound (STRUCT) value.
	array_unshift( $this->mode, self::MODE_STRUCT );
	array_unshift( $this->curItem, $elm );
	$this->processingArray = true;

	if ( !isset( $this->curItem[1] ) ) {
		// be paranoid.
		throw new MWException( 'Can not find parent of BAGSTRUCT.' );
	}

	// The struct is named after the enclosing property, not after rdf:li.
	list( $curNS, $curTag ) = explode( ' ', $this->curItem[1] );
	if ( isset( $this->items[$curNS][$curTag]['map_name'] ) ) {
		$this->ancestorStruct = $this->items[$curNS][$curTag]['map_name'];
	} else {
		$this->ancestorStruct = $curTag;
	}

	$this->doAttribs( $attribs );
}
1003 
/**
 * Opening element in MODE_LI_LANG; this must be <rdf:li> carrying a
 * valid xml:lang attribute.
 *
 * @param string $elm Namespace and tag name, separated by a space
 * @param array $attribs Attributes of the element
 * @throws MWException if the element is not rdf:li, or xml:lang is
 *   missing or malformed
 */
private function startElementModeLiLang( $elm, $attribs ) {
	if ( $elm !== self::NS_RDF . ' li' ) {
		throw new MWException( __METHOD__ . " <rdf:li> expected but got $elm." );
	}

	$langKey = self::NS_XML . ' lang';
	$hasValidLang = isset( $attribs[$langKey] )
		&& preg_match( '/^[-A-Za-z0-9]{2,}$/D', $attribs[$langKey] );
	if ( !$hasValidLang ) {
		throw new MWException( __METHOD__
			. " <rdf:li> did not contain, or has invalid xml:lang attribute in lang alternative" );
	}

	// Lang is case-insensitive.
	$this->itemLang = strtolower( $attribs[$langKey] );

	// need to add curItem[0] on again since one is for the specific item
	// and one is for the entire group.
	array_unshift( $this->curItem, $this->curItem[0] );
	array_unshift( $this->mode, self::MODE_SIMPLE );
	$this->processingArray = true;
}
/**
 * Hits an opening element.
 *
 * Skips the wrapper elements (rdf:RDF, xmpmeta, xapmeta) and elements
 * without a namespace, enters MODE_INITIAL on the outer rdf:Description,
 * then dispatches on the current mode.
 *
 * @param resource $parser XML parser handle (unused)
 * @param string $elm Namespace and tag name, separated by a space
 * @param array $attribs Attributes of the element
 * @throws MWException if there is no mode, or the mode is unknown
 */
function startElement( $parser, $elm, $attribs ) {
	$wrappers = array(
		self::NS_RDF . ' RDF',
		'adobe:ns:meta/ xmpmeta',
		'adobe:ns:meta/ xapmeta',
	);
	if ( in_array( $elm, $wrappers, true ) ) {
		/* ignore. */
		return;
	}

	if ( $elm === self::NS_RDF . ' Description' ) {
		if ( count( $this->mode ) === 0 ) {
			// outer rdf:desc
			array_unshift( $this->mode, self::MODE_INITIAL );
		}
	} elseif ( $elm === self::NS_RDF . ' type' ) {
		// This doesn't support rdf:type properly.
		// In practise I have yet to see a file that
		// uses this element, however it is mentioned
		// on page 25 of part 1 of the xmp standard.
		//
		// also it seems as if exiv2 and exiftool do not support
		// this either (That or I misunderstand the standard)
		wfDebugLog( 'XMP', __METHOD__ . ' Encountered <rdf:type> which isn\'t currently supported' );
	}

	if ( strpos( $elm, ' ' ) === false ) {
		// This probably shouldn't happen.
		wfDebugLog( 'XMP', __METHOD__ . " Encountered <$elm> which has no namespace. Skipping." );

		return;
	}

	list( $ns, $tag ) = explode( ' ', $elm, 2 );

	if ( count( $this->mode ) === 0 ) {
		// This should not happen.
		throw new MWException( 'Error extracting XMP, '
			. "encountered <$elm> with no mode" );
	}

	$currentMode = $this->mode[0];
	switch ( $currentMode ) {
		case self::MODE_INITIAL:
			$this->startElementModeInitial( $ns, $tag, $attribs );
			break;
		case self::MODE_STRUCT:
			$this->startElementModeStruct( $ns, $tag, $attribs );
			break;
		case self::MODE_SIMPLE:
			$this->startElementModeSimple( $elm, $attribs );
			break;
		case self::MODE_QDESC:
			$this->startElementModeQDesc( $elm );
			break;
		case self::MODE_IGNORE:
			$this->startElementModeIgnore( $elm );
			break;
		case self::MODE_BAG:
		case self::MODE_BAGSTRUCT:
			$this->startElementModeBag( $elm );
			break;
		case self::MODE_SEQ:
			$this->startElementModeSeq( $elm );
			break;
		case self::MODE_LANG:
			$this->startElementModeLang( $elm );
			break;
		case self::MODE_LI:
			$this->startElementModeLi( $elm, $attribs );
			break;
		case self::MODE_LI_LANG:
			$this->startElementModeLiLang( $elm, $attribs );
			break;
		default:
			throw new MWException( 'StartElement in unknown mode: ' . $this->mode[0] );
	}
}
1124 
1125  // @codingStandardsIgnoreStart Long line that cannot be broken
1141  // @codingStandardsIgnoreEnd
/**
 * Process the attributes of an element.
 *
 * rdf:parseType="Resource" on a simple value switches the current mode to
 * MODE_QDESC. rdf:value and rdf:resource are treated as character data.
 * Attributes naming a known property are stored through saveValue();
 * everything else is skipped.
 *
 * @param array $attribs Attribute values keyed by "namespace name"
 * @throws MWException if a known property appears as an attribute while
 *   in MODE_SIMPLE
 */
private function doAttribs( $attribs ) {
	// first check for rdf:parseType attribute, as that can change
	// how the attributes are interperted.
	$parseTypeKey = self::NS_RDF . ' parseType';
	if ( isset( $attribs[$parseTypeKey] )
		&& $attribs[$parseTypeKey] === 'Resource'
		&& $this->mode[0] === self::MODE_SIMPLE
	) {
		// this is equivalent to having an inner rdf:Description
		$this->mode[0] = self::MODE_QDESC;
	}

	foreach ( $attribs as $name => $val ) {
		if ( strpos( $name, ' ' ) === false ) {
			// This shouldn't happen, but so far some old software forgets namespace
			// on rdf:about.
			wfDebugLog( 'XMP', __METHOD__ . ' Encountered non-namespaced attribute: '
				. " $name=\"$val\". Skipping. " );
			continue;
		}

		list( $ns, $tag ) = explode( ' ', $name, 2 );

		if ( $ns === self::NS_RDF ) {
			if ( $tag === 'value' || $tag === 'resource' ) {
				// resource is for url.
				// value attribute is a weird way of just putting the contents.
				$this->char( $this->xmlParser, $val );
			}
			continue;
		}

		if ( !isset( $this->items[$ns][$tag] ) ) {
			wfDebugLog( 'XMP', __METHOD__ . " Ignoring unrecognized element <$ns:$tag>." );
			continue;
		}

		if ( $this->mode[0] === self::MODE_SIMPLE ) {
			throw new MWException( __METHOD__
				. " $ns:$tag found as attribute where not allowed" );
		}
		$this->saveValue( $ns, $tag, $val );
	}
}
1179 
/**
 * Validate a value and store it in $this->results.
 *
 * Where it is stored depends on the current state: inside a struct,
 * inside an array, inside both (bag of structs), inside a language
 * alternative, or as a plain value.
 *
 * @param string $ns Namespace URI of the property
 * @param string $tag Local tag name of the property
 * @param string $val Value to store
 */
private function saveValue( $ns, $tag, $val ) {
	$info =& $this->items[$ns][$tag];
	$finalName = isset( $info['map_name'] ) ? $info['map_name'] : $tag;

	if ( isset( $info['validate'] ) ) {
		// A bare method name refers to a static method of XMPValidate.
		$validate = $info['validate'];
		if ( !is_array( $validate ) ) {
			$validate = array( 'XMPValidate', $validate );
		}

		if ( !is_callable( $validate ) ) {
			wfDebugLog( 'XMP', __METHOD__ . " Validation function for $finalName ("
				. $validate[0] . '::' . $validate[1] . '()) is not callable.' );
		} else {
			// $val is passed by reference rather than returned, to be
			// consistent with how structures are validated.
			call_user_func_array( $validate, array( $info, &$val, true ) );
			if ( $val === null ) {
				wfDebugLog( 'XMP', __METHOD__ . " <$ns:$tag> failed validation." );

				return;
			}
		}
	}

	$group = 'xmp-' . $info['map_group'];

	if ( $this->ancestorStruct ) {
		if ( $this->processingArray ) {
			// Aka both an array and a struct. ( self::MODE_BAGSTRUCT )
			$this->results[$group][$this->ancestorStruct][][$finalName] = $val;
		} else {
			$this->results[$group][$this->ancestorStruct][$finalName] = $val;
		}
	} elseif ( $this->processingArray ) {
		if ( $this->itemLang === false ) {
			// normal array
			$this->results[$group][$finalName][] = $val;
		} else {
			// lang array.
			$this->results[$group][$finalName][$this->itemLang] = $val;
		}
	} else {
		$this->results[$group][$finalName] = $val;
	}
}
1232 }
XMPReader\endElement
endElement( $parser, $elm)
Handler for hitting a closing element.
Definition: XMP.php:624
XMPReader\MODE_BAGSTRUCT
const MODE_BAGSTRUCT
Definition: XMP.php:95
XMPReader\$ancestorStruct
bool string $ancestorStruct
The structure name when processing nested structures.
Definition: XMP.php:54
php
skin txt MediaWiki includes four core it has been set as the default in MediaWiki since the replacing Monobook it had been been the default skin since before being replaced by Vector largely rewritten in while keeping its appearance Several legacy skins were removed in the as the burden of supporting them became too heavy to bear Those in etc for skin dependent CSS etc for skin dependent JavaScript These can also be customised on a per user by etc This feature has led to a wide variety of user styles becoming that gallery is a good place to ending in php
Definition: skin.txt:62
XMPReader\startElementModeSimple
startElementModeSimple( $elm, $attribs)
Handle an opening element when in MODE_SIMPLE.
Definition: XMP.php:788
XMPReader\startElementModeInitial
startElementModeInitial( $ns, $tag, $attribs)
Starting an element when in MODE_INITIAL This usually happens when we hit an element inside the outer...
Definition: XMP.php:847
XMPReader\endElementModeLi
endElementModeLi( $elm)
Hit a closing element in MODE_LI (either rdf:Seq, or rdf:Bag ) Add information about what type of ele...
Definition: XMP.php:559
XMPReader\$itemLang
bool string $itemLang
Used for lang alts only.
Definition: XMP.php:64
wfDebugLog
wfDebugLog( $logGroup, $text, $dest='all')
Send a line to a supplementary debug log file, if configured, or main debug log if not.
Definition: GlobalFunctions.php:1040
wfSuppressWarnings
wfSuppressWarnings( $end=false)
Reference-counted warning suppression.
Definition: GlobalFunctions.php:2387
XMPReader\resetXMLParser
resetXMLParser()
Main use is if a single item has multiple xmp documents describing it.
Definition: XMP.php:121
XMPReader\MODE_LI
const MODE_LI
Definition: XMP.php:83
XMPReader\endElementModeSimple
endElementModeSimple( $elm)
Hit a closing element when in MODE_SIMPLE.
Definition: XMP.php:457
XMPReader\startElement
startElement( $parser, $elm, $attribs)
Hits an opening element.
Definition: XMP.php:1038
XMPReader\startElementModeLiLang
startElementModeLiLang( $elm, $attribs)
Opening element in MODE_LI_LANG.
Definition: XMP.php:1007
XMPReader\MODE_IGNORE
const MODE_IGNORE
Definition: XMP.php:82
XMPReader\$items
array $items
XMP item configuration array.
Definition: XMP.php:50
XMPReader\MODE_INITIAL
const MODE_INITIAL
These are various mode constants.
Definition: XMP.php:81
XMPReader\endElementModeQDesc
endElementModeQDesc( $elm)
End element while in MODE_QDESC; mostly when ending an element when we have a simple value that has qu...
Definition: XMP.php:598
XMPReader\MODE_LI_LANG
const MODE_LI_LANG
Definition: XMP.php:84
MWException
MediaWiki exception.
Definition: MWException.php:26
XMPReader\MODE_SEQ
const MODE_SEQ
Definition: XMP.php:91
wfRestoreWarnings
wfRestoreWarnings()
Restore error level to previous value.
Definition: GlobalFunctions.php:2417
XMPReader\MODE_SIMPLE
const MODE_SIMPLE
Definition: XMP.php:89
XMPReader\endElementModeIgnore
endElementModeIgnore( $elm)
When we hit a closing element in MODE_IGNORE. Check to see if this is the element we started to ignore...
Definition: XMP.php:435
$parser
do that in ParserLimitReportFormat instead $parser
Definition: hooks.txt:1956
XMPReader\startElementModeSeq
startElementModeSeq( $elm)
Start element in MODE_SEQ (ordered array); this should always be <rdf:Seq>.
Definition: XMP.php:735
wfRunHooks
wfRunHooks( $event, array $args=array(), $deprecatedVersion=null)
Call hook functions defined in $wgHooks.
Definition: GlobalFunctions.php:4001
array
the array() calling protocol came about after MediaWiki 1.4rc1.
List of Api Query prop modules.
XMPReader\MODE_LANG
const MODE_LANG
Definition: XMP.php:93
XMPReader\startElementModeLang
startElementModeLang( $elm)
Start element in MODE_LANG (language alternative); this should always be <rdf:Alt>.
Definition: XMP.php:762
list
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global list
Definition: deferred.txt:11
XMPReader\MODE_BAG
const MODE_BAG
Definition: XMP.php:92
XMPReader\$charContent
bool|string $charContent
Temporary holder for character data that appears in xmp doc.
Definition: XMP.php:56
XMPReader
Class for reading xmp data containing properties relevant to images, and spitting out an array that F...
Definition: XMP.php:49
$ok
$ok
Definition: UtfNormalTest.php:71
XMPReader\$extendedXMPOffset
int $extendedXMPOffset
Definition: XMP.php:70
$name
Allows to change the fields on the form that will be generated $name
Definition: hooks.txt:336
XMPReader\startElementModeStruct
startElementModeStruct( $ns, $tag, $attribs)
Hit an opening element when in a Struct (MODE_STRUCT). This is generally for fields of a compound prop...
Definition: XMP.php:909
XMPReader\$curItem
array $curItem
Array to hold the current element (and previous element, and so on).
Definition: XMP.php:52
XMPReader\parse
parse( $content, $allOfIt=true, $reset=false)
Main function to call to parse XMP.
Definition: XMP.php:252
XMPReader\$charset
bool|string $charset
Character set like 'UTF-8'.
Definition: XMP.php:68
XMPReader\startElementModeLi
startElementModeLi( $elm, $attribs)
Opening element in MODE_LI; processes elements of arrays.
Definition: XMP.php:957
XMPReader\NS_RDF
const NS_RDF
Definition: XMP.php:97
XMPReader\doAttribs
doAttribs( $attribs)
Process attributes.
Definition: XMP.php:1131
XMPReader\endElementNested
endElementNested( $elm)
Hit a closing element in MODE_STRUCT, MODE_SEQ, or MODE_BAG; generally means we've finished processing a ...
Definition: XMP.php:492
XMPInfo\getItems
static getItems()
Get the items array.
Definition: XMPInfo.php:33
XMPReader\MODE_ALT
const MODE_ALT
Definition: XMP.php:94
XMPReader\MODE_STRUCT
const MODE_STRUCT
Definition: XMP.php:90
as
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
XMPReader\$results
array $results
Array to hold results.
Definition: XMP.php:60
XMPReader\startElementModeQDesc
startElementModeQDesc( $elm)
Start an element when in MODE_QDESC.
Definition: XMP.php:825
XMPReader\MODE_QDESC
const MODE_QDESC
Definition: XMP.php:85
XMPReader\__destruct
__destruct()
Destroy the xml parser.
Definition: XMP.php:143
XMPReader\NS_XML
const NS_XML
Definition: XMP.php:98
XMPReader\startElementModeBag
startElementModeBag( $elm)
Start element in MODE_BAG (unordered array); this should always be <rdf:Bag>.
Definition: XMP.php:720
XMPReader\parseExtended
parseExtended( $content)
Entry point for XMPExtended blocks in jpeg files.
Definition: XMP.php:325
XMPReader\saveValue
saveValue( $ns, $tag, $val)
Given an extracted value, save it to results array.
Definition: XMP.php:1180
XMPReader\char
char( $parser, $data)
Character data handler. Called whenever character data is found in the XMP document.
Definition: XMP.php:400
XMPReader\__construct
__construct()
Constructor.
Definition: XMP.php:105
XMPReader\$xmlParser
resource $xmlParser
A resource handle for the XML parser.
Definition: XMP.php:66
$error
usually copyright or history_copyright This message must be in HTML not wikitext $subpages will be ignored and the rest of subPageSubtitle() will run. 'SkinTemplateBuildNavUrlsNav_urlsAfterPermalink' whether MediaWiki currently thinks this is a CSS JS page Hooks may change this value to override the return value of Title::isCssOrJsPage(). 'TitleIsAlwaysKnown' whether MediaWiki currently thinks this page is known isMovable() always returns false. $title whether MediaWiki currently thinks this page is movable Hooks may change this value to override the return value of Title::isMovable(). 'TitleIsWikitextPage' whether MediaWiki currently thinks this is a wikitext page Hooks may change this value to override the return value of Title::isWikitextPage() 'TitleMove' use UploadVerification and UploadVerifyFile instead where the first element is the message key and the remaining elements are used as parameters to the message based on mime etc Preferred in most cases over UploadVerification object with all info about the upload string as detected by MediaWiki Handlers will typically only apply for specific mime types object & $error
Definition: hooks.txt:2573
$e
if( $useReadline) $e
Definition: eval.php:66
$attribs
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses just before the function returns a value If you return an< a > element with HTML attributes $attribs and contents $html will be returned If you return $ret will be returned and may include noclasses after processing & $attribs
Definition: hooks.txt:1530
XMPReader\$mode
array $mode
Stores the state the XMPReader is in (see MODE_FOO constants).
Definition: XMP.php:58
XMPReader\getResults
getResults()
Get the result array.
Definition: XMP.php:154
XMPReader\$processingArray
bool $processingArray
If we're doing a seq or bag.
Definition: XMP.php:62
XMPReader\startElementModeIgnore
startElementModeIgnore( $elm)
Hit an opening element while in MODE_IGNORE.
Definition: XMP.php:706