MediaWiki  1.23.16
XmlTypeCheck.php
Go to the documentation of this file.
1 <?php
/**
 * Streaming XML well-formedness checker built on XMLReader.
 *
 * The whole document is read once at construction time. While reading, an
 * optional element filter callback and optional processing-instruction and
 * DTD callbacks are invoked; any truthy callback return sets $filterMatch.
 * The outcome is exposed through the public $wellFormed, $filterMatch,
 * $filterMatchType and $rootElement properties.
 */
class XmlTypeCheck {
	/**
	 * Will be set to true or false to indicate whether the file is
	 * well-formed XML. Null while validation has not finished.
	 */
	public $wellFormed = null;

	/**
	 * Will be set to true if the optional element filter returned
	 * a match at some point.
	 */
	public $filterMatch = false;

	/**
	 * The truthy value returned by the DTD callback that most recently
	 * reported a match, or null if no DTD callback matched.
	 */
	public $filterMatchType = null;

	/**
	 * Name of the document's root element, including any namespace
	 * as an expanded URL.
	 */
	public $rootElement = '';

	/**
	 * Element filter callback, or null for none. It is called as
	 * callback( $name, $attribs, $data ) each time an element is closed.
	 * Kept public because it used to be created as a dynamic property.
	 */
	public $filterCallback = null;

	/**
	 * A stack of strings containing the data of each xml element as it's
	 * processed. Text is appended to the top string, which is popped and
	 * handed to the filter when the element is closed.
	 */
	protected $elementData = array();

	/**
	 * A stack of element names and attributes, as we process them.
	 */
	protected $elementDataContext = array();

	/**
	 * Current depth of the data stack.
	 */
	protected $stackDepth = 0;

	/**
	 * Additional parsing options. Caller-supplied options are merged over
	 * these defaults in the constructor.
	 */
	private $parserOptions = array(
		'processing_instruction_handler' => '',
		'external_dtd_handler' => '',
		'dtd_handler' => '',
		'require_safe_dtd' => true
	);

	/**
	 * Allow filtering an XML file. The input is validated immediately.
	 *
	 * @param string $input Filename when $isFile is true, otherwise the XML text itself
	 * @param callable|null $filterCallback Called as ( $name, $attribs, $data ) for every
	 *   closed element; a truthy return value sets $filterMatch
	 * @param bool $isFile Whether $input is a filename (true) or an XML string (false)
	 * @param array $options Overrides for the parser options:
	 *   - processing_instruction_handler: callback( $target, $data ); a truthy
	 *     return sets $filterMatch
	 *   - external_dtd_handler: callback( $type, $publicId, $systemId ), called when
	 *     the doctype carries a PUBLIC or SYSTEM identifier
	 *   - dtd_handler: callback( $dtd ), called with the full doctype text
	 *   - require_safe_dtd: when true (default), an internal DTD subset that fails
	 *     checkDTDIsSafe() marks the document as not well-formed
	 */
	function __construct( $input, $filterCallback = null, $isFile = true, $options = array() ) {
		$this->filterCallback = $filterCallback;
		$this->parserOptions = array_merge( $this->parserOptions, $options );
		$this->validateFromInput( $input, $isFile );
	}

	/**
	 * Alternative constructor: from filename.
	 *
	 * @param string $fname Name of the file to check
	 * @param callable|null $filterCallback Element filter, see __construct()
	 * @return XmlTypeCheck
	 */
	public static function newFromFilename( $fname, $filterCallback = null ) {
		return new self( $fname, $filterCallback, true );
	}

	/**
	 * Alternative constructor: from string.
	 *
	 * @param string $string XML text to check
	 * @param callable|null $filterCallback Element filter, see __construct()
	 * @return XmlTypeCheck
	 */
	public static function newFromString( $string, $filterCallback = null ) {
		return new self( $string, $filterCallback, false );
	}

	/**
	 * Get the root element. Simple accessor to $rootElement.
	 *
	 * @return string
	 */
	public function getRootElement() {
		return $this->rootElement;
	}

	/**
	 * Open the input with XMLReader and run the validation pass.
	 *
	 * @param string $xml Filename or XML text, depending on $isFile
	 * @param bool $isFile
	 * @throws Exception Anything thrown during validation is rethrown after cleanup
	 */
	private function validateFromInput( $xml, $isFile ) {
		$reader = new XMLReader();
		$flags = LIBXML_NOERROR | LIBXML_NOWARNING;
		if ( $isFile ) {
			$opened = $reader->open( $xml, null, $flags );
		} else {
			$opened = $reader->XML( $xml, null, $flags );
		}
		if ( $opened !== true ) {
			// Couldn't open the XML
			$this->wellFormed = false;
			return;
		}

		// Entity substitution is switched on below, so keep the external
		// entity loader disabled for the duration of the parse.
		$oldDisable = libxml_disable_entity_loader( true );
		$reader->setParserProperty( XMLReader::SUBST_ENTITIES, true );

		$failure = null;
		try {
			$this->validate( $reader );
		} catch ( Exception $e ) {
			// Calling this malformed, because we didn't parse the whole
			// thing. Maybe just an external entity reference.
			$this->wellFormed = false;
			$failure = $e;
		}

		// Single cleanup path for both the success and the failure case.
		$reader->close();
		libxml_disable_entity_loader( $oldDisable );

		if ( $failure !== null ) {
			throw $failure;
		}
	}

	/**
	 * Advance the reader by one node. Any PHP error raised by the read is
	 * routed to XmlErrorHandler(), which marks the document as malformed.
	 *
	 * @param XMLReader $reader
	 * @return bool Result of XMLReader::read()
	 */
	private function readNext( XMLReader $reader ) {
		set_error_handler( array( $this, 'XmlErrorHandler' ) );
		$ret = $reader->read();
		restore_error_handler();
		return $ret;
	}

	/**
	 * Error handler installed around XMLReader::read(): any error means
	 * the document is not well-formed.
	 *
	 * @param int $errno
	 * @param string $errstr
	 */
	public function XmlErrorHandler( $errno, $errstr ) {
		$this->wellFormed = false;
	}

	/**
	 * Walk the whole document, feeding elements, text, processing
	 * instructions and the doctype to the respective handlers, and set
	 * $wellFormed according to the outcome.
	 *
	 * @param XMLReader $reader Reader that has been opened but not yet read from
	 */
	private function validate( XMLReader $reader ) {
		// First, move through anything that isn't an element, and
		// handle any processing instructions with the callback
		do {
			if ( !$this->readNext( $reader ) ) {
				// Hit the end of the document before any elements
				$this->wellFormed = false;
				return;
			}
			if ( $reader->nodeType === XMLReader::PI ) {
				$this->processingInstructionHandler( $reader->name, $reader->value );
			}
			if ( $reader->nodeType === XMLReader::DOC_TYPE ) {
				$this->DTDHandler( $reader );
			}
		} while ( $reader->nodeType !== XMLReader::ELEMENT );

		// Process the rest of the document
		do {
			switch ( $reader->nodeType ) {
				case XMLReader::ELEMENT:
					$name = $this->expandNS(
						$reader->name,
						$reader->namespaceURI
					);
					if ( $this->rootElement === '' ) {
						$this->rootElement = $name;
					}
					// Must be read before getAttributesArray() moves the
					// reader onto the attribute nodes.
					$empty = $reader->isEmptyElement;
					$attrs = $this->getAttributesArray( $reader );
					$this->elementOpen( $name, $attrs );
					if ( $empty ) {
						// Self-closing elements produce no END_ELEMENT node
						$this->elementClose();
					}
					break;

				case XMLReader::END_ELEMENT:
					$this->elementClose();
					break;

				case XMLReader::SIGNIFICANT_WHITESPACE:
				case XMLReader::CDATA:
				case XMLReader::TEXT:
					$this->elementData( $reader->value );
					break;

				case XMLReader::ENTITY_REF:
					// Unexpanded entity (maybe external?),
					// don't send to the filter (xml_parse didn't)
					break;

				case XMLReader::COMMENT:
					// Don't send to the filter (xml_parse didn't)
					break;

				case XMLReader::PI:
					// Processing instructions can happen after the header too
					$this->processingInstructionHandler(
						$reader->name,
						$reader->value
					);
					break;

				case XMLReader::DOC_TYPE:
					// We should never see a doctype after first
					// element.
					$this->wellFormed = false;
					break;

				default:
					// One of DOC, ENTITY, END_ENTITY,
					// NOTATION, or XML_DECLARATION
					// xml_parse didn't send these to the filter, so we won't.
			}
		} while ( $this->readNext( $reader ) );

		if ( $this->stackDepth !== 0 ) {
			// Some element was never closed
			$this->wellFormed = false;
		} elseif ( $this->wellFormed === null ) {
			// Nothing flagged a problem along the way
			$this->wellFormed = true;
		}
	}

	/**
	 * Get all of the attributes for an XMLReader's current node.
	 *
	 * @param XMLReader $r Reader positioned on an element
	 * @return array Map of expanded attribute name to attribute value
	 */
	private function getAttributesArray( XMLReader $r ) {
		$attrs = array();
		while ( $r->moveToNextAttribute() ) {
			if ( $r->namespaceURI === 'http://www.w3.org/2000/xmlns/' ) {
				// XMLReader treats xmlns attributes as normal
				// attributes, while xml_parse doesn't
				continue;
			}
			$name = $this->expandNS( $r->name, $r->namespaceURI );
			$attrs[$name] = $r->value;
		}
		return $attrs;
	}

	/**
	 * Build the expanded name "<namespace URI>:<local name>" for a node.
	 *
	 * @param string $name Qualified name, possibly with a prefix
	 * @param string $namespaceURI Namespace URI; empty when the node has none
	 * @return string $name unchanged when there is no namespace
	 */
	private function expandNS( $name, $namespaceURI ) {
		if ( $namespaceURI ) {
			// Drop any prefix, keeping only the part after the last colon
			$parts = explode( ':', $name );
			$localname = array_pop( $parts );
			return "$namespaceURI:$localname";
		}
		return $name;
	}

	/**
	 * Push a newly opened element onto the stacks.
	 *
	 * @param string $name Expanded element name
	 * @param array $attribs Attributes from getAttributesArray()
	 */
	private function elementOpen( $name, $attribs ) {
		$this->elementDataContext[] = array( $name, $attribs );
		$this->elementData[] = '';
		$this->stackDepth++;
	}

	/**
	 * Pop the innermost element off the stacks and run the element filter
	 * on its name, attributes and collected text.
	 */
	private function elementClose() {
		list( $name, $attribs ) = array_pop( $this->elementDataContext );
		$data = array_pop( $this->elementData );
		$this->stackDepth--;

		if ( is_callable( $this->filterCallback )
			&& call_user_func(
				$this->filterCallback,
				$name,
				$attribs,
				$data
			)
		) {
			// Filter hit
			$this->filterMatch = true;
		}
	}

	/**
	 * Append trimmed character data to the innermost open element.
	 *
	 * @param string $data
	 */
	private function elementData( $data ) {
		if ( $this->stackDepth < 1 ) {
			// No open element to attach the data to; without this guard
			// the code would write to index -1 of the data stack.
			return;
		}
		// Collect any data here, and we'll run the callback in elementClose
		$this->elementData[$this->stackDepth - 1] .= trim( $data );
	}

	/**
	 * Pass a processing instruction to the configured handler, if any.
	 *
	 * @param string $target
	 * @param string $data
	 */
	private function processingInstructionHandler( $target, $data ) {
		$handler = $this->parserOptions['processing_instruction_handler'];
		if ( $handler && call_user_func( $handler, $target, $data ) ) {
			// Filter hit!
			$this->filterMatch = true;
		}
	}

	/**
	 * Handle coming across a <!DOCTYPE declaration.
	 *
	 * Runs the general DTD callback on the raw doctype text, the external
	 * DTD callback on its PUBLIC/SYSTEM identifiers, and (when
	 * require_safe_dtd is set) the safety check on the internal subset.
	 *
	 * @param XMLReader $reader Reader positioned on the DOC_TYPE node
	 */
	private function DTDHandler( XMLReader $reader ) {
		$externalCallback = $this->parserOptions['external_dtd_handler'];
		$generalCallback = $this->parserOptions['dtd_handler'];
		$checkIfSafe = $this->parserOptions['require_safe_dtd'];
		if ( !$externalCallback && !$generalCallback && !$checkIfSafe ) {
			// Nothing is interested in the doctype
			return;
		}
		$dtd = $reader->readOuterXML();
		$callbackReturn = false;

		if ( $generalCallback ) {
			$callbackReturn = call_user_func( $generalCallback, $dtd );
		}
		if ( $callbackReturn ) {
			// Filter hit!
			$this->filterMatch = true;
			$this->filterMatchType = $callbackReturn;
			$callbackReturn = false;
		}

		$parsedDTD = $this->parseDTD( $dtd );
		if ( $externalCallback && isset( $parsedDTD['type'] ) ) {
			$callbackReturn = call_user_func(
				$externalCallback,
				$parsedDTD['type'],
				isset( $parsedDTD['publicid'] ) ? $parsedDTD['publicid'] : null,
				isset( $parsedDTD['systemid'] ) ? $parsedDTD['systemid'] : null
			);
		}
		if ( $callbackReturn ) {
			// Filter hit!
			$this->filterMatch = true;
			$this->filterMatchType = $callbackReturn;
			$callbackReturn = false;
		}

		if ( $checkIfSafe && isset( $parsedDTD['internal'] ) ) {
			if ( !$this->checkDTDIsSafe( $parsedDTD['internal'] ) ) {
				$this->wellFormed = false;
			}
		}
	}

	/**
	 * Check if the internal subset of the DTD is safe.
	 *
	 * The subset passes only if it consists solely of:
	 *  - <!ENTITY name "value"> declarations whose quoted value is either a
	 *    single entity reference (1-64 characters between & and ;) or at most
	 *    255 characters containing no "%" and no "&" other than &amp; and
	 *    &quot; (double-quoted) or &amp; and &apos; (single-quoted);
	 *  - comments;
	 *  - the exact declaration
	 *    <!ATTLIST svg xmlns:xlink CDATA #FIXED "http://www.w3.org/1999/xlink">.
	 * Anything else is rejected.
	 *
	 * @param string $internalSubset Text between the square brackets of the doctype
	 * @return bool True if safe
	 */
	private function checkDTDIsSafe( $internalSubset ) {
		$res = preg_match(
			'/^(?:\s*<!ENTITY\s+\S+\s+' .
				'(?:"(?:&[^"%&;]{1,64};|(?:[^"%&]|&amp;|&quot;){0,255})"' .
				'|\'(?:&[^"%&;]{1,64};|(?:[^\'%&]|&amp;|&apos;){0,255})\')\s*>' .
				'|\s*<!--(?:[^-]|-[^-])*-->' .
				'|\s*<!ATTLIST svg xmlns:xlink CDATA #FIXED ' .
				// Dots are escaped so only the literal URL is accepted
				'"http:\/\/www\.w3\.org\/1999\/xlink">)*\s*$/',
			$internalSubset
		);

		// preg_match() returns false on error, which is treated as unsafe
		return (bool)$res;
	}

	/**
	 * Parse DTD into parts.
	 *
	 * If the doctype text cannot be matched, the document is marked as not
	 * well-formed and an empty array is returned.
	 *
	 * @param string $dtd The full doctype declaration
	 * @return array Possible keys, each present only when non-empty:
	 *   'type' ('PUBLIC' or 'SYSTEM'), 'publicid', 'systemid', 'internal'
	 */
	private function parseDTD( $dtd ) {
		$m = array();
		$res = preg_match(
			'/^<!DOCTYPE\s*\S+\s*' .
			'(?:(?P<typepublic>PUBLIC)\s*' .
				'(?:"(?P<pubquote>[^"]*)"|\'(?P<pubapos>[^\']*)\')' . // public identifier
				// The system identifier alternation has to be grouped, otherwise
				// its single-quoted form becomes a top-level alternative.
				'\s*(?:"(?P<pubsysquote>[^"]*)"|\'(?P<pubsysapos>[^\']*)\')' . // system identifier
			'|(?P<typesystem>SYSTEM)\s*' .
				'(?:"(?P<sysquote>[^"]*)"|\'(?P<sysapos>[^\']*)\')' .
			')?\s*' .
			'(?:\[\s*(?P<internal>.*)\])?\s*>$/s',
			$dtd,
			$m
		);
		if ( !$res ) {
			$this->wellFormed = false;
			return array();
		}
		$parsed = array();
		foreach ( $m as $field => $value ) {
			// Skip groups that did not take part in the match as well as the
			// numeric duplicates preg_match() adds for every named group.
			if ( $value === '' || is_numeric( $field ) ) {
				continue;
			}
			switch ( $field ) {
				case 'typepublic':
				case 'typesystem':
					$parsed['type'] = $value;
					break;
				case 'pubquote':
				case 'pubapos':
					$parsed['publicid'] = $value;
					break;
				case 'pubsysquote':
				case 'pubsysapos':
				case 'sysquote':
				case 'sysapos':
					$parsed['systemid'] = $value;
					break;
				case 'internal':
					$parsed['internal'] = $value;
					break;
			}
		}
		return $parsed;
	}
}
XmlTypeCheck\$filterMatch
$filterMatch
Will be set to true if the optional element filter returned a match at some point.
Definition: XmlTypeCheck.php:39
XmlTypeCheck\parseDTD
parseDTD( $dtd)
Parse DTD into parts.
Definition: XmlTypeCheck.php:451
php
skin txt MediaWiki includes four core it has been set as the default in MediaWiki since the replacing Monobook it had been been the default skin since before being replaced by Vector largely rewritten in while keeping its appearance Several legacy skins were removed in the as the burden of supporting them became too heavy to bear Those in etc for skin dependent CSS etc for skin dependent JavaScript These can also be customised on a per user by etc This feature has led to a wide variety of user styles becoming that gallery is a good place to ending in php
Definition: skin.txt:62
XmlTypeCheck\newFromString
static newFromString( $string, $filterCallback=null)
Alternative constructor: from string.
Definition: XmlTypeCheck.php:132
$ret
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses & $ret
Definition: hooks.txt:1530
XmlTypeCheck\__construct
__construct( $input, $filterCallback=null, $isFile=true, $options=array())
Allow filtering an XML file.
Definition: XmlTypeCheck.php:100
$fname
if(!defined( 'MEDIAWIKI')) $fname
This file is not a valid entry point, perform no further processing unless MEDIAWIKI is defined.
Definition: Setup.php:35
XmlTypeCheck\newFromFilename
static newFromFilename( $fname, $filterCallback=null)
Alternative constructor: from filename.
Definition: XmlTypeCheck.php:117
XmlTypeCheck\$rootElement
$rootElement
Name of the document's root element, including any namespace as an expanded URL.
Definition: XmlTypeCheck.php:45
$s
$s
Definition: mergeMessageFileList.php:156
XmlTypeCheck\expandNS
expandNS( $name, $namespaceURI)
Definition: XmlTypeCheck.php:297
XmlTypeCheck\getAttributesArray
getAttributesArray(XMLReader $r)
Get all of the attributes for an XMLReader's current node.
Definition: XmlTypeCheck.php:278
true
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses just before the function returns a value If you return true
Definition: hooks.txt:1530
XmlTypeCheck\$elementData
$elementData
A stack of strings containing the data of each xml element as it's processed.
Definition: XmlTypeCheck.php:52
csslex.WHITESPACE
string WHITESPACE
Definition: csslex.py:70
XmlTypeCheck\$elementDataContext
$elementDataContext
A stack of element names and attributes, as we process them.
Definition: XmlTypeCheck.php:57
XmlTypeCheck\XmlErrorHandler
XmlErrorHandler( $errno, $errstr)
Definition: XmlTypeCheck.php:184
XmlTypeCheck\DTDHandler
DTDHandler(XMLReader $reader)
Handle coming across a <!DOCTYPE declaration.
Definition: XmlTypeCheck.php:365
XmlTypeCheck\$wellFormed
$wellFormed
Will be set to true or false to indicate whether the file is well-formed XML.
Definition: XmlTypeCheck.php:33
XmlTypeCheck\readNext
readNext(XMLReader $reader)
Definition: XmlTypeCheck.php:177
array
the array() calling protocol came about after MediaWiki 1.4rc1.
List of Api Query prop modules.
list
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global list
Definition: deferred.txt:11
false
processing should stop and the error should be shown to the user * false
Definition: hooks.txt:188
XmlTypeCheck\$parserOptions
$parserOptions
Additional parsing options.
Definition: XmlTypeCheck.php:67
$options
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped & $options
Definition: hooks.txt:1530
XmlTypeCheck\$stackDepth
$stackDepth
Current depth of the data stack.
Definition: XmlTypeCheck.php:62
$name
Allows to change the fields on the form that will be generated $name
Definition: hooks.txt:336
$value
$value
Definition: styleTest.css.php:45
XmlTypeCheck\elementData
elementData( $data)
Definition: XmlTypeCheck.php:339
XmlTypeCheck\getRootElement
getRootElement()
Get the root element.
Definition: XmlTypeCheck.php:141
XmlTypeCheck
Definition: XmlTypeCheck.php:28
COMMENT
const COMMENT
Definition: UtfNormalTest2.php:33
XmlTypeCheck\elementOpen
elementOpen( $name, $attribs)
Definition: XmlTypeCheck.php:310
as
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
XmlTypeCheck\processingInstructionHandler
processingInstructionHandler( $target, $data)
Definition: XmlTypeCheck.php:348
XmlTypeCheck\validateFromInput
validateFromInput( $xml, $isFile)
Definition: XmlTypeCheck.php:149
XmlTypeCheck\validate
validate( $reader)
Definition: XmlTypeCheck.php:188
XmlTypeCheck\elementClose
elementClose()
Definition: XmlTypeCheck.php:318
$attribs
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses just before the function returns a value If you return an< a > element with HTML attributes $attribs and contents $html will be returned If you return $ret will be returned and may include noclasses after processing & $attribs
Definition: hooks.txt:1530
$res
$res
Definition: database.txt:21
XmlTypeCheck\checkDTDIsSafe
checkDTDIsSafe( $internalSubset)
Check if the internal subset of the DTD is safe.
Definition: XmlTypeCheck.php:428
$e
div flags Integer display flags(NO_ACTION_LINK, NO_EXTRA_USER_LINKS) 'LogException' returning false will NOT prevent logging $e
Definition: hooks.txt:1632