MediaWiki  1.30.0
XmlTypeCheck.php
Go to the documentation of this file.
1 <?php
/**
 * Streaming well-formedness checker and content filter for XML documents.
 *
 * The document is walked with XMLReader. Each element (expanded name,
 * attributes, trimmed text content), each processing instruction and the
 * DOCTYPE declaration can be passed to caller-supplied callbacks; a truthy
 * callback return value is recorded as a "filter match".
 *
 * Results are exposed through the public properties $wellFormed,
 * $filterMatch, $filterMatchType and $rootElement.
 */
class XmlTypeCheck {
	/**
	 * Will be set to true or false to indicate whether the file is
	 * well-formed XML. Stays null until a verdict has been reached.
	 * @var bool|null
	 */
	public $wellFormed = null;

	/**
	 * Will be set to true if the optional element filter returned
	 * a match at some point.
	 * @var bool
	 */
	public $filterMatch = false;

	/**
	 * Will contain the type of filter hit if the optional element filter
	 * returned a match at some point. Every hit overwrites the previous
	 * value, so only the most recent one is kept.
	 * @var mixed
	 */
	public $filterMatchType = false;

	/**
	 * Name of the document's root element, including any namespace
	 * as an expanded URL.
	 * @var string
	 */
	public $rootElement = '';

	/**
	 * A stack of strings containing the data of each xml element as it's
	 * processed. Appended to as text nodes arrive, popped on element close.
	 * @var string[]
	 */
	protected $elementData = [];

	/**
	 * A stack of element names and attributes, as we process them.
	 * Each entry is [ expanded name, attribute array ].
	 * @var array[]
	 */
	protected $elementDataContext = [];

	/**
	 * Current depth of the data stack.
	 * @var int
	 */
	protected $stackDepth = 0;

	/**
	 * Optional element filter, invoked as ( $name, $attribs, $data ) each
	 * time an element is closed.
	 *
	 * This used to be created implicitly by the constructor (and was
	 * therefore publicly visible); it is declared explicitly so that the
	 * class does not rely on dynamic properties. Treat it as internal.
	 * @var callable|null
	 */
	public $filterCallback = null;

	/**
	 * Additional parsing options. Caller-supplied options are merged over
	 * these defaults in the constructor.
	 * @var array
	 */
	private $parserOptions = [
		'processing_instruction_handler' => '',
		'external_dtd_handler' => '',
		'dtd_handler' => '',
		'require_safe_dtd' => true
	];

	/**
	 * Allow filtering an XML file.
	 *
	 * Parsing happens immediately; inspect $wellFormed, $filterMatch and
	 * $filterMatchType afterwards. Callbacks signal a problem by returning
	 * a truthy value, which is stored in $filterMatchType.
	 *
	 * @param string $input A filename, or a string containing the XML
	 * @param callable|null $filterCallback Optional element filter, called as
	 *   ( $name, $attribs, $data ) for every closed element
	 * @param bool $isFile True (default) if $input is a filename, false if
	 *   it is the XML text itself
	 * @param array $options Additional parsing options:
	 *   - processing_instruction_handler: callback ( $target, $data )
	 *   - external_dtd_handler: callback ( $type, $publicId, $systemId ),
	 *     called when the DOCTYPE references an external subset
	 *   - dtd_handler: callback ( $dtd ) given the full DOCTYPE text
	 *   - require_safe_dtd: reject internal subsets that do not pass
	 *     checkDTDIsSafe() (default true)
	 * @throws Exception Anything thrown by a callback is re-thrown after
	 *   $wellFormed has been set to false
	 */
	public function __construct( $input, $filterCallback = null, $isFile = true, $options = [] ) {
		$this->filterCallback = $filterCallback;
		$this->parserOptions = array_merge( $this->parserOptions, $options );
		$this->validateFromInput( $input, $isFile );
	}

	/**
	 * Alternative constructor: from filename.
	 *
	 * @param string $fname The filename of an XML document
	 * @param callable|null $filterCallback Optional element filter
	 * @return XmlTypeCheck
	 */
	public static function newFromFilename( $fname, $filterCallback = null ) {
		return new self( $fname, $filterCallback, true );
	}

	/**
	 * Alternative constructor: from string.
	 *
	 * @param string $string A string containing an XML document
	 * @param callable|null $filterCallback Optional element filter
	 * @return XmlTypeCheck
	 */
	public static function newFromString( $string, $filterCallback = null ) {
		return new self( $string, $filterCallback, false );
	}

	/**
	 * Get the root element. Simple accessor to $rootElement.
	 *
	 * @return string
	 */
	public function getRootElement() {
		return $this->rootElement;
	}

	/**
	 * Open the input with XMLReader and run the validation pass over it.
	 *
	 * @param string $xml Filename or XML text, depending on $isFile
	 * @param bool $isFile Whether $xml is a filename
	 * @throws Exception Re-thrown from validate() after cleanup
	 */
	private function validateFromInput( $xml, $isFile ) {
		if ( $xml === '' || $xml === null ) {
			// XMLReader refuses an empty source (with a warning on older
			// PHP versions, by throwing on PHP 8). Nothing to parse means
			// not well-formed either way, so decide that up front.
			$this->wellFormed = false;
			return;
		}

		$reader = new XMLReader();
		if ( $isFile ) {
			$s = $reader->open( $xml, null, LIBXML_NOERROR | LIBXML_NOWARNING );
		} else {
			$s = $reader->XML( $xml, null, LIBXML_NOERROR | LIBXML_NOWARNING );
		}
		if ( $s !== true ) {
			// Couldn't open the XML
			$this->wellFormed = false;
			return;
		}

		// Entities are substituted below, so external entity loading must
		// be switched off for the duration of the parse. The previous
		// setting is restored on every exit path.
		// NOTE(review): libxml_disable_entity_loader() is deprecated as of
		// PHP 8.0; do not drop it without an equivalent guard such as
		// libxml_set_external_entity_loader().
		$oldDisable = libxml_disable_entity_loader( true );
		$reader->setParserProperty( XMLReader::SUBST_ENTITIES, true );
		try {
			$this->validate( $reader );
		} catch ( Exception $e ) {
			// Calling this malformed, because we didn't parse the whole
			// thing. Maybe just an external entity reference.
			$this->wellFormed = false;
			$reader->close();
			libxml_disable_entity_loader( $oldDisable );
			throw $e;
		}
		$reader->close();
		libxml_disable_entity_loader( $oldDisable );
	}

	/**
	 * Advance the reader by one node, routing any PHP error raised while
	 * reading to XmlErrorHandler() instead of the global handler.
	 *
	 * @param XMLReader $reader
	 * @return bool Result of XMLReader::read()
	 */
	private function readNext( XMLReader $reader ) {
		set_error_handler( [ $this, 'XmlErrorHandler' ] );
		$ret = $reader->read();
		restore_error_handler();
		return $ret;
	}

	/**
	 * Error handler installed around XMLReader::read(): any error raised
	 * while reading marks the document as not well-formed.
	 *
	 * Public only because set_error_handler() needs to be able to call it.
	 *
	 * @param int $errno
	 * @param string $errstr
	 */
	public function XmlErrorHandler( $errno, $errstr ) {
		$this->wellFormed = false;
	}

	/**
	 * Walk the whole document, feeding the callbacks and deciding
	 * $wellFormed.
	 *
	 * @param XMLReader $reader An opened reader positioned before the
	 *   first node
	 */
	private function validate( $reader ) {
		// First, move through anything that isn't an element, and
		// handle any processing instructions with the callback
		do {
			if ( !$this->readNext( $reader ) ) {
				// Hit the end of the document before any elements
				$this->wellFormed = false;
				return;
			}
			if ( $reader->nodeType === XMLReader::PI ) {
				$this->processingInstructionHandler( $reader->name, $reader->value );
			}
			if ( $reader->nodeType === XMLReader::DOC_TYPE ) {
				$this->DTDHandler( $reader );
			}
		} while ( $reader->nodeType !== XMLReader::ELEMENT );

		// Process the rest of the document
		do {
			switch ( $reader->nodeType ) {
				case XMLReader::ELEMENT:
					$name = $this->expandNS(
						$reader->name,
						$reader->namespaceURI
					);
					if ( $this->rootElement === '' ) {
						$this->rootElement = $name;
					}
					// Must be read before the attributes: collecting them
					// moves the reader off the element node.
					$empty = $reader->isEmptyElement;
					$attrs = $this->getAttributesArray( $reader );
					$this->elementOpen( $name, $attrs );
					if ( $empty ) {
						// Self-closing elements produce no END_ELEMENT node
						$this->elementClose();
					}
					break;

				case XMLReader::END_ELEMENT:
					$this->elementClose();
					break;

				case XMLReader::WHITESPACE:
				case XMLReader::SIGNIFICANT_WHITESPACE:
				case XMLReader::CDATA:
				case XMLReader::TEXT:
					$this->elementData( $reader->value );
					break;

				case XMLReader::ENTITY_REF:
					// Unexpanded entity (maybe external?),
					// don't send to the filter (xml_parse didn't)
					break;

				case XMLReader::COMMENT:
					// Don't send to the filter (xml_parse didn't)
					break;

				case XMLReader::PI:
					// Processing instructions can happen after the header too
					$this->processingInstructionHandler(
						$reader->name,
						$reader->value
					);
					break;

				case XMLReader::DOC_TYPE:
					// We should never see a doctype after first
					// element.
					$this->wellFormed = false;
					break;

				default:
					// One of DOC, ENTITY, END_ENTITY,
					// NOTATION, or XML_DECLARATION
					// xml_parse didn't send these to the filter, so we won't.
			}
		} while ( $this->readNext( $reader ) );

		if ( $this->stackDepth !== 0 ) {
			// Reading stopped while elements were still open
			$this->wellFormed = false;
		} elseif ( $this->wellFormed === null ) {
			// Nothing flagged a problem along the way
			$this->wellFormed = true;
		}
	}

	/**
	 * Get all of the attributes for an XMLReader's current node.
	 *
	 * Namespace declarations (xmlns attributes) are left out.
	 *
	 * @param XMLReader $r
	 * @return array Expanded attribute name => value
	 */
	private function getAttributesArray( XMLReader $r ) {
		$attrs = [];
		while ( $r->moveToNextAttribute() ) {
			if ( $r->namespaceURI === 'http://www.w3.org/2000/xmlns/' ) {
				// XMLReader treats xmlns attributes as normal
				// attributes, while xml_parse doesn't
				continue;
			}
			$name = $this->expandNS( $r->name, $r->namespaceURI );
			$attrs[$name] = $r->value;
		}
		return $attrs;
	}

	/**
	 * Build the expanded name "namespaceURI:localname" for a node.
	 *
	 * @param string $name Qualified name, possibly carrying a prefix
	 * @param string $namespaceURI Namespace of the node; empty if none
	 * @return string The expanded name, or $name unchanged when there is
	 *   no namespace
	 */
	private function expandNS( $name, $namespaceURI ) {
		if ( $namespaceURI ) {
			$parts = explode( ':', $name );
			$localname = array_pop( $parts );
			return "$namespaceURI:$localname";
		}
		return $name;
	}

	/**
	 * Push a newly opened element onto the processing stacks.
	 *
	 * @param string $name Expanded element name
	 * @param array $attribs Expanded attribute name => value
	 */
	private function elementOpen( $name, $attribs ) {
		$this->elementDataContext[] = [ $name, $attribs ];
		$this->elementData[] = '';
		$this->stackDepth++;
	}

	/**
	 * Pop the innermost element and run the element filter on it.
	 */
	private function elementClose() {
		list( $name, $attribs ) = array_pop( $this->elementDataContext );
		$data = array_pop( $this->elementData );
		$this->stackDepth--;
		$callbackReturn = false;

		if ( is_callable( $this->filterCallback ) ) {
			$callbackReturn = call_user_func(
				$this->filterCallback,
				$name,
				$attribs,
				$data
			);
		}
		$this->recordFilterResult( $callbackReturn );
	}

	/**
	 * Append text to the innermost open element.
	 *
	 * @param string $data Text node content; it is trimmed before appending
	 */
	private function elementData( $data ) {
		// Collect any data here, and we'll run the callback in elementClose
		$this->elementData[ $this->stackDepth - 1 ] .= trim( $data );
	}

	/**
	 * Run the processing instruction callback, if one is configured.
	 *
	 * @param string $target
	 * @param string $data
	 */
	private function processingInstructionHandler( $target, $data ) {
		$callbackReturn = false;
		if ( $this->parserOptions['processing_instruction_handler'] ) {
			$callbackReturn = call_user_func(
				$this->parserOptions['processing_instruction_handler'],
				$target,
				$data
			);
		}
		$this->recordFilterResult( $callbackReturn );
	}

	/**
	 * Handle coming across a <!DOCTYPE declaration.
	 *
	 * Runs the general and external DTD callbacks and, unless disabled,
	 * marks the document as not well-formed when the internal subset does
	 * not pass checkDTDIsSafe().
	 *
	 * @param XMLReader $reader Reader currently on a doctype node
	 */
	private function DTDHandler( XMLReader $reader ) {
		$externalCallback = $this->parserOptions['external_dtd_handler'];
		$generalCallback = $this->parserOptions['dtd_handler'];
		$checkIfSafe = $this->parserOptions['require_safe_dtd'];
		if ( !$externalCallback && !$generalCallback && !$checkIfSafe ) {
			// Nobody is interested in the DTD
			return;
		}
		$dtd = $reader->readOuterXML();

		if ( $generalCallback ) {
			$this->recordFilterResult( call_user_func( $generalCallback, $dtd ) );
		}

		// Note: a DOCTYPE that parseDTD() cannot make sense of marks the
		// document as not well-formed.
		$parsedDTD = $this->parseDTD( $dtd );
		if ( $externalCallback && isset( $parsedDTD['type'] ) ) {
			$this->recordFilterResult( call_user_func(
				$externalCallback,
				$parsedDTD['type'],
				isset( $parsedDTD['publicid'] ) ? $parsedDTD['publicid'] : null,
				isset( $parsedDTD['systemid'] ) ? $parsedDTD['systemid'] : null
			) );
		}

		if ( $checkIfSafe && isset( $parsedDTD['internal'] ) ) {
			if ( !$this->checkDTDIsSafe( $parsedDTD['internal'] ) ) {
				$this->wellFormed = false;
			}
		}
	}

	/**
	 * Record the outcome of a filter callback.
	 *
	 * @param mixed $callbackReturn Truthy values count as a filter hit and
	 *   are stored in $filterMatchType; falsy values are ignored
	 */
	private function recordFilterResult( $callbackReturn ) {
		if ( $callbackReturn ) {
			// Filter hit!
			$this->filterMatch = true;
			$this->filterMatchType = $callbackReturn;
		}
	}

	/**
	 * Check if the internal subset of the DTD is safe.
	 *
	 * The subset is accepted only if it consists entirely of:
	 *  - <!ENTITY declarations whose quoted value is either a single
	 *    entity reference or up to 255 characters of literal text (no
	 *    '%', and no '&' other than the escapes the pattern lists), which
	 *    rules out external and parameter entities and stacked references;
	 *  - comments;
	 *  - the fixed xmlns:xlink ATTLIST declaration for svg.
	 *
	 * @param string $internalSubset The internal subset of the DTD
	 * @return bool True if safe; false otherwise, including when the
	 *   pattern match itself fails
	 */
	private function checkDTDIsSafe( $internalSubset ) {
		$res = preg_match(
			'/^(?:\s*<!ENTITY\s+\S+\s+' .
				'(?:"(?:&[^"%&;]{1,64};|(?:[^"%&]|&amp;|&quot;){0,255})"' .
				'|\'(?:&[^"%&;]{1,64};|(?:[^\'%&]|&amp;|&apos;){0,255})\')\s*>' .
				'|\s*<!--(?:[^-]|-[^-])*-->' .
				'|\s*<!ATTLIST svg xmlns:xlink CDATA #FIXED ' .
				'"http:\/\/www.w3.org\/1999\/xlink">)*\s*$/',
			$internalSubset
		);

		return (bool)$res;
	}

	/**
	 * Parse DTD into parts.
	 *
	 * If the declaration cannot be parsed, $wellFormed is set to false and
	 * an empty array is returned.
	 *
	 * @param string $dtd The full <!DOCTYPE ...> declaration
	 * @return array Subset of the keys 'type' (PUBLIC or SYSTEM),
	 *   'publicid', 'systemid' and 'internal'; parts that are absent or
	 *   empty are left out
	 */
	private function parseDTD( $dtd ) {
		$m = [];
		$res = preg_match(
			'/^<!DOCTYPE\s*\S+\s*' .
			'(?:(?P<typepublic>PUBLIC)\s*' .
				'(?:"(?P<pubquote>[^"]*)"|\'(?P<pubapos>[^\']*)\')' . // public identifier
				// The system identifier alternatives must be grouped;
				// otherwise the single-quoted form becomes an alternative
				// to the whole PUBLIC clause.
				'\s*(?:"(?P<pubsysquote>[^"]*)"|\'(?P<pubsysapos>[^\']*)\')' . // system identifier
			'|(?P<typesystem>SYSTEM)\s*' .
				'(?:"(?P<sysquote>[^"]*)"|\'(?P<sysapos>[^\']*)\')' .
			')?\s*' .
			'(?:\[\s*(?P<internal>.*)\])?\s*>$/s',
			$dtd,
			$m
		);
		if ( !$res ) {
			$this->wellFormed = false;
			return [];
		}
		$parsed = [];
		foreach ( $m as $field => $value ) {
			// Skip groups that did not take part in the match, and the
			// numeric duplicates of the named groups
			if ( $value === '' || is_numeric( $field ) ) {
				continue;
			}
			switch ( $field ) {
				case 'typepublic':
				case 'typesystem':
					$parsed['type'] = $value;
					break;
				case 'pubquote':
				case 'pubapos':
					$parsed['publicid'] = $value;
					break;
				case 'pubsysquote':
				case 'pubsysapos':
				case 'sysquote':
				case 'sysapos':
					$parsed['systemid'] = $value;
					break;
				case 'internal':
					$parsed['internal'] = $value;
					break;
			}
		}
		return $parsed;
	}
}
XmlTypeCheck\$filterMatch
$filterMatch
Will be set to true if the optional element filter returned a match at some point.
Definition: XmlTypeCheck.php:39
XmlTypeCheck\parseDTD
parseDTD( $dtd)
Parse DTD into parts.
Definition: XmlTypeCheck.php:458
false
processing should stop and the error should be shown to the user * false
Definition: hooks.txt:187
XmlTypeCheck\newFromString
static newFromString( $string, $filterCallback=null)
Alternative constructor: from string.
Definition: XmlTypeCheck.php:139
$fname
if(!defined( 'MEDIAWIKI')) $fname
This file is not a valid entry point, perform no further processing unless MEDIAWIKI is defined.
Definition: Setup.php:36
XmlTypeCheck\newFromFilename
static newFromFilename( $fname, $filterCallback=null)
Alternative constructor: from filename.
Definition: XmlTypeCheck.php:124
XmlTypeCheck\$rootElement
$rootElement
Name of the document's root element, including any namespace as an expanded URL.
Definition: XmlTypeCheck.php:52
$s
$s
Definition: mergeMessageFileList.php:188
$res
$res
Definition: database.txt:21
$name
Allows to change the fields on the form that will be generated $name
Definition: hooks.txt:302
XmlTypeCheck\expandNS
expandNS( $name, $namespaceURI)
Definition: XmlTypeCheck.php:300
XmlTypeCheck\getAttributesArray
getAttributesArray(XMLReader $r)
Get all of the attributes for an XMLReader's current node.
Definition: XmlTypeCheck.php:281
php
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
XmlTypeCheck\$elementData
$elementData
A stack of strings containing the data of each xml element as it's processed.
Definition: XmlTypeCheck.php:59
XmlTypeCheck\$elementDataContext
$elementDataContext
A stack of element names and attributes, as we process them.
Definition: XmlTypeCheck.php:64
XmlTypeCheck\XmlErrorHandler
XmlErrorHandler( $errno, $errstr)
Definition: XmlTypeCheck.php:190
XmlTypeCheck\DTDHandler
DTDHandler(XMLReader $reader)
Handle coming across a <!DOCTYPE declaration.
Definition: XmlTypeCheck.php:372
XmlTypeCheck\__construct
__construct( $input, $filterCallback=null, $isFile=true, $options=[])
Allow filtering an XML file.
Definition: XmlTypeCheck.php:107
$input
if(is_array( $mode)) switch( $mode) $input
Definition: postprocess-phan.php:141
XmlTypeCheck\$wellFormed
$wellFormed
Will be set to true or false to indicate whether the file is well-formed XML.
Definition: XmlTypeCheck.php:33
$attribs
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses just before the function returns a value If you return an< a > element with HTML attributes $attribs and contents $html will be returned If you return $ret will be returned and may include noclasses after processing & $attribs
Definition: hooks.txt:1965
XmlTypeCheck\readNext
readNext(XMLReader $reader)
Definition: XmlTypeCheck.php:183
list
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global list
Definition: deferred.txt:11
XmlTypeCheck\$parserOptions
$parserOptions
Additional parsing options.
Definition: XmlTypeCheck.php:74
XmlTypeCheck\$stackDepth
$stackDepth
Current depth of the data stack.
Definition: XmlTypeCheck.php:69
$e
div flags Integer display flags(NO_ACTION_LINK, NO_EXTRA_USER_LINKS) 'LogException' returning false will NOT prevent logging $e
Definition: hooks.txt:2141
$value
$value
Definition: styleTest.css.php:45
XmlTypeCheck\elementData
elementData( $data)
Definition: XmlTypeCheck.php:343
$ret
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses & $ret
Definition: hooks.txt:1965
XmlTypeCheck\getRootElement
getRootElement()
Get the root element.
Definition: XmlTypeCheck.php:148
XmlTypeCheck
Definition: XmlTypeCheck.php:28
$options
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped & $options
Definition: hooks.txt:1965
XmlTypeCheck\elementOpen
elementOpen( $name, $attribs)
Definition: XmlTypeCheck.php:313
as
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
true
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses just before the function returns a value If you return true
Definition: hooks.txt:1965
XmlTypeCheck\processingInstructionHandler
processingInstructionHandler( $target, $data)
Definition: XmlTypeCheck.php:352
XmlTypeCheck\validateFromInput
validateFromInput( $xml, $isFile)
Definition: XmlTypeCheck.php:155
XmlTypeCheck\validate
validate( $reader)
Definition: XmlTypeCheck.php:194
XmlTypeCheck\elementClose
elementClose()
Definition: XmlTypeCheck.php:319
XmlTypeCheck\$filterMatchType
mixed $filterMatchType
Will contain the type of filter hit if the optional element filter returned a match at some point.
Definition: XmlTypeCheck.php:46
XmlTypeCheck\checkDTDIsSafe
checkDTDIsSafe( $internalSubset)
Check if the internal subset of the DTD is safe.
Definition: XmlTypeCheck.php:435