MediaWiki  1.28.1
XmlTypeCheck.php
Go to the documentation of this file.
1 <?php
/**
 * Streams an XML document through XMLReader, recording whether it is
 * well-formed and optionally passing every element, processing instruction
 * and DOCTYPE declaration to caller-supplied filter callbacks.
 */
class XmlTypeCheck {
	/**
	 * Will be set to true or false to indicate whether the file is
	 * well-formed XML. Stays null until a document has been examined.
	 * @var bool|null
	 */
	public $wellFormed = null;

	/**
	 * Will be set to true if one of the optional filter callbacks returned
	 * a match (any truthy value) at some point.
	 * @var bool
	 */
	public $filterMatch = false;

	/**
	 * Will contain the type of filter hit (the truthy value returned by the
	 * callback) if a filter returned a match at some point.
	 * @var mixed
	 */
	public $filterMatchType = false;

	/**
	 * Name of the document's root element, including any namespace
	 * as an expanded URL.
	 * @var string
	 */
	public $rootElement = '';

	/**
	 * A stack of strings containing the data of each xml element as it's
	 * processed. Appended to by elementData(), popped by elementClose().
	 * @var string[]
	 */
	protected $elementData = [];

	/**
	 * A stack of [ element name, attributes ] pairs, as we process them.
	 * @var array[]
	 */
	protected $elementDataContext = [];

	/**
	 * Current depth of the data stack.
	 * @var int
	 */
	protected $stackDepth = 0;

	/**
	 * Element filter; called as ( $name, $attribs, $data ) whenever an
	 * element is closed. A truthy return value is recorded as a filter match.
	 * @var callable|null
	 */
	protected $filterCallback = null;

	/**
	 * Additional parsing options:
	 *  - processing_instruction_handler: callable( $target, $data )
	 *  - external_dtd_handler: callable( $type, $publicId, $systemId )
	 *  - dtd_handler: callable( $dtd ), receiving the raw DOCTYPE text
	 *  - require_safe_dtd: when true, an internal DTD subset that fails
	 *    checkDTDIsSafe() marks the document as not well-formed
	 * A truthy return value from any handler is recorded as a filter match.
	 * @var array
	 */
	private $parserOptions = [
		'processing_instruction_handler' => '',
		'external_dtd_handler' => '',
		'dtd_handler' => '',
		'require_safe_dtd' => true
	];

	/**
	 * Allow filtering an XML file.
	 *
	 * The document is checked immediately; results are available afterwards
	 * in $wellFormed, $filterMatch, $filterMatchType and $rootElement.
	 *
	 * @param string $input A filename or the XML itself, depending on $isFile
	 * @param callable|null $filterCallback Called as ( $name, $attribs, $data )
	 *   for every closed element; return a truthy value to flag a match
	 * @param bool $isFile True if $input is a filename, false if it is XML text
	 * @param array $options Overrides for the default parser options
	 */
	public function __construct( $input, $filterCallback = null, $isFile = true, $options = [] ) {
		$this->filterCallback = $filterCallback;
		$this->parserOptions = array_merge( $this->parserOptions, $options );
		$this->validateFromInput( $input, $isFile );
	}

	/**
	 * Alternative constructor: from filename.
	 *
	 * @param string $fname The filename of an XML document
	 * @param callable|null $filterCallback Element filter, see __construct()
	 * @return XmlTypeCheck
	 */
	public static function newFromFilename( $fname, $filterCallback = null ) {
		return new self( $fname, $filterCallback, true );
	}

	/**
	 * Alternative constructor: from string.
	 *
	 * @param string $string A string of XML
	 * @param callable|null $filterCallback Element filter, see __construct()
	 * @return XmlTypeCheck
	 */
	public static function newFromString( $string, $filterCallback = null ) {
		return new self( $string, $filterCallback, false );
	}

	/**
	 * Get the root element. Simple accessor to $rootElement.
	 *
	 * @return string
	 */
	public function getRootElement() {
		return $this->rootElement;
	}

	/**
	 * Open the input with XMLReader and run the validation pass over it.
	 *
	 * @param string $xml A filename or XML text, depending on $isFile
	 * @param bool $isFile Whether $xml is a filename
	 * @throws Exception Anything thrown while validating (for instance by a
	 *   filter callback) is rethrown after marking the document malformed
	 */
	private function validateFromInput( $xml, $isFile ) {
		if ( $xml === null || $xml === '' ) {
			// Nothing to open. Newer PHP versions throw from open()/XML() on
			// empty input instead of returning false, so short-circuit here.
			$this->wellFormed = false;
			return;
		}

		$reader = new XMLReader();
		if ( $isFile ) {
			$s = $reader->open( $xml, null, LIBXML_NOERROR | LIBXML_NOWARNING );
		} else {
			$s = $reader->XML( $xml, null, LIBXML_NOERROR | LIBXML_NOWARNING );
		}
		if ( $s !== true ) {
			// Couldn't open the XML
			$this->wellFormed = false;
			return;
		}

		// Entity substitution is switched on below, so keep external entity
		// loading disabled for the duration of the parse.
		// NOTE(review): libxml_disable_entity_loader() is deprecated as of
		// PHP 8.0 - confirm the replacement strategy before removing it.
		$oldDisable = libxml_disable_entity_loader( true );
		$reader->setParserProperty( XMLReader::SUBST_ENTITIES, true );
		try {
			$this->validate( $reader );
		} catch ( Exception $e ) {
			// Calling this malformed, because we didn't parse the whole
			// thing. Maybe just an external entity reference.
			$this->wellFormed = false;
			throw $e;
		} finally {
			// Always release the reader and restore the process-wide
			// entity loader setting, whatever was thrown.
			$reader->close();
			libxml_disable_entity_loader( $oldDisable );
		}
	}

	/**
	 * Advance the reader by one node, routing any PHP error raised by the
	 * read to XmlErrorHandler() so it marks the document as malformed.
	 *
	 * @param XMLReader $reader
	 * @return bool Result of XMLReader::read()
	 */
	private function readNext( XMLReader $reader ) {
		set_error_handler( [ $this, 'XmlErrorHandler' ] );
		$ret = $reader->read();
		restore_error_handler();
		return $ret;
	}

	/**
	 * Error handler installed around XMLReader::read(); any error raised
	 * there means the document is not well-formed.
	 *
	 * @param int $errno
	 * @param string $errstr
	 */
	public function XmlErrorHandler( $errno, $errstr ) {
		$this->wellFormed = false;
	}

	/**
	 * Walk the whole document, feeding nodes to the element, processing
	 * instruction and DTD handlers, and set $wellFormed accordingly.
	 *
	 * @param XMLReader $reader An opened reader positioned before the first node
	 */
	private function validate( XMLReader $reader ) {
		// First, move through anything that isn't an element, and
		// handle any processing instructions with the callback
		do {
			if ( !$this->readNext( $reader ) ) {
				// Hit the end of the document before any elements
				$this->wellFormed = false;
				return;
			}
			if ( $reader->nodeType === XMLReader::PI ) {
				$this->processingInstructionHandler( $reader->name, $reader->value );
			}
			if ( $reader->nodeType === XMLReader::DOC_TYPE ) {
				$this->DTDHandler( $reader );
			}
		} while ( $reader->nodeType !== XMLReader::ELEMENT );

		// Process the rest of the document
		do {
			switch ( $reader->nodeType ) {
				case XMLReader::ELEMENT:
					$name = $this->expandNS(
						$reader->name,
						$reader->namespaceURI
					);
					if ( $this->rootElement === '' ) {
						$this->rootElement = $name;
					}
					// Must be read before getAttributesArray() moves the
					// reader onto the attribute nodes.
					$empty = $reader->isEmptyElement;
					$attrs = $this->getAttributesArray( $reader );
					$this->elementOpen( $name, $attrs );
					if ( $empty ) {
						// Self-closing elements produce no END_ELEMENT node
						$this->elementClose();
					}
					break;

				case XMLReader::END_ELEMENT:
					$this->elementClose();
					break;

				case XMLReader::WHITESPACE:
				case XMLReader::SIGNIFICANT_WHITESPACE:
				case XMLReader::CDATA:
				case XMLReader::TEXT:
					$this->elementData( $reader->value );
					break;

				case XMLReader::ENTITY_REF:
					// Unexpanded entity (maybe external?),
					// don't send to the filter (xml_parse didn't)
					break;

				case XMLReader::COMMENT:
					// Don't send to the filter (xml_parse didn't)
					break;

				case XMLReader::PI:
					// Processing instructions can happen after the header too
					$this->processingInstructionHandler(
						$reader->name,
						$reader->value
					);
					break;

				case XMLReader::DOC_TYPE:
					// We should never see a doctype after first
					// element.
					$this->wellFormed = false;
					break;

				default:
					// One of DOC, ENTITY, END_ENTITY,
					// NOTATION, or XML_DECLARATION
					// xml_parse didn't send these to the filter, so we won't.
			}
		} while ( $this->readNext( $reader ) );

		if ( $this->stackDepth !== 0 ) {
			// Some element was never closed
			$this->wellFormed = false;
		} elseif ( $this->wellFormed === null ) {
			// No error was flagged along the way
			$this->wellFormed = true;
		}
	}

	/**
	 * Get all of the attributes for an XMLReader's current node.
	 *
	 * @param XMLReader $r Reader positioned on an element
	 * @return array Attribute values keyed by namespace-expanded name
	 */
	private function getAttributesArray( XMLReader $r ) {
		$attrs = [];
		while ( $r->moveToNextAttribute() ) {
			if ( $r->namespaceURI === 'http://www.w3.org/2000/xmlns/' ) {
				// XMLReader treats xmlns attributes as normal
				// attributes, while xml_parse doesn't
				continue;
			}
			$name = $this->expandNS( $r->name, $r->namespaceURI );
			$attrs[$name] = $r->value;
		}
		return $attrs;
	}

	/**
	 * Replace a name's namespace prefix with the namespace URI, giving
	 * "<namespaceURI>:<localname>". Names without a namespace are returned
	 * unchanged.
	 *
	 * @param string $name Possibly prefixed element or attribute name
	 * @param string $namespaceURI Namespace URI reported by the reader
	 * @return string
	 */
	private function expandNS( $name, $namespaceURI ) {
		if ( $namespaceURI ) {
			$parts = explode( ':', $name );
			$localname = array_pop( $parts );
			return "$namespaceURI:$localname";
		}
		return $name;
	}

	/**
	 * Push a newly opened element and an empty data buffer onto the stacks.
	 *
	 * @param string $name Namespace-expanded element name
	 * @param array $attribs Attributes of the element
	 */
	private function elementOpen( $name, $attribs ) {
		$this->elementDataContext[] = [ $name, $attribs ];
		$this->elementData[] = '';
		$this->stackDepth++;
	}

	/**
	 * Pop the innermost element off the stacks and run the element filter
	 * callback on its name, attributes and collected data.
	 */
	private function elementClose() {
		list( $name, $attribs ) = array_pop( $this->elementDataContext );
		$data = array_pop( $this->elementData );
		$this->stackDepth--;

		if ( is_callable( $this->filterCallback ) ) {
			$this->noteFilterResult(
				call_user_func( $this->filterCallback, $name, $attribs, $data )
			);
		}
	}

	/**
	 * Append character data to the buffer of the innermost open element.
	 *
	 * @param string $data Text, CDATA or whitespace content
	 */
	private function elementData( $data ) {
		if ( $this->stackDepth < 1 ) {
			// No element is open, so there is no buffer to append to
			return;
		}
		// Collect any data here, and we'll run the callback in elementClose
		$this->elementData[ $this->stackDepth - 1 ] .= trim( $data );
	}

	/**
	 * Pass a processing instruction to the configured handler, if any.
	 *
	 * @param string $target Target of the processing instruction
	 * @param string $data Content of the processing instruction
	 */
	private function processingInstructionHandler( $target, $data ) {
		$handler = $this->parserOptions['processing_instruction_handler'];
		if ( $handler ) {
			$this->noteFilterResult( call_user_func( $handler, $target, $data ) );
		}
	}

	/**
	 * Handle coming across a <!DOCTYPE declaration: run the DTD callbacks
	 * and, if required, check that the internal subset is safe.
	 *
	 * @param XMLReader $reader Reader positioned on the DOC_TYPE node
	 */
	private function DTDHandler( XMLReader $reader ) {
		$externalCallback = $this->parserOptions['external_dtd_handler'];
		$generalCallback = $this->parserOptions['dtd_handler'];
		$checkIfSafe = $this->parserOptions['require_safe_dtd'];
		if ( !$externalCallback && !$generalCallback && !$checkIfSafe ) {
			// Nobody is interested in the DTD
			return;
		}
		$dtd = $reader->readOuterXML();

		if ( $generalCallback ) {
			$this->noteFilterResult( call_user_func( $generalCallback, $dtd ) );
		}

		$parsedDTD = $this->parseDTD( $dtd );
		if ( $externalCallback && isset( $parsedDTD['type'] ) ) {
			$this->noteFilterResult( call_user_func(
				$externalCallback,
				$parsedDTD['type'],
				isset( $parsedDTD['publicid'] ) ? $parsedDTD['publicid'] : null,
				isset( $parsedDTD['systemid'] ) ? $parsedDTD['systemid'] : null
			) );
		}

		if ( $checkIfSafe && isset( $parsedDTD['internal'] ) ) {
			if ( !$this->checkDTDIsSafe( $parsedDTD['internal'] ) ) {
				$this->wellFormed = false;
			}
		}
	}

	/**
	 * Record the return value of a filter callback: any truthy value counts
	 * as a filter hit and is stored as the match type.
	 *
	 * @param mixed $callbackReturn Value returned by a filter callback
	 */
	private function noteFilterResult( $callbackReturn ) {
		if ( $callbackReturn ) {
			// Filter hit!
			$this->filterMatch = true;
			$this->filterMatchType = $callbackReturn;
		}
	}

	/**
	 * Check if the internal subset of the DTD is safe.
	 *
	 * The subset is accepted only if it consists solely of:
	 *  - <!ENTITY declarations whose quoted value is either a single entity
	 *    reference, or up to 255 characters with no '%' and no '&' other
	 *    than &amp; and the escape for the surrounding quote character
	 *  - comments
	 *  - the fixed <!ATTLIST declaration of xmlns:xlink on svg
	 *
	 * @param string $internalSubset The internal subset of the DTD
	 * @return bool True if safe; false otherwise, including on regex failure
	 */
	private function checkDTDIsSafe( $internalSubset ) {
		$res = preg_match(
			'/^(?:\s*<!ENTITY\s+\S+\s+' .
				'(?:"(?:&[^"%&;]{1,64};|(?:[^"%&]|&amp;|&quot;){0,255})"' .
				'|\'(?:&[^"%&;]{1,64};|(?:[^\'%&]|&amp;|&apos;){0,255})\')\s*>' .
				'|\s*<!--(?:[^-]|-[^-])*-->' .
				'|\s*<!ATTLIST svg xmlns:xlink CDATA #FIXED ' .
				'"http:\/\/www\.w3\.org\/1999\/xlink">)*\s*$/',
			$internalSubset
		);

		return (bool)$res;
	}

	/**
	 * Parse DTD into parts.
	 *
	 * If the declaration cannot be parsed, the document is marked as not
	 * well-formed and an empty array is returned.
	 *
	 * @param string $dtd The full <!DOCTYPE ...> declaration
	 * @return array Any of the keys 'type' (PUBLIC or SYSTEM), 'publicid',
	 *   'systemid' and 'internal'; parts that are absent or empty are omitted
	 */
	private function parseDTD( $dtd ) {
		$m = [];
		$res = preg_match(
			'/^<!DOCTYPE\s*\S+\s*' .
			'(?:(?P<typepublic>PUBLIC)\s*' .
				'(?:"(?P<pubquote>[^"]*)"|\'(?P<pubapos>[^\']*)\')' . // public identifier
				// The system identifier alternatives must be grouped so the
				// "|" cannot split the whole PUBLIC branch in two.
				'\s*(?:"(?P<pubsysquote>[^"]*)"|\'(?P<pubsysapos>[^\']*)\')' .
			'|(?P<typesystem>SYSTEM)\s*' .
				'(?:"(?P<sysquote>[^"]*)"|\'(?P<sysapos>[^\']*)\')' .
			')?\s*' .
			'(?:\[\s*(?P<internal>.*)\])?\s*>$/s',
			$dtd,
			$m
		);
		if ( !$res ) {
			$this->wellFormed = false;
			return [];
		}
		$parsed = [];
		foreach ( $m as $field => $value ) {
			// Skip groups that did not take part in the match, and the
			// numbered duplicates of the named groups
			if ( $value === '' || is_numeric( $field ) ) {
				continue;
			}
			switch ( $field ) {
				case 'typepublic':
				case 'typesystem':
					$parsed['type'] = $value;
					break;
				case 'pubquote':
				case 'pubapos':
					$parsed['publicid'] = $value;
					break;
				case 'pubsysquote':
				case 'pubsysapos':
				case 'sysquote':
				case 'sysapos':
					$parsed['systemid'] = $value;
					break;
				case 'internal':
					$parsed['internal'] = $value;
					break;
			}
		}
		return $parsed;
	}
}
readNext(XMLReader $reader)
$parserOptions
Additional parsing options.
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global list
Definition: deferred.txt:11
static newFromString($string, $filterCallback=null)
Alternative constructor: from string.
processing should stop and the error should be shown to the user * false
Definition: hooks.txt:189
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses & $ret
Definition: hooks.txt:1936
div flags Integer display flags(NO_ACTION_LINK, NO_EXTRA_USER_LINKS) 'LogException'returning false will NOT prevent logging $e
Definition: hooks.txt:2102
validateFromInput($xml, $isFile)
expandNS($name, $namespaceURI)
$value
elementData($data)
getRootElement()
Get the root element.
processingInstructionHandler($target, $data)
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses just before the function returns a value If you return true
Definition: hooks.txt:1936
XmlErrorHandler($errno, $errstr)
$stackDepth
Current depth of the data stack.
elementOpen($name, $attribs)
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set and then return false from the hook function Ensure you consume the ChangeTagAfterDelete hook to carry out custom deletion actions as context called by AbstractContent::getParserOutput May be used to override the normal model specific rendering of page content as context as context $options
Definition: hooks.txt:1046
$res
Definition: database.txt:21
mixed $filterMatchType
Will contain the type of filter hit if the optional element filter returned a match at some point...
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses just before the function returns a value If you return an< a > element with HTML attributes $attribs and contents $html will be returned If you return $ret will be returned and may include noclasses after processing & $attribs
Definition: hooks.txt:1936
parseDTD($dtd)
Parse DTD into parts.
static newFromFilename($fname, $filterCallback=null)
Alternative constructor: from filename.
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
$rootElement
Name of the document's root element, including any namespace as an expanded URL.
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
__construct($input, $filterCallback=null, $isFile=true, $options=[])
Allow filtering an XML file.
$elementData
A stack of strings containing the data of each xml element as it's processed.
if(!defined( 'MEDIAWIKI')) $fname
This file is not a valid entry point, perform no further processing unless MEDIAWIKI is defined...
Definition: Setup.php:36
$filterMatch
Will be set to true if the optional element filter returned a match at some point.
getAttributesArray(XMLReader $r)
Get all of the attributes for an XMLReader's current node.
DTDHandler(XMLReader $reader)
Handle coming across a <!DOCTYPE declaration.
$elementDataContext
A stack of element names and attributes, as we process them.
$wellFormed
Will be set to true or false to indicate whether the file is well-formed XML.
validate($reader)
checkDTDIsSafe($internalSubset)
Check if the internal subset of the DTD is safe.
Allows to change the fields on the form that will be generated $name
Definition: hooks.txt:300