// Default parser options. The handler entries default to falsy values (null / '');
// the handlers are only invoked after a truthiness check, so these defaults mean
// "no callback configured". 'require_safe_dtd' is on by default.
// NOTE(review): dtdHandler() also reads a 'dtd_handler' key; no default for it is
// visible in this excerpt (original line 90 is not shown) — confirm it is defined.
87 private $parserOptions = [
88 'processing_instruction_handler' =>
null,
89 'external_dtd_handler' =>
'',
91 'require_safe_dtd' => true
// Layer the caller-supplied $options over the defaults: with array_merge(), a
// string key present in $options replaces the default stored under the same key.
122 $this->parserOptions = array_merge( $this->parserOptions, $options );
// Validation of $input starts as soon as the options are merged.
123 $this->validateFromInput( $input, $isFile );
// Builds an XMLReader over $xml and runs validate() on it.
// Both XMLReader::open() and XMLReader::XML() are called below; the condition that
// chooses between them is not part of this excerpt (presumably $isFile — confirm).
169 private function validateFromInput( $xml, $isFile ) {
170 $reader =
new XMLReader();
// LIBXML_NOERROR | LIBXML_NOWARNING ask libxml not to report errors or warnings.
172 $s = $reader->open( $xml,
null, LIBXML_NOERROR | LIBXML_NOWARNING );
174 $s = $reader->XML( $xml,
null, LIBXML_NOERROR | LIBXML_NOWARNING );
// Marks the input as not well-formed; the guard around this assignment is not
// visible here (presumably a failed open()/XML() call, i.e. falsy $s — confirm).
178 $this->wellFormed =
false;
// Keep the previous entity-loader setting so it can be put back afterwards.
// NOTE(review): libxml_disable_entity_loader() is deprecated as of PHP 8.0 and the
// leading @ silences anything it emits — confirm this is still the intended approach.
181 $oldDisable = @libxml_disable_entity_loader(
true );
// SUBST_ENTITIES: have the reader substitute entity references while reading.
182 $reader->setParserProperty( XMLReader::SUBST_ENTITIES,
true );
184 $this->validate( $reader );
185 }
catch ( Exception $e ) {
// An exception thrown while validating marks the input as not well-formed.
188 $this->wellFormed =
false;
// The saved entity-loader setting is restored on this path ...
191 @libxml_disable_entity_loader( $oldDisable );
// ... and restored here as well; the control flow separating the two calls is
// not visible in this excerpt.
196 @libxml_disable_entity_loader( $oldDisable );
// Advances $reader by one node via XMLReader::read(), with a temporary PHP error
// handler installed for the duration of that single call.
200 private function readNext( XMLReader $reader ) {
// Any PHP error raised while the handler is installed flags the document as not
// well-formed.
// NOTE(review): set_error_handler() passes the error level and message as the
// first two arguments, so the parameter names $line/$file are misleading; neither
// is used in the visible part of the closure.
201 set_error_handler(
function ( $line, $file ) {
202 $this->wellFormed =
false;
205 $ret = $reader->read();
// Put the previously active error handler back immediately after the read.
206 restore_error_handler();
// Walks the document node by node and dispatches each node to the element, text,
// processing-instruction and DTD handlers, then settles the final wellFormed flag.
210 private function validate( $reader ) {
// Phase 1: read forward until the first ELEMENT node. A failed read in this
// phase marks the document as not well-formed.
214 if ( !$this->readNext( $reader ) ) {
216 $this->wellFormed =
false;
// Processing instructions met before the root element are passed to the PI handler.
219 if ( $reader->nodeType === XMLReader::PI ) {
220 $this->processingInstructionHandler( $reader->name, $reader->value );
// A DOCTYPE met before the root element is passed to the DTD handler.
222 if ( $reader->nodeType === XMLReader::DOC_TYPE ) {
223 $this->dtdHandler( $reader );
225 }
// Note: loose comparison (!=) here, while the checks above use ===.
while ( $reader->nodeType != XMLReader::ELEMENT );
// Phase 2: handle the current node, then keep reading until readNext() is falsy.
229 switch ( $reader->nodeType ) {
230 case XMLReader::ELEMENT:
// The element name is passed through expandNS() together with its namespace URI.
231 $name = $this->expandNS(
233 $reader->namespaceURI
// The first element seen (rootElement still '') is recorded as the root element.
235 if ( $this->rootElement ===
'' ) {
236 $this->rootElement = $name;
// isEmptyElement is captured before the attributes are read.
238 $empty = $reader->isEmptyElement;
239 $attrs = $this->getAttributesArray( $reader );
240 $this->elementOpen( $name, $attrs );
// elementClose() is also called from within the ELEMENT case; its guard is not
// visible here (presumably $empty, since no END_ELEMENT follows <a/> — confirm).
242 $this->elementClose();
246 case XMLReader::END_ELEMENT:
247 $this->elementClose();
// Whitespace, CDATA and text nodes are all fed to elementData().
250 case XMLReader::WHITESPACE:
251 case XMLReader::SIGNIFICANT_WHITESPACE:
252 case XMLReader::CDATA:
253 case XMLReader::TEXT:
254 $this->elementData( $reader->value );
257 case XMLReader::ENTITY_REF:
262 case XMLReader::COMMENT:
268 $this->processingInstructionHandler(
273 case XMLReader::DOC_TYPE:
// wellFormed is set to false below the DOC_TYPE case label; the intervening lines
// are not visible (presumably a DOCTYPE after the root element is rejected — confirm).
276 $this->wellFormed =
false;
283 }
while ( $this->readNext( $reader ) );
// A non-zero stack depth once reading stops means the document is not well-formed.
285 if ( $this->stackDepth !== 0 ) {
286 $this->wellFormed =
false;
// Only promote to true when nothing earlier already decided the flag (still null).
287 } elseif ( $this->wellFormed ===
null ) {
288 $this->wellFormed =
true;
// Collects the attributes of the reader's current element into an array keyed by
// the expandNS() form of each attribute name, with the attribute value as the entry.
297 private function getAttributesArray( XMLReader $r ) {
299 while ( $r->moveToNextAttribute() ) {
// Attributes in the xmlns namespace are namespace declarations and get special
// treatment; the branch body is not visible here (presumably skipped — confirm).
300 if ( $r->namespaceURI ===
'http://www.w3.org/2000/xmlns/' ) {
305 $name = $this->expandNS( $r->name, $r->namespaceURI );
// Two attributes that expand to the same name overwrite each other; last one wins.
306 $attrs[$name] = $r->value;
// Builds the "namespaceURI:localname" form of a possibly prefixed name.
// What is returned when $namespaceURI is falsy is not visible in this excerpt.
316 private function expandNS( $name, $namespaceURI ) {
317 if ( $namespaceURI ) {
// Keep only the text after the last ':' so that any prefix is dropped.
318 $parts = explode(
':', $name );
319 $localname = array_pop( $parts );
320 return "$namespaceURI:$localname";
// Records a newly opened element: pushes its name and attributes onto the context
// stack and starts an empty text accumulator for it.
329 private function elementOpen( $name, $attribs ) {
330 $this->elementDataContext[] = [ $name, $attribs ];
// The accumulator starts as '' so that elementData() can append to it with .=
331 $this->elementData[] =
'';
// Finishes the innermost open element: pops its name/attributes and its accumulated
// text, then records a filter match if the filter callback produced a truthy result.
335 private function elementClose() {
336 [ $name, $attribs ] = array_pop( $this->elementDataContext );
337 $data = array_pop( $this->elementData );
// Default to "no match" for the case where no callable filter is configured.
339 $callbackReturn =
false;
// The callback invocation itself is not visible in this excerpt.
341 if ( is_callable( $this->filterCallback ) ) {
344 if ( $callbackReturn ) {
// Any truthy return counts as a match; the returned value is kept as its type.
346 $this->filterMatch =
true;
347 $this->filterMatchType = $callbackReturn;
// Appends character data to the accumulator at index stackDepth - 1.
354 private function elementData( $data ) {
// Each chunk is trim()med before being appended, so whitespace at the edges of a
// chunk is dropped and consecutive chunks are joined without a separator.
356 $this->elementData[ $this->stackDepth - 1 ] .= trim( $data );
// Forwards a processing instruction to the configured
// 'processing_instruction_handler' option, if one is set, and records a filter
// match when that callback returns a truthy value.
363 private function processingInstructionHandler( $target, $data ) {
364 $callbackReturn =
false;
// A falsy option value means no handler is configured.
365 if ( $this->parserOptions[
'processing_instruction_handler'] ) {
// The arguments passed to the callback are not visible in this excerpt.
367 $callbackReturn = $this->parserOptions[
'processing_instruction_handler'](
372 if ( $callbackReturn ) {
// Any truthy return counts as a match; the returned value is kept as its type.
374 $this->filterMatch =
true;
375 $this->filterMatchType = $callbackReturn;
// Handles a DOCTYPE node: runs the general DTD callback on the raw DOCTYPE text,
// runs the external-DTD callback on the parsed public/system identifiers, and
// applies the internal-subset safety check when 'require_safe_dtd' is enabled.
384 private function dtdHandler( XMLReader $reader ) {
385 $externalCallback = $this->parserOptions[
'external_dtd_handler'];
386 $generalCallback = $this->parserOptions[
'dtd_handler'];
387 $checkIfSafe = $this->parserOptions[
'require_safe_dtd'];
// True when neither callback is configured and the safety check is off; the branch
// body is not visible here (presumably an early return — confirm).
388 if ( !$externalCallback && !$generalCallback && !$checkIfSafe ) {
// Outer XML of the current node; passed to the general callback and to parseDTD().
391 $dtd = $reader->readOuterXml();
392 $callbackReturn =
false;
394 if ( $generalCallback ) {
395 $callbackReturn = $generalCallback( $dtd );
397 if ( $callbackReturn ) {
// A truthy return from the general callback is recorded as a filter match.
399 $this->filterMatch =
true;
400 $this->filterMatchType = $callbackReturn;
// Reset so that the later check only reacts to the external callback's result.
401 $callbackReturn =
false;
404 $parsedDTD = $this->parseDTD( $dtd );
// The external callback only runs when parseDTD() produced a 'type' entry.
405 if ( $externalCallback && isset( $parsedDTD[
'type'] ) ) {
406 $callbackReturn = $externalCallback(
// Identifiers missing from the parsed DTD are passed as null.
408 $parsedDTD[
'publicid'] ??
null,
409 $parsedDTD[
'systemid'] ??
null
412 if ( $callbackReturn ) {
414 $this->filterMatch =
true;
415 $this->filterMatchType = $callbackReturn;
// An internal subset that fails checkDTDIsSafe() makes the document not
// well-formed. The check is skipped when no 'internal' entry was parsed.
418 if ( $checkIfSafe && isset( $parsedDTD[
'internal'] ) &&
419 !$this->checkDTDIsSafe( $parsedDTD[
'internal'] )
421 $this->wellFormed =
false;
// Allow-list pattern for a DTD internal subset. The visible pattern accepts any
// number of the following declarations, separated by optional whitespace:
//   - <!ENTITY name "value"> or <!ENTITY name 'value'>, where the value is either a
//     single entity reference of 1-64 characters or at most 255 units of text;
//   - <!-- comments -->;
//   - one fixed <!ATTLIST svg xmlns:xlink ...> declaration for the xlink namespace.
// How the pattern is applied and what the method returns is not visible in this
// excerpt.
// NOTE(review): the alternatives written as &|" and &|' below look like HTML-decoded
// entity names (presumably &amp;|&quot; and &amp;|&apos; originally). As written, the
// bare ' in the apostrophe branch ends the PHP string literal early, and the bare "
// lets a double-quoted value contain its own delimiter — confirm against the
// canonical file.
445 private function checkDTDIsSafe( $internalSubset ) {
447 '/^(?:\s*<!ENTITY\s+\S+\s+' .
448 '(?:"(?:&[^"%&;]{1,64};|(?:[^"%&]|&|"){0,255})"' .
449 '|\'(?:&[^\'%&;]{1,64};|(?:[^\'%&]|&|'){0,255})\')\s*>' .
450 '|\s*<!--(?:[^-]|-[^-])*-->' .
451 '|\s*<!ATTLIST svg xmlns:xlink CDATA #FIXED ' .
452 '"http:\/\/www.w3.org\/1999\/xlink">)*\s*$/',
// Splits a DOCTYPE declaration into its parts using one regular expression with
// named groups, then copies the non-empty named captures into the result under the
// keys 'type', 'publicid', 'systemid' and 'internal'.
467 private function parseDTD( $dtd ) {
// Pattern layout: "<!DOCTYPE name", then a PUBLIC form (public id followed by a
// system id) or a SYSTEM form (system id only), then an optional [internal subset].
// The /s flag lets .* in the internal subset span newlines.
// NOTE(review): the | between the pubsysquote and pubsysapos groups is not wrapped
// in its own (?: ) group, so the apostrophe-quoted system id becomes a separate
// top-level branch of the enclosing group instead of an alternative quoting style
// after the public id — confirm whether that is intended.
470 '/^<!DOCTYPE\s*\S+\s*' .
471 '(?:(?P<typepublic>PUBLIC)\s*' .
472 '(?:"(?P<pubquote>[^"]*)"|\'(?P<pubapos>[^\']*)\')' .
473 '\s*"(?P<pubsysquote>[^"]*)"|\'(?P<pubsysapos>[^\']*)\'' .
474 '|(?P<typesystem>SYSTEM)\s*' .
475 '(?:"(?P<sysquote>[^"]*)"|\'(?P<sysapos>[^\']*)\')' .
477 '(?:\[\s*(?P<internal>.*)\])?\s*>$/s',
// Marks the document as not well-formed; the guard is not visible here (presumably
// a DOCTYPE the pattern does not match — confirm).
482 $this->wellFormed =
false;
486 foreach ( $m as $field => $value ) {
// Ignore groups that captured nothing and the numeric duplicates that preg_match
// stores alongside the named groups.
487 if ( $value ===
'' || is_numeric( $field ) ) {
// Which named group feeds which key is decided by lines not visible in this excerpt.
493 $parsed[
'type'] = $value;
497 $parsed[
'publicid'] = $value;
503 $parsed[
'systemid'] = $value;
506 $parsed[
'internal'] = $value;