/**
 * Default parser options. Caller-supplied options are merged over these with
 * array_merge() before validation starts.
 *
 * - processing_instruction_handler: when truthy it is invoked by
 *   processingInstructionHandler(); a truthy return value is recorded as a
 *   filter match.
 * - external_dtd_handler: when truthy, dtdHandler() invokes it with pieces of
 *   the parsed DOCTYPE (including the public and system identifiers).
 * - require_safe_dtd: when truthy, dtdHandler() marks the document as not
 *   well-formed if the internal DTD subset fails checkDTDIsSafe().
 *
 * NOTE(review): dtdHandler() also reads a dtd_handler key. Its default is not
 * visible in this listing (the line numbered 80 is elided) -- confirm it is
 * declared here.
 */
77 private $parserOptions = [
78 'processing_instruction_handler' =>
null,
79 'external_dtd_handler' =>
'',
81 'require_safe_dtd' =>
true
112 $this->parserOptions = array_merge( $this->parserOptions, $options );
113 $this->validateFromInput( $input, $isFile );
/**
 * Create an XMLReader over the input and run validate() on it.
 *
 * NOTE(review): this listing elides several lines (the embedded line numbers
 * jump), so the surrounding if/else and try structure is only partly visible.
 *
 * @param string $xml Passed to XMLReader::open() or XMLReader::XML();
 *   presumably a file name in the first case and XML text in the second
 * @param bool $isFile Presumably selects between open() and XML(); the
 *   selecting condition is not visible here -- confirm
 */
159 private function validateFromInput( $xml, $isFile ) {
160 $reader =
new XMLReader();
// Both ways of loading the input pass LIBXML_NOERROR | LIBXML_NOWARNING so
// that libxml does not report errors or warnings.
162 $s = $reader->open( $xml,
null, LIBXML_NOERROR | LIBXML_NOWARNING );
164 $s = $reader->XML( $xml,
null, LIBXML_NOERROR | LIBXML_NOWARNING );
// Presumably reached when $s indicates the reader could not be opened; the
// guarding condition is elided -- confirm.
168 $this->wellFormed =
false;
// Disable the libxml external entity loader for the duration of the parse.
// The previous setting is kept in $oldDisable so it can be restored below.
171 $oldDisable = @libxml_disable_entity_loader(
true );
// Ask the reader to substitute entities while reading.
172 $reader->setParserProperty( XMLReader::SUBST_ENTITIES,
true );
174 $this->validate( $reader );
175 }
catch ( Exception $e ) {
// An exception raised while validating marks the input as not well-formed.
178 $this->wellFormed =
false;
// The entity loader setting is restored at both visible exits (here and at the
// line numbered 186).
181 @libxml_disable_entity_loader( $oldDisable );
186 @libxml_disable_entity_loader( $oldDisable );
/**
 * Advance the reader by one node while a temporary PHP error handler is
 * installed. The handler sets wellFormed to false; the previous handler is
 * restored right after read() returns.
 *
 * NOTE(review): the closure parameters are named $line and $file, but PHP
 * passes the error level and the error message as the first two arguments of
 * an error handler, so the names look misleading -- confirm and rename.
 * NOTE(review): restore_error_handler() is not in a finally block, so it would
 * be skipped if read() threw -- confirm whether that can happen.
 *
 * @param XMLReader $reader
 * @return mixed Not visible in this listing; validate() uses the result as a
 *   boolean, so presumably the value of $ret -- confirm
 */
190 private function readNext( XMLReader $reader ) {
191 set_error_handler(
function (
$line,
$file ) {
192 $this->wellFormed =
false;
195 $ret = $reader->read();
196 restore_error_handler();
/**
 * Walk the document node by node and dispatch each node to the matching
 * handler, then settle the final wellFormed value.
 *
 * The visible code has two loops. The first reads nodes until the first
 * element, handing processing instructions and the DOCTYPE to their handlers.
 * The second switches on the node type for the rest of the document.
 *
 * NOTE(review): several lines of this method are elided in this listing
 * (including both do keywords and parts of the switch), so the comments below
 * describe only what is visible.
 *
 * @param XMLReader $reader
 */
200 private function validate( $reader ) {
// A failed read before any element has been seen marks the input as not
// well-formed.
204 if ( !$this->readNext( $reader ) ) {
206 $this->wellFormed =
false;
209 if ( $reader->nodeType === XMLReader::PI ) {
210 $this->processingInstructionHandler( $reader->name, $reader->value );
212 if ( $reader->nodeType === XMLReader::DOC_TYPE ) {
213 $this->dtdHandler( $reader );
215 }
while ( $reader->nodeType != XMLReader::ELEMENT );
219 switch ( $reader->nodeType ) {
220 case XMLReader::ELEMENT:
221 $name = $this->expandNS(
223 $reader->namespaceURI
// The first element name seen is recorded as the root element.
225 if ( $this->rootElement ===
'' ) {
226 $this->rootElement = $name;
// NOTE(review): presumably isEmptyElement is captured before the attributes
// are iterated because moving the reader onto attribute nodes changes what it
// reports -- confirm.
228 $empty = $reader->isEmptyElement;
229 $attrs = $this->getAttributesArray( $reader );
230 $this->elementOpen( $name, $attrs );
// Presumably guarded by $empty (the condition line is elided): an empty
// element gets no END_ELEMENT node, so it is closed right away -- confirm.
232 $this->elementClose();
236 case XMLReader::END_ELEMENT:
237 $this->elementClose();
// All whitespace and text-like node types feed the character data handler.
240 case XMLReader::WHITESPACE:
241 case XMLReader::SIGNIFICANT_WHITESPACE:
242 case XMLReader::CDATA:
243 case XMLReader::TEXT:
244 $this->elementData( $reader->value );
247 case XMLReader::ENTITY_REF:
252 case XMLReader::COMMENT:
// NOTE(review): the case label for this call is elided; presumably the PI
// case -- confirm.
258 $this->processingInstructionHandler(
// In the visible code a DOC_TYPE node in this loop (after the first element
// was reached) is followed by marking the input as not well-formed.
263 case XMLReader::DOC_TYPE:
266 $this->wellFormed =
false;
273 }
while ( $this->readNext( $reader ) );
// Unclosed elements at the end mean not well-formed. Otherwise, if nothing has
// set wellFormed so far (still null), the document is accepted.
275 if ( $this->stackDepth !== 0 ) {
276 $this->wellFormed =
false;
277 } elseif ( $this->wellFormed ===
null ) {
278 $this->wellFormed =
true;
/**
 * Iterate over the attributes of the current element and collect their values
 * into $attrs, keyed by the name produced by expandNS().
 *
 * NOTE(review): the initialisation of $attrs, the body of the xmlns branch and
 * the return statement are elided in this listing. Presumably attributes in
 * the xmlns namespace are skipped and $attrs is returned -- confirm.
 *
 * @param XMLReader $r Reader positioned on an element
 */
287 private function getAttributesArray( XMLReader $r ) {
289 while ( $r->moveToNextAttribute() ) {
// Namespace declarations live in this reserved namespace URI.
290 if ( $r->namespaceURI ===
'http://www.w3.org/2000/xmlns/' ) {
295 $name = $this->expandNS( $r->name, $r->namespaceURI );
296 $attrs[$name] = $r->value;
/**
 * Build a namespace-qualified name. When $namespaceURI is truthy, the part of
 * $name after its last colon (the whole name if there is no colon) is taken as
 * the local name and the result is the namespace URI, a colon, and that local
 * name.
 *
 * NOTE(review): the branch for a falsy $namespaceURI is elided in this
 * listing; presumably $name is returned unchanged -- confirm.
 *
 * @param string $name Possibly prefixed element or attribute name
 * @param string $namespaceURI Namespace URI reported by the reader
 * @return string
 */
306 private function expandNS( $name, $namespaceURI ) {
307 if ( $namespaceURI ) {
308 $parts = explode(
':', $name );
// array_pop() yields the last colon-separated piece, dropping any prefix.
309 $localname = array_pop( $parts );
310 return "$namespaceURI:$localname";
/**
 * Record the start of an element: push its name and attributes onto
 * elementDataContext and push an empty text buffer onto elementData.
 *
 * NOTE(review): the rest of the method is elided in this listing; presumably
 * stackDepth is incremented here -- confirm.
 *
 * @param string $name Element name as produced by expandNS()
 * @param array $attribs Attributes as produced by getAttributesArray()
 */
319 private function elementOpen( $name, $attribs ) {
320 $this->elementDataContext[] = [ $name, $attribs ];
321 $this->elementData[] =
'';
/**
 * Handle the end of an element: pop its name, attributes and accumulated text,
 * and, when filterCallback is callable, record a truthy callback result as a
 * filter match.
 *
 * NOTE(review): the actual invocation of filterCallback is elided in this
 * listing; presumably it receives $name, $attribs and $data. Presumably
 * stackDepth is also decremented here -- confirm both.
 */
325 private function elementClose() {
326 list( $name, $attribs ) = array_pop( $this->elementDataContext );
327 $data = array_pop( $this->elementData );
329 $callbackReturn =
false;
331 if ( is_callable( $this->filterCallback ) ) {
// The truthy return value itself is stored as the type of the match.
334 if ( $callbackReturn ) {
336 $this->filterMatch =
true;
337 $this->filterMatchType = $callbackReturn;
/**
 * Append character data to the text buffer at index stackDepth - 1 of
 * elementData (presumably the innermost open element -- confirm).
 *
 * Each chunk is trimmed before it is appended, so whitespace at the edges of
 * a chunk is dropped.
 *
 * @param string $data Text content reported by the reader
 */
344 private function elementData( $data ) {
346 $this->elementData[ $this->stackDepth - 1 ] .= trim( $data );
/**
 * Pass a processing instruction to the processing_instruction_handler option
 * when one is configured, and record a truthy result as a filter match.
 *
 * NOTE(review): the arguments of the callback invocation are elided in this
 * listing; presumably $target and $data -- confirm.
 *
 * @param string $target Processing instruction target (the reader node name)
 * @param string $data Processing instruction content (the reader node value)
 */
353 private function processingInstructionHandler( $target, $data ) {
354 $callbackReturn =
false;
355 if ( $this->parserOptions[
'processing_instruction_handler'] ) {
357 $callbackReturn = $this->parserOptions[
'processing_instruction_handler'](
// The truthy return value itself is stored as the type of the match.
362 if ( $callbackReturn ) {
364 $this->filterMatch =
true;
365 $this->filterMatchType = $callbackReturn;
/**
 * Handle the DOCTYPE node.
 *
 * Visible steps, in order:
 * 1. Read the three DTD-related options. If none of them is truthy the method
 *    takes an early branch (body elided; presumably it returns -- confirm).
 * 2. Give the raw DOCTYPE text to the dtd_handler callback, if set, and record
 *    a truthy result as a filter match.
 * 3. Parse the DOCTYPE with parseDTD(). If external_dtd_handler is set and the
 *    parse found a type, call it and record a truthy result as a filter match.
 * 4. If require_safe_dtd is set and the internal subset fails
 *    checkDTDIsSafe(), mark the document as not well-formed.
 *
 * NOTE(review): the dtd_handler key has no visible default in parserOptions in
 * this listing; if it is not declared, the read below raises an undefined
 * index notice -- confirm.
 *
 * @param XMLReader $reader Reader positioned on the DOCTYPE node
 */
374 private function dtdHandler( XMLReader $reader ) {
375 $externalCallback = $this->parserOptions[
'external_dtd_handler'];
376 $generalCallback = $this->parserOptions[
'dtd_handler'];
377 $checkIfSafe = $this->parserOptions[
'require_safe_dtd'];
378 if ( !$externalCallback && !$generalCallback && !$checkIfSafe ) {
// Serialised text of the DOCTYPE node, used by the callback and by parseDTD().
381 $dtd = $reader->readOuterXml();
382 $callbackReturn =
false;
384 if ( $generalCallback ) {
385 $callbackReturn = $generalCallback( $dtd );
387 if ( $callbackReturn ) {
389 $this->filterMatch =
true;
390 $this->filterMatchType = $callbackReturn;
// Reset so the check after the external callback only reflects that callback.
391 $callbackReturn =
false;
394 $parsedDTD = $this->parseDTD( $dtd );
395 if ( $externalCallback && isset( $parsedDTD[
'type'] ) ) {
396 $callbackReturn = $externalCallback(
// Identifiers that were not found in the DOCTYPE are passed as null.
398 $parsedDTD[
'publicid'] ??
null,
399 $parsedDTD[
'systemid'] ??
null
402 if ( $callbackReturn ) {
404 $this->filterMatch =
true;
405 $this->filterMatchType = $callbackReturn;
// The safety check only applies when an internal subset was found.
408 if ( $checkIfSafe && isset( $parsedDTD[
'internal'] ) &&
409 !$this->checkDTDIsSafe( $parsedDTD[
'internal'] )
411 $this->wellFormed =
false;
/**
 * Check an internal DTD subset against a regular expression that accepts only
 * a sequence of: ENTITY declarations with a short quoted value (either a
 * single entity reference of up to 64 characters or up to 255 other
 * characters), comments, and one fixed ATTLIST declaration for the xlink
 * namespace on svg.
 *
 * NOTE(review): the call that uses this pattern and the return statement are
 * elided in this listing; presumably preg_match() and a boolean result.
 * NOTE(review): the pattern as shown looks damaged by entity decoding. Inside
 * the quoted-value groups the alternatives are a bare ampersand and a bare
 * quote character, which the preceding character class excludes on purpose,
 * and the line numbered 439 contains an unescaped apostrophe that would end
 * the PHP string literal. Presumably these were the amp, quot and apos entity
 * references in the original file -- verify against the upstream source
 * before relying on this listing.
 *
 * @param string $internalSubset Text between the square brackets of the DOCTYPE
 */
435 private function checkDTDIsSafe( $internalSubset ) {
437 '/^(?:\s*<!ENTITY\s+\S+\s+' .
438 '(?:"(?:&[^"%&;]{1,64};|(?:[^"%&]|&|"){0,255})"' .
439 '|\'(?:&[^"%&;]{1,64};|(?:[^\'%&]|&|'){0,255})\')\s*>' .
440 '|\s*<!--(?:[^-]|-[^-])*-->' .
441 '|\s*<!ATTLIST svg xmlns:xlink CDATA #FIXED ' .
442 '"http:\/\/www.w3.org\/1999\/xlink">)*\s*$/',
/**
 * Split a DOCTYPE declaration into its parts with a regular expression and
 * return them in an array that can hold the keys type, publicid, systemid and
 * internal.
 *
 * Named groups of the pattern: typepublic / typesystem (the PUBLIC or SYSTEM
 * keyword), pubquote / pubapos (public identifier), pubsysquote / pubsysapos
 * and sysquote / sysapos (system identifier), internal (the internal subset
 * between square brackets).
 *
 * NOTE(review): on the line numbered 463 the two system-identifier
 * alternatives of the PUBLIC form are not wrapped in their own group. The bar
 * between them therefore splits the whole PUBLIC alternative, so an
 * apostrophe-quoted string alone can match as pubsysapos without the PUBLIC
 * keyword, and a PUBLIC declaration with an apostrophe-quoted system
 * identifier does not match as intended. Wrapping that line in a
 * non-capturing group, as the line numbered 462 does, looks like the fix --
 * confirm against the elided line numbered 466.
 * NOTE(review): the preg_match() call, the mapping from group names to result
 * keys and the return statement are elided in this listing.
 *
 * @param string $dtd Full text of the DOCTYPE declaration
 */
457 private function parseDTD( $dtd ) {
460 '/^<!DOCTYPE\s*\S+\s*' .
461 '(?:(?P<typepublic>PUBLIC)\s*' .
462 '(?:"(?P<pubquote>[^"]*)"|\'(?P<pubapos>[^\']*)\')' .
463 '\s*"(?P<pubsysquote>[^"]*)"|\'(?P<pubsysapos>[^\']*)\'' .
464 '|(?P<typesystem>SYSTEM)\s*' .
465 '(?:"(?P<sysquote>[^"]*)"|\'(?P<sysapos>[^\']*)\')' .
467 '(?:\[\s*(?P<internal>.*)\])?\s*>$/s',
// Presumably reached when the pattern does not match: an unparseable DOCTYPE
// marks the document as not well-formed -- the guarding condition is elided.
472 $this->wellFormed =
false;
// Skip groups that did not capture anything and the numeric duplicates that
// preg_match() adds next to each named group.
476 foreach ( $m as $field => $value ) {
477 if ( $value ===
'' || is_numeric( $field ) ) {
483 $parsed[
'type'] = $value;
487 $parsed[
'publicid'] = $value;
493 $parsed[
'systemid'] = $value;
496 $parsed[
'internal'] = $value;