// Default parser options; callers' options are merged over these with array_merge().
// Keys visible here: a processing-instruction handler (null by default), an
// external-DTD handler (empty string by default), and 'require_safe_dtd'
// (true by default), which makes dtdHandler() run checkDTDIsSafe() on the
// internal subset.
// NOTE(review): dtdHandler() also reads a 'dtd_handler' key; no default for it
// is visible here — confirm it is declared so the lookup cannot hit an
// undefined index.
82 private $parserOptions = [
83 'processing_instruction_handler' =>
null,
84 'external_dtd_handler' =>
'',
86 'require_safe_dtd' => true
117 $this->parserOptions = array_merge( $this->parserOptions, $options );
118 $this->validateFromInput( $input, $isFile );
/**
 * Load $xml into an XMLReader and run validate() over it.
 *
 * $xml is handed either to XMLReader::open() or to XMLReader::XML();
 * presumably $isFile selects between the two (TODO confirm — the branch
 * is not visible). libxml errors and warnings are suppressed through
 * LIBXML_NOERROR | LIBXML_NOWARNING in both calls.
 *
 * Outcome is reported through $this->wellFormed.
 */
164 private function validateFromInput( $xml, $isFile ) {
165 $reader =
new XMLReader();
167 $s = $reader->open( $xml,
null, LIBXML_NOERROR | LIBXML_NOWARNING );
169 $s = $reader->XML( $xml,
null, LIBXML_NOERROR | LIBXML_NOWARNING );
// Presumably reached when $s reports that the input could not be opened
// (TODO confirm the condition).
173 $this->wellFormed =
false;
// Turn the libxml entity loader off for the duration of the parse and keep
// the previous setting so it can be put back afterwards. The '@' silences
// any diagnostic the call itself raises.
176 $oldDisable = @libxml_disable_entity_loader(
true );
// Ask the reader to substitute entities while parsing.
177 $reader->setParserProperty( XMLReader::SUBST_ENTITIES,
true );
// An Exception escaping validate() is caught below and marks the document
// as not well-formed.
179 $this->validate( $reader );
180 }
catch ( Exception $e ) {
183 $this->wellFormed =
false;
// Put the entity-loader setting back to what it was before the parse.
// NOTE(review): the restore appears twice; presumably once on the exception
// path and once on the normal path — confirm both exits are covered.
186 @libxml_disable_entity_loader( $oldDisable );
191 @libxml_disable_entity_loader( $oldDisable );
/**
 * Advance $reader by one node with XMLReader::read().
 *
 * A temporary error handler is installed around the read; whenever it
 * fires, the document is marked as not well-formed. The previous handler
 * is reinstated immediately after the read.
 *
 * The result of read() is kept in $ret; presumably that is what is
 * returned, since validate() uses this method as a loop condition.
 */
195 private function readNext( XMLReader $reader ) {
// NOTE(review): set_error_handler() callbacks receive the error level and
// message as their first two arguments, so the names $line/$file are
// misleading; neither is used in the visible body.
196 set_error_handler(
function ( $line, $file ) {
197 $this->wellFormed =
false;
200 $ret = $reader->read();
201 restore_error_handler();
/**
 * Walk the whole document through $reader and decide well-formedness.
 *
 * Phase 1 reads forward until the first element node, dispatching any
 * processing instruction to processingInstructionHandler() and any
 * doctype to dtdHandler() on the way.
 * Phase 2 dispatches every remaining node by type to the element
 * open/close/data callbacks.
 * Finally $this->wellFormed is settled from the stack depth.
 */
205 private function validate( $reader ) {
// Running out of nodes before any element was seen means the document is
// not well-formed.
209 if ( !$this->readNext( $reader ) ) {
211 $this->wellFormed =
false;
214 if ( $reader->nodeType === XMLReader::PI ) {
215 $this->processingInstructionHandler( $reader->name, $reader->value );
217 if ( $reader->nodeType === XMLReader::DOC_TYPE ) {
218 $this->dtdHandler( $reader );
220 }
while ( $reader->nodeType != XMLReader::ELEMENT );
// Phase 2: the reader is now positioned on the first element.
224 switch ( $reader->nodeType ) {
225 case XMLReader::ELEMENT:
226 $name = $this->expandNS(
228 $reader->namespaceURI
// The first element encountered becomes the root element name.
230 if ( $this->rootElement ===
'' ) {
231 $this->rootElement = $name;
// isEmptyElement is captured before the attributes are collected,
// presumably because getAttributesArray() moves the reader onto the
// attribute nodes.
233 $empty = $reader->isEmptyElement;
234 $attrs = $this->getAttributesArray( $reader );
235 $this->elementOpen( $name, $attrs );
// Presumably guarded by $empty: an empty element produces no END_ELEMENT
// node, so it is closed right away (TODO confirm the guard).
237 $this->elementClose();
241 case XMLReader::END_ELEMENT:
242 $this->elementClose();
// All whitespace, CDATA and text nodes are fed to elementData().
245 case XMLReader::WHITESPACE:
246 case XMLReader::SIGNIFICANT_WHITESPACE:
247 case XMLReader::CDATA:
248 case XMLReader::TEXT:
249 $this->elementData( $reader->value );
252 case XMLReader::ENTITY_REF:
257 case XMLReader::COMMENT:
// Processing instructions found after the first element go to the same
// handler as those found before it (presumably under a PI case).
263 $this->processingInstructionHandler(
// A doctype showing up once the first element has been reached marks the
// document as not well-formed.
268 case XMLReader::DOC_TYPE:
271 $this->wellFormed =
false;
278 }
while ( $this->readNext( $reader ) );
// A non-zero stack depth at the end means not well-formed (presumably
// elements left open). Otherwise, if nothing has flagged the document so
// far (wellFormed is still null), it is declared well-formed.
280 if ( $this->stackDepth !== 0 ) {
281 $this->wellFormed =
false;
282 } elseif ( $this->wellFormed ===
null ) {
283 $this->wellFormed =
true;
/**
 * Collect the attributes of the reader's current element.
 *
 * Iterates with moveToNextAttribute() and stores each value under the
 * attribute's namespace-expanded name (see expandNS()).
 */
292 private function getAttributesArray( XMLReader $r ) {
294 while ( $r->moveToNextAttribute() ) {
// Attributes in the xmlns namespace (namespace declarations) get separate
// treatment; presumably they are skipped (TODO confirm the branch body).
295 if ( $r->namespaceURI ===
'http://www.w3.org/2000/xmlns/' ) {
300 $name = $this->expandNS( $r->name, $r->namespaceURI );
301 $attrs[$name] = $r->value;
/**
 * Build the namespace-expanded form of a node name.
 *
 * When $namespaceURI is non-empty, everything up to the last ':' in $name
 * is dropped and "<namespaceURI>:<localname>" is returned.
 * Presumably $name is returned unchanged when there is no namespace
 * (TODO confirm — that path is not visible).
 */
311 private function expandNS( $name, $namespaceURI ) {
312 if ( $namespaceURI ) {
313 $parts = explode(
':', $name );
// The last ':'-separated piece is the local name.
314 $localname = array_pop( $parts );
315 return "$namespaceURI:$localname";
/**
 * Record the start of an element.
 *
 * Pushes [ name, attributes ] onto the context stack and an empty string
 * onto the data stack; elementData() later appends text to that string.
 */
324 private function elementOpen( $name, $attribs ) {
325 $this->elementDataContext[] = [ $name, $attribs ];
326 $this->elementData[] =
'';
/**
 * Record the end of an element.
 *
 * Pops the element's name/attributes and its accumulated text off their
 * stacks. When a filter callback is set, its result (presumably from a
 * call with the popped values — TODO confirm) decides whether a filter
 * match is recorded.
 */
330 private function elementClose() {
331 [ $name, $attribs ] = array_pop( $this->elementDataContext );
332 $data = array_pop( $this->elementData );
334 $callbackReturn =
false;
336 if ( is_callable( $this->filterCallback ) ) {
// Any truthy callback result counts as a match; the result itself is kept
// as the match type.
339 if ( $callbackReturn ) {
341 $this->filterMatch =
true;
342 $this->filterMatchType = $callbackReturn;
/**
 * Append character data to the text of the innermost open element.
 *
 * The data is trimmed before being appended, so leading and trailing
 * whitespace of each chunk is dropped.
 */
349 private function elementData( $data ) {
// The top of the data stack sits at index stackDepth - 1.
351 $this->elementData[ $this->stackDepth - 1 ] .= trim( $data );
/**
 * Pass a processing instruction to the optional user callback.
 *
 * The callback is taken from the 'processing_instruction_handler' parser
 * option and is only invoked when that option is truthy. A truthy result
 * is recorded as a filter match.
 */
358 private function processingInstructionHandler( $target, $data ) {
359 $callbackReturn =
false;
360 if ( $this->parserOptions[
'processing_instruction_handler'] ) {
362 $callbackReturn = $this->parserOptions[
'processing_instruction_handler'](
// Any truthy callback result counts as a match; the result itself is kept
// as the match type.
367 if ( $callbackReturn ) {
369 $this->filterMatch =
true;
370 $this->filterMatchType = $callbackReturn;
/**
 * Inspect the document's doctype declaration.
 *
 * Three parser options drive the work: 'dtd_handler' (callback given the
 * raw DTD text), 'external_dtd_handler' (callback given the parsed
 * identifiers) and 'require_safe_dtd' (run checkDTDIsSafe() on the
 * internal subset). A truthy callback result is recorded as a filter
 * match; an unsafe internal subset marks the document as not well-formed.
 */
379 private function dtdHandler( XMLReader $reader ) {
380 $externalCallback = $this->parserOptions[
'external_dtd_handler'];
381 $generalCallback = $this->parserOptions[
'dtd_handler'];
382 $checkIfSafe = $this->parserOptions[
'require_safe_dtd'];
// Nothing to do when no callback is set and no safety check is requested
// (presumably an early return — TODO confirm).
383 if ( !$externalCallback && !$generalCallback && !$checkIfSafe ) {
// Raw text of the doctype node.
386 $dtd = $reader->readOuterXml();
387 $callbackReturn =
false;
389 if ( $generalCallback ) {
390 $callbackReturn = $generalCallback( $dtd );
392 if ( $callbackReturn ) {
394 $this->filterMatch =
true;
395 $this->filterMatchType = $callbackReturn;
// Reset so the general callback's result is not mistaken for the external
// callback's result below.
396 $callbackReturn =
false;
399 $parsedDTD = $this->parseDTD( $dtd );
// The external callback only runs when parseDTD() reported a 'type'; the
// public and system identifiers are passed as null when absent.
400 if ( $externalCallback && isset( $parsedDTD[
'type'] ) ) {
401 $callbackReturn = $externalCallback(
403 $parsedDTD[
'publicid'] ??
null,
404 $parsedDTD[
'systemid'] ??
null
407 if ( $callbackReturn ) {
409 $this->filterMatch =
true;
410 $this->filterMatchType = $callbackReturn;
// With the safety check enabled, an internal subset that fails
// checkDTDIsSafe() makes the document not well-formed.
413 if ( $checkIfSafe && isset( $parsedDTD[
'internal'] ) &&
414 !$this->checkDTDIsSafe( $parsedDTD[
'internal'] )
416 $this->wellFormed =
false;
/**
 * Check an internal DTD subset against an allow-list pattern.
 *
 * The pattern accepts only a sequence of: <!ENTITY ...> declarations with
 * a quoted value (either a single entity reference with a name of at most
 * 64 characters, or at most 255 other characters), XML comments, and one
 * specific <!ATTLIST svg xmlns:xlink ...> declaration.
 */
440 private function checkDTDIsSafe( $internalSubset ) {
// NOTE(review): the alternatives `|&|"` and `|&|'` on the next two lines
// re-admit characters that the preceding character class excludes, and the
// bare single quote on the second of them ends the PHP string literal
// early. They look like entity-decoded remnants of escaped references —
// verify these two lines against the upstream file.
442 '/^(?:\s*<!ENTITY\s+\S+\s+' .
443 '(?:"(?:&[^"%&;]{1,64};|(?:[^"%&]|&|"){0,255})"' .
444 '|\'(?:&[^\'%&;]{1,64};|(?:[^\'%&]|&|'){0,255})\')\s*>' .
445 '|\s*<!--(?:[^-]|-[^-])*-->' .
446 '|\s*<!ATTLIST svg xmlns:xlink CDATA #FIXED ' .
447 '"http:\/\/www.w3.org\/1999\/xlink">)*\s*$/',
/**
 * Split a doctype declaration into its parts.
 *
 * A single pattern captures the declaration type (PUBLIC or SYSTEM), the
 * identifiers in either quote style, and the optional internal subset.
 * The captures are then folded into the keys 'type', 'publicid',
 * 'systemid' and 'internal' of the result.
 */
462 private function parseDTD( $dtd ) {
// NOTE(review): on the line capturing pubsysquote/pubsysapos, the '|'
// before the single-quoted form is not enclosed in its own group. As
// written it splits the enclosing alternation instead of offering a second
// quote style for the system identifier after PUBLIC — confirm against the
// full pattern.
465 '/^<!DOCTYPE\s*\S+\s*' .
466 '(?:(?P<typepublic>PUBLIC)\s*' .
467 '(?:"(?P<pubquote>[^"]*)"|\'(?P<pubapos>[^\']*)\')' .
468 '\s*"(?P<pubsysquote>[^"]*)"|\'(?P<pubsysapos>[^\']*)\'' .
469 '|(?P<typesystem>SYSTEM)\s*' .
470 '(?:"(?P<sysquote>[^"]*)"|\'(?P<sysapos>[^\']*)\')' .
472 '(?:\[\s*(?P<internal>.*)\])?\s*>$/s',
// Presumably reached when the pattern does not match (TODO confirm the
// condition).
477 $this->wellFormed =
false;
// Only named, non-empty captures are of interest; numeric keys and empty
// strings are filtered out here.
481 foreach ( $m as $field => $value ) {
482 if ( $value ===
'' || is_numeric( $field ) ) {
488 $parsed[
'type'] = $value;
492 $parsed[
'publicid'] = $value;
498 $parsed[
'systemid'] = $value;
501 $parsed[
'internal'] = $value;