75 'processing_instruction_handler' =>
'',
76 'external_dtd_handler' =>
'',
78 'require_safe_dtd' =>
true
108 $this->filterCallback = $filterCallback;
109 $this->parserOptions = array_merge( $this->parserOptions,
$options );
125 return new self(
$fname, $filterCallback,
true );
140 return new self( $string, $filterCallback,
false );
156 $reader =
new XMLReader();
158 $s = $reader->open( $xml,
null, LIBXML_NOERROR | LIBXML_NOWARNING );
160 $s = $reader->XML( $xml,
null, LIBXML_NOERROR | LIBXML_NOWARNING );
164 $this->wellFormed =
false;
166 $oldDisable = libxml_disable_entity_loader(
true );
167 $reader->setParserProperty( XMLReader::SUBST_ENTITIES,
true );
170 }
catch ( Exception
$e ) {
173 $this->wellFormed =
false;
175 libxml_disable_entity_loader( $oldDisable );
179 libxml_disable_entity_loader( $oldDisable );
184 set_error_handler( [ $this,
'XmlErrorHandler' ] );
185 $ret = $reader->read();
186 restore_error_handler();
191 $this->wellFormed =
false;
198 if ( !$this->
readNext( $reader ) ) {
200 $this->wellFormed =
false;
203 if ( $reader->nodeType === XMLReader::PI ) {
206 if ( $reader->nodeType === XMLReader::DOC_TYPE ) {
209 }
while ( $reader->nodeType != XMLReader::ELEMENT );
213 switch ( $reader->nodeType ) {
214 case XMLReader::ELEMENT:
217 $reader->namespaceURI
219 if ( $this->rootElement ===
'' ) {
220 $this->rootElement =
$name;
222 $empty = $reader->isEmptyElement;
230 case XMLReader::END_ELEMENT:
234 case XMLReader::WHITESPACE:
235 case XMLReader::SIGNIFICANT_WHITESPACE:
236 case XMLReader::CDATA:
237 case XMLReader::TEXT:
241 case XMLReader::ENTITY_REF:
246 case XMLReader::COMMENT:
257 case XMLReader::DOC_TYPE:
260 $this->wellFormed =
false;
267 }
while ( $this->
readNext( $reader ) );
269 if ( $this->stackDepth !== 0 ) {
270 $this->wellFormed =
false;
271 } elseif ( $this->wellFormed ===
null ) {
272 $this->wellFormed =
true;
283 while ( $r->moveToNextAttribute() ) {
284 if ( $r->namespaceURI ===
'http://www.w3.org/2000/xmlns/' ) {
290 $attrs[
$name] = $r->value;
301 if ( $namespaceURI ) {
302 $parts = explode(
':',
$name );
303 $localname = array_pop( $parts );
304 return "$namespaceURI:$localname";
323 $callbackReturn =
false;
325 if ( is_callable( $this->filterCallback ) ) {
326 $callbackReturn = call_user_func(
327 $this->filterCallback,
333 if ( $callbackReturn ) {
335 $this->filterMatch =
true;
336 $this->filterMatchType = $callbackReturn;
345 $this->
elementData[ $this->stackDepth - 1 ] .= trim( $data );
353 $callbackReturn =
false;
354 if ( $this->parserOptions[
'processing_instruction_handler'] ) {
355 $callbackReturn = call_user_func(
356 $this->parserOptions[
'processing_instruction_handler'],
361 if ( $callbackReturn ) {
363 $this->filterMatch =
true;
364 $this->filterMatchType = $callbackReturn;
373 $externalCallback = $this->parserOptions[
'external_dtd_handler'];
374 $generalCallback = $this->parserOptions[
'dtd_handler'];
375 $checkIfSafe = $this->parserOptions[
'require_safe_dtd'];
376 if ( !$externalCallback && !$generalCallback && !$checkIfSafe ) {
379 $dtd = $reader->readOuterXML();
380 $callbackReturn =
false;
382 if ( $generalCallback ) {
383 $callbackReturn = call_user_func( $generalCallback, $dtd );
385 if ( $callbackReturn ) {
387 $this->filterMatch =
true;
388 $this->filterMatchType = $callbackReturn;
389 $callbackReturn =
false;
392 $parsedDTD = $this->
parseDTD( $dtd );
393 if ( $externalCallback && isset( $parsedDTD[
'type'] ) ) {
394 $callbackReturn = call_user_func(
397 isset( $parsedDTD[
'publicid'] ) ? $parsedDTD[
'publicid'] :
null,
398 isset( $parsedDTD[
'systemid'] ) ? $parsedDTD[
'systemid'] :
null
401 if ( $callbackReturn ) {
403 $this->filterMatch =
true;
404 $this->filterMatchType = $callbackReturn;
405 $callbackReturn =
false;
408 if ( $checkIfSafe && isset( $parsedDTD[
'internal'] ) ) {
410 $this->wellFormed =
false;
438 '/^(?:\s*<!ENTITY\s+\S+\s+' .
439 '(?:"(?:&[^"%&;]{1,64};|(?:[^"%&]|&|"){0,255})"' .
440 '|\'(?:&[^"%&;]{1,64};|(?:[^\'%&]|&|'){0,255})\')\s*>' .
441 '|\s*<!--(?:[^-]|-[^-])*-->' .
442 '|\s*<!ATTLIST svg xmlns:xlink CDATA #FIXED ' .
443 '"http:\/\/www.w3.org\/1999\/xlink">)*\s*$/',
461 '/^<!DOCTYPE\s*\S+\s*' .
462 '(?:(?P<typepublic>PUBLIC)\s*' .
463 '(?:"(?P<pubquote>[^"]*)"|\'(?P<pubapos>[^\']*)\')' .
464 '\s*"(?P<pubsysquote>[^"]*)"|\'(?P<pubsysapos>[^\']*)\'' .
465 '|(?P<typesystem>SYSTEM)\s*' .
466 '(?:"(?P<sysquote>[^"]*)"|\'(?P<sysapos>[^\']*)\')' .
468 '(?:\[\s*(?P<internal>.*)\])?\s*>$/s',
473 $this->wellFormed =
false;
477 foreach ( $m
as $field =>
$value ) {
478 if (
$value ===
'' || is_numeric( $field ) ) {
488 $parsed[
'publicid'] =
$value;
494 $parsed[
'systemid'] =
$value;
497 $parsed[
'internal'] =
$value;