78 'processing_instruction_handler' =>
null,
79 'external_dtd_handler' =>
'',
81 'require_safe_dtd' =>
true
112 $this->parserOptions = array_merge( $this->parserOptions, $options );
160 $reader =
new XMLReader();
162 $s = $reader->open( $xml,
null, LIBXML_NOERROR | LIBXML_NOWARNING );
164 $s = $reader->XML( $xml,
null, LIBXML_NOERROR | LIBXML_NOWARNING );
168 $this->wellFormed =
false;
170 $oldDisable = libxml_disable_entity_loader(
true );
171 $reader->setParserProperty( XMLReader::SUBST_ENTITIES,
true );
174 }
catch ( Exception $e ) {
177 $this->wellFormed =
false;
179 libxml_disable_entity_loader( $oldDisable );
183 libxml_disable_entity_loader( $oldDisable );
188 set_error_handler( [ $this,
'XmlErrorHandler' ] );
189 $ret = $reader->read();
190 restore_error_handler();
195 $this->wellFormed =
false;
202 if ( !$this->
readNext( $reader ) ) {
204 $this->wellFormed =
false;
207 if ( $reader->nodeType === XMLReader::PI ) {
210 if ( $reader->nodeType === XMLReader::DOC_TYPE ) {
213 }
while ( $reader->nodeType != XMLReader::ELEMENT );
217 switch ( $reader->nodeType ) {
218 case XMLReader::ELEMENT:
221 $reader->namespaceURI
223 if ( $this->rootElement ===
'' ) {
224 $this->rootElement = $name;
226 $empty = $reader->isEmptyElement;
234 case XMLReader::END_ELEMENT:
238 case XMLReader::WHITESPACE:
239 case XMLReader::SIGNIFICANT_WHITESPACE:
240 case XMLReader::CDATA:
241 case XMLReader::TEXT:
245 case XMLReader::ENTITY_REF:
250 case XMLReader::COMMENT:
261 case XMLReader::DOC_TYPE:
264 $this->wellFormed =
false;
271 }
while ( $this->
readNext( $reader ) );
273 if ( $this->stackDepth !== 0 ) {
274 $this->wellFormed =
false;
275 } elseif ( $this->wellFormed ===
null ) {
276 $this->wellFormed =
true;
287 while ( $r->moveToNextAttribute() ) {
288 if ( $r->namespaceURI ===
'http://www.w3.org/2000/xmlns/' ) {
293 $name = $this->
expandNS( $r->name, $r->namespaceURI );
294 $attrs[$name] = $r->value;
304 private function expandNS( $name, $namespaceURI ) {
305 if ( $namespaceURI ) {
306 $parts = explode(
':', $name );
307 $localname = array_pop( $parts );
308 return "$namespaceURI:$localname";
318 $this->elementDataContext[] = [ $name, $attribs ];
324 list( $name, $attribs ) = array_pop( $this->elementDataContext );
327 $callbackReturn =
false;
329 if ( is_callable( $this->filterCallback ) ) {
330 $callbackReturn = call_user_func(
331 $this->filterCallback,
337 if ( $callbackReturn ) {
339 $this->filterMatch =
true;
340 $this->filterMatchType = $callbackReturn;
349 $this->
elementData[ $this->stackDepth - 1 ] .= trim( $data );
357 $callbackReturn =
false;
358 if ( $this->parserOptions[
'processing_instruction_handler'] ) {
359 $callbackReturn = call_user_func(
360 $this->parserOptions[
'processing_instruction_handler'],
365 if ( $callbackReturn ) {
367 $this->filterMatch =
true;
368 $this->filterMatchType = $callbackReturn;
378 $externalCallback = $this->parserOptions[
'external_dtd_handler'];
379 $generalCallback = $this->parserOptions[
'dtd_handler'];
380 $checkIfSafe = $this->parserOptions[
'require_safe_dtd'];
381 if ( !$externalCallback && !$generalCallback && !$checkIfSafe ) {
384 $dtd = $reader->readOuterXml();
385 $callbackReturn =
false;
387 if ( $generalCallback ) {
388 $callbackReturn = call_user_func( $generalCallback, $dtd );
390 if ( $callbackReturn ) {
392 $this->filterMatch =
true;
393 $this->filterMatchType = $callbackReturn;
394 $callbackReturn =
false;
397 $parsedDTD = $this->
parseDTD( $dtd );
398 if ( $externalCallback && isset( $parsedDTD[
'type'] ) ) {
399 $callbackReturn = call_user_func(
402 $parsedDTD[
'publicid'] ??
null,
403 $parsedDTD[
'systemid'] ??
null
406 if ( $callbackReturn ) {
408 $this->filterMatch =
true;
409 $this->filterMatchType = $callbackReturn;
410 $callbackReturn =
false;
413 if ( $checkIfSafe && isset( $parsedDTD[
'internal'] ) &&
416 $this->wellFormed =
false;
443 '/^(?:\s*<!ENTITY\s+\S+\s+' .
444 '(?:"(?:&[^"%&;]{1,64};|(?:[^"%&]|&|"){0,255})"' .
445 '|\'(?:&[^"%&;]{1,64};|(?:[^\'%&]|&|'){0,255})\')\s*>' .
446 '|\s*<!--(?:[^-]|-[^-])*-->' .
447 '|\s*<!ATTLIST svg xmlns:xlink CDATA #FIXED ' .
448 '"http:\/\/www.w3.org\/1999\/xlink">)*\s*$/',
466 '/^<!DOCTYPE\s*\S+\s*' .
467 '(?:(?P<typepublic>PUBLIC)\s*' .
468 '(?:"(?P<pubquote>[^"]*)"|\'(?P<pubapos>[^\']*)\')' .
469 '\s*"(?P<pubsysquote>[^"]*)"|\'(?P<pubsysapos>[^\']*)\'' .
470 '|(?P<typesystem>SYSTEM)\s*' .
471 '(?:"(?P<sysquote>[^"]*)"|\'(?P<sysapos>[^\']*)\')' .
473 '(?:\[\s*(?P<internal>.*)\])?\s*>$/s',
478 $this->wellFormed =
false;
482 foreach ( $m as $field => $value ) {
483 if ( $value ===
'' || is_numeric( $field ) ) {
489 $parsed[
'type'] = $value;
493 $parsed[
'publicid'] = $value;
499 $parsed[
'systemid'] = $value;
502 $parsed[
'internal'] = $value;