78 'processing_instruction_handler' =>
null,
79 'external_dtd_handler' =>
'',
81 'require_safe_dtd' =>
true
112 $this->parserOptions = array_merge( $this->parserOptions, $options );
160 $reader =
new XMLReader();
162 $s = $reader->open( $xml,
null, LIBXML_NOERROR | LIBXML_NOWARNING );
164 $s = $reader->XML( $xml,
null, LIBXML_NOERROR | LIBXML_NOWARNING );
168 $this->wellFormed =
false;
171 $oldDisable = @libxml_disable_entity_loader(
true );
172 $reader->setParserProperty( XMLReader::SUBST_ENTITIES,
true );
175 }
catch ( Exception $e ) {
178 $this->wellFormed =
false;
181 @libxml_disable_entity_loader( $oldDisable );
186 @libxml_disable_entity_loader( $oldDisable );
191 set_error_handler(
function (
$line,
$file ) {
192 $this->wellFormed =
false;
194 $ret = $reader->read();
195 restore_error_handler();
203 if ( !$this->
readNext( $reader ) ) {
205 $this->wellFormed =
false;
208 if ( $reader->nodeType === XMLReader::PI ) {
211 if ( $reader->nodeType === XMLReader::DOC_TYPE ) {
214 }
while ( $reader->nodeType != XMLReader::ELEMENT );
218 switch ( $reader->nodeType ) {
219 case XMLReader::ELEMENT:
222 $reader->namespaceURI
224 if ( $this->rootElement ===
'' ) {
225 $this->rootElement = $name;
227 $empty = $reader->isEmptyElement;
235 case XMLReader::END_ELEMENT:
239 case XMLReader::WHITESPACE:
240 case XMLReader::SIGNIFICANT_WHITESPACE:
241 case XMLReader::CDATA:
242 case XMLReader::TEXT:
246 case XMLReader::ENTITY_REF:
251 case XMLReader::COMMENT:
262 case XMLReader::DOC_TYPE:
265 $this->wellFormed =
false;
272 }
while ( $this->
readNext( $reader ) );
274 if ( $this->stackDepth !== 0 ) {
275 $this->wellFormed =
false;
276 } elseif ( $this->wellFormed ===
null ) {
277 $this->wellFormed =
true;
288 while ( $r->moveToNextAttribute() ) {
289 if ( $r->namespaceURI ===
'http://www.w3.org/2000/xmlns/' ) {
294 $name = $this->
expandNS( $r->name, $r->namespaceURI );
295 $attrs[$name] = $r->value;
305 private function expandNS( $name, $namespaceURI ) {
306 if ( $namespaceURI ) {
307 $parts = explode(
':', $name );
308 $localname = array_pop( $parts );
309 return "$namespaceURI:$localname";
319 $this->elementDataContext[] = [ $name, $attribs ];
325 list( $name, $attribs ) = array_pop( $this->elementDataContext );
328 $callbackReturn =
false;
330 if ( is_callable( $this->filterCallback ) ) {
333 if ( $callbackReturn ) {
335 $this->filterMatch =
true;
336 $this->filterMatchType = $callbackReturn;
345 $this->
elementData[ $this->stackDepth - 1 ] .= trim( $data );
353 $callbackReturn =
false;
354 if ( $this->parserOptions[
'processing_instruction_handler'] ) {
356 $callbackReturn = $this->parserOptions[
'processing_instruction_handler'](
361 if ( $callbackReturn ) {
363 $this->filterMatch =
true;
364 $this->filterMatchType = $callbackReturn;
374 $externalCallback = $this->parserOptions[
'external_dtd_handler'];
375 $generalCallback = $this->parserOptions[
'dtd_handler'];
376 $checkIfSafe = $this->parserOptions[
'require_safe_dtd'];
377 if ( !$externalCallback && !$generalCallback && !$checkIfSafe ) {
380 $dtd = $reader->readOuterXml();
381 $callbackReturn =
false;
383 if ( $generalCallback ) {
384 $callbackReturn = $generalCallback( $dtd );
386 if ( $callbackReturn ) {
388 $this->filterMatch =
true;
389 $this->filterMatchType = $callbackReturn;
390 $callbackReturn =
false;
393 $parsedDTD = $this->
parseDTD( $dtd );
394 if ( $externalCallback && isset( $parsedDTD[
'type'] ) ) {
395 $callbackReturn = $externalCallback(
397 $parsedDTD[
'publicid'] ??
null,
398 $parsedDTD[
'systemid'] ??
null
401 if ( $callbackReturn ) {
403 $this->filterMatch =
true;
404 $this->filterMatchType = $callbackReturn;
407 if ( $checkIfSafe && isset( $parsedDTD[
'internal'] ) &&
410 $this->wellFormed =
false;
436 '/^(?:\s*<!ENTITY\s+\S+\s+' .
437 '(?:"(?:&[^"%&;]{1,64};|(?:[^"%&]|&amp;|&quot;){0,255})"' .
438 '|\'(?:&[^"%&;]{1,64};|(?:[^\'%&]|&amp;|&apos;){0,255})\')\s*>' .
439 '|\s*<!--(?:[^-]|-[^-])*-->' .
440 '|\s*<!ATTLIST svg xmlns:xlink CDATA #FIXED ' .
441 '"http:\/\/www.w3.org\/1999\/xlink">)*\s*$/',
459 '/^<!DOCTYPE\s*\S+\s*' .
460 '(?:(?P<typepublic>PUBLIC)\s*' .
461 '(?:"(?P<pubquote>[^"]*)"|\'(?P<pubapos>[^\']*)\')' .
462 '\s*"(?P<pubsysquote>[^"]*)"|\'(?P<pubsysapos>[^\']*)\'' .
463 '|(?P<typesystem>SYSTEM)\s*' .
464 '(?:"(?P<sysquote>[^"]*)"|\'(?P<sysapos>[^\']*)\')' .
466 '(?:\[\s*(?P<internal>.*)\])?\s*>$/s',
471 $this->wellFormed =
false;
475 foreach ( $m as $field => $value ) {
476 if ( $value ===
'' || is_numeric( $field ) ) {
482 $parsed[
'type'] = $value;
486 $parsed[
'publicid'] = $value;
492 $parsed[
'systemid'] = $value;
495 $parsed[
'internal'] = $value;
int $stackDepth
Current depth of the data stack.
callable|null $filterCallback
getRootElement()
Get the root element.
__construct( $input, $filterCallback=null, $isFile=true, $options=[])
Allow filtering an XML file.
parseDTD( $dtd)
Parse DTD into parts.
string[] $elementData
A stack of strings containing the data of each XML element as it's processed.
bool|null $wellFormed
Will be set to true or false to indicate whether the file is well-formed XML.
string $rootElement
Name of the document's root element, including any namespace as an expanded URL.
bool $filterMatch
Will be set to true if the optional element filter returned a match at some point.
static newFromFilename( $fname, $filterCallback=null)
Alternative constructor: from filename.
readNext(XMLReader $reader)
elementOpen( $name, $attribs)
validateFromInput( $xml, $isFile)
array $parserOptions
Additional parsing options.
mixed $filterMatchType
Will contain the type of filter hit if the optional element filter returned a match at some point.
array $elementDataContext
A stack of element names and attributes, as we process them.
expandNS( $name, $namespaceURI)
dtdHandler(XMLReader $reader)
Handle coming across a <!DOCTYPE declaration.
static newFromString( $string, $filterCallback=null)
Alternative constructor: from string.
processingInstructionHandler( $target, $data)
checkDTDIsSafe( $internalSubset)
Check if the internal subset of the DTD is safe.
getAttributesArray(XMLReader $r)
Get all of the attributes for an XMLReader's current node.
if(PHP_SAPI !='cli-server') if(!isset( $_SERVER['SCRIPT_FILENAME'])) $file
Item class for a filearchive table row.