// Default parser options. The constructor merges caller-supplied options over
// these defaults with array_merge(), so any key given by the caller wins.
// NOTE(review): this excerpt skips a line between 'external_dtd_handler' and
// 'require_safe_dtd'; dtdHandler() reads a 'dtd_handler' key whose default is
// not visible here — confirm it is declared in the full file.
73 private $parserOptions = [
// Callable invoked for processing instructions; null disables it.
74 'processing_instruction_handler' =>
null,
// Callable invoked for DTDs that parseDTD() reports a 'type' for; '' disables it.
75 'external_dtd_handler' =>
'',
// When true, an internal DTD subset that fails checkDTDIsSafe() marks the
// document as not well-formed.
77 'require_safe_dtd' => true
108 $this->parserOptions = array_merge( $this->parserOptions, $options );
109 $this->validateFromInput( $input, $isFile );
/**
 * Create an XMLReader over the input and run validate() on it, recording
 * failures in $this->wellFormed.
 *
 * @param string $xml Passed to XMLReader::open() or XMLReader::XML(); presumably
 *  a file path in the first case and XML text in the second (the branch that
 *  chooses between them is not visible in this excerpt).
 * @param bool $isFile Presumably selects open() over XML() — confirm.
 */
155 private function validateFromInput( $xml, $isFile ) {
157 $xmlParseHuge = LIBXML_PARSEHUGE;
// NOTE(review): the body of this test-mode branch is not visible; it
// presumably adjusts $xmlParseHuge under PHPUnit — confirm.
158 if ( defined(
'MW_PHPUNIT_TEST' ) ) {
164 $reader =
new XMLReader();
// libxml errors and warnings are suppressed at the libxml level for both
// input modes; parse problems are detected through return values instead.
166 $s = $reader->open( $xml,
null, LIBXML_NOERROR | LIBXML_NOWARNING | $xmlParseHuge );
168 $s = $reader->XML( $xml,
null, LIBXML_NOERROR | LIBXML_NOWARNING | $xmlParseHuge );
// Presumably reached when $s is false, i.e. the reader could not be set up.
172 $this->wellFormed =
false;
// Disable the libxml external entity loader while parsing and remember the
// previous setting so it can be restored afterwards.
// NOTE(review): libxml_disable_entity_loader() is deprecated as of PHP 8.0
// per the PHP manual; the @ presumably silences that notice — confirm.
175 $oldDisable = @libxml_disable_entity_loader(
true );
// Have the reader substitute entities, so expanded text reaches the callbacks.
176 $reader->setParserProperty( XMLReader::SUBST_ENTITIES,
true );
178 $this->validate( $reader );
179 }
catch ( Exception $e ) {
// Any exception raised while validating is treated as "not well-formed".
182 $this->wellFormed =
false;
// Restore the entity loader setting saved above.
187 @libxml_disable_entity_loader( $oldDisable );
/**
 * Advance the reader by one node, marking the document as not well-formed if
 * PHP raises an error during the read.
 *
 * @param XMLReader $reader
 * @return bool Presumably the result of XMLReader::read() stored in $ret (the
 *  return statement is not visible in this excerpt).
 */
192 private function readNext( XMLReader $reader ): bool {
// A temporary error handler is installed only for the duration of read().
// NOTE(review): PHP passes ( $errno, $errstr, $errfile, $errline ) to error
// handlers, so $line and $file here actually receive the error number and
// message; the names are misleading but the values appear unused.
193 set_error_handler( function ( $line, $file ) {
194 $this->wellFormed =
false;
197 $ret = $reader->read();
198 restore_error_handler();
/**
 * Walk the document with the reader, dispatching each node to the element,
 * text, processing-instruction and DTD handlers, then settle $this->wellFormed.
 *
 * @param XMLReader $reader
 */
202 private function validate( XMLReader $reader ) {
// First loop: consume the prolog until the first element node is reached.
206 if ( !$this->readNext( $reader ) ) {
// The read failed before any element was seen.
208 $this->wellFormed =
false;
211 if ( $reader->nodeType === XMLReader::PI ) {
212 $this->processingInstructionHandler( $reader->name, $reader->value );
214 if ( $reader->nodeType === XMLReader::DOC_TYPE ) {
215 $this->dtdHandler( $reader );
217 }
while ( $reader->nodeType != XMLReader::ELEMENT );
// Second loop: handle the current node, then keep reading until readNext()
// returns false.
221 switch ( $reader->nodeType ) {
222 case XMLReader::ELEMENT:
223 $name = $this->expandNS(
225 $reader->namespaceURI
// The first element encountered becomes the recorded root element.
227 if ( $this->rootElement ===
'' ) {
228 $this->rootElement = $name;
// isEmptyElement is captured before getAttributesArray() moves the reader
// onto the attribute nodes.
230 $empty = $reader->isEmptyElement;
231 $attrs = $this->getAttributesArray( $reader );
232 $this->elementOpen( $name, $attrs );
// NOTE(review): presumably guarded by $empty, since a self-closing element
// has no separate END_ELEMENT node; the condition is not visible here.
234 $this->elementClose();
238 case XMLReader::END_ELEMENT:
239 $this->elementClose();
// All text-like node types are forwarded as character data.
242 case XMLReader::WHITESPACE:
243 case XMLReader::SIGNIFICANT_WHITESPACE:
244 case XMLReader::CDATA:
245 case XMLReader::TEXT:
246 $this->elementData( $reader->value );
// NOTE(review): the bodies of the next cases are not visible in this excerpt.
249 case XMLReader::ENTITY_REF:
254 case XMLReader::COMMENT:
260 $this->processingInstructionHandler(
265 case XMLReader::DOC_TYPE:
// NOTE(review): lines are missing above, so it is unclear whether this
// assignment belongs to the DOC_TYPE case or to a later/default case.
268 $this->wellFormed =
false;
275 }
while ( $this->readNext( $reader ) );
// A non-zero stack depth after the last node means elements were left open.
277 if ( $this->stackDepth !== 0 ) {
278 $this->wellFormed =
false;
// Only promote to "well-formed" if nothing above already set a verdict.
279 } elseif ( $this->wellFormed ===
null ) {
280 $this->wellFormed =
true;
/**
 * Collect the attributes of the reader's current element into an array keyed
 * by namespace-expanded attribute name.
 *
 * @param XMLReader $r Reader positioned on an element; this method moves it
 *  across the element's attribute nodes.
 * @return array Presumably $attrs, mapping expanded name to value (the
 *  initialisation and return statement are not visible in this excerpt).
 */
289 private function getAttributesArray( XMLReader $r ) {
291 while ( $r->moveToNextAttribute() ) {
// Namespace declarations (xmlns / xmlns:*) live in this reserved namespace.
// NOTE(review): the branch body is not visible; presumably they are skipped.
292 if ( $r->namespaceURI ===
'http://www.w3.org/2000/xmlns/' ) {
297 $name = $this->expandNS( $r->name, $r->namespaceURI );
298 $attrs[$name] = $r->value;
/**
 * Build a namespace-qualified name of the form "namespaceURI:localname".
 *
 * @param string $name Possibly prefixed node name (e.g. "prefix:local").
 * @param string $namespaceURI Namespace URI of the node; falsy when the node
 *  has no namespace.
 * @return string The qualified name when a namespace is given. The
 *  no-namespace return path is not visible in this excerpt.
 */
308 private function expandNS( $name, $namespaceURI ) {
309 if ( $namespaceURI ) {
// Drop any prefix: only the part after the last ':' is kept as local name.
310 $parts = explode(
':', $name );
311 $localname = array_pop( $parts );
312 return "$namespaceURI:$localname";
/**
 * Record the start of an element: push its name and attributes onto the
 * context stack and start an empty text buffer for it.
 *
 * @param string $name Expanded element name.
 * @param array $attribs Attributes as returned by getAttributesArray().
 */
321 private function elementOpen( $name, $attribs ) {
322 $this->elementDataContext[] = [ $name, $attribs ];
// One text buffer per open element; elementData() appends to the newest one.
// NOTE(review): the stackDepth update is not visible in this excerpt.
323 $this->elementData[] =
'';
/**
 * Record the end of an element: pop its context and accumulated text, and
 * offer them to the filter callback.
 */
327 private function elementClose() {
328 [ $name, $attribs ] = array_pop( $this->elementDataContext );
329 $data = array_pop( $this->elementData );
331 $callbackReturn =
false;
// The filter callback is optional; it receives the element name, its
// attributes and the text collected for it.
333 if ( is_callable( $this->filterCallback ) ) {
334 $callbackReturn = ( $this->filterCallback )( $name, $attribs, $data );
// Any truthy return counts as a match and is stored as the match type.
336 if ( $callbackReturn ) {
338 $this->filterMatch =
true;
339 $this->filterMatchType = $callbackReturn;
/**
 * Append character data to the text buffer of the innermost open element.
 *
 * @param string $data Text from the current node; it is trimmed before being
 *  appended, so leading/trailing whitespace of each chunk is dropped.
 */
346 private function elementData( $data ) {
// stackDepth - 1 is used as the index of the newest buffer in elementData.
348 $this->elementData[ $this->stackDepth - 1 ] .= trim( $data );
/**
 * Offer a processing instruction to the configured
 * 'processing_instruction_handler' option, if any.
 *
 * @param string $target PI target (callers pass the reader's node name).
 * @param string $data PI content (callers pass the reader's node value).
 */
355 private function processingInstructionHandler( $target, $data ) {
356 $callbackReturn =
false;
357 if ( $this->parserOptions[
'processing_instruction_handler'] ) {
// NOTE(review): the argument list of this call is not visible in this
// excerpt; presumably $target and $data are passed through.
359 $callbackReturn = $this->parserOptions[
'processing_instruction_handler'](
// Any truthy return counts as a match and is stored as the match type.
364 if ( $callbackReturn ) {
366 $this->filterMatch =
true;
367 $this->filterMatchType = $callbackReturn;
/**
 * Inspect the DOCTYPE node: run the 'dtd_handler' and 'external_dtd_handler'
 * callbacks and, when 'require_safe_dtd' is set, reject unsafe internal subsets.
 *
 * @param XMLReader $reader Reader positioned on the DOC_TYPE node.
 */
376 private function dtdHandler( XMLReader $reader ) {
377 $externalCallback = $this->parserOptions[
'external_dtd_handler'];
378 $generalCallback = $this->parserOptions[
'dtd_handler'];
379 $checkIfSafe = $this->parserOptions[
'require_safe_dtd'];
// Nothing is configured to look at the DTD.
// NOTE(review): the branch body is not visible; presumably an early return.
380 if ( !$externalCallback && !$generalCallback && !$checkIfSafe ) {
// Raw text of the DOCTYPE declaration as the reader serialises it.
383 $dtd = $reader->readOuterXml();
384 $callbackReturn =
false;
// The general handler receives the whole DOCTYPE text.
386 if ( $generalCallback ) {
387 $callbackReturn = $generalCallback( $dtd );
389 if ( $callbackReturn ) {
391 $this->filterMatch =
true;
392 $this->filterMatchType = $callbackReturn;
// Reset so the external-handler check below starts from a clean state.
393 $callbackReturn =
false;
396 $parsedDTD = $this->parseDTD( $dtd );
// The external handler only runs when parseDTD() reported a 'type'.
// NOTE(review): the first argument of this call is not visible in this
// excerpt; the visible ones are the public and system ids (null if absent).
397 if ( $externalCallback && isset( $parsedDTD[
'type'] ) ) {
398 $callbackReturn = $externalCallback(
400 $parsedDTD[
'publicid'] ??
null,
401 $parsedDTD[
'systemid'] ??
null
404 if ( $callbackReturn ) {
406 $this->filterMatch =
true;
407 $this->filterMatchType = $callbackReturn;
// An internal subset that fails the whitelist check makes the whole
// document count as not well-formed.
410 if ( $checkIfSafe && isset( $parsedDTD[
'internal'] ) &&
411 !$this->checkDTDIsSafe( $parsedDTD[
'internal'] )
413 $this->wellFormed =
false;
/**
 * Check an internal DTD subset against a whitelist pattern.
 *
 * As far as the visible pattern shows, the subset may only consist of
 * repetitions of: ENTITY declarations whose quoted value is either a single
 * entity reference (1 to 64 characters between '&' and ';') or a short literal
 * of at most 255 units; XML comments; and one specific ATTLIST declaration for
 * the svg element's xmlns:xlink attribute.
 *
 * NOTE(review): the literal alternatives inside the entity value read as a
 * bare '&' and a bare quote character, and the second pattern line contains an
 * unescaped apostrophe inside a single-quoted PHP string. This looks like
 * character entities in the pattern were decoded when this excerpt was
 * extracted — compare against the canonical file before relying on it.
 *
 * @param string $internalSubset Text of the internal subset as captured by
 *  parseDTD().
 * @return bool Presumably true when the subset matches the pattern (the call
 *  that applies the pattern is not visible in this excerpt).
 */
437 private function checkDTDIsSafe( $internalSubset ) {
439 '/^(?:\s*<!ENTITY\s+\S+\s+' .
440 '(?:"(?:&[^"%&;]{1,64};|(?:[^"%&]|&|"){0,255})"' .
441 '|\'(?:&[^\'%&;]{1,64};|(?:[^\'%&]|&|'){0,255})\')\s*>' .
442 '|\s*<!--(?:[^-]|-[^-])*-->' .
443 '|\s*<!ATTLIST svg xmlns:xlink CDATA #FIXED ' .
444 '"http:\/\/www.w3.org\/1999\/xlink">)*\s*$/',
/**
 * Split a DOCTYPE declaration into its parts with a regular expression.
 *
 * The visible code fills $parsed with up to four keys: 'type', 'publicid',
 * 'systemid' and 'internal'. Which named capture feeds which key is decided
 * by branches that are not visible in this excerpt.
 *
 * NOTE(review): in the PUBLIC branch the public identifier alternatives are
 * wrapped in a non-capturing group, but the system identifier alternatives on
 * the following pattern line are not. The bare '|' there therefore splits the
 * enclosing PUBLIC/SYSTEM group, so an apostrophe-quoted system identifier
 * after PUBLIC is not matched as part of the PUBLIC sequence. The SYSTEM
 * branch groups its alternatives correctly. Confirm and wrap in a group.
 *
 * @param string $dtd DOCTYPE text (dtdHandler() passes readOuterXml() output).
 * @return array Presumably $parsed (the return statement is not visible).
 */
459 private function parseDTD( $dtd ) {
462 '/^<!DOCTYPE\s*\S+\s*' .
463 '(?:(?P<typepublic>PUBLIC)\s*' .
464 '(?:"(?P<pubquote>[^"]*)"|\'(?P<pubapos>[^\']*)\')' .
465 '\s*"(?P<pubsysquote>[^"]*)"|\'(?P<pubsysapos>[^\']*)\'' .
466 '|(?P<typesystem>SYSTEM)\s*' .
467 '(?:"(?P<sysquote>[^"]*)"|\'(?P<sysapos>[^\']*)\')' .
469 '(?:\[\s*(?P<internal>.*)\])?\s*>$/s',
// Presumably reached when the pattern does not match the DOCTYPE text.
474 $this->wellFormed =
false;
478 foreach ( $m as $field => $value ) {
// Skip unmatched (empty) groups and the numeric duplicates that the regex
// engine returns alongside the named captures.
479 if ( $value ===
'' || is_numeric( $field ) ) {
485 $parsed[
'type'] = $value;
489 $parsed[
'publicid'] = $value;
495 $parsed[
'systemid'] = $value;
498 $parsed[
'internal'] = $value;