MediaWiki REL1_31
ParserTestResultNormalizer.php
Go to the documentation of this file.
1<?php
/** @var DOMDocument Document the wrapped input fragment is parsed into */
protected $doc;

/** @var DOMXPath XPath evaluator bound to $doc */
protected $xpath;

/** @var bool|null Set to true when the input could not be parsed as XML; otherwise left unset */
protected $invalid;

/**
 * First node matched by the '//body' query, or null when nothing matched.
 * Declared explicitly because the constructor assigns it; leaving it
 * undeclared creates a dynamic property, which is deprecated as of PHP 8.2.
 *
 * @var DOMNode|null
 */
protected $body;
9
/**
 * Parse $text, run the requested normalization methods over it, and return
 * the re-serialized result.
 *
 * @param string $text Markup fragment to normalize
 * @param string[] $funcs Names of methods of this class, each invoked in
 *   order with no arguments on the normalizer instance
 * @return string The serialized DOM after all functions have run, or $text
 *   unchanged when it could not be parsed
 */
public static function normalize( $text, $funcs ) {
	$normalizer = new self( $text );

	if ( !$normalizer->invalid ) {
		foreach ( $funcs as $method ) {
			$normalizer->$method();
		}

		return $normalizer->serialize();
	}

	// Unparseable input is handed back untouched.
	return $text;
}
20
/**
 * Wrap $text in <html><body>…</body></html>, parse it as XML, and set up
 * the XPath evaluator and the body node reference.
 *
 * On parse failure $this->invalid is set to true; the XPath object and
 * body lookup are still performed against the (unloaded) document.
 *
 * @param string $text Markup fragment to load
 */
protected function __construct( $text ) {
	$this->doc = new DOMDocument( '1.0', 'utf-8' );

	// Note: parsing a supposedly XHTML document with an XML parser is not
	// guaranteed to give accurate results. For example, it may introduce
	// differences in the number of line breaks in <pre> tags.
	$wrapped = '<html><body>' . $text . '</body></html>';

	// Parser warnings are silenced only for the duration of the load.
	Wikimedia\suppressWarnings();
	$loaded = $this->doc->loadXML( $wrapped );
	Wikimedia\restoreWarnings();

	if ( !$loaded ) {
		$this->invalid = true;
	}

	$this->xpath = new DOMXPath( $this->doc );
	$bodyMatches = $this->xpath->query( '//body' );
	$this->body = $bodyMatches->item( 0 );
}
36
/**
 * Unwrap every <tbody> element: its children are moved, in order, to sit
 * directly in the <tbody>'s parent at the position the <tbody> occupied,
 * and the emptied <tbody> is then dropped.
 */
protected function removeTbody() {
	foreach ( $this->xpath->query( '//tbody' ) as $section ) {
		$container = $section->parentNode;

		// Always take the current first child so document order is kept.
		for ( $node = $section->firstChild; $node; $node = $section->firstChild ) {
			$section->removeChild( $node );
			$container->insertBefore( $node, $section );
		}

		$container->removeChild( $section );
	}
}
47
/**
 * Trim whitespace from every text node in the document.
 *
 * Text nodes whose parent is a <pre> element lose at most one line break
 * from the start and one from the end. All other text nodes are passed
 * through trim(). Any text node left empty is removed from the DOM.
 *
 * The stated aim is a normalized DOM in which Tidy's output can be matched
 * against the output of another cleaner (TODO confirm which one).
 */
protected function trimWhitespace() {
	foreach ( $this->xpath->query( '//text()' ) as $textNode ) {
		$data = $textNode->data;

		if ( strtolower( $textNode->parentNode->nodeName ) === 'pre' ) {
			// Just trim one line break from the start and end.
			// Only the first byte may be compared here: substr_compare()
			// without an explicit length compares the entire remainder of
			// the string, so it would only match a node that is exactly "\n".
			if ( strncmp( $data, "\n", 1 ) === 0 ) {
				// Cast because substr() returns false instead of '' before PHP 8.
				$data = (string)substr( $data, 1 );
			}
			if ( substr( $data, -1 ) === "\n" ) {
				$data = (string)substr( $data, 0, -1 );
			}
		} else {
			// Trim all whitespace
			$data = trim( $data );
		}

		if ( $data === '' ) {
			$textNode->parentNode->removeChild( $textNode );
		} else {
			$textNode->data = $data;
		}
	}
}
78
/**
 * Serialize the XML DOM for comparison purposes.
 *
 * The body node is dumped as XML and the wrapper tags that the constructor
 * added are stripped again. An empty body is emitted by the serializer in
 * its self-closing form, so that spelling is stripped too; otherwise an
 * empty fragment would come back as "<body/>".
 *
 * @return string Serialized contents of the body node
 */
protected function serialize() {
	return strtr(
		$this->doc->saveXML( $this->body ),
		[ '<body>' => '', '</body>' => '', '<body/>' => '' ]
	);
}
87}
serialize()
Serialize the XML DOM for comparison purposes.
trimWhitespace()
The point of this function is to produce a normalized DOM in which Tidy's output matches the output o...