// Build a new instance of this class around the supplied $text.
11 $norm =
new self( $text );
// Branch on the instance's `invalid` flag before doing any further work
// (the body of this branch is not visible in this excerpt).
12 if ( $norm->invalid ) {
// Visit each entry of $funcs in order (the loop body is not visible in this
// excerpt, so how $func is applied cannot be stated from here).
15 foreach ( $funcs
as $func ) {
// Hand back whatever the instance's serialize() produces.
18 return $norm->serialize();
// Create an empty XML 1.0 document that uses UTF-8 encoding.
22 $this->doc =
new DOMDocument(
'1.0',
'utf-8' );
// Warnings are suppressed around the parse and restored afterwards
// (presumably so malformed input does not emit PHP warnings -- confirm).
28 Wikimedia\suppressWarnings();
// Parse $text as XML after wrapping it in a minimal <html><body> skeleton, so
// the fragment always has a single root element and a <body> to query later.
29 if ( !$this->doc->loadXML(
'<html><body>' . $text .
'</body></html>' ) ) {
// The parse failed: record that on the instance via the `invalid` flag.
30 $this->invalid =
true;
32 Wikimedia\restoreWarnings();
// XPath helper bound to the parsed document.
33 $this->xpath =
new DOMXPath( $this->doc );
// Keep a handle on the first <body> element found in the document.
34 $this->body = $this->xpath->query(
'//body' )->item( 0 );
// Unwrap every <tbody> element: its children are moved up to sit directly in
// the <tbody>'s parent, and the <tbody> itself is then removed.
38 foreach ( $this->xpath->query(
'//tbody' )
as $tbody ) {
// Keep taking the first remaining child until the <tbody> has none left.
39 while ( $tbody->firstChild ) {
40 $child = $tbody->firstChild;
41 $tbody->removeChild( $child );
// Re-insert the child immediately before the <tbody>, which preserves the
// children's original relative order.
42 $tbody->parentNode->insertBefore( $child, $tbody );
// Detach the <tbody> from its parent.
44 $tbody->parentNode->removeChild( $tbody );
// Walk every text node in the document and trim its whitespace.
60 foreach ( $this->xpath->query(
'//text()' )
as $child ) {
// Text whose parent element is named "pre" (compared case-insensitively) is
// handled separately: only a single newline at each end is considered.
61 if ( strtolower( $child->parentNode->nodeName ) ===
'pre' ) {
// NOTE(review): no length argument is passed here, so substr_compare()
// compares the whole string from offset 0 against "\n". That is 0 only when
// the data is exactly "\n", not whenever it merely starts with a newline.
// If the intent is to strip one leading newline, a length of 1 is probably
// needed -- confirm against the expected test output before changing.
63 if ( substr_compare( $child->data,
"\n", 0 ) === 0 ) {
64 $child->data = substr( $child->data, 1 );
// With offset -1 only the final byte is compared, so this matches any data
// that ends in a newline; that one trailing newline is dropped.
66 if ( substr_compare( $child->data,
"\n", -1 ) === 0 ) {
67 $child->data = substr( $child->data, 0, -1 );
// Strip all leading and trailing whitespace (presumably the non-<pre> branch;
// the enclosing else is not visible in this excerpt).
71 $child->data = trim( $child->data );
// Remove text nodes that are empty after trimming.
73 if ( $child->data ===
'' ) {
74 $child->parentNode->removeChild( $child );
// Serialize the stored <body> node to an XML string, then delete the literal
// "<body>" and "</body>" tags so only the body's inner markup is returned.
84 return strtr( $this->doc->saveXML( $this->body ),
85 [
'<body>' =>
'',
'</body>' =>
'' ] );