MediaWiki  master
RemexCompatFormatter.php
Go to the documentation of this file.
1 <?php
2 
3 namespace MediaWiki\Tidy;
4 
5 use RemexHtml\HTMLData;
6 use RemexHtml\Serializer\HtmlFormatter;
7 use RemexHtml\Serializer\SerializerNode;
8 use Sanitizer;
9 
/**
 * HtmlFormatter subclass used by the MediaWiki\Tidy serializer.
 *
 * Differences from the parent that are visible in this class:
 *  - no document prefix is emitted by startDocument();
 *  - U+00A0 is written as "&#160;" and any "&" entry is removed from the
 *    attribute and text escape tables;
 *  - text and attribute values are passed through
 *    Sanitizer::normalizeCharReferences();
 *  - attribute-less, whitespace-only li/p/tr elements get
 *    class="mw-empty-elt";
 *  - nodes flagged as p-wrappers are emitted as <p>...</p> or unwrapped.
 */
class RemexCompatFormatter extends HtmlFormatter {
	/**
	 * Element names that are tagged with class="mw-empty-elt" when they have
	 * no attributes and only whitespace contents. Used as a lookup set.
	 */
	private static $markedEmptyElements = [
		'li' => true,
		'p' => true,
		'tr' => true,
	];

	/**
	 * @var callable|null Callback applied to text that is not inside an HTML
	 *  raw text element; null when no 'textProcessor' option was given.
	 */
	private $textProcessor;

	/**
	 * @param array $options Passed through to the parent constructor. The
	 *  optional 'textProcessor' key supplies the text callback.
	 */
	public function __construct( $options = [] ) {
		parent::__construct( $options );

		$nbsp = "\u{00A0}";
		$this->attributeEscapes[$nbsp] = '&#160;';
		$this->textEscapes[$nbsp] = '&#160;';

		// Drop the ampersand entries so strtr() with these tables leaves "&"
		// alone; character references are normalised separately via Sanitizer.
		unset( $this->attributeEscapes["&"], $this->textEscapes["&"] );

		$this->textProcessor = $options['textProcessor'] ?? null;
	}

	/**
	 * Emit nothing at the start of the document.
	 *
	 * @param string|null $fragmentNamespace Unused
	 * @param string|null $fragmentName Unused
	 * @return string Always the empty string
	 */
	public function startDocument( $fragmentNamespace, $fragmentName ) {
		return '';
	}

	/**
	 * Serialize a run of character data.
	 *
	 * The parent implementation runs first. Its result is then handed to the
	 * text processor (if one is configured) unless the parent node is an HTML
	 * element listed in $this->rawTextElements, and finally has its character
	 * references normalised.
	 *
	 * @param SerializerNode $parent Node containing the text
	 * @param string $text
	 * @param int $start
	 * @param int $length
	 * @return string
	 */
	public function characters( SerializerNode $parent, $text, $start, $length ) {
		$result = parent::characters( $parent, $text, $start, $length );

		$inRawTextElement = $parent->namespace === HTMLData::NS_HTML
			&& isset( $this->rawTextElements[$parent->name] );
		if ( !$inRawTextElement && $this->textProcessor !== null ) {
			$result = call_user_func( $this->textProcessor, $result );
		}

		// Ensure a consistent representation for all entities
		return Sanitizer::normalizeCharReferences( $result );
	}

	/**
	 * Serialize an element together with its already-serialized contents.
	 *
	 * @param SerializerNode $parent Unused here
	 * @param SerializerNode $node Element being serialized
	 * @param string $contents Serialized children of $node
	 * @return string
	 */
	public function element( SerializerNode $parent, SerializerNode $node, $contents ) {
		$nodeData = $node->snData;
		if ( $nodeData && $nodeData->isPWrapper ) {
			// A p-wrapper only becomes a real paragraph when it holds
			// something non-blank; otherwise its contents pass through bare.
			return $nodeData->nonblankNodeCount ? "<p>$contents</p>" : $contents;
		}

		$tag = $node->name;
		$attributes = $node->attrs;

		// Attribute-less li/p/tr whose contents consist solely of whitespace
		// characters are marked with a class instead of being emitted plainly.
		if ( isset( self::$markedEmptyElements[$tag] )
			&& $attributes->count() === 0
			&& strlen( $contents ) === strspn( $contents, "\t\n\f\r " )
		) {
			return "<$tag class=\"mw-empty-elt\">$contents</$tag>";
		}

		$html = "<$tag";
		foreach ( $attributes->getValues() as $attrName => $attrValue ) {
			$encoded = Sanitizer::normalizeCharReferences(
				strtr( $attrValue, $this->attributeEscapes )
			);
			$html .= " $attrName=\"$encoded\"";
		}

		$isHtml = $node->namespace === HTMLData::NS_HTML;

		// HTML void elements are written self-closed; $contents is not emitted.
		if ( $isHtml && isset( $this->voidElements[$tag] ) ) {
			return $html . ' />';
		}

		// For HTML elements listed in $this->prefixLfElements, contents that
		// begin with a newline get one more newline in front.
		// NOTE(review): presumably compensates for parsers dropping a leading
		// newline in such elements - confirm against the parent class.
		$lfPrefix = '';
		if ( $isHtml
			&& isset( $contents[0] ) && $contents[0] === "\n"
			&& isset( $this->prefixLfElements[$tag] )
		) {
			$lfPrefix = "\n";
		}

		return $html . '>' . $lfPrefix . $contents . "</$tag>";
	}
}
MediaWiki\Tidy\RemexCompatFormatter\startDocument
startDocument( $fragmentNamespace, $fragmentName)
Definition: RemexCompatFormatter.php:32
MediaWiki\Tidy\RemexCompatFormatter\__construct
__construct( $options=[])
Definition: RemexCompatFormatter.php:23
MediaWiki\Tidy\RemexCompatFormatter
Definition: RemexCompatFormatter.php:13
MediaWiki\Tidy\RemexCompatFormatter\element
element(SerializerNode $parent, SerializerNode $node, $contents)
Definition: RemexCompatFormatter.php:52
MediaWiki\Tidy\RemexCompatFormatter\$textProcessor
callable $textProcessor
Definition: RemexCompatFormatter.php:21
$s
foreach( $mmfl['setupFiles'] as $fileName) if( $queue) if(empty( $mmfl['quiet'])) $s
Definition: mergeMessageFileList.php:206
MediaWiki\Tidy\RemexCompatFormatter\$markedEmptyElements
static $markedEmptyElements
Definition: RemexCompatFormatter.php:14
MediaWiki\Tidy\RemexCompatFormatter\characters
characters(SerializerNode $parent, $text, $start, $length)
Definition: RemexCompatFormatter.php:36
Sanitizer\normalizeCharReferences
static normalizeCharReferences( $text)
Ensure that any entities and character references are legal for XML and XHTML specifically.
Definition: Sanitizer.php:1123
Sanitizer
HTML sanitizer for MediaWiki.
Definition: Sanitizer.php:34
MediaWiki\Tidy
Definition: RemexCompatFormatter.php:3