MediaWiki master
RemexStripTagHandler.php
Go to the documentation of this file.
1<?php
2
3namespace MediaWiki\Parser;
4
5use Wikimedia\RemexHtml\Tokenizer\Attributes;
6use Wikimedia\RemexHtml\Tokenizer\NullTokenHandler;
7
/**
 * Helper class for Sanitizer::stripAllTags().
 *
 * Token handler that collects the character data it is fed into one
 * plain-text string. Text seen between a <style>/<script> start tag and
 * the matching end tag is dropped, and a single space is appended for
 * every start or end tag listed in BLOCK_LEVEL_TAGS.
 */
class RemexStripTagHandler extends NullTokenHandler {
	// Per https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements
	// retrieved on sept 12, 2018. <br> is not block level but was added anyways.
	// The following is a complete list of all HTML block level elements
	// (although "block-level" is not technically defined for elements that are
	// new in HTML5).
	// Structured as tag => true to allow O(1) membership test.
	private const BLOCK_LEVEL_TAGS = [
		'address' => true,
		'article' => true,
		'aside' => true,
		'blockquote' => true,
		'br' => true,
		'canvas' => true,
		'dd' => true,
		'div' => true,
		'dl' => true,
		'dt' => true,
		'fieldset' => true,
		'figcaption' => true,
		'figure' => true,
		'footer' => true,
		'form' => true,
		'h1' => true,
		'h2' => true,
		'h3' => true,
		'h4' => true,
		'h5' => true,
		'h6' => true,
		'header' => true,
		'hgroup' => true,
		'hr' => true,
		'li' => true,
		'main' => true,
		'nav' => true,
		'noscript' => true,
		'ol' => true,
		'output' => true,
		'p' => true,
		'pre' => true,
		'section' => true,
		'table' => true,
		'td' => true,
		'tfoot' => true,
		'th' => true,
		'tr' => true,
		'ul' => true,
		'video' => true,
	];

	// Tags whose text content must not appear in the stripped result.
	// Same tag => true layout as BLOCK_LEVEL_TAGS.
	private const NON_VISIBLE_TAGS = [
		'style' => true,
		'script' => true,
	];

	/**
	 * @var bool True after a non-visible start tag has been seen and until a
	 *  non-visible end tag is seen. This is a plain flag, not a nesting depth.
	 */
	private $insideNonVisibleTag = false;

	/** @var string Text accumulated so far */
	private $text = '';

	/**
	 * Get the text accumulated from all tokens handled so far.
	 *
	 * @return string
	 */
	public function getResult() {
		return $this->text;
	}

	/**
	 * Append a run of character data to the result, unless it lies inside
	 * a non-visible tag.
	 *
	 * @param string $text Buffer containing the character data
	 * @param int $start Offset of the run within $text, as passed to substr()
	 * @param int $length Length of the run, as passed to substr()
	 * @param int $sourceStart Not used by this handler
	 * @param int $sourceLength Not used by this handler
	 */
	public function characters( $text, $start, $length, $sourceStart, $sourceLength ) {
		if ( $this->insideNonVisibleTag ) {
			// Contents of <style>/<script> are discarded.
			return;
		}

		$this->text .= substr( $text, $start, $length );
	}

	/**
	 * Handle a start tag: begin suppressing text if the tag is non-visible,
	 * and add a separating space if it is block-level.
	 *
	 * @param string $name Tag name
	 * @param Attributes $attrs Not used by this handler
	 * @param bool $selfClose Not used by this handler
	 * @param int $sourceStart Not used by this handler
	 * @param int $sourceLength Not used by this handler
	 */
	public function startTag( $name, Attributes $attrs, $selfClose, $sourceStart, $sourceLength ) {
		if ( $this->isNonVisibleTag( $name ) ) {
			$this->insideNonVisibleTag = true;
		}

		$this->separateBlockLevelTag( $name );
	}

	/**
	 * Handle an end tag: stop suppressing text if the tag is non-visible,
	 * and add a separating space if it is block-level.
	 *
	 * @param string $name Tag name
	 * @param int $sourceStart Not used by this handler
	 * @param int $sourceLength Not used by this handler
	 */
	public function endTag( $name, $sourceStart, $sourceLength ) {
		if ( $this->isNonVisibleTag( $name ) ) {
			$this->insideNonVisibleTag = false;
		}

		$this->separateBlockLevelTag( $name );
	}

	/**
	 * Append one space to the result when the given tag is block-level.
	 *
	 * Without this, input such as "unrelated<br>words" would collapse
	 * into "unrelatedwords" once the tag itself is dropped.
	 *
	 * @param string $name Tag name
	 */
	private function separateBlockLevelTag( $name ) {
		if ( $this->isBlockLevelTag( $name ) ) {
			$this->text .= ' ';
		}
	}

	/**
	 * Check whether a tag is listed in BLOCK_LEVEL_TAGS.
	 *
	 * The name is trimmed and lower-cased before the lookup.
	 *
	 * @param string $tagName HTML tag name
	 * @return bool
	 */
	private function isBlockLevelTag( $tagName ) {
		return isset( self::BLOCK_LEVEL_TAGS[ strtolower( trim( $tagName ) ) ] );
	}

	/**
	 * Check whether a tag is listed in NON_VISIBLE_TAGS.
	 *
	 * The name is trimmed and lower-cased before the lookup.
	 *
	 * @param string $tagName HTML tag name
	 * @return bool
	 */
	private function isNonVisibleTag( $tagName ) {
		return isset( self::NON_VISIBLE_TAGS[ strtolower( trim( $tagName ) ) ] );
	}

}
Helper class for Sanitizer::stripAllTags().
endTag( $name, $sourceStart, $sourceLength)
characters( $text, $start, $length, $sourceStart, $sourceLength)
startTag( $name, Attributes $attrs, $selfClose, $sourceStart, $sourceLength)