MediaWiki master
RemexStripTagHandler.php
Go to the documentation of this file.
1<?php
2
3namespace MediaWiki\Parser;
4
5use Wikimedia\RemexHtml\Tokenizer\Attributes;
6use Wikimedia\RemexHtml\Tokenizer\NullTokenHandler;
7
/**
 * Helper class for Sanitizer::stripAllTags().
 *
 * Token handler that accumulates the character data of a document into a
 * single string. Character data received while inside a <style> or <script>
 * element is discarded, and a single space is emitted at every start and end
 * tag listed in BLOCK_LEVEL_TAGS so that words on either side of such a tag
 * do not run together.
 */
class RemexStripTagHandler extends NullTokenHandler {
	/**
	 * Tags whose boundaries are replaced by a space in the output.
	 *
	 * Based on https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements
	 * as retrieved on Sept 12, 2018. "Block-level" is not technically defined
	 * for elements introduced in HTML5. <br> is not a block-level element, but
	 * it is included here on purpose because it also separates words.
	 *
	 * Stored as tag => true so that membership is a constant-time key lookup.
	 */
	private const BLOCK_LEVEL_TAGS = [
		'address' => true,
		'article' => true,
		'aside' => true,
		'blockquote' => true,
		'br' => true,
		'canvas' => true,
		'dd' => true,
		'div' => true,
		'dl' => true,
		'dt' => true,
		'fieldset' => true,
		'figcaption' => true,
		'figure' => true,
		'footer' => true,
		'form' => true,
		'h1' => true,
		'h2' => true,
		'h3' => true,
		'h4' => true,
		'h5' => true,
		'h6' => true,
		'header' => true,
		'hgroup' => true,
		'hr' => true,
		'li' => true,
		'main' => true,
		'nav' => true,
		'noscript' => true,
		'ol' => true,
		'output' => true,
		'p' => true,
		'pre' => true,
		'section' => true,
		'table' => true,
		'td' => true,
		'tfoot' => true,
		'th' => true,
		'tr' => true,
		'ul' => true,
		'video' => true,
	];

	/**
	 * Tags whose character content must never reach the output.
	 * Stored as tag => true, like BLOCK_LEVEL_TAGS.
	 */
	private const NON_VISIBLE_TAGS = [
		'style' => true,
		'script' => true,
	];

	/** @var string Text collected so far */
	private $text = '';

	/** @var bool True between the start and end tag of a non-visible element */
	private $insideNonVisibleTag = false;

	/**
	 * Get the text accumulated from all tokens handled so far.
	 *
	 * @return string
	 */
	public function getResult() {
		return $this->text;
	}

	/**
	 * Append a run of character data, unless it belongs to a non-visible element.
	 *
	 * @param string $text Buffer containing the character data
	 * @param int $start Byte offset of the run within $text
	 * @param int $length Byte length of the run
	 * @param int $sourceStart Unused
	 * @param int $sourceLength Unused
	 */
	public function characters( $text, $start, $length, $sourceStart, $sourceLength ) {
		if ( $this->insideNonVisibleTag ) {
			// Content of <style>/<script> is dropped entirely.
			return;
		}

		$visibleRun = substr( $text, $start, $length );
		$this->text .= $visibleRun;
	}

	/**
	 * Handle a start tag: begin suppressing text for non-visible elements and
	 * emit a separating space for block-level ones.
	 *
	 * @param string $name Tag name
	 * @param Attributes $attrs Unused
	 * @param bool $selfClose Unused
	 * @param int $sourceStart Unused
	 * @param int $sourceLength Unused
	 */
	public function startTag( $name, Attributes $attrs, $selfClose, $sourceStart, $sourceLength ) {
		if ( $this->isNonVisibleTag( $name ) ) {
			$this->insideNonVisibleTag = true;
		}

		$this->appendSeparatorForBlockLevelTag( $name );
	}

	/**
	 * Handle an end tag: stop suppressing text for non-visible elements and
	 * emit a separating space for block-level ones.
	 *
	 * @param string $name Tag name
	 * @param int $sourceStart Unused
	 * @param int $sourceLength Unused
	 */
	public function endTag( $name, $sourceStart, $sourceLength ) {
		if ( $this->isNonVisibleTag( $name ) ) {
			$this->insideNonVisibleTag = false;
		}

		$this->appendSeparatorForBlockLevelTag( $name );
	}

	/**
	 * Add one space to the output when $tagName is a block-level tag.
	 *
	 * Without this, stripping the tags from "unrelated<br>words" would glue
	 * the two words together.
	 *
	 * @param string $tagName
	 */
	private function appendSeparatorForBlockLevelTag( $tagName ) {
		if ( !$this->isBlockLevelTag( $tagName ) ) {
			return;
		}

		$this->text .= ' ';
	}

	/**
	 * Bring a tag name into the form used as key in the lookup tables:
	 * surrounding whitespace removed and lower-cased.
	 *
	 * @param string $tagName
	 * @return string
	 */
	private function normalizeTagName( $tagName ) {
		return strtolower( trim( $tagName ) );
	}

	/**
	 * Whether the given tag is listed in BLOCK_LEVEL_TAGS.
	 *
	 * @param string $tagName Tag name, matched case-insensitively
	 * @return bool
	 */
	private function isBlockLevelTag( $tagName ) {
		return array_key_exists( $this->normalizeTagName( $tagName ), self::BLOCK_LEVEL_TAGS );
	}

	/**
	 * Whether the given tag is listed in NON_VISIBLE_TAGS, i.e. its text
	 * content has to be left out of the result.
	 *
	 * @param string $tagName Tag name, matched case-insensitively
	 * @return bool
	 */
	private function isNonVisibleTag( $tagName ) {
		return array_key_exists( $this->normalizeTagName( $tagName ), self::NON_VISIBLE_TAGS );
	}

}
Helper class for Sanitizer::stripAllTags().
endTag( $name, $sourceStart, $sourceLength)
characters( $text, $start, $length, $sourceStart, $sourceLength)
startTag( $name, Attributes $attrs, $selfClose, $sourceStart, $sourceLength)