MediaWiki master
RemexStripTagHandler.php
Go to the documentation of this file.
1<?php
2
3namespace MediaWiki\Parser;
4
5use Wikimedia\RemexHtml\Tokenizer\Attributes;
6use Wikimedia\RemexHtml\Tokenizer\NullTokenHandler;
7
/**
 * Helper class for Sanitizer::stripAllTags().
 *
 * Token handler that collects the character data of a token stream into a
 * single string. Character data seen while a non-visible tag (see
 * NON_VISIBLE_TAGS) is open is dropped, and a single space is emitted at
 * every start and end tag listed in BLOCK_LEVEL_TAGS.
 */
class RemexStripTagHandler extends NullTokenHandler {
	/**
	 * Tags around which a space is injected into the collected text.
	 *
	 * Per https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements
	 * retrieved on Sept 12, 2018. <br> is not block level but was added anyway.
	 * ("Block-level" is not technically defined for elements that are new in
	 * HTML5.)
	 *
	 * Structured as tag => true so that membership is a key lookup.
	 */
	private const BLOCK_LEVEL_TAGS = [
		'address' => true,
		'article' => true,
		'aside' => true,
		'blockquote' => true,
		'br' => true,
		'canvas' => true,
		'dd' => true,
		'div' => true,
		'dl' => true,
		'dt' => true,
		'fieldset' => true,
		'figcaption' => true,
		'figure' => true,
		'footer' => true,
		'form' => true,
		'h1' => true,
		'h2' => true,
		'h3' => true,
		'h4' => true,
		'h5' => true,
		'h6' => true,
		'header' => true,
		'hgroup' => true,
		'hr' => true,
		'li' => true,
		'main' => true,
		'nav' => true,
		'noscript' => true,
		'ol' => true,
		'output' => true,
		'p' => true,
		'pre' => true,
		'section' => true,
		'table' => true,
		'td' => true,
		'tfoot' => true,
		'th' => true,
		'tr' => true,
		'ul' => true,
		'video' => true,
	];

	/**
	 * Tags whose character data is excluded from the collected text.
	 * Structured as tag => true, like BLOCK_LEVEL_TAGS.
	 */
	private const NON_VISIBLE_TAGS = [
		'style' => true,
		'script' => true,
	];

	/**
	 * True between the start tag and the end tag of a non-visible tag.
	 * This is a plain flag, not a nesting counter: any matching end tag
	 * clears it.
	 */
	private $insideNonVisibleTag = false;

	/** Text collected so far. */
	private $text = '';

	/**
	 * @return string The text accumulated from all tokens handled so far
	 */
	public function getResult() {
		return $this->text;
	}

	/**
	 * Append a run of character data, unless a non-visible tag is open.
	 *
	 * @param string $text Buffer containing the character data
	 * @param int $start Offset of the run within $text, as passed to substr()
	 * @param int $length Length of the run, as passed to substr()
	 * @param int $sourceStart Not used by this handler
	 * @param int $sourceLength Not used by this handler
	 */
	public function characters( $text, $start, $length, $sourceStart, $sourceLength ) {
		if ( $this->insideNonVisibleTag ) {
			// Content of <style>/<script> is not part of the visible text.
			return;
		}
		$this->text .= substr( $text, $start, $length );
	}

	/**
	 * Handle a start tag: begin suppressing text if the tag is non-visible,
	 * and pad the output if the tag is block-level.
	 *
	 * @param string $name Tag name
	 * @param Attributes $attrs Not used by this handler
	 * @param bool $selfClose Not used by this handler
	 * @param int $sourceStart Not used by this handler
	 * @param int $sourceLength Not used by this handler
	 */
	public function startTag( $name, Attributes $attrs, $selfClose, $sourceStart, $sourceLength ) {
		if ( $this->isNonVisibleTag( $name ) ) {
			$this->insideNonVisibleTag = true;
		}
		$this->padIfBlockLevel( $name );
	}

	/**
	 * Handle an end tag: stop suppressing text if the tag is non-visible,
	 * and pad the output if the tag is block-level.
	 *
	 * @param string $name Tag name
	 * @param int $sourceStart Not used by this handler
	 * @param int $sourceLength Not used by this handler
	 */
	public function endTag( $name, $sourceStart, $sourceLength ) {
		if ( $this->isNonVisibleTag( $name ) ) {
			$this->insideNonVisibleTag = false;
		}
		$this->padIfBlockLevel( $name );
	}

	/**
	 * Inject whitespace for typical block-level tags to prevent merging
	 * unrelated<br>words. The space is added regardless of whether a
	 * non-visible tag is currently open.
	 *
	 * @param string $name Tag name
	 */
	private function padIfBlockLevel( $name ) {
		if ( !$this->isBlockLevelTag( $name ) ) {
			return;
		}
		$this->text .= ' ';
	}

	/**
	 * Bring a tag name into the form used for the keys of the tag maps:
	 * surrounding whitespace trimmed, then lower-cased.
	 *
	 * @param string $tagName
	 * @return string
	 */
	private function normalizeTagName( $tagName ) {
		return strtolower( trim( $tagName ) );
	}

	/**
	 * @param string $tagName Tag name, in any case, optionally padded with whitespace
	 * @return bool Whether the tag is listed in BLOCK_LEVEL_TAGS
	 */
	private function isBlockLevelTag( $tagName ) {
		return array_key_exists( $this->normalizeTagName( $tagName ), self::BLOCK_LEVEL_TAGS );
	}

	/**
	 * @param string $tagName Tag name, in any case, optionally padded with whitespace
	 * @return bool Whether the tag is listed in NON_VISIBLE_TAGS
	 */
	private function isNonVisibleTag( $tagName ) {
		return array_key_exists( $this->normalizeTagName( $tagName ), self::NON_VISIBLE_TAGS );
	}

}
Helper class for Sanitizer::stripAllTags().
endTag( $name, $sourceStart, $sourceLength)
characters( $text, $start, $length, $sourceStart, $sourceLength)
startTag( $name, Attributes $attrs, $selfClose, $sourceStart, $sourceLength)