MediaWiki REL1_35
RemexStripTagHandler.php
Go to the documentation of this file.
1<?php
2
3use RemexHtml\Tokenizer\Attributes;
4use RemexHtml\Tokenizer\NullTokenHandler;
5
/**
 * Token handler that gathers the character data it is given into one plain
 * string, ignoring the markup itself.
 *
 * A single space is written in place of every start tag and end tag whose
 * name appears in BLOCK_LEVEL_TAGS, so that words separated only by such a
 * tag (e.g. "unrelated<br>words") do not get glued together in the result.
 */
class RemexStripTagHandler extends NullTokenHandler {
	/**
	 * Tag names that get a space injected where they open and close.
	 *
	 * Per https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements
	 * retrieved on sept 12, 2018. <br> is not block level but was added anyways.
	 * The list covers all HTML block level elements (although "block-level" is
	 * not technically defined for elements that are new in HTML5).
	 *
	 * Keys are lower-case tag names; the values are unused placeholders so
	 * that membership can be checked by key in O(1).
	 */
	private const BLOCK_LEVEL_TAGS = [
		'address' => true,
		'article' => true,
		'aside' => true,
		'blockquote' => true,
		'br' => true,
		'canvas' => true,
		'dd' => true,
		'div' => true,
		'dl' => true,
		'dt' => true,
		'fieldset' => true,
		'figcaption' => true,
		'figure' => true,
		'footer' => true,
		'form' => true,
		'h1' => true,
		'h2' => true,
		'h3' => true,
		'h4' => true,
		'h5' => true,
		'h6' => true,
		'header' => true,
		'hgroup' => true,
		'hr' => true,
		'li' => true,
		'main' => true,
		'nav' => true,
		'noscript' => true,
		'ol' => true,
		'output' => true,
		'p' => true,
		'pre' => true,
		'section' => true,
		'table' => true,
		'td' => true,
		'tfoot' => true,
		'th' => true,
		'tr' => true,
		'ul' => true,
		'video' => true,
	];

	/** @var string Text accumulated so far */
	private $text = '';

	/**
	 * Get the text collected by the handler callbacks so far.
	 *
	 * @return string
	 */
	public function getResult() {
		return $this->text;
	}

	/**
	 * Append a run of character data to the collected text.
	 *
	 * @param string $text Buffer containing the characters
	 * @param int $start Byte offset of the run within $text
	 * @param int $length Byte length of the run
	 * @param int $sourceStart Not used by this handler
	 * @param int $sourceLength Not used by this handler
	 */
	public function characters( $text, $start, $length, $sourceStart, $sourceLength ) {
		$chunk = substr( $text, $start, $length );
		$this->text .= $chunk;
	}

	/**
	 * Emit a separating space when a block-level element opens.
	 *
	 * @param string $name Tag name
	 * @param Attributes $attrs Not used by this handler
	 * @param bool $selfClose Not used by this handler
	 * @param int $sourceStart Not used by this handler
	 * @param int $sourceLength Not used by this handler
	 */
	public function startTag( $name, Attributes $attrs, $selfClose, $sourceStart, $sourceLength ) {
		$this->separateIfBlockLevel( $name );
	}

	/**
	 * Emit a separating space when a block-level element closes.
	 *
	 * @param string $name Tag name
	 * @param int $sourceStart Not used by this handler
	 * @param int $sourceLength Not used by this handler
	 */
	public function endTag( $name, $sourceStart, $sourceLength ) {
		$this->separateIfBlockLevel( $name );
	}

	/**
	 * Shared implementation for startTag() and endTag(): add one space to
	 * the collected text if the tag is in the block-level list, to prevent
	 * merging unrelated<br>words.
	 *
	 * @param string $name Tag name
	 */
	private function separateIfBlockLevel( $name ) {
		if ( !$this->isBlockLevelTag( $name ) ) {
			return;
		}

		$this->text .= ' ';
	}

	/**
	 * Detect block level tags.
	 *
	 * The name is trimmed and lower-cased before it is looked up.
	 *
	 * @param string $tagName Tag name to test
	 * @return bool True if the normalised name is a key of BLOCK_LEVEL_TAGS
	 */
	private function isBlockLevelTag( $tagName ) {
		$normalized = strtolower( trim( $tagName ) );

		// No entry in the table is null, so this is equivalent to isset().
		return array_key_exists( $normalized, self::BLOCK_LEVEL_TAGS );
	}
}
endTag( $name, $sourceStart, $sourceLength)
isBlockLevelTag( $tagName)
Detect block level tags.
characters( $text, $start, $length, $sourceStart, $sourceLength)
startTag( $name, Attributes $attrs, $selfClose, $sourceStart, $sourceLength)