6 use HtmlFormatter\HtmlFormatter;
/**
 * Opening delimiter of a section marker: the two control bytes 0x01 0x02
 * (octal escapes inside a double-quoted string).
 * The plain-text heading rewrite in this file emits it directly before the
 * captured heading level digit.
 */
14 const SECTION_MARKER_START =
"\1\2";
/**
 * Closing delimiter of a section marker: the two control bytes 0x02 0x01,
 * i.e. the opening delimiter's bytes in reverse order.
 * The plain-text heading rewrite in this file emits it directly after the
 * captured heading level digit and before the original heading tag.
 */
15 const SECTION_MARKER_END =
"\2\1";
27 parent::__construct( HtmlFormatter::wrapHTML( $text ) );
28 $this->plainText = $plainText;
30 $this->setRemoveMedia(
true );
33 $this->flattenAllTags();
35 $this->flatten( [
'a' ] );
47 public function getText( $element =
null ) {
48 $this->filterContent();
49 $text = parent::getText();
50 if ( $this->plainText ) {
51 $text = html_entity_decode( $text );
53 $text = str_replace(
"\u{00A0}",
' ', $text );
55 $text = str_replace(
"\r",
"\n", $text );
57 $text = preg_replace(
"/\n{3,}/",
"\n\n", $text );
67 if ( $this->plainText ) {
68 $html = preg_replace(
'/\s*(<h([1-6])\b)/i',
69 "\n\n" . self::SECTION_MARKER_START .
'$2' . self::SECTION_MARKER_END .
'$1',
83 $removed = parent::filterContent();
85 $doc = $this->getDoc();
86 $spans = $doc->getElementsByTagName(
'span' );
89 foreach ( $spans as $span ) {
90 $span->removeAttribute(
'class' );
91 $span->removeAttribute(
'style' );