// Text found before the first heading; assigned in extractWikitextParts().
15 private ?
string $openingText =
null;
// Tag-stripped text of the document body. extractWikitextParts() tests this
// for null before doing any work, so null presumably means "not extracted yet".
16 private ?
string $allText =
null;
// Tag-stripped text of every element matched by AUXILIARY_ELEMENT_SELECTORS,
// appended one entry per element in extractWikitextParts().
18 private array $auxText = [];
// Selectors for elements that are removed from the parsed document before
// text is extracted (used by extractWikitextParts() and
// extractTextBeforeFirstHeading()).
24 private const EXCLUDED_ELEMENT_SELECTORS = [
// Heading elements of all six levels.
34 'h1',
'h2',
'h3',
'h4',
'h5',
'h6',
// Class-based selectors.
39 '.navigation-not-searchable',
41 '.wbmi-entityview-emptyCaption',
47 private const AUXILIARY_ELEMENT_SELECTORS = [
62 $this->parserOutput = $parserOutput;
// Table-of-contents data comes from the parser output and is checked for null.
84 $tocData = $this->parserOutput->getTOCData();
85 if ( $tocData ===
null ) {
// Headings returned by getIgnoredHeadings() are filtered out further down.
88 $ignoredHeadings = $this->getIgnoredHeadings();
89 foreach ( $tocData->getSections() as $heading ) {
// From here on $heading holds the section's `line` value, not the section object.
90 $heading = $heading->line;
// Remove bare <span> and </span> tags; a span with attributes does not match.
94 $heading = preg_replace(
'/<\/?span>/',
'', $heading );
// NOTE(review): as written, '/[/' opens a character class that is never
// closed, so PCRE cannot compile it, and the second pair replaces ']' with
// ']'. The patterns look like HTML entities (e.g. &#91; and &#93;) that were
// decoded somewhere along the way - confirm against the canonical file.
96 $heading = preg_replace( [
'/[/',
'/]/' ], [
'[',
']' ], $heading );
// Remove numeric markers of the form <sup>[1]</sup> (case-insensitive,
// optional whitespace around the brackets and digits).
97 $heading = preg_replace(
'/<sup>\s*\[\s*\d+\s*\]\s*<\/sup>/i',
'', $heading );
// Strip whatever markup is left, then trim surrounding whitespace.
100 $heading = trim( Sanitizer::stripAllTags( $heading ) );
// NOTE(review): in_array() is called without the strict flag, so the
// comparison against the ignored headings is loose.
104 if ( !in_array( $heading, $ignoredHeadings ) ) {
105 $headings[] = $heading;
// Split the message into one entry per line.
121 $lines = explode(
"\n", $message );
// Remove everything from the first '#' to the end of each line.
123 $lines = preg_replace(
'/#.*$/',
'', $lines );
// Trim surrounding whitespace from every entry.
125 $lines = array_map(
'trim', $lines );
// array_filter() without a callback drops falsy entries (empty strings, and
// also the string "0") and keeps the original array keys.
128 return array_filter( $lines );
/**
 * Returns the list of headings to ignore.
 *
 * The list is kept in a function-level static variable, so it is built on the
 * first call only and reused by later calls.
 */
136 private function getIgnoredHeadings() {
// null marks the cache as not yet filled.
137 static $ignoredHeadings =
null;
138 if ( $ignoredHeadings ===
null ) {
// Default to an empty list; it stays empty when $source is disabled.
139 $ignoredHeadings = [];
// $source is assigned on a line that is not part of this view.
141 if ( !
$source->isDisabled() ) {
// $lines is likewise produced on a line that is not part of this view.
144 $ignoredHeadings = $lines;
148 return $ignoredHeadings;
/**
 * Fills $openingText, $auxText and $allText from the parser output's raw text.
 */
154 private function extractWikitextParts() {
// A non-null $allText is tested first; presumably this skips repeated work
// (the branch body is not part of this view).
155 if ( $this->allText !==
null ) {
158 $text = $this->parserOutput->getRawText();
// Empty text is handled in its own branch.
159 if ( $text ===
'' ) {
// The opening text is taken from the raw string, before the DOM below is
// parsed and modified.
166 $this->openingText = $this->extractTextBeforeFirstHeading( $text );
168 $doc = DOMUtils::parseHTML( $text );
// Remove every element matching an excluded selector.
171 foreach ( self::EXCLUDED_ELEMENT_SELECTORS as $selector ) {
172 foreach ( DOMCompat::querySelectorAll( $doc, $selector ) as $element ) {
173 $element->parentNode->removeChild( $element );
// Record the tag-stripped text of each auxiliary element, then remove the
// element so its text does not end up in $allText as well.
180 foreach ( self::AUXILIARY_ELEMENT_SELECTORS as $selector ) {
181 foreach ( DOMCompat::querySelectorAll( $doc, $selector ) as $element ) {
182 $this->auxText[] = trim( Sanitizer::stripAllTags( DOMCompat::getInnerHTML( $element ) ) );
183 $element->parentNode->removeChild( $element );
// Whatever remains in the body, with tags stripped, becomes $allText.
187 $this->allText = trim( Sanitizer::stripAllTags( DOMCompat::getInnerHTML( DOMCompat::getBody( $doc ) ) ) );
/**
 * Reduces $text to the plain text that comes before its first heading tag.
 *
 * @param string $text HTML to scan
 */
197 private function extractTextBeforeFirstHeading( $text ) {
// Look for the first opening <h1>..<h6> tag; PREG_OFFSET_CAPTURE records the
// byte offset of the match. The no-match branch is not part of this view.
199 if ( !preg_match(
'/<h[123456]\b/', $text,
$matches, PREG_OFFSET_CAPTURE ) ) {
// Keep only the part of the string that precedes the heading.
204 $text = substr( $text, 0,
$matches[ 0 ][ 1 ] );
// Parse the fragment and remove both excluded and auxiliary elements.
211 $doc = DOMUtils::parseHTML( $text );
212 foreach ( array_merge( self::EXCLUDED_ELEMENT_SELECTORS, self::AUXILIARY_ELEMENT_SELECTORS ) as $selector ) {
213 foreach ( DOMCompat::querySelectorAll( $doc, $selector ) as $element ) {
214 $element->parentNode->removeChild( $element );
// Strip tags from what is left of the body and trim the result.
218 $text = trim( Sanitizer::stripAllTags( DOMCompat::getInnerHTML( DOMCompat::getBody( $doc ) ) ) );
// Each accessor below calls extractWikitextParts() before reading its property.
233 $this->extractWikitextParts();
235 return $this->openingText;
// Same pattern for the full document text.
242 $this->extractWikitextParts();
244 return $this->allText;
// Same pattern for the auxiliary text list.
251 $this->extractWikitextParts();
253 return $this->auxText;
262 $sort = $this->parserOutput->getPageProperty(
'defaultsort' );
263 if ( $sort ===
false ) {