14 private ?
string $openingText =
null;
15 private ?
string $allText =
null;
17 private array $auxText = [];
23 private const EXCLUDED_ELEMENT_SELECTORS = [
33 'h1',
'h2',
'h3',
'h4',
'h5',
'h6',
38 '.navigation-not-searchable',
40 '.wbmi-entityview-emptyCaption',
46 private const AUXILIARY_ELEMENT_SELECTORS = [
61 $this->parserOutput = $parserOutput;
83 $tocData = $this->parserOutput->getTOCData();
84 if ( $tocData ===
null ) {
87 $ignoredHeadings = $this->getIgnoredHeadings();
88 foreach ( $tocData->getSections() as $heading ) {
89 $heading = $heading->line;
// Drop bare <span> / </span> wrappers. Spans that carry attributes do not
// match this pattern and are left for the tag stripping further down.
$heading = preg_replace( '/<\/?span>/', '', $heading );
// Normalize entity-encoded square brackets to literal ones so that the
// reference-marker pattern below can see them. The former patterns
// ('/[/' and '/]/') could not work: '/[/' is an unterminated character
// class, so preg_replace() raised a warning and returned null, and even
// a compiling version would only have replaced '[' with '['.
// NOTE(review): &#91; / &#93; are assumed to be the intended targets,
// since they are the numeric entities for '[' and ']' — confirm.
$heading = str_replace( [ '&#91;', '&#93;' ], [ '[', ']' ], $heading );
// Remove numeric footnote markers such as <sup>[12]</sup>.
$heading = preg_replace( '/<sup>\s*\[\s*\d+\s*\]\s*<\/sup>/i', '', $heading );
// Strip whatever markup is left and trim the surrounding whitespace.
$heading = trim( Sanitizer::stripAllTags( $heading ) );
103 if ( !in_array( $heading, $ignoredHeadings ) ) {
104 $headings[] = $heading;
// Split the message into lines and cut everything from a '#' to the end
// of each line.
$withoutComments = preg_replace( '/#.*$/', '', explode( "\n", $message ) );
// Trim surrounding whitespace from every remaining line.
$lines = array_map( 'trim', $withoutComments );
// array_filter() without a callback drops the empty (falsy) entries and
// keeps the original array keys.
return array_filter( $lines );
/**
 * Return the list of headings to ignore.
 *
 * The list is built on the first call and kept in a function-level static
 * variable; later calls return that cached value without rebuilding it.
 * It stays an empty array when $source->isDisabled() is true.
 */
135 private function getIgnoredHeadings() {
// Function-static cache; null means "not computed yet".
136 static $ignoredHeadings =
null;
137 if ( $ignoredHeadings ===
null ) {
// Default to "nothing ignored" before consulting the source.
138 $ignoredHeadings = [];
// NOTE(review): $source is assigned on a line not visible in this excerpt —
// confirm what it is and where it comes from.
140 if ( !
$source->isDisabled() ) {
// NOTE(review): $lines is produced on a line not visible in this excerpt.
143 $ignoredHeadings = $lines;
147 return $ignoredHeadings;
/**
 * Extract the text parts of the parser output: sets $this->openingText and
 * $this->allText, and processes the auxiliary elements one by one.
 */
153 private function extractWikitextParts() {
// Guard on a previously computed allText.
// NOTE(review): the body of this branch is not visible in this excerpt.
154 if ( $this->allText !==
null ) {
157 $text = $this->parserOutput->getRawText();
// Special-case empty raw text.
// NOTE(review): the handling inside this branch is not visible here.
158 if ( $text ===
'' ) {
// Opening text is computed from the full raw text, before any filtering
// is applied by the formatter below.
165 $this->openingText = $this->extractTextBeforeFirstHeading( $text );
167 $formatter =
new HtmlFormatter( $text );
// First pass: remove the excluded elements; the return value of
// filterContent() is not used here.
170 $formatter->remove( self::EXCLUDED_ELEMENT_SELECTORS );
171 $formatter->filterContent();
// Second pass: remove the auxiliary elements and keep what filterContent()
// returns, so each element can be processed separately below.
176 $formatter->remove( self::AUXILIARY_ELEMENT_SELECTORS );
177 $auxiliaryElements = $formatter->filterContent();
// allText is the tag-stripped, trimmed text left after both passes.
178 $this->allText = trim( Sanitizer::stripAllTags( $formatter->getText() ) );
179 foreach ( $auxiliaryElements as $auxiliaryElement ) {
// NOTE(review): the assignment target of this expression is on a line not
// visible in this excerpt — presumably $this->auxText; confirm.
181 trim( Sanitizer::stripAllTags( $formatter->getText( $auxiliaryElement ) ) );
/**
 * Reduce the given HTML to the plain text that precedes its first heading.
 *
 * @param string $text HTML to inspect (used as the subject of preg_match()
 *  and substr())
 * NOTE(review): the return statements are not visible in this excerpt.
 */
192 private function extractTextBeforeFirstHeading( $text ) {
// Look for the first opening <h1>..<h6> tag; PREG_OFFSET_CAPTURE records
// the offset of the match alongside the matched text.
// NOTE(review): the body of the no-match branch is not visible here.
194 if ( !preg_match(
'/<h[123456]\b/', $text,
$matches, PREG_OFFSET_CAPTURE ) ) {
// Keep only the part of the text that precedes the matched heading tag.
199 $text = substr( $text, 0,
$matches[ 0 ][ 1 ] );
// Remove both the excluded and the auxiliary elements in a single
// filtering pass, then reduce the result to trimmed plain text.
206 $formatter =
new HtmlFormatter( $text );
207 $formatter->remove( self::EXCLUDED_ELEMENT_SELECTORS );
208 $formatter->remove( self::AUXILIARY_ELEMENT_SELECTORS );
209 $formatter->filterContent();
210 $text = trim( Sanitizer::stripAllTags( $formatter->getText() ) );
225 $this->extractWikitextParts();
227 return $this->openingText;
234 $this->extractWikitextParts();
236 return $this->allText;
243 $this->extractWikitextParts();
245 return $this->auxText;
254 $sort = $this->parserOutput->getPageProperty(
'defaultsort' );
255 if ( $sort ===
false ) {