20 private $auxText = [];
24 private $parserOutput;
29 private $excludedElementSelectors = [
39 'h1',
'h2',
'h3',
'h4',
'h5',
'h6',
44 '.navigation-not-searchable',
46 '.wbmi-entityview-emptyCaption',
52 private $auxiliaryElementSelectors = [
69 $this->parserOutput = $parserOutput;
87 $tocData = $this->parserOutput->getTOCData();
88 if ( $tocData ===
null ) {
91 $ignoredHeadings = $this->getIgnoredHeadings();
92 foreach ( $tocData->getSections() as $heading ) {
93 $heading = $heading->line;
97 $heading = preg_replace(
'/<\/?span>/',
'', $heading );
99 $heading = preg_replace( [
'/&#91;/',
'/&#93;/' ], [
'[',
']' ], $heading );
100 $heading = preg_replace(
'/<sup>\s*\[\s*\d+\s*\]\s*<\/sup>/is',
'', $heading );
103 $heading = trim( Sanitizer::stripAllTags( $heading ) );
107 if ( !in_array( $heading, $ignoredHeadings ) ) {
108 $headings[] = $heading;
122 $lines = explode(
"\n", $message );
133 private function getIgnoredHeadings() {
134 static $ignoredHeadings =
null;
135 if ( $ignoredHeadings ===
null ) {
136 $ignoredHeadings = [];
140 $source =
wfMessage(
'cirrussearch-ignored-headings' )->inContentLanguage();
142 if ( !
$source->isDisabled() ) {
144 $ignoredHeadings =
$lines;
147 return $ignoredHeadings;
153 private function extractWikitextParts() {
154 if ( $this->allText !==
null ) {
157 $text = $this->parserOutput->getText( [
158 'enableSectionEditTokens' =>
false,
161 if ( $text ===
'' ) {
167 $this->openingText = $this->extractHeadingBeforeFirstHeading( $text );
169 $formatter =
new HtmlFormatter( $text );
172 $formatter->remove( $this->excludedElementSelectors );
173 $formatter->filterContent();
178 $formatter->remove( $this->auxiliaryElementSelectors );
179 $auxiliaryElements = $formatter->filterContent();
180 $this->allText = trim( Sanitizer::stripAllTags( $formatter->getText() ) );
181 foreach ( $auxiliaryElements as $auxiliaryElement ) {
183 trim( Sanitizer::stripAllTags( $formatter->getText( $auxiliaryElement ) ) );
192 private function extractHeadingBeforeFirstHeading( $text ) {
194 if ( !preg_match(
'/<h[123456]>/', $text,
$matches, PREG_OFFSET_CAPTURE ) ) {
199 $text = substr( $text, 0,
$matches[ 0 ][ 1 ] );
206 $formatter =
new HtmlFormatter( $text );
207 $formatter->remove( $this->excludedElementSelectors );
208 $formatter->remove( $this->auxiliaryElementSelectors );
209 $formatter->filterContent();
210 $text = trim( Sanitizer::stripAllTags( $formatter->getText() ) );
225 $this->extractWikitextParts();
226 return $this->openingText;
233 $this->extractWikitextParts();
234 return $this->allText;
241 $this->extractWikitextParts();
242 return $this->auxText;
250 $sort = $this->parserOutput->getPageProperty(
'defaultsort' );
251 if ( $sort ===
false ) {
Class for exploring the structure of parsed wikitext.
getDefaultSort()
Get the defaultsort property.
static parseSettingsInMessage( $message)
Parse the content of a message into an array.
headings()
Get headings on the page.
__construct(ParserOutput $parserOutput)