3 use HtmlFormatter\HtmlFormatter;
39 'h1',
'h2',
'h3',
'h4',
'h5',
'h6',
44 '.navigation-not-searchable',
46 '.wbmi-entityview-emptyCaption',
88 foreach ( $this->parserOutput->getSections() as $heading ) {
89 $heading = $heading[
'line' ];
93 $heading = preg_replace(
'/<\/?span>/',
'', $heading );
95 $heading = preg_replace( [
'/&#91;/',
'/&#93;/' ], [
'[',
']' ], $heading );
96 $heading = preg_replace(
'/<sup>\s*\[\s*\d+\s*\]\s*<\/sup>/is',
'', $heading );
103 if ( !in_array( $heading, $ignoredHeadings ) ) {
104 $headings[] = $heading;
118 $lines = explode(
"\n", $message );
130 static $ignoredHeadings =
null;
131 if ( $ignoredHeadings ===
null ) {
132 $ignoredHeadings = [];
136 $source =
wfMessage(
'cirrussearch-ignored-headings' )->inContentLanguage();
138 if ( !
$source->isDisabled() ) {
140 $ignoredHeadings =
$lines;
143 return $ignoredHeadings;
150 if ( $this->allText !==
null ) {
153 $text = $this->parserOutput->getText( [
154 'enableSectionEditTokens' =>
false,
157 if ( $text ===
'' ) {
166 $formatter =
new HtmlFormatter( $text );
169 $formatter->remove( $this->excludedElementSelectors );
170 $formatter->filterContent();
175 $formatter->remove( $this->auxiliaryElementSelectors );
176 $auxiliaryElements = $formatter->filterContent();
178 foreach ( $auxiliaryElements as $auxiliaryElement ) {
191 if ( !preg_match(
'/<h[123456]>/', $text,
$matches, PREG_OFFSET_CAPTURE ) ) {
196 $text = substr( $text, 0,
$matches[ 0 ][ 1 ] );
203 $formatter =
new HtmlFormatter( $text );
204 $formatter->remove( $this->excludedElementSelectors );
205 $formatter->remove( $this->auxiliaryElementSelectors );
206 $formatter->filterContent();
247 return $this->parserOutput->getPageProperty(
'defaultsort' );
wfMessage( $key,... $params)
This is the function for getting translated interface messages.
static stripAllTags( $html)
Take a fragment of (potentially invalid) HTML and return a version with any tags removed,...
Class for exploring the structure of parsed wikitext.
getDefaultSort()
Get the defaultsort property.
extractHeadingBeforeFirstHeading( $text)
Get text before first heading.
string[] $auxiliaryElementSelectors
Selectors for elements that are considered auxiliary to the article text for search.
ParserOutput $parserOutput
static parseSettingsInMessage( $message)
Parse a message content into an array.
extractWikitextParts()
Extract parts of the text - opening, main and auxiliary.
string[] $excludedElementSelectors
Selectors for elements that are excluded entirely from search.
headings()
Get headings on the page.
getIgnoredHeadings()
Get the list of headings to ignore.
__construct(ParserOutput $parserOutput)
if(!file_exists( $CREDITS)) $lines