64 $contextlines = self::DEFAULT_CONTEXT_LINES,
65 $contextchars = self::DEFAULT_CONTEXT_CHARS
67 $searchHighlightBoundaries = MediaWikiServices::getInstance()
68 ->getMainConfig()->get( MainConfigNames::SearchHighlightBoundaries );
75 $spat =
"/(\\{\\{)|(\\[\\[[^\\]:]+:)|(\n\\{\\|)";
78 1 =>
'/(\{\{)|(\}\})/',
79 2 =>
'/(\[\[)|(\]\])/',
80 3 =>
"/(\n\\{\\|)|(\n\\|\\})/" ];
84 if ( \ExtensionRegistry::getInstance()->isLoaded(
'Cite' ) ) {
86 $endPatterns[4] =
'/(<ref>)|(<\/ref>)/';
92 $textLen = strlen( $text );
94 while ( $start < $textLen ) {
96 if ( preg_match( $spat, $text,
$matches, PREG_OFFSET_CAPTURE, $start ) ) {
98 foreach (
$matches as $key => $val ) {
99 if ( $key > 0 && $val[1] != -1 ) {
102 $ns = substr( $val[0], 2, -1 );
104 MediaWikiServices::getInstance()->getContentLanguage()->
111 $epat = $endPatterns[$key];
112 $this->splitAndAdd( $textExt, $count, substr( $text, $start, $val[1] - $start ) );
120 $offset = $start + 1;
122 while ( preg_match( $epat, $text, $endMatches, PREG_OFFSET_CAPTURE, $offset ) ) {
123 if ( array_key_exists( 2, $endMatches ) ) {
126 $len = strlen( $endMatches[2][0] );
127 $off = $endMatches[2][1];
128 $this->splitAndAdd( $otherExt, $count,
129 substr( $text, $start, $off + $len - $start ) );
130 $start = $off + $len;
141 $offset = $endMatches[0][1] + strlen( $endMatches[0][0] );
145 $this->splitAndAdd( $textExt, $count, substr( $text, $start, strlen(
$matches[0][0] ) ) );
152 $this->splitAndAdd( $textExt, $count, substr( $text, $start ) );
155 '@phan-var string[] $textExt';
157 $all = $textExt + $otherExt;
160 foreach ( $terms as $index => $term ) {
162 if ( preg_match(
'/[\x80-\xff]/', $term ) ) {
163 $terms[$index] = preg_replace_callback(
165 [ $this,
'caseCallback' ],
169 $terms[$index] = $term;
172 $anyterm = implode(
'|', $terms );
173 $phrase = implode(
"{$searchHighlightBoundaries}+", $terms );
178 $scale = strlen( $anyterm ) / mb_strlen( $anyterm );
179 $contextchars = intval( $contextchars * $scale );
181 $patPre =
"(^|{$searchHighlightBoundaries})";
182 $patPost =
"({$searchHighlightBoundaries}|$)";
184 $pat1 =
"/(" . $phrase .
")/ui";
185 $pat2 =
"/$patPre(" . $anyterm .
")$patPost/ui";
187 $left = $contextlines;
195 foreach ( $textExt as $index => $line ) {
196 if ( strlen( $line ) > 0 && $line[0] !=
';' && $line[0] !=
':' ) {
197 $firstText = $this->extract( $line, 0, $contextchars * $contextlines );
205 foreach ( $terms as $term ) {
206 if ( !preg_match(
"/$patPre" . $term .
"$patPost/ui", $firstText ) ) {
212 $snippets[$first] = $firstText;
213 $offsets[$first] = 0;
218 $this->process( $pat1, $textExt, $left, $contextchars, $snippets, $offsets );
220 $this->process( $pat1, $otherExt, $left, $contextchars, $snippets, $offsets );
222 $this->process( $pat2, $textExt, $left, $contextchars, $snippets, $offsets );
224 $this->process( $pat2, $otherExt, $left, $contextchars, $snippets, $offsets );
231 if ( count( $snippets ) == 0 ) {
233 if ( array_key_exists( $first, $all ) ) {
234 $targetchars = $contextchars * $contextlines;
235 $snippets[$first] =
'';
236 $offsets[$first] = 0;
240 if ( array_key_exists( $first, $snippets ) && preg_match( $pat1, $snippets[$first] )
241 && $offsets[$first] < $contextchars * 2 ) {
242 $snippets = [ $first => $snippets[$first] ];
246 $targetchars = intval( ( $contextchars * $contextlines ) / count( $snippets ) );
249 foreach ( $snippets as $index => $line ) {
250 $extended[$index] = $line;
251 $len = strlen( $line );
254 if ( $len < $targetchars - 20 ) {
256 if ( $len < strlen( $all[$index] ) ) {
257 $extended[$index] = $this->extract(
262 $offsets[$index] + $targetchars,
265 $len = strlen( $extended[$index] );
272 while ( $len < $targetchars - 20
273 && array_key_exists( $add, $all )
274 && !array_key_exists( $add, $snippets ) ) {
278 $tt =
"\n" . $this->extract( $all[$add], 0, $targetchars - $len, $offsets[$add] );
279 $extended[$add] = $tt;
280 $len += strlen( $tt );
287 $snippets = $extended;
290 foreach ( $snippets as $index => $line ) {
293 } elseif ( $last + 1 == $index
294 && $offsets[$last] + strlen( $snippets[$last] ) >= strlen( $all[$last] )
296 $extract .=
" " . $line;
298 $extract .=
'<b> ... </b>' . $line;
304 $extract .=
'<b> ... </b>';
308 foreach ( $terms as $term ) {
309 if ( !isset( $processed[$term] ) ) {
310 $pat3 =
"/$patPre(" . $term .
")$patPost/ui";
311 $extract = preg_replace( $pat3,
312 "\\1<span class='searchmatch'>\\2</span>\\3", $extract );
313 $processed[$term] =
true;