63 $contextlines = self::DEFAULT_CONTEXT_LINES,
64 $contextchars = self::DEFAULT_CONTEXT_CHARS
73 $spat =
"/(\\{\\{)|(\\[\\[[^\\]:]+:)|(\n\\{\\|)";
76 1 =>
'/(\{\{)|(\}\})/',
77 2 =>
'/(\[\[)|(\]\])/',
78 3 =>
"/(\n\\{\\|)|(\n\\|\\})/" ];
82 if ( \ExtensionRegistry::getInstance()->isLoaded(
'Cite' ) ) {
84 $endPatterns[4] =
'/(<ref>)|(<\/ref>)/';
90 $textLen = strlen( $text );
92 while ( $start < $textLen ) {
94 if ( preg_match( $spat, $text,
$matches, PREG_OFFSET_CAPTURE, $start ) ) {
96 foreach (
$matches as $key => $val ) {
97 if ( $key > 0 && $val[1] != -1 ) {
100 $ns = substr( $val[0], 2, -1 );
102 MediaWikiServices::getInstance()->getContentLanguage()->
109 $epat = $endPatterns[$key];
110 $this->
splitAndAdd( $textExt, $count, substr( $text, $start, $val[1] - $start ) );
118 $offset = $start + 1;
120 while ( preg_match( $epat, $text, $endMatches, PREG_OFFSET_CAPTURE, $offset ) ) {
121 if ( array_key_exists( 2, $endMatches ) ) {
124 $len = strlen( $endMatches[2][0] );
125 $off = $endMatches[2][1];
127 substr( $text, $start, $off + $len - $start ) );
128 $start = $off + $len;
139 $offset = $endMatches[0][1] + strlen( $endMatches[0][0] );
150 $this->
splitAndAdd( $textExt, $count, substr( $text, $start ) );
153 '@phan-var string[] $textExt';
155 $all = $textExt + $otherExt;
158 foreach ( $terms as $index => $term ) {
160 if ( preg_match(
'/[\x80-\xff]/', $term ) ) {
161 $terms[$index] = preg_replace_callback(
163 [ $this,
'caseCallback' ],
167 $terms[$index] = $term;
170 $anyterm = implode(
'|', $terms );
171 $phrase = implode(
"$wgSearchHighlightBoundaries+", $terms );
176 $scale = strlen( $anyterm ) / mb_strlen( $anyterm );
177 $contextchars = intval( $contextchars * $scale );
179 $patPre =
"(^|$wgSearchHighlightBoundaries)";
180 $patPost =
"($wgSearchHighlightBoundaries|$)";
182 $pat1 =
"/(" . $phrase .
")/ui";
183 $pat2 =
"/$patPre(" . $anyterm .
")$patPost/ui";
185 $left = $contextlines;
193 foreach ( $textExt as $index =>
$line ) {
195 $firstText = $this->
extract(
$line, 0, $contextchars * $contextlines );
203 foreach ( $terms as $term ) {
204 if ( !preg_match(
"/$patPre" . $term .
"$patPost/ui", $firstText ) ) {
210 $snippets[$first] = $firstText;
211 $offsets[$first] = 0;
216 $this->
process( $pat1, $textExt, $left, $contextchars, $snippets, $offsets );
218 $this->
process( $pat1, $otherExt, $left, $contextchars, $snippets, $offsets );
220 $this->
process( $pat2, $textExt, $left, $contextchars, $snippets, $offsets );
222 $this->
process( $pat2, $otherExt, $left, $contextchars, $snippets, $offsets );
229 if ( count( $snippets ) == 0 ) {
231 if ( array_key_exists( $first, $all ) ) {
232 $targetchars = $contextchars * $contextlines;
233 $snippets[$first] =
'';
234 $offsets[$first] = 0;
238 if ( array_key_exists( $first, $snippets ) && preg_match( $pat1, $snippets[$first] )
239 && $offsets[$first] < $contextchars * 2 ) {
240 $snippets = [ $first => $snippets[$first] ];
244 $targetchars = intval( ( $contextchars * $contextlines ) / count( $snippets ) );
247 foreach ( $snippets as $index =>
$line ) {
248 $extended[$index] =
$line;
249 $len = strlen(
$line );
250 if ( $len < $targetchars - 20 ) {
252 if ( $len < strlen( $all[$index] ) ) {
253 $extended[$index] = $this->
extract(
256 $offsets[$index] + $targetchars,
259 $len = strlen( $extended[$index] );
264 while ( $len < $targetchars - 20
265 && array_key_exists( $add, $all )
266 && !array_key_exists( $add, $snippets ) ) {
268 $tt =
"\n" . $this->
extract( $all[$add], 0, $targetchars - $len, $offsets[$add] );
269 $extended[$add] = $tt;
270 $len += strlen( $tt );
277 $snippets = $extended;
280 foreach ( $snippets as $index =>
$line ) {
283 } elseif ( $last + 1 == $index
284 && $offsets[$last] + strlen( $snippets[$last] ) >= strlen( $all[$last] )
286 $extract .=
" " .
$line;
288 $extract .=
'<b> ... </b>' .
$line;
294 $extract .=
'<b> ... </b>';
298 foreach ( $terms as $term ) {
299 if ( !isset( $processed[$term] ) ) {
300 $pat3 =
"/$patPre(" . $term .
")$patPost/ui";
301 $extract = preg_replace( $pat3,
302 "\\1<span class='searchmatch'>\\2</span>\\3", $extract );
303 $processed[$term] =
true;
425 private function process( $pattern, $extracts, &$linesleft, &$contextchars, &$out, &$offsets ) {
426 if ( $linesleft == 0 ) {
429 foreach ( $extracts as $index =>
$line ) {
430 if ( array_key_exists( $index, $out ) ) {
435 if ( !preg_match( $pattern,
$line, $m, PREG_OFFSET_CAPTURE ) ) {
440 $len = strlen( $m[0][0] );
441 if ( $offset + $len < $contextchars ) {
443 } elseif ( $len > $contextchars ) {
446 $begin = $offset + intval( ( $len - $contextchars ) / 2 );
449 $end = $begin + $contextchars;
453 $out[$index] = $this->
extract(
$line, $begin, $end, $posBegin );
454 $offsets[$index] = $posBegin;
456 if ( $linesleft == 0 ) {