43 $this->mCleanWikitext = $cleanupWikitext;
59 $contextlines = self::DEFAULT_CONTEXT_LINES,
60 $contextchars = self::DEFAULT_CONTEXT_CHARS
69 $spat =
"/(\\{\\{)|(\\[\\[[^\\]:]+:)|(\n\\{\\|)";
72 1 =>
'/(\{\{)|(\}\})/',
73 2 =>
'/(\[\[)|(\]\])/',
74 3 =>
"/(\n\\{\\|)|(\n\\|\\})/" ];
80 $endPatterns[4] =
'/(<ref>)|(<\/ref>)/';
86 $textLen = strlen( $text );
88 while ( $start < $textLen ) {
90 if ( preg_match( $spat, $text,
$matches, PREG_OFFSET_CAPTURE, $start ) ) {
92 foreach (
$matches as $key => $val ) {
93 if ( $key > 0 && $val[1] != -1 ) {
96 $ns = substr( $val[0], 2, -1 );
98 MediaWikiServices::getInstance()->getContentLanguage()->
105 $epat = $endPatterns[$key];
106 $this->
splitAndAdd( $textExt, $count, substr( $text, $start, $val[1] - $start ) );
114 $offset = $start + 1;
116 while ( preg_match( $epat, $text, $endMatches, PREG_OFFSET_CAPTURE, $offset ) ) {
117 if ( array_key_exists( 2, $endMatches ) ) {
120 $len = strlen( $endMatches[2][0] );
121 $off = $endMatches[2][1];
123 substr( $text, $start, $off + $len - $start ) );
124 $start = $off + $len;
135 $offset = $endMatches[0][1] + strlen( $endMatches[0][0] );
146 $this->
splitAndAdd( $textExt, $count, substr( $text, $start ) );
150 $all = $textExt + $otherExt;
153 foreach ( $terms as $index => $term ) {
155 if ( preg_match(
'/[\x80-\xff]/', $term ) ) {
156 $terms[$index] = preg_replace_callback(
158 [ $this,
'caseCallback' ],
162 $terms[$index] = $term;
165 $anyterm = implode(
'|', $terms );
166 $phrase = implode(
"$wgSearchHighlightBoundaries+", $terms );
171 $scale = strlen( $anyterm ) / mb_strlen( $anyterm );
172 $contextchars = intval( $contextchars * $scale );
174 $patPre =
"(^|$wgSearchHighlightBoundaries)";
175 $patPost =
"($wgSearchHighlightBoundaries|$)";
177 $pat1 =
"/(" . $phrase .
")/ui";
178 $pat2 =
"/$patPre(" . $anyterm .
")$patPost/ui";
180 $left = $contextlines;
188 foreach ( $textExt as $index =>
$line ) {
190 $firstText = $this->
extract(
$line, 0, $contextchars * $contextlines );
198 foreach ( $terms as $term ) {
199 if ( !preg_match(
"/$patPre" . $term .
"$patPost/ui", $firstText ) ) {
205 $snippets[$first] = $firstText;
206 $offsets[$first] = 0;
211 $this->
process( $pat1, $textExt, $left, $contextchars, $snippets, $offsets );
213 $this->
process( $pat1, $otherExt, $left, $contextchars, $snippets, $offsets );
215 $this->
process( $pat2, $textExt, $left, $contextchars, $snippets, $offsets );
217 $this->
process( $pat2, $otherExt, $left, $contextchars, $snippets, $offsets );
224 if ( count( $snippets ) == 0 ) {
226 if ( array_key_exists( $first, $all ) ) {
227 $targetchars = $contextchars * $contextlines;
228 $snippets[$first] =
'';
229 $offsets[$first] = 0;
233 if ( array_key_exists( $first, $snippets ) && preg_match( $pat1, $snippets[$first] )
234 && $offsets[$first] < $contextchars * 2 ) {
235 $snippets = [ $first => $snippets[$first] ];
239 $targetchars = intval( ( $contextchars * $contextlines ) / count( $snippets ) );
242 foreach ( $snippets as $index =>
$line ) {
243 $extended[$index] =
$line;
244 $len = strlen(
$line );
245 if ( $len < $targetchars - 20 ) {
247 if ( $len < strlen( $all[$index] ) ) {
248 $extended[$index] = $this->
extract(
251 $offsets[$index] + $targetchars,
254 $len = strlen( $extended[$index] );
259 while ( $len < $targetchars - 20
260 && array_key_exists( $add, $all )
261 && !array_key_exists( $add, $snippets ) ) {
263 $tt =
"\n" . $this->
extract( $all[$add], 0, $targetchars - $len, $offsets[$add] );
264 $extended[$add] = $tt;
265 $len += strlen( $tt );
272 $snippets = $extended;
275 foreach ( $snippets as $index =>
$line ) {
278 } elseif (
$last + 1 == $index
279 && $offsets[
$last] + strlen( $snippets[
$last] ) >= strlen( $all[
$last] )
281 $extract .=
" " .
$line;
283 $extract .=
'<b> ... </b>' .
$line;
289 $extract .=
'<b> ... </b>';
293 foreach ( $terms as $term ) {
294 if ( !isset( $processed[$term] ) ) {
295 $pat3 =
"/$patPre(" . $term .
")$patPost/ui";
296 $extract = preg_replace( $pat3,
297 "\\1<span class='searchmatch'>\\2</span>\\3", $extract );
298 $processed[$term] =
true;
313 $split = explode(
"\n", $this->mCleanWikitext ? $this->
removeWiki( $text ) : $text );
314 foreach ( $split as
$line ) {
317 $extracts[$count++] = $tt;
330 $contLang = MediaWikiServices::getInstance()->getContentLanguage();
331 return '[' . $contLang->lc(
$matches[0] ) .
348 function extract( $text, $start, $end, &$posStart =
null, &$posEnd =
null ) {
350 $start = $this->
position( $text, $start, 1 );
352 if ( $end >= strlen( $text ) ) {
353 $end = strlen( $text );
355 $end = $this->
position( $text, $end );
358 if ( !is_null( $posStart ) ) {
361 if ( !is_null( $posEnd ) ) {
365 if ( $end > $start ) {
366 return substr( $text, $start, $end - $start );
382 $s = max( 0, $point - $tolerance );
383 $l = min( strlen( $text ), $point + $tolerance ) -
$s;
387 '/[ ,.!?~!@#$%^&*\(\)+=\-\\\|\[\]"\'<>]/',
388 substr( $text,
$s, $l ),
392 return $m[0][1] +
$s + $offset;
395 $char = ord( $text[$point] );
396 while ( $char >= 0x80 && $char < 0xc0 ) {
399 if ( $point >= strlen( $text ) ) {
400 return strlen( $text );
402 $char = ord( $text[$point] );
421 function process( $pattern, $extracts, &$linesleft, &$contextchars, &$out, &$offsets ) {
422 if ( $linesleft == 0 ) {
425 foreach ( $extracts as $index =>
$line ) {
426 if ( array_key_exists( $index, $out ) ) {
431 if ( !preg_match( $pattern,
$line, $m, PREG_OFFSET_CAPTURE ) ) {
436 $len = strlen( $m[0][0] );
437 if ( $offset + $len < $contextchars ) {
439 } elseif ( $len > $contextchars ) {
442 $begin = $offset + intval( ( $len - $contextchars ) / 2 );
445 $end = $begin + $contextchars;
449 $out[$index] = $this->
extract(
$line, $begin, $end, $posBegin );
450 $offsets[$index] = $posBegin;
452 if ( $linesleft == 0 ) {
465 $text = preg_replace(
"/\\{\\{([^|]+?)\\}\\}/",
"", $text );
466 $text = preg_replace(
"/\\{\\{([^|]+\\|)(.*?)\\}\\}/",
"\\2", $text );
467 $text = preg_replace(
"/\\[\\[([^|]+?)\\]\\]/",
"\\1", $text );
468 $text = preg_replace_callback(
469 "/\\[\\[([^|]+\\|)(.*?)\\]\\]/",
470 [ $this,
'linkReplace' ],
473 $text = preg_replace(
"/<\/?[^>]+>/",
"", $text );
474 $text = preg_replace(
"/'''''/",
"", $text );
475 $text = preg_replace(
"/('''|<\/?[iIuUbB]>)/",
"", $text );
476 $text = preg_replace(
"/''/",
"", $text );
481 $text = Sanitizer::escapeHtmlAllowEntities( $text );
493 $colon = strpos(
$matches[1],
':' );
494 if ( $colon ===
false ) {
497 $ns = substr(
$matches[1], 0, $colon );
498 $index = MediaWikiServices::getInstance()->getContentLanguage()->getNsIndex( $ns );
521 $contextlines = self::DEFAULT_CONTEXT_LINES,
522 $contextchars = self::DEFAULT_CONTEXT_CHARS
524 $lines = explode(
"\n", $text );
526 $terms = implode(
'|', $terms );
527 $max = intval( $contextchars ) + 1;
528 $pat1 =
"/(.*)($terms)(.{0,$max})/i";
533 $contLang = MediaWikiServices::getInstance()->getContentLanguage();
535 if ( $contextlines == 0 ) {
540 if ( !preg_match( $pat1,
$line, $m ) ) {
545 $pre = $contLang->truncateForVisual( $m[1], - $contextchars,
'...',
false );
547 if ( count( $m ) < 3 ) {
550 $post = $contLang->truncateForVisual( $m[3], $contextchars,
'...',
false );
555 $line = htmlspecialchars( $pre . $found . $post );
556 $pat2 =
'/(' . $terms .
")/i";
557 $line = preg_replace( $pat2,
"<span class='searchmatch'>\\1</span>",
$line );
559 $extract .=
"${line}\n";
575 $contextlines = self::DEFAULT_CONTEXT_LINES,
576 $contextchars = self::DEFAULT_CONTEXT_CHARS
579 $text = ltrim( $text ) .
"\n";
580 $text = str_replace(
"\n\n",
"\n", $text );
581 preg_match(
"/^(.*\n){0,$contextlines}/", $text, $match );
584 $text = htmlspecialchars( substr( trim( $match[0] ), 0, $contextlines * $contextchars ) );
585 return str_replace(
"\n",
'<br>', $text );