37 $this->mCleanWikitext = $cleanupWikitext;
/**
 * Build a search-result excerpt from $text with the search terms highlighted.
 *
 * NOTE(review): this listing is a sampled excerpt of the method; many
 * statements and all closing braces are not shown. The comments below
 * describe only the visible statements and hedge everything else.
 *
 * @param string $text Text to scan; walked with preg_match()/substr() below.
 * @param array $terms Search terms; each one is interpolated into regexes
 *   unescaped, so they are presumably already regex fragments (confirm
 *   against the caller).
 * @param int $contextlines Used as the snippet budget ($left) and as a
 *   multiplier for the total excerpt size.
 * @param int $contextchars Per-snippet context size; rescaled below from
 *   characters to bytes.
 * @return string Presumably the highlighted $extract (the return statement is
 *   not visible here).
 */
49 public function highlightText( $text, $terms, $contextlines, $contextchars ) {
// Regex fragment for word boundaries; interpolated into $phrase, $patPre and $patPost.
51 global $wgSearchHighlightBoundaries;
// Opening delimiters, one capture group each: "{{", "[[" followed by a
// prefix and colon, and a "{|" at the start of a line.
59 $spat =
"/(\\{\\{)|(\\[\\[[^\\]:]+:)|(\n\\{\\|)";
// Open/close pattern pairs keyed by the capture-group number of the opening
// delimiter in $spat; group 2 of every pair is the closing delimiter.
62 1 =>
'/(\{\{)|(\}\})/',
63 2 =>
'/(\[\[)|(\]\])/',
64 3 =>
"/(\n\\{\\|)|(\n\\|\\})/" );
// When a wfCite() function is defined, <ref>...</ref> gets an end pattern too.
67 if ( function_exists(
'wfCite' ) ) {
69 $endPatterns[4] =
'/(<ref>)|(<\/ref>)/';
76 $textLen = strlen( $text );
// Walk the text, looking for the next opening delimiter at or after $start.
78 while ( $start < $textLen ) {
80 if ( preg_match( $spat, $text,
$matches, PREG_OFFSET_CAPTURE, $start ) ) {
// Skip group 0 (the whole match) and groups that did not take part
// (PREG_OFFSET_CAPTURE reports offset -1 for those).
83 if ( $key > 0 && $val[1] != - 1 ) {
// Drops the two leading and one trailing character of the match.
// NOTE(review): looks like the "[[" / ":" wrapper of the link case — confirm.
86 $ns = substr( $val[0], 2, - 1 );
92 $epat = $endPatterns[$key];
// The text between $start and the opening delimiter goes into $textExt.
93 $this->
splitAndAdd( $textExt,
$count, substr( $text, $start, $val[1] - $start ) );
101 $offset = $start + 1;
// Search forward for delimiters of the same kind.
103 while ( preg_match( $epat, $text, $endMatches, PREG_OFFSET_CAPTURE, $offset ) ) {
// Group 2 is present only when the closing delimiter matched.
104 if ( array_key_exists( 2, $endMatches ) ) {
107 $len = strlen( $endMatches[2][0] );
108 $off = $endMatches[2][1];
110 substr( $text, $start, $off + $len - $start ) );
// Resume scanning just past the closing delimiter.
111 $start = $off + $len;
// Step past this delimiter match and keep searching.
122 $offset = $endMatches[0][1] + strlen( $endMatches[0][0] );
// Array union: keys from both lists are kept, $textExt wins on a clash.
137 $all = $textExt + $otherExt;
142 foreach ( $terms
as $index =>
$term ) {
// Terms containing bytes >= 0x80 are rewritten one character at a time
// by caseCallback(); the visible alternative leaves the term as is.
144 if ( preg_match(
'/[\x80-\xff]/',
$term ) ) {
145 $terms[$index] = preg_replace_callback(
'/./us',
array( $this,
'caseCallback' ), $terms[$index] );
147 $terms[$index] =
$term;
// $anyterm matches any single term; $phrase matches all terms in order,
// separated by one or more boundary characters.
150 $anyterm = implode(
'|', $terms );
151 $phrase = implode(
"$wgSearchHighlightBoundaries+", $terms );
// Bytes-per-character ratio of the terms, used to turn $contextchars from a
// character count into an approximate byte count.
157 $scale = strlen( $anyterm ) / mb_strlen( $anyterm );
158 $contextchars = intval( $contextchars * $scale );
160 $patPre =
"(^|$wgSearchHighlightBoundaries)";
161 $patPost =
"($wgSearchHighlightBoundaries|$)";
// $pat1 matches the whole phrase; $pat2 matches any one term between boundaries.
163 $pat1 =
"/(" . $phrase .
")/ui";
164 $pat2 =
"/$patPre(" . $anyterm .
")$patPost/ui";
// Snippet budget; handed to process() by reference below.
168 $left = $contextlines;
176 foreach ( $textExt
as $index =>
$line ) {
178 $firstText = $this->
extract(
$line, 0, $contextchars * $contextlines );
// Tests whether $term is missing from the leading text.
// NOTE(review): the enclosing loop over $terms is not visible here.
187 if ( ! preg_match(
"/$patPre" .
$term .
"$patPost/ui", $firstText ) ) {
193 $snippets[$first] = $firstText;
194 $offsets[$first] = 0;
// Search order: whole phrase in $textExt, whole phrase in $otherExt, then
// any single term in $textExt, then any single term in $otherExt.
199 $this->
process( $pat1, $textExt, $left, $contextchars, $snippets, $offsets );
201 $this->
process( $pat1, $otherExt, $left, $contextchars, $snippets, $offsets );
203 $this->
process( $pat2, $textExt, $left, $contextchars, $snippets, $offsets );
205 $this->
process( $pat2, $otherExt, $left, $contextchars, $snippets, $offsets );
// Nothing matched: seed an empty snippet at $first so the extension step
// below has something to grow.
212 if ( count( $snippets ) == 0 ) {
214 if ( array_key_exists( $first, $all ) ) {
215 $targetchars = $contextchars * $contextlines;
216 $snippets[$first] =
'';
217 $offsets[$first] = 0;
// If the first snippet contains the whole phrase and starts within the first
// 2 * $contextchars bytes, keep only that snippet.
221 if ( array_key_exists( $first, $snippets ) && preg_match( $pat1, $snippets[$first] )
222 && $offsets[$first] < $contextchars * 2 ) {
223 $snippets =
array( $first => $snippets[$first] );
// Split the total byte budget evenly across the snippets.
227 $targetchars = intval( ( $contextchars * $contextlines ) / count ( $snippets ) );
230 foreach ( $snippets
as $index =>
$line ) {
231 $extended[$index] =
$line;
232 $len = strlen(
$line );
// Snippets more than 20 bytes short of the target get extended.
233 if ( $len < $targetchars - 20 ) {
// First take more of the same source line, if it has more to give.
235 if ( $len < strlen( $all[$index] ) ) {
236 $extended[$index] = $this->
extract( $all[$index], $offsets[$index], $offsets[$index] + $targetchars, $offsets[$index] );
237 $len = strlen( $extended[$index] );
// Then pull in entries of $all at $add that are not snippets already.
// NOTE(review): the statements that set and advance $add are not visible here.
242 while ( $len < $targetchars - 20
243 && array_key_exists( $add, $all )
244 && !array_key_exists( $add, $snippets ) ) {
246 $tt =
"\n" . $this->
extract( $all[$add], 0, $targetchars - $len, $offsets[$add] );
247 $extended[$add] = $tt;
248 $len += strlen( $tt );
255 $snippets = $extended;
// Join the snippets into $extract.
258 foreach ( $snippets
as $index =>
$line ) {
259 if (
$last == - 1 ) {
// The previous snippet has the preceding index and reached the end of its
// source line, so the two are joined with a space and no ellipsis.
261 } elseif (
$last + 1 == $index && $offsets[
$last] + strlen( $snippets[
$last] ) >= strlen( $all[
$last] ) ) {
262 $extract .=
" " .
$line;
264 $extract .=
'<b> ... </b>' .
$line;
270 $extract .=
'<b> ... </b>';
// Wrap each bounded occurrence of $term in a searchmatch span; groups 1 and 3
// put the surrounding boundary characters back.
// NOTE(review): the enclosing loop over $terms is not visible here.
276 $pat3 =
"/$patPre(" .
$term .
")$patPost/ui";
277 $extract = preg_replace( $pat3,
278 "\\1<span class='searchmatch'>\\2</span>\\3", $extract );
296 $split = explode(
"\n", $this->mCleanWikitext ? $this->
removeWiki( $text ) : $text );
300 $extracts[
$count++] = $tt;
/**
 * Return the part of $text between $start and $end, after both positions
 * have been adjusted by position().
 *
 * NOTE(review): this listing is a sampled excerpt; the bodies of the
 * $posStart / $posEnd branches and the fallback return are not shown.
 *
 * @param string $text Text to cut from.
 * @param int $start Requested start, as a byte offset into $text.
 * @param int $end Requested end, as a byte offset into $text; clamped to the
 *   length of $text.
 * @param int|null &$posStart Optional by-reference argument; presumably
 *   receives the adjusted start when non-null (assignment not visible).
 * @param int|null &$posEnd Optional by-reference argument; presumably
 *   receives the adjusted end when non-null (assignment not visible).
 * @return string The substring when the adjusted end lies after the adjusted
 *   start; the result for the other case is not visible here.
 */
330 function extract( $text, $start, $end, &$posStart =
null, &$posEnd =
null ) {
// Adjust the start via position(). NOTE(review): the third argument (1) is
// presumably an offset added to the position it finds — confirm against position().
332 $start = $this->
position( $text, $start, 1 );
// An end at or beyond the text length is clamped to the text length.
334 if ( $end >= strlen( $text ) ) {
335 $end = strlen( $text );
// NOTE(review): presumably the else branch — an in-range end is adjusted via position().
337 $end = $this->
position( $text, $end );
340 if ( !is_null( $posStart ) ) {
343 if ( !is_null( $posEnd ) ) {
// Only a non-empty range produces a substring here.
347 if ( $end > $start ) {
348 return substr( $text, $start, $end - $start );
364 $s = max( 0, $point - $tolerance );
365 $l = min( strlen( $text ), $point + $tolerance ) -
$s;
367 if ( preg_match(
'/[ ,.!?~!@#$%^&*\(\)+=\-\\\|\[\]"\'<>]/', substr( $text,
$s, $l ), $m, PREG_OFFSET_CAPTURE ) ) {
368 return $m[0][1] +
$s + $offset;
371 $char = ord( $text[$point] );
372 while ( $char >= 0x80 && $char < 0xc0 ) {
375 if ( $point >= strlen( $text ) ) {
376 return strlen( $text );
378 $char = ord( $text[$point] );
/**
 * Search the lines of $extracts for $pattern and record a snippet window for
 * each match, until the line budget is used up.
 *
 * NOTE(review): this listing is a sampled excerpt; the statements that set
 * $offset, $posBegin and $out[$index], and that decrement $linesleft, are
 * not shown.
 *
 * @param string $pattern Complete regex, handed to preg_match().
 * @param array $extracts Lines to search, keyed by index.
 * @param int &$linesleft Remaining snippet budget; nothing is done once it is 0.
 * @param int &$contextchars Width of the context window, compared against
 *   byte lengths from strlen().
 * @param array &$out Snippets collected so far, keyed by line index; indexes
 *   already present are skipped.
 * @param array &$offsets Snippet start position per line index.
 */
396 function process( $pattern, $extracts, &$linesleft, &$contextchars, &
$out, &$offsets ) {
// Budget already used up.
397 if ( $linesleft == 0 ) {
400 foreach ( $extracts
as $index =>
$line ) {
// This line already has a snippet, from an earlier pattern.
401 if ( array_key_exists( $index,
$out ) ) {
// The pattern does not occur in this line.
406 if ( !preg_match( $pattern,
$line, $m, PREG_OFFSET_CAPTURE ) ) {
// Byte length of the whole match.
411 $len = strlen( $m[0][0] );
// The match ends within the first $contextchars bytes.
// NOTE(review): $offset is presumably the match offset $m[0][1]; its assignment is not visible.
412 if ( $offset + $len < $contextchars ) {
// The match alone is longer than the context window.
414 } elseif ( $len > $contextchars ) {
// This start value centres a $contextchars-wide window on the match.
417 $begin = $offset + intval( ( $len - $contextchars ) / 2 );
420 $end = $begin + $contextchars;
// NOTE(review): $posBegin is presumably filled in by reference by an extract() call not shown here.
425 $offsets[$index] = $posBegin;
// Checks whether the budget has run out after handling this line.
427 if ( $linesleft == 0 ) {
448 $text = preg_replace(
"/\\{\\{([^|]+?)\\}\\}/",
"", $text );
449 $text = preg_replace(
"/\\{\\{([^|]+\\|)(.*?)\\}\\}/",
"\\2", $text );
450 $text = preg_replace(
"/\\[\\[([^|]+?)\\]\\]/",
"\\1", $text );
451 $text = preg_replace_callback(
"/\\[\\[([^|]+\\|)(.*?)\\]\\]/",
array( $this,
'linkReplace' ), $text );
453 $text = preg_replace(
"/<\/?[^>]+>/",
"", $text );
454 $text = preg_replace(
"/'''''/",
"", $text );
455 $text = preg_replace(
"/('''|<\/?[iIuUbB]>)/",
"", $text );
456 $text = preg_replace(
"/''/",
"", $text );
474 $colon = strpos(
$matches[1],
':' );
475 if ( $colon ===
false ) {
479 $ns = substr(
$matches[1], 0, $colon );
502 $lines = explode(
"\n", $text );
504 $terms = implode(
'|', $terms );
505 $max = intval( $contextchars ) + 1;
506 $pat1 =
"/(.*)($terms)(.{0,$max})/i";
513 if ( 0 == $contextlines ) {
518 if ( ! preg_match( $pat1,
$line, $m ) ) {
525 if ( count( $m ) < 3 ) {
528 $post =
$wgContLang->truncate( $m[3], $contextchars,
'...',
false );
533 $line = htmlspecialchars(
$pre . $found . $post );
534 $pat2 =
'/(' . $terms .
")/i";
535 $line = preg_replace( $pat2,
"<span class='searchmatch'>\\1</span>",
$line );
537 $extract .=
"${line}\n";