Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
7.95% |
21 / 264 |
|
0.00% |
0 / 11 |
CRAP | |
0.00% |
0 / 1 |
| SearchHighlighter | |
7.98% |
21 / 263 |
|
0.00% |
0 / 11 |
5320.50 | |
0.00% |
0 / 1 |
| __construct | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| highlightText | |
0.00% |
0 / 149 |
|
0.00% |
0 / 1 |
1980 | |||
| splitAndAdd | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
20 | |||
| caseCallback | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
6 | |||
| extract | |
0.00% |
0 / 12 |
|
0.00% |
0 / 1 |
42 | |||
| position | |
0.00% |
0 / 18 |
|
0.00% |
0 / 1 |
30 | |||
| process | |
0.00% |
0 / 22 |
|
0.00% |
0 / 1 |
72 | |||
| removeWiki | |
0.00% |
0 / 14 |
|
0.00% |
0 / 1 |
2 | |||
| linkReplace | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
30 | |||
| highlightSimple | |
91.30% |
21 / 23 |
|
0.00% |
0 / 1 |
5.02 | |||
| highlightNone | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
2 | |||
| 1 | <?php |
| 2 | /** |
| 3 | * Basic search engine highlighting |
| 4 | * |
| 5 | * @license GPL-2.0-or-later |
| 6 | * @file |
| 7 | * @ingroup Search |
| 8 | */ |
| 9 | |
| 10 | namespace MediaWiki\Search; |
| 11 | |
| 12 | use MediaWiki\MainConfigNames; |
| 13 | use MediaWiki\MediaWikiServices; |
| 14 | use MediaWiki\Parser\Sanitizer; |
| 15 | use MediaWiki\Registration\ExtensionRegistry; |
| 16 | |
/**
 * Highlight bits of wikitext
 *
 * Builds short result snippets from page text for search result listings.
 * Three strategies are offered: highlightText() (context-aware, strips
 * wikitext), highlightSimple() (per-line regex match) and highlightNone()
 * (just the first lines of the text).
 *
 * @newable
 * @note marked as newable in 1.35 for lack of a better alternative,
 *   but should use a factory in the future.
 * @ingroup Search
 */
class SearchHighlighter {
	public const DEFAULT_CONTEXT_LINES = 2;
	public const DEFAULT_CONTEXT_CHARS = 75;

	/**
	 * @var bool Whether splitAndAdd() runs text through removeWiki()
	 *   (which also HTML-escapes it) before it is stored as an extract.
	 */
	protected $mCleanWikitext = true;

	/**
	 * @stable to call
	 * @warning If you pass false to this constructor, then
	 *  the caller is responsible for HTML escaping.
	 * @param bool $cleanupWikitext
	 */
	public function __construct( $cleanupWikitext = true ) {
		$this->mCleanWikitext = $cleanupWikitext;
	}

	/**
	 * Wikitext highlighting when $wgAdvancedSearchHighlighting = true
	 *
	 * The text is first split into "text" extracts and "other" extracts
	 * (templates, file links, tables and, if Cite is loaded, references).
	 * Lines matching the whole phrase are preferred over lines matching any
	 * single term, and text extracts are preferred over other extracts.
	 * The chosen snippets are padded to a roughly constant size, joined
	 * with ellipsis markers, and the terms are wrapped in
	 * <span class='searchmatch'>.
	 *
	 * @param string $text
	 * @param string[] $terms Terms to highlight (not html escaped but
	 *   regex escaped via SearchDatabase::regexTerm())
	 * @param int $contextlines
	 * @param int $contextchars
	 * @return string
	 */
	public function highlightText(
		$text,
		$terms,
		$contextlines = self::DEFAULT_CONTEXT_LINES,
		$contextchars = self::DEFAULT_CONTEXT_CHARS
	) {
		$searchHighlightBoundaries = MediaWikiServices::getInstance()
			->getMainConfig()->get( MainConfigNames::SearchHighlightBoundaries );

		if ( $text == '' ) {
			return '';
		}

		// split text into text + templates/links/tables
		$spat = "/(\\{\\{)|(\\[\\[[^\\]:]+:)|(\n\\{\\|)";
		// first capture group is for detecting nested templates/links/tables/references
		$endPatterns = [
			1 => '/(\{\{)|(\}\})/', // template
			2 => '/(\[\[)|(\]\])/', // image
			3 => "/(\n\\{\\|)|(\n\\|\\})/" ]; // table

		// @todo FIXME: This should prolly be a hook or something
		// instead of hardcoding the name of the Cite extension
		if ( ExtensionRegistry::getInstance()->isLoaded( 'Cite' ) ) {
			$spat .= '|(<ref>)'; // references via cite extension
			$endPatterns[4] = '/(<ref>)|(<\/ref>)/';
		}
		$spat .= '/';
		$textExt = []; // text extracts
		$otherExt = []; // other extracts
		$start = 0;
		$textLen = strlen( $text );
		$count = 0; // sequence number to maintain ordering
		while ( $start < $textLen ) {
			// find start of template/image/table
			if ( preg_match( $spat, $text, $matches, PREG_OFFSET_CAPTURE, $start ) ) {
				$epat = '';
				foreach ( $matches as $key => $val ) {
					// With PREG_OFFSET_CAPTURE, groups that did not take part
					// in the match carry offset -1; key 0 is the whole match.
					if ( $key > 0 && $val[1] != -1 ) {
						if ( $key == 2 ) {
							// see if this is an image link
							$ns = substr( $val[0], 2, -1 );
							if (
								MediaWikiServices::getInstance()->getContentLanguage()->
								getNsIndex( $ns ) !== NS_FILE
							) {
								// Not a file link: leave $epat empty so the
								// remainder is added as plain text below.
								break;
							}

						}
						$epat = $endPatterns[$key];
						$this->splitAndAdd( $textExt, $count, substr( $text, $start, $val[1] - $start ) );
						$start = $val[1];
						break;
					}
				}
				if ( $epat ) {
					// find end (and detect any nested elements)
					$level = 0;
					$offset = $start + 1;
					$found = false;
					while ( preg_match( $epat, $text, $endMatches, PREG_OFFSET_CAPTURE, $offset ) ) {
						if ( array_key_exists( 2, $endMatches ) ) {
							// found end
							if ( $level == 0 ) {
								$len = strlen( $endMatches[2][0] );
								$off = $endMatches[2][1];
								$this->splitAndAdd( $otherExt, $count,
									substr( $text, $start, $off + $len - $start ) );
								$start = $off + $len;
								$found = true;
								break;
							} else {
								// end of nested element
								$level--;
							}
						} else {
							// nested
							$level++;
						}
						$offset = $endMatches[0][1] + strlen( $endMatches[0][0] );
					}
					if ( !$found ) {
						// couldn't find appropriate closing tag, skip
						$this->splitAndAdd( $textExt, $count, substr( $text, $start, strlen( $matches[0][0] ) ) );
						$start += strlen( $matches[0][0] );
					}
					continue;
				}
			}
			// else: add as text extract
			$this->splitAndAdd( $textExt, $count, substr( $text, $start ) );
			break;
		}
		'@phan-var string[] $textExt';

		$all = $textExt + $otherExt; // these have disjunct key sets

		// prepare regexps
		foreach ( $terms as $index => $term ) {
			// manually do upper/lowercase stuff for utf-8 since PHP won't do it;
			// pure-ASCII terms are left untouched
			if ( preg_match( '/[\x80-\xff]/', $term ) ) {
				$terms[$index] = preg_replace_callback(
					'/./us',
					$this->caseCallback( ... ),
					$terms[$index]
				);
			}
		}
		$anyterm = implode( '|', $terms );
		$phrase = implode( "{$searchHighlightBoundaries}+", $terms );
		// @todo FIXME: A hack to scale contextchars, a correct solution
		// would be to have contextchars actually be char and not byte
		// length, and do proper utf-8 substrings and lengths everywhere,
		// but PHP is making that very hard and unclean to implement :(
		// Guard against an empty term list (or only empty terms), which
		// would otherwise divide by zero.
		$anytermChars = mb_strlen( $anyterm );
		$scale = $anytermChars > 0 ? strlen( $anyterm ) / $anytermChars : 1;
		$contextchars = intval( $contextchars * $scale );

		$patPre = "(^|{$searchHighlightBoundaries})";
		$patPost = "({$searchHighlightBoundaries}|$)";

		$pat1 = "/(" . $phrase . ")/ui";
		$pat2 = "/$patPre(" . $anyterm . ")$patPost/ui";

		$left = $contextlines;

		$snippets = [];
		$offsets = [];

		// show beginning only if it contains all words
		$first = 0;
		$firstText = '';
		foreach ( $textExt as $index => $line ) {
			// skip lines starting with ';' or ':'
			if ( $line !== '' && $line[0] != ';' && $line[0] != ':' ) {
				$firstText = $this->extract( $line, 0, $contextchars * $contextlines );
				$first = $index;
				break;
			}
		}
		// Compare against '' explicitly: the text "0" is falsy in PHP
		if ( $firstText !== '' ) {
			$succ = true;
			// check if first text contains all terms
			foreach ( $terms as $term ) {
				if ( !preg_match( "/$patPre" . $term . "$patPost/ui", $firstText ) ) {
					$succ = false;
					break;
				}
			}
			if ( $succ ) {
				$snippets[$first] = $firstText;
				$offsets[$first] = 0;
			}
		}
		if ( !$snippets ) {
			// match whole query on text
			$this->process( $pat1, $textExt, $left, $contextchars, $snippets, $offsets );
			// match whole query on templates/tables/images
			$this->process( $pat1, $otherExt, $left, $contextchars, $snippets, $offsets );
			// match any words on text
			$this->process( $pat2, $textExt, $left, $contextchars, $snippets, $offsets );
			// match any words on templates/tables/images
			$this->process( $pat2, $otherExt, $left, $contextchars, $snippets, $offsets );

			ksort( $snippets );
		}

		// add extra chars to each snippet to make snippets constant size
		$extended = [];
		// Only read inside the loop below, which runs when $snippets is
		// non-empty; both branches that fill $snippets also set this.
		$targetchars = 0;
		if ( count( $snippets ) == 0 ) {
			// couldn't find the target words, just show beginning of article
			if ( array_key_exists( $first, $all ) ) {
				$targetchars = $contextchars * $contextlines;
				$snippets[$first] = '';
				$offsets[$first] = 0;
			}
		} else {
			// if begin of the article contains the whole phrase, show only that !!
			if ( array_key_exists( $first, $snippets ) && preg_match( $pat1, $snippets[$first] )
				&& $offsets[$first] < $contextchars * 2 ) {
				$snippets = [ $first => $snippets[$first] ];
			}

			// calc by how much to extend existing snippets
			$targetchars = intval( ( $contextchars * $contextlines ) / count( $snippets ) );
		}

		foreach ( $snippets as $index => $line ) {
			$extended[$index] = $line;
			$len = strlen( $line );
			if ( $len < $targetchars - 20 ) {
				// complete this line
				if ( $len < strlen( $all[$index] ) ) {
					$extended[$index] = $this->extract(
						$all[$index],
						$offsets[$index],
						$offsets[$index] + $targetchars,
						$offsets[$index]
					);
					$len = strlen( $extended[$index] );
				}

				// add more lines
				$add = $index + 1;
				while ( $len < $targetchars - 20
					&& array_key_exists( $add, $all )
					&& !array_key_exists( $add, $snippets ) ) {
					$offsets[$add] = 0;
					$tt = "\n" . $this->extract( $all[$add], 0, $targetchars - $len, $offsets[$add] );
					$extended[$add] = $tt;
					$len += strlen( $tt );
					$add++;
				}
			}
		}

		// No escaping here: extracts were already escaped by removeWiki()
		// unless the caller opted out via the constructor.
		$snippets = $extended;
		$last = -1;
		$extract = '';
		foreach ( $snippets as $index => $line ) {
			if ( $last == -1 ) {
				$extract .= $line; // first line
			} elseif ( $last + 1 == $index
				&& $offsets[$last] + strlen( $snippets[$last] ) >= strlen( $all[$last] )
			) {
				$extract .= " " . $line; // continuous lines
			} else {
				$extract .= '<b> ... </b>' . $line;
			}

			$last = $index;
		}
		if ( $extract !== '' ) {
			$extract .= '<b> ... </b>';
		}

		// Wrap each distinct term once
		$processed = [];
		foreach ( $terms as $term ) {
			if ( !isset( $processed[$term] ) ) {
				$pat3 = "/$patPre(" . $term . ")$patPost/ui"; // highlight word
				$extract = preg_replace( $pat3,
					"\\1<span class='searchmatch'>\\2</span>\\3", $extract );
				$processed[$term] = true;
			}
		}

		return $extract;
	}

	/**
	 * Split text into lines and add it to extracts array
	 *
	 * Each line is trimmed and empty lines are dropped. If wikitext cleanup
	 * is enabled, the text goes through removeWiki() first.
	 *
	 * @param string[] &$extracts Index -> $line
	 * @param int &$count Next free index; incremented for every line added
	 * @param string $text
	 */
	private function splitAndAdd( &$extracts, &$count, $text ) {
		$split = explode( "\n", $this->mCleanWikitext ? $this->removeWiki( $text ) : $text );
		foreach ( $split as $line ) {
			$tt = trim( $line );
			// Explicit comparison so that a line consisting of "0" is kept
			if ( $tt !== '' ) {
				$extracts[$count++] = $tt;
			}
		}
	}

	/**
	 * Do manual case conversion for non-ascii chars
	 *
	 * Used as a preg_replace_callback() callback over single characters:
	 * a multibyte character becomes a regex character class holding its
	 * lower- and upper-case forms; single-byte characters pass through.
	 *
	 * @param array $matches
	 * @return string
	 */
	private function caseCallback( $matches ) {
		if ( strlen( $matches[0] ) > 1 ) {
			$contLang = MediaWikiServices::getInstance()->getContentLanguage();
			return '[' . $contLang->lc( $matches[0] ) .
				$contLang->uc( $matches[0] ) . ']';
		} else {
			return $matches[0];
		}
	}

	/**
	 * Extract part of the text from start to end, but by
	 * not chopping up words
	 *
	 * @param string $text
	 * @param int $start Byte offset; adjusted via position() unless 0
	 * @param int $end Byte offset; clamped to the text length, otherwise
	 *   adjusted via position()
	 * @param int|null &$posStart (out) actual start position; only written
	 *   when the variable passed in is not null
	 * @param int|null &$posEnd (out) actual end position; only written
	 *   when the variable passed in is not null
	 * @return string Empty string if the adjusted end is not after the start
	 */
	private function extract( $text, $start, $end, &$posStart = null, &$posEnd = null ) {
		if ( $start != 0 ) {
			// offset 1: start just after the boundary character found
			$start = $this->position( $text, $start, 1 );
		}
		if ( $end >= strlen( $text ) ) {
			$end = strlen( $text );
		} else {
			$end = $this->position( $text, $end );
		}

		if ( $posStart !== null ) {
			$posStart = $start;
		}
		if ( $posEnd !== null ) {
			$posEnd = $end;
		}

		if ( $end > $start ) {
			return substr( $text, $start, $end - $start );
		} else {
			return '';
		}
	}

	/**
	 * Find a nonletter near a point (index) in the text
	 *
	 * Looks at a window of up to 10 bytes either side of $point and uses
	 * the first boundary character found in that window.
	 *
	 * @param string $text
	 * @param int $point
	 * @param int $offset Offset to found index
	 * @return int Index of the nonletter found (plus $offset), or beginning
	 *   of utf8 char if none
	 */
	private function position( $text, $point, $offset = 0 ) {
		$tolerance = 10;
		$textLen = strlen( $text );
		$s = max( 0, $point - $tolerance );
		$l = min( $textLen, $point + $tolerance ) - $s;
		$m = [];

		if ( preg_match(
			'/[ ,.!?~!@#$%^&*\(\)+=\-\\\|\[\]"\'<>]/',
			substr( $text, $s, $l ),
			$m,
			PREG_OFFSET_CAPTURE
		) ) {
			return $m[0][1] + $s + $offset;
		} else {
			// Never index past the end of the string
			if ( $point >= $textLen ) {
				return $textLen;
			}
			// check if point is on a valid first UTF8 char
			$char = ord( $text[$point] );
			while ( $char >= 0x80 && $char < 0xc0 ) {
				// skip trailing bytes
				$point++;
				if ( $point >= $textLen ) {
					return $textLen;
				}
				$char = ord( $text[$point] );
			}

			return $point;

		}
	}

	/**
	 * Search extracts for a pattern, and return snippets
	 *
	 * Extracts whose index already exists in $out are skipped. Stops as
	 * soon as $linesleft reaches zero.
	 *
	 * @param string $pattern Regexp for matching lines
	 * @param array $extracts Extracts to search
	 * @param int &$linesleft Number of extracts to make; decremented per snippet
	 * @param int &$contextchars Length of snippet
	 * @param array &$out Map for highlighted snippets
	 * @param array &$offsets Map of starting points of snippets
	 */
	private function process( $pattern, $extracts, &$linesleft, &$contextchars, &$out, &$offsets ) {
		if ( $linesleft == 0 ) {
			return; // nothing to do
		}
		foreach ( $extracts as $index => $line ) {
			if ( array_key_exists( $index, $out ) ) {
				continue; // this line already highlighted
			}

			$m = [];
			if ( !preg_match( $pattern, $line, $m, PREG_OFFSET_CAPTURE ) ) {
				continue;
			}

			$offset = $m[0][1];
			$len = strlen( $m[0][0] );
			if ( $offset + $len < $contextchars ) {
				// match ends within the first $contextchars bytes
				$begin = 0;
			} elseif ( $len > $contextchars ) {
				// match is longer than the snippet: start at the match
				$begin = $offset;
			} else {
				// centre the match within the snippet
				$begin = $offset + intval( ( $len - $contextchars ) / 2 );
			}

			$end = $begin + $contextchars;

			// Must be non-null so that extract() writes the real start back
			$posBegin = $begin;
			// basic snippet from this line
			$out[$index] = $this->extract( $line, $begin, $end, $posBegin );
			$offsets[$index] = $posBegin;
			$linesleft--;
			if ( $linesleft == 0 ) {
				return;
			}
		}
	}

	/**
	 * Basic wikitext removal
	 *
	 * Strips simple templates, links, HTML-like tags and quote-based
	 * bold/italic markup with regular expressions, then HTML-escapes
	 * the result.
	 *
	 * @param string $text
	 * @return string
	 */
	private function removeWiki( $text ) {
		// {{foo}} -> removed; {{foo|bar}} -> bar
		$text = preg_replace( "/\\{\\{([^|]+?)\\}\\}/", "", $text );
		$text = preg_replace( "/\\{\\{([^|]+\\|)(.*?)\\}\\}/", "\\2", $text );
		// [[target]] -> target; [[target|caption]] handled by linkReplace()
		$text = preg_replace( "/\\[\\[([^|]+?)\\]\\]/", "\\1", $text );
		$text = preg_replace_callback(
			"/\\[\\[([^|]+\\|)(.*?)\\]\\]/",
			$this->linkReplace( ... ),
			$text
		);
		$text = preg_replace( "/<\/?[^>]+>/", "", $text );
		$text = preg_replace( "/'''''/", "", $text );
		$text = preg_replace( "/('''|<\/?[iIuUbB]>)/", "", $text );
		$text = preg_replace( "/''/", "", $text );

		// Note, the previous /<\/?[^>]+>/ is insufficient
		// for XSS safety as the HTML tag can span multiple
		// search results (T144845).
		$text = Sanitizer::escapeHtmlAllowEntities( $text );
		return $text;
	}

	/**
	 * callback to replace [[target|caption]] kind of links, if
	 * the target is category or image, leave it
	 *
	 * @param array $matches Index 0 is the whole link, 1 is "target|",
	 *   2 is the caption
	 * @return string
	 */
	private function linkReplace( $matches ) {
		$colon = strpos( $matches[1], ':' );
		if ( $colon === false ) {
			return $matches[2]; // replace with caption
		}
		$ns = substr( $matches[1], 0, $colon );
		$index = MediaWikiServices::getInstance()->getContentLanguage()->getNsIndex( $ns );
		// Strict comparison: an unrecognised prefix never equals either constant
		if ( $index === NS_FILE || $index === NS_CATEGORY ) {
			return $matches[0]; // return the whole thing
		}
		return $matches[2];
	}

	/**
	 * Simple & fast snippet extraction, but gives completely irrelevant
	 * snippets
	 *
	 * Used when $wgAdvancedSearchHighlighting is false.
	 *
	 * Emits one output line per matching input line, up to $contextlines,
	 * each terminated by a newline.
	 *
	 * NOTE(review): terms are highlighted after htmlspecialchars() has run,
	 * so a term can also match inside an entity produced by the escaping
	 * (e.g. "amp" in "&amp;") -- confirm whether that is acceptable.
	 *
	 * @param string $text
	 * @param string[] $terms Escaped for regex by SearchDatabase::regexTerm()
	 * @param int $contextlines
	 * @param int $contextchars
	 * @return string
	 */
	public function highlightSimple(
		$text,
		$terms,
		$contextlines = self::DEFAULT_CONTEXT_LINES,
		$contextchars = self::DEFAULT_CONTEXT_CHARS
	) {
		$lines = explode( "\n", $text );

		$terms = implode( '|', $terms );
		$max = intval( $contextchars ) + 1;
		// (.*) is greedy, so group 2 is the last occurrence on the line
		$pat1 = "/(.*)($terms)(.{0,$max})/ui";

		$extract = '';
		$contLang = MediaWikiServices::getInstance()->getContentLanguage();
		foreach ( $lines as $line ) {
			if ( $contextlines == 0 ) {
				break;
			}
			$m = [];
			if ( !preg_match( $pat1, $line, $m ) ) {
				continue;
			}
			--$contextlines;
			// truncate function changes ... to relevant i18n message.
			$pre = $contLang->truncateForVisual( $m[1], -$contextchars, '...', false );

			if ( count( $m ) < 3 ) {
				$post = '';
			} else {
				$post = $contLang->truncateForVisual( $m[3], $contextchars, '...', false );
			}

			$found = $m[2];

			$line = htmlspecialchars( $pre . $found . $post );
			$pat2 = '/(' . $terms . ')/ui';
			$line = preg_replace( $pat2, '<span class="searchmatch">\1</span>', $line );

			$extract .= "{$line}\n";
		}

		return $extract;
	}

	/**
	 * Returns the first few lines of the text
	 *
	 * Leading whitespace and empty lines are removed, the first
	 * $contextlines lines are kept, and the result is limited to
	 * $contextlines * $contextchars bytes (cut on a UTF-8 character
	 * boundary), HTML-escaped, and joined with <br>.
	 *
	 * @param string $text
	 * @param int $contextlines Max number of returned lines
	 * @param int $contextchars Average number of characters per line
	 * @return string
	 */
	public function highlightNone(
		$text,
		$contextlines = self::DEFAULT_CONTEXT_LINES,
		$contextchars = self::DEFAULT_CONTEXT_CHARS
	) {
		$match = [];
		$text = ltrim( $text ) . "\n"; // make sure the preg_match may find the last line
		// remove empty lines; collapse whole runs, since replacing "\n\n"
		// pairwise would leave blank lines behind for 3+ newlines
		$text = preg_replace( '/\n{2,}/', "\n", $text );
		preg_match( "/^(.*\n){0,$contextlines}/", $text, $match );

		// Trim and limit to max number of chars. mb_strcut() respects the
		// byte limit but never splits a multibyte character.
		$text = htmlspecialchars(
			mb_strcut( trim( $match[0] ), 0, $contextlines * $contextchars, 'UTF-8' )
		);
		return str_replace( "\n", '<br>', $text );
	}
}
| 589 | |
| 590 | /** @deprecated class alias since 1.46; use the namespaced MediaWiki\Search\SearchHighlighter instead */ |
| 591 | class_alias( SearchHighlighter::class, 'SearchHighlighter' ); |