Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
7.98% |
21 / 263 |
|
0.00% |
0 / 11 |
CRAP | |
0.00% |
0 / 1 |
SearchHighlighter | |
7.98% |
21 / 263 |
|
0.00% |
0 / 11 |
5320.50 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
highlightText | |
0.00% |
0 / 149 |
|
0.00% |
0 / 1 |
1980 | |||
splitAndAdd | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
20 | |||
caseCallback | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
6 | |||
extract | |
0.00% |
0 / 12 |
|
0.00% |
0 / 1 |
42 | |||
position | |
0.00% |
0 / 18 |
|
0.00% |
0 / 1 |
30 | |||
process | |
0.00% |
0 / 22 |
|
0.00% |
0 / 1 |
72 | |||
removeWiki | |
0.00% |
0 / 14 |
|
0.00% |
0 / 1 |
2 | |||
linkReplace | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
30 | |||
highlightSimple | |
91.30% |
21 / 23 |
|
0.00% |
0 / 1 |
5.02 | |||
highlightNone | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
2 |
1 | <?php |
2 | /** |
3 | * Basic search engine highlighting |
4 | * |
5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by |
7 | * the Free Software Foundation; either version 2 of the License, or |
8 | * (at your option) any later version. |
9 | * |
10 | * This program is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | * GNU General Public License for more details. |
14 | * |
15 | * You should have received a copy of the GNU General Public License along |
16 | * with this program; if not, write to the Free Software Foundation, Inc., |
17 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
18 | * http://www.gnu.org/copyleft/gpl.html |
19 | * |
20 | * @file |
21 | * @ingroup Search |
22 | */ |
23 | |
24 | use MediaWiki\MainConfigNames; |
25 | use MediaWiki\MediaWikiServices; |
26 | use MediaWiki\Parser\Sanitizer; |
27 | use MediaWiki\Registration\ExtensionRegistry; |
28 | |
29 | /** |
30 | * Highlight bits of wikitext |
31 | * |
32 | * @newable |
33 | * @note marked as newable in 1.35 for lack of a better alternative, |
34 | * but should use a factory in the future. |
35 | * @ingroup Search |
36 | */ |
37 | class SearchHighlighter { |
/** Default for the $contextlines argument of the highlight*() methods. */
public const DEFAULT_CONTEXT_LINES = 2;
/** Default for the $contextchars argument of the highlight*() methods. */
public const DEFAULT_CONTEXT_CHARS = 75;

/**
 * Whether splitAndAdd() passes extracted text through removeWiki()
 * before storing it.
 * @var bool
 */
protected $mCleanWikitext = true;
43 | |
/**
 * Create a highlighter.
 *
 * @stable to call
 * @warning If you pass false to this constructor, then
 *  the caller is responsible for HTML escaping: the flag turns off the
 *  removeWiki() pass that splitAndAdd() otherwise applies, and that
 *  pass is where the text gets escaped.
 * @param bool $cleanupWikitext Stored in $mCleanWikitext
 */
public function __construct( $cleanupWikitext = true ) {
	$this->mCleanWikitext = $cleanupWikitext;
}
53 | |
/**
 * Wikitext highlighting when $wgAdvancedSearchHighlighting = true
 *
 * The text is split into plain-text lines and "other" lines (templates,
 * file links, tables and, when the Cite extension is loaded, <ref>
 * blocks). Lines matching the search terms are picked as snippets,
 * padded towards a budget of $contextchars * $contextlines bytes, joined
 * with '<b> ... </b>' separators, and every matched term is wrapped in
 * <span class='searchmatch'>.
 *
 * @param string $text
 * @param string[] $terms Terms to highlight (not html escaped but
 *   regex escaped via SearchDatabase::regexTerm())
 * @param int $contextlines
 * @param int $contextchars
 * @return string
 */
public function highlightText(
	$text,
	$terms,
	$contextlines = self::DEFAULT_CONTEXT_LINES,
	$contextchars = self::DEFAULT_CONTEXT_CHARS
) {
	$searchHighlightBoundaries = MediaWikiServices::getInstance()
		->getMainConfig()->get( MainConfigNames::SearchHighlightBoundaries );

	if ( $text == '' ) {
		return '';
	}

	// split text into text + templates/links/tables
	$spat = "/(\\{\\{)|(\\[\\[[^\\]:]+:)|(\n\\{\\|)";
	// first capture group is for detecting nested templates/links/tables/references
	$endPatterns = [
		1 => '/(\{\{)|(\}\})/', // template
		2 => '/(\[\[)|(\]\])/', // image
		3 => "/(\n\\{\\|)|(\n\\|\\})/" ]; // table

	// @todo FIXME: This should probably be a hook or something
	// instead of hardcoding the name of the Cite extension
	if ( ExtensionRegistry::getInstance()->isLoaded( 'Cite' ) ) {
		$spat .= '|(<ref>)'; // references via cite extension
		$endPatterns[4] = '/(<ref>)|(<\/ref>)/';
	}
	$spat .= '/';
	$textExt = []; // text extracts
	$otherExt = []; // other extracts
	$start = 0;
	$textLen = strlen( $text );
	$count = 0; // sequence number to maintain ordering
	while ( $start < $textLen ) {
		// find start of template/image/table
		if ( preg_match( $spat, $text, $matches, PREG_OFFSET_CAPTURE, $start ) ) {
			$epat = '';
			foreach ( $matches as $key => $val ) {
				if ( $key > 0 && $val[1] != -1 ) {
					if ( $key == 2 ) {
						// see if this is an image link
						$ns = substr( $val[0], 2, -1 );
						if (
							MediaWikiServices::getInstance()->getContentLanguage()->
							getNsIndex( $ns ) !== NS_FILE
						) {
							break;
						}

					}
					$epat = $endPatterns[$key];
					$this->splitAndAdd( $textExt, $count, substr( $text, $start, $val[1] - $start ) );
					$start = $val[1];
					break;
				}
			}
			if ( $epat ) {
				// find end (and detect any nested elements)
				$level = 0;
				$offset = $start + 1;
				$found = false;
				while ( preg_match( $epat, $text, $endMatches, PREG_OFFSET_CAPTURE, $offset ) ) {
					if ( array_key_exists( 2, $endMatches ) ) {
						// found end
						if ( $level == 0 ) {
							$len = strlen( $endMatches[2][0] );
							$off = $endMatches[2][1];
							$this->splitAndAdd( $otherExt, $count,
								substr( $text, $start, $off + $len - $start ) );
							$start = $off + $len;
							$found = true;
							break;
						} else {
							// end of nested element
							$level--;
						}
					} else {
						// nested
						$level++;
					}
					$offset = $endMatches[0][1] + strlen( $endMatches[0][0] );
				}
				if ( !$found ) {
					// couldn't find appropriate closing tag, skip
					$this->splitAndAdd( $textExt, $count, substr( $text, $start, strlen( $matches[0][0] ) ) );
					$start += strlen( $matches[0][0] );
				}
				continue;
			}
		}
		// else: add as text extract
		$this->splitAndAdd( $textExt, $count, substr( $text, $start ) );
		break;
	}
	'@phan-var string[] $textExt';

	$all = $textExt + $otherExt; // these have disjunct key sets

	// prepare regexps
	foreach ( $terms as $index => $term ) {
		// manually do upper/lowercase stuff for utf-8 since PHP won't do it
		if ( preg_match( '/[\x80-\xff]/', $term ) ) {
			$terms[$index] = preg_replace_callback(
				'/./us',
				[ $this, 'caseCallback' ],
				$term
			);
		}
	}
	$anyterm = implode( '|', $terms );
	$phrase = implode( "{$searchHighlightBoundaries}+", $terms );
	// @todo FIXME: A hack to scale contextchars, a correct solution
	// would be to have contextchars actually be char and not byte
	// length, and do proper utf-8 substrings and lengths everywhere,
	// but PHP is making that very hard and unclean to implement :(
	//
	// $anyterm is empty when no terms (or only empty terms) were given;
	// dividing by its length would throw DivisionByZeroError on PHP 8,
	// so fall back to a neutral scale of 1 in that case.
	$anytermChars = mb_strlen( $anyterm );
	$scale = $anytermChars > 0 ? strlen( $anyterm ) / $anytermChars : 1;
	$contextchars = intval( $contextchars * $scale );

	$patPre = "(^|{$searchHighlightBoundaries})";
	$patPost = "({$searchHighlightBoundaries}|$)";

	$pat1 = "/(" . $phrase . ")/ui";
	$pat2 = "/$patPre(" . $anyterm . ")$patPost/ui";

	$left = $contextlines;

	$snippets = [];
	$offsets = [];

	// show beginning only if it contains all words
	$first = 0;
	$firstText = '';
	foreach ( $textExt as $index => $line ) {
		if ( $line !== '' && $line[0] != ';' && $line[0] != ':' ) {
			$firstText = $this->extract( $line, 0, $contextchars * $contextlines );
			$first = $index;
			break;
		}
	}
	if ( $firstText ) {
		$succ = true;
		// check if first text contains all terms
		foreach ( $terms as $term ) {
			if ( !preg_match( "/$patPre" . $term . "$patPost/ui", $firstText ) ) {
				$succ = false;
				break;
			}
		}
		if ( $succ ) {
			$snippets[$first] = $firstText;
			$offsets[$first] = 0;
		}
	}
	if ( !$snippets ) {
		// match whole query on text
		$this->process( $pat1, $textExt, $left, $contextchars, $snippets, $offsets );
		// match whole query on templates/tables/images
		$this->process( $pat1, $otherExt, $left, $contextchars, $snippets, $offsets );
		// match any words on text
		$this->process( $pat2, $textExt, $left, $contextchars, $snippets, $offsets );
		// match any words on templates/tables/images
		$this->process( $pat2, $otherExt, $left, $contextchars, $snippets, $offsets );

		ksort( $snippets );
	}

	// add extra chars to each snippet to make snippets constant size
	$extended = [];
	// Full budget by default; assigning it here keeps $targetchars defined
	// on every path, including the one where no snippet can be built.
	$targetchars = $contextchars * $contextlines;
	if ( count( $snippets ) == 0 ) {
		// couldn't find the target words, just show beginning of article
		if ( array_key_exists( $first, $all ) ) {
			$snippets[$first] = '';
			$offsets[$first] = 0;
		}
	} else {
		// if begin of the article contains the whole phrase, show only that !!
		if ( array_key_exists( $first, $snippets ) && preg_match( $pat1, $snippets[$first] )
			&& $offsets[$first] < $contextchars * 2 ) {
			$snippets = [ $first => $snippets[$first] ];
		}

		// calc by how much to extend existing snippets
		$targetchars = intval( ( $contextchars * $contextlines ) / count( $snippets ) );
	}

	foreach ( $snippets as $index => $line ) {
		$extended[$index] = $line;
		$len = strlen( $line );
		if ( $len < $targetchars - 20 ) {
			// complete this line
			if ( $len < strlen( $all[$index] ) ) {
				$extended[$index] = $this->extract(
					$all[$index],
					$offsets[$index],
					$offsets[$index] + $targetchars,
					$offsets[$index]
				);
				$len = strlen( $extended[$index] );
			}

			// add more lines
			$add = $index + 1;
			while ( $len < $targetchars - 20
				&& array_key_exists( $add, $all )
				&& !array_key_exists( $add, $snippets ) ) {
				$offsets[$add] = 0;
				$tt = "\n" . $this->extract( $all[$add], 0, $targetchars - $len, $offsets[$add] );
				$extended[$add] = $tt;
				$len += strlen( $tt );
				$add++;
			}
		}
	}

	// $snippets = array_map( 'htmlspecialchars', $extended );
	$snippets = $extended;
	$last = -1;
	$extract = '';
	foreach ( $snippets as $index => $line ) {
		if ( $last == -1 ) {
			$extract .= $line; // first line
		} elseif ( $last + 1 == $index
			&& $offsets[$last] + strlen( $snippets[$last] ) >= strlen( $all[$last] )
		) {
			$extract .= " " . $line; // continuous lines
		} else {
			$extract .= '<b> ... </b>' . $line;
		}

		$last = $index;
	}
	if ( $extract ) {
		$extract .= '<b> ... </b>';
	}

	$processed = [];
	foreach ( $terms as $term ) {
		if ( !isset( $processed[$term] ) ) {
			$pat3 = "/$patPre(" . $term . ")$patPost/ui"; // highlight word
			$extract = preg_replace( $pat3,
				"\\1<span class='searchmatch'>\\2</span>\\3", $extract );
			$processed[$term] = true;
		}
	}

	return $extract;
}
322 | |
/**
 * Split text into lines and add every non-blank line to the extracts array
 *
 * When $mCleanWikitext is set the text is first passed through
 * removeWiki(). Each line is trimmed; lines that are empty after
 * trimming are skipped.
 *
 * @param string[] &$extracts Index -> $line
 * @param int &$count Next free index; incremented once per stored line
 * @param string $text
 */
private function splitAndAdd( &$extracts, &$count, $text ) {
	$split = explode( "\n", $this->mCleanWikitext ? $this->removeWiki( $text ) : $text );
	foreach ( $split as $line ) {
		$tt = trim( $line );
		// Compare against '' explicitly: a plain truthiness test would
		// also discard a line consisting solely of "0".
		if ( $tt !== '' ) {
			$extracts[$count++] = $tt;
		}
	}
}
339 | |
/**
 * preg_replace_callback() handler doing manual case folding for
 * multi-byte characters
 *
 * A match longer than one byte is turned into a regex character class
 * holding its lower-case and upper-case forms; a single-byte match is
 * returned as is.
 *
 * @param array $matches
 * @return string
 */
private function caseCallback( $matches ) {
	$char = $matches[0];
	if ( strlen( $char ) <= 1 ) {
		return $char;
	}

	$lang = MediaWikiServices::getInstance()->getContentLanguage();
	$lower = $lang->lc( $char );
	$upper = $lang->uc( $char );

	return '[' . $lower . $upper . ']';
}
355 | |
/**
 * Cut the byte range [$start, $end) out of the text, moving both ends
 * with position() so that words are not chopped up
 *
 * A $start of 0 and an $end at or beyond the end of the text are used
 * unchanged (the latter clamped to the text length).
 *
 * @param string $text
 * @param int $start
 * @param int $end
 * @param int|null &$posStart (out) actual start position; only written
 *   when the caller passes a non-null value
 * @param int|null &$posEnd (out) actual end position; only written
 *   when the caller passes a non-null value
 * @return string Empty string when the adjusted range is empty
 */
private function extract( $text, $start, $end, &$posStart = null, &$posEnd = null ) {
	$textLen = strlen( $text );

	$from = $start != 0 ? $this->position( $text, $start, 1 ) : $start;
	$to = $end >= $textLen ? $textLen : $this->position( $text, $end );

	if ( $posStart !== null ) {
		$posStart = $from;
	}
	if ( $posEnd !== null ) {
		$posEnd = $to;
	}

	return $to > $from ? substr( $text, $from, $to - $from ) : '';
}
389 | |
/**
 * Find a nonletter near a point (index) in the text
 *
 * Looks at the window of up to 10 bytes on either side of $point and
 * returns the index of the first separator character found in it, plus
 * $offset. When the window holds no separator, $point itself is
 * returned, moved forward past any UTF-8 continuation bytes.
 *
 * @param string $text
 * @param int $point
 * @param int $offset Added to the index of a found separator
 * @return int
 */
private function position( $text, $point, $offset = 0 ) {
	$tolerance = 10;
	$windowStart = max( 0, $point - $tolerance );
	$windowLen = min( strlen( $text ), $point + $tolerance ) - $windowStart;
	$window = substr( $text, $windowStart, $windowLen );

	$hit = [];
	$separator = '/[ ,.!?~!@#$%^&*\(\)+=\-\\\|\[\]"\'<>]/';
	if ( preg_match( $separator, $window, $hit, PREG_OFFSET_CAPTURE ) ) {
		return $hit[0][1] + $windowStart + $offset;
	}

	// No separator in range: make sure we do not land inside a character.
	// Bytes of the form 10xxxxxx are UTF-8 continuation bytes.
	$textLen = strlen( $text );
	while ( ( ord( $text[$point] ) & 0xc0 ) === 0x80 ) {
		$point++;
		if ( $point >= $textLen ) {
			return $textLen;
		}
	}

	return $point;
}
427 | |
/**
 * Search extracts for a pattern and collect one snippet per matching line
 *
 * Lines already present in $out are skipped. Processing stops as soon
 * as $linesleft reaches zero.
 *
 * @param string $pattern Regexp for matching lines
 * @param array $extracts Extracts to search
 * @param int &$linesleft Number of extracts to make; decremented per snippet
 * @param int &$contextchars Length of snippet
 * @param array &$out Map for highlighted snippets
 * @param array &$offsets Map of starting points of snippets
 */
private function process( $pattern, $extracts, &$linesleft, &$contextchars, &$out, &$offsets ) {
	foreach ( $extracts as $index => $line ) {
		if ( $linesleft == 0 ) {
			// quota used up (or there was nothing to do to begin with)
			return;
		}

		$hit = [];
		if ( array_key_exists( $index, $out )
			|| !preg_match( $pattern, $line, $hit, PREG_OFFSET_CAPTURE )
		) {
			// already highlighted, or no match on this line
			continue;
		}

		[ $matched, $matchOffset ] = $hit[0];
		$matchLen = strlen( $matched );

		if ( $matchOffset + $matchLen < $contextchars ) {
			// the match fits into a snippet taken from the line start
			$begin = 0;
		} elseif ( $matchLen > $contextchars ) {
			// the match alone exceeds the snippet: start right at it
			$begin = $matchOffset;
		} else {
			// centre the match inside the snippet
			$begin = $matchOffset + intval( ( $matchLen - $contextchars ) / 2 );
		}

		// basic snippet from this line; extract() reports where it really starts
		$snippetStart = $begin;
		$out[$index] = $this->extract( $line, $begin, $begin + $contextchars, $snippetStart );
		$offsets[$index] = $snippetStart;
		$linesleft--;
	}
}
474 | |
/**
 * Basic wikitext removal
 *
 * Applies a fixed sequence of regex substitutions to the text (templates,
 * links, tags, then quote-based bold/italic markup) and finally
 * HTML-escapes the result.
 *
 * @param string $text
 * @return string
 */
private function removeWiki( $text ) {
	// templates and plain links; patterns are applied in the listed order
	$text = preg_replace(
		[
			"/\\{\\{([^|]+?)\\}\\}/",
			"/\\{\\{([^|]+\\|)(.*?)\\}\\}/",
			"/\\[\\[([^|]+?)\\]\\]/",
		],
		[
			"",
			"\\2",
			"\\1",
		],
		$text
	);

	// piped links: linkReplace() decides what to keep
	$text = preg_replace_callback(
		"/\\[\\[([^|]+\\|)(.*?)\\]\\]/",
		[ $this, 'linkReplace' ],
		$text
	);

	// tags and quote markup, again strictly in this order
	$text = preg_replace(
		[
			"/<\/?[^>]+>/",
			"/'''''/",
			"/('''|<\/?[iIuUbB]>)/",
			"/''/",
		],
		"",
		$text
	);

	// Note, the /<\/?[^>]+>/ pattern above is insufficient
	// for XSS safety as the HTML tag can span multiple
	// search results (T144845).
	return Sanitizer::escapeHtmlAllowEntities( $text );
}
500 | |
/**
 * Callback for [[target|caption]] links: the link is replaced by its
 * caption, unless the target's namespace prefix resolves to NS_FILE or
 * NS_CATEGORY, in which case the link is left as it is
 *
 * @param array $matches
 * @return string
 */
private function linkReplace( $matches ) {
	[ $wholeLink, $target, $caption ] = $matches;

	$colonPos = strpos( $target, ':' );
	if ( $colonPos !== false ) {
		$prefix = substr( $target, 0, $colonPos );
		$nsIndex = MediaWikiServices::getInstance()->getContentLanguage()->getNsIndex( $prefix );
		if ( $nsIndex === NS_FILE || $nsIndex === NS_CATEGORY ) {
			return $wholeLink; // return the whole thing
		}
	}

	return $caption; // replace with caption
}
521 | |
/**
 * Simple & fast snippet extraction, but gives completely irrelevant
 * snippets
 *
 * Used when $wgAdvancedSearchHighlighting is false.
 *
 * Each input line containing one of the terms yields one output line
 * (at most $contextlines of them): the text before and after the match
 * is truncated to $contextchars, the line is HTML-escaped, and every
 * occurrence of a term is wrapped in <span class="searchmatch">.
 *
 * @param string $text
 * @param string[] $terms Escaped for regex by SearchDatabase::regexTerm()
 * @param int $contextlines
 * @param int $contextchars
 * @return string Newline-terminated snippet lines
 */
public function highlightSimple(
	$text,
	$terms,
	$contextlines = self::DEFAULT_CONTEXT_LINES,
	$contextchars = self::DEFAULT_CONTEXT_CHARS
) {
	$lines = explode( "\n", $text );

	$terms = implode( '|', $terms );
	$max = intval( $contextchars ) + 1;
	$pat1 = "/(.*)($terms)(.{0,$max})/ui";
	// Does not change between lines, so build it once.
	$pat2 = '/(' . $terms . ')/ui';

	$extract = '';
	$contLang = MediaWikiServices::getInstance()->getContentLanguage();
	foreach ( $lines as $line ) {
		if ( $contextlines == 0 ) {
			break;
		}
		$m = [];
		if ( !preg_match( $pat1, $line, $m ) ) {
			continue;
		}
		--$contextlines;
		// truncate function changes ... to relevant i18n message.
		$pre = $contLang->truncateForVisual( $m[1], -$contextchars, '...', false );
		// The trailing group of $pat1 takes part in every successful match
		// (possibly as an empty string), so $m[3] is always present.
		$post = $contLang->truncateForVisual( $m[3], $contextchars, '...', false );

		$found = $m[2];

		// NOTE(review): the highlight pattern runs on the already-escaped
		// line, so a term such as "amp" can also match inside an entity
		// produced by htmlspecialchars().
		$line = htmlspecialchars( $pre . $found . $post );
		$line = preg_replace( $pat2, '<span class="searchmatch">\1</span>', $line );

		$extract .= "{$line}\n";
	}

	return $extract;
}
577 | |
/**
 * Returns the first few lines of the text
 *
 * Leading whitespace and empty lines are dropped, the first
 * $contextlines lines are kept, the result is limited to
 * $contextlines * $contextchars bytes, HTML-escaped, and its newlines
 * are turned into <br>.
 *
 * @param string $text
 * @param int $contextlines Max number of returned lines
 * @param int $contextchars Average number of characters per line
 * @return string
 */
public function highlightNone(
	$text,
	$contextlines = self::DEFAULT_CONTEXT_LINES,
	$contextchars = self::DEFAULT_CONTEXT_CHARS
) {
	$match = [];
	$text = ltrim( $text ) . "\n"; // make sure the preg_match may find the last line
	// Remove empty lines. A regex is needed here: a single
	// str_replace( "\n\n", "\n" ) pass does not rescan its output and so
	// leaves an empty line behind for every run of three or more newlines.
	$text = preg_replace( '/\n{2,}/', "\n", $text );
	preg_match( "/^(.*\n){0,$contextlines}/", $text, $match );

	// Trim and limit to max number of bytes. mb_strcut() keeps the byte
	// limit but never cuts through a multi-byte character; a half
	// character would make htmlspecialchars() see invalid UTF-8.
	$text = htmlspecialchars(
		mb_strcut( trim( $match[0] ), 0, $contextlines * $contextchars, 'UTF-8' )
	);
	return str_replace( "\n", '<br>', $text );
}
600 | } |