Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
100.00% |
151 / 151 |
|
100.00% |
33 / 33 |
CRAP | |
100.00% |
1 / 1 |
AnalyzerBuilder | |
100.00% |
151 / 151 |
|
100.00% |
33 / 33 |
56 | |
100.00% |
1 / 1 |
__construct | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
withCharFilters | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
withTokenizer | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
withFilters | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
withCharMap | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
withLimitedCharMap | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
withNumberCharFilter | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
withElision | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
withLangLowercase | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
withStop | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
withExtraStop | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
1 | |||
withExtraStemmer | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
withStemmerOverride | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
withUnpackedAnalyzer | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
unpackedCheck | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
2 | |||
insertFiltersBefore | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
appendFilters | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
prependFilters | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
omitDottedI | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
withLightStemmer | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
omitStemmer | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
withAsciifoldingPreserve | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
omitAsciifolding | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
withRemoveEmpty | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
withDecimalDigit | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
build | |
100.00% |
65 / 65 |
|
100.00% |
1 / 1 |
20 | |||
patternFilter | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
mappingCharFilter | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
2 | |||
numberCharFilter | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
2 | |||
elisionFilter | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
stopFilterFromList | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
2 | |||
overrideFilter | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
stemmerFilter | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 |
1 | <?php |
2 | |
3 | namespace CirrusSearch\Maintenance; |
4 | |
5 | /** |
6 | * Builds one elasticsearch analyzer to add to an analysis config array. |
7 | * |
8 | * This program is free software; you can redistribute it and/or modify |
9 | * it under the terms of the GNU General Public License as published by |
10 | * the Free Software Foundation; either version 2 of the License, or |
11 | * (at your option) any later version. |
12 | * |
13 | * This program is distributed in the hope that it will be useful, |
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
16 | * GNU General Public License for more details. |
17 | * |
18 | * You should have received a copy of the GNU General Public License along |
19 | * with this program; if not, write to the Free Software Foundation, Inc., |
20 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
21 | * http://www.gnu.org/copyleft/gpl.html |
22 | */ |
class AnalyzerBuilder {
	/**
	 * Indicate that filters should be automatically appended or prepended, rather
	 * than inserted before a given filter.
	 */
	public const APPEND = 1;
	public const PREPEND = 2;

	/** @var string language name, used to derive default filter names */
	private $langName;

	/** @var string key under which the analyzer is stored in the config */
	private $analyzerName;

	/** @var string[]|null list of char_filters */
	private $charFilters;

	/** @var string|null name of tokenizer */
	private $tokenizer = 'standard';

	/** @var string[]|null list of filters */
	private $filters;

	/** @var string[]|null list of lang-specific character filter mappings */
	private $charMap;

	/** @var bool whether the char map is a limited_mapping rather than a mapping */
	private $charMapLimited = false;

	/** @var string|null */
	private $charMapName;

	/** @var int|null Unicode value for script-specific zero */
	private $langZero;

	/** @var string|null name of char filter mapping digits (using $langZero) */
	private $numCharMapName;

	/** @var bool is elision processing case INsensitive? */
	private $elisionArticleCase = true;

	/** @var string[]|null list of articles to elide */
	private $elisionArticles;

	/** @var string|null */
	private $elisionName;

	/** @var bool use language-specific lowercasing? */
	private $langLowercase = false;

	/** @var mixed|null stopword _list_ or array of stopwords */
	private $customStopList;

	/** @var string|null */
	private $stopName;

	/** @var mixed|null stopword _list_ or array of stopwords */
	private $extraStopList;

	/** @var string|null */
	private $extraStopName;

	/** @var bool|null */
	private $extraStopIgnoreCase;

	/** @var string|null */
	private $extraStemmerLang;

	/** @var string|null */
	private $extraStemmerName;

	/** @var string|string[]|null a single stemmer override rule, or a list of rules */
	private $overrideRules;

	/** @var string|null */
	private $overrideName;

	/**********
	 * The properties below are only used by unpacked analyzers
	 */

	/** @var bool */
	private $unpacked = false;

	/** @var array<int, array<string, string[]>> */
	private $insertFilterList = [];

	/** @var string char filter name for the dotted-I fix ('' for none) */
	private $dottedIFix = 'dotted_I_fix';

	/** @var bool */
	private $useStemmer = true;

	/** @var string|null */
	private $stemmerLang;

	/** @var string|null asciifolding flavor to use (empty for none) */
	private $asciifolding = 'asciifolding';

	/** @var string|null */
	private $removeEmpty;

	/** @var string|null */
	private $decimalDigit;

	/**
	 * @param string $langName
	 * @param string $analyzerName (default to 'text')
	 */
	public function __construct( string $langName, string $analyzerName = 'text' ) {
		$this->langName = $langName;
		$this->analyzerName = $analyzerName;
	}

	/**
	 * @param string[] $charFilters
	 * @return self
	 */
	public function withCharFilters( array $charFilters ): self {
		$this->charFilters = $charFilters;
		return $this;
	}

	/**
	 * @param string $tokenizer
	 * @return self
	 */
	public function withTokenizer( string $tokenizer ): self {
		$this->tokenizer = $tokenizer;
		return $this;
	}

	/**
	 * @param string[] $filters
	 * @return self
	 */
	public function withFilters( array $filters ): self {
		$this->filters = $filters;
		return $this;
	}

	/**
	 * Configure a language-specific character mapping filter.
	 *
	 * @param string[] $mappings
	 * @param string|null $name defaults to "{langName}_charfilter"
	 * @param bool $limited true to emit a limited_mapping filter instead of mapping
	 * @return self
	 */
	public function withCharMap( array $mappings, ?string $name = null, bool $limited = false ): self {
		$this->charMap = $mappings;
		$this->charMapName = $name ?? "{$this->langName}_charfilter";
		// Honor the caller's choice; withLimitedCharMap() relies on this.
		$this->charMapLimited = $limited;
		return $this;
	}

	/**
	 * Same as withCharMap(), but produces a limited_mapping character filter.
	 *
	 * @param string[] $mappings
	 * @param string|null $name
	 * @return self
	 */
	public function withLimitedCharMap( array $mappings, ?string $name = null ): self {
		return $this->withCharMap( $mappings, $name, true );
	}

	/**
	 * @param int $langZero Unicode value of the script-specific digit zero
	 * @param string|null $name defaults to "{langName}_numbers"
	 * @return self
	 */
	public function withNumberCharFilter( int $langZero, ?string $name = null ): self {
		$this->langZero = $langZero;
		$this->numCharMapName = $name ?? "{$this->langName}_numbers";
		return $this;
	}

	/**
	 * @param string[] $articles "articles" to be elided
	 * @param bool $articleCase whether elision is case insensitive
	 * @return self
	 */
	public function withElision( array $articles, bool $articleCase = true ): self {
		$this->elisionArticleCase = $articleCase;
		$this->elisionArticles = $articles;
		$this->elisionName = "{$this->langName}_elision";
		return $this;
	}

	/** @return self */
	public function withLangLowercase(): self {
		$this->langLowercase = true;
		return $this;
	}

	/**
	 * @param mixed $stop pre-defined list like _french_ or an array of stopwords
	 * @param string|null $name defaults to "{langName}_stop"
	 * @return self
	 */
	public function withStop( $stop, ?string $name = null ): self {
		$this->customStopList = $stop;
		$this->stopName = $name ?? "{$this->langName}_stop";
		return $this;
	}

	/**
	 * Configure an additional stop filter and schedule it for insertion into the
	 * filter list. Goes through insertFiltersBefore(), so the analyzer must
	 * already be marked as unpacked.
	 *
	 * @param mixed $stop pre-defined list like _french_ or an array of stopwords
	 * @param string $name
	 * @param mixed $beforeFilter filter to insert extra stop before
	 * @param bool|null $ignoreCase
	 * @return self
	 */
	public function withExtraStop( $stop, string $name, $beforeFilter = self::APPEND,
		?bool $ignoreCase = null ): self {
		$this->extraStopList = $stop;
		$this->extraStopName = $name;
		$this->extraStopIgnoreCase = $ignoreCase;
		$this->insertFiltersBefore( $beforeFilter, [ $name ] );
		return $this;
	}

	/**
	 * @param string $lang
	 * @param string|null $name defaults to $lang
	 * @return self
	 */
	public function withExtraStemmer( string $lang, ?string $name = null ): self {
		$this->extraStemmerLang = $lang;
		$this->extraStemmerName = $name ?? $lang;
		return $this;
	}

	/**
	 * Rules can be a single rule string, or an array of rules
	 *
	 * @param mixed $rules stemmer override rules
	 * @param string|null $name defaults to "{langName}_override"
	 * @return self
	 */
	public function withStemmerOverride( $rules, ?string $name = null ): self {
		$this->overrideRules = $rules;
		$this->overrideName = $name ?? "{$this->langName}_override";
		return $this;
	}

	/**********
	 * The with.., omit.., and insert.. methods below are only used by unpacked analyzers
	 */

	/** @return self */
	public function withUnpackedAnalyzer(): self {
		$this->unpacked = true;
		return $this;
	}

	/**
	 * Guard for methods that only make sense on unpacked analyzers: throws unless
	 * withUnpackedAnalyzer() has been called. The name of the calling method is
	 * taken from the backtrace and included in the error message.
	 */
	private function unpackedCheck(): void {
		if ( !$this->unpacked ) {
			// Only the immediate caller's name is needed: skip args, keep two frames.
			$caller = debug_backtrace( DEBUG_BACKTRACE_IGNORE_ARGS, 2 )[1]['function'];
			throw new \ConfigException( "$caller() is only compatible with unpacked analyzers; " .
				"call withUnpackedAnalyzer() before calling $caller()." );
		}
	}

	/**
	 * @param mixed $beforeFilter specific filter to insert $filters before; use APPEND
	 *  or PREPEND to always add to beginning or end of the list
	 * @param string[] $filterList list of additional filters to insert
	 * @return self
	 */
	public function insertFiltersBefore( $beforeFilter, array $filterList ): self {
		$this->unpackedCheck();
		$this->insertFilterList[] = [ $beforeFilter => $filterList ];
		return $this;
	}

	/**
	 * @param string[] $filterList list of additional filters to append
	 * @return self
	 */
	public function appendFilters( array $filterList ): self {
		// Checked here as well so the error message names appendFilters().
		$this->unpackedCheck();
		$this->insertFiltersBefore( self::APPEND, $filterList );
		return $this;
	}

	/**
	 * @param string[] $filterList list of additional filters to prepend
	 * @return self
	 */
	public function prependFilters( array $filterList ): self {
		// Checked here as well so the error message names prependFilters().
		$this->unpackedCheck();
		$this->insertFiltersBefore( self::PREPEND, $filterList );
		return $this;
	}

	/** @return self */
	public function omitDottedI(): self {
		$this->unpackedCheck();
		$this->dottedIFix = '';
		return $this;
	}

	/** @return self */
	public function withLightStemmer(): self {
		$this->unpackedCheck();
		$this->stemmerLang = "light_{$this->langName}";
		return $this;
	}

	/** @return self */
	public function omitStemmer(): self {
		$this->unpackedCheck();
		$this->useStemmer = false;
		return $this;
	}

	/** @return self */
	public function withAsciifoldingPreserve(): self {
		$this->unpackedCheck();
		$this->asciifolding = 'asciifolding_preserve';
		return $this;
	}

	/** @return self */
	public function omitAsciifolding(): self {
		$this->unpackedCheck();
		$this->asciifolding = '';
		return $this;
	}

	/** @return self */
	public function withRemoveEmpty(): self {
		$this->unpackedCheck();
		$this->removeEmpty = 'remove_empty';
		return $this;
	}

	/** @return self */
	public function withDecimalDigit(): self {
		$this->unpackedCheck();
		$this->decimalDigit = 'decimal_digit';
		return $this;
	}

	/**
	 * Create a basic analyzer with support for various common options
	 *
	 * Can create various filters and character filters as specified.
	 * For basic (not unpacked) analyzers none are automatically added to the
	 * char_filter or filter list because the best order for these basic
	 * analyzers depends on the details of various third-party plugins.
	 *
	 * type: custom
	 * tokenizer: as per $this->tokenizer ('standard' unless overridden)
	 * char_filter: as per $this->charFilters
	 * filter: as per $this->filters
	 *
	 * In unpacked mode this method appends to the builder's own char_filter and
	 * filter lists, so calling it more than once on the same builder repeats
	 * those entries.
	 *
	 * @param mixed[] $config to be updated
	 * @return mixed[] updated config
	 */
	public function build( array $config ): array {
		$langStem = "{$this->langName}_stemmer";

		if ( $this->unpacked ) {
			// Analyzer config for char_filter and filter will be in the order below,
			// if the relevant filters are enabled/configured.
			//
			// type: custom
			// tokenizer: standard
			// char_filter: dotted_I_fix, lang_charfilter, lang_numbers
			// filter: elision, lowercase, decimal_digit, stopwords, stemmer_override,
			//         stemmer, asciifolding, remove_empty
			if ( $this->useStemmer ) {
				$this->stemmerLang ??= $this->langName;
			} else {
				$langStem = '';
			}
			// NOTE(review): this resets the stop filter name to its default even if
			// withStop() was previously called with a custom name -- confirm intended.
			$this->withStop( $this->customStopList ?? "_{$this->langName}_" );

			// build up the char_filter list--everything is optional
			$this->charFilters[] = $this->dottedIFix;
			$this->charFilters[] = $this->charMapName;
			$this->charFilters[] = $this->numCharMapName;

			// remove 'falsey' (== not configured) values from the list
			$this->charFilters = array_values( array_filter( $this->charFilters ) );

			// build up the filter list--lowercase, stop, and stem are required
			$this->filters[] = $this->elisionName;
			$this->filters[] = 'lowercase';
			$this->filters[] = $this->decimalDigit;
			$this->filters[] = $this->stopName;
			$this->filters[] = $this->overrideName;
			$this->filters[] = $langStem;
			$this->filters[] = $this->asciifolding;
			$this->filters[] = $this->removeEmpty;

			// remove 'falsey' (== not configured) values from the list
			$this->filters = array_values( array_filter( $this->filters ) );

			// iterate over all lists of sets of filters to insert, in order, and insert
			// them before the specified filter. If no such filter exists, the filters
			// are prepended, but you shouldn't count on that. APPEND and PREPEND
			// constants can be used to add to beginning or end, regardless of other
			// filters
			foreach ( $this->insertFilterList as $filterPatch ) {
				foreach ( $filterPatch as $beforeFilter => $filterList ) {
					if ( $beforeFilter === self::APPEND ) {
						$this->filters = array_merge( $this->filters, $filterList );
					} elseif ( $beforeFilter === self::PREPEND ) {
						$this->filters = array_merge( $filterList, $this->filters );
					} else {
						$idx = array_search( $beforeFilter, $this->filters, true );
						if ( $idx === false ) {
							// array_search() reports a miss as false; make the
							// "insert at the front" fallback explicit rather than
							// relying on bool-to-int coercion.
							$idx = 0;
						}
						array_splice( $this->filters, $idx, 0, $filterList );
					}
				}
			}
		}

		$config[ 'analyzer' ][ $this->analyzerName ] = [
			'type' => 'custom',
			'tokenizer' => $this->tokenizer,
		];

		if ( $this->charMapName ) {
			$config[ 'char_filter' ][ $this->charMapName ] =
				self::mappingCharFilter( $this->charMap, $this->charMapLimited );
		}

		if ( $this->numCharMapName ) {
			$config[ 'char_filter' ][ $this->numCharMapName ] =
				self::numberCharFilter( $this->langZero );
		}

		if ( $this->elisionName ) {
			$config[ 'filter' ][ $this->elisionName ] =
				self::elisionFilter( $this->elisionArticles, $this->elisionArticleCase );
		}

		if ( $this->langLowercase ) {
			$config[ 'filter' ][ 'lowercase' ][ 'language' ] = $this->langName;
		}

		if ( $this->overrideName ) {
			$config[ 'filter' ][ $this->overrideName ] =
				$this->overrideFilter( $this->overrideRules );
		}

		if ( $this->stopName ) {
			$config[ 'filter' ][ $this->stopName ] =
				self::stopFilterFromList( $this->customStopList );
		}

		if ( $this->extraStopName ) {
			$config[ 'filter' ][ $this->extraStopName ] =
				self::stopFilterFromList( $this->extraStopList, $this->extraStopIgnoreCase );
		}

		if ( $this->charFilters ) {
			$config[ 'analyzer' ][ $this->analyzerName ][ 'char_filter' ] = $this->charFilters;
		}

		if ( $this->filters ) {
			$config[ 'analyzer' ][ $this->analyzerName ][ 'filter' ] = $this->filters;
		}

		if ( $this->stemmerLang && $this->useStemmer ) {
			$config[ 'filter' ][ $langStem ] =
				self::stemmerFilter( $this->stemmerLang );
		}

		if ( $this->extraStemmerName ) {
			$config[ 'filter' ][ $this->extraStemmerName ] =
				self::stemmerFilter( $this->extraStemmerLang );
		}

		return $config;
	}

	/**
	 * Create a pattern_replace filter/char_filter with the mappings provided.
	 *
	 * @param string $pat
	 * @param string $repl
	 * @return mixed[] filter
	 */
	public static function patternFilter( string $pat, string $repl = '' ): array {
		return [ 'type' => 'pattern_replace', 'pattern' => $pat, 'replacement' => $repl ];
	}

	/**
	 * Create a mapping or limited_mapping character filter with the mappings provided.
	 *
	 * @param string[] $mappings
	 * @param bool $limited
	 * @return mixed[] character filter
	 */
	public static function mappingCharFilter( array $mappings, bool $limited ): array {
		$type = $limited ? 'limited_mapping' : 'mapping';
		return [ 'type' => $type, 'mappings' => $mappings ];
	}

	/**
	 * Create a character filter that maps non-Arabic digits (e.g., ០-៩ or 0-9) to
	 * Arabic digits (0-9). Since they are usually all in a row, we just need the
	 * starting digit (equal to 0)
	 *
	 * @param int $langZero
	 * @return mixed[] character filter
	 */
	public static function numberCharFilter( int $langZero ): array {
		$numMap = [];
		for ( $i = 0; $i <= 9; $i++ ) {
			// Emits a literal backslash-u escape followed by four hex digits.
			$numMap[] = sprintf( '\\u%04x=>%d', $langZero + $i, $i );
		}
		return self::mappingCharFilter( $numMap, true );
	}

	/**
	 * Create an elision filter with the "articles" provided; $case determines whether
	 * stripping is case sensitive or not
	 *
	 * @param string[] $articles
	 * @param bool $case
	 * @return mixed[] token filter
	 */
	public static function elisionFilter( array $articles, bool $case = true ): array {
		return [ 'type' => 'elision', 'articles_case' => $case, 'articles' => $articles ];
	}

	/**
	 * Create a stop word filter with the provided config. The config can be an array
	 * of stop words, or a string like _french_ that refers to a pre-defined list.
	 *
	 * @param mixed $stopwords
	 * @param bool|null $ignoreCase when null, no ignore_case key is emitted
	 * @return mixed[] token filter
	 */
	public static function stopFilterFromList( $stopwords, ?bool $ignoreCase = null ): array {
		$retArray = [ 'type' => 'stop', 'stopwords' => $stopwords ];
		if ( $ignoreCase !== null ) {
			$retArray['ignore_case'] = $ignoreCase;
		}
		return $retArray;
	}

	/**
	 * Create a stemming override filter with the rules provided, which can be a string
	 * with one rule or an array of such rules
	 *
	 * @param mixed $rules
	 * @return mixed[] token filter
	 */
	private function overrideFilter( $rules ): array {
		return [ 'type' => 'stemmer_override', 'rules' => $rules ];
	}

	/**
	 * Create a stemmer filter with the provided config.
	 *
	 * @param string $stemmer
	 * @return mixed[] token filter
	 */
	public static function stemmerFilter( string $stemmer ): array {
		return [ 'type' => 'stemmer', 'language' => $stemmer ];
	}

}