Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
100.00% covered (success)
100.00%
196 / 196
100.00% covered (success)
100.00%
35 / 35
CRAP
100.00% covered (success)
100.00%
1 / 1
AnalyzerBuilder
100.00% covered (success)
100.00%
196 / 196
100.00% covered (success)
100.00%
35 / 35
68
100.00% covered (success)
100.00%
1 / 1
 __construct
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 withLangName
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 withCharFilters
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 withTokenizer
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 withFilters
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 withCharMap
100.00% covered (success)
100.00%
4 / 4
100.00% covered (success)
100.00%
1 / 1
1
 withLimitedCharMap
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 withInvisCharMap
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 withReversedNumberCharFilter
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 withNumberCharFilter
100.00% covered (success)
100.00%
5 / 5
100.00% covered (success)
100.00%
1 / 1
2
 withElision
100.00% covered (success)
100.00%
4 / 4
100.00% covered (success)
100.00%
1 / 1
1
 withLangLowercase
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
2
 withStop
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
1
 withExtraStop
100.00% covered (success)
100.00%
5 / 5
100.00% covered (success)
100.00%
1 / 1
1
 withExtraStemmer
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
1
 withStemmerOverride
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
1
 withUnpackedAnalyzer
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 unpackedCheck
100.00% covered (success)
100.00%
4 / 4
100.00% covered (success)
100.00%
1 / 1
2
 insertFiltersBefore
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
1
 appendFilters
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
1
 prependFilters
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
1
 withLightStemmer
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
1
 omitStemmer
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
1
 withAsciifolding
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
1
 omitFolding
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
1
 withRemoveEmpty
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
1
 withDecimalDigit
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
1
 build
100.00% covered (success)
100.00%
103 / 103
100.00% covered (success)
100.00%
1 / 1
27
 patternFilter
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 mappingCharFilter
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
2
 numberCharFilter
100.00% covered (success)
100.00%
6 / 6
100.00% covered (success)
100.00%
1 / 1
3
 elisionFilter
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 stopFilterFromList
100.00% covered (success)
100.00%
4 / 4
100.00% covered (success)
100.00%
1 / 1
2
 overrideFilter
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 stemmerFilter
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
1<?php
2
3namespace CirrusSearch\Maintenance;
4
5use MediaWiki\Config\ConfigException;
6
7/**
8 * Builds one search analyzer to add to an analysis config array.
9 *
10 * @license GPL-2.0-or-later
11 */
class AnalyzerBuilder {
    /**
     * Indicate that filters should be automatically appended or prepended, rather
     * than inserted before a given filter.
     */
    public const APPEND = 1;
    public const PREPEND = 2;

    /** @var string language name; used to derive default filter names */
    private $langName;

    /** @var string key of the analyzer written into the config */
    private $analyzerName = 'text';

    /** @var bool */
    private $icuEnabled;

    /** @var string[]|null list of char_filters */
    private $charFilters;

    /** @var string|null name of tokenizer */
    private $tokenizer = 'standard';

    /** @var string[]|null list of filters */
    private $filters;

    /** @var string[]|null list of lang-specific character filter mappings */
    private $charMap;

    /** @var bool use a limited_mapping char filter rather than a mapping one */
    private $charMapLimited = false;

    /** @var string|null */
    private $charMapName;

    /** @var int|null Unicode value for script-specific zero */
    private $langZero;

    /** @var bool should langZero's map be reversed (Arabic to non-Arabic)? */
    private $numCharMapReversed = false;

    /** @var string|null name of char filter mapping digits (using $langZero) */
    private $numCharMapName;

    /** @var string|null name of char filter for cleaning up invisibles */
    private $invisCharMapName;

    /** @var bool is elision processing case INsensitive? */
    private $elisionArticleCase = true;

    /** @var string[]|null list of articles to elide */
    private $elisionArticles;

    /** @var string|null */
    private $elisionName;

    /** @var string|null */
    private $langLowercase;

    /** @var mixed|null stopword _list_ or array of stopwords */
    private $customStopList;

    /** @var string|null */
    private $stopName;

    /** @var mixed|null stopword _list_ or array of stopwords */
    private $extraStopList;

    /** @var string|null */
    private $extraStopName;

    /** @var bool|null */
    private $extraStopIgnoreCase;

    /** @var string|null */
    private $extraStemmerLang;

    /** @var string|null */
    private $extraStemmerName;

    /** @var string|string[]|null a single stemmer override rule, or a list of rules */
    private $overrideRules;

    /** @var string|null */
    private $overrideName;

    /**********
     * The properties below are only used by unpacked analyzers
     */

    /** @var bool */
    private $unpacked = false;

    /**
     * Ordered list of single-entry maps: target filter name (or APPEND / PREPEND)
     * => filters to insert.
     *
     * @var array<int, array<int|string, string[]>>
     */
    private $insertFilterList = [];

    /** @var bool */
    private $useStemmer = true;

    /** @var string|null */
    private $stemmerLang;

    /** @var string|null folding flavor to use (null or '' for none) */
    private $folding = 'icu_folding';

    /** @var string|null */
    private $removeEmpty;

    /** @var string|null */
    private $decimalDigit;

    /**
     * @param string $langName
     * @param bool $icuEnabled
     */
    public function __construct( string $langName, bool $icuEnabled = false ) {
        $this->langName = $langName;
        $this->icuEnabled = $icuEnabled;
    }

    /**
     * Replace the language name. Default names derived by with*() methods that
     * were already called are not recomputed.
     *
     * @param string $langName
     * @return self
     */
    public function withLangName( string $langName ): self {
        $this->langName = $langName;
        return $this;
    }

    /**
     * @param string[] $charFilters
     * @return self
     */
    public function withCharFilters( array $charFilters ): self {
        $this->charFilters = $charFilters;
        return $this;
    }

    /**
     * @param string $tokenizer
     * @return self
     */
    public function withTokenizer( string $tokenizer ): self {
        $this->tokenizer = $tokenizer;
        return $this;
    }

    /**
     * @param string[] $filters
     * @return self
     */
    public function withFilters( array $filters ): self {
        $this->filters = $filters;
        return $this;
    }

    /**
     * @param string[] $mappings
     * @param string|null $name defaults to "<lang>_charfilter"
     * @param bool $limited
     * @return self
     */
    public function withCharMap( array $mappings, ?string $name = null, bool $limited = false ): self {
        $this->charMap = $mappings;
        $this->charMapName = $name ?? "{$this->langName}_charfilter";
        $this->charMapLimited = $limited;
        return $this;
    }

    /**
     * @param string[] $mappings
     * @param string|null $name
     * @return self
     */
    public function withLimitedCharMap( array $mappings, ?string $name = null ): self {
        return $this->withCharMap( $mappings, $name, true );
    }

    /**
     * @param string|null $name
     * @return self
     */
    public function withInvisCharMap( ?string $name = 'invis_cleanup' ): self {
        $this->invisCharMapName = $name;
        return $this;
    }

    /**
     * @param int $langZero
     * @param string|null $name
     * @return self
     */
    public function withReversedNumberCharFilter( int $langZero, ?string $name = null ): self {
        return $this->withNumberCharFilter( $langZero, $name, true );
    }

    /**
     * @param int $langZero
     * @param string|null $name defaults to "<lang>_numbers" or "<lang>_reversed_numbers"
     * @param bool $reversed reverse the mapping from Arabic to non-Arabic
     * @return self
     */
    public function withNumberCharFilter( int $langZero, ?string $name = null, bool $reversed = false ): self {
        $defName = $reversed ? "{$this->langName}_reversed_numbers" : "{$this->langName}_numbers";
        $this->langZero = $langZero;
        $this->numCharMapName = $name ?? $defName;
        $this->numCharMapReversed = $reversed;
        return $this;
    }

    /**
     * @param string[] $articles "articles" to be elided
     * @param bool $articleCase whether elision is case insensitive
     * @return self
     */
    public function withElision( array $articles, bool $articleCase = true ): self {
        $this->elisionArticleCase = $articleCase;
        $this->elisionArticles = $articles;
        $this->elisionName = "{$this->langName}_elision";
        return $this;
    }

    /**
     * @param string|null $name language for the lowercase filter; null or any other
     *  falsey value (e.g. '') falls back to the builder's language name
     * @return self
     */
    public function withLangLowercase( ?string $name = null ): self {
        $this->langLowercase = $name ?: $this->langName;
        return $this;
    }

    /**
     * @param mixed $stop pre-defined list like _french_ or an array of stopwords
     * @param string|null $name defaults to "<lang>_stop"
     * @return self
     */
    public function withStop( $stop, ?string $name = null ): self {
        $this->customStopList = $stop;
        $this->stopName = $name ?? "{$this->langName}_stop";
        return $this;
    }

    /**
     * @param mixed $stop pre-defined list like _french_ or an array of stopwords
     * @param string $name
     * @param mixed $beforeFilter filter to insert extra stop before
     * @param bool|null $ignoreCase
     * @return self
     * @throws ConfigException if withUnpackedAnalyzer() has not been called
     */
    public function withExtraStop( $stop, string $name, $beforeFilter = self::APPEND,
            ?bool $ignoreCase = null ): self {
        // Check first, like the other unpacked-only methods, so nothing is changed
        // on failure and the error message names this method.
        $this->unpackedCheck();
        $this->extraStopList = $stop;
        $this->extraStopName = $name;
        $this->extraStopIgnoreCase = $ignoreCase;
        $this->insertFiltersBefore( $beforeFilter, [ $name ] );
        return $this;
    }

    /**
     * @param string $lang
     * @param string|null $name defaults to $lang
     * @return self
     */
    public function withExtraStemmer( string $lang, ?string $name = null ): self {
        $this->extraStemmerLang = $lang;
        $this->extraStemmerName = $name ?? $lang;
        return $this;
    }

    /**
     * Rules can be a single rule string, or an array of rules
     *
     * @param mixed $rules stemmer override rules
     * @param string|null $name defaults to "<lang>_override"
     * @return self
     */
    public function withStemmerOverride( $rules, ?string $name = null ): self {
        $this->overrideRules = $rules;
        $this->overrideName = $name ?? "{$this->langName}_override";
        return $this;
    }

    /**
     * Switch to building an unpacked analyzer; required before calling any of the
     * unpacked-only methods.
     *
     * @return self
     */
    public function withUnpackedAnalyzer(): self {
        $this->unpacked = true;
        return $this;
    }

    /**
     * Guard for methods that only make sense for unpacked analyzers.
     *
     * @throws ConfigException if withUnpackedAnalyzer() has not been called
     */
    private function unpackedCheck(): void {
        if ( $this->unpacked ) {
            return;
        }
        // Frame 0 is unpackedCheck() itself; frame 1 is the method that called it.
        // Only those two frames are needed, and no arguments.
        $caller = debug_backtrace( DEBUG_BACKTRACE_IGNORE_ARGS, 2 )[1]['function'];
        throw new ConfigException( "$caller() is only compatible with unpacked analyzers; " .
            "call withUnpackedAnalyzer() before calling $caller()." );
    }

    /**
     * @param mixed $beforeFilter specific filter to insert $filters before; use APPEND
     *                            or PREPEND to always add to beginning or end of the list
     * @param string[] $filterList list of additional filters to insert
     * @return self
     * @throws ConfigException if withUnpackedAnalyzer() has not been called
     */
    public function insertFiltersBefore( $beforeFilter, array $filterList ): self {
        $this->unpackedCheck();
        $this->insertFilterList[] = [ $beforeFilter => $filterList ];
        return $this;
    }

    /**
     * @param string[] $filterList list of additional filters to append
     * @return self
     * @throws ConfigException if withUnpackedAnalyzer() has not been called
     */
    public function appendFilters( array $filterList ): self {
        // checked here as well so the error message names appendFilters()
        $this->unpackedCheck();
        $this->insertFiltersBefore( self::APPEND, $filterList );
        return $this;
    }

    /**
     * @param string[] $filterList list of additional filters to prepend
     * @return self
     * @throws ConfigException if withUnpackedAnalyzer() has not been called
     */
    public function prependFilters( array $filterList ): self {
        // checked here as well so the error message names prependFilters()
        $this->unpackedCheck();
        $this->insertFiltersBefore( self::PREPEND, $filterList );
        return $this;
    }

    /**
     * Use the "light_<lang>" stemmer instead of the default "<lang>" one.
     *
     * @return self
     * @throws ConfigException if withUnpackedAnalyzer() has not been called
     */
    public function withLightStemmer(): self {
        $this->unpackedCheck();
        $this->stemmerLang = "light_{$this->langName}";
        return $this;
    }

    /**
     * @return self
     * @throws ConfigException if withUnpackedAnalyzer() has not been called
     */
    public function omitStemmer(): self {
        $this->unpackedCheck();
        $this->useStemmer = false;
        return $this;
    }

    /**
     * @return self
     * @throws ConfigException if withUnpackedAnalyzer() has not been called
     */
    public function withAsciifolding(): self {
        $this->unpackedCheck();
        $this->folding = 'asciifolding';
        return $this;
    }

    /**
     * @return self
     * @throws ConfigException if withUnpackedAnalyzer() has not been called
     */
    public function omitFolding(): self {
        $this->unpackedCheck();
        $this->folding = '';
        return $this;
    }

    /**
     * @return self
     * @throws ConfigException if withUnpackedAnalyzer() has not been called
     */
    public function withRemoveEmpty(): self {
        $this->unpackedCheck();
        $this->removeEmpty = 'remove_empty';
        return $this;
    }

    /**
     * @return self
     * @throws ConfigException if withUnpackedAnalyzer() has not been called
     */
    public function withDecimalDigit(): self {
        $this->unpackedCheck();
        $this->decimalDigit = 'decimal_digit';
        return $this;
    }

    /**
     * Create a basic analyzer with support for various common options
     *
     * Can create various filters and character filters as specified.
     * For packed (default) analyzers none are automatically added to the
     * char_filter or filter list because the best order for these basic
     * analyzers depends on the details of various third-party plugins.
     * For unpacked analyzers the lists are assembled in a fixed order (see below).
     *
     * type: custom
     * tokenizer: as per $this->tokenizer
     * char_filter: as per $this->charFilters
     * filter: as per $this->filters
     *
     * The char_filter and filter lists are assembled on local copies, so calling
     * build() more than once on the same builder yields the same analyzer.
     *
     * @param mixed[] $config to be updated
     * @return mixed[] updated config
     */
    public function build( array $config ): array {
        $langStem = "{$this->langName}_stemmer";

        if ( $this->unpacked ) {
            // Analyzer config for char_filter and filter will be in the order below,
            // if the relevant filters are enabled/configured.
            //
            // type: custom
            // tokenizer: standard
            // char_filter: (configured char filters), lang_charfilter, lang_numbers
            // filter: (configured filters), elision, lowercase, decimal_digit,
            //         stopwords, stemmer_override, stemmer, folding, remove_empty
            if ( $this->useStemmer ) {
                $this->stemmerLang ??= $this->langName;
            } else {
                $langStem = '';
            }
            // NOTE(review): no name is passed, so this resets the stop filter name
            // to "<lang>_stop" even if withStop() was given a custom name earlier.
            $this->withStop( $this->customStopList ?? "_{$this->langName}_" );

            // remove icu_folding if icu plugin unavailable or unwanted
            $folding = $this->folding;
            if ( $folding === 'icu_folding' && !$this->icuEnabled ) {
                $folding = '';
            }

            // build up the char_filter list--everything is optional
            $charFilters = $this->charFilters ?? [];
            $charFilters[] = $this->charMapName;
            $charFilters[] = $this->numCharMapName;

            // remove 'falsey' (== not configured) values from the list
            $charFilters = array_values( array_filter( $charFilters ) );

            // build up the filter list--lowercase and stop are always present;
            // the stemmer is present unless omitStemmer() was called
            $filters = $this->filters ?? [];
            $filters[] = $this->elisionName;
            $filters[] = 'lowercase';
            $filters[] = $this->decimalDigit;
            $filters[] = $this->stopName;
            $filters[] = $this->overrideName;
            $filters[] = $langStem;
            $filters[] = $folding;
            $filters[] = $this->removeEmpty;

            // remove 'falsey' (== not configured) values from the list
            $filters = array_values( array_filter( $filters ) );

            $filters = $this->insertFilters( $filters );
        } else {
            $charFilters = $this->charFilters;
            $filters = $this->filters;

            // for simple filter lists, remove icu_folding if ICU not enabled;
            // nothing to do if no filters were configured at all
            if ( !$this->icuEnabled && $filters ) {
                $icuIdx = array_search( 'icu_folding', $filters, true );
                if ( $icuIdx !== false ) {
                    array_splice( $filters, $icuIdx, 1 );
                }
            }
        }

        $config[ 'analyzer' ][ $this->analyzerName ] = [
            'type' => 'custom',
            'tokenizer' => $this->tokenizer,
        ];

        if ( $this->charMapName ) {
            $config[ 'char_filter' ][ $this->charMapName ] =
                $this->mappingCharFilter( $this->charMap, $this->charMapLimited );
        }

        if ( $this->numCharMapName ) {
            $config[ 'char_filter' ][ $this->numCharMapName ] =
                $this->numberCharFilter( $this->langZero, $this->numCharMapReversed );
        }

        if ( $this->invisCharMapName ) {
            // only defined here; it is not added to the analyzer's char_filter list
            $config[ 'char_filter' ][ $this->invisCharMapName ] = self::invisCharFilter();
        }

        if ( $this->elisionName ) {
            $config[ 'filter' ][ $this->elisionName ] =
                $this->elisionFilter( $this->elisionArticles, $this->elisionArticleCase );
        }

        if ( $this->langLowercase ) {
            $config[ 'filter' ][ 'lowercase' ][ 'language' ] = $this->langLowercase;
        }

        if ( $this->overrideName ) {
            $config[ 'filter' ][ $this->overrideName ] =
                $this->overrideFilter( $this->overrideRules );
        }

        if ( $this->stopName ) {
            $config[ 'filter' ][ $this->stopName ] =
                $this->stopFilterFromList( $this->customStopList );
        }

        if ( $this->extraStopName ) {
            $config[ 'filter' ][ $this->extraStopName ] =
                $this->stopFilterFromList( $this->extraStopList, $this->extraStopIgnoreCase );
        }

        if ( $charFilters ) {
            $config[ 'analyzer' ][ $this->analyzerName ][ 'char_filter' ] = $charFilters;
        }

        if ( $filters ) {
            $config[ 'analyzer' ][ $this->analyzerName ][ 'filter' ] = $filters;
        }

        if ( $this->stemmerLang && $this->useStemmer ) {
            $config[ 'filter' ][ $langStem ] =
                $this->stemmerFilter( $this->stemmerLang );
        }

        if ( $this->extraStemmerName ) {
            $config[ 'filter' ][ $this->extraStemmerName ] =
                $this->stemmerFilter( $this->extraStemmerLang );
        }

        return $config;
    }

    /**
     * Apply the queued insertFiltersBefore() / appendFilters() / prependFilters()
     * requests, in the order they were made.
     *
     * If the filter to insert before is not in the list, the new filters are
     * prepended, but you shouldn't count on that. APPEND and PREPEND can be used
     * to add to the end or beginning regardless of other filters.
     *
     * @param string[] $filters base filter list
     * @return string[] filter list with all insertions applied
     */
    private function insertFilters( array $filters ): array {
        foreach ( $this->insertFilterList as $filterPatch ) {
            foreach ( $filterPatch as $beforeFilter => $filterList ) {
                // strict comparisons: a filter name must never be mistaken for
                // one of the integer APPEND / PREPEND markers
                if ( $beforeFilter === self::APPEND ) {
                    $filters = array_merge( $filters, $filterList );
                } elseif ( $beforeFilter === self::PREPEND ) {
                    $filters = array_merge( $filterList, $filters );
                } else {
                    // array keys that look like integers are stored as int, so
                    // cast back to string before the strict search
                    $idx = array_search( (string)$beforeFilter, $filters, true );
                    array_splice( $filters, $idx === false ? 0 : $idx, 0, $filterList );
                }
            }
        }
        return $filters;
    }

    /**
     * Create the mapping character filter that cleans up invisible characters:
     * zero-width space becomes a regular space; the other listed invisibles and
     * all 256 variation selectors are removed.
     *
     * @return mixed[] character filter
     */
    private static function invisCharFilter(): array {
        $mappings = [
            // split on ...
            '\u200B=>\u0020', // ... zero-width space
            // remove ...
            '\u00AD=>', // ... soft hyphen
            '\u200C=>', // ... zero-width non-joiner
            '\u200D=>', // ... zero-width joiner
            '\u2060=>', // ... word joiner
            '\uFEFF=>', // ... zero-width non-breaking space
            '\u200E=>', // ... LTR mark
            '\u200F=>', // ... RTL mark
            '\u202A=>', // ... LTR embedding
            '\u202B=>', // ... RTL embedding
            '\u202C=>', // ... pop directional formatting
            '\u202D=>', // ... LTR override
            '\u202E=>', // ... RTL override
            '\u2066=>', // ... LTR isolate
            '\u2067=>', // ... RTL isolate
            '\u2068=>', // ... first strong isolate
            '\u2069=>', // ... pop directional isolate
            '\u2061=>', // ... function application
            '\u2062=>', // ... invisible times
            '\u2063=>', // ... invisible separator
            '\u2064=>', // ... invisible plus
        ];

        // variation selectors 1-16 (FE00-FE0F)
        for ( $codePoint = 0xFE00; $codePoint <= 0xFE0F; $codePoint++ ) {
            $mappings[] = mb_chr( $codePoint, 'UTF-8' ) . '=>';
        }
        // variation selectors 17-256 (E0100-E01EF)
        for ( $codePoint = 0xE0100; $codePoint <= 0xE01EF; $codePoint++ ) {
            $mappings[] = mb_chr( $codePoint, 'UTF-8' ) . '=>';
        }

        return [ 'type' => 'mapping', 'mappings' => $mappings ];
    }

    /**
     * Create a pattern_replace filter/char_filter with the mappings provided.
     *
     * @param string $pat
     * @param string $repl
     * @return mixed[] filter
     */
    public static function patternFilter( string $pat, string $repl = '' ): array {
        return [ 'type' => 'pattern_replace', 'pattern' => $pat, 'replacement' => $repl ];
    }

    /**
     * Create a mapping or limited_mapping character filter with the mappings provided.
     *
     * @param string[] $mappings
     * @param bool $limited
     * @return mixed[] character filter
     */
    public static function mappingCharFilter( array $mappings, bool $limited ): array {
        $type = $limited ? 'limited_mapping' : 'mapping';
        return [ 'type' => $type, 'mappings' => $mappings ];
    }

    /**
     * Create a character filter that maps non-Arabic digits (e.g., ០-៩ or 0-9) to
     * Arabic digits (0-9). Since they are usually all in a row, we just need the
     * starting digit (equal to 0).
     *
     * Optionally reverse the mapping from Arabic to non-Arabic. For example, the ICU
     * tokenizer works better on tokenizing Thai digits in Thai text than it does on
     * Arabic digits.
     *
     * @param int $langZero
     * @param bool $reversed reverse the mapping from Arabic to non-Arabic
     * @return mixed[] character filter
     */
    public static function numberCharFilter( int $langZero, bool $reversed = false ): array {
        $numMap = [];
        for ( $i = 0; $i <= 9; $i++ ) {
            if ( $reversed ) {
                $numMap[] = sprintf( '%d=>\\u%04x', $i, $langZero + $i );
            } else {
                $numMap[] = sprintf( '\\u%04x=>%d', $langZero + $i, $i );
            }
        }
        return self::mappingCharFilter( $numMap, true );
    }

    /**
     * Create an elision filter with the "articles" provided; $case determines whether
     * stripping is case sensitive or not
     *
     * @param string[] $articles
     * @param bool $case
     * @return mixed[] token filter
     */
    public static function elisionFilter( array $articles, bool $case = true ): array {
        return [ 'type' => 'elision', 'articles_case' => $case, 'articles' => $articles ];
    }

    /**
     * Create a stop word filter with the provided config. The config can be an array
     * of stop words, or a string like _french_ that refers to a pre-defined list.
     *
     * @param mixed $stopwords
     * @param bool|null $ignoreCase omitted from the filter config when null
     * @return mixed[] token filter
     */
    public static function stopFilterFromList( $stopwords, ?bool $ignoreCase = null ): array {
        $retArray = [ 'type' => 'stop', 'stopwords' => $stopwords ];
        if ( $ignoreCase !== null ) {
            $retArray['ignore_case'] = $ignoreCase;
        }
        return $retArray;
    }

    /**
     * Create a stemming override filter with the rules provided, which can be a string
     * with one rule or an array of such rules
     *
     * @param mixed $rules
     * @return mixed[] token filter
     */
    private function overrideFilter( $rules ): array {
        return [ 'type' => 'stemmer_override', 'rules' => $rules ];
    }

    /**
     * Create a stemmer filter with the provided config.
     *
     * @param string $stemmer
     * @return mixed[] token filter
     */
    public static function stemmerFilter( string $stemmer ): array {
        return [ 'type' => 'stemmer', 'language' => $stemmer ];
    }

}