Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
100.00% covered (success)
100.00%
74 / 74
100.00% covered (success)
100.00%
20 / 20
CRAP
100.00% covered (success)
100.00%
1 / 1
GlobalCustomFilter
100.00% covered (success)
100.00%
74 / 74
100.00% covered (success)
100.00%
20 / 20
46
100.00% covered (success)
100.00%
1 / 1
 __construct
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 setLanguageDenyList
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 setLanguageAllowList
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 setRequiredPlugins
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 setFallbackFilter
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 setApplyToAnalyzers
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 getApplyToAnalyzers
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 setRequiredTokenizer
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 setDisallowedTokenizers
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 setDisallowedTokenFilters
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 setDisallowedCharFilters
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 setMustFollowFilters
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 enableGlobalCustomFilters
100.00% covered (success)
100.00%
12 / 12
100.00% covered (success)
100.00%
1 / 1
7
 languageCheck
100.00% covered (success)
100.00%
5 / 5
100.00% covered (success)
100.00%
1 / 1
4
 pluginsAvailable
100.00% covered (success)
100.00%
4 / 4
100.00% covered (success)
100.00%
1 / 1
3
 requiredTokenizerUsed
100.00% covered (success)
100.00%
5 / 5
100.00% covered (success)
100.00%
1 / 1
4
 disallowedComponentsPresent
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
3
 disallowedComponentCheck
100.00% covered (success)
100.00%
5 / 5
100.00% covered (success)
100.00%
1 / 1
3
 analyzerCheck
100.00% covered (success)
100.00%
8 / 8
100.00% covered (success)
100.00%
1 / 1
6
 insertGlobalCustomFilter
100.00% covered (success)
100.00%
10 / 10
100.00% covered (success)
100.00%
1 / 1
4
1<?php
2
3namespace CirrusSearch\Maintenance;
4
/**
 * Describes one "global custom filter" (e.g., homoglyph & nnbsp filters) and the
 * conditions under which it may be inserted into an analysis config: language
 * allow/deny lists, required plugins (with an optional fallback filter), a
 * required tokenizer, disallowed components, and ordering constraints.
 *
 * Instances are configured through the fluent set*() methods and then applied to
 * an analysis config with the static enableGlobalCustomFilters().
 */
class GlobalCustomFilter {
    /** @var string filter type, probably 'filter' or 'char_filter'; 'filter' by default */
    private $type;

    /** @var string[] which analyzers to apply to; 'text' and 'text_search' by default */
    private $applyToAnalyzers = [ 'text', 'text_search' ];

    /** @var string[] languages where this filter should not be used, by language codes */
    private $languageDenyList = [];

    /** @var string[] only languages where this filter should be used, by language codes */
    private $languageAllowList = [];

    /** @var string[] plugins that must be present to use the filter */
    private $requiredPlugins = [];

    /** @var string local filter to use instead if requiredPlugins are not available */
    private $fallbackFilter = '';

    /** @var string tokenizer that must be present to use the filter */
    private $requiredTokenizer = '';

    /** @var string[] tokenizers with which the filter is not allowed/needed */
    private $disallowedTokenizers = [];

    /** @var string[] token filters with which the filter is not allowed/needed */
    private $disallowedTokenFilters = [];

    /** @var string[] character filters with which the filter is not allowed/needed */
    private $disallowedCharFilters = [];

    /** @var string[] filters this one must come after. see T268730 */
    private $mustFollowFilters = [];

    /**
     * @param string $type key of the analyzer config list this filter is added to
     *  ('filter' by default)
     */
    public function __construct( string $type = 'filter' ) {
        $this->type = $type;
    }

    /**
     * @param string[] $languageDenyList language codes where the filter must not be used
     * @return self
     */
    public function setLanguageDenyList( array $languageDenyList ): self {
        $this->languageDenyList = $languageDenyList;
        return $this;
    }

    /**
     * @param string[] $languageAllowList language codes the filter is restricted to;
     *  an empty list means no restriction
     * @return self
     */
    public function setLanguageAllowList( array $languageAllowList ): self {
        $this->languageAllowList = $languageAllowList;
        return $this;
    }

    /**
     * @param string[] $requiredPlugins plugins that must all be available
     * @return self
     */
    public function setRequiredPlugins( array $requiredPlugins ): self {
        $this->requiredPlugins = $requiredPlugins;
        return $this;
    }

    /**
     * @param string $fallbackFilter filter name to insert instead when the required
     *  plugins are not available; '' means no fallback
     * @return self
     */
    public function setFallbackFilter( string $fallbackFilter ): self {
        $this->fallbackFilter = $fallbackFilter;
        return $this;
    }

    /**
     * @param string[] $applyToAnalyzers names of the analyzers to modify
     * @return self
     */
    public function setApplyToAnalyzers( array $applyToAnalyzers ): self {
        $this->applyToAnalyzers = $applyToAnalyzers;
        return $this;
    }

    /**
     * @return string[] names of the analyzers this filter is applied to
     */
    public function getApplyToAnalyzers() {
        return $this->applyToAnalyzers;
    }

    /**
     * @param string $requiredTokenizer tokenizer the analyzer must use; '' means
     *  any tokenizer is acceptable
     * @return self
     */
    public function setRequiredTokenizer( string $requiredTokenizer ): self {
        $this->requiredTokenizer = $requiredTokenizer;
        return $this;
    }

    /**
     * @param string[] $disallowedTokenizers
     * @return self
     */
    public function setDisallowedTokenizers( array $disallowedTokenizers ): self {
        $this->disallowedTokenizers = $disallowedTokenizers;
        return $this;
    }

    /**
     * @param string[] $disallowedTokenFilters
     * @return self
     */
    public function setDisallowedTokenFilters( array $disallowedTokenFilters ): self {
        $this->disallowedTokenFilters = $disallowedTokenFilters;
        return $this;
    }

    /**
     * @param string[] $disallowedCharFilters
     * @return self
     */
    public function setDisallowedCharFilters( array $disallowedCharFilters ): self {
        $this->disallowedCharFilters = $disallowedCharFilters;
        return $this;
    }

    /**
     * @param string[] $mustFollowFilters filters this one must be placed after
     * @return self
     */
    public function setMustFollowFilters( array $mustFollowFilters ): self {
        $this->mustFollowFilters = $mustFollowFilters;
        return $this;
    }

    /**
     * update languages with global custom filters (e.g., homoglyph & nnbsp filters)
     *
     * Each filter is skipped when the language check fails, or when its required
     * plugins are missing and no fallback filter is configured. Otherwise it (or
     * its fallback) is inserted into every configured analyzer that passes
     * analyzerCheck().
     *
     * @param mixed[] $config
     * @param string $language
     * @param GlobalCustomFilter[] $customFilters list of filters and info, keyed by
     *  filter name
     * @param string[] $installedPlugins
     * @return mixed[] updated config
     */
    public static function enableGlobalCustomFilters( array $config, string $language,
            array $customFilters, array $installedPlugins ) {
        foreach ( $customFilters as $filterName => $gcfInfo ) {
            if ( !$gcfInfo->languageCheck( $language ) ) {
                continue;
            }

            if ( !$gcfInfo->pluginsAvailable( $installedPlugins ) ) {
                // compare against '' rather than testing truthiness, so that a
                // fallback literally named '0' is not mistaken for "no fallback"
                if ( $gcfInfo->fallbackFilter === '' ) {
                    continue;
                }
                $filterName = $gcfInfo->fallbackFilter;
            }

            foreach ( $gcfInfo->getApplyToAnalyzers() as $analyzer ) {
                if ( $gcfInfo->analyzerCheck( $config, $analyzer, $filterName ) ) {
                    $config = $gcfInfo->insertGlobalCustomFilter( $config, $analyzer,
                        $filterName );
                }
            }
        }

        return $config;
    }

    /**
     * check language deny and allow lists to see if this filter is allowed in this
     * analyzer
     *
     * @param string $language
     * @return bool false if the language is denied, or if an allow list is set and
     *  the language is not on it
     */
    private function languageCheck( string $language ): bool {
        if ( in_array( $language, $this->languageDenyList, true ) ) {
            return false;
        }
        // an empty allow list places no restriction on the language
        return !$this->languageAllowList
            || in_array( $language, $this->languageAllowList, true );
    }

    /**
     * check to see if the filter is compatible with the set of installed plugins
     *
     * @param string[] $installedPlugins
     * @return bool true if Plugins::contains() accepts every required plugin
     */
    private function pluginsAvailable( array $installedPlugins ): bool {
        foreach ( $this->requiredPlugins as $reqPlugin ) {
            if ( !Plugins::contains( $reqPlugin, $installedPlugins ) ) {
                return false;
            }
        }
        return true;
    }

    /**
     * check to see if the filter is compatible with the configured tokenizer
     *
     * @param mixed[] $analyzerConfig config of a single analyzer
     * @return bool true if no tokenizer is required, or the analyzer uses the
     *  required one
     */
    private function requiredTokenizerUsed( array $analyzerConfig ): bool {
        if ( $this->requiredTokenizer === '' ) {
            return true;
        }
        // a missing tokenizer entry can never equal the (non-empty) required name
        return ( $analyzerConfig['tokenizer'] ?? null ) === $this->requiredTokenizer;
    }

    /**
     * check if any disqualifying analysis components are already present in the config
     *
     * @param mixed[] $config
     * @param string $analyzer
     * @return bool true if a disallowed token filter, char filter, or tokenizer is used
     */
    private function disallowedComponentsPresent( array $config, string $analyzer ): bool {
        return $this->disallowedComponentCheck( $config, $analyzer, 'filter', $this->disallowedTokenFilters )
            || $this->disallowedComponentCheck( $config, $analyzer, 'char_filter', $this->disallowedCharFilters )
            || $this->disallowedComponentCheck( $config, $analyzer, 'tokenizer', $this->disallowedTokenizers );
    }

    /**
     * check for specific disqualifying analysis components
     *
     * @param mixed[] $config
     * @param string $analyzer
     * @param string $component key within the analyzer config to inspect
     * @param string[] $disallowed
     * @return bool true if any of $disallowed is present under $component
     */
    private function disallowedComponentCheck( array $config, string $analyzer,
            string $component, array $disallowed ): bool {
        // the cast lets a single string value (e.g., a tokenizer name) be
        // handled the same way as a list of names
        $present = (array)( $config['analyzer'][$analyzer][$component] ?? [] );
        foreach ( $disallowed as $name ) {
            if ( in_array( $name, $present, true ) ) {
                return true;
            }
        }
        return false;
    }

    /**
     * check that the analyzer checks all the boxes to insert this filter
     *
     * @param mixed[] $config
     * @param string $analyzer
     * @param string $filterName filter we want to add
     * @return bool
     */
    private function analyzerCheck( array $config, string $analyzer,
            string $filterName ): bool {
        // analyzer must exist; also tolerates a config with no 'analyzer' section
        $analyzerConfig = $config['analyzer'][$analyzer] ?? null;
        if ( !is_array( $analyzerConfig ) ) {
            return false;
        }

        // analyzer must be custom; a missing 'type' counts as not custom
        if ( ( $analyzerConfig['type'] ?? null ) !== 'custom' ) {
            return false;
        }

        if ( !$this->requiredTokenizerUsed( $analyzerConfig )
            || $this->disallowedComponentsPresent( $config, $analyzer )
        ) {
            return false;
        }

        // not a duplicate
        $filters = $analyzerConfig[$this->type] ?? [];
        return !in_array( $filterName, $filters, true );
    }

    /**
     * insert one of the global custom filters into the right spot in the analysis chain
     *
     * The filter is placed immediately after the last occurrence of any of the
     * mustFollowFilters, or at the front of the chain if none of them is present.
     *
     * @param mixed[] $config the analysis config we are modifying
     * @param string $analyzer the specific analyzer we are modifying
     * @param string $filterName filter to add
     * @return mixed[] updated config
     */
    private function insertGlobalCustomFilter( array $config, string $analyzer,
            string $filterName ): array {
        // array_splice() takes a position, not a key, so re-index the chain
        // first; otherwise non-sequential keys would put the filter in the
        // wrong place
        $filters = array_values( $config['analyzer'][$analyzer][$this->type] ?? [] );

        $insertAt = 0;
        foreach ( $this->mustFollowFilters as $mustFollow ) {
            $positions = array_keys( $filters, $mustFollow, true );
            if ( $positions ) {
                // go after the last occurrence of this must-follow filter
                $insertAt = max( $insertAt, end( $positions ) + 1 );
            }
        }
        array_splice( $filters, $insertAt, 0, [ $filterName ] );

        $config['analyzer'][$analyzer][$this->type] = $filters;

        return $config;
    }

}