Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
99.40% covered (success)
99.40%
1002 / 1008
76.92% covered (warning)
76.92%
20 / 26
CRAP
0.00% covered (danger)
0.00%
0 / 1
AnalysisConfigBuilder
99.40% covered (success)
99.40%
1002 / 1008
76.92% covered (warning)
76.92%
20 / 26
215
0.00% covered (danger)
0.00%
0 / 1
 __construct
96.30% covered (success)
96.30%
26 / 27
0.00% covered (danger)
0.00%
0 / 1
8
 shouldActivateIcuFolding
100.00% covered (success)
100.00%
14 / 14
100.00% covered (success)
100.00%
1 / 1
9
 shouldActivateIcuTokenization
100.00% covered (success)
100.00%
10 / 10
100.00% covered (success)
100.00%
1 / 1
7
 buildConfig
100.00% covered (success)
100.00%
13 / 13
100.00% covered (success)
100.00%
1 / 1
4
 buildSimilarityConfig
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 enableICUTokenizer
100.00% covered (success)
100.00%
6 / 6
100.00% covered (success)
100.00%
1 / 1
6
 standardTokenizerOnlyCleanup
100.00% covered (success)
100.00%
7 / 7
100.00% covered (success)
100.00%
1 / 1
6
 disableLimitedMappings
100.00% covered (success)
100.00%
5 / 5
100.00% covered (success)
100.00%
1 / 1
4
 enableICUFolding
100.00% covered (success)
100.00%
32 / 32
100.00% covered (success)
100.00%
1 / 1
12
 switchFiltersToICUFolding
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
1
 switchFiltersToICUFoldingPreserve
94.44% covered (success)
94.44%
17 / 18
0.00% covered (danger)
0.00%
0 / 1
7.01
 getICUSetFilter
98.00% covered (success)
98.00%
49 / 50
0.00% covered (danger)
0.00%
0 / 1
29
 getICUNormSetFilter
80.00% covered (warning)
80.00%
4 / 5
0.00% covered (danger)
0.00%
0 / 1
4.13
 defaults
100.00% covered (success)
100.00%
286 / 286
100.00% covered (success)
100.00%
1 / 1
7
 customize
100.00% covered (success)
100.00%
424 / 424
100.00% covered (success)
100.00%
1 / 1
68
 fixAsciiFolding
100.00% covered (success)
100.00%
16 / 16
100.00% covered (success)
100.00%
1 / 1
7
 getDefaultTextAnalyzerType
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
2
 getDefaultFilters
100.00% covered (success)
100.00%
8 / 8
100.00% covered (success)
100.00%
1 / 1
5
 resolveFilters
100.00% covered (success)
100.00%
10 / 10
100.00% covered (success)
100.00%
1 / 1
4
 replaceFilter
87.50% covered (warning)
87.50%
7 / 8
0.00% covered (danger)
0.00%
0 / 1
4.03
 mergeConfig
100.00% covered (success)
100.00%
17 / 17
100.00% covered (success)
100.00%
1 / 1
12
 buildLanguageConfigs
100.00% covered (success)
100.00%
10 / 10
100.00% covered (success)
100.00%
1 / 1
4
 isIcuAvailable
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 isTextifyAvailable
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 enableGlobalCustomFilters
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 buildGlobalCustomFilters
100.00% covered (success)
100.00%
31 / 31
100.00% covered (success)
100.00%
1 / 1
1
1<?php
2
3namespace CirrusSearch\Maintenance;
4
5use CirrusSearch\CirrusSearch;
6use CirrusSearch\CirrusSearchHookRunner;
7use CirrusSearch\Profile\SearchProfileService;
8use CirrusSearch\SearchConfig;
9use MediaWiki\MediaWikiServices;
10
11/**
12 * Builds Elasticsearch analysis config arrays.
13 *
14 * This program is free software; you can redistribute it and/or modify
15 * it under the terms of the GNU General Public License as published by
16 * the Free Software Foundation; either version 2 of the License, or
17 * (at your option) any later version.
18 *
19 * This program is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU General Public License for more details.
23 *
24 * You should have received a copy of the GNU General Public License along
25 * with this program; if not, write to the Free Software Foundation, Inc.,
26 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
27 * http://www.gnu.org/copyleft/gpl.html
28 */
29class AnalysisConfigBuilder {
30    /**
31     * Version number for the core analysis. Increment the major
32     * version when the analysis changes in an incompatible way,
33     * and change the minor version when it changes but isn't
34     * incompatible.
35     *
36     * You may also need to increment MetaStoreIndex::METASTORE_VERSION
37     * manually.
38     */
39    public const VERSION = '0.12';
40
41    /**
42     * Maximum number of characters allowed in keyword terms.
43     */
44    private const KEYWORD_IGNORE_ABOVE = 5000;
45
46    /**
47     * Temporary magic value to prevent enabling ICU tokenizer in specific analyzers
48     */
49    private const STANDARD_TOKENIZER_ONLY = 'std_only';
50
51    /**
52     * @var bool is the icu plugin available?
53     */
54    private $icu;
55
56    /**
57     * @var bool is the textify plugin available?
58     */
59    private $textify;
60
61    /**
62     * @var string which ICU tokenizer should be used
63     */
64    private $icu_tokenizer = 'icu_tokenizer';
65
66    /**
67     * @var array Similarity algorithm (TF/IDF, BM25, etc.) configuration
68     */
69    private $similarity;
70
71    /**
72     * @var SearchConfig cirrus config
73     */
74    protected $config;
75
76    /**
77     * @var string[]
78     */
79    private $plugins;
80
81    /**
82     * @var string
83     */
84    protected $defaultLanguage;
85
86    /**
87     * @var CirrusSearchHookRunner
88     */
89    private $cirrusSearchHookRunner;
90
91    /**
92     * @var GlobalCustomFilter[]
93     */
94    public $globalCustomFilters;
95
96    /**
97     * @param string $langCode The language code to build config for
98     * @param string[] $plugins list of plugins installed in Elasticsearch
99     * @param SearchConfig|null $config
100     * @param CirrusSearchHookRunner|null $cirrusSearchHookRunner
101     */
102    public function __construct(
103        $langCode,
104        array $plugins,
105        SearchConfig $config = null,
106        CirrusSearchHookRunner $cirrusSearchHookRunner = null
107    ) {
108        $this->globalCustomFilters = $this->buildGlobalCustomFilters();
109
110        $this->defaultLanguage = $langCode;
111        $this->plugins = $plugins;
112        foreach ( $this->elasticsearchLanguageAnalyzersFromPlugins as $pluginSpec => $extra ) {
113            $pluginsPresent = 1;
114            $pluginList = explode( ',', $pluginSpec );
115            foreach ( $pluginList as $plugin ) {
116                if ( !in_array( $plugin, $plugins ) ) {
117                    $pluginsPresent = 0;
118                    break;
119                }
120            }
121            if ( $pluginsPresent ) {
122                $this->elasticsearchLanguageAnalyzers =
123                    array_merge( $this->elasticsearchLanguageAnalyzers, $extra );
124            }
125        }
126        $this->icu = in_array( 'analysis-icu', $plugins );
127        $this->textify = in_array( 'extra-analysis-textify', $plugins );
128        if ( $this->isTextifyAvailable() ) {
129            // icu_token_repair can only work with the textify icu_tokenizer clone
130            $this->icu_tokenizer = 'textify_icu_tokenizer';
131        }
132        $config ??= MediaWikiServices::getInstance()->getConfigFactory()
133            ->makeConfig( 'CirrusSearch' );
134        $similarity = $config->getProfileService()->loadProfile( SearchProfileService::SIMILARITY );
135        if ( !array_key_exists( 'similarity', $similarity ) ) {
136            $similarity['similarity'] = [];
137        }
138        $this->cirrusSearchHookRunner = $cirrusSearchHookRunner ?: new CirrusSearchHookRunner(