Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
25.84% |
23 / 89 |
|
87.50% |
7 / 8 |
CRAP | |
0.00% |
0 / 1 |
PhraseSuggesterProfileRepoWrapper | |
25.84% |
23 / 89 |
|
87.50% |
7 / 8 |
1111.72 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
fromFile | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
fromConfig | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
repositoryType | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
repositoryName | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getProfile | |
18.52% |
15 / 81 |
|
0.00% |
0 / 1 |
1091.33 | |||
hasProfile | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
listExposedProfiles | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 |
1 | <?php |
2 | |
3 | namespace CirrusSearch\Profile; |
4 | |
5 | use CirrusSearch\Util; |
6 | use MediaWiki\Config\Config; |
7 | use Wikimedia\ObjectCache\BagOStuff; |
8 | |
/**
 * Wrapper to augment the phrase suggester profile settings
 * with customization on-wiki using system messages.
 *
 * Every call except getProfile() is forwarded untouched to the wrapped
 * repository. getProfile() overlays the wrapped profile with the
 * "key:value" lines found in the cirrussearch-didyoumean-settings system
 * message; values that are malformed or outside the hard limits declared
 * below are ignored and the wrapped profile's value is kept.
 */
class PhraseSuggesterProfileRepoWrapper implements SearchProfileRepository {

	/** Highest max_errors value accepted from the on-wiki settings */
	private const MAX_ERRORS_HARD_LIMIT = 2;
	/** Highest max_term_freq value accepted from the on-wiki settings */
	private const MAX_TERM_FREQ_HARD_LIMIT = 0.6;
	/** Highest prefix_length value accepted from the on-wiki settings */
	private const PREFIX_LENGTH_HARD_LIMIT = 2;
	/** Name of the system message holding the overrides, also used as cache key component */
	public const CIRRUSSEARCH_DIDYOUMEAN_SETTINGS = 'cirrussearch-didyoumean-settings';

	/** Expiry passed to the cache for the parsed message lines */
	private const SETTINGS_CACHE_TTL = 600;
	/** Alpha used when "smoothing:laplace" is set without a valid laplace_alpha */
	private const DEFAULT_LAPLACE_ALPHA = 0.5;
	/** Discount used when "smoothing:stupid_backoff" is set without a valid stupid_backoff_discount */
	private const DEFAULT_STUPID_BACKOFF_DISCOUNT = 0.4;

	/**
	 * Values accepted for the suggest_mode setting
	 * @var string[]
	 */
	private static $ALLOWED_MODE = [ 'missing', 'popular', 'always' ];

	/**
	 * Repository providing the base profiles
	 * @var SearchProfileRepository
	 */
	private $wrapped;

	/**
	 * Cache holding the parsed lines of the settings message
	 * @var BagOStuff
	 */
	private $bagOStuff;

	/**
	 * @param SearchProfileRepository $wrapped repository providing the base profiles
	 * @param BagOStuff $bagOStuff cache used to store the parsed on-wiki settings
	 */
	public function __construct( SearchProfileRepository $wrapped, BagOStuff $bagOStuff ) {
		$this->wrapped = $wrapped;
		$this->bagOStuff = $bagOStuff;
	}

	/**
	 * Wrap an ArrayProfileRepository built from a PHP file.
	 *
	 * @param string $type
	 * @param string $name
	 * @param string $phpFile
	 * @param BagOStuff $cache
	 * @return PhraseSuggesterProfileRepoWrapper
	 */
	public static function fromFile( $type, $name, $phpFile, BagOStuff $cache ) {
		return new self( ArrayProfileRepository::fromFile( $type, $name, $phpFile ), $cache );
	}

	/**
	 * Wrap a ConfigProfileRepository built from a config entry.
	 *
	 * @param string $type
	 * @param string $name
	 * @param string $configEntry
	 * @param Config $config
	 * @param BagOStuff $cache
	 * @return PhraseSuggesterProfileRepoWrapper
	 */
	public static function fromConfig( $type, $name, $configEntry, Config $config, BagOStuff $cache ) {
		return new self( new ConfigProfileRepository( $type, $name, $configEntry, $config ), $cache );
	}

	/**
	 * The repository type (as reported by the wrapped repository)
	 * @return string
	 */
	public function repositoryType() {
		return $this->wrapped->repositoryType();
	}

	/**
	 * The repository name (as reported by the wrapped repository)
	 * @return string
	 */
	public function repositoryName() {
		return $this->wrapped->repositoryName();
	}

	/**
	 * Load a profile named $name and overlay the on-wiki settings on top of it.
	 *
	 * Recognized keys: max_errors, confidence, max_term_freq, min_doc_freq,
	 * prefix_length, suggest_mode, collate, smoothing, laplace_alpha and
	 * stupid_backoff_discount. Unknown keys and invalid values are skipped.
	 *
	 * @param string $name
	 * @return array|null the profile data or null if not found
	 */
	public function getProfile( $name ) {
		$settings = $this->wrapped->getProfile( $name );
		if ( $settings === null ) {
			return null;
		}

		// Smoothing parameters are collected first and applied after the loop so
		// that they work regardless of their position relative to "smoothing".
		$laplaceAlpha = null;
		$stupidBackoffDiscount = null;
		foreach ( $this->loadOverrideLines() as $line ) {
			$linePieces = explode( ':', $line, 2 );
			if ( count( $linePieces ) !== 2 ) {
				// Skip improperly formatted lines without a key:value
				continue;
			}
			// Tolerate whitespace around the separator ("collate: true"): numeric
			// values already accepted it through is_numeric(), while the
			// exact-match settings (suggest_mode, collate, smoothing) silently
			// ignored such lines.
			$k = trim( $linePieces[0] );
			$v = trim( $linePieces[1] );

			switch ( $k ) {
				case 'max_errors':
					if ( is_numeric( $v ) && $v >= 1 && $v <= self::MAX_ERRORS_HARD_LIMIT ) {
						$settings['max_errors'] = floatval( $v );
					}
					break;
				case 'confidence':
					if ( is_numeric( $v ) && $v >= 0 ) {
						$settings['confidence'] = floatval( $v );
					}
					break;
				case 'max_term_freq':
					if ( is_numeric( $v ) && $v >= 0 && $v <= self::MAX_TERM_FREQ_HARD_LIMIT ) {
						$settings['max_term_freq'] = floatval( $v );
					}
					break;
				case 'min_doc_freq':
					// Upper bound is exclusive here, unlike the other settings
					if ( is_numeric( $v ) && $v >= 0 && $v < 1 ) {
						$settings['min_doc_freq'] = floatval( $v );
					}
					break;
				case 'prefix_length':
					if ( is_numeric( $v ) && $v >= 0 && $v <= self::PREFIX_LENGTH_HARD_LIMIT ) {
						$settings['prefix_length'] = intval( $v );
					}
					break;
				case 'suggest_mode':
					// Stored under 'mode', not under the on-wiki key name
					if ( in_array( $v, self::$ALLOWED_MODE, true ) ) {
						$settings['mode'] = $v;
					}
					break;
				case 'collate':
					if ( $v === 'true' ) {
						$settings['collate'] = true;
					} elseif ( $v === 'false' ) {
						$settings['collate'] = false;
					}
					break;
				case 'smoothing':
					// Selecting a model replaces any smoothing_model of the wrapped profile
					if ( $v === 'laplace' ) {
						$settings['smoothing_model'] = [
							'laplace' => [
								'alpha' => self::DEFAULT_LAPLACE_ALPHA
							]
						];
					} elseif ( $v === 'stupid_backoff' ) {
						$settings['smoothing_model'] = [
							'stupid_backoff' => [
								'discount' => self::DEFAULT_STUPID_BACKOFF_DISCOUNT
							]
						];
					}
					break;
				case 'laplace_alpha':
					if ( is_numeric( $v ) && $v >= 0 && $v <= 1 ) {
						$laplaceAlpha = floatval( $v );
					}
					break;
				case 'stupid_backoff_discount':
					if ( is_numeric( $v ) && $v >= 0 && $v <= 1 ) {
						$stupidBackoffDiscount = floatval( $v );
					}
					break;
			}
		}

		// Apply the smoothing model options, but only to the model that is
		// actually selected in the resulting profile.
		if ( isset( $settings['smoothing_model']['laplace'] ) && $laplaceAlpha !== null ) {
			$settings['smoothing_model']['laplace'] = [
				'alpha' => $laplaceAlpha
			];
		}
		if ( isset( $settings['smoothing_model']['stupid_backoff'] ) && $stupidBackoffDiscount !== null ) {
			$settings['smoothing_model']['stupid_backoff'] = [
				'discount' => $stupidBackoffDiscount
			];
		}
		return $settings;
	}

	/**
	 * Check if a profile named $name exists in this repository
	 * @param string $name
	 * @return bool
	 */
	public function hasProfile( $name ) {
		return $this->wrapped->hasProfile( $name );
	}

	/**
	 * Get the list of profiles that we want to expose to the user.
	 *
	 * @return array[] list of profiles index by name
	 */
	public function listExposedProfiles() {
		return $this->wrapped->listExposedProfiles();
	}

	/**
	 * Fetch the lines of the on-wiki settings message, going through the cache.
	 *
	 * @return string[] the lines produced by Util::parseSettingsInMessage(), or an
	 *  empty array when the message is disabled
	 */
	private function loadOverrideLines() {
		return $this->bagOStuff->getWithSetCallback(
			$this->bagOStuff->makeKey( self::CIRRUSSEARCH_DIDYOUMEAN_SETTINGS ),
			self::SETTINGS_CACHE_TTL,
			static function () {
				$source = wfMessage( self::CIRRUSSEARCH_DIDYOUMEAN_SETTINGS )->inContentLanguage();
				if ( $source->isDisabled() ) {
					return [];
				}
				return Util::parseSettingsInMessage( $source->plain() );
			}
		);
	}
}