Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
53.16% |
42 / 79 |
|
33.33% |
6 / 18 |
CRAP | |
0.00% |
0 / 1 |
Connection | |
53.16% |
42 / 79 |
|
33.33% |
6 / 18 |
169.15 | |
0.00% |
0 / 1 |
getPool | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
1 | |||
clearPool | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
__construct | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
1 | |||
__sleep | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getClusterName | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getSettings | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
2 | |||
getServerList | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getMaxConnectionAttempts | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getArchiveIndex | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getAllIndexSuffixes | |
100.00% |
22 / 22 |
|
100.00% |
1 / 1 |
7 | |||
extractIndexSuffix | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
2 | |||
getIndexSuffixForNamespace | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
30 | |||
pickIndexTypeForNamespaces | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
pickIndexSuffixForNamespaces | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
20 | |||
getAllIndexSuffixesForNamespaces | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
12 | |||
destroyClient | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
getClusterConnections | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
getConfig | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 |
1 | <?php |
2 | |
3 | namespace CirrusSearch; |
4 | |
5 | use Exception; |
6 | use LogicException; |
7 | use MediaWiki\Extension\Elastica\ElasticaConnection; |
8 | use MediaWiki\MediaWikiServices; |
9 | use Wikimedia\Assert\Assert; |
10 | |
11 | /** |
12 | * Forms and caches connections to Elasticsearch, as well as client objects |
13 | * that carry connection information, such as \Elastica\Index and \Elastica\Type. |
14 | * |
15 | * This program is free software; you can redistribute it and/or modify |
16 | * it under the terms of the GNU General Public License as published by |
17 | * the Free Software Foundation; either version 2 of the License, or |
18 | * (at your option) any later version. |
19 | * |
20 | * This program is distributed in the hope that it will be useful, |
21 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
22 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
23 | * GNU General Public License for more details. |
24 | * |
25 | * You should have received a copy of the GNU General Public License along |
26 | * with this program; if not, write to the Free Software Foundation, Inc., |
27 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
28 | * http://www.gnu.org/copyleft/gpl.html |
29 | */ |
30 | class Connection extends ElasticaConnection { |
31 | |
32 | /** |
33 | * Suffix of the index that holds content articles. |
34 | */ |
35 | public const CONTENT_INDEX_SUFFIX = 'content'; |
36 | |
37 | /** |
38 | * Suffix of the index that holds non-content articles. |
39 | */ |
40 | public const GENERAL_INDEX_SUFFIX = 'general'; |
41 | |
42 | /** |
43 | * Suffix of the index that hosts content title suggestions |
44 | */ |
45 | public const TITLE_SUGGEST_INDEX_SUFFIX = 'titlesuggest'; |
46 | |
47 | /** |
48 | * Suffix of the index that hosts archive data |
49 | */ |
50 | public const ARCHIVE_INDEX_SUFFIX = 'archive'; |
51 | |
52 | /** |
53 | * Name of the page document type. |
54 | */ |
55 | public const PAGE_DOC_TYPE = 'page'; |
56 | |
57 | /** |
58 | * Name of the title suggest document type |
59 | */ |
60 | public const TITLE_SUGGEST_DOC_TYPE = 'titlesuggest'; |
61 | |
62 | /** |
63 | * Name of the archive document type |
64 | */ |
65 | public const ARCHIVE_DOC_TYPE = 'archive'; |
66 | |
67 | /** |
68 | * Map of index types (suffix names) indexed by mapping type. |
69 | */ |
70 | private const SUFFIX_MAPPING = [ |
71 | self::PAGE_DOC_TYPE => [ |
72 | self::CONTENT_INDEX_SUFFIX, |
73 | self::GENERAL_INDEX_SUFFIX, |
74 | ], |
75 | self::ARCHIVE_DOC_TYPE => [ |
76 | self::ARCHIVE_INDEX_SUFFIX |
77 | ], |
78 | ]; |
79 | |
80 | /** |
81 | * @var SearchConfig |
82 | */ |
83 | protected $config; |
84 | |
85 | /** |
86 | * @var string |
87 | */ |
88 | protected $cluster; |
89 | |
90 | /** |
91 | * @var ClusterSettings|null |
92 | */ |
93 | private $clusterSettings; |
94 | |
95 | /** |
96 | * @var Connection[][] |
97 | */ |
98 | private static $pool = []; |
99 | |
/**
 * Fetch the pooled connection for a configuration/cluster pair, building a
 * new connection when the pool holds no matching entry.
 *
 * Pool entries are keyed first by the wiki id of $config and then by the
 * unique id the cluster assignment reports for the cluster.
 *
 * @param SearchConfig $config
 * @param string|null $cluster Cluster name, or null to use the search
 *  cluster reported by the config's cluster assignment.
 * @return Connection
 */
public static function getPool( SearchConfig $config, $cluster = null ) {
	$assignment = $config->getClusterAssignment();
	if ( $cluster === null ) {
		$cluster = $assignment->getSearchCluster();
	}

	$wikiId = $config->getWikiId();
	$poolKey = $assignment->uniqueId( $cluster );
	if ( isset( self::$pool[$wikiId][$poolKey] ) ) {
		return self::$pool[$wikiId][$poolKey];
	}

	// The constructor stores the new instance in the pool.
	return new self( $config, $cluster );
}
112 | |
/**
 * Forget every pooled connection.
 *
 * The pool state must be cleared when forking; resetting it is also
 * useful in tests.
 */
public static function clearPool() {
	self::$pool = [];
}
120 | |
/**
 * Build a connection for one cluster and store it in the static pool.
 *
 * @param SearchConfig $config
 * @param string|null $cluster Name of cluster to use, or
 *  null for the default cluster.
 */
public function __construct( SearchConfig $config, $cluster = null ) {
	$this->config = $config;
	$assignment = $config->getClusterAssignment();
	$this->cluster = $cluster ?? $assignment->getSearchCluster();

	$settings = $this->getSettings();
	$this->setConnectTimeout( $settings->getConnectTimeout() );

	// Storing the instance here overwrites any previous connection pooled
	// under the same key, but this seemed more centralized than having the
	// entry points all call a static method unnecessarily.
	// TODO: Assumes all $config that return same wiki id have same config, but there
	// are places that expect they can wrap config with new values and use them.
	$poolKey = $assignment->uniqueId( $this->cluster );
	$wikiId = $config->getWikiId();
	self::$pool[$wikiId][$poolKey] = $this;
}
139 | |
/**
 * Refuse serialization: this always throws.
 *
 * @return never
 */
public function __sleep() {
	// Fail loudly rather than letting a connection be serialized.
	throw new \RuntimeException( 'Attempting to serialize ES connection' );
}
146 | |
/**
 * Name of the cluster this connection was created for.
 *
 * @return string
 */
public function getClusterName() {
	$clusterName = $this->cluster;
	return $clusterName;
}
153 | |
/**
 * Settings of the cluster this connection targets. The ClusterSettings
 * object is created on first use and reused afterwards.
 *
 * @return ClusterSettings
 */
public function getSettings() {
	$this->clusterSettings ??= new ClusterSettings( $this->config, $this->cluster );
	return $this->clusterSettings;
}
163 | |
/**
 * Servers of this connection's cluster, as reported by the cluster
 * assignment of the search config.
 *
 * @return string[]|array[] Either a list of hostnames, for default
 * connection configuration or an array of arrays giving full connection
 * specifications.
 */
public function getServerList() {
	$assignment = $this->config->getClusterAssignment();
	return $assignment->getServerList( $this->cluster );
}
172 | |
/**
 * How many times can we attempt to connect per host?
 *
 * Reads the 'CirrusSearchConnectionAttempts' configuration value.
 *
 * @return int
 */
public function getMaxConnectionAttempts() {
	$attempts = $this->config->get( 'CirrusSearchConnectionAttempts' );
	return $attempts;
}
181 | |
/**
 * Fetch the Elastica Index for archive.
 *
 * Delegates to getIndex() with the archive index suffix.
 *
 * @param mixed $name basename of index
 * @return \Elastica\Index
 */
public function getArchiveIndex( $name ) {
	$archiveIndex = $this->getIndex( $name, self::ARCHIVE_INDEX_SUFFIX );
	return $archiveIndex;
}
190 | |
/**
 * Get all index types we support, content, general, plus custom ones
 *
 * The suffixes configured in 'CirrusSearchNamespaceMappings' are appended
 * when $documentType is self::PAGE_DOC_TYPE or null. The archive suffix is
 * removed unless the cluster is private and 'CirrusSearchEnableArchive' is
 * set; the result is re-indexed afterwards so it is always a list.
 *
 * @param string|null $documentType the document type name the index must support to be returned
 * can be self::PAGE_DOC_TYPE for content and general indices but also self::ARCHIVE_DOC_TYPE
 * for the archive index. Defaults to Connection::PAGE_DOC_TYPE.
 * set to null to return all known index types (only suited for maintenance tasks, not for read/write operations).
 * @return string[]
 */
public function getAllIndexSuffixes( $documentType = self::PAGE_DOC_TYPE ) {
	Assert::parameter( $documentType === null || isset( self::SUFFIX_MAPPING[$documentType] ),
		'$documentType', "Unknown mapping type $documentType" );

	if ( $documentType === null ) {
		// All known suffixes, whatever the document type.
		$indexSuffixes = [];
		foreach ( self::SUFFIX_MAPPING as $suffixes ) {
			$indexSuffixes = array_merge( $indexSuffixes, $suffixes );
		}
		$withCustomSuffixes = true;
	} else {
		$indexSuffixes = self::SUFFIX_MAPPING[$documentType];
		// Custom namespace indices only ever hold page documents.
		$withCustomSuffixes = $documentType === self::PAGE_DOC_TYPE;
	}

	if ( $withCustomSuffixes ) {
		$indexSuffixes = array_merge(
			$indexSuffixes,
			array_values( $this->config->get( 'CirrusSearchNamespaceMappings' ) )
		);
	}

	$archiveAvailable = $this->getSettings()->isPrivateCluster()
		&& $this->config->get( 'CirrusSearchEnableArchive' );
	if ( !$archiveAvailable ) {
		// array_filter() preserves keys; re-index so that callers always
		// receive a list without gaps.
		$indexSuffixes = array_values( array_filter(
			$indexSuffixes,
			static function ( $suffix ) {
				return $suffix !== self::ARCHIVE_INDEX_SUFFIX;
			}
		) );
	}

	return $indexSuffixes;
}
232 | |
/**
 * Extract the index suffix from a full index name.
 *
 * The name must end with "_<suffix>_<identifier>", where <suffix> is one of
 * the values returned by getAllIndexSuffixes( null ) and <identifier>
 * contains no underscore.
 *
 * @param string $name
 * @return string The matched index suffix
 * @throws LogicException if no known suffix can be found in $name
 */
public function extractIndexSuffix( $name ) {
	// Quote with the pattern delimiter so that a configured suffix
	// containing "/" cannot terminate the regular expression early.
	$quotedSuffixes = array_map(
		static function ( $suffix ) {
			return preg_quote( $suffix, '/' );
		},
		$this->getAllIndexSuffixes( null )
	);
	$possible = implode( '|', $quotedSuffixes );

	$matches = [];
	if ( !preg_match( "/_($possible)_[^_]+$/", $name, $matches ) ) {
		throw new LogicException( "Can't parse index name: $name" );
	}

	return $matches[1];
}
247 | |
/**
 * Get the index suffix for a given namespace
 *
 * Lookup order: an explicit entry in 'CirrusSearchNamespaceMappings', then
 * the content suffix when the namespace is enabled in
 * 'NamespacesToBeSearchedDefault', and finally content or general
 * depending on NamespaceInfo::isContent().
 *
 * @param int $namespace A namespace id
 * @return string
 */
public function getIndexSuffixForNamespace( $namespace ) {
	$customSuffixes = $this->config->get( 'CirrusSearchNamespaceMappings' );
	if ( isset( $customSuffixes[$namespace] ) ) {
		return $customSuffixes[$namespace];
	}

	$searchedByDefault = $this->config->get( 'NamespacesToBeSearchedDefault' );
	if ( $searchedByDefault[$namespace] ?? false ) {
		return self::CONTENT_INDEX_SUFFIX;
	}

	$namespaceInfo = MediaWikiServices::getInstance()->getNamespaceInfo();
	if ( $namespaceInfo->isContent( $namespace ) ) {
		return self::CONTENT_INDEX_SUFFIX;
	}
	return self::GENERAL_INDEX_SUFFIX;
}
266 | |
/**
 * Deprecated alias that forwards to pickIndexSuffixForNamespaces().
 *
 * @param int[]|null $namespaces List of namespaces to check
 * @return string|false The suffix to use (e.g. content or general) to
 * query the namespaces, or false if both need to be queried.
 * @deprecated 1.38 Use self::pickIndexSuffixForNamespaces
 */
public function pickIndexTypeForNamespaces( ?array $namespaces = null ) {
	$suffix = $this->pickIndexSuffixForNamespaces( $namespaces );
	return $suffix;
}
276 | |
/**
 * Pick the single index suffix that covers all the given namespaces.
 *
 * @param int[]|null $namespaces List of namespaces to check
 * @return string|false The suffix to use (e.g. content or general) to
 * query the namespaces, or false if all need to be queried. False is
 * also returned when $namespaces is null or empty.
 */
public function pickIndexSuffixForNamespaces( ?array $namespaces = null ) {
	if ( !$namespaces ) {
		return false;
	}

	$suffixes = [];
	foreach ( $namespaces as $namespace ) {
		$suffixes[] = $this->getIndexSuffixForNamespace( $namespace );
	}
	// array_unique() keeps the first occurrence of each value, so index 0
	// is always present in the de-duplicated array.
	$distinct = array_unique( $suffixes );

	return count( $distinct ) === 1 ? $distinct[0] : false;
}
296 | |
/**
 * List the index suffixes needed to cover the given namespaces.
 *
 * @param int[]|null $namespaces List of namespaces to check
 * @return string[] the list of all index suffixes matching the namespaces;
 * when no namespace is given, the page suffixes plus every suffix
 * configured in 'CirrusSearchNamespaceMappings'.
 */
public function getAllIndexSuffixesForNamespaces( $namespaces = null ) {
	if ( !$namespaces ) {
		// If no namespaces provided all indices are needed
		$customSuffixes = array_values( $this->config->get( 'CirrusSearchNamespaceMappings' ) );
		return array_merge( self::SUFFIX_MAPPING[self::PAGE_DOC_TYPE], $customSuffixes );
	}

	$suffixes = [];
	foreach ( $namespaces as $namespace ) {
		$suffixes[] = $this->getIndexSuffixForNamespace( $namespace );
	}
	return array_unique( $suffixes );
}
314 | |
/**
 * Empty the static connection pool (for every wiki and cluster), then let
 * the parent class destroy its client.
 */
public function destroyClient() {
	self::clearPool();
	parent::destroyClient();
}
319 | |
/**
 * Obtain one connection per cluster name through getPool().
 *
 * @param string[] $clusters array of cluster names
 * @param SearchConfig $config the search config
 * @return Connection[] array of connection indexed by cluster name
 */
public static function getClusterConnections( array $clusters, SearchConfig $config ) {
	$byCluster = [];
	foreach ( $clusters as $clusterName ) {
		$byCluster[$clusterName] = self::getPool( $config, $clusterName );
	}

	return $byCluster;
}
332 | |
/**
 * The search configuration this connection was constructed with.
 *
 * @return SearchConfig
 */
public function getConfig() {
	$searchConfig = $this->config;
	return $searchConfig;
}
339 | } |