Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
87.80% |
36 / 41 |
|
50.00% |
2 / 4 |
CRAP | |
0.00% |
0 / 1 |
LabelsField | |
87.80% |
36 / 41 |
|
50.00% |
2 / 4 |
14.36 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
getMapping | |
82.61% |
19 / 23 |
|
0.00% |
0 / 1 |
4.08 | |||
getFieldData | |
100.00% |
12 / 12 |
|
100.00% |
1 / 1 |
7 | |||
getEngineHints | |
66.67% |
2 / 3 |
|
0.00% |
0 / 1 |
2.15 |
1 | <?php |
2 | namespace Wikibase\Search\Elastic\Fields; |
3 | |
4 | use CirrusSearch\CirrusSearch; |
5 | use SearchEngine; |
6 | use Wikibase\DataModel\Entity\EntityDocument; |
7 | use Wikibase\DataModel\Term\AliasesProvider; |
8 | use Wikibase\DataModel\Term\LabelsProvider; |
9 | |
/**
 * Search index field which contains the labels (and aliases) of an entity,
 * stored per language.
 *
 * @license GPL-2.0-or-later
 * @author Stas Malyshev
 */
class LabelsField extends TermIndexField {

	/**
	 * Field name
	 */
	public const NAME = "labels";

	/**
	 * List of available languages
	 * @var string[]
	 */
	private $languages;

	/**
	 * Stemming settings keyed by language code. A language gets a tokenized
	 * main field only when its 'index' entry is non-empty.
	 * @var array
	 */
	private $stemmingSettings;

	/**
	 * @param string[] $languages Language codes for which sub-fields are created.
	 * @param array $stemmingSettings Per-language settings, read as
	 *  $stemmingSettings[$language]['index'].
	 */
	public function __construct( array $languages, array $stemmingSettings ) {
		$this->languages = $languages;
		parent::__construct( self::NAME, \SearchIndexField::INDEX_TYPE_NESTED );
		$this->stemmingSettings = $stemmingSettings;
	}

	/**
	 * Build the index mapping for this field: an 'object' with one property
	 * per configured language.
	 *
	 * @param SearchEngine $engine
	 * @return array The mapping, or an empty array when the engine is not CirrusSearch.
	 */
	public function getMapping( SearchEngine $engine ) {
		// Since we need a specially tuned field, we can not use
		// standard search engine types.
		if ( !( $engine instanceof CirrusSearch ) ) {
			// For now only Cirrus/Elastic is supported
			return [];
		}

		// The engine configuration does not depend on the language,
		// so fetch it once instead of on every iteration.
		$searchConfig = $engine->getConfig();

		$properties = [];
		foreach ( $this->languages as $language ) {
			if ( empty( $this->stemmingSettings[$language]['index'] ) ) {
				// No stemming requested for this language: main field is not indexed.
				$langConfig = $this->getUnindexedField();
			} else {
				$langConfig = $this->getTokenizedSubfield(
					$searchConfig,
					$language . '_text',
					$language . '_text_search'
				);
			}

			$langConfig['fields']['prefix'] =
				$this->getSubfield( 'prefix_asciifolding', 'near_match_asciifolding' );
			$langConfig['fields']['near_match_folded'] =
				$this->getSubfield( 'near_match_asciifolding' );
			$langConfig['fields']['near_match'] = $this->getSubfield( 'near_match' );
			// This one is for full-text search, will tokenize
			// TODO: here we probably will need better language-specific analyzers
			$langConfig['fields']['plain'] = $this->getTokenizedSubfield(
				$searchConfig,
				$language . '_plain',
				$language . '_plain_search'
			);
			// All labels are copied to labels_all
			$langConfig['copy_to'] = 'labels_all';

			$properties[$language] = $langConfig;
		}

		return [
			'type' => 'object',
			'properties' => $properties,
		];
	}

	/**
	 * Collect the labels and aliases of an entity, grouped by language code.
	 *
	 * For every language the first list element is the label and the
	 * remaining elements are the aliases. When a language has aliases but
	 * no label, an empty string is used as the first element so that the
	 * label position stays reserved.
	 *
	 * @param EntityDocument $entity
	 *
	 * @return string[][]|null Map of language code to list of terms, or null
	 *  when the entity is not a LabelsProvider or has no labels and aliases.
	 */
	public function getFieldData( EntityDocument $entity ) {
		if ( !( $entity instanceof LabelsProvider ) ) {
			return null;
		}
		$data = [];
		foreach ( $entity->getLabels() as $language => $label ) {
			$data[$language][] = $label->getText();
		}
		if ( $entity instanceof AliasesProvider ) {
			foreach ( $entity->getAliasGroups() as $aliases ) {
				$language = $aliases->getLanguageCode();
				if ( !isset( $data[$language] ) ) {
					// No label in this language: keep the first slot empty.
					$data[$language][] = '';
				}
				$data[$language] = array_merge( $data[$language], $aliases->getAliases() );
			}
		}
		// Shouldn't return empty arrays, that will be encoded to json as an
		// empty list instead of an empty map. Elastic doesn't mind, but this
		// allows more consistency working with the resulting cirrus docs
		return $data ?: null;
	}

	/**
	 * Set engine hints.
	 * Specifically, sets noop hint so that labels would be compared
	 * as arrays and removal of labels would be processed correctly.
	 * @param SearchEngine $engine
	 * @return array The hints, or an empty array when the engine is not CirrusSearch.
	 */
	public function getEngineHints( SearchEngine $engine ) {
		if ( !( $engine instanceof CirrusSearch ) ) {
			// For now only Cirrus/Elastic is supported
			return [];
		}
		return [ \CirrusSearch\Search\CirrusIndexField::NOOP_HINT => "equals" ];
	}

}