Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
88.89% |
32 / 36 |
|
66.67% |
2 / 3 |
CRAP | |
0.00% |
0 / 1 |
LabelsField | |
88.89% |
32 / 36 |
|
66.67% |
2 / 3 |
12.20 | |
0.00% |
0 / 1 |
getMapping | |
82.61% |
19 / 23 |
|
0.00% |
0 / 1 |
4.08 | |||
getFieldData | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
2 | |||
getLabelsIndexedData | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
6 |
1 | <?php |
2 | namespace Wikibase\Search\Elastic\Fields; |
3 | |
4 | use CirrusSearch\CirrusSearch; |
5 | use SearchEngine; |
6 | use Wikibase\DataModel\Entity\EntityDocument; |
7 | use Wikibase\DataModel\Term\AliasesProvider; |
8 | use Wikibase\DataModel\Term\LabelsProvider; |
9 | |
/**
 * Search index field that stores an entity's labels (and aliases) grouped
 * per language.
 *
 * @license GPL-2.0-or-later
 * @author Stas Malyshev
 */
class LabelsField extends TermIndexField implements WikibaseLabelsIndexField {

	use LabelsDescriptionsFieldTrait;

	/**
	 * Name of this field.
	 */
	public const NAME = "labels";

	/**
	 * Build the mapping for this field: an 'object' with one property per
	 * configured language, each carrying the prefix / near-match / plain
	 * subfields and copying its content to 'labels_all'.
	 *
	 * @param SearchEngine $engine
	 * @return array The mapping, or an empty array when $engine is not CirrusSearch.
	 */
	public function getMapping( SearchEngine $engine ) {
		// This field needs special tuning that the standard search engine
		// types cannot express, so for now only Cirrus/Elastic is supported.
		if ( !( $engine instanceof CirrusSearch ) ) {
			return [];
		}

		$properties = [];
		foreach ( $this->languages as $langCode ) {
			// Languages without an 'index' stemming setting get the unindexed
			// base field; the others get a tokenized, language-analyzed one.
			$fieldConfig = empty( $this->stemmingSettings[$langCode]['index'] )
				? $this->getUnindexedField()
				: $this->getTokenizedSubfield(
					$engine->getConfig(),
					$langCode . '_text',
					$langCode . '_text_search'
				);

			$subfields = [
				'prefix' => $this->getSubfield( 'prefix_asciifolding', 'near_match_asciifolding' ),
				'near_match_folded' => $this->getSubfield( 'near_match_asciifolding' ),
				'near_match' => $this->getSubfield( 'near_match' ),
				// This one is for full-text search, will tokenize
				// TODO: here we probably will need better language-specific analyzers
				'plain' => $this->getTokenizedSubfield(
					$engine->getConfig(),
					$langCode . '_plain',
					$langCode . '_plain_search'
				),
			];
			// Assign one by one so that anything the base config already
			// holds under 'fields' is kept and only these keys are (re)set.
			foreach ( $subfields as $subfieldName => $subfieldConfig ) {
				$fieldConfig['fields'][$subfieldName] = $subfieldConfig;
			}

			// All labels are copied to labels_all
			$fieldConfig['copy_to'] = 'labels_all';

			$properties[$langCode] = $fieldConfig;
		}

		return [
			'type' => 'object',
			'properties' => $properties,
		];
	}

	/**
	 * Get the value of the field to be indexed when a page/document is indexed.
	 *
	 * @param EntityDocument $entity
	 *
	 * @return mixed Null when $entity is not a LabelsProvider, otherwise the
	 *  result of getLabelsIndexedData() for it.
	 */
	public function getFieldData( EntityDocument $entity ) {
		return $entity instanceof LabelsProvider
			? $this->getLabelsIndexedData( $entity )
			: null;
	}

	/**
	 * Collect the label text and, for entities that also provide aliases,
	 * the aliases of every language into one list per language. The label
	 * text always comes first; when a language has aliases but no label,
	 * an empty string takes the label's place.
	 *
	 * @param LabelsProvider $entity
	 * @return array|null Lists of strings keyed by language, or null when
	 *  there is nothing to index.
	 */
	public function getLabelsIndexedData( LabelsProvider $entity ) {
		$termsByLanguage = [];
		foreach ( $entity->getLabels() as $langCode => $term ) {
			$termsByLanguage[$langCode][] = $term->getText();
		}

		if ( $entity instanceof AliasesProvider ) {
			foreach ( $entity->getAliasGroups() as $aliasGroup ) {
				$langCode = $aliasGroup->getLanguageCode();
				// No label in this language: start from an empty-string
				// placeholder so the aliases never occupy the first slot.
				$existingTerms = $termsByLanguage[$langCode] ?? [ '' ];
				$termsByLanguage[$langCode] = array_merge( $existingTerms, $aliasGroup->getAliases() );
			}
		}

		// Shouldn't return empty arrays, that will be encoded to json as an
		// empty list instead of an empty map. Elastic doesn't mind, but this
		// allows more consistency working with the resulting cirrus docs
		if ( $termsByLanguage === [] ) {
			return null;
		}
		return $termsByLanguage;
	}
}