Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
87.80% covered (warning)
87.80%
36 / 41
50.00% covered (danger)
50.00%
2 / 4
CRAP
0.00% covered (danger)
0.00%
0 / 1
LabelsField
87.80% covered (warning)
87.80%
36 / 41
50.00% covered (danger)
50.00%
2 / 4
14.36
0.00% covered (danger)
0.00%
0 / 1
 __construct
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
1
 getMapping
82.61% covered (warning)
82.61%
19 / 23
0.00% covered (danger)
0.00%
0 / 1
4.08
 getFieldData
100.00% covered (success)
100.00%
12 / 12
100.00% covered (success)
100.00%
1 / 1
7
 getEngineHints
66.67% covered (warning)
66.67%
2 / 3
0.00% covered (danger)
0.00%
0 / 1
2.15
1<?php
2namespace Wikibase\Search\Elastic\Fields;
3
4use CirrusSearch\CirrusSearch;
5use SearchEngine;
6use Wikibase\DataModel\Entity\EntityDocument;
7use Wikibase\DataModel\Term\AliasesProvider;
8use Wikibase\DataModel\Term\LabelsProvider;
9
10/**
11 * Field which contains per-language specific labels.
12 *
13 * @license GPL-2.0-or-later
14 * @author Stas Malyshev
15 */
class LabelsField extends TermIndexField {

    /**
     * Name under which this field is registered.
     */
    public const NAME = "labels";

    /**
     * Language codes for which a per-language sub-field is generated.
     * @var string[]
     */
    private $languages;

    /**
     * Settings keyed by language code. Only the 'index' entry of each
     * language is consulted by this class.
     * @var array
     */
    private $stemmingSettings;

    /**
     * @param string[] $languages Language codes to build sub-fields for.
     * @param array $stemmingSettings Settings keyed by language code; a language
     *  whose 'index' entry is non-empty gets a tokenized text field in the mapping.
     */
    public function __construct( array $languages, array $stemmingSettings ) {
        $this->languages = $languages;
        parent::__construct( self::NAME, \SearchIndexField::INDEX_TYPE_NESTED );
        $this->stemmingSettings = $stemmingSettings;
    }

    /**
     * Build the index mapping: one object with a property per configured language.
     *
     * @param SearchEngine $engine
     * @return array Mapping definition, or an empty array for engines other
     *  than CirrusSearch.
     */
    public function getMapping( SearchEngine $engine ) {
        // This field needs custom-tuned sub-fields that the generic search
        // engine types cannot express, so only Cirrus/Elastic is handled.
        if ( !( $engine instanceof CirrusSearch ) ) {
            return [];
        }

        $properties = [];
        foreach ( $this->languages as $langCode ) {
            $stemmingEnabled = !empty( $this->stemmingSettings[$langCode]['index'] );
            if ( $stemmingEnabled ) {
                $fieldDef = $this->getTokenizedSubfield(
                    $engine->getConfig(),
                    $langCode . '_text',
                    $langCode . '_text_search'
                );
            } else {
                $fieldDef = $this->getUnindexedField();
            }

            $extraSubfields = [
                'prefix' => $this->getSubfield( 'prefix_asciifolding', 'near_match_asciifolding' ),
                'near_match_folded' => $this->getSubfield( 'near_match_asciifolding' ),
                'near_match' => $this->getSubfield( 'near_match' ),
                // Tokenized variant used for full-text search.
                // TODO: here we probably will need better language-specific analyzers
                'plain' => $this->getTokenizedSubfield(
                    $engine->getConfig(),
                    $langCode . '_plain',
                    $langCode . '_plain_search'
                ),
            ];
            foreach ( $extraSubfields as $subfieldName => $subfieldDef ) {
                $fieldDef['fields'][$subfieldName] = $subfieldDef;
            }

            // Every label is also copied into labels_all
            $fieldDef['copy_to'] = 'labels_all';

            $properties[$langCode] = $fieldDef;
        }

        return [
            'type' => 'object',
            'properties' => $properties,
        ];
    }

    /**
     * Collect the label and alias texts of an entity, grouped by language.
     *
     * For each language the first list element is the label text (an empty
     * string when the language has aliases but no label), followed by the
     * aliases.
     *
     * @param EntityDocument $entity
     *
     * @return array|null Map of language code to list of strings; null when the
     *  entity is not a LabelsProvider or there is nothing to index.
     */
    public function getFieldData( EntityDocument $entity ) {
        if ( !( $entity instanceof LabelsProvider ) ) {
            return null;
        }

        $textsByLanguage = [];
        foreach ( $entity->getLabels() as $langCode => $term ) {
            $textsByLanguage[$langCode][] = $term->getText();
        }

        if ( $entity instanceof AliasesProvider ) {
            foreach ( $entity->getAliasGroups() as $aliasGroup ) {
                $langCode = $aliasGroup->getLanguageCode();
                // No label for this language: keep the label slot with an empty string.
                $existing = $textsByLanguage[$langCode] ?? [ '' ];
                $textsByLanguage[$langCode] = array_merge( $existing, $aliasGroup->getAliases() );
            }
        }

        // An empty array would be JSON-encoded as an empty list rather than an
        // empty map. Elastic accepts either, but returning null keeps the
        // resulting cirrus docs consistent.
        if ( $textsByLanguage === [] ) {
            return null;
        }
        return $textsByLanguage;
    }

    /**
     * Set engine hints.
     * Sets the noop hint to "equals" so that labels are compared as arrays
     * and removal of labels is processed correctly.
     *
     * @param SearchEngine $engine
     * @return array Hints for CirrusSearch; empty for any other engine.
     */
    public function getEngineHints( SearchEngine $engine ) {
        if ( $engine instanceof CirrusSearch ) {
            return [ \CirrusSearch\Search\CirrusIndexField::NOOP_HINT => "equals" ];
        }

        // For now only Cirrus/Elastic is supported
        return [];
    }

}