Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
60.45% covered (warning)
60.45%
81 / 134
12.50% covered (danger)
12.50%
1 / 8
CRAP
0.00% covered (danger)
0.00%
0 / 1
LexemeFulltextResult
60.45% covered (warning)
60.45%
81 / 134
12.50% covered (danger)
12.50%
1 / 8
63.67
0.00% covered (danger)
0.00%
0 / 1
 __construct
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
1
 getSourceFiltering
0.00% covered (danger)
0.00%
0 / 8
0.00% covered (danger)
0.00%
0 / 1
2
 getFields
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 getHighlightingConfiguration
0.00% covered (danger)
0.00%
0 / 37
0.00% covered (danger)
0.00%
0 / 1
2
 getFormIdResult
90.00% covered (success)
90.00%
18 / 20
0.00% covered (danger)
0.00%
0 / 1
5.03
 getFormRepresentationResult
88.24% covered (warning)
88.24%
15 / 17
0.00% covered (danger)
0.00%
0 / 1
4.03
 transformElasticsearchResult
95.74% covered (success)
95.74%
45 / 47
0.00% covered (danger)
0.00%
0 / 1
11
 createEmptyResult
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
1<?php
2namespace Wikibase\Lexeme\Search\Elastic;
3
4use CirrusSearch\Search\BaseCirrusSearchResultSet;
5use CirrusSearch\Search\BaseResultsType;
6use Elastica\ResultSet;
7use Language;
8use Wikibase\DataModel\Entity\EntityIdParser;
9use Wikibase\Lexeme\DataAccess\LexemeDescription;
10use Wikibase\Lib\Store\FallbackLabelDescriptionLookupFactory;
11use Wikibase\Search\Elastic\EntitySearchUtils;
12use Wikibase\Search\Elastic\Fields\StatementCountField;
13
14/**
15 * This result type implements the result for searching a Lexeme for fulltext search.
16 *
17 * @license GPL-2.0-or-later
18 * @author Stas Malyshev
19 */
class LexemeFulltextResult extends BaseResultsType {

    /**
     * Parser for the serialized IDs found in the search index
     * (lexeme titles, form IDs, language/category/feature IDs).
     * @var EntityIdParser
     */
    private $idParser;

    /**
     * Display language
     * @var Language
     */
    private $displayLanguage;

    /**
     * Factory for the label/description lookup handed to LexemeDescription.
     * @var FallbackLabelDescriptionLookupFactory
     */
    private $termLookupFactory;

    /**
     * @param EntityIdParser $idParser
     * @param Language $displayLanguage User display language
     * @param FallbackLabelDescriptionLookupFactory $termLookupFactory
     *        Lookup factory for assembling descriptions
     */
    public function __construct(
        EntityIdParser $idParser,
        Language $displayLanguage,
        FallbackLabelDescriptionLookupFactory $termLookupFactory
    ) {
        $this->idParser = $idParser;
        $this->termLookupFactory = $termLookupFactory;
        $this->displayLanguage = $displayLanguage;
    }

    /**
     * Get the source filtering to be used loading the result.
     *
     * Extends the parent's list with the Lexeme-specific fields that
     * transformElasticsearchResult() reads from the source document.
     *
     * @return string[]
     */
    public function getSourceFiltering() {
        return array_merge( parent::getSourceFiltering(), [
                LemmaField::NAME,
                LexemeLanguageField::NAME,
                LexemeCategoryField::NAME,
                FormsField::NAME,
                StatementCountField::NAME,
                // The web ui for fulltext search expects this to be returned.
                // Longer term there should probably be some concept where the UI
                // requests additional properties instead of baking it in at these
                // lower levels for each fulltext results type.
                'timestamp',
        ] );
    }

    /**
     * Get the fields to load.  Most of the time we'll use source filtering instead but
     * some fields aren't part of the source.
     *
     * @return string[] Always empty for this result type
     */
    public function getFields() {
        return [];
    }

    /**
     * Get the highlighting configuration.
     *
     * Highlighting is requested for the title, the form IDs, the lemma and the
     * form representations; transformElasticsearchResult() inspects the last three
     * to find out what was actually matched. Empty pre/post tags are configured, so
     * presumably fragments come back as plain field values that can be compared
     * with the source data.
     *
     * @param array $highlightSource configuration for how to highlight the source.
     *  Empty if source should be ignored. Not consulted by this implementation.
     * @return array|null highlighting configuration for elasticsearch
     */
    public function getHighlightingConfiguration( array $highlightSource ) {
        $skipIfLastMatched = [
            'skip_if_last_matched' => true,
        ];

        // NOTE(review): the order of the fields is kept exactly as before, since
        // 'skip_if_last_matched' presumably depends on it - confirm before reordering.
        return [
            'pre_tags' => [ '' ],
            'post_tags' => [ '' ],
            'fields' => [
                'title' => [
                    'type' => 'experimental',
                    'fragmenter' => 'none',
                    'number_of_fragments' => 0,
                    'matched_fields' => [ 'title.keyword' ]
                ],
                'lexeme_forms.id' => [
                    'type' => 'experimental',
                    'fragmenter' => 'none',
                    'number_of_fragments' => 0,
                    'options' => $skipIfLastMatched,
                ],
                'lemma' => [
                    'type' => 'experimental',
                    'fragmenter' => 'none',
                    'number_of_fragments' => 0,
                    'options' => $skipIfLastMatched,
                ],
                'lexeme_forms.representation' => [
                    'type' => 'experimental',
                    'fragmenter' => 'none',
                    'number_of_fragments' => 30,
                    'fragment_size' => 1000, // Hopefully this is enough
                    'options' => $skipIfLastMatched,
                ],
            ],
        ];
    }

    /**
     * Convert a list of serialized grammatical feature IDs to parsed IDs.
     *
     * IDs that can not be parsed are dropped. Keys of the input are preserved,
     * so the result may have gaps.
     *
     * @param string[] $featureIds Serialized IDs as stored in the form's source data
     * @return array Parsed IDs, as produced by EntitySearchUtils::parseOrNull()
     */
    private function parseFeatureIds( array $featureIds ) {
        return array_filter( array_map( function ( $featureId ) {
            return EntitySearchUtils::parseOrNull( $featureId, $this->idParser );
        }, $featureIds ) );
    }

    /**
     * Produce raw result for Form ID match.
     *
     * Looks up the form whose ID equals the first highlighted form ID and returns
     * its first representation together with its parsed grammatical features.
     *
     * @param string[][] $highlight Highlighter data
     * @param array $sourceData Lexeme source data
     * @return array|null Null if match is bad: the ID does not parse, no form with
     *  that ID is present, or the form has no (non-empty) representation.
     */
    private function getFormIdResult( $highlight, $sourceData ) {
        $formId = $highlight['lexeme_forms.id'][0];
        if ( !EntitySearchUtils::parseOrNull( $formId, $this->idParser ) ) {
            // Got some bad id?? Weird.
            return null;
        }

        foreach ( $sourceData['lexeme_forms'] as $form ) {
            if ( $form['id'] !== $formId ) {
                continue;
            }
            // Only the first form with this ID is considered.
            // TODO: how we choose one?
            // Guard against a form without representations, same as done for lemmas.
            $repr = empty( $form['representation'] ) ? '' : $form['representation'][0];
            if ( $repr === '' ) {
                // Nothing to display for this form, skip it.
                return null;
            }

            return [
                'formId' => $formId,
                'representation' => $repr,
                // Convert features to EntityId's
                'features' => $this->parseFeatureIds(
                    isset( $form['features'] ) ? $form['features'] : []
                ),
            ];
        }

        // Didn't find the right id? Weird, skip it.
        return null;
    }

    /**
     * Get data for specific form match from source data
     *
     * Returns the first form that has a parseable ID and at least one
     * representation equal to one of the highlighted values.
     *
     * @param array[] $sourceForms 'forms' field of the source data
     * @param string[] $highlight Highlighter data about match
     * @return array|null Null if match is bad
     */
    private function getFormRepresentationResult( $sourceForms, $highlight ) {
        foreach ( $sourceForms as $form ) {
            $reprMatches = array_intersect( $form['representation'], $highlight );
            if ( !$reprMatches ) {
                continue;
            }
            // matches the data
            $formIdParsed = EntitySearchUtils::parseOrNull( $form['id'], $this->idParser );
            if ( !$formIdParsed ) {
                // Got some bad id?? Weird.
                continue;
            }

            // NOTE(review): unlike getFormIdResult(), 'formId' here is the parsed ID
            // object rather than the serialized string - confirm consumers accept both.
            return [
                'formId' => $formIdParsed,
                'representation' => reset( $reprMatches ),
                // Convert features to EntityId's
                'features' => $this->parseFeatureIds(
                    isset( $form['features'] ) ? $form['features'] : []
                ),
            ];
        }
        // Didn't find anything
        return null;
    }

    /**
     * Convert search result from ElasticSearch result set to LexemeResultSet.
     *
     * The data inside the set are not rendered yet, but the set is configured with
     * the label lookup that has necessary item labels already loaded.
     *
     * Hits whose title is not a parseable entity ID, and hits that matched on a
     * form ID which can not be resolved, are left out of the result.
     *
     * @param ResultSet $result ElasticSearch results
     * @return \ISearchResultSet
     */
    public function transformElasticsearchResult( ResultSet $result ) {
        // Per-lexeme data, keyed by serialized lexeme ID
        $rawResults = [];
        // Auxiliary entities (language, category, features) whose labels are prefetched
        $entityIds = [];

        foreach ( $result->getResults() as $r ) {
            $sourceData = $r->getSource();
            $entityId = EntitySearchUtils::parseOrNull( $sourceData['title'], $this->idParser );
            if ( !$entityId ) {
                // Can not parse entity ID - skip it
                continue;
            }

            // TODO: what we do here if no language code?
            // Not sure we want to index all lemma languages.
            // Should we just fake the term language code?
            $lemmaCode = LexemeTermResult::extractLanguageCode( $sourceData );

            // Highlight part contains information about what has actually been matched.
            $highlight = $r->getHighlights();

            $lang = $sourceData['lexeme_language']['entity'];
            $category = $sourceData['lexical_category'];

            $lexemeData = [
                'lexemeId' => $entityId,
                // Having empty lemma is unusual, but in theory possible
                'lemma' => empty( $sourceData['lemma'] ) ? '' : $sourceData['lemma'][0],
                'lang' => $lang,
                'langcode' => $lemmaCode,
                'category' => $category,
                'elasticResult' => $r
            ];

            // Grammatical features of the matched form, if a form was matched
            $features = [];
            if ( !empty( $highlight['lexeme_forms.id'] ) ) {
                // If we matched Form ID, this means it's a match by ID
                $idResult = $this->getFormIdResult( $highlight, $sourceData );
                if ( !$idResult ) {
                    continue;
                }

                // Form keys come first; existing lexeme keys are never overwritten
                $lexemeData = $idResult + $lexemeData;
                $features = $idResult['features'];
            } elseif ( !empty( $highlight['lemma'] ) ) {
                // TODO: make result display highlight this
                $lexemeData['matchedLemma'] = $highlight['lemma'][0];
            } elseif ( !empty( $highlight['lexeme_forms.representation'] ) ) {
                // For now, find the first form that matches
                $formResult = $this->getFormRepresentationResult(
                    $sourceData['lexeme_forms'],
                    $highlight['lexeme_forms.representation']
                );
                if ( $formResult ) {
                    $lexemeData = $formResult + $lexemeData;
                    $features = $formResult['features'];
                }
            }

            // Doing two-stage resolution here since we want to prefetch all labels for
            // auxiliary entities before using them to construct descriptions.
            $lexemeData['elastica_result_hash'] = spl_object_hash( $r );
            $rawResults[$entityId->getSerialization()] = $lexemeData;
            // Unparseable IDs end up as null here and are filtered out below
            $entityIds[$lang] = EntitySearchUtils::parseOrNull( $lang, $this->idParser );
            $entityIds[$category] = EntitySearchUtils::parseOrNull( $category, $this->idParser );
            foreach ( $features as $feature ) {
                $entityIds[$feature->getSerialization()] = $feature;
            }
        }

        if ( !$rawResults ) {
            // Use the same empty result as the rest of this results type
            return $this->createEmptyResult();
        }

        // Create prefetched lookup
        $termLookup = $this->termLookupFactory->newLabelDescriptionLookup(
            $this->displayLanguage,
            array_filter( $entityIds )
        );
        $descriptionMaker = new LexemeDescription(
            $termLookup,
            $this->idParser,
            $this->displayLanguage
        );

        return new LexemeResultSet( $result, $this->displayLanguage, $descriptionMaker, $rawResults );
    }

    /**
     * Also used by transformElasticsearchResult() when no hit could be converted.
     *
     * @return mixed Empty set of search results
     */
    public function createEmptyResult() {
        return BaseCirrusSearchResultSet::emptyResultSet( false );
    }

}