Code Coverage
 
Classes and Traits
Functions and Methods
Lines
Total
0.00% covered (danger)
0.00%
0 / 1
11.11% covered (danger)
11.11%
1 / 9
CRAP
81.11% covered (warning)
81.11%
73 / 90
LexemeFulltextResult
0.00% covered (danger)
0.00%
0 / 1
11.11% covered (danger)
11.11%
1 / 9
30.56
81.11% covered (warning)
81.11%
73 / 90
 __construct
100.00% covered (success)
100.00%
1 / 1
1
100.00% covered (success)
100.00%
4 / 4
 getSourceFiltering
0.00% covered (danger)
0.00%
0 / 1
2
0.00% covered (danger)
0.00%
0 / 2
 getFields
0.00% covered (danger)
0.00%
0 / 1
2
0.00% covered (danger)
0.00%
0 / 1
 getStoredFields
0.00% covered (danger)
0.00%
0 / 1
2
0.00% covered (danger)
0.00%
0 / 1
 getHighlightingConfiguration
0.00% covered (danger)
0.00%
0 / 1
2
0.00% covered (danger)
0.00%
0 / 6
 getFormIdResult
0.00% covered (danger)
0.00%
0 / 1
5.04
88.24% covered (warning)
88.24%
15 / 17
 getFormRepresentationResult
0.00% covered (danger)
0.00%
0 / 1
4.06
84.62% covered (warning)
84.62%
11 / 13
 transformElasticsearchResult
0.00% covered (danger)
0.00%
0 / 1
11
95.56% covered (success)
95.56%
43 / 45
 createEmptyResult
0.00% covered (danger)
0.00%
0 / 1
2
0.00% covered (danger)
0.00%
0 / 1
<?php
namespace Wikibase\Lexeme\Search\Elastic;
use CirrusSearch\Search\BaseCirrusSearchResultSet;
use CirrusSearch\Search\BaseResultsType;
use Elastica\ResultSet;
use Language;
use Wikibase\DataModel\Entity\EntityIdParser;
use Wikibase\Lexeme\DataAccess\LexemeDescription;
use Wikibase\Lib\Store\LanguageFallbackLabelDescriptionLookupFactory;
use Wikibase\Search\Elastic\EntitySearchUtils;
use Wikibase\Search\Elastic\Fields\StatementCountField;
/**
 * This result type implements the result for searching a Lexeme for fulltext search.
 *
 * @license GPL-2.0-or-later
 * @author Stas Malyshev
 */
class LexemeFulltextResult extends BaseResultsType {
    /**
     * Parser used to turn serialized IDs found in the index into entity ID objects.
     * @var EntityIdParser
     */
    private $idParser;
    /**
     * Display language
     * @var Language
     */
    private $displayLanguage;
    /**
     * Factory for the label/description lookup handed to the description maker.
     * @var LanguageFallbackLabelDescriptionLookupFactory
     */
    private $termLookupFactory;
    /**
     * @param EntityIdParser $idParser
     * @param Language $displayLanguage User display language
     * @param LanguageFallbackLabelDescriptionLookupFactory $termLookupFactory
     *        Lookup factory for assembling descriptions
     */
    public function __construct(
        EntityIdParser $idParser,
        Language $displayLanguage,
        LanguageFallbackLabelDescriptionLookupFactory $termLookupFactory
    ) {
        $this->idParser = $idParser;
        $this->termLookupFactory = $termLookupFactory;
        $this->displayLanguage = $displayLanguage;
    }
    /**
     * Get the source filtering to be used loading the result.
     *
     * Extends whatever the parent type requests with the Lexeme-specific fields
     * that transformElasticsearchResult() reads from the source.
     *
     * @return string[]
     */
    public function getSourceFiltering() {
        return array_merge( parent::getSourceFiltering(), [
                LemmaField::NAME,
                LexemeLanguageField::NAME,
                LexemeCategoryField::NAME,
                FormsField::NAME,
                StatementCountField::NAME,
        ] );
    }
    /**
     * Get the fields to load.  Most of the time we'll use source filtering instead but
     * some fields aren't part of the source.
     *
     * This result type needs none, so the list is always empty.
     *
     * @return string[]
     */
    public function getFields() {
        return [];
    }
    /**
     * ES5 variant of getFields.
     * @return string[] Always empty for this result type
     */
    public function getStoredFields() {
        return [];
    }
    /**
     * Get the highlighting configuration.
     *
     * The highlights are not used for display markup (the pre/post tags are empty strings);
     * transformElasticsearchResult() uses them only to find out which field matched.
     *
     * @param array $highlightSource configuration for how to highlight the source.
     *  Empty if source should be ignored. Not used by this implementation.
     * @return array highlighting configuration for elasticsearch
     */
    public function getHighlightingConfiguration( array $highlightSource ) {
        $config = [
            'pre_tags' => [ '' ],
            'post_tags' => [ '' ],
            'fields' => [],
        ];
        // NOTE(review): every field after the first sets 'skip_if_last_matched', so the
        // order in which the fields are added below presumably matters - keep it stable.
        $config['fields']['title'] = [
            'type' => 'experimental',
            'fragmenter' => "none",
            'number_of_fragments' => 0,
            'matched_fields' => [ 'title.keyword' ]
        ];
        $config['fields']['lexeme_forms.id'] = [
            'type' => 'experimental',
            'fragmenter' => "none",
            'number_of_fragments' => 0,
            'options' => [
                'skip_if_last_matched' => true,
            ],
        ];
        $config['fields']["lemma"] = [
            'type' => 'experimental',
            'fragmenter' => "none",
            'number_of_fragments' => 0,
            'options' => [
                'skip_if_last_matched' => true,
            ],
        ];
        $config['fields']["lexeme_forms.representation"] = [
            'type' => 'experimental',
            'fragmenter' => "none",
            'number_of_fragments' => 30,
            'fragment_size' => 1000, // Hopefully this is enough
            'options' => [
                'skip_if_last_matched' => true,
            ],
        ];
        return $config;
    }
    /**
     * Convert the serialized grammatical feature IDs of a form into parsed IDs.
     *
     * IDs that can not be parsed are dropped. Keys of the returned array are the
     * positions of the surviving IDs in the original list (array_filter keeps keys).
     *
     * @param array $form Single entry of the 'lexeme_forms' source field
     * @return array Parsed feature IDs, empty if the form has no usable 'features' list
     */
    private function parseFeatureIds( $form ) {
        if ( empty( $form['features'] ) || !is_array( $form['features'] ) ) {
            // An incomplete index document should not break the whole result page.
            return [];
        }
        return array_filter( array_map( function ( $featureId ) {
            return EntitySearchUtils::parseOrNull( $featureId, $this->idParser );
        }, $form['features'] ) );
    }
    /**
     * Produce raw result for Form ID match.
     *
     * Only the first form in the source data whose ID equals the highlighted ID is
     * considered.
     *
     * NOTE(review): 'formId' is returned here as the raw string, whereas
     * getFormRepresentationResult() returns the parsed ID object - confirm which one
     * the consumer of the raw results expects.
     *
     * @param string[][] $highlight Highlighter data
     * @param array $sourceData Lexeme source data
     * @return array|null Null if match is bad
     */
    private function getFormIdResult( $highlight, $sourceData ) {
        $formId = $highlight['lexeme_forms.id'][0];
        if ( !EntitySearchUtils::parseOrNull( $formId, $this->idParser ) ) {
            // Got some bad id?? Weird.
            return null;
        }
        $sourceForms = isset( $sourceData['lexeme_forms'] ) ? $sourceData['lexeme_forms'] : [];
        foreach ( $sourceForms as $form ) {
            if ( !isset( $form['id'] ) || $form['id'] !== $formId ) {
                continue;
            }
            // TODO: how we choose one?
            // Explicit check instead of empty(): empty( "0" ) is true, which would
            // wrongly reject a form whose representation is the string "0".
            if ( !isset( $form['representation'][0] ) || $form['representation'][0] === '' ) {
                return null;
            }
            return [
                'formId' => $formId,
                'representation' => $form['representation'][0],
                'features' => $this->parseFeatureIds( $form ),
            ];
        }
        // Didn't find the right id? Weird, skip it.
        return null;
    }
    /**
     * Get data for specific form match from source data
     *
     * Returns the first form that has at least one representation equal to one of the
     * highlighted strings and whose ID can be parsed.
     *
     * @param array[] $sourceForms 'forms' field of the source data
     * @param string[] $highlight Highlighter data about match
     * @return array|null Null if match is bad
     */
    private function getFormRepresentationResult( $sourceForms, $highlight ) {
        foreach ( $sourceForms as $form ) {
            if ( empty( $form['representation'] ) || !isset( $form['id'] ) ) {
                // Nothing to compare against, or nothing to point the result at.
                continue;
            }
            $reprMatches = array_intersect( $form['representation'], $highlight );
            if ( !$reprMatches ) {
                continue;
            }
            // matches the data
            $formIdParsed = EntitySearchUtils::parseOrNull( $form['id'], $this->idParser );
            if ( !$formIdParsed ) {
                // Got some bad id?? Weird.
                continue;
            }
            return [
                'formId' => $formIdParsed,
                'representation' => reset( $reprMatches ),
                'features' => $this->parseFeatureIds( $form ),
            ];
        }
        // Didn't find anything
        return null;
    }
    /**
     * Convert search result from ElasticSearch result set to LexemeResultSet.
     *
     * The data inside the set are not rendered yet, but the set is configured with
     * the label lookup that has necessary item labels already loaded.
     *
     * For each hit the match kind is decided from the highlights, in this order of
     * precedence: Form ID, lemma, Form representation. Hits whose title is not a
     * parseable entity ID, or whose Form ID match can not be resolved, are skipped.
     *
     * @param ResultSet $result ElasticSearch results
     * @return \ISearchResultSet
     */
    public function transformElasticsearchResult( ResultSet $result ) {
        $rawResults = [];
        $entityIds = [];
        foreach ( $result->getResults() as $r ) {
            $sourceData = $r->getSource();
            $entityId = EntitySearchUtils::parseOrNull( $sourceData['title'], $this->idParser );
            if ( !$entityId ) {
                // Can not parse entity ID - skip it
                continue;
            }
            // TODO: what we do here if no language code?
            // Not sure we want to index all lemma languages.
            // Should we just fake the term language code?
            $lemmaCode = LexemeTermResult::extractLanguageCode( $sourceData );
            // Highlight part contains information about what has actually been matched.
            $highlight = $r->getHighlights();
            $lang = $sourceData['lexeme_language']['entity'];
            $category = $sourceData['lexical_category'];
            $features = [];
            $lexemeData = [
                'lexemeId' => $entityId,
                // Having empty lemma is unusual, but in theory possible
                'lemma' => empty( $sourceData['lemma'] ) ? '' : $sourceData['lemma'][0],
                'lang' => $lang,
                'langcode' => $lemmaCode,
                'category' => $category,
                'elasticResult' => $r
            ];
            if ( !empty( $highlight['lexeme_forms.id'] ) ) {
                // If we matched Form ID, this means it's a match by ID
                $idResult = $this->getFormIdResult( $highlight, $sourceData );
                if ( !$idResult ) {
                    continue;
                }
                $lexemeData = $idResult + $lexemeData;
                $features = $idResult['features'];
            } elseif ( !empty( $highlight['lemma'] ) ) {
                // TODO: make result display highlight this
                $lexemeData['matchedLemma'] = $highlight['lemma'][0];
            } elseif ( !empty( $highlight["lexeme_forms.representation"] ) ) {
                // For now, find the first form that matches
                $formResult = $this->getFormRepresentationResult(
                    isset( $sourceData['lexeme_forms'] ) ? $sourceData['lexeme_forms'] : [],
                    $highlight['lexeme_forms.representation']
                );
                if ( $formResult ) {
                    $lexemeData = $formResult + $lexemeData;
                    $features = $formResult['features'];
                }
            }
            // Doing two-stage resolution here since we want to prefetch all labels for
            // auxiliary entities before using them to construct descriptions.
            $lexemeData['elastica_result_hash'] = spl_object_hash( $r );
            $rawResults[$entityId->getSerialization()] = $lexemeData;
            // Keyed by serialization so that each auxiliary entity is listed only once;
            // unparseable ones end up as null and are filtered out below.
            $entityIds[$lang] = EntitySearchUtils::parseOrNull( $lang, $this->idParser );
            $entityIds[$category] = EntitySearchUtils::parseOrNull( $category, $this->idParser );
            foreach ( $features as $feature ) {
                $entityIds[$feature->getSerialization()] = $feature;
            }
        }
        if ( empty( $rawResults ) ) {
            // Use the same empty result as the rest of this type instead of building
            // a different kind of empty set here.
            return $this->createEmptyResult();
        }
        // Create prefetched lookup
        $termLookup = $this->termLookupFactory->newLabelDescriptionLookup( $this->displayLanguage,
            array_filter( $entityIds ) );
        $descriptionMaker = new LexemeDescription( $termLookup, $this->idParser,
            $this->displayLanguage );
        return new LexemeResultSet( $result, $this->displayLanguage, $descriptionMaker, $rawResults );
    }
    /**
     * Create the result returned when there is nothing to show.
     *
     * @return mixed Empty set of search results
     */
    public function createEmptyResult() {
        return BaseCirrusSearchResultSet::emptyResultSet( false );
    }
}