Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
53.62% covered (warning)
53.62%
37 / 69
25.00% covered (danger)
25.00%
2 / 8
CRAP
0.00% covered (danger)
0.00%
0 / 1
FancyTitleResultsType
53.62% covered (warning)
53.62%
37 / 69
25.00% covered (danger)
25.00%
2 / 8
75.77
0.00% covered (danger)
0.00%
0 / 1
 __construct
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 getSourceFiltering
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 getHighlightingConfiguration
0.00% covered (danger)
0.00%
0 / 20
0.00% covered (danger)
0.00%
0 / 1
2
 transformElasticsearchResult
0.00% covered (danger)
0.00%
0 / 4
0.00% covered (danger)
0.00%
0 / 1
6
 chooseBestTitleOrRedirect
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 createEmptyResult
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 transformOneElasticResult
86.67% covered (warning)
86.67%
26 / 30
0.00% covered (danger)
0.00%
0 / 1
10.24
 resolveRedirectHighlight
80.00% covered (warning)
80.00%
8 / 10
0.00% covered (danger)
0.00%
0 / 1
6.29
1<?php
2
3namespace CirrusSearch\Search;
4
5use Elastica\ResultSet as ElasticaResultSet;
6use MediaWiki\Logger\LoggerFactory;
7use MediaWiki\Title\Title;
8
/**
 * Returns titles categorized based on how they matched - redirect or name.
 *
 * Each transformed hit is an array with the optional keys:
 *   titleMatch => the Title of the page, when the page title itself matched
 *   redirectMatches => a list of Titles, one per matched redirect
 */
class FancyTitleResultsType extends TitleResultsType {
    /** @var string Analyzer name used to build the highlighted field names */
    private $matchedAnalyzer;

    /**
     * Build result type.   The matchedAnalyzer is required to detect if the match
     * was from the title or a redirect (and is kind of a leaky abstraction.)
     *
     * @param string $matchedAnalyzer the analyzer used to match the title
     * @param TitleHelper|null $titleHelper
     */
    public function __construct( $matchedAnalyzer, ?TitleHelper $titleHelper = null ) {
        parent::__construct( $titleHelper );
        $this->matchedAnalyzer = $matchedAnalyzer;
    }

    /** @inheritDoc */
    public function getSourceFiltering() {
        return [ 'namespace', 'title', 'namespace_text', 'wiki', 'redirect' ];
    }

    /**
     * Build the highlighting configuration for the title and redirect fields
     * of the matched analyzer (plain and "_asciifolding" variants).
     *
     * @param array $extraHighlightFields Not used by this implementation
     * @return array
     */
    public function getHighlightingConfiguration( array $extraHighlightFields = [] ) {
        // Titles are single valued: ask for the whole value back.
        $entireValue = [
            'type' => 'unified',
            'number_of_fragments' => 0,
        ];
        // Redirects are multi valued: ask for up to 30 of them, best scoring first.
        $manyValues = [
            'type' => 'unified',
            'fragment_size' => 10000, // We want the whole value but more than this is crazy
            'number_of_fragments' => 30,
            'order' => 'score',
        ];
        return [
            // we don't really care about the actual portion of the title that matched, the UI
            // is generally responsible for doing this.
            'pre_tags' => [ "" ],
            'post_tags' => [ "" ],
            'fields' => [
                "title.$this->matchedAnalyzer" => $entireValue,
                "title.{$this->matchedAnalyzer}_asciifolding" => $entireValue,
                "redirect.title.$this->matchedAnalyzer" => $manyValues,
                "redirect.title.{$this->matchedAnalyzer}_asciifolding" => $manyValues,
            ],
        ];
    }

    /**
     * Convert the results to titles.
     *
     * @param ElasticaResultSet $resultSet
     * @return array[] Array of arrays, each with optional keys:
     *   titleMatch => a title if the title matched
     *   redirectMatches => an array of redirect matches, one per matched redirect
     */
    public function transformElasticsearchResult( ElasticaResultSet $resultSet ) {
        $results = [];
        foreach ( $resultSet->getResults() as $r ) {
            $results[] = $this->transformOneElasticResult( $r );
        }
        return $results;
    }

    /**
     * Finds best title or redirect: the title match when there is one,
     * otherwise the first redirect match.
     *
     * @param array $match array returned by self::transformOneElasticResult
     * @return Title|false The chosen title, or false if $match holds neither a title
     *   match nor a redirect match
     */
    public static function chooseBestTitleOrRedirect( array $match ) {
        // TODO maybe dig around in the redirect matches and find the best one?
        return $match['titleMatch'] ?? $match['redirectMatches'][0] ?? false;
    }

    /**
     * @return array
     */
    public function createEmptyResult() {
        return [];
    }

    /**
     * Transform a result from elastic into an array of Titles.
     *
     * @param \Elastica\Result $r
     * @param int[] $namespaces Preferred namespaces to source redirects from
     * @return array Array with the following optional keys (at least one is always set):
     *   titleMatch => a Title if the title matched
     *   redirectMatches => an array of Titles, one per matched redirect
     */
    public function transformOneElasticResult( \Elastica\Result $r, array $namespaces = [] ) {
        $title = $this->getTitleHelper()->makeTitle( $r );
        $highlights = $r->getHighlights();
        $resultForTitle = [];

        $titleField = "title.$this->matchedAnalyzer";
        $redirectField = "redirect.title.$this->matchedAnalyzer";

        // Now we have to use the highlights to figure out whether it was the title or the redirect
        // that matched.  It is kind of a shame we can't really give the highlighting to the client
        // though.
        if ( isset( $highlights[$titleField] ) || isset( $highlights["{$titleField}_asciifolding"] ) ) {
            $resultForTitle['titleMatch'] = $title;
        }

        // Redirect highlights of the plain field first, then the asciifolding ones.
        $redirectHighlights = array_merge(
            $highlights[$redirectField] ?? [],
            $highlights["{$redirectField}_asciifolding"] ?? []
        );
        if ( $redirectHighlights !== [] ) {
            // Index the redirects of the source document by title text; the same text
            // may appear several times (once per namespace).
            $source = $r->getSource();
            $docRedirects = [];
            if ( isset( $source['redirect'] ) ) {
                foreach ( $source['redirect'] as $docRedir ) {
                    $docRedirects[$docRedir['title']][] = $docRedir;
                }
            }
            foreach ( $redirectHighlights as $redirectTitleString ) {
                $resultForTitle['redirectMatches'][] = $this->resolveRedirectHighlight(
                    $r, $redirectTitleString, $docRedirects, $namespaces );
            }
        }
        if ( $resultForTitle === [] ) {
            // We're not really sure where the match came from so let's just pretend it was the title.
            LoggerFactory::getInstance( 'CirrusSearch' )
                ->warning( "Title search result type hit a match but we can't " .
                    "figure out what caused the match: {namespace}:{title}",
                    [ 'namespace' => $r->namespace, 'title' => $r->title ] );
            $resultForTitle['titleMatch'] = $title;
        }

        return $resultForTitle;
    }

    /**
     * Build the Title of the redirect designated by a highlighted redirect string.
     *
     * @param \Elastica\Result $r Elasticsearch result
     * @param string $redirectTitleString Highlighted string returned from elasticsearch
     * @param array $docRedirects Map from title string to list of redirects from elasticsearch source document
     * @param int[] $namespaces Preferred namespaces to source redirects from
     * @return Title
     */
    private function resolveRedirectHighlight(
        \Elastica\Result $r, $redirectTitleString, array $docRedirects, array $namespaces
    ) {
        // The match was against a redirect so we should replace the $title with one that
        // represents the redirect.
        if ( !isset( $docRedirects[$redirectTitleString] ) ) {
            // Instead of getting the redirect's real namespace we're going to just use the namespace
            // of the title.  This is not great.
            // TODO: Should we just bail at this point?
            return $this->getTitleHelper()->makeRedirectTitle( $r, $redirectTitleString, $r->namespace );
        }

        $redirs = $docRedirects[$redirectTitleString];
        if ( count( $redirs ) === 1 ) {
            // may or may not be the right namespace, but we don't seem to have any other options.
            return $this->getTitleHelper()->makeRedirectTitle( $r, $redirectTitleString, $redirs[0]['namespace'] );
        }

        if ( $namespaces ) {
            foreach ( $redirs as $redir ) {
                // in_array() and not array_search(): array_search() returns the matching key,
                // which is 0 (falsy) when the redirect lives in the first preferred namespace,
                // so that namespace could never be selected. The comparison is kept loose,
                // as it was before.
                if ( in_array( $redir['namespace'], $namespaces ) ) {
                    return $this->getTitleHelper()->makeRedirectTitle( $r, $redirectTitleString, $redir['namespace'] );
                }
            }
        }
        // Multiple redirects with same text from different namespaces, but none of them match the requested namespaces. What now?
        return $this->getTitleHelper()->makeRedirectTitle( $r, $redirectTitleString, $redirs[0]['namespace'] );
    }
}