Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
47.56% |
39 / 82 |
|
25.00% |
2 / 8 |
CRAP | |
0.00% |
0 / 1 |
FancyTitleResultsType | |
47.56% |
39 / 82 |
|
25.00% |
2 / 8 |
107.06 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
getSourceFiltering | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getHighlightingConfiguration | |
0.00% |
0 / 31 |
|
0.00% |
0 / 1 |
6 | |||
transformElasticsearchResult | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
chooseBestTitleOrRedirect | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
createEmptyResult | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
transformOneElasticResult | |
86.67% |
26 / 30 |
|
0.00% |
0 / 1 |
10.24 | |||
resolveRedirectHighlight | |
83.33% |
10 / 12 |
|
0.00% |
0 / 1 |
6.17 |
1 | <?php |
2 | |
3 | namespace CirrusSearch\Search; |
4 | |
5 | use CirrusSearch\Searcher; |
6 | use Elastica\ResultSet as ElasticaResultSet; |
7 | use MediaWiki\Logger\LoggerFactory; |
8 | use MediaWiki\Title\Title; |
9 | |
/**
 * Returns titles categorized based on how they matched - redirect or name.
 *
 * The highlighting returned by elasticsearch is used only to detect which
 * field (title or redirect) produced the match; it is not passed on to the
 * caller.
 */
class FancyTitleResultsType extends TitleResultsType {
	/** @var string Name of the analyzer (sub-field suffix) used to match the title */
	private $matchedAnalyzer;

	/**
	 * Build result type. The matchedAnalyzer is required to detect if the match
	 * was from the title or a redirect (and is kind of a leaky abstraction.)
	 *
	 * @param string $matchedAnalyzer the analyzer used to match the title
	 * @param TitleHelper|null $titleHelper
	 */
	public function __construct( $matchedAnalyzer, ?TitleHelper $titleHelper = null ) {
		parent::__construct( $titleHelper );
		$this->matchedAnalyzer = $matchedAnalyzer;
	}

	/**
	 * @return string[] Source fields required to build titles and resolve redirects
	 */
	public function getSourceFiltering() {
		return [ 'namespace', 'title', 'namespace_text', 'wiki', 'redirect' ];
	}

	/**
	 * Build the highlighting configuration for the title and redirect fields
	 * (and their asciifolding variants) of the matched analyzer.
	 *
	 * @param array $extraHighlightFields Not used by this result type
	 * @return array|null
	 */
	public function getHighlightingConfiguration( array $extraHighlightFields = [] ) {
		global $wgCirrusSearchUseExperimentalHighlighter;

		if ( $wgCirrusSearchUseExperimentalHighlighter ) {
			// This is much less esoteric than the plain highlighter based
			// invocation but does the same thing. The magic is that the none
			// fragmenter still fragments on multi valued fields.
			$entireValue = [
				'type' => 'experimental',
				'fragmenter' => 'none',
				'number_of_fragments' => 1,
			];
			$manyValues = [
				'type' => 'experimental',
				'fragmenter' => 'none',
				'order' => 'score',
			];
		} else {
			// This is similar to the FullTextResults type but against the near_match and
			// with the plain highlighter. Near match because that is how the field is
			// queried. Plain highlighter because we don't want to add the FVH's space
			// overhead for storing extra stuff and we don't need it for combining fields.
			$entireValue = [
				'type' => 'plain',
				'number_of_fragments' => 0,
			];
			$manyValues = [
				'type' => 'plain',
				'fragment_size' => 10000, // We want the whole value but more than this is crazy
				'order' => 'score',
			];
		}
		// Both highlighters cap the number of highlighted redirect values at 30.
		$manyValues[ 'number_of_fragments' ] = 30;
		return [
			'pre_tags' => [ Searcher::HIGHLIGHT_PRE ],
			'post_tags' => [ Searcher::HIGHLIGHT_POST ],
			'fields' => [
				"title.$this->matchedAnalyzer" => $entireValue,
				"title.{$this->matchedAnalyzer}_asciifolding" => $entireValue,
				"redirect.title.$this->matchedAnalyzer" => $manyValues,
				"redirect.title.{$this->matchedAnalyzer}_asciifolding" => $manyValues,
			],
		];
	}

	/**
	 * Convert the results to titles.
	 *
	 * @param ElasticaResultSet $resultSet
	 * @return array[] Array of arrays, each with optional keys:
	 *   titleMatch => a title if the title matched
	 *   redirectMatches => an array of redirect matches, one per matched redirect
	 */
	public function transformElasticsearchResult( ElasticaResultSet $resultSet ) {
		$results = [];
		foreach ( $resultSet->getResults() as $r ) {
			$results[] = $this->transformOneElasticResult( $r );
		}
		return $results;
	}

	/**
	 * Finds best title or redirect
	 * @param array $match array returned by self::transformOneElasticResult
	 * @return Title|false The title match if present, otherwise the first
	 *  redirect match, otherwise false
	 */
	public static function chooseBestTitleOrRedirect( array $match ) {
		// TODO maybe dig around in the redirect matches and find the best one?
		return $match['titleMatch'] ?? $match['redirectMatches'][0] ?? false;
	}

	/**
	 * @return array
	 */
	public function createEmptyResult() {
		return [];
	}

	/**
	 * Transform a result from elastic into an array of Titles.
	 *
	 * @param \Elastica\Result $r
	 * @param int[] $namespaces Preferred namespaces to source redirects from when
	 *  several redirects share the same title text
	 * @return array with the following optional keys (at least one is always set):
	 *   titleMatch => a Title if the title matched
	 *   redirectMatches => an array of redirect Titles, one per matched redirect
	 */
	public function transformOneElasticResult( \Elastica\Result $r, array $namespaces = [] ) {
		$title = $this->getTitleHelper()->makeTitle( $r );
		$highlights = $r->getHighlights();
		$resultForTitle = [];

		// Now we have to use the highlights to figure out whether it was the title or the redirect
		// that matched. It is kind of a shame we can't really give the highlighting to the client
		// though.
		if ( isset( $highlights["title.$this->matchedAnalyzer"] ) ) {
			$resultForTitle['titleMatch'] = $title;
		} elseif ( isset( $highlights["title.{$this->matchedAnalyzer}_asciifolding"] ) ) {
			$resultForTitle['titleMatch'] = $title;
		}
		$redirectHighlights = [];

		if ( isset( $highlights["redirect.title.$this->matchedAnalyzer"] ) ) {
			$redirectHighlights = $highlights["redirect.title.$this->matchedAnalyzer"];
		}
		if ( isset( $highlights["redirect.title.{$this->matchedAnalyzer}_asciifolding"] ) ) {
			$redirectHighlights =
				array_merge( $redirectHighlights,
					$highlights["redirect.title.{$this->matchedAnalyzer}_asciifolding"] );
		}
		if ( $redirectHighlights !== [] ) {
			$source = $r->getSource();
			// Index the document's redirects by title text; the same text may
			// exist in several namespaces, hence a list per title.
			$docRedirects = [];
			if ( isset( $source['redirect'] ) ) {
				foreach ( $source['redirect'] as $docRedir ) {
					$docRedirects[$docRedir['title']][] = $docRedir;
				}
			}
			foreach ( $redirectHighlights as $redirectTitleString ) {
				$resultForTitle['redirectMatches'][] = $this->resolveRedirectHighlight(
					$r, $redirectTitleString, $docRedirects, $namespaces );
			}
		}
		if ( $resultForTitle === [] ) {
			// We're not really sure where the match came from so lets just pretend it was the title.
			LoggerFactory::getInstance( 'CirrusSearch' )
				->warning( "Title search result type hit a match but we can't " .
					"figure out what caused the match: {namespace}:{title}",
					[ 'namespace' => $r->namespace, 'title' => $r->title ] );
			$resultForTitle['titleMatch'] = $title;
		}

		return $resultForTitle;
	}

	/**
	 * Turn one highlighted redirect string into a redirect Title, choosing the
	 * namespace from the source document's redirects when possible.
	 *
	 * @param \Elastica\Result $r Elasticsearch result
	 * @param string $redirectTitleString Highlighted string returned from elasticsearch
	 * @param array $docRedirects Map from title string to list of redirects from elasticsearch source document
	 * @param int[] $namespaces Preferred namespaces to source redirects from
	 * @return Title
	 */
	private function resolveRedirectHighlight(
		\Elastica\Result $r, $redirectTitleString, array $docRedirects, array $namespaces
	) {
		// The match was against a redirect so we should replace the $title with one that
		// represents the redirect.
		// The first step is to strip the actual highlighting from the title.
		$redirectTitleString = str_replace( [ Searcher::HIGHLIGHT_PRE, Searcher::HIGHLIGHT_POST ],
			'', $redirectTitleString );

		if ( !isset( $docRedirects[$redirectTitleString] ) ) {
			// Instead of getting the redirect's real namespace we're going to just use the namespace
			// of the title. This is not great.
			// TODO: Should we just bail at this point?
			return $this->getTitleHelper()->makeRedirectTitle( $r, $redirectTitleString, $r->namespace );
		}

		$redirs = $docRedirects[$redirectTitleString];
		if ( count( $redirs ) === 1 ) {
			// may or may not be the right namespace, but we don't seem to have any other options.
			return $this->getTitleHelper()->makeRedirectTitle( $r, $redirectTitleString, $redirs[0]['namespace'] );
		}

		if ( $namespaces ) {
			foreach ( $redirs as $redir ) {
				// in_array() rather than array_search(): array_search() returns the
				// matching key, which is 0 (falsy) when the namespace is the first
				// entry of $namespaces, so that match would be silently skipped.
				if ( in_array( $redir['namespace'], $namespaces ) ) {
					return $this->getTitleHelper()->makeRedirectTitle( $r, $redirectTitleString, $redir['namespace'] );
				}
			}
		}
		// Multiple redirects with same text from different namespaces, but none of them match the
		// requested namespaces. What now?
		return $this->getTitleHelper()->makeRedirectTitle( $r, $redirectTitleString, $redirs[0]['namespace'] );
	}
}