Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
47.95% |
35 / 73 |
|
20.00% |
4 / 20 |
CRAP | |
0.00% |
0 / 1 |
Result | |
47.95% |
35 / 73 |
|
20.00% |
4 / 20 |
317.08 | |
0.00% |
0 / 1 |
__construct | |
80.65% |
25 / 31 |
|
0.00% |
0 / 1 |
15.42 | |||
pickTextSnippet | |
31.58% |
6 / 19 |
|
0.00% |
0 / 1 |
34.95 | |||
getTitleSnippet | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getRedirectTitle | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
clearRedirectTitle | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
20 | |||
getRedirectSnippet | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getTextSnippet | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getSectionSnippet | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getSectionTitle | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getCategorySnippet | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getWordCount | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getByteSize | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getTimestamp | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
isFileMatch | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getInterwikiPrefix | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getInterwikiNamespaceText | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getDocId | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getScore | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getExplanation | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getTitleHelper | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 |
1 | <?php |
2 | |
3 | namespace CirrusSearch\Search; |
4 | |
5 | use CirrusSearch\Search\Fetch\HighlightingTrait; |
6 | use MediaWiki\Title\Title; |
7 | use MediaWiki\Utils\MWTimestamp; |
8 | |
9 | /** |
10 | * An individual search result from Elasticsearch. |
11 | * |
12 | * This program is free software; you can redistribute it and/or modify |
13 | * it under the terms of the GNU General Public License as published by |
14 | * the Free Software Foundation; either version 2 of the License, or |
15 | * (at your option) any later version. |
16 | * |
17 | * This program is distributed in the hope that it will be useful, |
18 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
19 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
20 | * GNU General Public License for more details. |
21 | * |
22 | * You should have received a copy of the GNU General Public License along |
23 | * with this program; if not, write to the Free Software Foundation, Inc., |
24 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
25 | * http://www.gnu.org/copyleft/gpl.html |
26 | */ |
class Result extends CirrusSearchResult {
	use HighlightingTrait;

	/** @var string Escaped title highlight, or '' when there is none */
	private $titleSnippet = '';
	/** @var Title|null Redirect that matched, when the match was on a redirect title */
	private $redirectTitle = null;
	/** @var string Escaped redirect title highlight, or '' when there is none */
	private $redirectSnippet = '';
	/** @var Title|null Title of the section whose heading matched */
	private $sectionTitle = null;
	/** @var string Escaped heading highlight, or '' when there is none */
	private $sectionSnippet = '';
	/** @var string Escaped category highlight, or '' when there is none */
	private $categorySnippet = '';
	/** @var string Escaped snippet chosen by pickTextSnippet() */
	private $textSnippet;
	/** @var bool True when the text snippet was taken from the file_text highlight */
	private $isFileMatch = false;
	/** @var string */
	private $namespaceText;
	/** @var int */
	private $wordCount;
	/** @var int */
	private $byteSize;
	/** @var MWTimestamp Formatted as TS_MW by getTimestamp() */
	private $timestamp;
	/** @var string */
	private $docId;
	/** @var float */
	private $score;
	/** @var array */
	private $explanation;
	/** @var TitleHelper */
	private $titleHelper;

	/**
	 * Build the result.
	 *
	 * Copies the metadata (doc id, sizes, timestamp, score, explanation) out of
	 * the Elasticsearch hit and turns its highlights into the title, redirect,
	 * text, section and category snippets.
	 *
	 * @param mixed $results Unused
	 * @param \Elastica\Result $result containing the given search result
	 * @param TitleHelper|null $titleHelper Helper used to build titles; a default
	 *  TitleHelper is created when null
	 */
	public function __construct( $results, $result, ?TitleHelper $titleHelper = null ) {
		// This is a null check, so use ?? rather than the truthiness test of ?:
		$this->titleHelper = $titleHelper ?? new TitleHelper();
		parent::__construct( $this->titleHelper->makeTitle( $result ) );
		$this->namespaceText = $result->namespace_text;
		$this->docId = $result->getId();

		$fields = $result->getFields();
		// Not all results requested a word count. Just pretend we have none if so
		$this->wordCount = $fields['text.word_count'][ 0 ] ?? 0;
		$this->byteSize = $result->text_bytes;
		$this->timestamp = new MWTimestamp( $result->timestamp );

		$highlights = $result->getHighlights();
		// Evil hax to not special case .plain fields for intitle regex
		foreach ( [ 'title', 'redirect.title' ] as $field ) {
			$plainField = "$field.plain";
			if ( isset( $highlights[$plainField] ) && !isset( $highlights[$field] ) ) {
				$highlights[$field] = $highlights[$plainField];
				unset( $highlights[$plainField] );
			}
		}

		$title = $this->getTitle();
		$hasTitleHighlight = isset( $highlights[ 'title' ] );
		if ( $hasTitleHighlight ) {
			// Main namespace titles carry no namespace prefix.
			$nstext = $title->getNamespace() === 0 ? '' :
				$this->titleHelper->getNamespaceText( $title ) . ':';
			$this->titleSnippet = $nstext . $this->escapeHighlightedText( $highlights[ 'title' ][ 0 ] );
		} elseif ( $title->isExternal() ) {
			// Interwiki searches are weird. They won't have title highlights by design, but
			// if we don't return a title snippet we'll get weird display results.
			$this->titleSnippet = $title->getText();
		}

		// A redirect match is only reported when the title itself did not match.
		if ( !$hasTitleHighlight && isset( $highlights[ 'redirect.title' ] ) ) {
			$redirectHighlight = $highlights[ 'redirect.title' ][ 0 ];
			// Make sure to find the redirect title before escaping because escaping breaks it....
			$this->redirectTitle = $this->findRedirectTitle( $result, $redirectHighlight );
			if ( $this->redirectTitle !== null ) {
				$this->redirectSnippet = $this->escapeHighlightedText( $redirectHighlight );
			}
		}

		$this->textSnippet = $this->escapeHighlightedText( $this->pickTextSnippet( $highlights ) );

		if ( isset( $highlights[ 'heading' ] ) ) {
			$headingHighlight = $highlights[ 'heading' ][ 0 ];
			$this->sectionSnippet = $this->escapeHighlightedText( $headingHighlight );
			$this->sectionTitle = $this->findSectionTitle( $headingHighlight, $title );
		}

		if ( isset( $highlights[ 'category' ] ) ) {
			$this->categorySnippet = $this->escapeHighlightedText( $highlights[ 'category' ][ 0 ] );
		}

		$this->score = $result->getScore();
		$this->explanation = $result->getExplanation();
	}

	/**
	 * Choose the raw (unescaped) snippet to show as the text snippet.
	 *
	 * The candidate fields are tried in priority order and the first one whose
	 * first fragment contains matches wins. When none of them does, the first
	 * fragment of the 'text' highlight is returned if present, otherwise ''.
	 * Sets $this->isFileMatch when the winning snippet comes from 'file_text'.
	 *
	 * @param string[][] $highlights Highlight fragments keyed by field name
	 * @return string
	 */
	private function pickTextSnippet( $highlights ) {
		// This can get skipped if the page was sent to Elasticsearch without text.
		// This could be a bug or it could be that the page simply doesn't have any text.
		$mainSnippet = '';
		// Prefer source_text.plain it's likely a regex
		// TODO: use the priority system from the FetchPhaseConfigBuilder
		$candidateFields = [ 'source_text.plain', 'text', 'auxiliary_text', 'file_text' ];
		foreach ( $candidateFields as $field ) {
			if ( !isset( $highlights[$field] ) ) {
				continue;
			}
			$snippet = $highlights[$field][ 0 ];
			if ( $field === 'text' ) {
				// Remember the main text so it can serve as the fallback below.
				$mainSnippet = $snippet;
			}
			if ( $this->containsMatches( $snippet ) ) {
				if ( $field === 'file_text' ) {
					$this->isFileMatch = true;
				}
				return $snippet;
			}
		}
		return $mainSnippet;
	}

	/**
	 * @return string
	 */
	public function getTitleSnippet() {
		return $this->titleSnippet;
	}

	/**
	 * @return Title|null
	 */
	public function getRedirectTitle() {
		return $this->redirectTitle;
	}

	/**
	 * Forget the redirect title and its snippet.
	 *
	 * @return bool True when nothing else is highlighted on this result: the
	 *  text snippet contains no highlight and the title, section and category
	 *  snippets are all empty.
	 */
	protected function clearRedirectTitle(): bool {
		$this->redirectTitle = null;
		$this->redirectSnippet = '';

		return !$this->containsHighlight( $this->textSnippet )
			&& $this->titleSnippet === ''
			&& $this->sectionSnippet === ''
			&& $this->categorySnippet === '';
	}

	/**
	 * @return string
	 */
	public function getRedirectSnippet() {
		return $this->redirectSnippet;
	}

	/**
	 * @param array $terms Unused; the snippet is computed in the constructor
	 * @return string|null
	 */
	public function getTextSnippet( $terms = [] ) {
		return $this->textSnippet;
	}

	/**
	 * @return string
	 */
	public function getSectionSnippet() {
		return $this->sectionSnippet;
	}

	/**
	 * @return Title|null
	 */
	public function getSectionTitle() {
		return $this->sectionTitle;
	}

	/**
	 * @return string
	 */
	public function getCategorySnippet() {
		return $this->categorySnippet;
	}

	/**
	 * @return int
	 */
	public function getWordCount() {
		return $this->wordCount;
	}

	/**
	 * @return int
	 */
	public function getByteSize() {
		return $this->byteSize;
	}

	/**
	 * @return string Timestamp in TS_MW format
	 */
	public function getTimestamp() {
		return $this->timestamp->getTimestamp( TS_MW );
	}

	/**
	 * @return bool
	 */
	public function isFileMatch() {
		return $this->isFileMatch;
	}

	/**
	 * @return string
	 */
	public function getInterwikiPrefix() {
		return $this->getTitle()->getInterwiki();
	}

	/**
	 * @return string
	 */
	public function getInterwikiNamespaceText() {
		// Seems to be only useful for API
		return $this->namespaceText;
	}

	/**
	 * @return string
	 */
	public function getDocId() {
		return $this->docId;
	}

	/**
	 * @return float the score
	 */
	public function getScore() {
		return $this->score;
	}

	/**
	 * @return array lucene score explanation
	 */
	public function getExplanation() {
		return $this->explanation;
	}

	/**
	 * @return TitleHelper
	 */
	protected function getTitleHelper(): TitleHelper {
		return $this->titleHelper;
	}
}