Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
88.14% |
52 / 59 |
|
50.00% |
4 / 8 |
CRAP | |
0.00% |
0 / 1 |
CompletionResultsCollector | |
88.14% |
52 / 59 |
|
50.00% |
4 / 8 |
27.13 | |
0.00% |
0 / 1 |
__construct | |
80.00% |
4 / 5 |
|
0.00% |
0 / 1 |
2.03 | |||
canCollect | |
90.00% |
9 / 10 |
|
0.00% |
0 / 1 |
7.05 | |||
collect | |
81.82% |
18 / 22 |
|
0.00% |
0 / 1 |
7.29 | |||
isFull | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
size | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
updateMinDoc | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
4 | |||
logAndGetSet | |
90.91% |
10 / 11 |
|
0.00% |
0 / 1 |
3.01 | |||
getMinScore | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 |
1 | <?php |
2 | |
3 | namespace CirrusSearch\Search; |
4 | |
5 | use CirrusSearch\CompletionRequestLog; |
6 | use SearchSuggestion; |
7 | use SearchSuggestionSet; |
8 | |
/**
 * Collect results from multiple result sets.
 *
 * The collector keeps at most (limit + offset) suggestions, at most one per
 * page id, and always retains the best scored ones seen so far. The lowest
 * scored suggestion currently held is tracked (minScore/minDoc) so that it
 * can be evicted when a better candidate shows up while the collector is full.
 */
class CompletionResultsCollector {
	/**
	 * @var SearchSuggestion[] suggestions indexed by pageId (mutable)
	 */
	private $suggestionsByDocId = [];

	/**
	 * @var string[][] metadata indexed by pageId (mutable); each entry holds
	 *  the 'profile' and 'index' values that were passed to collect()
	 */
	private $suggestionMetadataByDocId = [];

	/**
	 * @var float|null maintains the minScore (mutable), null while nothing
	 *  has been collected
	 */
	private $minScore = null;

	/**
	 * @var int|null maintains the doc that has minScore (mutable)
	 */
	private $minDoc = null;

	/**
	 * @var int how many results we want to keep (final)
	 */
	private $limit;

	/**
	 * @var int the offset (final)
	 */
	private $offset;

	/**
	 * Allows immediate removal of unwanted results while appropriate
	 * processes are worked out for communities to influence ranking
	 * as desired.
	 *
	 * @var int[] Set of id's to never return as results
	 */
	private $bannedIds;

	/**
	 * @param int $limit number of results we want to display, must be > 0
	 * @param int $offset number of leading results skipped by logAndGetSet();
	 *  NOTE(review): not validated here, presumably callers always pass a
	 *  value >= 0 - confirm
	 * @param int[] $bannedIds Set of id's to never return
	 * @throws \RuntimeException when $limit is not strictly positive
	 */
	public function __construct( $limit, $offset = 0, $bannedIds = [] ) {
		if ( $limit <= 0 ) {
			throw new \RuntimeException( "limit must be strictly positive" );
		}
		$this->limit = $limit;
		$this->offset = $offset;
		$this->bannedIds = $bannedIds;
	}

	/**
	 * Decide whether a doc with the given score may enter the collector.
	 *
	 * @param int $pageId
	 * @param float $score
	 * @return bool true if collect() should store the doc
	 */
	private function canCollect( $pageId, $score ) {
		// Loose comparison kept as-is so that ids provided as numeric strings
		// still match - TODO confirm whether bannedIds is always int[].
		if ( in_array( $pageId, $this->bannedIds ) ) {
			return false;
		}

		// Nothing collected yet: the constructor guarantees limit > 0, so
		// there is always room for the first element.
		if ( $this->minScore === null ) {
			return true;
		}

		// If we have the doc we do not accept it unless its score is
		// strictly better than the one already stored.
		if ( isset( $this->suggestionsByDocId[$pageId] ) &&
			$score <= $this->suggestionsByDocId[$pageId]->getScore()
		) {
			return false;
		}

		// We always accept docs that beat the current worst one
		if ( $score > $this->minScore ) {
			return true;
		}

		// For everything else we accept until we are full
		return !$this->isFull();
	}

	/**
	 * Collect a doc if possible.
	 * The doc will be collected if the capacity is not yet reached or if its score
	 * is better than a suggestion already collected.
	 * @param SearchSuggestion $suggestion
	 * @param string $profileName stored as the 'profile' metadata of the doc
	 * @param string $index stored as the 'index' metadata of the doc
	 * @return bool true if the doc was added false otherwise
	 */
	public function collect( SearchSuggestion $suggestion, $profileName, $index ) {
		$pageId = $suggestion->getSuggestedTitleID();
		$score = $suggestion->getScore();

		if ( !$this->canCollect( $pageId, $score ) ) {
			return false;
		}

		if ( isset( $this->suggestionsByDocId[$pageId] ) ) {
			// canCollect() only lets an already collected doc through when the
			// new score is strictly better, so the old entry is always replaced.
			unset( $this->suggestionsByDocId[$pageId] );
			unset( $this->suggestionMetadataByDocId[$pageId] );
			// worst case 1: existing doc with better score
			$this->updateMinDoc();
		}

		if ( $this->isFull() ) {
			// Evict the current worst doc to make room for the new one
			unset( $this->suggestionsByDocId[$this->minDoc] );
			unset( $this->suggestionMetadataByDocId[$this->minDoc] );
			// worst case 2: collector full but better score found
			$this->updateMinDoc();
		}

		// The new doc becomes the worst one if nothing is left or if it
		// scores below everything that remains.
		if ( $this->minScore === null || $this->minScore > $score ) {
			$this->minScore = $score;
			$this->minDoc = $pageId;
		}

		$this->suggestionsByDocId[$pageId] = $suggestion;
		$this->suggestionMetadataByDocId[$pageId] = [
			'profile' => $profileName,
			'index' => $index
		];
		return true;
	}

	/**
	 * Test whether the collector is full, i.e. it holds at least
	 * (limit + offset) suggestions.
	 * @return bool true if it's full
	 */
	public function isFull() {
		return $this->size() >= ( $this->limit + $this->offset );
	}

	/**
	 * Number of suggestions collected
	 * @return int
	 */
	public function size() {
		return count( $this->suggestionsByDocId );
	}

	/**
	 * Find the min doc.
	 * This is called on worst case scenario:
	 * - when the collector is full but a better doc is found
	 * - when an already collected doc is found with a better score
	 *
	 * Both minDoc and minScore are reset to null when the collector
	 * is empty.
	 *
	 * Realistically this should not happen too frequently since
	 * docs are usually fetched from elastic which returns them
	 * in order. If it appears to cause perf issues we might
	 * want to investigate an approach based on SplMinHeap.
	 */
	private function updateMinDoc() {
		$minScore = null;
		$minDoc = null;
		foreach ( $this->suggestionsByDocId as $sugg ) {
			$score = $sugg->getScore();
			if ( $minScore === null || $minScore > $score ) {
				$minScore = $score;
				$minDoc = $sugg->getSuggestedTitleID();
			}
		}
		$this->minDoc = $minDoc;
		$this->minScore = $minScore;
	}

	/**
	 * Return the set of suggestions collected so far and log
	 * its states to CompletionRequestLog.
	 *
	 * The collected suggestions are sorted in place by descending score
	 * (page id keys preserved), then the window [offset, offset + limit)
	 * is returned.
	 *
	 * @param CompletionRequestLog $log
	 * @return SearchSuggestionSet
	 */
	public function logAndGetSet( CompletionRequestLog $log ) {
		uasort( $this->suggestionsByDocId, static function ( SearchSuggestion $a, SearchSuggestion $b ) {
			$scoreA = $a->getScore();
			$scoreB = $b->getScore();
			if ( $scoreB > $scoreA ) {
				return 1;
			} elseif ( $scoreB < $scoreA ) {
				return -1;
			}
			return 0;
		} );
		// preserve_keys = true so that results stay indexed by page id
		$results = array_slice( $this->suggestionsByDocId, $this->offset,
			$this->limit, true );
		// The metadata of every collected doc is logged, not only the
		// metadata of the returned window.
		$log->setResult( $results, $this->suggestionMetadataByDocId );
		return new SearchSuggestionSet( $results );
	}

	/**
	 * @return float|null lowest score currently held, null if nothing
	 *  has been collected
	 */
	public function getMinScore() {
		return $this->minScore;
	}
}