Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
88.14% |
52 / 59 |
|
50.00% |
4 / 8 |
CRAP | |
0.00% |
0 / 1 |
| CompletionResultsCollector | |
88.14% |
52 / 59 |
|
50.00% |
4 / 8 |
27.13 | |
0.00% |
0 / 1 |
| __construct | |
80.00% |
4 / 5 |
|
0.00% |
0 / 1 |
2.03 | |||
| canCollect | |
90.00% |
9 / 10 |
|
0.00% |
0 / 1 |
7.05 | |||
| collect | |
81.82% |
18 / 22 |
|
0.00% |
0 / 1 |
7.29 | |||
| isFull | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| size | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| updateMinDoc | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
4 | |||
| logAndGetSet | |
90.91% |
10 / 11 |
|
0.00% |
0 / 1 |
3.01 | |||
| getMinScore | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| 1 | <?php |
| 2 | |
| 3 | namespace CirrusSearch\Search; |
| 4 | |
| 5 | use CirrusSearch\CompletionRequestLog; |
| 6 | use MediaWiki\Search\SearchSuggestion; |
| 7 | use MediaWiki\Search\SearchSuggestionSet; |
| 8 | |
/**
 * Collect results from multiple result sets.
 *
 * Suggestions are keyed by page id, so a page is kept at most once (with the
 * best score seen for it). At most limit + offset suggestions are retained;
 * once that capacity is reached a new suggestion is only accepted if it beats
 * the lowest score currently held, in which case that lowest one is dropped.
 */
class CompletionResultsCollector {
	/**
	 * @var SearchSuggestion[] suggestions indexed by pageId (mutable)
	 */
	private $suggestionsByDocId = [];

	/**
	 * @var string[][] profile and index names indexed by pageId (mutable)
	 */
	private $suggestionMetadataByDocId = [];

	/**
	 * @var float|null maintains the minScore (mutable), null while empty
	 */
	private $minScore = null;

	/**
	 * @var int|null maintains the doc that has minScore (mutable)
	 */
	private $minDoc = null;

	/**
	 * @var int how many results we want to keep (final)
	 */
	private $limit;

	/**
	 * @var int the offset (final)
	 */
	private $offset;

	/**
	 * Allows immediate removal of unwanted results while appropriate
	 * processes are worked out for communities to influence ranking
	 * as desired.
	 *
	 * @var int[] Set of id's to never return as results
	 */
	private $bannedIds;

	/**
	 * @param int $limit number of results we want to display
	 * @param int $offset
	 * @param int[] $bannedIds Set of id's to never return
	 * @throws \RuntimeException when $limit is not strictly positive
	 */
	public function __construct( $limit, $offset = 0, $bannedIds = [] ) {
		if ( $limit <= 0 ) {
			throw new \RuntimeException( "limit must be strictly positive" );
		}
		$this->limit = $limit;
		$this->offset = $offset;
		$this->bannedIds = $bannedIds;
	}

	/**
	 * Decide whether a suggestion for $pageId with $score may be collected.
	 *
	 * @param int $pageId
	 * @param float $score
	 * @return bool
	 * @internal param int $docId
	 */
	private function canCollect( $pageId, $score ) {
		// Loose comparison is kept as-is: a strict check would stop matching
		// banned ids supplied as numeric strings. Confirm callers before tightening.
		if ( in_array( $pageId, $this->bannedIds ) ) {
			return false;
		}

		// First element: nothing collected yet. The constructor guarantees a
		// strictly positive limit, so there is always room for it.
		if ( $this->minScore === null ) {
			return true;
		}

		// If we have the doc we do not accept it if it has lower (or equal) score
		if ( isset( $this->suggestionsByDocId[$pageId] ) &&
			$score <= $this->suggestionsByDocId[$pageId]->getScore()
		) {
			return false;
		}

		// We always accept docs that are better than the current worst one
		if ( $score > $this->minScore ) {
			return true;
		}

		// For everything else we accept until we are full
		return !$this->isFull();
	}

	/**
	 * Collect a doc if possible.
	 * The doc will be collected if the capacity is not yet reached or if its score
	 * is better than a suggestion already collected.
	 * @param SearchSuggestion $suggestion
	 * @param string $profileName
	 * @param string $index
	 * @return bool true if the doc was added false otherwise
	 */
	public function collect( SearchSuggestion $suggestion, $profileName, $index ) {
		$pageId = $suggestion->getSuggestedTitleID();
		$score = $suggestion->getScore();

		if ( !$this->canCollect( $pageId, $score ) ) {
			return false;
		}

		if ( isset( $this->suggestionsByDocId[$pageId] ) ) {
			// worst case 1: existing doc found again with a better score.
			// canCollect() already rejected the "same doc, score not better"
			// case, so the stored entry can be dropped unconditionally.
			$this->evict( $pageId );
		}

		if ( $this->isFull() ) {
			// worst case 2: collector full but better score found,
			// make room by dropping the current lowest scoring doc.
			$this->evict( $this->minDoc );
		}

		// The incoming doc becomes the new minimum if it is the lowest we hold
		if ( $this->minScore === null || $this->minScore > $score ) {
			$this->minScore = $score;
			$this->minDoc = $pageId;
		}

		$this->suggestionsByDocId[$pageId] = $suggestion;
		$this->suggestionMetadataByDocId[$pageId] = [
			'profile' => $profileName,
			'index' => $index
		];
		return true;
	}

	/**
	 * Drop a collected doc and its metadata, then recompute the min doc.
	 *
	 * @param int|null $pageId
	 */
	private function evict( $pageId ) {
		unset( $this->suggestionsByDocId[$pageId] );
		unset( $this->suggestionMetadataByDocId[$pageId] );
		$this->updateMinDoc();
	}

	/**
	 * Test whether the collector is full, i.e. it already holds
	 * limit + offset suggestions.
	 * @return bool true if it's full
	 */
	public function isFull() {
		return $this->size() >= ( $this->limit + $this->offset );
	}

	/**
	 * Number of suggestions collected
	 * @return int
	 */
	public function size() {
		return count( $this->suggestionsByDocId );
	}

	/**
	 * Find the min doc.
	 * This is called on worst case scenario:
	 * - when the collector is full but a better doc is found
	 * - when an already collected doc is found with a better score
	 *
	 * Realistically this should not happen too frequently since
	 * docs are usually fetched from elastic which returns them
	 * in order. If it appears to cause perf issues we might
	 * want to investigate an approach based on SplMinHeap.
	 *
	 * Both minDoc and minScore are reset to null when nothing is collected.
	 */
	private function updateMinDoc() {
		$minScore = null;
		$minDoc = null;
		foreach ( $this->suggestionsByDocId as $sugg ) {
			if ( $minScore === null || $minScore > $sugg->getScore() ) {
				$minScore = $sugg->getScore();
				$minDoc = $sugg->getSuggestedTitleID();
			}
		}
		$this->minDoc = $minDoc;
		$this->minScore = $minScore;
	}

	/**
	 * Return the set of suggestions collected so far and log
	 * its states to CompletionRequestLog.
	 *
	 * The collected suggestions are sorted by descending score (keys are
	 * preserved), then the window [offset, offset + limit) is extracted.
	 *
	 * @param CompletionRequestLog $log
	 * @return SearchSuggestionSet
	 */
	public function logAndGetSet( CompletionRequestLog $log ) {
		uasort(
			$this->suggestionsByDocId,
			static function ( SearchSuggestion $a, SearchSuggestion $b ) {
				// Operands are swapped to obtain a descending order
				return $b->getScore() <=> $a->getScore();
			}
		);
		// preserve_keys = true so that results stay indexed by page id
		$results = array_slice( $this->suggestionsByDocId, $this->offset,
			$this->limit, true );
		$log->setResult( $results, $this->suggestionMetadataByDocId );
		return new SearchSuggestionSet( $results );
	}

	/**
	 * @return float|null lowest score currently collected, null when empty
	 */
	public function getMinScore() {
		return $this->minScore;
	}
}