Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
88.14% covered (warning)
88.14%
52 / 59
50.00% covered (danger)
50.00%
4 / 8
CRAP
0.00% covered (danger)
0.00%
0 / 1
CompletionResultsCollector
88.14% covered (warning)
88.14%
52 / 59
50.00% covered (danger)
50.00%
4 / 8
27.13
0.00% covered (danger)
0.00%
0 / 1
 __construct
80.00% covered (warning)
80.00%
4 / 5
0.00% covered (danger)
0.00%
0 / 1
2.03
 canCollect
90.00% covered (success)
90.00%
9 / 10
0.00% covered (danger)
0.00%
0 / 1
7.05
 collect
81.82% covered (warning)
81.82%
18 / 22
0.00% covered (danger)
0.00%
0 / 1
7.29
 isFull
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 size
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 updateMinDoc
100.00% covered (success)
100.00%
8 / 8
100.00% covered (success)
100.00%
1 / 1
4
 logAndGetSet
90.91% covered (success)
90.91%
10 / 11
0.00% covered (danger)
0.00%
0 / 1
3.01
 getMinScore
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
1<?php
2
3namespace CirrusSearch\Search;
4
5use CirrusSearch\CompletionRequestLog;
6use SearchSuggestion;
7use SearchSuggestionSet;
8
/**
 * Accumulates completion suggestions coming from several result sets.
 *
 * At most one suggestion is kept per page id, and no more than
 * limit + offset suggestions are held at any time. Once that capacity
 * is reached, a new suggestion is only accepted when it scores strictly
 * higher than the lowest scored suggestion currently held, which it
 * then replaces.
 */
class CompletionResultsCollector {
    /**
     * @var SearchSuggestion[] collected suggestions keyed by page id (mutable)
     */
    private $suggestionsByDocId = [];

    /**
     * @var string[][] for every collected suggestion, keyed by page id, an
     *  array holding the 'profile' and 'index' it was collected with (mutable)
     */
    private $suggestionMetadataByDocId = [];

    /**
     * @var float|null lowest score among the collected suggestions,
     *  null while nothing is collected (mutable)
     */
    private $minScore = null;

    /**
     * @var int|null page id of the suggestion holding $minScore (mutable)
     */
    private $minDoc = null;

    /**
     * @var int number of results to return (final)
     */
    private $limit;

    /**
     * @var int number of leading results to skip (final)
     */
    private $offset;

    /**
     * Allows immediate removal of unwanted results while appropriate
     * processes are worked out for communities to influence ranking
     * as desired.
     *
     * @var int[] Set of ids to never return as results
     */
    private $bannedIds;

    /**
     * @param int $limit number of results we want to display
     * @param int $offset number of results skipped before the displayed ones
     * @param int[] $bannedIds Set of ids to never return
     * @throws \RuntimeException when $limit is not strictly positive
     */
    public function __construct( $limit, $offset = 0, $bannedIds = [] ) {
        if ( $limit <= 0 ) {
            throw new \RuntimeException( "limit must be strictly positive" );
        }

        $this->bannedIds = $bannedIds;
        $this->offset = $offset;
        $this->limit = $limit;
    }

    /**
     * Decide whether a suggestion for $pageId scored $score may be collected.
     *
     * @param int $pageId
     * @param float $score
     * @return bool
     */
    private function canCollect( $pageId, $score ) {
        // Banned pages are rejected whatever their score
        // (loose comparison, so ids given as numeric strings match too).
        $banned = in_array( $pageId, $this->bannedIds );
        if ( $banned ) {
            return false;
        }

        // No minimum recorded yet: accept right away.
        $noMinimumYet = $this->minScore === null && $this->limit > 0;
        if ( $noMinimumYet ) {
            return true;
        }

        // A page we already hold must come back with a strictly higher score.
        $held = isset( $this->suggestionsByDocId[$pageId] )
            ? $this->suggestionsByDocId[$pageId]
            : null;
        if ( $held !== null && $score <= $held->getScore() ) {
            return false;
        }

        // Beating the current minimum is always enough; anything else
        // is only accepted while there is room left.
        return $score > $this->minScore || !$this->isFull();
    }

    /**
     * Collect a doc if possible.
     * The doc is collected when the capacity is not reached yet or when
     * its score beats a suggestion that was collected earlier.
     *
     * @param SearchSuggestion $suggestion
     * @param string $profileName stored under the 'profile' metadata key
     * @param string $index stored under the 'index' metadata key
     * @return bool true if the doc was added false otherwise
     */
    public function collect( SearchSuggestion $suggestion, $profileName, $index ) {
        $docId = $suggestion->getSuggestedTitleID();
        $score = $suggestion->getScore();

        if ( !$this->canCollect( $docId, $score ) ) {
            return false;
        }

        if ( isset( $this->suggestionsByDocId[$docId] ) ) {
            $previous = $this->suggestionsByDocId[$docId];
            if ( $previous->getScore() > $score ) {
                return false;
            }
            // worst case 1: a page already collected comes back with a
            // better score, the old entry goes away
            $this->evict( $docId );
        }

        if ( $this->isFull() ) {
            // worst case 2: no room left but a better score was found,
            // the lowest scored entry goes away
            $this->evict( $this->minDoc );
        }

        // The newcomer may itself be the new lowest scored entry.
        if ( $this->minScore === null || $this->minScore > $score ) {
            $this->minDoc = $docId;
            $this->minScore = $score;
        }

        $this->suggestionMetadataByDocId[$docId] = [
            'profile' => $profileName,
            'index' => $index
        ];
        $this->suggestionsByDocId[$docId] = $suggestion;

        return true;
    }

    /**
     * Test whether the collector is full, i.e. whether it already holds
     * limit + offset suggestions.
     *
     * @return bool true if it's full
     */
    public function isFull() {
        $capacity = $this->limit + $this->offset;
        if ( $this->size() < $capacity ) {
            return false;
        }
        return true;
    }

    /**
     * Number of suggestions collected
     *
     * @return int
     */
    public function size() {
        return count( $this->suggestionsByDocId );
    }

    /**
     * Forget the suggestion (and its metadata) stored for $docId, then
     * recompute which remaining suggestion has the lowest score.
     *
     * @param int|null $docId
     */
    private function evict( $docId ) {
        unset(
            $this->suggestionsByDocId[$docId],
            $this->suggestionMetadataByDocId[$docId]
        );
        $this->updateMinDoc();
    }

    /**
     * Find the min doc by scanning every collected suggestion.
     * This is called on worst case scenario:
     * - when the collector is full but a better doc is found
     * - when an already collected doc is found with a better score
     *
     * Realistically this should not happen too frequently since
     * docs are usually fetched from elastic which returns them
     * in order. If it appears to cause perf issues we might
     * want to investigate an approach based on SplMinHeap.
     */
    private function updateMinDoc() {
        $lowest = null;
        $lowestScore = null;

        foreach ( $this->suggestionsByDocId as $candidate ) {
            $candidateScore = $candidate->getScore();
            if ( $lowestScore === null || $lowestScore > $candidateScore ) {
                $lowestScore = $candidateScore;
                $lowest = $candidate;
            }
        }

        // Both fields fall back to null when nothing is left.
        $this->minScore = $lowestScore;
        $this->minDoc = $lowest === null ? null : $lowest->getSuggestedTitleID();
    }

    /**
     * Return the set of suggestions collected so far and log
     * its states to CompletionRequestLog.
     *
     * The collected suggestions are sorted in place by descending score,
     * then the window [offset, offset + limit) is extracted with its page
     * id keys preserved. The log receives that window together with the
     * metadata of every collected suggestion.
     *
     * @param CompletionRequestLog $log
     * @return SearchSuggestionSet
     */
    public function logAndGetSet( CompletionRequestLog $log ) {
        $byDescendingScore = static function ( SearchSuggestion $left, SearchSuggestion $right ) {
            $leftScore = $left->getScore();
            $rightScore = $right->getScore();
            if ( $rightScore > $leftScore ) {
                return 1;
            }
            if ( $rightScore < $leftScore ) {
                return -1;
            }
            return 0;
        };
        uasort( $this->suggestionsByDocId, $byDescendingScore );

        $results = array_slice(
            $this->suggestionsByDocId,
            $this->offset,
            $this->limit,
            true
        );
        $log->setResult( $results, $this->suggestionMetadataByDocId );

        return new SearchSuggestionSet( $results );
    }

    /**
     * @return float|null lowest score currently held, null when nothing
     *  is collected
     */
    public function getMinScore() {
        return $this->minScore;
    }
}