Code Coverage

| | Classes and Traits (%) | Classes and Traits (covered / total) | Functions and Methods (%) | Functions and Methods (covered / total) | CRAP | Lines (%) | Lines (covered / total) |
|---|---|---|---|---|---|---|---|
| Total | 0.00% | 0 / 1 | 50.00% | 4 / 8 | | 88.14% | 52 / 59 |
| CompletionResultsCollector | 0.00% | 0 / 1 | 50.00% | 4 / 8 | 27.13 | 88.14% | 52 / 59 |
| __construct | | | 0.00% | 0 / 1 | 2.02 | 83.33% | 5 / 6 |
| canCollect | | | 0.00% | 0 / 1 | 7.05 | 90.00% | 9 / 10 |
| collect | | | 0.00% | 0 / 1 | 7.34 | 80.95% | 17 / 21 |
| isFull | | | 100.00% | 1 / 1 | 1 | 100.00% | 1 / 1 |
| size | | | 100.00% | 1 / 1 | 1 | 100.00% | 1 / 1 |
| updateMinDoc | | | 100.00% | 1 / 1 | 4 | 100.00% | 9 / 9 |
| logAndGetSet | | | 0.00% | 0 / 1 | 3.01 | 90.00% | 9 / 10 |
| getMinScore | | | 100.00% | 1 / 1 | 1 | 100.00% | 1 / 1 |
<?php | |
namespace CirrusSearch\Search; | |
use CirrusSearch\CompletionRequestLog; | |
use SearchSuggestion; | |
use SearchSuggestionSet; | |
/**
 * Collect results from multiple result sets.
 *
 * At most one suggestion is kept per page id and at most limit + offset
 * suggestions are kept overall. When the collector is full a new suggestion
 * is only accepted if its score is strictly better than the lowest score
 * currently held, in which case the lowest scoring suggestion is dropped.
 */
class CompletionResultsCollector {
	/**
	 * @var SearchSuggestion[] suggestions indexed by pageId (mutable)
	 */
	private $suggestionsByDocId = [];

	/**
	 * @var string[][] 'profile' and 'index' names indexed by pageId (mutable)
	 */
	private $suggestionMetadataByDocId = [];

	/**
	 * @var float|null maintains the minScore (mutable), null while nothing is collected
	 */
	private $minScore = null;

	/**
	 * @var int|null maintains the doc that has minScore (mutable)
	 */
	private $minDoc = null;

	/**
	 * @var int how many results we want to keep (final)
	 */
	private $limit;

	/**
	 * @var int the offset (final)
	 */
	private $offset;

	/**
	 * Allows immediate removal of unwanted results while appropriate
	 * processes are worked out for communities to influence ranking
	 * as desired.
	 *
	 * @var int[] Set of id's to never return as results
	 */
	private $bannedIds;

	/**
	 * @param int $limit number of results we want to display
	 * @param int $offset
	 * @param int[] $bannedIds Set of id's to never return
	 * @throws \RuntimeException when $limit is not strictly positive
	 */
	public function __construct( $limit, $offset = 0, $bannedIds = [] ) {
		if ( $limit <= 0 ) {
			throw new \RuntimeException( "limit must be strictly positive" );
		}
		$this->limit = $limit;
		$this->offset = $offset;
		$this->bannedIds = $bannedIds;
	}

	/**
	 * Decide whether a suggestion with the given page id and score may
	 * enter the collector.
	 *
	 * @param int $pageId
	 * @param float $score
	 * @return bool
	 */
	private function canCollect( $pageId, $score ) {
		// NOTE(review): loose comparison preserved from the original code;
		// switching to strict matching needs confirmation that banned ids and
		// page ids always share the same type.
		if ( in_array( $pageId, $this->bannedIds ) ) {
			return false;
		}
		// First element: nothing has been collected yet. The constructor
		// guarantees a strictly positive limit so no further check is needed.
		if ( $this->minScore === null ) {
			return true;
		}
		// If we have the doc we do not accept it unless its score is strictly better
		if ( isset( $this->suggestionsByDocId[$pageId] ) &&
			$score <= $this->suggestionsByDocId[$pageId]->getScore()
		) {
			return false;
		}
		// We always accept docs that are better than the current worst one
		if ( $score > $this->minScore ) {
			return true;
		}
		// For everything else we accept until we are full
		return !$this->isFull();
	}

	/**
	 * Collect a doc if possible.
	 * The doc will be collected if the capacity is not yet reached or if its score
	 * is better than a suggestion already collected.
	 *
	 * @param SearchSuggestion $suggestion
	 * @param string $profileName
	 * @param string $index
	 * @return bool true if the doc was added false otherwise
	 */
	public function collect( SearchSuggestion $suggestion, $profileName, $index ) {
		$pageId = $suggestion->getSuggestedTitleID();
		$score = $suggestion->getScore();

		if ( !$this->canCollect( $pageId, $score ) ) {
			return false;
		}

		if ( isset( $this->suggestionsByDocId[$pageId] ) ) {
			// worst case 1: existing doc with better score.
			// canCollect() already rejected the suggestion if its score was not
			// strictly better than the stored one, so no second comparison is
			// needed before replacing it.
			$this->discard( $pageId );
		}

		if ( $this->isFull() ) {
			// worst case 2: collector full but better score found
			$this->discard( $this->minDoc );
		}

		if ( $this->minScore === null || $this->minScore > $score ) {
			$this->minScore = $score;
			$this->minDoc = $pageId;
		}

		$this->suggestionsByDocId[$pageId] = $suggestion;
		$this->suggestionMetadataByDocId[$pageId] = [
			'profile' => $profileName,
			'index' => $index
		];
		return true;
	}

	/**
	 * Drop a collected doc together with its metadata and recompute the min doc.
	 *
	 * @param int|null $docId
	 */
	private function discard( $docId ) {
		unset( $this->suggestionsByDocId[$docId] );
		unset( $this->suggestionMetadataByDocId[$docId] );
		$this->updateMinDoc();
	}

	/**
	 * Test whether the collector is full, i.e. it holds at least
	 * limit + offset suggestions.
	 *
	 * @return bool true if it's full
	 */
	public function isFull() {
		return $this->size() >= $this->limit + $this->offset;
	}

	/**
	 * Number of suggestions collected
	 *
	 * @return int
	 */
	public function size() {
		return count( $this->suggestionsByDocId );
	}

	/**
	 * Find the min doc.
	 * This is called on worst case scenario:
	 * - when the collector is full but a better doc is found
	 * - when an already collected doc is found with a better score
	 *
	 * Realistically this should not happen too frequently since
	 * docs are usually fetched from elastic which returns them
	 * in order. If it appears to cause perf issues we might
	 * want to investigate an approach based on SplMinHeap.
	 */
	private function updateMinDoc() {
		$minScore = null;
		$minDoc = null;
		foreach ( $this->suggestionsByDocId as $sugg ) {
			$score = $sugg->getScore();
			if ( $minScore === null || $minScore > $score ) {
				$minScore = $score;
				$minDoc = $sugg->getSuggestedTitleID();
			}
		}
		$this->minDoc = $minDoc;
		$this->minScore = $minScore;
	}

	/**
	 * Return the set of suggestions collected so far and log
	 * its states to CompletionRequestLog.
	 *
	 * The collected suggestions are sorted in place by descending score,
	 * then the window [offset, offset + limit) is extracted with its page id
	 * keys preserved. The log receives that window along with the metadata
	 * of every suggestion currently held.
	 *
	 * @param CompletionRequestLog $log
	 * @return SearchSuggestionSet
	 */
	public function logAndGetSet( CompletionRequestLog $log ) {
		uasort(
			$this->suggestionsByDocId,
			static function ( SearchSuggestion $a, SearchSuggestion $b ) {
				// Operands are swapped to get a descending order
				return $b->getScore() <=> $a->getScore();
			}
		);
		$results = array_slice( $this->suggestionsByDocId, $this->offset,
			$this->limit, true );
		$log->setResult( $results, $this->suggestionMetadataByDocId );
		return new SearchSuggestionSet( $results );
	}

	/**
	 * @return float|null lowest score currently held, null if nothing was collected
	 */
	public function getMinScore() {
		return $this->minScore;
	}
}