Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
67.39% |
31 / 46 |
|
25.00% |
1 / 4 |
CRAP | |
0.00% |
0 / 1 |
| TeamDraftInterleaver | |
67.39% |
31 / 46 |
|
25.00% |
1 / 4 |
20.80 | |
0.00% |
0 / 1 |
| __construct | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| interleave | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
2 | |||
| extractResults | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
| interleaveResults | |
100.00% |
31 / 31 |
|
100.00% |
1 / 1 |
10 | |||
| 1 | <?php |
| 2 | |
| 3 | namespace CirrusSearch\Search; |
| 4 | |
| 5 | use CirrusSearch\Util; |
| 6 | |
| 7 | /** |
| 8 | * Implementation of algorithm 2, Team-Draft Interleaving, from F. Radlinski, M. |
| 9 | * Kurup, and T. Joachims. How does clickthrough data reflect retrieval |
| 10 | * quality? In Proc 17th Intl. Conf. on Information and Knowledge Management. |
| 11 | * ACM New York, NY, USA, 2008. |
| 12 | */ |
| 13 | class TeamDraftInterleaver { |
| 14 | /** @var string The query provided by the user. */ |
| 15 | private $searchTerm; |
| 16 | |
| 17 | /** |
| 18 | * @param string $searchTerm The query provided by the user. |
| 19 | */ |
| 20 | public function __construct( $searchTerm ) { |
| 21 | $this->searchTerm = $searchTerm; |
| 22 | } |
| 23 | |
| 24 | /** |
| 25 | * @param CirrusSearchResultSet $a Results for team A |
| 26 | * @param CirrusSearchResultSet $b Results for team B |
| 27 | * @param int $limit Maximum number of results to return |
| 28 | * @return CirrusSearchResultSet |
| 29 | */ |
| 30 | public function interleave( CirrusSearchResultSet $a, CirrusSearchResultSet $b, $limit ): CirrusSearchResultSet { |
| 31 | // Seed the randomness so an individual user the clicks a result and then |
| 32 | // comes back to the search page sees the same interleaving. This is not |
| 33 | // strictly necessary for the test, but provides the user a more consistent |
| 34 | // experience. |
| 35 | $seed = hexdec( substr( Util::generateIdentToken( $this->searchTerm ), 0, 8 ) ); |
| 36 | mt_srand( $seed ); |
| 37 | |
| 38 | $aResults = $this->extractResults( $a ); |
| 39 | $bResults = $this->extractResults( $b ); |
| 40 | [ $interleaved, $teamA, $teamB, $aOffset ] = $this->interleaveResults( |
| 41 | $aResults, |
| 42 | $bResults, |
| 43 | $limit |
| 44 | ); |
| 45 | |
| 46 | return new InterleavedResultSet( $a, $interleaved, $teamA, $teamB, $aOffset ); |
| 47 | } |
| 48 | |
| 49 | private function extractResults( CirrusSearchResultSet $resultSet ): array { |
| 50 | $extracted = []; |
| 51 | /** @var CirrusSearchResult $result */ |
| 52 | foreach ( $resultSet as $result ) { |
| 53 | $extracted[$result->getDocId()] = $result; |
| 54 | } |
| 55 | return $extracted; |
| 56 | } |
| 57 | |
| 58 | /** |
| 59 | * Interleave two arrays using team draft interleaving. Only public |
| 60 | * for unit tests. The id's used as keys in $a and $b must represent |
| 61 | * the same thing, to prevent duplicates in the returned results. |
| 62 | * |
| 63 | * @param mixed[] $a Map from id to anything in preferred order for team A |
| 64 | * @param mixed[] $b Map from id to anything in preferred order for team B |
| 65 | * @param int $limit Maximum number of results to interleave |
| 66 | * @return mixed[] Four item array. first item being the values from $a |
| 67 | * and $b in the interleaved ordering, second the ids belonging to team |
| 68 | * A, third the ids belonging to team B, and finally the offset for the |
| 69 | * next page in $a. |
| 70 | */ |
| 71 | public static function interleaveResults( $a, $b, $limit ) { |
| 72 | $interleaved = []; |
| 73 | $teamA = []; |
| 74 | $teamB = []; |
| 75 | $aIds = array_combine( array_keys( $a ), array_keys( $a ) ); |
| 76 | $bIds = array_combine( array_keys( $b ), array_keys( $b ) ); |
| 77 | while ( |
| 78 | count( $interleaved ) < $limit |
| 79 | && ( $aIds || $bIds ) |
| 80 | ) { |
| 81 | if ( !$aIds ) { |
| 82 | $id = reset( $bIds ); |
| 83 | $teamB[] = $id; |
| 84 | $interleaved[] = $b[$id]; |
| 85 | } elseif ( !$bIds ) { |
| 86 | $id = reset( $aIds ); |
| 87 | $teamA[] = $id; |
| 88 | $interleaved[] = $a[$id]; |
| 89 | // If team b has already chosen, then a needs to go |
| 90 | } elseif ( count( $teamA ) < count( $teamB ) |
| 91 | || ( |
| 92 | // If the counts are equal choose randomly which team goes next |
| 93 | count( $teamA ) == count( $teamB ) |
| 94 | && mt_rand( 0, 1 ) === 1 |
| 95 | ) ) { |
| 96 | $id = reset( $aIds ); |
| 97 | $teamA[] = $id; |
| 98 | $interleaved[] = $a[$id]; |
| 99 | } else { |
| 100 | $id = reset( $bIds ); |
| 101 | $teamB[] = $id; |
| 102 | $interleaved[] = $b[$id]; |
| 103 | } |
| 104 | unset( $aIds[$id], $bIds[$id] ); |
| 105 | } |
| 106 | |
| 107 | if ( $aIds ) { |
| 108 | // $offset needs to be set such that results starting at $offset + |
| 109 | // $limit are new. As such if we have 20 items and 10 of a are |
| 110 | // used, then offset should be -10. If 15 are used we want -5. |
| 111 | // This doesn't guarantee we don't show duplicates. Items from B |
| 112 | // could be in second page of A. The goal here is to ensure |
| 113 | // everything from A is seen when paginating even if there are |
| 114 | // dupes. |
| 115 | $nextA = reset( $aIds ); |
| 116 | $nextAIdx = array_search( $nextA, array_keys( $a ) ); |
| 117 | $offset = -( $limit - $nextAIdx ); |
| 118 | } else { |
| 119 | $offset = 0; |
| 120 | } |
| 121 | |
| 122 | return [ $interleaved, $teamA, $teamB, $offset ]; |
| 123 | } |
| 124 | } |