Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
67.39% |
31 / 46 |
|
25.00% |
1 / 4 |
CRAP | |
0.00% |
0 / 1 |
TeamDraftInterleaver | |
67.39% |
31 / 46 |
|
25.00% |
1 / 4 |
20.80 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
interleave | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
2 | |||
extractResults | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
interleaveResults | |
100.00% |
31 / 31 |
|
100.00% |
1 / 1 |
10 |
1 | <?php |
2 | |
3 | namespace CirrusSearch\Search; |
4 | |
5 | use CirrusSearch\Util; |
6 | |
7 | /** |
8 | * Implementation of algorithm 2, Team-Draft Interleaving, from F. Radlinski, M. |
9 | * Kurup, and T. Joachims. How does clickthrough data reflect retrieval |
10 | * quality? In Proc 17th Intl. Conf. on Information and Knowledge Management. |
11 | * ACM New York, NY, USA, 2008. |
12 | */ |
13 | class TeamDraftInterleaver { |
14 | /** @var string The query provided by the user. */ |
15 | private $searchTerm; |
16 | |
17 | /** |
18 | * @param string $searchTerm The query provided by the user. |
19 | */ |
20 | public function __construct( $searchTerm ) { |
21 | $this->searchTerm = $searchTerm; |
22 | } |
23 | |
24 | /** |
25 | * @param CirrusSearchResultSet $a Results for team A |
26 | * @param CirrusSearchResultSet $b Results for team B |
27 | * @param int $limit Maximum number of results to return |
28 | * @return CirrusSearchResultSet |
29 | */ |
30 | public function interleave( CirrusSearchResultSet $a, CirrusSearchResultSet $b, $limit ): CirrusSearchResultSet { |
31 | // Seed the randomness so an individual user the clicks a result and then |
32 | // comes back to the search page sees the same interleaving. This is not |
33 | // strictly necessary for the test, but provides the user a more consistent |
34 | // experience. |
35 | $seed = hexdec( substr( Util::generateIdentToken( $this->searchTerm ), 0, 8 ) ); |
36 | mt_srand( $seed ); |
37 | |
38 | $aResults = $this->extractResults( $a ); |
39 | $bResults = $this->extractResults( $b ); |
40 | [ $interleaved, $teamA, $teamB, $aOffset ] = $this->interleaveResults( |
41 | $aResults, |
42 | $bResults, |
43 | $limit |
44 | ); |
45 | |
46 | return new InterleavedResultSet( $a, $interleaved, $teamA, $teamB, $aOffset ); |
47 | } |
48 | |
49 | private function extractResults( CirrusSearchResultSet $resultSet ) { |
50 | $extracted = []; |
51 | /** @var $result CirrusSearchResult */ |
52 | foreach ( $resultSet as $result ) { |
53 | $extracted[$result->getDocId()] = $result; |
54 | } |
55 | return $extracted; |
56 | } |
57 | |
58 | /** |
59 | * Interleave two arrays using team draft interleaving. Only public |
60 | * for unit tests. The id's used as keys in $a and $b must represent |
61 | * the same thing, to prevent duplicates in the returned results. |
62 | * |
63 | * @param mixed[] $a Map from id to anything in preferred order for team A |
64 | * @param mixed[] $b Map from id to anything in preferred order for team B |
65 | * @param int $limit Maximum number of results to interleave |
66 | * @return mixed[] Four item array. first item being the values from $a |
67 | * and $b in the interleaved ordering, second the ids belonging to team |
68 | * A, third the ids belonging to team B, and finally the offset for the |
69 | * next page in $a. |
70 | */ |
71 | public static function interleaveResults( $a, $b, $limit ) { |
72 | $interleaved = []; |
73 | $teamA = []; |
74 | $teamB = []; |
75 | $aIds = array_combine( array_keys( $a ), array_keys( $a ) ); |
76 | $bIds = array_combine( array_keys( $b ), array_keys( $b ) ); |
77 | while ( |
78 | count( $interleaved ) < $limit |
79 | && ( $aIds || $bIds ) |
80 | ) { |
81 | if ( !$aIds ) { |
82 | $id = reset( $bIds ); |
83 | $teamB[] = $id; |
84 | $interleaved[] = $b[$id]; |
85 | } elseif ( !$bIds ) { |
86 | $id = reset( $aIds ); |
87 | $teamA[] = $id; |
88 | $interleaved[] = $a[$id]; |
89 | // If team b has already chosen, then a needs to go |
90 | } elseif ( count( $teamA ) < count( $teamB ) |
91 | || ( |
92 | // If the counts are equal choose randomly which team goes next |
93 | count( $teamA ) == count( $teamB ) |
94 | && mt_rand( 0, 1 ) === 1 |
95 | ) ) { |
96 | $id = reset( $aIds ); |
97 | $teamA[] = $id; |
98 | $interleaved[] = $a[$id]; |
99 | } else { |
100 | $id = reset( $bIds ); |
101 | $teamB[] = $id; |
102 | $interleaved[] = $b[$id]; |
103 | } |
104 | unset( $aIds[$id], $bIds[$id] ); |
105 | } |
106 | |
107 | if ( $aIds ) { |
108 | // $offset needs to be set such that results starting at $offset + |
109 | // $limit are new. As such if we have 20 items and 10 of a are |
110 | // used, then offset should be -10. If 15 are used we want -5. |
111 | // This doesn't guarantee we don't show duplicates. Items from B |
112 | // could be in second page of A. The goal here is to ensure |
113 | // everything from A is seen when paginating even if there are |
114 | // dupes. |
115 | $nextA = reset( $aIds ); |
116 | $nextAIdx = array_search( $nextA, array_keys( $a ) ); |
117 | $offset = -( $limit - $nextAIdx ); |
118 | } else { |
119 | $offset = 0; |
120 | } |
121 | |
122 | return [ $interleaved, $teamA, $teamB, $offset ]; |
123 | } |
124 | } |