Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
77.19% |
44 / 57 |
|
0.00% |
0 / 3 |
CRAP | |
0.00% |
0 / 1 |
PQScore | |
77.19% |
44 / 57 |
|
0.00% |
0 / 3 |
7.58 | |
0.00% |
0 / 1 |
getRequiredFields | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
score | |
81.82% |
9 / 11 |
|
0.00% |
0 / 1 |
3.05 | |||
explain | |
77.78% |
35 / 45 |
|
0.00% |
0 / 1 |
3.10 |
1 | <?php |
2 | |
3 | namespace CirrusSearch\BuildDocument\Completion; |
4 | |
5 | /** |
6 | * Score that combines QualityScore and the pageviews statistics (popularity) |
7 | */ |
class PQScore extends QualityScore {
	/** Relative weight of the quality score in the weighted average */
	private const QSCORE_WEIGHT = 1;
	/** Relative weight of the popularity in the weighted average */
	private const POPULARITY_WEIGHT = 0.4;
	// 0.04% of the total page views is the max we accept
	// @todo: tested on enwiki values only
	private const POPULARITY_MAX = 0.0004;

	/**
	 * Fields needed by the parent scorer, plus 'popularity_score'.
	 *
	 * @return string[]
	 */
	public function getRequiredFields() {
		return array_merge( parent::getRequiredFields(), [ 'popularity_score' ] );
	}

	/**
	 * Compute the integer score of a document: weighted average of the
	 * intermediate quality score and the normalized popularity, multiplied
	 * by SCORE_RANGE and truncated to an integer.
	 *
	 * @param array $doc document; 'popularity_score' is read and defaults to 0 when absent
	 * @return int
	 */
	public function score( array $doc ) {
		$score = $this->intermediateScore( $doc ) * self::QSCORE_WEIGHT;
		$pop = $this->normalizedPopularity( $doc['popularity_score'] ?? 0 );

		$score += $pop * self::POPULARITY_WEIGHT;
		$score /= self::QSCORE_WEIGHT + self::POPULARITY_WEIGHT;
		return intval( $score * self::SCORE_RANGE );
	}

	/**
	 * Build a nested explanation ('value', 'description', 'details') of how
	 * the score of a document is obtained.
	 *
	 * @param array $doc document; 'popularity_score' is read and defaults to 0 when absent
	 * @return array
	 */
	public function explain( array $doc ) {
		$qualityExplain = $this->intermediateExplain( $doc );
		$pop = $doc['popularity_score'] ?? 0;
		if ( $pop > self::POPULARITY_MAX ) {
			$popExplain = [
				'value' => 1,
				'description' => 'pop > max_popularity, pop = ' . $pop .
					', max_popularity = ' . self::POPULARITY_MAX,
			];
		} else {
			$logBase = $this->popularityLogBase();
			$popLogBaseExplain = [
				'value' => $logBase,
				'description' => '1+popularity_max*max_docs; popularity_max = ' . self::POPULARITY_MAX .
					', max_docs = ' . $this->maxDocs,
			];

			if ( $logBase > 1 ) {
				$popExplain = [
					// In this branch popularity <= popularity_max, so the min() named in
					// the description is a no-op and the value is the one used by score().
					'value' => $this->normalizedPopularity( $pop ),
					'description' => "log(1+(min(popularity,popularity_max)*max_docs), pop_logbase); popularity = $pop, " .
						"popularity_max = " . self::POPULARITY_MAX . ", max_docs = {$this->maxDocs}, " .
						"pop_logbase = {$logBase}",
					'details' => [ 'pop_logbase' => $popLogBaseExplain ]
				];
			} else {
				$popExplain = [
					'value' => 0,
					'description' => 'log base 1 is undefined',
					'details' => [ 'pop_logbase' => $popLogBaseExplain ]
				];
			}
		}
		$totalW = self::QSCORE_WEIGHT + self::POPULARITY_WEIGHT;
		$wPop = $this->explainWeight( $popExplain, self::POPULARITY_WEIGHT, $totalW, 'popularity' );
		$wQua = $this->explainWeight( $qualityExplain, self::QSCORE_WEIGHT, $totalW, 'quality' );
		$details = [
			'popularity_weighted' => $wPop,
			'page_quality' => $wQua,
		];
		$innerExp = [
			'value' => $wPop['value'] + $wQua['value'],
			'description' => "Weighted sum of doc quality score and popularity",
			'details' => $details
		];
		return [
			'value' => (int)( $innerExp['value'] * self::SCORE_RANGE ),
			'description' => 'Convert to an integer score: ' . $innerExp['value'] . ' * ' . self::SCORE_RANGE,
			'details' => [ $innerExp ]
		];
	}

	/**
	 * Base of the logarithm used to scale the popularity:
	 * 1 + POPULARITY_MAX * maxDocs.
	 *
	 * @return float|int
	 */
	private function popularityLogBase() {
		return 1 + self::POPULARITY_MAX * $this->maxDocs;
	}

	/**
	 * Map a raw popularity to the value that enters the weighted average.
	 * Shared by score() and explain() so both always agree.
	 *
	 * @param float|int $pop raw popularity read from the document
	 * @return float|int 1 when $pop exceeds POPULARITY_MAX, 0 when the log base
	 *  is not greater than 1, the log-scaled popularity otherwise
	 */
	private function normalizedPopularity( $pop ) {
		if ( $pop > self::POPULARITY_MAX ) {
			return 1;
		}
		$logBase = $this->popularityLogBase();
		// log₁(x) is undefined
		if ( $logBase <= 1 ) {
			return 0;
		}
		// @fixme: rough log scale by using maxDocs...
		return log( 1 + ( $pop * $this->maxDocs ), $logBase );
	}
}