Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
87.72% |
50 / 57 |
|
0.00% |
0 / 3 |
CRAP | |
0.00% |
0 / 1 |
| PQScore | |
87.72% |
50 / 57 |
|
0.00% |
0 / 3 |
7.09 | |
0.00% |
0 / 1 |
| getRequiredFields | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| score | |
90.91% |
10 / 11 |
|
0.00% |
0 / 1 |
3.01 | |||
| explain | |
88.89% |
40 / 45 |
|
0.00% |
0 / 1 |
3.01 | |||
| 1 | <?php |
| 2 | |
| 3 | namespace CirrusSearch\BuildDocument\Completion; |
| 4 | |
/**
 * Score that combines QualityScore and the pageviews statistics (popularity)
 *
 * The final score is the weighted mean of two components:
 *  - the quality score returned by intermediateScore(), weighted by QSCORE_WEIGHT
 *  - a log-scaled popularity derived from the 'popularity_score' field,
 *    weighted by POPULARITY_WEIGHT
 * The mean is then multiplied by SCORE_RANGE and truncated to an integer.
 *
 * NOTE(review): intermediateScore(), intermediateExplain(), explainWeight(),
 * $maxDocs and SCORE_RANGE are not defined in this class; they are presumably
 * inherited from QualityScore - confirm against the parent.
 */
class PQScore extends QualityScore {
	private const QSCORE_WEIGHT = 1;
	private const POPULARITY_WEIGHT = 0.4;
	// 0.04% of the total page views is the max we accept
	// @todo: tested on enwiki values only
	private const POPULARITY_MAX = 0.0004;

	/**
	 * List the document fields needed to compute this score: the fields
	 * required by the parent plus 'popularity_score'.
	 *
	 * @return string[]
	 */
	public function getRequiredFields() {
		return array_merge( parent::getRequiredFields(), [ 'popularity_score' ] );
	}

	/**
	 * Compute the combined quality/popularity score of a document.
	 *
	 * @param array $doc document fields; 'popularity_score' is read and
	 *  treated as 0 when absent or null
	 * @return int the weighted mean of quality and popularity, scaled by
	 *  SCORE_RANGE and truncated to an integer
	 */
	public function score( array $doc ) {
		$score = $this->intermediateScore( $doc ) * self::QSCORE_WEIGHT;
		$pop = $doc['popularity_score'] ?? 0;
		if ( $pop > self::POPULARITY_MAX ) {
			// Anything above the accepted maximum saturates to the top value
			$pop = 1;
		} else {
			$logBase = 1 + self::POPULARITY_MAX * $this->maxDocs;
			// log₁(x) is undefined
			if ( $logBase > 1 ) {
				// @fixme: rough log scale by using maxDocs...
				// With this base a popularity equal to POPULARITY_MAX maps to 1
				$pop = log( 1 + ( $pop * $this->maxDocs ), $logBase );
			} else {
				$pop = 0;
			}
		}

		$score += $pop * self::POPULARITY_WEIGHT;
		// Normalize by the sum of the weights to obtain a weighted mean
		$score /= self::QSCORE_WEIGHT + self::POPULARITY_WEIGHT;
		return (int)( $score * self::SCORE_RANGE );
	}

	/**
	 * Explain how the score of a document is computed. Mirrors score() step
	 * by step and reports each intermediate value with a description.
	 *
	 * @param array $doc document fields; 'popularity_score' is read and
	 *  treated as 0 when absent or null
	 * @return array nested explanation with 'value', 'description' and
	 *  'details' keys
	 */
	public function explain( array $doc ): array {
		$qualityExplain = $this->intermediateExplain( $doc );
		$pop = $doc['popularity_score'] ?? 0;
		if ( $pop > self::POPULARITY_MAX ) {
			$popExplain = [
				'value' => 1,
				'description' => 'pop > max_popularity, pop = ' . $pop .
					', max_popularity = ' . self::POPULARITY_MAX,
			];
		} else {
			$popLogBaseExplain = [
				'value' => 1 + self::POPULARITY_MAX * $this->maxDocs,
				'description' => '1+popularity_max*max_docs; popularity_max = ' . self::POPULARITY_MAX .
					', max_docs = ' . $this->maxDocs,
			];

			if ( $popLogBaseExplain['value'] > 1 ) {
				$popExplain = [
					// min() cannot change the value here since $pop <= POPULARITY_MAX
					// in this branch; it is kept so the computation matches the
					// formula given in the description.
					'value' => log(
						1 + ( min( $pop, self::POPULARITY_MAX ) * $this->maxDocs ), $popLogBaseExplain['value']
					),
					'description' => "log(1+(min(popularity,popularity_max)*max_docs), pop_logbase); popularity = $pop, " .
						"popularity_max = " . self::POPULARITY_MAX . ", max_docs = {$this->maxDocs}, " .
						"pop_logbase = {$popLogBaseExplain['value']}",
					'details' => [ 'pop_logbase' => $popLogBaseExplain ]
				];
			} else {
				$popExplain = [
					'value' => 0,
					'description' => 'log base 1 is undefined',
					'details' => [ 'pop_logbase' => $popLogBaseExplain ]
				];
			}
		}
		$totalW = self::QSCORE_WEIGHT + self::POPULARITY_WEIGHT;
		$wPop = $this->explainWeight( $popExplain, self::POPULARITY_WEIGHT, $totalW, 'popularity' );
		$wQua = $this->explainWeight( $qualityExplain, self::QSCORE_WEIGHT, $totalW, 'quality' );
		$details = [
			'popularity_weighted' => $wPop,
			'page_quality' => $wQua,
		];
		$innerExp = [
			'value' => $wPop['value'] + $wQua['value'],
			'description' => "Weighted sum of doc quality score and popularity",
			'details' => $details
		];
		return [
			'value' => (int)( $innerExp['value'] * self::SCORE_RANGE ),
			'description' => 'Convert to an integer score: ' . $innerExp['value'] . ' * ' . self::SCORE_RANGE,
			'details' => [ $innerExp ]
		];
	}
}