Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
98.04% |
50 / 51 |
|
66.67% |
2 / 3 |
CRAP | |
0.00% |
0 / 1 |
ScoreParser | |
98.04% |
50 / 51 |
|
66.67% |
2 / 3 |
13 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
processRevision | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
3 | |||
processRevisionPerModel | |
97.44% |
38 / 39 |
|
0.00% |
0 / 1 |
9 |
1 | <?php |
2 | /** |
3 | * This program is free software: you can redistribute it and/or modify |
4 | * it under the terms of the GNU General Public License as published by |
5 | * the Free Software Foundation, either version 3 of the License, or |
6 | * (at your option) any later version. |
7 | * |
8 | * This program is distributed in the hope that it will be useful, |
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
11 | * GNU General Public License for more details. |
12 | * |
13 | * You should have received a copy of the GNU General Public License |
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. |
15 | */ |
16 | |
17 | namespace ORES\Storage; |
18 | |
19 | use InvalidArgumentException; |
20 | use RuntimeException; |
21 | |
/**
 * Class for parsing ORES service score response
 *
 * Converts the per-model score data of a single revision into rows keyed
 * with the oresc_* column names of the ores_classification table.
 *
 * @license GPL-3.0-or-later
 */
class ScoreParser {

	/** @var ModelLookup Resolves a model name to the id stored in oresc_model */
	private $modelLookup;

	/** @var array[] Map of model name => [ class name => class id ] */
	private $modelClasses;

	/** @var array Names of models that are stored as one aggregated row instead of one row per class */
	private $aggregatedModels;

	/**
	 * @param ModelLookup $modelLookup Resolves model names to model ids
	 * @param array[] $modelClasses Map of model name => [ class name => class id ]
	 * @param array $aggregatedModels Names of models to collapse into a single aggregated row
	 */
	public function __construct(
		ModelLookup $modelLookup,
		array $modelClasses,
		array $aggregatedModels = []
	) {
		$this->modelLookup = $modelLookup;
		$this->modelClasses = $modelClasses;
		$this->aggregatedModels = $aggregatedModels;
	}

	/**
	 * Convert data returned by ScoreFetcher::getScores() into ores_classification rows
	 *
	 * @note No row is generated for class 0 of models that have exactly two classes
	 * @param int $revision Revision being processed
	 * @param array[] $revisionData Data returned by ScoreFetcher::getScores() for the revision,
	 *  keyed by model name.
	 *
	 * @return array[] List of rows, in the order the models appear in $revisionData
	 * @throws InvalidArgumentException If a model reported an error, is not configured,
	 *  contains an unconfigured class or carries no score probabilities
	 */
	public function processRevision( $revision, array $revisionData ) {
		$dbData = [];
		foreach ( $revisionData as $model => $modelOutputs ) {
			if ( isset( $modelOutputs['error'] ) ) {
				throw new InvalidArgumentException( $modelOutputs['error']['type'] );
			}

			// Append row by row instead of re-merging the growing result on every model.
			foreach ( $this->processRevisionPerModel( $revision, $model, $modelOutputs ) as $row ) {
				$dbData[] = $row;
			}
		}

		return $dbData;
	}

	/**
	 * Build the rows for one model of one revision
	 *
	 * @param int $revision
	 * @param string $model
	 * @param array[] $modelOutputs
	 *
	 * @return array[]
	 * @throws InvalidArgumentException
	 */
	private function processRevisionPerModel( $revision, $model, array $modelOutputs ) {
		$modelId = $this->modelLookup->getModelId( $model );

		if ( !isset( $this->modelClasses[$model] ) ) {
			throw new InvalidArgumentException( "Model $model is not configured" );
		}
		$classIds = $this->modelClasses[$model];
		$classCount = count( $classIds );

		// Fail loudly on a malformed response instead of emitting PHP warnings
		// and silently producing no rows.
		if (
			!isset( $modelOutputs['score']['probability'] ) ||
			!is_array( $modelOutputs['score']['probability'] )
		) {
			throw new InvalidArgumentException( "Model $model has no score probabilities" );
		}

		$prediction = isset( $modelOutputs['score']['prediction'] )
			? $modelOutputs['score']['prediction']
			: null;

		$processedData = [];
		$weightedSum = 0;
		foreach ( $modelOutputs['score']['probability'] as $className => $probability ) {
			if ( !isset( $classIds[$className] ) ) {
				throw new InvalidArgumentException(
					"Class $className in model $model is not configured"
				);
			}
			$classId = $classIds[$className];
			if ( $classId === 0 && $classCount === 2 ) {
				// We don't store rows for class 0 of models with only 2 classes
				// because we can easily query using reversed conditions on class 1
				// Example: WHERE class = 0 AND probability > 0.8 -> WHERE class = 1 AND probability <= 0.2
				continue;
			}
			// Never write a probability with more than 3 decimal digits, since the
			// SQL field is a NUMERIC(3,3). So for example, convert 3.141592
			// to 3.142. T355089
			$probability = sprintf( "%.3f", $probability );
			$processedData[] = [
				'oresc_rev' => $revision,
				'oresc_model' => $modelId,
				'oresc_class' => $classId,
				'oresc_probability' => $probability,
				'oresc_is_predicted' => $this->isPredictedClass( $prediction, $className ),
			];
			// The aggregate is computed from the rounded value that is actually stored.
			$weightedSum += ( (float)$probability * $classId );
		}

		// Model names that look numeric arrive as integer array keys, so compare as strings.
		if ( in_array( (string)$model, array_map( 'strval', $this->aggregatedModels ), true ) ) {
			// Aggregated models are stored as a single class-0 row holding the
			// class-id-weighted sum of probabilities divided by the number of classes.
			return [
				[
					'oresc_rev' => $revision,
					'oresc_model' => $modelId,
					'oresc_class' => 0,
					'oresc_probability' => sprintf( "%.3f", $weightedSum / $classCount ),
					'oresc_is_predicted' => false,
				]
			];
		}

		return $processedData;
	}

	/**
	 * Tell whether the model's prediction names the given class
	 *
	 * Booleans are mapped to the class names 'true' / 'false'. The comparison is done
	 * on strings because PHP turns numeric-looking array keys such as "1" into integers,
	 * so a strict comparison of the raw values would never match a string prediction.
	 *
	 * @param mixed $prediction Value of the 'prediction' field, or null when absent
	 * @param string|int $className Key of the class in the 'probability' map
	 *
	 * @return bool
	 */
	private function isPredictedClass( $prediction, $className ) {
		if ( is_bool( $prediction ) ) {
			// Kludge out booleans so we can match prediction against class name.
			$prediction = $prediction ? 'true' : 'false';
		}
		if ( !is_scalar( $prediction ) ) {
			// null, arrays and objects cannot name a class.
			return false;
		}

		return (string)$prediction === (string)$className;
	}

}