Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
100.00% |
79 / 79 |
|
100.00% |
8 / 8 |
CRAP | |
100.00% |
1 / 1 |
PSquare | |
100.00% |
79 / 79 |
|
100.00% |
8 / 8 |
26 | |
100.00% |
1 / 1 |
__construct | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
__serialize | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
1 | |||
__unserialize | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
1 | |||
getCount | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
addObservation | |
100.00% |
34 / 34 |
|
100.00% |
1 / 1 |
17 | |||
computeParabolic | |
100.00% |
13 / 13 |
|
100.00% |
1 / 1 |
1 | |||
computeLinear | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
1 | |||
getValue | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
3 |
1 | <?php |
2 | /** |
3 | * This program is free software; you can redistribute it and/or modify |
4 | * it under the terms of the GNU General Public License as published by |
5 | * the Free Software Foundation; either version 2 of the License, or |
6 | * (at your option) any later version. |
7 | * |
8 | * This program is distributed in the hope that it will be useful, |
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
11 | * GNU General Public License for more details. |
12 | * |
13 | * You should have received a copy of the GNU General Public License along |
14 | * with this program; if not, write to the Free Software Foundation, Inc., |
15 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
16 | * http://www.gnu.org/copyleft/gpl.html |
17 | * |
18 | * @file |
19 | * @author Ori Livneh <ori@wikimedia.org> |
20 | */ |
21 | |
22 | namespace Wikimedia; |
23 | |
24 | /** |
25 | * Represents a running, online estimate of a p-quantile for a series |
26 | * of observations using the P-squared algorithm. |
27 | * |
28 | * The algorithm is from "The P-Square Algorithm for Dynamic Calculation of |
29 | * Percentiles and Histograms without Storing Observations," Communications of |
30 | * the ACM, October 1985 by R. Jain and I. Chlamtac. |
31 | */ |
32 | class PSquare { |
33 | /** |
34 | * Percentile to estimate. |
35 | * @var float $p |
36 | */ |
37 | private $p; |
38 | |
39 | /** |
40 | * Position of each marker. |
41 | * @var int[] $positions |
42 | */ |
43 | private $positions; |
44 | |
45 | /** |
46 | * Desired position of each marker. |
47 | * @var float[] $desired |
48 | */ |
49 | private $desired; |
50 | |
51 | /** @var float[] $increments */ |
52 | private $increments; |
53 | |
54 | /** |
55 | * Number of observations. |
56 | * @var int $numObservations |
57 | */ |
58 | private $numObservations = 0; |
59 | |
60 | /** |
61 | * Height of each marker. |
62 | * @var float[] $heights |
63 | */ |
64 | private $heights = []; |
65 | |
66 | /** |
67 | * @param float $p the percentile (defaults to 0.5, or median). |
68 | */ |
69 | public function __construct( $p = 0.5 ) { |
70 | $this->p = $p; |
71 | $this->positions = [ 0, 1, 2, 3, 4 ]; |
72 | $this->desired = [ 0, ( 2 * $p ), ( 4 * $p ), 2 + ( 2 * $p ), 4 ]; |
73 | $this->increments = [ 0, ( $p / 2 ), $p, ( ( 1 + $p ) / 2 ), 1 ]; |
74 | } |
75 | |
76 | /** |
77 | * Export state, e.g. for serializing and caching. |
78 | * |
79 | * @return array |
80 | */ |
81 | public function __serialize(): array { |
82 | return [ |
83 | 'percentile' => $this->p, |
84 | 'positions' => $this->positions, |
85 | 'desired' => $this->desired, |
86 | 'increments' => $this->increments, |
87 | 'numObservations' => $this->numObservations, |
88 | 'heights' => $this->heights, |
89 | ]; |
90 | } |
91 | |
92 | /** |
93 | * @param array $data |
94 | */ |
95 | public function __unserialize( array $data ): void { |
96 | $this->p = $data['percentile']; |
97 | $this->positions = $data['positions']; |
98 | $this->desired = $data['desired']; |
99 | $this->increments = $data['increments']; |
100 | $this->numObservations = $data['numObservations']; |
101 | $this->heights = $data['heights']; |
102 | } |
103 | |
104 | /** |
105 | * Get the total number of accumulated observations. |
106 | * |
107 | * @return int |
108 | */ |
109 | public function getCount() { |
110 | return $this->numObservations; |
111 | } |
112 | |
113 | /** |
114 | * Add an observation. |
115 | * |
116 | * @param int|float $x Value to add |
117 | */ |
118 | public function addObservation( $x ) { |
119 | $this->numObservations++; |
120 | |
121 | if ( $this->numObservations <= 5 ) { |
122 | $this->heights[] = $x; |
123 | if ( $this->numObservations === 5 ) { |
124 | sort( $this->heights ); |
125 | } |
126 | return; |
127 | } |
128 | |
129 | if ( $x < $this->heights[0] ) { |
130 | $this->heights[0] = $x; |
131 | $k = 0; |
132 | } elseif ( $x >= $this->heights[4] ) { |
133 | $this->heights[4] = $x; |
134 | $k = 3; |
135 | } else { |
136 | for ( $i = 1; $i < 5; $i++ ) { |
137 | if ( $x < $this->heights[$i] ) { |
138 | $k = $i - 1; |
139 | break; |
140 | } |
141 | } |
142 | } |
143 | |
144 | // @phan-suppress-next-line PhanPossiblyUndeclaredVariable |
145 | for ( $i = $k + 1; $i < 5; $i++ ) { |
146 | $this->positions[$i]++; |
147 | } |
148 | |
149 | for ( $i = 0; $i < 5; $i++ ) { |
150 | $this->desired[$i] += $this->increments[$i]; |
151 | } |
152 | |
153 | for ( $i = 1; $i < 4; $i++ ) { |
154 | $n = $this->positions[$i]; |
155 | $nPrev = $this->positions[$i - 1]; |
156 | $nNext = $this->positions[$i + 1]; |
157 | |
158 | $d = $this->desired[$i] - $n; |
159 | |
160 | if ( ( $d >= 1 && $nNext - $n > 1 ) || ( $d <= -1 && $nPrev - $n < -1 ) ) { |
161 | $d = ( $d < 0 ) ? -1 : 1; |
162 | |
163 | $q = $this->computeParabolic( $i, $d ); |
164 | $qPrev = $this->heights[$i - 1]; |
165 | $qNext = $this->heights[$i + 1]; |
166 | |
167 | if ( $qPrev < $q && $q < $qNext ) { |
168 | $this->heights[$i] = $q; |
169 | } else { |
170 | $this->heights[$i] = $this->computeLinear( $i, $d ); |
171 | } |
172 | |
173 | $this->positions[$i] += $d; |
174 | } |
175 | } |
176 | } |
177 | |
178 | /** |
179 | * Use piecewise parabolic prediction to predict the ideal |
180 | * height of a marker. |
181 | * |
182 | * @param int $i index of marker to adjust |
183 | * @param int $d always -1 or 1 |
184 | * @return float ideal height of marker |
185 | */ |
186 | private function computeParabolic( $i, $d ) { |
187 | $q = $this->heights[$i]; |
188 | $qPrev = $this->heights[$i - 1]; |
189 | $qNext = $this->heights[$i + 1]; |
190 | |
191 | $n = $this->positions[$i]; |
192 | $nPrev = $this->positions[$i - 1]; |
193 | $nNext = $this->positions[$i + 1]; |
194 | |
195 | return ( $q + |
196 | $d / ( $nNext - $nPrev ) * |
197 | ( |
198 | ( $n - $nPrev + $d ) * ( $qNext - $q ) / ( $nNext - $n ) + |
199 | ( $nNext - $n - $d ) * ( $q - $qPrev ) / ( $n - $nPrev ) |
200 | ) |
201 | ); |
202 | } |
203 | |
204 | /** |
205 | * Linear formula to predict ideal position of a marker. |
206 | * |
207 | * @param int $i index of marker to adjust |
208 | * @param int $d always -1 or 1 |
209 | * @return float ideal height of marker |
210 | */ |
211 | private function computeLinear( $i, $d ) { |
212 | $q = $this->heights[$i]; |
213 | $n = $this->positions[$i]; |
214 | return ( $q + $d * |
215 | ( $this->heights[$i + $d] - $q ) / |
216 | ( $this->positions[$i + $d] - $n ) |
217 | ); |
218 | } |
219 | |
220 | /** |
221 | * Get the estimated p-quantile value. |
222 | * |
223 | * @return float |
224 | */ |
225 | public function getValue() { |
226 | // If we have five samples or fewer, fall back to a naive method. |
227 | if ( $this->getCount() <= 5 ) { |
228 | sort( $this->heights ); |
229 | $i = $this->p * count( $this->heights ); |
230 | if ( $i === floor( $i ) ) { |
231 | return ( $this->heights[$i - 1] + $this->heights[$i] ) / 2; |
232 | } else { |
233 | return $this->heights[floor( $i )]; |
234 | } |
235 | } |
236 | |
237 | return $this->heights[2]; |
238 | } |
239 | } |