Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
91.27% |
115 / 126 |
|
53.85% |
7 / 13 |
CRAP | |
0.00% |
0 / 1 |
CausedByLines | |
91.27% |
115 / 126 |
|
53.85% |
7 / 13 |
71.08 | |
0.00% |
0 / 1 |
addLines | |
86.67% |
13 / 15 |
|
0.00% |
0 / 1 |
12.34 | |||
asPreservingTaintednessAndLinks | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
5 | |||
asIntersectedWithTaintedness | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
4 | |||
asFilteredForFuncAndParam | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
4 | |||
getLinesForGenericReturn | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
3 | |||
withTaintAddedToMethodArgLinks | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
5 | |||
mergeWith | |
97.44% |
38 / 39 |
|
0.00% |
0 / 1 |
17 | |||
asMergedWith | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
getArraySubsetIdx | |
92.86% |
13 / 14 |
|
0.00% |
0 / 1 |
7.02 | |||
toStringForIssue | |
85.71% |
6 / 7 |
|
0.00% |
0 / 1 |
3.03 | |||
getRelevantLinesForTaintedness | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
3 | |||
toLinesArray | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
toDebugString | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
12 |
1 | <?php declare( strict_types=1 ); |
2 | |
3 | namespace SecurityCheckPlugin; |
4 | |
5 | use Phan\Language\Element\FunctionInterface; |
6 | |
7 | /** |
8 | * Value object used to store caused-by lines. |
9 | * |
10 | * @note `clone` will not deep-clone instances of this class. That'd be more correct in theory, but it's |
11 | * tremendously expensive for this class (+35s and +600MB for MW core). |
12 | * |
13 | * @todo Keep per-offset caused-by lines |
14 | */ |
15 | class CausedByLines { |
16 | private const MAX_LINES_PER_ISSUE = 80; |
17 | // XXX Hack: Enforce a hard limit, or things may explode |
18 | private const LINES_HARD_LIMIT = 100; |
19 | |
20 | /** |
21 | * Note: the links are nullable for performance. |
22 | * @var array<array<Taintedness|string|MethodLinks|null>> |
23 | * @phan-var list<array{0:Taintedness,1:string,2:?MethodLinks}> |
24 | */ |
25 | private $lines = []; |
26 | |
27 | /** |
28 | * @param string[] $lines |
29 | * @param Taintedness $taintedness |
30 | * @param MethodLinks|null $links |
31 | * @note Taintedness and links are cloned as needed |
32 | */ |
33 | public function addLines( array $lines, Taintedness $taintedness, MethodLinks $links = null ): void { |
34 | if ( !$this->lines ) { |
35 | foreach ( $lines as $line ) { |
36 | $this->lines[] = [ clone $taintedness, $line, $links ? clone $links : null ]; |
37 | } |
38 | return; |
39 | } |
40 | |
41 | foreach ( $lines as $line ) { |
42 | if ( count( $this->lines ) >= self::LINES_HARD_LIMIT ) { |
43 | break; |
44 | } |
45 | $idx = array_search( $line, array_column( $this->lines, 1 ), true ); |
46 | if ( $idx !== false ) { |
47 | $this->lines[ $idx ][0]->mergeWith( $taintedness ); |
48 | if ( $links && !$this->lines[$idx][2] ) { |
49 | $this->lines[$idx][2] = clone $links; |
50 | } elseif ( $links && $links !== $this->lines[$idx][2] ) { |
51 | $this->lines[$idx][2]->mergeWith( $links ); |
52 | } |
53 | } else { |
54 | $this->lines[] = [ clone $taintedness, $line, $links ? clone $links : null ]; |
55 | } |
56 | } |
57 | } |
58 | |
59 | /** |
60 | * Move any possibly preserved taintedness stored in the method links to the actual taintedness of this line, |
61 | * and use $links as the new links being preserved. |
62 | * |
63 | * @param Taintedness $taintedness |
64 | * @param MethodLinks $links |
65 | * @return self |
66 | */ |
67 | public function asPreservingTaintednessAndLinks( Taintedness $taintedness, MethodLinks $links ): self { |
68 | $ret = new self; |
69 | if ( !$this->lines ) { |
70 | return $ret; |
71 | } |
72 | $curTaint = $taintedness->get(); |
73 | foreach ( $this->lines as [ $_, $eLine, $eLinks ] ) { |
74 | $preservedFlags = $eLinks && ( $curTaint !== SecurityCheckPlugin::NO_TAINT ) |
75 | ? $eLinks->filterPreservedFlags( $curTaint ) |
76 | : SecurityCheckPlugin::NO_TAINT; |
77 | $ret->lines[] = [ new Taintedness( $preservedFlags ), $eLine, clone $links ]; |
78 | } |
79 | return $ret; |
80 | } |
81 | |
82 | /** |
83 | * @param Taintedness $taintedness |
84 | * @return self |
85 | */ |
86 | public function asIntersectedWithTaintedness( Taintedness $taintedness ): self { |
87 | $ret = new self; |
88 | if ( !$this->lines ) { |
89 | return $ret; |
90 | } |
91 | $curTaint = $taintedness->get(); |
92 | foreach ( $this->lines as [ $eTaint, $eLine, $links ] ) { |
93 | $newTaint = $curTaint !== SecurityCheckPlugin::NO_TAINT |
94 | ? $eTaint->withOnly( $curTaint ) |
95 | : new Taintedness( SecurityCheckPlugin::NO_TAINT ); |
96 | $ret->lines[] = [ $newTaint, $eLine, $links ]; |
97 | } |
98 | return $ret; |
99 | } |
100 | |
101 | /** |
102 | * @param FunctionInterface $func |
103 | * @param int $param |
104 | * @return self |
105 | */ |
106 | public function asFilteredForFuncAndParam( FunctionInterface $func, int $param ): self { |
107 | $ret = new self; |
108 | foreach ( $this->lines as $line ) { |
109 | if ( $line[2] && $line[2]->hasDataForFuncAndParam( $func, $param ) ) { |
110 | $ret->lines[] = $line; |
111 | } |
112 | } |
113 | return $ret; |
114 | } |
115 | |
116 | /** |
117 | * @return self |
118 | */ |
119 | public function getLinesForGenericReturn(): self { |
120 | $ret = new self; |
121 | foreach ( $this->lines as [ $lineTaint, $lineLine, $_ ] ) { |
122 | if ( !$lineTaint->isSafe() ) { |
123 | // For generic lines, links don't matter |
124 | $ret->lines[] = [ $lineTaint, $lineLine, null ]; |
125 | } |
126 | } |
127 | return $ret; |
128 | } |
129 | |
130 | /** |
131 | * For every line in this object, check if the line has links for $func, and if so, add $taintedness to the |
132 | * line taintedness. |
133 | * |
134 | * @param Taintedness $taintedness |
135 | * @param FunctionInterface $func |
136 | * @param int $i Parameter index |
137 | * @return self |
138 | */ |
139 | public function withTaintAddedToMethodArgLinks( Taintedness $taintedness, FunctionInterface $func, int $i ): self { |
140 | $ret = new self; |
141 | foreach ( $this->lines as [ $lineTaint, $lineLine, $lineLinks ] ) { |
142 | $newLinks = $lineLinks ? clone $lineLinks : null; |
143 | if ( $lineLinks && $lineLinks->hasDataForFuncAndParam( $func, $i ) ) { |
144 | $ret->lines[] = [ $lineTaint->asMergedWith( $taintedness ), $lineLine, $newLinks ]; |
145 | } else { |
146 | $ret->lines[] = [ clone $lineTaint, $lineLine, $newLinks ]; |
147 | } |
148 | } |
149 | return $ret; |
150 | } |
151 | |
152 | /** |
153 | * @note this isn't a merge operation like array_merge. What this method does is: |
154 | * 1 - if $other is a subset of $this, leave $this as-is; |
155 | * 2 - update taintedness values in $this if the *lines* (not taint values) in $other |
156 | * are a subset of the lines in $this; |
157 | * 3 - if an upper set of $this *lines* is also a lower set of $other *lines*, remove that upper |
158 | * set from $this and merge the rest with $other; |
159 | * 4 - array_merge otherwise; |
160 | * |
161 | * Step 2 is very important, because otherwise, caused-by lines can grow exponentially if |
162 | * even a single taintedness value in $this changes. |
163 | * |
164 | * @param self $other |
165 | */ |
166 | public function mergeWith( self $other ): void { |
167 | if ( !$this->lines ) { |
168 | $this->lines = $other->lines; |
169 | return; |
170 | } |
171 | if ( !$other->lines || self::getArraySubsetIdx( $this->lines, $other->lines ) !== false ) { |
172 | return; |
173 | } |
174 | |
175 | $baseLines = array_column( $this->lines, 1 ); |
176 | $newLines = array_column( $other->lines, 1 ); |
177 | $subsIdx = self::getArraySubsetIdx( $baseLines, $newLines ); |
178 | if ( $subsIdx !== false ) { |
179 | foreach ( $other->lines as $i => $cur ) { |
180 | $this->lines[ $i + $subsIdx ][0]->mergeWith( $cur[0] ); |
181 | $curLinks = $cur[2]; |
182 | if ( $curLinks && !$this->lines[ $i + $subsIdx ][2] ) { |
183 | $this->lines[$i + $subsIdx][2] = $curLinks; |
184 | } elseif ( $curLinks && $curLinks !== $this->lines[ $i + $subsIdx ][2] ) { |
185 | $this->lines[$i + $subsIdx][2]->mergeWith( $curLinks ); |
186 | } |
187 | } |
188 | return; |
189 | } |
190 | |
191 | $ret = null; |
192 | $baseLen = count( $this->lines ); |
193 | $newLen = count( $other->lines ); |
194 | // NOTE: array_shift is O(n), and O(n^2) over all iterations, because it reindexes the whole array. |
195 | // So reverse the arrays, that is O(n) twice, and use array_pop which is O(1) (O(n) for all iterations) |
196 | $remaining = array_reverse( $baseLines ); |
197 | $newRev = array_reverse( $newLines ); |
198 | // Assuming the lines as posets with the "natural" order used by PHP (that is, not the keys): |
199 | // since we're working with reversed arrays, remaining lines should be an upper set of the reversed |
200 | // new lines; which is to say, a lower set of the non-reversed new lines. |
201 | $expectedIndex = $newLen - $baseLen; |
202 | do { |
203 | if ( $expectedIndex >= 0 && self::getArraySubsetIdx( $newRev, $remaining ) === $expectedIndex ) { |
204 | $startIdx = $baseLen - $newLen + $expectedIndex; |
205 | for ( $j = $startIdx; $j < $baseLen; $j++ ) { |
206 | $this->lines[$j][0]->mergeWith( $other->lines[$j - $startIdx][0] ); |
207 | $secondLinks = $other->lines[$j - $startIdx][2]; |
208 | if ( $secondLinks && !$this->lines[$j][2] ) { |
209 | $this->lines[$j][2] = $secondLinks; |
210 | } elseif ( $secondLinks && $secondLinks !== $this->lines[$j][2] ) { |
211 | $this->lines[$j][2]->mergeWith( $secondLinks ); |
212 | } |
213 | } |
214 | $ret = array_merge( $this->lines, array_slice( $other->lines, $newLen - $expectedIndex ) ); |
215 | break; |
216 | } |
217 | array_pop( $remaining ); |
218 | $expectedIndex++; |
219 | } while ( $remaining ); |
220 | $ret ??= array_merge( $this->lines, $other->lines ); |
221 | |
222 | $this->lines = array_slice( $ret, 0, self::LINES_HARD_LIMIT ); |
223 | } |
224 | |
225 | /** |
226 | * @param self $other |
227 | * @return self |
228 | */ |
229 | public function asMergedWith( self $other ): self { |
230 | $ret = clone $this; |
231 | $ret->mergeWith( $other ); |
232 | return $ret; |
233 | } |
234 | |
235 | /** |
236 | * Check whether $needle is subset of $haystack, regardless of the keys, and returns |
237 | * the starting index of the subset in the $haystack array. If the subset occurs multiple |
238 | * times, this will just find the first one. |
239 | * |
240 | * @param array[] $haystack |
241 | * @phan-param list<array{0:Taintedness,1:string,2:?MethodLinks}> $haystack |
242 | * @param array[] $needle |
243 | * @phan-param list<array{0:Taintedness,1:string,2:?MethodLinks}> $needle |
244 | * @return false|int False if not a subset, the starting index if it is. |
245 | * @note Use strict comparisons with the return value! |
246 | */ |
247 | private static function getArraySubsetIdx( array $haystack, array $needle ) { |
248 | if ( !$needle || !$haystack ) { |
249 | // For our needs, the empty array is not a subset of anything |
250 | return false; |
251 | } |
252 | |
253 | $needleLength = count( $needle ); |
254 | $haystackLength = count( $haystack ); |
255 | if ( $haystackLength < $needleLength ) { |
256 | return false; |
257 | } |
258 | $curIdx = 0; |
259 | foreach ( $haystack as $i => $el ) { |
260 | if ( $el === $needle[ $curIdx ] ) { |
261 | $curIdx++; |
262 | } else { |
263 | $curIdx = 0; |
264 | } |
265 | if ( $curIdx === $needleLength ) { |
266 | return $i - ( $needleLength - 1 ); |
267 | } |
268 | } |
269 | return false; |
270 | } |
271 | |
272 | /** |
273 | * Return a truncated, stringified representation of these lines to be used when reporting issues. |
274 | * |
275 | * @todo Perhaps this should include the first and last X lines, not the first 2X. However, |
276 | * doing so would make phan emit a new issue for the same line whenever new caused-by |
277 | * lines are added to the array. |
278 | * |
279 | * @param int $taintType Must only have normal flags, and no EXEC flags. |
280 | * @return string |
281 | */ |
282 | public function toStringForIssue( int $taintType ): string { |
283 | $filteredLines = $this->getRelevantLinesForTaintedness( $taintType ); |
284 | if ( !$filteredLines ) { |
285 | return ''; |
286 | } |
287 | |
288 | if ( count( $filteredLines ) <= self::MAX_LINES_PER_ISSUE ) { |
289 | $linesPart = implode( '; ', $filteredLines ); |
290 | } else { |
291 | $linesPart = implode( '; ', array_slice( $filteredLines, 0, self::MAX_LINES_PER_ISSUE ) ) . '; ...'; |
292 | } |
293 | return ' (Caused by: ' . $linesPart . ')'; |
294 | } |
295 | |
296 | /** |
297 | * @param int $taintedness With only normal flags, and no EXEC flags. |
298 | * @return string[] |
299 | */ |
300 | private function getRelevantLinesForTaintedness( int $taintedness ): array { |
301 | $ret = []; |
302 | foreach ( $this->lines as [ $lineTaint, $lineText ] ) { |
303 | if ( $lineTaint->has( $taintedness ) ) { |
304 | $ret[] = $lineText; |
305 | } |
306 | } |
307 | return $ret; |
308 | } |
309 | |
310 | /** |
311 | * @return string[] |
312 | * @suppress PhanUnreferencedPublicMethod |
313 | */ |
314 | public function toLinesArray(): array { |
315 | return array_column( $this->lines, 1 ); |
316 | } |
317 | |
318 | /** |
319 | * @return string |
320 | * @suppress PhanUnreferencedPublicMethod |
321 | */ |
322 | public function toDebugString(): string { |
323 | $r = []; |
324 | foreach ( $this->lines as [ $t, $line, $links ] ) { |
325 | $r[] = "\t[\n\t\tT: " . $t->toShortString() . "\n\t\tL: " . $line . "\n\t\tLinks: " . |
326 | ( $links ?: 'none' ) . "\n\t]"; |
327 | } |
328 | return "[\n" . implode( ",\n", $r ) . "\n]"; |
329 | } |
330 | } |