Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
94.79% |
91 / 96 |
|
62.50% |
5 / 8 |
CRAP | |
0.00% |
0 / 1 |
| TestSuiteBuilder | |
94.79% |
91 / 96 |
|
62.50% |
5 / 8 |
28.11 | |
0.00% |
0 / 1 |
| sortByNameAscending | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| buildSuites | |
89.29% |
25 / 28 |
|
0.00% |
0 / 1 |
9.10 | |||
| buildSuitesWithDurationInformationWithoutLeadingEmptyTests | |
88.89% |
8 / 9 |
|
0.00% |
0 / 1 |
2.01 | |||
| buildSuitesNoDurationInformation | |
92.86% |
13 / 14 |
|
0.00% |
0 / 1 |
4.01 | |||
| createChunksOfTests | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
2 | |||
| calculatePrefixSum | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
2 | |||
| constructSplitGroups | |
100.00% |
18 / 18 |
|
100.00% |
1 / 1 |
3 | |||
| generateBacktrackingTable | |
100.00% |
14 / 14 |
|
100.00% |
1 / 1 |
5 | |||
| 1 | <?php |
| 2 | |
| 3 | declare( strict_types = 1 ); |
| 4 | |
| 5 | namespace MediaWiki\Composer\PhpUnitSplitter; |
| 6 | |
| 7 | /** |
| 8 | * @license GPL-2.0-or-later |
| 9 | */ |
| 10 | class TestSuiteBuilder { |
| 11 | |
| 12 | private static function sortByNameAscending( TestDescriptor $a, TestDescriptor $b ): int { |
| 13 | return $a->getFilename() <=> $b->getFilename(); |
| 14 | } |
| 15 | |
| 16 | /** |
| 17 | * Try to build balanced groups (split_groups) of tests. We have a couple of |
| 18 | * objectives here: |
| 19 | * - the groups should contain a stable ordering of tests so that we reduce the amount |
| 20 | * of random test failures due to test re-ordering |
| 21 | * - the groups should reduce the number of interacting extensions where possible. This |
| 22 | * is achieved with the alphabetical sort on filename - tests of the same extension will |
| 23 | * be grouped together |
| 24 | * - the groups should have a similar test execution time |
| 25 | * |
| 26 | * Information about test duration may be completely absent (if no test cache information is |
| 27 | * supplied), or partially absent (if the test has not been seen before). We attempt to create |
| 28 | * similar-duration split-groups using the information we have available, and if anything goes |
| 29 | * wrong we fall back to just creating split-groups with the same number of tests in them. |
| 30 | * |
| 31 | * @param array $testDescriptors the list of tests that we want to sort into split_groups |
| 32 | * @param int $groups the number of split_groups we are targetting |
| 33 | * @param ?int $chunkSize optionally override the size of the 'chunks' into which tests |
| 34 | * are grouped. If not supplied, the chunk size will depend on the total number |
| 35 | * of tests. |
| 36 | * @return array a structured array of the resulting split_groups |
| 37 | */ |
| 38 | public function buildSuites( array $testDescriptors, int $groups, ?int $chunkSize = null ): array { |
| 39 | $suites = array_fill( 0, $groups, [ "list" => [], "time" => 0 ] ); |
| 40 | // If there are no test descriptors, we just return empty suites |
| 41 | if ( $testDescriptors === [] ) { |
| 42 | return $suites; |
| 43 | } |
| 44 | |
| 45 | // Sort the tests by name so that we run tests of the same extension together and in a predictable order |
| 46 | usort( $testDescriptors, [ self::class, "sortByNameAscending" ] ); |
| 47 | |
| 48 | $descriptorCount = count( $testDescriptors ); |
| 49 | if ( $chunkSize === null ) { |
| 50 | // The algorithm is CPU intensive - make sure we run with at most 200 'chunks' of tests to group |
| 51 | $chunkSize = intval( ceil( $descriptorCount / 200 ) ); |
| 52 | } |
| 53 | |
| 54 | // Skip over any leading zero-time tests, and add them back to the first group at the end |
| 55 | // Without this adjustment, the dynamic-sizing algorithm can end up with a zero-size split-group |
| 56 | // which would cause PHPUnit to error. |
| 57 | $startIndex = 0; |
| 58 | while ( $startIndex < $descriptorCount && $testDescriptors[$startIndex]->getDuration() == 0 ) { |
| 59 | $startIndex++; |
| 60 | } |
| 61 | |
| 62 | if ( $startIndex === 0 ) { |
| 63 | $testDescriptorsWithoutLeadingZeros = $testDescriptors; |
| 64 | $leadingZeros = []; |
| 65 | } elseif ( $startIndex < $descriptorCount ) { |
| 66 | $leadingZeros = array_map( |
| 67 | static fn ( $testDescriptor ) => $testDescriptor->getFilename(), |
| 68 | array_slice( $testDescriptors, 0, $startIndex ) |
| 69 | ); |
| 70 | $testDescriptorsWithoutLeadingZeros = array_slice( $testDescriptors, $startIndex ); |
| 71 | } else { |
| 72 | // if we never encounter a test with duration information, fall back to splitting |
| 73 | // tests into split-groups with the same number of test classes. |
| 74 | return $this->buildSuitesNoDurationInformation( $testDescriptors, $groups ); |
| 75 | } |
| 76 | |
| 77 | try { |
| 78 | $this->buildSuitesWithDurationInformationWithoutLeadingEmptyTests( |
| 79 | $testDescriptorsWithoutLeadingZeros, $suites, $groups, $chunkSize |
| 80 | ); |
| 81 | } catch ( SuiteSplittingException ) { |
| 82 | return $this->buildSuitesNoDurationInformation( $testDescriptors, $groups ); |
| 83 | } |
| 84 | |
| 85 | // Add any zero-length tests that we sliced away before splitting back to the first group |
| 86 | if ( $leadingZeros !== [] ) { |
| 87 | $suites[0]["list"] = array_merge( $leadingZeros, $suites[0]["list"] ); |
| 88 | } |
| 89 | |
| 90 | return $suites; |
| 91 | } |
| 92 | |
| 93 | /** |
| 94 | * @throws SuiteSplittingException |
| 95 | */ |
| 96 | private function buildSuitesWithDurationInformationWithoutLeadingEmptyTests( |
| 97 | array $testDescriptorsWithoutLeadingZeros, |
| 98 | array &$suites, |
| 99 | int $numGroups, |
| 100 | int $chunkSize |
| 101 | ): void { |
| 102 | $n = count( $testDescriptorsWithoutLeadingZeros ); |
| 103 | if ( $n == 0 ) { |
| 104 | return; |
| 105 | } |
| 106 | |
| 107 | $chunks = $this->createChunksOfTests( $n, $testDescriptorsWithoutLeadingZeros, $chunkSize ); |
| 108 | |
| 109 | $numChunks = count( $chunks ); |
| 110 | $durations = array_column( $chunks, "time" ); |
| 111 | |
| 112 | // Build an array of cumulative test duration (or 'prefix sum') - sum(0..i){x.duration} |
| 113 | $prefixSum = $this->calculatePrefixSum( $numChunks, $durations ); |
| 114 | |
| 115 | // Generate backtracking table |
| 116 | $backtrack = $this->generateBacktrackingTable( $numChunks, $numGroups, $prefixSum ); |
| 117 | |
| 118 | $this->constructSplitGroups( $numGroups, $numChunks, $chunks, $backtrack, $suites ); |
| 119 | } |
| 120 | |
| 121 | /** |
| 122 | * Called as a fallback for the case where not duration information is available. |
| 123 | * Simply split the tests into $groups equally-sized split-groups. |
| 124 | * |
| 125 | * @param TestDescriptor[] $testDescriptors |
| 126 | * @param int $groups |
| 127 | * @return array |
| 128 | */ |
| 129 | private function buildSuitesNoDurationInformation( array $testDescriptors, int $groups ): array { |
| 130 | $suites = array_fill( 0, $groups, [ "list" => [], "time" => 0 ] ); |
| 131 | $testCount = count( $testDescriptors ); |
| 132 | $splitGroupSize = ceil( $testCount / $groups ); |
| 133 | $bucketIndex = 0; |
| 134 | $testIndex = 0; |
| 135 | for ( $currentGroup = 0; $currentGroup < $groups, $testIndex < $testCount; $testIndex++, $bucketIndex++ ) { |
| 136 | if ( $bucketIndex >= $splitGroupSize ) { |
| 137 | $bucketIndex = 0; |
| 138 | $currentGroup++; |
| 139 | if ( $currentGroup === $groups ) { |
| 140 | break; |
| 141 | } |
| 142 | } |
| 143 | $suites[$currentGroup]["list"][] = $testDescriptors[$testIndex]->getFilename(); |
| 144 | $suites[$currentGroup]["time"] += $testDescriptors[$testIndex]->getDuration(); |
| 145 | } |
| 146 | |
| 147 | return $suites; |
| 148 | } |
| 149 | |
| 150 | private function createChunksOfTests( int $n, array $testDescriptors, int $chunkSize ): array { |
| 151 | $chunks = []; |
| 152 | for ( $i = 0; $i < $n; $i += $chunkSize ) { |
| 153 | $chunk = array_slice( $testDescriptors, $i, min( $chunkSize, $n - $i ) ); |
| 154 | $chunks[] = [ |
| 155 | "list" => $chunk, |
| 156 | "time" => array_reduce( $chunk, static fn ( $sum, $test ) => $sum + $test->getDuration(), 0 ) |
| 157 | ]; |
| 158 | } |
| 159 | return $chunks; |
| 160 | } |
| 161 | |
| 162 | private function calculatePrefixSum( int $numChunks, array $durations ): array { |
| 163 | $prefixSum = array_fill( 0, $numChunks + 1, 0 ); |
| 164 | for ( $i = 1; $i <= $numChunks; $i++ ) { |
| 165 | $prefixSum[$i] = $prefixSum[$i - 1] + $durations[$i - 1]; |
| 166 | } |
| 167 | return $prefixSum; |
| 168 | } |
| 169 | |
| 170 | /** |
| 171 | * Construct the split groups from the backtracking table. |
| 172 | * @throws SuiteSplittingException |
| 173 | */ |
| 174 | private function constructSplitGroups( |
| 175 | int $numGroups, |
| 176 | int $numChunks, |
| 177 | array $chunks, |
| 178 | array $backtrack, |
| 179 | array &$suites |
| 180 | ) { |
| 181 | for ( $splitGroupId = $numGroups, $i = $numChunks; $splitGroupId > 0; --$splitGroupId ) { |
| 182 | $startIndex = $backtrack[$i][$splitGroupId]; |
| 183 | if ( $startIndex === -1 ) { |
| 184 | throw new SuiteSplittingException( "Invalid backtracking index building group " . $splitGroupId ); |
| 185 | } |
| 186 | $suites[$splitGroupId - 1]["list"] = array_merge( |
| 187 | ...array_map( static fn ( $chunk ) => array_map( |
| 188 | static fn ( $test ) => $test->getFilename(), |
| 189 | $chunk["list"] |
| 190 | ), |
| 191 | array_slice( $chunks, $startIndex, $i - $startIndex ) |
| 192 | ) |
| 193 | ); |
| 194 | $suites[$splitGroupId - 1]["time"] = array_reduce( |
| 195 | array_slice( $chunks, $startIndex, $i - $startIndex ), |
| 196 | static fn ( $acc, $chunk ) => $acc + $chunk["time"], |
| 197 | 0 |
| 198 | ); |
| 199 | $i = $startIndex; |
| 200 | } |
| 201 | } |
| 202 | |
| 203 | /** |
| 204 | * Find the distribution of group split points that minimises the duration of the largest split_group |
| 205 | * and thereby minimises the duration of the CI job. |
| 206 | */ |
| 207 | private function generateBacktrackingTable( int $numChunks, int $numGroups, array $prefixSum ): array { |
| 208 | // $minimumLargestBucket[x][y] is the minimum possible largest split_group duration when splitting |
| 209 | // the first x chunks into y groups |
| 210 | $minimumLargestBucket = array_fill( 0, $numChunks + 1, array_fill( 0, $numGroups + 1, PHP_INT_MAX ) ); |
| 211 | // The backtracking table keeps track of the end point of the last group of the optimal distribution |
| 212 | $backtrack = array_fill( 0, $numChunks + 1, array_fill( 0, $numGroups + 1, -1 ) ); |
| 213 | |
| 214 | $minimumLargestBucket[0][0] = 0; |
| 215 | |
| 216 | // We work inductively, starting with distributing 1 chunk into 1 split_group |
| 217 | // and progressively distributing more tests until all chunks are in a split_group |
| 218 | for ( $i = 1; $i <= $numChunks; $i++ ) { |
| 219 | // For $i chunks, split them into up to $numGroups groups by identifying the optimal split points |
| 220 | for ( $j = 1; $j <= min( $numGroups, $i ); $j++ ) { |
| 221 | // For each split group $j find a split point $k, somewhere before the current chunk |
| 222 | for ( $k = 0; $k < $i; $k++ ) { |
| 223 | // the difference of the prefix sums is the combined duration of chunks $k through $i |
| 224 | $currentSplitGroupDuration = $prefixSum[$i] - $prefixSum[$k]; |
| 225 | // Compare the duration of the proposed split group with the minimum duration we found so far |
| 226 | // for splitting $k tests into $j-1 groups. |
| 227 | $newBestCaseMinimumLargestBucket = max( |
| 228 | $minimumLargestBucket[$k][$j - 1], $currentSplitGroupDuration |
| 229 | ); |
| 230 | // If our current duration is smaller, we know that we can split $k tests into $j groups without |
| 231 | // increasing the minimum duration. If it is greater, we know that putting a split point at $k would |
| 232 | // make the minimum duration of any group at least $currentSplitGroupDuration. |
| 233 | if ( $newBestCaseMinimumLargestBucket < $minimumLargestBucket[$i][$j] ) { |
| 234 | // If the new duration is less than the existing minimum for splitting $i tests into $j groups, |
| 235 | // we update the minimum duration. |
| 236 | $minimumLargestBucket[$i][$j] = $newBestCaseMinimumLargestBucket; |
| 237 | // Set the backtrack reference so that we know where the split point was for this minimal value. |
| 238 | $backtrack[$i][$j] = $k; |
| 239 | } |
| 240 | } |
| 241 | } |
| 242 | } |
| 243 | return $backtrack; |
| 244 | } |
| 245 | |
| 246 | } |