Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
94.12% |
64 / 68 |
|
71.43% |
5 / 7 |
CRAP | |
0.00% |
0 / 1 |
| PathMatcher | |
94.12% |
64 / 68 |
|
71.43% |
5 / 7 |
31.20 | |
0.00% |
0 / 1 |
| newFromCache | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
| getCacheData | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| isParam | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
3 | |||
| getParamName | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
2 | |||
| findConflict | |
100.00% |
15 / 15 |
|
100.00% |
1 / 1 |
10 | |||
| add | |
100.00% |
27 / 27 |
|
100.00% |
1 / 1 |
9 | |||
| match | |
100.00% |
17 / 17 |
|
100.00% |
1 / 1 |
5 | |||
| 1 | <?php |
| 2 | |
| 3 | namespace MediaWiki\Rest\PathTemplateMatcher; |
| 4 | |
| 5 | /** |
| 6 | * A tree-based path routing algorithm. |
| 7 | * |
| 8 | * This container builds defined routing templates into a tree, allowing |
| 9 | * paths to be efficiently matched against all templates. The match time is |
| 10 | * independent of the number of registered path templates. |
| 11 | * |
| 12 | * Efficient matching comes at the cost of a potentially significant setup time. |
| 13 | * We measured ~10ms for 1000 templates. Using getCacheData() and |
| 14 | * newFromCache(), this setup time may be amortized over multiple requests. |
| 15 | */ |
| 16 | class PathMatcher { |
| 17 | /** |
| 18 | * An array of trees indexed by the number of path components in the input. |
| 19 | * |
| 20 | * A tree node consists of an associative array in which the key is a match |
| 21 | * specifier string, and the value is another node. A leaf node, which is |
| 22 | * identifiable by its fixed depth in the tree, consists of an associative |
| 23 | * array with the following keys: |
| 24 | * - template: The path template string |
| 25 | * - paramNames: A list of parameter names extracted from the template |
| 26 | * - userData: The user data supplied to add() |
| 27 | * |
| 28 | * A match specifier string may be either "*", which matches any path |
| 29 | * component, or a literal string prefixed with "=", which matches the |
| 30 | * specified deprefixed string literal. |
| 31 | * |
| 32 | * @var array |
| 33 | */ |
| 34 | private $treesByLength = []; |
| 35 | |
| 36 | /** |
| 37 | * Create a PathMatcher from cache data |
| 38 | * |
| 39 | * @param array $data The data array previously returned by getCacheData() |
| 40 | * @return PathMatcher |
| 41 | */ |
| 42 | public static function newFromCache( $data ) { |
| 43 | $matcher = new self; |
| 44 | $matcher->treesByLength = $data; |
| 45 | return $matcher; |
| 46 | } |
| 47 | |
| 48 | /** |
| 49 | * Get a data array for later use by newFromCache(). |
| 50 | * |
| 51 | * The internal format is private to PathMatcher, but note that it includes |
| 52 | * any data passed as $userData to add(). The array returned will be |
| 53 | * serializable as long as all $userData values are serializable. |
| 54 | * |
| 55 | * @return array |
| 56 | */ |
| 57 | public function getCacheData() { |
| 58 | return $this->treesByLength; |
| 59 | } |
| 60 | |
| 61 | /** |
| 62 | * Determine whether a path template component is a parameter |
| 63 | * |
| 64 | * @param string $part |
| 65 | * @return bool |
| 66 | */ |
| 67 | private function isParam( $part ) { |
| 68 | $partLength = strlen( $part ); |
| 69 | return $partLength > 2 && $part[0] === '{' && $part[$partLength - 1] === '}'; |
| 70 | } |
| 71 | |
| 72 | /** |
| 73 | * If a path template component is a parameter, return the parameter name. |
| 74 | * Otherwise, return false. |
| 75 | * |
| 76 | * @param string $part |
| 77 | * @return string|false |
| 78 | */ |
| 79 | private function getParamName( $part ) { |
| 80 | if ( $this->isParam( $part ) ) { |
| 81 | return substr( $part, 1, -1 ); |
| 82 | } else { |
| 83 | return false; |
| 84 | } |
| 85 | } |
| 86 | |
| 87 | /** |
| 88 | * Recursively search the match tree, checking whether the proposed path |
| 89 | * template, passed as an array of component parts, can be added to the |
| 90 | * matcher without ambiguity. |
| 91 | * |
| 92 | * Ambiguity means that a path exists which matches multiple templates. |
| 93 | * |
| 94 | * The function calls itself recursively, incrementing $index so as to |
| 95 | * ignore a prefix of the input, in order to check deeper parts of the |
| 96 | * match tree. |
| 97 | * |
| 98 | * If a conflict is discovered, the conflicting leaf node is returned. |
| 99 | * Otherwise, false is returned. |
| 100 | * |
| 101 | * @param array $node The tree node to check against |
| 102 | * @param string[] $parts The array of path template parts |
| 103 | * @param int $index The current index into $parts |
| 104 | * @return array|false |
| 105 | */ |
| 106 | private function findConflict( $node, $parts, $index = 0 ) { |
| 107 | if ( $index >= count( $parts ) ) { |
| 108 | // If we reached the leaf node then a conflict is detected |
| 109 | return $node; |
| 110 | } |
| 111 | $part = $parts[$index]; |
| 112 | $result = false; |
| 113 | |
| 114 | if ( $this->isParam( $part ) ) { |
| 115 | foreach ( $node as $childNode ) { |
| 116 | // Params and tree entries for trailing empty slashes ('=') do not conflict |
| 117 | if ( !isset( $node['='] ) ) { |
| 118 | $result = $this->findConflict( $childNode, $parts, $index + 1 ); |
| 119 | if ( $result !== false ) { |
| 120 | break; |
| 121 | } |
| 122 | } |
| 123 | } |
| 124 | } else { |
| 125 | if ( isset( $node["=$part"] ) ) { |
| 126 | $result = $this->findConflict( $node["=$part"], $parts, $index + 1 ); |
| 127 | } |
| 128 | |
| 129 | // Trailing empty slashes (aka an empty $part) do not conflict with params |
| 130 | if ( $result === false && isset( $node['*'] ) && $part !== '' ) { |
| 131 | $result = $this->findConflict( $node['*'], $parts, $index + 1 ); |
| 132 | } |
| 133 | } |
| 134 | return $result; |
| 135 | } |
| 136 | |
| 137 | /** |
| 138 | * Add a template to the matcher. |
| 139 | * |
| 140 | * The path template consists of components separated by "/". Each component |
| 141 | * may be either a parameter of the form {paramName}, or a literal string. |
| 142 | * A parameter matches any input path component, whereas a literal string |
| 143 | * matches itself. |
| 144 | * |
| 145 | * Path templates must not conflict with each other, that is, any input |
| 146 | * path must match at most one path template. If a path template conflicts |
| 147 | * with another already registered, this function throws a PathConflict |
| 148 | * exception. |
| 149 | * |
| 150 | * @param string $template The path template |
| 151 | * @param mixed $userData User data used to identify the matched route to |
| 152 | * the caller of match() |
| 153 | * @throws PathConflict|PathSegmentException |
| 154 | */ |
| 155 | public function add( $template, $userData ) { |
| 156 | // This will produce an empty part before any leading slash and after any trailing slash |
| 157 | $parts = explode( '/', $template ); |
| 158 | $length = count( $parts ); |
| 159 | if ( !isset( $this->treesByLength[$length] ) ) { |
| 160 | $this->treesByLength[$length] = []; |
| 161 | } |
| 162 | $tree =& $this->treesByLength[$length]; |
| 163 | |
| 164 | // Disallow empty path parts within the path (but not leading or trailing). |
| 165 | for ( $i = 1; $i < $length - 1; $i++ ) { |
| 166 | if ( $parts[$i] == '' ) { |
| 167 | throw new PathSegmentException( $template, $userData ); |
| 168 | } |
| 169 | } |
| 170 | |
| 171 | $conflict = $this->findConflict( $tree, $parts ); |
| 172 | if ( $conflict !== false ) { |
| 173 | throw new PathConflict( $template, $userData, $conflict ); |
| 174 | } |
| 175 | |
| 176 | $params = []; |
| 177 | foreach ( $parts as $index => $part ) { |
| 178 | $paramName = $this->getParamName( $part ); |
| 179 | if ( $paramName !== false ) { |
| 180 | $params[] = $paramName; |
| 181 | $key = '*'; |
| 182 | } else { |
| 183 | $key = "=$part"; |
| 184 | } |
| 185 | if ( $index === $length - 1 ) { |
| 186 | $tree[$key] = [ |
| 187 | 'template' => $template, |
| 188 | 'paramNames' => $params, |
| 189 | 'userData' => $userData |
| 190 | ]; |
| 191 | } elseif ( !isset( $tree[$key] ) ) { |
| 192 | $tree[$key] = []; |
| 193 | } |
| 194 | $tree =& $tree[$key]; |
| 195 | } |
| 196 | } |
| 197 | |
| 198 | /** |
| 199 | * Match a path against the current match trees. |
| 200 | * |
| 201 | * If the path matches a previously added path template, an array will be |
| 202 | * returned with the following keys: |
| 203 | * - params: An array mapping parameter names to their detected values |
| 204 | * - userData: The user data passed to add(), which identifies the route |
| 205 | * |
| 206 | * If the path does not match any template, false is returned. |
| 207 | * |
| 208 | * @param string $path |
| 209 | * @return array|false |
| 210 | */ |
| 211 | public function match( $path ) { |
| 212 | $parts = explode( '/', $path ); |
| 213 | $length = count( $parts ); |
| 214 | if ( !isset( $this->treesByLength[$length] ) ) { |
| 215 | return false; |
| 216 | } |
| 217 | $node = $this->treesByLength[$length]; |
| 218 | |
| 219 | $paramValues = []; |
| 220 | foreach ( $parts as $part ) { |
| 221 | if ( isset( $node["=$part"] ) ) { |
| 222 | $node = $node["=$part"]; |
| 223 | } elseif ( isset( $node['*'] ) ) { |
| 224 | $node = $node['*']; |
| 225 | $paramValues[] = $part; |
| 226 | } else { |
| 227 | return false; |
| 228 | } |
| 229 | } |
| 230 | |
| 231 | return [ |
| 232 | 'params' => array_combine( $node['paramNames'], $paramValues ), |
| 233 | 'userData' => $node['userData'] |
| 234 | ]; |
| 235 | } |
| 236 | } |