Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
94.12% |
64 / 68 |
|
71.43% |
5 / 7 |
CRAP | |
0.00% |
0 / 1 |
PathMatcher | |
94.12% |
64 / 68 |
|
71.43% |
5 / 7 |
31.20 | |
0.00% |
0 / 1 |
newFromCache | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
getCacheData | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
isParam | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
3 | |||
getParamName | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
2 | |||
findConflict | |
100.00% |
15 / 15 |
|
100.00% |
1 / 1 |
10 | |||
add | |
100.00% |
27 / 27 |
|
100.00% |
1 / 1 |
9 | |||
match | |
100.00% |
17 / 17 |
|
100.00% |
1 / 1 |
5 |
1 | <?php |
2 | |
3 | namespace MediaWiki\Rest\PathTemplateMatcher; |
4 | |
5 | /** |
6 | * A tree-based path routing algorithm. |
7 | * |
8 | * This container builds defined routing templates into a tree, allowing |
9 | * paths to be efficiently matched against all templates. The match time is |
10 | * independent of the number of registered path templates. |
11 | * |
12 | * Efficient matching comes at the cost of a potentially significant setup time. |
13 | * We measured ~10ms for 1000 templates. Using getCacheData() and |
14 | * newFromCache(), this setup time may be amortized over multiple requests. |
15 | */ |
16 | class PathMatcher { |
17 | /** |
18 | * An array of trees indexed by the number of path components in the input. |
19 | * |
20 | * A tree node consists of an associative array in which the key is a match |
21 | * specifier string, and the value is another node. A leaf node, which is |
22 | * identifiable by its fixed depth in the tree, consists of an associative |
23 | * array with the following keys: |
24 | * - template: The path template string |
25 | * - paramNames: A list of parameter names extracted from the template |
26 | * - userData: The user data supplied to add() |
27 | * |
28 | * A match specifier string may be either "*", which matches any path |
29 | * component, or a literal string prefixed with "=", which matches the |
30 | * specified deprefixed string literal. |
31 | * |
32 | * @var array |
33 | */ |
34 | private $treesByLength = []; |
35 | |
36 | /** |
37 | * Create a PathMatcher from cache data |
38 | * |
39 | * @param array $data The data array previously returned by getCacheData() |
40 | * @return PathMatcher |
41 | */ |
42 | public static function newFromCache( $data ) { |
43 | $matcher = new self; |
44 | $matcher->treesByLength = $data; |
45 | return $matcher; |
46 | } |
47 | |
48 | /** |
49 | * Get a data array for later use by newFromCache(). |
50 | * |
51 | * The internal format is private to PathMatcher, but note that it includes |
52 | * any data passed as $userData to add(). The array returned will be |
53 | * serializable as long as all $userData values are serializable. |
54 | * |
55 | * @return array |
56 | */ |
57 | public function getCacheData() { |
58 | return $this->treesByLength; |
59 | } |
60 | |
61 | /** |
62 | * Determine whether a path template component is a parameter |
63 | * |
64 | * @param string $part |
65 | * @return bool |
66 | */ |
67 | private function isParam( $part ) { |
68 | $partLength = strlen( $part ); |
69 | return $partLength > 2 && $part[0] === '{' && $part[$partLength - 1] === '}'; |
70 | } |
71 | |
72 | /** |
73 | * If a path template component is a parameter, return the parameter name. |
74 | * Otherwise, return false. |
75 | * |
76 | * @param string $part |
77 | * @return string|false |
78 | */ |
79 | private function getParamName( $part ) { |
80 | if ( $this->isParam( $part ) ) { |
81 | return substr( $part, 1, -1 ); |
82 | } else { |
83 | return false; |
84 | } |
85 | } |
86 | |
87 | /** |
88 | * Recursively search the match tree, checking whether the proposed path |
89 | * template, passed as an array of component parts, can be added to the |
90 | * matcher without ambiguity. |
91 | * |
92 | * Ambiguity means that a path exists which matches multiple templates. |
93 | * |
94 | * The function calls itself recursively, incrementing $index so as to |
95 | * ignore a prefix of the input, in order to check deeper parts of the |
96 | * match tree. |
97 | * |
98 | * If a conflict is discovered, the conflicting leaf node is returned. |
99 | * Otherwise, false is returned. |
100 | * |
101 | * @param array $node The tree node to check against |
102 | * @param string[] $parts The array of path template parts |
103 | * @param int $index The current index into $parts |
104 | * @return array|false |
105 | */ |
106 | private function findConflict( $node, $parts, $index = 0 ) { |
107 | if ( $index >= count( $parts ) ) { |
108 | // If we reached the leaf node then a conflict is detected |
109 | return $node; |
110 | } |
111 | $part = $parts[$index]; |
112 | $result = false; |
113 | |
114 | if ( $this->isParam( $part ) ) { |
115 | foreach ( $node as $childNode ) { |
116 | // Params and tree entries for trailing empty slashes ('=') do not conflict |
117 | if ( !isset( $node['='] ) ) { |
118 | $result = $this->findConflict( $childNode, $parts, $index + 1 ); |
119 | if ( $result !== false ) { |
120 | break; |
121 | } |
122 | } |
123 | } |
124 | } else { |
125 | if ( isset( $node["=$part"] ) ) { |
126 | $result = $this->findConflict( $node["=$part"], $parts, $index + 1 ); |
127 | } |
128 | |
129 | // Trailing empty slashes (aka an empty $part) do not conflict with params |
130 | if ( $result === false && isset( $node['*'] ) && $part !== '' ) { |
131 | $result = $this->findConflict( $node['*'], $parts, $index + 1 ); |
132 | } |
133 | } |
134 | return $result; |
135 | } |
136 | |
137 | /** |
138 | * Add a template to the matcher. |
139 | * |
140 | * The path template consists of components separated by "/". Each component |
141 | * may be either a parameter of the form {paramName}, or a literal string. |
142 | * A parameter matches any input path component, whereas a literal string |
143 | * matches itself. |
144 | * |
145 | * Path templates must not conflict with each other, that is, any input |
146 | * path must match at most one path template. If a path template conflicts |
147 | * with another already registered, this function throws a PathConflict |
148 | * exception. |
149 | * |
150 | * @param string $template The path template |
151 | * @param mixed $userData User data used to identify the matched route to |
152 | * the caller of match() |
153 | * @throws PathConflict|PathSegmentException |
154 | */ |
155 | public function add( $template, $userData ) { |
156 | // This will produce an empty part before any leading slash and after any trailing slash |
157 | $parts = explode( '/', $template ); |
158 | $length = count( $parts ); |
159 | if ( !isset( $this->treesByLength[$length] ) ) { |
160 | $this->treesByLength[$length] = []; |
161 | } |
162 | $tree =& $this->treesByLength[$length]; |
163 | |
164 | // Disallow empty path parts within the path (but not leading or trailing). |
165 | for ( $i = 1; $i < $length - 1; $i++ ) { |
166 | if ( $parts[$i] == '' ) { |
167 | throw new PathSegmentException( $template, $userData ); |
168 | } |
169 | } |
170 | |
171 | $conflict = $this->findConflict( $tree, $parts ); |
172 | if ( $conflict !== false ) { |
173 | throw new PathConflict( $template, $userData, $conflict ); |
174 | } |
175 | |
176 | $params = []; |
177 | foreach ( $parts as $index => $part ) { |
178 | $paramName = $this->getParamName( $part ); |
179 | if ( $paramName !== false ) { |
180 | $params[] = $paramName; |
181 | $key = '*'; |
182 | } else { |
183 | $key = "=$part"; |
184 | } |
185 | if ( $index === $length - 1 ) { |
186 | $tree[$key] = [ |
187 | 'template' => $template, |
188 | 'paramNames' => $params, |
189 | 'userData' => $userData |
190 | ]; |
191 | } elseif ( !isset( $tree[$key] ) ) { |
192 | $tree[$key] = []; |
193 | } |
194 | $tree =& $tree[$key]; |
195 | } |
196 | } |
197 | |
198 | /** |
199 | * Match a path against the current match trees. |
200 | * |
201 | * If the path matches a previously added path template, an array will be |
202 | * returned with the following keys: |
203 | * - params: An array mapping parameter names to their detected values |
204 | * - userData: The user data passed to add(), which identifies the route |
205 | * |
206 | * If the path does not match any template, false is returned. |
207 | * |
208 | * @param string $path |
209 | * @return array|false |
210 | */ |
211 | public function match( $path ) { |
212 | $parts = explode( '/', $path ); |
213 | $length = count( $parts ); |
214 | if ( !isset( $this->treesByLength[$length] ) ) { |
215 | return false; |
216 | } |
217 | $node = $this->treesByLength[$length]; |
218 | |
219 | $paramValues = []; |
220 | foreach ( $parts as $part ) { |
221 | if ( isset( $node["=$part"] ) ) { |
222 | $node = $node["=$part"]; |
223 | } elseif ( isset( $node['*'] ) ) { |
224 | $node = $node['*']; |
225 | $paramValues[] = $part; |
226 | } else { |
227 | return false; |
228 | } |
229 | } |
230 | |
231 | return [ |
232 | 'params' => array_combine( $node['paramNames'], $paramValues ), |
233 | 'userData' => $node['userData'] |
234 | ]; |
235 | } |
236 | } |