Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
100.00% |
91 / 91 |
|
100.00% |
13 / 13 |
CRAP | |
100.00% |
1 / 1 |
SimpleKeywordFeature | |
100.00% |
91 / 91 |
|
100.00% |
13 / 13 |
28 | |
100.00% |
1 / 1 |
getKeywords | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
getKeywordPrefixes | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
allowEmptyValue | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
hasValue | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
greedy | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
queryHeader | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getFeatureName | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getValueDelimiters | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
parseValue | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getCrossSearchStrategy | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
expand | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getValueRegex | |
100.00% |
23 / 23 |
|
100.00% |
1 / 1 |
6 | |||
doApply | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
doApplyExtended | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
apply | |
100.00% |
57 / 57 |
|
100.00% |
1 / 1 |
11 |
1 | <?php |
2 | |
3 | namespace CirrusSearch\Query; |
4 | |
5 | use CirrusSearch\CrossSearchStrategy; |
6 | use CirrusSearch\Parser\AST\KeywordFeatureNode; |
7 | use CirrusSearch\Search\SearchContext; |
8 | use CirrusSearch\SearchConfig; |
9 | use CirrusSearch\WarningCollector; |
10 | use Wikimedia\Assert\Assert; |
11 | |
12 | /** |
13 | * Implements abstract handling of keyword features that are composed of a |
14 | * keyword followed by a colon then an optionally quoted value. For consistency |
15 | * most query features should be implemented this way using the default |
16 | * getValueRegex() where possible. |
17 | */ |
18 | abstract class SimpleKeywordFeature implements KeywordFeature { |
19 | /** |
20 | * NOTE: will be removed once all implementations implement getKeywordPrefixes()
21 | * (transitional state to change the visibility of getKeywords())
22 | * @return string[] The list of keywords this feature is supposed to match |
23 | */ |
24 | abstract protected function getKeywords(); |
25 | |
26 | /** |
27 | * @return string[] |
28 | */ |
29 | public function getKeywordPrefixes() { |
30 | return $this->getKeywords(); |
31 | } |
32 | |
33 | /** |
34 | * Whether this keyword allows empty value. |
35 | * @return bool true to allow the keyword to appear in an empty form |
36 | */ |
37 | public function allowEmptyValue() { |
38 | return false; |
39 | } |
40 | |
41 | /** |
42 | * Whether this keyword can have a value |
43 | * @return bool |
44 | */ |
45 | public function hasValue() { |
46 | return true; |
47 | } |
48 | |
49 | /** |
50 | * Whether this keyword is greedy, i.e. consumes the rest of the string.
51 | * NOTE: do not override; greedy keywords will eventually be removed.
52 | * @return bool |
53 | */ |
54 | public function greedy() { |
55 | return false; |
56 | } |
57 | |
58 | /** |
59 | * Whether this keyword can appear only at the beginning of the query |
60 | * (excluding spaces) |
61 | * @return bool |
62 | */ |
63 | public function queryHeader() { |
64 | return false; |
65 | } |
66 | |
67 | /** |
68 | * Determine the name of the feature being set in SearchContext::addSyntaxUsed |
69 | * Defaults to $key |
70 | * |
71 | * @param string $key |
72 | * @param string $valueDelimiter the delimiter used to wrap the value:
73 | * '"' when parsing keyword:"test"
74 | * '' when parsing keyword:test
75 | * @return string
76 | */ |
77 | public function getFeatureName( $key, $valueDelimiter ) { |
78 | return $key; |
79 | } |
80 | |
81 | /** |
82 | * List of value delimiters supported (each delimiter must be a single-byte character)
83 | * @return string[][] list of delimiter options, each with a 'delimiter' key and an optional 'suffixes' key
84 | */ |
85 | public function getValueDelimiters() { |
86 | return [ [ 'delimiter' => '"' ] ]; |
87 | } |
88 | |
89 | /** |
90 | * Parse the value of the keyword. |
91 | * |
92 | * @param string $key |
93 | * @param string $value |
94 | * @param string $quotedValue |
95 | * @param string $valueDelimiter |
96 | * @param string $suffix |
97 | * @param WarningCollector $warningCollector |
98 | * @return array|null|false null when nothing is to be kept, false when the value is refused |
99 | * (only allowed for keywords that allow empty values)
100 | * @see self::allowEmptyValue |
101 | */ |
102 | public function parseValue( |
103 | $key, |
104 | $value, |
105 | $quotedValue, |
106 | $valueDelimiter, |
107 | $suffix, |
108 | WarningCollector $warningCollector |
109 | ) { |
110 | return null; |
111 | } |
112 | |
113 | /** |
114 | * @param KeywordFeatureNode $node |
115 | * @return CrossSearchStrategy |
116 | */ |
117 | public function getCrossSearchStrategy( KeywordFeatureNode $node ) { |
118 | return CrossSearchStrategy::hostWikiOnlyStrategy(); |
119 | } |
120 | |
121 | /** |
122 | * @param KeywordFeatureNode $node |
123 | * @param SearchConfig $config |
124 | * @param WarningCollector $warningCollector |
125 | * @return array |
126 | */ |
127 | public function expand( |
128 | KeywordFeatureNode $node, |
129 | SearchConfig $config, |
130 | WarningCollector $warningCollector |
131 | ) { |
132 | return []; |
133 | } |
134 | |
135 | /** |
136 | * Captures either a quoted or unquoted string. Quoted strings may have |
137 | * escaped (\") quotes embedded in them. |
138 | * |
139 | * @return string A piece of a regular expression (not wrapped in //) that |
140 | * matches the acceptable values for this feature. Must contain quoted and |
141 | * unquoted capture groups. |
142 | */ |
143 | private function getValueRegex() { |
144 | Assert::invariant( $this->hasValue(), __METHOD__ . ' called but hasValue() is false' ); |
145 | if ( $this->greedy() ) { |
146 | Assert::precondition( !$this->allowEmptyValue(), "greedy keywords must not accept empty value" ); |
147 | // XXX: we ignore value delimiter for greedy keywords |
148 | Assert::precondition( $this->getValueDelimiters() === [ [ 'delimiter' => '"' ] ], |
149 | "getValueDelimiters() must not be overridden with greedy keywords" ); |
150 | // XXX: we send raw value to the keyword |
151 | return '(?<unquoted>.+)'; |
152 | } else { |
153 | $quantifier = $this->allowEmptyValue() ? '*' : '+'; |
154 | // Collect all quoted value delimiters (usually only " but can be / for regexes)
155 | $allDelims = ''; |
156 | $optionalSuffixes = []; |
157 | foreach ( $this->getValueDelimiters() as $delimConfig ) { |
158 | Assert::precondition( strlen( $delimConfig['delimiter'] ) === 1, |
159 | "Value delimiter must be a single byte char" ); |
160 | $delim = preg_quote( $delimConfig['delimiter'], '/' ); |
161 | $allDelims .= $delim; |
162 | if ( isset( $delimConfig['suffixes'] ) ) { |
163 | // Use lookbehind to only match the suffix if it was used with the proper delimiter,
164 | // i.e. i should only be matched in /regex/i, not in "regex"i
165 | $optionalSuffixes[] = "(?<=$delim)" . preg_quote( $delimConfig['suffixes'], '/' ); |
166 | } |
167 | } |
168 | $quotedValue = "(?<delim>[$allDelims])" . // Capture the delimiter used to use in backreferences |
169 | // use negative lookahead to consume any char that is not the captured delimiter
170 | // but also accept an escaped (backslash-prefixed) captured delimiter
171 | "(?<quoted>(?:\\\\\g{delim}|(?!\g{delim}).)*)" . |
172 | "\g{delim}"; |
173 | if ( $optionalSuffixes ) { |
174 | $quotedValue .= "(?<suffixes>" . implode( '|', $optionalSuffixes ) . ')?'; |
175 | } |
176 | // XXX: we support only " to break the unquoted value |
177 | $unquotedValue = "(?<unquoted>[^\"\s]$quantifier)"; |
178 | return $quotedValue . '|' . $unquotedValue; |
179 | } |
180 | } |
181 | |
182 | /** |
183 | * Applies the detected keyword from the search term. May apply changes |
184 | * either to $context directly, or return a filter to be added. |
185 | * |
186 | * @param SearchContext $context |
187 | * @param string $key The keyword |
188 | * @param string $value The value attached to the keyword with quotes stripped and escaped |
189 | * quotes un-escaped. |
190 | * @param string $quotedValue The original value in the search string, including quotes if used |
191 | * @param bool $negated Is the search negated? Not used to generate the returned AbstractQuery, |
192 | * that will be negated as necessary. Used for any other building/context necessary. |
193 | * @return array Two element array, first an AbstractQuery or null to apply to the |
194 | * query. Second a boolean indicating if the quotedValue should be kept in the search |
195 | * string. |
196 | */ |
197 | abstract protected function doApply( SearchContext $context, $key, $value, $quotedValue, $negated ); |
198 | |
199 | /** |
200 | * Fully featured apply method which delegates to doApply by default. |
201 | * |
202 | * @param SearchContext $context |
203 | * @param string $key The keyword |
204 | * @param string $value The value attached to the keyword with quotes stripped and escaped |
205 | * quotes un-escaped. |
206 | * @param string $quotedValue The original value in the search string, including quotes if used |
207 | * @param bool $negated Is the search negated? Not used to generate the returned AbstractQuery, |
208 | * that will be negated as necessary. Used for any other building/context necessary. |
209 | * @param string $delimiter the delimiter char used to wrap the keyword value ('"' in intitle:"test") |
210 | * @param string $suffix the optional suffix used after the value ('i' in insource:/regex/i) |
211 | * @return array Two element array, first an AbstractQuery or null to apply to the |
212 | * query. Second a boolean indicating if the quotedValue should be kept in the search |
213 | * string. |
214 | */ |
215 | public function doApplyExtended( |
216 | SearchContext $context, |
217 | $key, |
218 | $value, |
219 | $quotedValue, |
220 | $negated, |
221 | $delimiter, |
222 | $suffix |
223 | ) { |
224 | return $this->doApply( $context, $key, $value, $quotedValue, $negated ); |
225 | } |
226 | |
227 | /** |
228 | * @param SearchContext $context |
229 | * @param string $term Search query |
230 | * @return string Remaining search query |
231 | */ |
232 | public function apply( SearchContext $context, $term ) { |
233 | $keyListRegex = implode( |
234 | '|', |
235 | array_map( |
236 | static function ( $kw ) { |
237 | return preg_quote( $kw, '/' ); |
238 | }, |
239 | $this->getKeywords() |
240 | ) |
241 | ); |
242 | // Anchor to the beginning, allowing optional whitespace, if we are a queryHeader;
243 | // otherwise use a lookbehind that accepts the beginning of the string or whitespace.
244 | $begin = $this->queryHeader() ? '(?:^\s*)' : '(?<=^|\s)'; |
245 | $keywordRegex = '(?<key>-?(?:' . $keyListRegex . '))'; |
246 | $valueSideRegex = ''; |
247 | if ( $this->hasValue() ) { |
248 | $valueRegex = '(?<value>' . $this->getValueRegex() . ')'; |
249 | // If we allow empty values we don't allow spaces between
250 | // the keyword and its value: a space would mean "empty value"
251 | $spacesAfterSep = $this->allowEmptyValue() ? '' : '\s*'; |
252 | $valueSideRegex = "{$spacesAfterSep}{$valueRegex}\\s?"; |
253 | } |
254 | |
255 | $callback = function ( $match ) use ( $context ) { |
256 | $key = $match['key']; |
257 | Assert::invariant( $this->hasValue() === isset( $match['value'] ), 'a value must have matched' ); |
258 | $quotedValue = ''; |
259 | $value = ''; |
260 | $valueDelimiter = ''; |
261 | $valueSuffix = ''; |
262 | if ( $this->hasValue() ) { |
263 | $quotedValue = $match['value']; |
264 | if ( isset( $match["unquoted"] ) ) { |
265 | $value = $match["unquoted"]; |
266 | } else { |
267 | $valueDelimiter = $match['delim']; |
268 | $value = str_replace( "\\$valueDelimiter", $valueDelimiter, $match["quoted"] ); |
269 | } |
270 | if ( isset( $match["suffixes"] ) ) { |
271 | $valueSuffix = $match["suffixes"]; |
272 | $quotedValue = rtrim( $quotedValue, $valueSuffix ); |
273 | } |
274 | } |
275 | if ( $key[0] === '-' ) { |
276 | $negated = true; |
277 | $key = substr( $key, 1 ); |
278 | } else { |
279 | $negated = false; |
280 | } |
281 | |
282 | $context->addSyntaxUsed( $this->getFeatureName( $key, $valueDelimiter ) ); |
283 | [ $filter, $keepText ] = $this->doApplyExtended( |
284 | $context, |
285 | $key, |
286 | $value, |
287 | $quotedValue, |
288 | $negated, |
289 | $valueDelimiter, |
290 | $valueSuffix |
291 | ); |
292 | if ( $filter !== null ) { |
293 | if ( $negated ) { |
294 | $context->addNotFilter( $filter ); |
295 | } else { |
296 | $context->addFilter( $filter ); |
297 | } |
298 | } |
299 | // FIXME: this adds a trailing space if this is the last keyword |
300 | return $keepText ? "$quotedValue " : ''; |
301 | }; |
302 | |
303 | return preg_replace_callback( |
304 | "/{$begin}{$keywordRegex}:{$valueSideRegex}/", |
305 | $callback, |
306 | $term |
307 | ); |
308 | } |
309 | } |