Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
100.00% |
105 / 105 |
|
100.00% |
14 / 14 |
CRAP | |
100.00% |
1 / 1 |
| SimpleKeywordFeature | |
100.00% |
105 / 105 |
|
100.00% |
14 / 14 |
32 | |
100.00% |
1 / 1 |
| getKeywords | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
| getKeywordPrefixes | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| allowEmptyValue | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| hasValue | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| greedy | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| queryHeader | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| getFeatureName | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| getValueDelimiters | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| parseValue | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| getCrossSearchStrategy | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| expand | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| getValueRegex | |
100.00% |
23 / 23 |
|
100.00% |
1 / 1 |
6 | |||
| doApply | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
| doApplyExtended | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| apply | |
100.00% |
57 / 57 |
|
100.00% |
1 / 1 |
11 | |||
| parseBoost | |
100.00% |
14 / 14 |
|
100.00% |
1 / 1 |
4 | |||
| 1 | <?php |
| 2 | |
| 3 | namespace CirrusSearch\Query; |
| 4 | |
| 5 | use CirrusSearch\CrossSearchStrategy; |
| 6 | use CirrusSearch\Parser\AST\KeywordFeatureNode; |
| 7 | use CirrusSearch\Search\SearchContext; |
| 8 | use CirrusSearch\SearchConfig; |
| 9 | use CirrusSearch\WarningCollector; |
| 10 | use MediaWiki\Message\Message; |
| 11 | use Wikimedia\Assert\Assert; |
| 12 | |
/**
 * Implements abstract handling of keyword features that are composed of a
 * keyword followed by a colon then an optionally quoted value. For consistency
 * most query features should be implemented this way using the default
 * getValueRegex() where possible.
 *
 * Subclasses provide the list of keywords (getKeywords()) and the actual
 * behavior (doApply() or doApplyExtended()); apply() takes care of locating
 * the keyword in the query string and of extracting its value.
 */
abstract class SimpleKeywordFeature implements KeywordFeature {
	/** Message key of the warning emitted by parseBoost() when the boost is rejected */
	public const WARN_MESSAGE_INVALID_BOOST = "cirrussearch-invalid-keyword-boost";

	/**
	 * NOTE: will be removed once all implementations implement getKeywordStrings
	 * (transitional state to change the visibility of getKeywords())
	 * @return string[] The list of keywords this feature is supposed to match
	 */
	abstract protected function getKeywords();

	/**
	 * Public accessor for the keywords handled by this feature.
	 *
	 * @return string[] the value of getKeywords()
	 */
	public function getKeywordPrefixes() {
		return $this->getKeywords();
	}

	/**
	 * Whether this keyword allows empty value.
	 *
	 * When true the value regex accepts a zero-length unquoted value and
	 * apply() no longer tolerates spaces between the colon and the value.
	 *
	 * @return bool true to allow the keyword to appear in an empty form
	 */
	public function allowEmptyValue() {
		return false;
	}

	/**
	 * Whether this keyword can have a value
	 *
	 * When false apply() only matches "keyword:" and never calls getValueRegex().
	 *
	 * @return bool
	 */
	public function hasValue() {
		return true;
	}

	/**
	 * Whether this keyword is greedy consuming the rest of the string.
	 * NOTE: do not override, greedy keywords will eventually be removed in the future
	 * @return bool
	 */
	public function greedy() {
		return false;
	}

	/**
	 * Whether this keyword can appear only at the beginning of the query
	 * (excluding spaces)
	 * @return bool
	 */
	public function queryHeader() {
		return false;
	}

	/**
	 * Determine the name of the feature being set in SearchContext::addSyntaxUsed
	 * Defaults to $key
	 *
	 * @param string $key the keyword, without its negation prefix
	 * @param string $valueDelimiter the delimiter used to wrap the value:
	 *  '"' when parsing keyword:"test"
	 *  '' when parsing keyword:test
	 * @return string
	 */
	public function getFeatureName( $key, $valueDelimiter ) {
		return $key;
	}

	/**
	 * List of value delimiters supported (must be an array of single byte char)
	 *
	 * Each option is an array with a 'delimiter' key and, optionally, a
	 * 'suffixes' key holding the string allowed right after the closing
	 * delimiter (see getValueRegex()).
	 *
	 * @return string[][] list of delimiters options
	 */
	public function getValueDelimiters() {
		return [ [ 'delimiter' => '"' ] ];
	}

	/**
	 * Parse the value of the keyword.
	 *
	 * The default implementation keeps nothing and returns null.
	 *
	 * @param string $key
	 * @param string $value
	 * @param string $quotedValue
	 * @param string $valueDelimiter
	 * @param string $suffix
	 * @param WarningCollector $warningCollector
	 * @return array|null|false null when nothing is to be kept, false when the value is refused
	 * (only allowed for keywords that allows empty value)
	 * @see self::allowEmptyValue
	 */
	public function parseValue(
		$key,
		$value,
		$quotedValue,
		$valueDelimiter,
		$suffix,
		WarningCollector $warningCollector
	) {
		return null;
	}

	/**
	 * Defaults to the host-wiki-only strategy.
	 *
	 * @param KeywordFeatureNode $node
	 * @return CrossSearchStrategy
	 */
	public function getCrossSearchStrategy( KeywordFeatureNode $node ) {
		return CrossSearchStrategy::hostWikiOnlyStrategy();
	}

	/**
	 * Defaults to an empty expansion.
	 *
	 * @param KeywordFeatureNode $node
	 * @param SearchConfig $config
	 * @param WarningCollector $warningCollector
	 * @return array
	 */
	public function expand(
		KeywordFeatureNode $node,
		SearchConfig $config,
		WarningCollector $warningCollector
	) {
		return [];
	}

	/**
	 * Captures either a quoted or unquoted string. Quoted strings may have
	 * escaped (\") quotes embedded in them.
	 *
	 * The named groups produced are "delim", "quoted" and (when at least one
	 * delimiter option declares suffixes) "suffixes" for the quoted form, and
	 * "unquoted" for the unquoted form. Greedy keywords only produce "unquoted".
	 *
	 * @return string A piece of a regular expression (not wrapped in //) that
	 *  matches the acceptable values for this feature. Must contain quoted and
	 *  unquoted capture groups.
	 */
	private function getValueRegex() {
		Assert::invariant( $this->hasValue(), __METHOD__ . ' called but hasValue() is false' );
		if ( $this->greedy() ) {
			Assert::precondition( !$this->allowEmptyValue(), "greedy keywords must not accept empty value" );
			// XXX: we ignore value delimiter for greedy keywords
			Assert::precondition( $this->getValueDelimiters() === [ [ 'delimiter' => '"' ] ],
				"getValueDelimiters() must not be overridden with greedy keywords" );
			// XXX: we send raw value to the keyword
			return '(?<unquoted>.+)';
		}

		$quantifier = $this->allowEmptyValue() ? '*' : '+';
		// Collect all quoted value delimiters (usually only " but can be / for regexes)
		$allDelims = '';
		$optionalSuffixes = [];
		foreach ( $this->getValueDelimiters() as $delimConfig ) {
			Assert::precondition( strlen( $delimConfig['delimiter'] ) === 1,
				"Value delimiter must be a single byte char" );
			$delim = preg_quote( $delimConfig['delimiter'], '/' );
			$allDelims .= $delim;
			if ( isset( $delimConfig['suffixes'] ) ) {
				// Use lookbehind to only match the suffix if it was used with the proper delimiter
				// i.e i should only be matched in /regex/i not "regex"i
				$optionalSuffixes[] = "(?<=$delim)" . preg_quote( $delimConfig['suffixes'], '/' );
			}
		}
		$quotedValue = "(?<delim>[$allDelims])" . // Capture the delimiter used to use in backreferences
			// use a negative lookahead to consume any char that is not the captured delimiter
			// but also accept to escape the captured delimiter
			"(?<quoted>(?:\\\\\g{delim}|(?!\g{delim}).)*)" .
			"\g{delim}";
		if ( $optionalSuffixes ) {
			$quotedValue .= "(?<suffixes>" . implode( '|', $optionalSuffixes ) . ')?';
		}
		// XXX: we support only " to break the unquoted value
		$unquotedValue = "(?<unquoted>[^\"\s]$quantifier)";
		return $quotedValue . '|' . $unquotedValue;
	}

	/**
	 * Applies the detected keyword from the search term. May apply changes
	 * either to $context directly, or return a filter to be added.
	 *
	 * @param SearchContext $context
	 * @param string $key The keyword
	 * @param string $value The value attached to the keyword with quotes stripped and escaped
	 *  quotes un-escaped.
	 * @param string $quotedValue The original value in the search string, including quotes if used
	 * @param bool $negated Is the search negated? Not used to generate the returned AbstractQuery,
	 *  that will be negated as necessary. Used for any other building/context necessary.
	 * @return array Two element array, first an AbstractQuery or null to apply to the
	 *  query. Second a boolean indicating if the quotedValue should be kept in the search
	 *  string.
	 */
	abstract protected function doApply( SearchContext $context, $key, $value, $quotedValue, $negated );

	/**
	 * Fully featured apply method which delegates to doApply by default.
	 *
	 * The default implementation ignores $delimiter and $suffix.
	 *
	 * @param SearchContext $context
	 * @param string $key The keyword
	 * @param string $value The value attached to the keyword with quotes stripped and escaped
	 *  quotes un-escaped.
	 * @param string $quotedValue The original value in the search string, including quotes if used
	 * @param bool $negated Is the search negated? Not used to generate the returned AbstractQuery,
	 *  that will be negated as necessary. Used for any other building/context necessary.
	 * @param string $delimiter the delimiter char used to wrap the keyword value ('"' in intitle:"test")
	 * @param string $suffix the optional suffix used after the value ('i' in insource:/regex/i)
	 * @return array Two element array, first an AbstractQuery or null to apply to the
	 *  query. Second a boolean indicating if the quotedValue should be kept in the search
	 *  string.
	 */
	public function doApplyExtended(
		SearchContext $context,
		$key,
		$value,
		$quotedValue,
		$negated,
		$delimiter,
		$suffix
	) {
		return $this->doApply( $context, $key, $value, $quotedValue, $negated );
	}

	/**
	 * Finds every occurrence of this feature's keywords in $term, hands each
	 * one to doApplyExtended() and registers the returned filter (if any) on
	 * $context. A keyword prefixed with "-" is treated as negated.
	 *
	 * NOTE(review): preg_replace_callback() returns null when PCRE fails; in that
	 * case this method returns null rather than a string. Confirm callers cope.
	 *
	 * @param SearchContext $context
	 * @param string $term Search query
	 * @return string Remaining search query
	 */
	public function apply( SearchContext $context, $term ) {
		$keyListRegex = implode(
			'|',
			array_map(
				static function ( $kw ) {
					return preg_quote( $kw, '/' );
				},
				$this->getKeywords()
			)
		);
		// Hook to the beginning allowing optional spaces if we are a queryHeader
		// otherwise lookbehind allowing begin or space.
		$begin = $this->queryHeader() ? '(?:^\s*)' : '(?<=^|\s)';
		$keywordRegex = '(?<key>-?(?:' . $keyListRegex . '))';
		$valueSideRegex = '';
		if ( $this->hasValue() ) {
			$valueRegex = '(?<value>' . $this->getValueRegex() . ')';
			// If we allow empty values we don't allow spaces between
			// the keyword and its value, a space would mean "empty value"
			$spacesAfterSep = $this->allowEmptyValue() ? '' : '\s*';
			$valueSideRegex = "{$spacesAfterSep}{$valueRegex}\\s?";
		}

		$callback = function ( $match ) use ( $context ) {
			$key = $match['key'];
			Assert::invariant( $this->hasValue() === isset( $match['value'] ), 'a value must have matched' );
			$quotedValue = '';
			$value = '';
			$valueDelimiter = '';
			$valueSuffix = '';
			if ( $this->hasValue() ) {
				$quotedValue = $match['value'];
				if ( isset( $match["unquoted"] ) ) {
					$value = $match["unquoted"];
				} else {
					$valueDelimiter = $match['delim'];
					// Un-escape the delimiters that were escaped inside the quoted value
					$value = str_replace( "\\$valueDelimiter", $valueDelimiter, $match["quoted"] );
				}
				if ( isset( $match["suffixes"] ) && $match["suffixes"] !== '' ) {
					$valueSuffix = $match["suffixes"];
					// The suffix is the tail of the matched value: drop exactly that many
					// bytes. rtrim() is not suitable here, its second argument is a
					// character mask and not a literal suffix.
					$quotedValue = substr( $quotedValue, 0, -strlen( $valueSuffix ) );
				}
			}
			if ( $key[0] === '-' ) {
				$negated = true;
				$key = substr( $key, 1 );
			} else {
				$negated = false;
			}

			$context->addSyntaxUsed( $this->getFeatureName( $key, $valueDelimiter ) );
			[ $filter, $keepText ] = $this->doApplyExtended(
				$context,
				$key,
				$value,
				$quotedValue,
				$negated,
				$valueDelimiter,
				$valueSuffix
			);
			if ( $filter !== null ) {
				if ( $negated ) {
					$context->addNotFilter( $filter );
				} else {
					$context->addFilter( $filter );
				}
			}
			// FIXME: this adds a trailing space if this is the last keyword
			return $keepText ? "$quotedValue " : '';
		};

		return preg_replace_callback(
			"/{$begin}{$keywordRegex}:{$valueSideRegex}/",
			$callback,
			$term
		);
	}

	/**
	 * Parses boosted term: term^2.3
	 *
	 * The string is split on the first "^". When there is no "^" the whole
	 * string is the term and the boost is null. A boost that is not numeric, or
	 * that is negative, is reported through $warningCollector with
	 * self::WARN_MESSAGE_INVALID_BOOST and replaced by null.
	 *
	 * @param string $keyword
	 * @param WarningCollector $warningCollector
	 * @return array
	 * @phan-return array{term:string,boost:float|null}
	 */
	public function parseBoost( string $keyword, WarningCollector $warningCollector ): array {
		$termAndBoost = explode( '^', $keyword, 2 );
		if ( count( $termAndBoost ) === 1 ) {
			return [ 'term' => $termAndBoost[0], 'boost' => null ];
		}
		[ $term, $boost ] = $termAndBoost;
		if ( !is_numeric( $boost ) ) {
			$warningCollector->addWarning( self::WARN_MESSAGE_INVALID_BOOST,
				Message::plaintextParam( $boost ) );
			return [ 'term' => $term, 'boost' => null ];
		}
		$boost = floatval( $boost );
		if ( $boost < 0 ) {
			$warningCollector->addWarning( self::WARN_MESSAGE_INVALID_BOOST,
				Message::numParam( $boost ) );
			return [ 'term' => $term, 'boost' => null ];
		}
		return [ 'term' => $term, 'boost' => $boost ];
	}

}