Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
96.59% |
85 / 88 |
|
81.25% |
13 / 16 |
CRAP | |
0.00% |
0 / 1 |
| ParsedQuery | |
96.59% |
85 / 88 |
|
81.25% |
13 / 16 |
38 | |
0.00% |
0 / 1 |
| __construct | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
2 | |||
| getRoot | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| getQuery | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| getQueryWithoutNsHeader | |
66.67% |
2 / 3 |
|
0.00% |
0 / 1 |
2.15 | |||
| getRawQuery | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| hasCleanup | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| getParseWarnings | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| getNamespaceHeader | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| getRequiredNamespaces | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| getActualNamespaces | |
100.00% |
17 / 17 |
|
100.00% |
1 / 1 |
10 | |||
| getCrossSearchStrategy | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
2 | |||
| isQueryOfClass | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| loadQueryClass | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
2 | |||
| preloadQueryClasses | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
2 | |||
| getFeaturesUsed | |
100.00% |
11 / 11 |
|
100.00% |
1 / 1 |
3 | |||
| toArray | |
100.00% |
22 / 22 |
|
100.00% |
1 / 1 |
7 | |||
| 1 | <?php |
| 2 | |
| 3 | namespace CirrusSearch\Parser\AST; |
| 4 | |
| 5 | use CirrusSearch\CrossSearchStrategy; |
| 6 | use CirrusSearch\Parser\AST\Visitor\KeywordNodeVisitor; |
| 7 | use CirrusSearch\Parser\ParsedQueryClassifiersRepository; |
| 8 | use Wikimedia\Assert\Assert; |
| 9 | |
/**
 * Parsed query.
 *
 * Holds the result of parsing a search query: the root node of the AST,
 * the cleaned up and raw query strings, the cleanups applied, the parse
 * warnings, the optional namespace header and the required namespaces.
 * Derived information (cross search strategy, query classes, features
 * used) is computed on first access and cached on the instance.
 */
class ParsedQuery {

	/**
	 * markup to indicate that the query was cleaned up
	 * detecting a double quote used as a gershayim
	 * see T66350
	 */
	public const CLEANUP_GERSHAYIM_QUIRKS = 'gershayim_quirks';

	/**
	 * markup to indicate that the query had some question marks
	 * stripped
	 * @see \CirrusSearch\Util::stripQuestionMarks
	 */
	public const CLEANUP_QMARK_STRIPPING = 'stripped_qmark';

	/**
	 * markup to indicate that we removed a '~' at the beginning of the query
	 */
	public const TILDE_HEADER = 'tilde_header';

	/**
	 * @var ParsedNode root node of the AST
	 */
	private $root;

	/**
	 * @var string cleaned up query string
	 */
	private $query;

	/**
	 * @var string original query as received by the search engine
	 */
	private $rawQuery;

	/**
	 * @var bool[] indexed by cleanup type
	 */
	private $queryCleanups;

	/**
	 * @var ParseWarning[]
	 */
	private $parseWarnings;

	/**
	 * @var NamespaceHeaderNode|null namespace specified at the beginning of the query
	 */
	private $namespaceHeader;

	/**
	 * @var array|string (array of int or 'all') list of required namespaces
	 * for the query to be able to return results.
	 * This list of namespaces must always be added no matter what is requested
	 * before.
	 * Main use-case is the prefix keyword that must supersede any other settings.
	 */
	private $requiredNamespaces;

	/**
	 * @var CrossSearchStrategy|null (lazy loaded)
	 */
	private $crossSearchStrategy;

	/**
	 * @var ParsedQueryClassifiersRepository
	 */
	private $classifierRepository;

	/**
	 * @var bool[] indexed by query class name
	 */
	private $queryClassCache = [];

	/**
	 * @var string[]|null list of used features in the query (lazy loaded)
	 * @see \CirrusSearch\Query\KeywordFeature::getFeatureName()
	 */
	private $featuresUsed;

	/**
	 * @param ParsedNode $root root node of the AST
	 * @param string $query cleaned up query string
	 * @param string $rawQuery original query as received by the search engine
	 * @param bool[] $queryCleanups indexed by cleanup type (non-empty when $query !== $rawQuery)
	 * @param NamespaceHeaderNode|null $namespaceHeader namespace found as a "header" of the query,
	 *  null if none was specified
	 * @param array|string $requiredNamespaces array of namespaces or the string 'all';
	 *  any other value is rejected by Assert::parameter()
	 * @param ParseWarning[] $parseWarnings list of warnings detected during parsing
	 * @param ParsedQueryClassifiersRepository $repository repository used to classify this query
	 */
	public function __construct(
		ParsedNode $root,
		$query,
		$rawQuery,
		$queryCleanups,
		?NamespaceHeaderNode $namespaceHeader,
		$requiredNamespaces,
		array $parseWarnings,
		ParsedQueryClassifiersRepository $repository
	) {
		$this->root = $root;
		$this->query = $query;
		$this->rawQuery = $rawQuery;
		$this->queryCleanups = $queryCleanups;
		$this->parseWarnings = $parseWarnings;
		$this->namespaceHeader = $namespaceHeader;
		Assert::parameter( is_array( $requiredNamespaces ) || $requiredNamespaces === 'all',
			'$requiredNamespaces', 'must be an array or "all"' );
		$this->requiredNamespaces = $requiredNamespaces;
		$this->classifierRepository = $repository;
	}

	/**
	 * Root node of the AST.
	 * @return ParsedNode
	 */
	public function getRoot() {
		return $this->root;
	}

	/**
	 * The query being parsed
	 * Some cleanups may have been made to the raw query
	 * NOTE: the query may include the namespace header
	 * @return string
	 */
	public function getQuery() {
		return $this->query;
	}

	/**
	 * The query string without the namespace header.
	 *
	 * When a namespace header is present, everything before its end offset
	 * is dropped from the query; otherwise the query is returned unchanged.
	 * @return string
	 */
	public function getQueryWithoutNsHeader() {
		// TODO: remove once the AST is fully used by all building components.
		if ( $this->namespaceHeader !== null ) {
			return substr( $this->query, $this->namespaceHeader->getEndOffset() );
		}
		return $this->query;
	}

	/**
	 * The raw query as received by the search engine
	 * @return string
	 */
	public function getRawQuery() {
		return $this->rawQuery;
	}

	/**
	 * Check if the query was cleaned up with this type.
	 *
	 * The check is an isset() on the cleanups array: it is true as soon as
	 * a non-null entry exists for $cleanup.
	 * @see ParsedQuery::CLEANUP_QMARK_STRIPPING
	 * @see ParsedQuery::CLEANUP_GERSHAYIM_QUIRKS
	 * @param string $cleanup
	 * @return bool
	 */
	public function hasCleanup( $cleanup ) {
		return isset( $this->queryCleanups[$cleanup] );
	}

	/**
	 * List of warnings detected at parse time
	 * @return ParseWarning[]
	 */
	public function getParseWarnings() {
		return $this->parseWarnings;
	}

	/**
	 * Get the node of the namespace header identified in the prefix of the query
	 * if specified.
	 * It can be null in all other cases
	 * @return NamespaceHeaderNode|null
	 */
	public function getNamespaceHeader() {
		return $this->namespaceHeader;
	}

	/**
	 * @return array|string array of additional namespaces or 'all' if all namespaces required
	 */
	public function getRequiredNamespaces() {
		return $this->requiredNamespaces;
	}

	/**
	 * Determine the actual namespaces required for this query to run
	 * assuming that $namespaces is the list of namespaces initially requested
	 * usually set <code>\SearchEngine::setNamespaces()</code>.
	 *
	 * All namespaces (an empty array) are returned when any of these holds:
	 * the required namespaces are 'all', $additionalRequiredNamespaces is an
	 * empty array, the namespace header is 'all', or there is no namespace
	 * header and $namespaces is empty/null. Otherwise the result is the
	 * de-duplicated merge of the namespace header (or $namespaces when there
	 * is no header), the required namespaces and the additional ones.
	 *
	 * @param int[]|null $namespaces initial namespaces
	 * @param int[]|null $additionalRequiredNamespaces additional namespaces required
	 *  (by ContextualFilters); null means none, an empty array means all namespaces
	 * @return int[] the list of namespaces that have to be queried,
	 * empty array means all namespaces
	 * @see \SearchEngine::setNamespaces()
	 * @see self::getRequiredNamespaces()
	 * @see self::getNamespaceHeader()
	 * @see \CirrusSearch\Query\Builder\ContextualFilter::requiredNamespaces()
	 */
	public function getActualNamespaces( ?array $namespaces = null, ?array $additionalRequiredNamespaces = null ) {
		if ( $this->requiredNamespaces === 'all' ) {
			// e.g. prefix:all:foo (all namespaces must be queried no matter what is requested before)
			return [];
		}

		if ( $additionalRequiredNamespaces === [] ) {
			// an explicit empty list of additional namespaces stands for "all namespaces"
			return [];
		}

		if ( $this->namespaceHeader !== null && $this->namespaceHeader->getNamespace() === 'all' ) {
			// e.g. all:foo
			return [];
		}

		if ( $this->namespaceHeader === null && !$namespaces ) {
			// Everything was selected using SearchEngine::setNamespaces() but nothing more specific
			// was requested using a prefixed ns
			return [];
		}

		// now everything else will be an explicit list of namespaces
		Assert::postcondition( $this->namespaceHeader === null || is_int( $this->namespaceHeader->getNamespace() ),
			'$this->namespaceHeader must be null or an integer' );

		$ns = $this->namespaceHeader === null ? $namespaces : [ $this->namespaceHeader->getNamespace() ];
		Assert::postcondition( is_array( $ns ) && $ns !== [],
			'at this point we must have a list of specific namespaces' );

		// array_values() re-indexes the list after array_unique() dropped duplicates
		return array_values( array_unique(
			// @phan-suppress-next-line PhanCoalescingNeverNull
			array_merge( $ns, $this->requiredNamespaces, $additionalRequiredNamespaces ?? [] ),
			SORT_REGULAR
		) );
	}

	/**
	 * Get the cross search strategy supported by this query.
	 *
	 * Computed on first call by intersecting, starting from the "all wikis"
	 * strategy, the strategy reported by every keyword visited in the AST;
	 * the result is cached.
	 *
	 * @return CrossSearchStrategy
	 */
	public function getCrossSearchStrategy() {
		if ( $this->crossSearchStrategy === null ) {
			$visitor = new class() extends KeywordNodeVisitor {
				public CrossSearchStrategy $strategy;

				public function __construct( array $excludeOccurs = [], array $keywordClasses = [] ) {
					parent::__construct( $excludeOccurs, $keywordClasses );
					$this->strategy = CrossSearchStrategy::allWikisStrategy();
				}

				public function doVisitKeyword( KeywordFeatureNode $node ) {
					$this->strategy = $this->strategy
						->intersect( $node->getKeyword()->getCrossSearchStrategy( $node ) );
				}
			};
			$this->root->accept( $visitor );
			$this->crossSearchStrategy = $visitor->strategy;
		}
		return $this->crossSearchStrategy;
	}

	/**
	 * Check whether this query belongs to the given query class.
	 * The answer is served from the cache when available, otherwise the
	 * matching classifier is run.
	 * @param string $class
	 * @return bool
	 * @throws \CirrusSearch\Parser\ParsedQueryClassifierException if the class is unknown
	 */
	public function isQueryOfClass( $class ) {
		return $this->queryClassCache[$class] ?? $this->loadQueryClass( $class );
	}

	/**
	 * Run the classifier registered for $class and cache the outcome for
	 * every class this classifier declares.
	 * @param string $class
	 * @return bool
	 * @throws \CirrusSearch\Parser\ParsedQueryClassifierException
	 */
	private function loadQueryClass( $class ) {
		$classifier = $this->classifierRepository->getClassifier( $class );
		$newClasses = $classifier->classify( $this );
		foreach ( $classifier->classes() as $k ) {
			$this->queryClassCache[$k] = in_array( $k, $newClasses, true );
		}
		// Guard against a classifier that does not list $class in classes():
		// cache false rather than reading an undefined index (which would
		// yield null and make isQueryOfClass() re-run the classifier each time).
		$this->queryClassCache[$class] ??= false;
		return $this->queryClassCache[$class];
	}

	/**
	 * Preload all known query classes and classify this
	 * query.
	 * @throws \CirrusSearch\Parser\ParsedQueryClassifierException
	 */
	public function preloadQueryClasses() {
		foreach ( $this->classifierRepository->getKnownClassifiers() as $class ) {
			$this->isQueryOfClass( $class );
		}
	}

	/**
	 * Get the list of keyword features used by this query.
	 * 'namespace_header' is appended when the query has a namespace header.
	 * The list is computed on first call and cached.
	 * @see \CirrusSearch\Query\KeywordFeature::getFeatureName()
	 * @return string[]
	 */
	public function getFeaturesUsed() {
		if ( $this->featuresUsed === null ) {
			$visitor = new class() extends KeywordNodeVisitor {
				// feature names are stored as keys so that each one is listed once
				public array $features = [];

				public function doVisitKeyword( KeywordFeatureNode $node ) {
					$name = $node->getKeyword()
						->getFeatureName( $node->getKey(), $node->getDelimiter() );
					$this->features[$name] = true;
				}
			};
			$this->root->accept( $visitor );
			$this->featuresUsed = array_keys( $visitor->features );
			if ( $this->namespaceHeader !== null ) {
				$this->featuresUsed[] = 'namespace_header';
			}
		}
		return $this->featuresUsed;
	}

	/**
	 * Array representation of this parsed query.
	 *
	 * 'query', 'rawQuery' and 'root' are always present; the other keys are
	 * only added when they carry a non-empty value. All known query classes
	 * are preloaded so that 'queryClassCache' lists every class that matched.
	 * @return array
	 * @throws \CirrusSearch\Parser\ParsedQueryClassifierException
	 */
	public function toArray() {
		$ar = [
			'query' => $this->query,
			'rawQuery' => $this->rawQuery
		];

		if ( $this->namespaceHeader !== null ) {
			// array union: keys already set above are not overwritten
			$ar += $this->namespaceHeader->toArray();
		}
		if ( $this->requiredNamespaces !== [] ) {
			$ar['requiredNamespaces'] = $this->requiredNamespaces;
		}
		if ( $this->queryCleanups !== [] ) {
			$ar['queryCleanups'] = $this->queryCleanups;
		}
		$this->preloadQueryClasses();
		// keep only the names of the classes this query belongs to
		$classes = array_keys( array_filter( $this->queryClassCache ) );
		if ( $classes !== [] ) {
			$ar['queryClassCache'] = $classes;
		}
		if ( $this->parseWarnings !== [] ) {
			$ar['warnings'] = array_map( static function ( ParseWarning $w ) {
				return $w->toArray();
			}, $this->parseWarnings );
		}
		$featuresUsed = $this->getFeaturesUsed();
		if ( $featuresUsed !== [] ) {
			$ar['featuresUsed'] = $featuresUsed;
		}
		$ar['root'] = $this->getRoot()->toArray();

		return $ar;
	}
}