Code Coverage |
||||||||||
Classes and Traits |
Functions and Methods |
Lines |
||||||||
Total | |
0.00% |
0 / 1 |
|
81.25% |
13 / 16 |
CRAP | |
96.47% |
82 / 85 |
ParsedQuery | |
0.00% |
0 / 1 |
|
81.25% |
13 / 16 |
39 | |
96.47% |
82 / 85 |
__construct | |
100.00% |
1 / 1 |
3 | |
100.00% |
11 / 11 |
|||
getRoot | |
100.00% |
1 / 1 |
1 | |
100.00% |
1 / 1 |
|||
getQuery | |
0.00% |
0 / 1 |
2 | |
0.00% |
0 / 1 |
|||
getQueryWithoutNsHeader | |
0.00% |
0 / 1 |
2.15 | |
66.67% |
2 / 3 |
|||
getRawQuery | |
100.00% |
1 / 1 |
1 | |
100.00% |
1 / 1 |
|||
hasCleanup | |
100.00% |
1 / 1 |
1 | |
100.00% |
1 / 1 |
|||
getParseWarnings | |
100.00% |
1 / 1 |
1 | |
100.00% |
1 / 1 |
|||
getNamespaceHeader | |
100.00% |
1 / 1 |
1 | |
100.00% |
1 / 1 |
|||
getRequiredNamespaces | |
0.00% |
0 / 1 |
2 | |
0.00% |
0 / 1 |
|||
getActualNamespaces | |
100.00% |
1 / 1 |
10 | |
100.00% |
16 / 16 |
|||
getCrossSearchStrategy | |
100.00% |
1 / 1 |
2 | |
100.00% |
10 / 10 |
|||
isQueryOfClass | |
100.00% |
1 / 1 |
1 | |
100.00% |
1 / 1 |
|||
loadQueryClass | |
100.00% |
1 / 1 |
2 | |
100.00% |
5 / 5 |
|||
preloadQueryClasses | |
100.00% |
1 / 1 |
2 | |
100.00% |
3 / 3 |
|||
getFeaturesUsed | |
100.00% |
1 / 1 |
3 | |
100.00% |
10 / 10 |
|||
toArray | |
100.00% |
1 / 1 |
7 | |
100.00% |
19 / 19 |
<?php | |
namespace CirrusSearch\Parser\AST; | |
use CirrusSearch\CrossSearchStrategy; | |
use CirrusSearch\Parser\AST\Visitor\KeywordNodeVisitor; | |
use CirrusSearch\Parser\ParsedQueryClassifiersRepository; | |
use Wikimedia\Assert\Assert; | |
/** | |
* Parsed query | |
*/ | |
class ParsedQuery { | |
/** | |
* markup to indicate that the query was cleaned up after
* detecting a double quote used as a gershayim
* (see T66350)
*/ | |
public const CLEANUP_GERSHAYIM_QUIRKS = 'gershayim_quirks'; | |
/** | |
* markup to indicate that the query had some question marks
* stripped
* @see \CirrusSearch\Util::stripQuestionMarks | |
*/ | |
public const CLEANUP_QMARK_STRIPPING = 'stripped_qmark'; | |
/** | |
* markup to indicate that we removed a '~' at the beginning of the query | |
*/ | |
public const TILDE_HEADER = 'tilde_header'; | |
/** | |
* @var ParsedNode | |
*/ | |
private $root; | |
/** | |
* @var string | |
*/ | |
private $query; | |
/** | |
* @var string | |
*/ | |
private $rawQuery; | |
/** | |
* @var bool[] indexed by cleanup type | |
*/ | |
private $queryCleanups; | |
/** | |
* @var ParseWarning[] | |
*/ | |
private $parseWarnings; | |
/** | |
* @var NamespaceHeaderNode|null namespace specified at the beginning of the query | |
*/ | |
private $namespaceHeader; | |
/** | |
* @var array|string (array of int or 'all') list of required namespaces | |
* for the query to be able to return results. | |
* These namespaces must always be added, no matter what was requested
* before.
* Main use-case is the prefix keyword that must supersede any other settings. | |
*/ | |
private $requiredNamespaces; | |
/** | |
* @var CrossSearchStrategy|null (lazy loaded) | |
*/ | |
private $crossSearchStrategy; | |
/** | |
* @var ParsedQueryClassifiersRepository | |
*/ | |
private $classifierRepository; | |
/** | |
* @var bool[] indexed by query class name | |
*/ | |
private $queryClassCache = []; | |
/** | |
* @var string[] list of used features in the query | |
* @see \CirrusSearch\Query\KeywordFeature::getFeatureName() | |
*/ | |
private $featuresUsed; | |
/**
 * Store the outcome of parsing a search query.
 *
 * @param ParsedNode $root root node of the parsed tree
 * @param string $query query string after cleanups were applied
 * @param string $rawQuery query string as originally received by the search engine
 * @param bool[] $queryCleanups cleanups applied, indexed by cleanup type
 *  (non-empty when $query !== $rawQuery)
 * @param ?NamespaceHeaderNode $namespaceHeader node describing the namespace found
 *  as a "header" of the query, or null when no header was specified
 * @param array|string $requiredNamespaces either an array or the string 'all';
 *  any other value is rejected through Assert::parameter()
 * @param ParseWarning[] $parseWarnings warnings detected during parsing
 * @param ParsedQueryClassifiersRepository $repository repository used to look up
 *  query classifiers
 */
public function __construct(
	ParsedNode $root,
	$query,
	$rawQuery,
	$queryCleanups,
	?NamespaceHeaderNode $namespaceHeader,
	$requiredNamespaces,
	array $parseWarnings,
	ParsedQueryClassifiersRepository $repository
) {
	// Validate the only loosely-typed argument that has a constrained shape
	// before populating the instance.
	$isValidRequiredNs = is_array( $requiredNamespaces ) || $requiredNamespaces === 'all';
	Assert::parameter( $isValidRequiredNs, '$requiredNamespaces', 'must be an array or "all"' );

	// Parse tree and the two flavours of the query string
	$this->root = $root;
	$this->query = $query;
	$this->rawQuery = $rawQuery;

	// Parsing side information
	$this->queryCleanups = $queryCleanups;
	$this->parseWarnings = $parseWarnings;

	// Namespace related state
	$this->namespaceHeader = $namespaceHeader;
	$this->requiredNamespaces = $requiredNamespaces;

	$this->classifierRepository = $repository;
}
/**
 * Root node of the parsed tree, as given to the constructor.
 *
 * @return ParsedNode
 */
public function getRoot() {
	return $this->root;
}
/**
 * The query string that was parsed.
 *
 * It may differ from the raw query because of cleanups, and it may
 * still contain the namespace header.
 *
 * @return string
 * @see self::getQueryWithoutNsHeader()
 * @see self::getRawQuery()
 */
public function getQuery() {
	return $this->query;
}
/**
 * The parsed query string with the namespace header removed.
 *
 * When a namespace header is present, everything before the header's
 * end offset is dropped; otherwise the query is returned unchanged.
 *
 * @return string
 */
public function getQueryWithoutNsHeader() {
	// TODO: remove once the AST is fully used by all building components.
	$header = $this->namespaceHeader;
	if ( $header === null ) {
		// nothing to strip
		return $this->query;
	}

	return substr( $this->query, $header->getEndOffset() );
}
/**
 * The untouched query string, exactly as the search engine received it.
 *
 * @return string
 */
public function getRawQuery() {
	return $this->rawQuery;
}
/**
 * Check whether the query was cleaned up with the given cleanup type.
 *
 * The check is an isset() on the cleanups array: it is true as soon as
 * a non-null entry exists for $cleanup.
 *
 * @see ParsedQuery::CLEANUP_QMARK_STRIPPING
 * @see ParsedQuery::CLEANUP_GERSHAYIM_QUIRKS
 * @param string $cleanup cleanup type to look for
 * @return bool
 */
public function hasCleanup( $cleanup ) {
	return isset( $this->queryCleanups[$cleanup] );
}
/**
 * Warnings that were detected while the query was parsed.
 *
 * @return ParseWarning[]
 */
public function getParseWarnings() {
	return $this->parseWarnings;
}
/**
 * Node of the namespace header identified in the prefix of the query.
 *
 * @return NamespaceHeaderNode|null the header node, or null when the
 *  query has no namespace header
 */
public function getNamespaceHeader() {
	return $this->namespaceHeader;
}
/**
 * Namespaces that this query requires on top of whatever was requested.
 *
 * @return array|string array of additional namespaces, or the string
 *  'all' when all namespaces are required
 */
public function getRequiredNamespaces() {
	return $this->requiredNamespaces;
}
/**
 * Determine the actual namespaces required for this query to run
 * assuming that $namespaces is the list of namespaces initially requested,
 * usually set with <code>\SearchEngine::setNamespaces()</code>.
 *
 * All namespaces (an empty array) are returned when any of these holds:
 *  - the required namespaces are 'all'
 *  - $additionalRequiredNamespaces is an empty array
 *  - the namespace header is 'all'
 *  - there is no namespace header and $namespaces is null or empty
 * Otherwise the result is the header namespace (or $namespaces when there
 * is no header) merged with the required and additional namespaces,
 * de-duplicated and re-indexed.
 *
 * @param int[]|null $namespaces initial namespaces
 * @param int[]|null $additionalRequiredNamespaces additional namespaces required
 *  (by ContextualFilters); null adds nothing, an empty array selects all namespaces
 * @return int[] the list of namespaces that have to be queried,
 *  empty array means all namespaces
 * @see \SearchEngine::setNamespaces()
 * @see self::getRequiredNamespaces()
 * @see self::getNamespaceHeader()
 * @see \CirrusSearch\Query\Builder\ContextualFilter::requiredNamespaces()
 */
public function getActualNamespaces( ?array $namespaces = null, ?array $additionalRequiredNamespaces = null ) {
	if ( $this->requiredNamespaces === 'all' ) {
		// e.g. prefix:all:foo (all namespaces must be queried no matter what is requested before)
		return [];
	}

	if ( $additionalRequiredNamespaces === [] ) {
		// an explicitly empty list of additional namespaces selects everything
		return [];
	}

	if ( $this->namespaceHeader !== null && $this->namespaceHeader->getNamespace() === 'all' ) {
		// e.g. all:foo
		return [];
	}

	if ( $this->namespaceHeader === null && !$namespaces ) {
		// Everything was selected using SearchEngine::setNamespaces() but nothing more specific
		// was requested using a prefixed ns
		return [];
	}

	// now everything else will be an explicit list of namespaces
	Assert::postcondition( $this->namespaceHeader === null || is_int( $this->namespaceHeader->getNamespace() ),
		'$this->namespaceHeader must be null or an integer' );

	// the namespace header, when present, replaces the initially requested namespaces
	$ns = $this->namespaceHeader === null ? $namespaces : [ $this->namespaceHeader->getNamespace() ];

	Assert::postcondition( is_array( $ns ) && $ns !== [],
		'at this point we must have a list of specific namespaces' );

	// array_unique() preserves keys, array_values() turns the result back into a list
	return array_values( array_unique(
		array_merge( $ns, $this->requiredNamespaces, $additionalRequiredNamespaces ?? [] ),
		SORT_REGULAR
	) );
}
/**
 * Get the cross search strategy supported by this query.
 *
 * The strategy is computed on first call and then memoized. It starts
 * from CrossSearchStrategy::allWikisStrategy() and is intersected with the
 * strategy reported by the keyword of every keyword node visited in the
 * tree.
 *
 * @return CrossSearchStrategy
 */
public function getCrossSearchStrategy() {
	if ( $this->crossSearchStrategy !== null ) {
		// already computed by a previous call
		return $this->crossSearchStrategy;
	}

	$strategyCollector = new class() extends KeywordNodeVisitor {
		/** @var CrossSearchStrategy intersection of the strategies seen so far */
		public $strategy;

		public function __construct( array $excludeOccurs = [], array $keywordClasses = [] ) {
			parent::__construct( $excludeOccurs, $keywordClasses );
			$this->strategy = CrossSearchStrategy::allWikisStrategy();
		}

		/**
		 * Narrow the collected strategy with the one supported by this keyword.
		 * @param KeywordFeatureNode $node
		 */
		public function doVisitKeyword( KeywordFeatureNode $node ) {
			$keywordStrategy = $node->getKeyword()->getCrossSearchStrategy( $node );
			$this->strategy = $this->strategy->intersect( $keywordStrategy );
		}
	};

	$this->root->accept( $strategyCollector );
	$this->crossSearchStrategy = $strategyCollector->strategy;

	return $this->crossSearchStrategy;
}
/**
 * Tell whether this query belongs to the given query class.
 *
 * The answer is served from the per-instance cache when available,
 * otherwise the matching classifier is run through loadQueryClass().
 *
 * @param string $class query class name
 * @return bool
 * @throws \CirrusSearch\Parser\ParsedQueryClassifierException if the class is unknown
 */
public function isQueryOfClass( $class ) {
	if ( isset( $this->queryClassCache[$class] ) ) {
		return $this->queryClassCache[$class];
	}

	return $this->loadQueryClass( $class );
}
/**
 * Run the classifier responsible for $class and cache its verdicts.
 *
 * Every class the classifier declares through classes() gets a cache
 * entry: true when classify() reported it for this query, false otherwise.
 *
 * @param string $class query class name
 * @return bool the cached verdict for $class
 * @throws \CirrusSearch\Parser\ParsedQueryClassifierException
 */
private function loadQueryClass( $class ) {
	$classifier = $this->classifierRepository->getClassifier( $class );
	$matchedClasses = $classifier->classify( $this );

	foreach ( $classifier->classes() as $declaredClass ) {
		// strict comparison: class names are matched exactly
		$isMatch = in_array( $declaredClass, $matchedClasses, true );
		$this->queryClassCache[$declaredClass] = $isMatch;
	}

	return $this->queryClassCache[$class];
}
/**
 * Classify this query against every classifier known to the repository.
 *
 * Each entry returned by getKnownClassifiers() is passed to
 * isQueryOfClass(); the results are discarded here, the call is only made
 * so that the verdicts end up in the cache.
 *
 * @throws \CirrusSearch\Parser\ParsedQueryClassifierException
 */
public function preloadQueryClasses() {
	$knownClasses = $this->classifierRepository->getKnownClassifiers();
	foreach ( $knownClasses as $knownClass ) {
		$this->isQueryOfClass( $knownClass );
	}
}
/** | |
* Get the list of keyword features used by this query. | |
* @see \CirrusSearch\Query\KeywordFeature::getFeatureName() | |
* @return string[] | |
*/ | |
public function getFeaturesUsed() { | |
if ( $this->featuresUsed === null ) { | |
$visitor = new class() extends KeywordNodeVisitor { | |
public $features = []; | |
/** | |
* @param KeywordFeatureNode $node | |
*/ | |
public function doVisitKeyword( KeywordFeatureNode $node ) { | |
$name = $node->getKeyword() | |
->getFeatureName( $node->getKey(), $node->getDelimiter() ); | |
$this->features[$name] = true; | |
} | |
}; | |
$this->root->accept( $visitor ); | |
$this->featuresUsed = array_keys( $visitor->features ); | |
if ( $this->namespaceHeader ) { | |
$this->featuresUsed[] = 'namespace_header'; | |
} | |
} | |