Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
96.23% |
51 / 53 |
|
80.00% |
4 / 5 |
CRAP | |
0.00% |
0 / 1 |
| NearMatchFieldQueryBuilder | |
96.23% |
51 / 53 |
|
80.00% |
4 / 5 |
14 | |
0.00% |
0 / 1 |
| defaultFromSearchConfig | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
2 | |||
| defaultFromWeight | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
1 | |||
| __construct | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| buildFromParsedQuery | |
93.75% |
30 / 32 |
|
0.00% |
0 / 1 |
8.02 | |||
| buildFromQueryString | |
100.00% |
13 / 13 |
|
100.00% |
1 / 1 |
2 | |||
| 1 | <?php |
| 2 | |
| 3 | namespace CirrusSearch\Query\Builder; |
| 4 | |
| 5 | use CirrusSearch\Parser\AST\EmptyQueryNode; |
| 6 | use CirrusSearch\Parser\AST\FuzzyNode; |
| 7 | use CirrusSearch\Parser\AST\KeywordFeatureNode; |
| 8 | use CirrusSearch\Parser\AST\ParsedNode; |
| 9 | use CirrusSearch\Parser\AST\ParsedQuery; |
| 10 | use CirrusSearch\Parser\AST\PhrasePrefixNode; |
| 11 | use CirrusSearch\Parser\AST\PhraseQueryNode; |
| 12 | use CirrusSearch\Parser\AST\PrefixNode; |
| 13 | use CirrusSearch\Parser\AST\Visitor\LeafVisitor; |
| 14 | use CirrusSearch\Parser\AST\WildcardNode; |
| 15 | use CirrusSearch\Parser\AST\WordsQueryNode; |
| 16 | use CirrusSearch\Query\InTitleFeature; |
| 17 | use CirrusSearch\SearchConfig; |
| 18 | use Elastica\Query\AbstractQuery; |
| 19 | use Elastica\Query\MatchNone; |
| 20 | use Elastica\Query\MultiMatch; |
| 21 | use Wikimedia\Assert\Assert; |
| 22 | |
/**
 * Builds the "near match" query, either from a raw query string or from a ParsedQuery whose
 * keywords are stripped out beforehand.
 *
 * The ParsedQuery handling tries to mimic the strategy of the old query parser, which works by
 * removing keywords from the query string. It might make sense in the future to reconsider this
 * approach and see whether the ParsedQuery allows better strategies.
 */
class NearMatchFieldQueryBuilder {
	public const ALL_NEAR_MATCH = "all_near_match";
	public const ALL_NEAR_MATCH_ACCENT_FOLDED = self::ALL_NEAR_MATCH . ".asciifolding";

	/** @var array profile; its "fields" entry lists [ "name" => ..., "weight" => ... ] definitions */
	private array $profile;

	/**
	 * Create a builder using the default profile, weighted by the CirrusSearchNearMatchWeight
	 * setting. A falsy setting (unset, 0, empty string, ...) falls back to a weight of 2.
	 *
	 * @param SearchConfig $config
	 * @return self
	 */
	public static function defaultFromSearchConfig( SearchConfig $config ): self {
		$weight = $config->get( 'CirrusSearchNearMatchWeight' );
		if ( !$weight ) {
			$weight = 2;
		}
		return self::defaultFromWeight( $weight );
	}

	/**
	 * Create a builder using the default profile: the all_near_match field at $weight and its
	 * accent folded variant at 75% of $weight, both rounded to 3 decimals.
	 *
	 * @param float $weight weight of the all_near_match field
	 * @return self
	 */
	public static function defaultFromWeight( float $weight ): self {
		$fields = [
			[ "name" => self::ALL_NEAR_MATCH, "weight" => round( $weight, 3 ) ],
			[ "name" => self::ALL_NEAR_MATCH_ACCENT_FOLDED, "weight" => round( $weight * 0.75, 3 ) ],
		];
		return new self( [ "fields" => $fields ] );
	}

	/**
	 * @param array $profile profile whose "fields" entry is a list of
	 *  [ "name" => field name, "weight" => field weight ] definitions
	 */
	public function __construct( array $profile ) {
		$this->profile = $profile;
	}

	/**
	 * Build the near match query out of a ParsedQuery.
	 *
	 * The namespace header and the keywords are blanked out of the original query string, runs of
	 * whitespace are then collapsed and the remaining text is handed to buildFromQueryString().
	 *
	 * @param ParsedQuery $query
	 * @return AbstractQuery
	 */
	public function buildFromParsedQuery( ParsedQuery $query ): AbstractQuery {
		$stripper = new class( $query ) extends LeafVisitor {
			/** @var string the query string, progressively blanked while visiting the tree */
			public string $nearMatch;

			public function __construct( ParsedQuery $query ) {
				parent::__construct();
				$this->nearMatch = $query->getQuery();
				$header = $query->getNamespaceHeader();
				if ( $header == null ) {
					return;
				}
				$this->blank( $header );
			}

			/**
			 * Overwrite with spaces the part of the query string covered by $node.
			 * When a replacement is given it is written at the start of that part and padded with
			 * spaces on the right; it must be strictly shorter than the part it replaces.
			 *
			 * @param ParsedNode $node the node holding the location of the query string to blank
			 * @param string $replacement optional replacement string to use
			 */
			private function blank( ParsedNode $node, string $replacement = "" ): void {
				$start = $node->getStartOffset();
				$length = $node->getEndOffset() - $start;
				Assert::parameter( strlen( $replacement ) < $length, '$replacement',
					'must be shorter than the replaced ParsedNode' );
				// pad to the exact length of the node so that the offsets of the other nodes stay valid
				$padded = str_pad( $replacement, $length, " " );
				$this->nearMatch = substr_replace( $this->nearMatch, $padded, $start, $length );
			}

			/** Nothing to blank: the text of this node is kept as-is. */
			public function visitWordsQueryNode( WordsQueryNode $node ) {
			}

			/** Nothing to blank: the text of this node is kept as-is. */
			public function visitPhraseQueryNode( PhraseQueryNode $node ) {
			}

			/** Nothing to blank: the text of this node is kept as-is. */
			public function visitPhrasePrefixNode( PhrasePrefixNode $node ) {
			}

			/** Nothing to blank: the text of this node is kept as-is. */
			public function visitFuzzyNode( FuzzyNode $node ) {
			}

			/** Nothing to blank: the text of this node is kept as-is. */
			public function visitPrefixNode( PrefixNode $node ) {
			}

			/** Nothing to blank: the text of this node is kept as-is. */
			public function visitWildcardNode( WildcardNode $node ) {
			}

			/** Nothing to blank: the text of this node is kept as-is. */
			public function visitEmptyQueryNode( EmptyQueryNode $node ) {
			}

			/**
			 * Remove the keyword from the query string, keeping only the value of a non-negated
			 * intitle keyword whose parsed value is empty.
			 *
			 * @param KeywordFeatureNode $node
			 */
			public function visitKeywordFeatureNode( KeywordFeatureNode $node ) {
				// TODO: generalize this InTitleFeature behavior
				// The text of the intitle keyword is kept when:
				// - it's not negated
				// - it's not a regular expression (detected with $node->getParsedValue() == [])
				$keepsValue = !$this->negated()
					&& ( $node->getKeyword() instanceof InTitleFeature )
					&& $node->getParsedValue() == [];
				if ( $keepsValue ) {
					$this->blank( $node, $node->getQuotedValue() );
					return;
				}

				// Painful attempt to keep a weird edge-case of the old query parser, which does not
				// support negating a keyword clause with an explicit NOT:
				//   "NOT keyword:value" becomes "NOT"
				//   "-keyword:value" becomes ""
				// BooleanClause::isExplicit tells apart the use of NOT from the use of "-".
				// It might be interesting to re-consider the usefulness of this edge-case.
				$clause = $this->getCurrentBooleanClause();
				$negatedNode = null;
				if ( $clause != null ) {
					$negatedNode = $clause->getNegatedNode();
				}
				if ( $negatedNode === null || $clause->isExplicit() ) {
					$this->blank( $node );
					return;
				}
				// the negated node should have the proper offsets to blank the "-" as well
				$this->blank( $negatedNode );
			}
		};

		$query->getRoot()->accept( $stripper );
		$collapsed = preg_replace( '/\s{2,}/', ' ', $stripper->nearMatch );

		return $this->buildFromQueryString( trim( $collapsed ) );
	}

	/**
	 * Build the near match query out of a plain query string.
	 *
	 * @param string $query
	 * @return AbstractQuery a MatchNone query when $query is empty or only made of whitespace,
	 *  otherwise a MultiMatch query over the fields of the profile, each formatted as "name^weight"
	 */
	public function buildFromQueryString( string $query ): AbstractQuery {
		if ( preg_match( '/^\s*$/', $query ) === 1 ) {
			return new MatchNone();
		}

		$boostedFields = array_map(
			static fn ( array $fieldDef ): string => $fieldDef["name"] . "^" . $fieldDef["weight"],
			$this->profile["fields"]
		);

		$multiMatch = new MultiMatch();
		$multiMatch->setQuery( $query );
		$multiMatch->setFields( $boostedFields );
		return $multiMatch;
	}

}