Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
96.23% |
51 / 53 |
|
80.00% |
4 / 5 |
CRAP | |
0.00% |
0 / 1 |
NearMatchFieldQueryBuilder | |
96.23% |
51 / 53 |
|
80.00% |
4 / 5 |
14 | |
0.00% |
0 / 1 |
defaultFromSearchConfig | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
2 | |||
defaultFromWeight | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
1 | |||
__construct | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
buildFromParsedQuery | |
93.75% |
30 / 32 |
|
0.00% |
0 / 1 |
8.02 | |||
buildFromQueryString | |
100.00% |
13 / 13 |
|
100.00% |
1 / 1 |
2 |
1 | <?php |
2 | |
3 | namespace CirrusSearch\Query\Builder; |
4 | |
5 | use CirrusSearch\Parser\AST\EmptyQueryNode; |
6 | use CirrusSearch\Parser\AST\FuzzyNode; |
7 | use CirrusSearch\Parser\AST\KeywordFeatureNode; |
8 | use CirrusSearch\Parser\AST\ParsedNode; |
9 | use CirrusSearch\Parser\AST\ParsedQuery; |
10 | use CirrusSearch\Parser\AST\PhrasePrefixNode; |
11 | use CirrusSearch\Parser\AST\PhraseQueryNode; |
12 | use CirrusSearch\Parser\AST\PrefixNode; |
13 | use CirrusSearch\Parser\AST\Visitor\LeafVisitor; |
14 | use CirrusSearch\Parser\AST\WildcardNode; |
15 | use CirrusSearch\Parser\AST\WordsQueryNode; |
16 | use CirrusSearch\Query\InTitleFeature; |
17 | use CirrusSearch\SearchConfig; |
18 | use Elastica\Query\AbstractQuery; |
19 | use Elastica\Query\MatchNone; |
20 | use Elastica\Query\MultiMatch; |
21 | use Wikimedia\Assert\Assert; |
22 | |
/**
 * Query builder that uses a ParsedQuery visitor to extract a form of the query that resembles
 * the near match query.
 * This implementation tries to mimic the strategy of the old query parser that works by removing
 * keywords. It might make sense in the future to reconsider this approach and see if there are
 * better strategies to apply with the help of the ParsedQuery.
 */
class NearMatchFieldQueryBuilder {
	/** Name of the first field listed in the default profile. */
	public const ALL_NEAR_MATCH = "all_near_match";
	/** The ".asciifolding" sub-field of ALL_NEAR_MATCH, second field of the default profile. */
	public const ALL_NEAR_MATCH_ACCENT_FOLDED = self::ALL_NEAR_MATCH . ".asciifolding";

	/**
	 * Profile driving the generated query. buildFromQueryString() reads the "fields" entry
	 * and, for each element, its "name" and "weight" keys.
	 * @var array
	 */
	private array $profile;

	/**
	 * Build the default instance using the weight configured in CirrusSearchNearMatchWeight.
	 *
	 * Any falsy configured value (unset, null, 0, ...) falls back to a weight of 2.
	 *
	 * @param SearchConfig $config
	 * @return self
	 */
	public static function defaultFromSearchConfig( SearchConfig $config ): self {
		return self::defaultFromWeight( $config->get( 'CirrusSearchNearMatchWeight' ) ?: 2 );
	}

	/**
	 * Build an instance with the default two-field profile.
	 *
	 * ALL_NEAR_MATCH gets $weight and ALL_NEAR_MATCH_ACCENT_FOLDED gets 75% of it; both
	 * weights are rounded to 3 decimals.
	 *
	 * @param float $weight weight of the ALL_NEAR_MATCH field
	 * @return self
	 */
	public static function defaultFromWeight( float $weight ): self {
		return new self(
			[ "fields" => [
				[ "name" => self::ALL_NEAR_MATCH, "weight" => round( $weight, 3 ) ],
				[ "name" => self::ALL_NEAR_MATCH_ACCENT_FOLDED, "weight" => round( $weight * 0.75, 3 ) ]
			] ]
		);
	}

	/**
	 * @param array $profile must hold a "fields" list whose elements have a "name" and a
	 *  "weight" key (see defaultFromWeight() for an example)
	 */
	public function __construct( array $profile ) {
		$this->profile = $profile;
	}

	/**
	 * Build the near match query out of a ParsedQuery.
	 *
	 * The original query string is copied and the portions covered by the namespace header and
	 * by keyword nodes are blanked (only the value of a non-negated, non-regex intitle keyword
	 * is kept). Runs of whitespace are then collapsed, the result is trimmed and handed over
	 * to buildFromQueryString().
	 *
	 * @param ParsedQuery $query
	 * @return AbstractQuery
	 */
	public function buildFromParsedQuery( ParsedQuery $query ): AbstractQuery {
		$visitor = new class( $query ) extends LeafVisitor {
			/** @var string the query string, progressively blanked while visiting the tree */
			public string $nearMatch;

			public function __construct( ParsedQuery $query ) {
				parent::__construct();
				$this->nearMatch = $query->getQuery();
				$nsHeader = $query->getNamespaceHeader();
				if ( $nsHeader != null ) {
					$this->blank( $nsHeader );
				}
			}

			/**
			 * Blank the portion of the search query located at the same location as the $node.
			 * A custom replacement can be passed but it must be strictly shorter than this
			 * location. The replacement is padded with spaces up to the length of the location
			 * so that the length of the query string does not change.
			 * @param ParsedNode $node the node holding the location of the query string we want to blank
			 * @param string $replacement optional replacement string to use
			 */
			private function blank( ParsedNode $node, string $replacement = "" ): void {
				$l = $node->getEndOffset() - $node->getStartOffset();
				Assert::parameter( strlen( $replacement ) < $l, '$replacement',
					'must be shorter than the replaced ParsedNode' );
				$this->nearMatch = substr_replace(
					$this->nearMatch,
					str_pad( $replacement, $l, " " ),
					$node->getStartOffset(), $l
				);
			}

			// The leaf nodes below are intentionally left untouched: their text stays in the
			// near match query as-is.

			/** {@inheritdoc} */
			public function visitWordsQueryNode( WordsQueryNode $node ) {
			}

			/** {@inheritdoc} */
			public function visitPhraseQueryNode( PhraseQueryNode $node ) {
			}

			/** {@inheritdoc} */
			public function visitPhrasePrefixNode( PhrasePrefixNode $node ) {
			}

			/** {@inheritdoc} */
			public function visitFuzzyNode( FuzzyNode $node ) {
			}

			/** {@inheritdoc} */
			public function visitPrefixNode( PrefixNode $node ) {
			}

			/** {@inheritdoc} */
			public function visitWildcardNode( WildcardNode $node ) {
			}

			/** {@inheritdoc} */
			public function visitEmptyQueryNode( EmptyQueryNode $node ) {
			}

			/** {@inheritdoc} */
			public function visitKeywordFeatureNode( KeywordFeatureNode $node ) {
				if ( !$this->negated() && ( $node->getKeyword() instanceof InTitleFeature ) && $node->getParsedValue() == [] ) {
					// TODO: generalize this InTitleFeature behavior
					// We want to keep the text of the intitle keyword on if:
					// - it's not negated
					// - it's not a regular expression (using $node->getParsedValue() == [] )
					$this->blank( $node, $node->getQuotedValue() );
				} else {
					$clause = $this->getCurrentBooleanClause();
					// painful attempt to keep a weird edge-case of the old query parser that does not
					// support negating keyword clause with an explicit NOT.
					// Might be interesting to re-consider the usefulness of such edge-case
					// "NOT keyword:value" becomes "NOT"
					// but "-keyword:value" becomes ""
					// we detect the use of NOT or - using BooleanClause::isExplicit
					$negatedNode = $clause != null ? $clause->getNegatedNode() : null;
					if ( $negatedNode !== null && !$clause->isExplicit() ) {
						// the negated node should have the proper offsets to blank the "-"
						$this->blank( $negatedNode );
					} else {
						$this->blank( $node );
					}
				}
			}
		};
		$query->getRoot()->accept( $visitor );
		// preg_replace() returns null when PCRE fails; coalesce to an empty string so that
		// trim() never receives null (deprecated since PHP 8.1). An empty string still ends
		// up as a MatchNone query, which is what the null used to produce.
		$collapsed = preg_replace( '/\s{2,}/', ' ', $visitor->nearMatch ) ?? '';

		return $this->buildFromQueryString( trim( $collapsed ) );
	}

	/**
	 * Build the near match query out of a plain query string.
	 *
	 * @param string $query
	 * @return AbstractQuery a MatchNone query when $query is empty or only made of whitespace,
	 *  otherwise a MultiMatch query over the fields of the profile, each field being written
	 *  as "name^weight"
	 */
	public function buildFromQueryString( string $query ): AbstractQuery {
		if ( preg_match( '/^\s*$/', $query ) === 1 ) {
			return new MatchNone();
		}
		$allQuery = new MultiMatch();
		$allQuery->setQuery( $query );
		$allQuery->setFields(
			array_map(
				static function ( array $fieldDef ): string {
					return $fieldDef["name"] . "^" . $fieldDef["weight"];
				},
				$this->profile["fields"]
			)
		);
		return $allQuery;
	}

}