Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
96.23% covered (success)
96.23%
51 / 53
80.00% covered (warning)
80.00%
4 / 5
CRAP
0.00% covered (danger)
0.00%
0 / 1
NearMatchFieldQueryBuilder
96.23% covered (success)
96.23%
51 / 53
80.00% covered (warning)
80.00%
4 / 5
14
0.00% covered (danger)
0.00%
0 / 1
 defaultFromSearchConfig
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
2
 defaultFromWeight
100.00% covered (success)
100.00%
6 / 6
100.00% covered (success)
100.00%
1 / 1
1
 __construct
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 buildFromParsedQuery
93.75% covered (success)
93.75%
30 / 32
0.00% covered (danger)
0.00%
0 / 1
8.02
 buildFromQueryString
100.00% covered (success)
100.00%
13 / 13
100.00% covered (success)
100.00%
1 / 1
2
1<?php
2
3namespace CirrusSearch\Query\Builder;
4
5use CirrusSearch\Parser\AST\EmptyQueryNode;
6use CirrusSearch\Parser\AST\FuzzyNode;
7use CirrusSearch\Parser\AST\KeywordFeatureNode;
8use CirrusSearch\Parser\AST\ParsedNode;
9use CirrusSearch\Parser\AST\ParsedQuery;
10use CirrusSearch\Parser\AST\PhrasePrefixNode;
11use CirrusSearch\Parser\AST\PhraseQueryNode;
12use CirrusSearch\Parser\AST\PrefixNode;
13use CirrusSearch\Parser\AST\Visitor\LeafVisitor;
14use CirrusSearch\Parser\AST\WildcardNode;
15use CirrusSearch\Parser\AST\WordsQueryNode;
16use CirrusSearch\Query\InTitleFeature;
17use CirrusSearch\SearchConfig;
18use Elastica\Query\AbstractQuery;
19use Elastica\Query\MatchNone;
20use Elastica\Query\MultiMatch;
21use Wikimedia\Assert\Assert;
22
/**
 * Builds the near match query, either from a raw query string or from a ParsedQuery.
 *
 * When starting from a ParsedQuery a leaf visitor is used to derive a query string that
 * resembles what the near match query expects. The strategy mimics the old query parser,
 * which worked by removing keywords from the user query. It may be worth revisiting this
 * approach in the future to see whether the ParsedQuery allows better strategies.
 */
class NearMatchFieldQueryBuilder {
    public const ALL_NEAR_MATCH = "all_near_match";
    public const ALL_NEAR_MATCH_ACCENT_FOLDED = self::ALL_NEAR_MATCH . ".asciifolding";

    /**
     * @var array profile whose "fields" entry lists the [ "name" => ..., "weight" => ... ]
     *  definitions that are targeted by the generated query
     */
    private array $profile;

    /**
     * Create a builder using the default profile, weighted by the
     * CirrusSearchNearMatchWeight config value (2 is used when that value is falsy).
     *
     * @param SearchConfig $config
     * @return self
     */
    public static function defaultFromSearchConfig( SearchConfig $config ): self {
        $weight = $config->get( 'CirrusSearchNearMatchWeight' ) ?: 2;
        return self::defaultFromWeight( $weight );
    }

    /**
     * Create a builder using the default profile: the near match field at $weight and its
     * accent folded variant at 75% of $weight, both rounded to 3 decimals.
     *
     * @param float $weight weight of the near match field
     * @return self
     */
    public static function defaultFromWeight( float $weight ): self {
        $fields = [
            [ "name" => self::ALL_NEAR_MATCH, "weight" => round( $weight, 3 ) ],
            [ "name" => self::ALL_NEAR_MATCH_ACCENT_FOLDED, "weight" => round( $weight * 0.75, 3 ) ],
        ];

        return new self( [ "fields" => $fields ] );
    }

    /**
     * @param array $profile profile holding a "fields" list of name/weight definitions
     */
    public function __construct( array $profile ) {
        $this->profile = $profile;
    }

    /**
     * Build the near match query from a parsed query.
     *
     * The namespace header and the keywords are blanked out of the original query string,
     * the resulting runs of whitespace are collapsed and the remaining text is handed over
     * to buildFromQueryString().
     *
     * @param ParsedQuery $query
     * @return AbstractQuery
     */
    public function buildFromParsedQuery( ParsedQuery $query ): AbstractQuery {
        $visitor = new class( $query ) extends LeafVisitor {
            /** @var string the query string, progressively blanked while visiting the tree */
            public string $nearMatch;

            /**
             * @param ParsedQuery $query query whose text is used as the starting point
             */
            public function __construct( ParsedQuery $query ) {
                parent::__construct();
                $this->nearMatch = $query->getQuery();
                $nsHeader = $query->getNamespaceHeader();
                if ( $nsHeader != null ) {
                    // the namespace header is never part of the near match text
                    $this->blank( $nsHeader );
                }
            }

            /**
             * Overwrite with spaces the part of the query string covered by $node.
             * An optional replacement may be written at the beginning of that span; it must
             * be strictly shorter than the span it replaces.
             *
             * @param ParsedNode $node node providing the offsets of the span to blank
             * @param string $replacement optional text to keep in place of the blanked span
             */
            private function blank( ParsedNode $node, string $replacement = "" ): void {
                $end = $node->getEndOffset();
                $start = $node->getStartOffset();
                $span = $end - $start;
                Assert::parameter( strlen( $replacement ) < $span, '$replacement',
                    'must be shorter than the replaced ParsedNode' );
                // pad to the size of the span so that the offsets of the other nodes remain valid
                $padded = str_pad( $replacement, $span, " " );
                $this->nearMatch = substr_replace( $this->nearMatch, $padded, $start, $span );
            }

            /** {@inheritdoc} */
            public function visitWordsQueryNode( WordsQueryNode $node ) {
                // kept untouched in the near match text
            }

            /** {@inheritdoc} */
            public function visitPhraseQueryNode( PhraseQueryNode $node ) {
                // kept untouched in the near match text
            }

            /** {@inheritdoc} */
            public function visitPhrasePrefixNode( PhrasePrefixNode $node ) {
                // kept untouched in the near match text
            }

            /** {@inheritdoc} */
            public function visitFuzzyNode( FuzzyNode $node ) {
                // kept untouched in the near match text
            }

            /** {@inheritdoc} */
            public function visitPrefixNode( PrefixNode $node ) {
                // kept untouched in the near match text
            }

            /** {@inheritdoc} */
            public function visitWildcardNode( WildcardNode $node ) {
                // kept untouched in the near match text
            }

            /** {@inheritdoc} */
            public function visitEmptyQueryNode( EmptyQueryNode $node ) {
                // nothing to blank
            }

            /** {@inheritdoc} */
            public function visitKeywordFeatureNode( KeywordFeatureNode $node ) {
                // TODO: generalize this InTitleFeature behavior
                // The text of an intitle keyword is kept when:
                // - the keyword is not negated
                // - the keyword is not a regular expression (detected with
                //   $node->getParsedValue() == [])
                $keepKeywordText = !$this->negated()
                    && ( $node->getKeyword() instanceof InTitleFeature )
                    && $node->getParsedValue() == [];
                if ( $keepKeywordText ) {
                    $this->blank( $node, $node->getQuotedValue() );
                    return;
                }

                // Reproduce a weird edge-case of the old query parser, which does not support
                // negating a keyword clause with an explicit NOT (the usefulness of this
                // edge-case might be worth re-considering):
                // - "NOT keyword:value" becomes "NOT"
                // - "-keyword:value" becomes ""
                // BooleanClause::isExplicit tells whether NOT or - was used.
                $clause = $this->getCurrentBooleanClause();
                $negatedNode = $clause != null ? $clause->getNegatedNode() : null;
                // the negated node should have the proper offsets to blank the "-"
                $target = ( $negatedNode !== null && !$clause->isExplicit() ) ? $negatedNode : $node;
                $this->blank( $target );
            }
        };
        $query->getRoot()->accept( $visitor );

        // blanking leaves runs of spaces behind: collapse them before building the query
        $collapsed = preg_replace( '/\s{2,}/', ' ', $visitor->nearMatch );

        return $this->buildFromQueryString( trim( $collapsed ) );
    }

    /**
     * Build the near match query from a query string.
     *
     * @param string $query
     * @return AbstractQuery a MatchNone query when $query is empty or made only of
     *  whitespace, otherwise a MultiMatch query over the "name^weight" fields of the profile
     */
    public function buildFromQueryString( string $query ): AbstractQuery {
        if ( preg_match( '/^\s*$/', $query ) === 1 ) {
            return new MatchNone();
        }

        $weightedFields = array_map(
            static fn ( array $fieldDef ): string => $fieldDef["name"] . "^" . $fieldDef["weight"],
            $this->profile["fields"]
        );

        $allQuery = new MultiMatch();
        $allQuery->setQuery( $query );
        $allQuery->setFields( $weightedFields );
        return $allQuery;
    }

}