Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
100.00% covered (success)
100.00%
29 / 29
100.00% covered (success)
100.00%
2 / 2
CRAP
100.00% covered (success)
100.00%
1 / 1
PhraseQueryParser
100.00% covered (success)
100.00%
29 / 29
100.00% covered (success)
100.00%
2 / 2
10
100.00% covered (success)
100.00%
1 / 1
 __construct
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 parse
100.00% covered (success)
100.00%
28 / 28
100.00% covered (success)
100.00%
1 / 1
9
1<?php
2
3namespace CirrusSearch\Parser\QueryStringRegex;
4
5use CirrusSearch\Parser\AST\NegatedNode;
6use CirrusSearch\Parser\AST\PhrasePrefixNode;
7use CirrusSearch\Parser\AST\PhraseQueryNode;
8use CirrusSearch\Search\Escaper;
9use Wikimedia\Assert\Assert;
10
11/**
12 * Detects phrase queries:
13 * "simple phrase" : use the plain fields
14 * "simple phrase"~ : use the stem fields
15 * "simple phrase"~2 : force the slop to be 2
16 * "simple phrase"~2~ : force the slop to be 2 and use the stem fields
17 *
18 * The phrase can be negated using a ! or -
19 * Quotes can be escaped using \
20 *
21 * Supports phrase prefix as well:
22 * "simple phras*"
23 * iff slop and stem are not provided otherwise we send a simple phrase node
24 */
class PhraseQueryParser {

    /**
     * Matches the opening of a phrase at the current offset: an optional
     * negation prefix (- or !) immediately followed by a double quote.
     */
    public const PHRASE_START = '/\G(?<negate>-|!)?"/';

    /**
     * Matches a complete phrase at the current offset:
     * - negate: optional - or ! prefix
     * - value: everything between the quotes, backslash escapes included
     * - slop/slopvalue: optional ~ followed by digits
     * - fuzzy: optional trailing ~
     */
    private const PHRASE_REGEX = '/\G(?<negate>-|!)?"(?<value>(?:\\\\.|[^"])*)"(?<slop>~(?<slopvalue>\d+))?(?<fuzzy>~)?/';

    /**
     * Matches a quoted value made of at least one character (escaped
     * sequences or anything but *) followed by one final unescaped *.
     */
    private const PHRASE_PREFIX_REGEX = '/^(?:\\\\.|[^*])+[*]$/';

    /**
     * @var Escaper
     */
    private $escaper;

    /**
     * @param Escaper $escaper used to unescape the content of the phrase
     */
    public function __construct( Escaper $escaper ) {
        $this->escaper = $escaper;
    }

    /**
     * Try to parse a phrase starting exactly at $start and closing no later
     * than $end.
     *
     * @param string $query
     * @param int $start offset (in bytes) where the phrase must start
     * @param int $end offset (in bytes) the phrase must not go past
     * @return PhraseQueryNode|PhrasePrefixNode|NegatedNode|null the parsed node,
     *  wrapped in a NegatedNode when the phrase is prefixed with - or !, or
     *  null if no phrase is found at $start or if it closes after $end
     */
    public function parse( $query, $start, $end ) {
        Assert::precondition( $start < $end, '$start < $end' );
        Assert::precondition( $end <= strlen( $query ), '$end <= strlen( $query )' );

        $match = [];
        if ( preg_match( self::PHRASE_REGEX, $query, $match, 0, $start ) !== 1 ) {
            return null;
        }

        $endOffset = $start + strlen( $match[0] );
        if ( $endOffset > $end ) {
            // The phrase closes outside of the region we were asked to parse
            return null;
        }

        $quotedValue = $match['value'];

        // slopvalue is reported as an empty string when only the trailing ~ matched
        $hasSlop = isset( $match['slopvalue'] ) && $match['slopvalue'] !== '';
        $slop = $hasSlop ? intval( $match['slopvalue'] ) : -1;

        // fuzzy is the last group of the pattern: preg_match() does not report
        // it at all when it did not participate in the match
        $stem = isset( $match['fuzzy'] );

        // Phrase prefix is only supported when neither slop nor stem is requested
        // (still unclear why we do not allow * elsewhere in the phrase)
        $phrasePrefix = !$hasSlop && !$stem
            && preg_match( self::PHRASE_PREFIX_REGEX, $quotedValue ) === 1;

        $negated = $match['negate'];
        $phraseStart = $start + strlen( $negated );
        $value = $this->escaper->unescape( $quotedValue );

        if ( $phrasePrefix ) {
            // Remove the trailing wildcard only. rtrim( $value, '*' ) would also
            // strip asterisks that were escaped in the query (e.g. "foo\**") and
            // are therefore part of the phrase (assumes unescape() only drops
            // the escape characters - TODO confirm against Escaper).
            if ( substr( $value, -1 ) === '*' ) {
                $value = substr( $value, 0, -1 );
            }
            $node = new PhrasePrefixNode( $phraseStart, $endOffset, $value );
        } else {
            $node = new PhraseQueryNode( $phraseStart, $endOffset, $value, $slop, $stem );
        }

        if ( $negated !== '' ) {
            // The negated node also covers the - or ! prefix
            $node = new NegatedNode( $start, $node->getEndOffset(), $node, $negated );
        }
        return $node;
    }
}