Code Coverage for /workspace/src/extensions/CirrusSearch/includes/Parser/QueryStringRegex/PhraseQueryParser.php

	Code Coverage
	Lines			Functions and Methods				Classes and Traits
Total	100.00% covered (success)	100.00%	29 / 29	100.00% covered (success)	100.00%	2 / 2	CRAP	100.00% covered (success)	100.00%	1 / 1
PhraseQueryParser	100.00% covered (success)	100.00%	29 / 29	100.00% covered (success)	100.00%	2 / 2	10	100.00% covered (success)	100.00%	1 / 1
__construct	100.00% covered (success)	100.00%	1 / 1	100.00% covered (success)	100.00%	1 / 1	1
parse	100.00% covered (success)	100.00%	28 / 28	100.00% covered (success)	100.00%	1 / 1	9

1	<?php
2
3	namespace CirrusSearch\Parser\QueryStringRegex;
4
5	use CirrusSearch\Parser\AST\NegatedNode;
6	use CirrusSearch\Parser\AST\PhrasePrefixNode;
7	use CirrusSearch\Parser\AST\PhraseQueryNode;
8	use CirrusSearch\Search\Escaper;
9	use Wikimedia\Assert\Assert;
10
/**
 * Detects phrase queries:
 * "simple phrase" : use the plain fields
 * "simple phrase"~ : use the stem fields
 * "simple phrase"~2 : force the slop to be 2
 * "simple phrase"~2~ : force the slop to be 2 and use the stem fields
 *
 * The phrase can be negated using a ! or -
 * Quotes can be escaped using \
 *
 * Supports phrase prefix as well:
 * "simple phras*"
 * iff slop and stem are not provided, otherwise we send a simple phrase node
 */
class PhraseQueryParser {

	/**
	 * Start of a phrase: an optional negation marker (- or !) followed by
	 * a double quote, anchored at the offset the match starts from.
	 */
	public const PHRASE_START = '/\G(?<negate>-|!)?"/';

	/**
	 * Normal phrase detection.
	 *
	 * Named groups:
	 *  - negate: optional leading - or !
	 *  - value: the text between the quotes (backslash-escaped characters allowed)
	 *  - slop / slopvalue: optional ~ followed by digits
	 *  - fuzzy: optional trailing ~
	 */
	private const PHRASE_REGEX = '/\G(?<negate>-|!)?"(?<value>(?:\\\\.|[^"])*)"(?<slop>~(?<slopvalue>\d+))?(?<fuzzy>~)?/';

	/**
	 * Phrase prefix detection, applied to the quoted value only: one or more
	 * escaped characters or non-* characters, terminated by a single
	 * unescaped *.
	 */
	private const PHRASE_PREFIX_REGEX = '/^(?:\\\\.|[^*])+[*]$/';

	/**
	 * @var Escaper
	 */
	private $escaper;

	/**
	 * @param Escaper $escaper used to unescape the quoted phrase value
	 */
	public function __construct( Escaper $escaper ) {
		$this->escaper = $escaper;
	}

	/**
	 * Try to parse a phrase starting exactly at $start and ending no later
	 * than $end.
	 *
	 * @param string $query
	 * @param int $start offset in $query where the phrase must begin
	 * @param int $end offset the matched phrase must not run past
	 * @return PhraseQueryNode|PhrasePrefixNode|NegatedNode|null the parsed node
	 *  (wrapped in a NegatedNode when a negation marker is present), or null
	 *  when no phrase fits in [$start, $end]
	 */
	public function parse( $query, $start, $end ) {
		$match = [];
		Assert::precondition( $start < $end, '$start < $end' );
		Assert::precondition( $end <= strlen( $query ), '$end <= strlen( $query )' );

		if ( preg_match( self::PHRASE_REGEX, $query, $match, 0, $start ) !== 1 ) {
			return null;
		}

		$endOffset = $start + strlen( $match[0] );
		if ( $endOffset > $end ) {
			// The phrase runs past the region we were asked to parse.
			return null;
		}

		$quotedValue = $match['value'];

		// An optional group that did not participate but is followed by one
		// that did is reported as an empty string, hence the length check.
		$hasSlop = isset( $match['slopvalue'] ) && strlen( $match['slopvalue'] ) > 0;
		$slop = $hasSlop ? intval( $match['slopvalue'] ) : -1;

		// 'fuzzy' is the last group: it is only present in $match when the
		// trailing ~ was actually matched.
		$stem = isset( $match['fuzzy'] );

		// Phrase prefix only applies when neither slop nor stem is requested
		// (still unclear why we do not allow * inside the phrase).
		$phrasePrefix = !$hasSlop && !$stem
			&& preg_match( self::PHRASE_PREFIX_REGEX, $quotedValue ) === 1;

		$negated = $match['negate'];
		// The phrase node itself starts after the negation marker, if any.
		$phraseStart = $start + strlen( $negated );
		$value = $this->escaper->unescape( $quotedValue );

		if ( $phrasePrefix ) {
			$node = new PhrasePrefixNode( $phraseStart, $endOffset, rtrim( $value, '*' ) );
		} else {
			$node = new PhraseQueryNode( $phraseStart, $endOffset, $value, $slop, $stem );
		}

		if ( $negated !== '' ) {
			$node = new NegatedNode( $start, $node->getEndOffset(), $node, $negated );
		}

		return $node;
	}
}