Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
95.00% |
38 / 40 |
|
86.67% |
13 / 15 |
CRAP | |
0.00% |
0 / 1 |
Token | |
95.00% |
38 / 40 |
|
86.67% |
13 / 15 |
20 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
reset | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
getImage | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
3 | |||
getType | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
setType | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
setOffsets | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
3 | |||
eof | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
node | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
getStart | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getEnd | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
ignorable | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getNode | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
copyTo | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
1 | |||
getTypeLabels | |
80.00% |
4 / 5 |
|
0.00% |
0 / 1 |
2.03 | |||
getTypeLabel | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 |
1 | <?php |
2 | |
3 | namespace CirrusSearch\Parser\QueryStringRegex; |
4 | |
5 | use CirrusSearch\Parser\AST\ParsedNode; |
6 | use Wikimedia\Assert\Assert; |
7 | |
/**
 * A token used in parsing the cirrus fulltext syntax.
 *
 * A token is a mutable, reusable cursor over the query string: it holds a
 * type, a [start, end) offset pair into the query and, for PARSED_NODE
 * tokens, the node that was parsed directly from the query string.
 */
class Token {

	/** end of the query string */
	private const EOF = 0;

	/** explicit boolean AND */
	public const BOOL_AND = 1;

	/** explicit boolean OR */
	public const BOOL_OR = 2;

	/** explicit negation */
	public const NOT = 3;

	/**
	 * Parsed node: due to its "mixed" nature the parser is able to spawn
	 * complex nodes directly from the query string without using tokens.
	 * The PARSED_NODE token type represents this kind of "complex" token.
	 */
	public const PARSED_NODE = 4;

	/** run of whitespace, see ignorable() */
	public const WHITESPACE = 5;

	/**
	 * @var string[] token type labels, indexed by token type
	 */
	private static $TYPE_LABEL = [
		self::EOF => 'EOF',
		self::BOOL_AND => 'AND',
		self::BOOL_OR => 'OR',
		self::NOT => 'NOT',
		self::PARSED_NODE => 'QUERY',
		self::WHITESPACE => 'WHITESPACE',
	];

	/**
	 * @var int start offset (-1 when unset)
	 */
	private $start;

	/**
	 * @var int end offset (excl) (-1 when unset)
	 */
	private $end;

	/**
	 * @var string the query string the offsets refer to
	 */
	private $query;

	/**
	 * @var int|null token type (null when the token has been reset)
	 */
	private $type;

	/**
	 * @var string|null token image cache, lazily filled by getImage()
	 */
	private $image;

	/**
	 * @var ParsedNode|null node attached by node(), if any
	 */
	private $node;

	/**
	 * @param string $query the query string being tokenized, must not be null
	 */
	public function __construct( $query ) {
		Assert::parameter( $query !== null, '$query', 'cannot be null' );
		$this->query = $query;
		$this->reset();
	}

	/**
	 * Reset the token state so that it can be reused.
	 *
	 * Everything but the query string is cleared, including the node
	 * attached by node(), so that a recycled token never exposes the
	 * ParsedNode of a previous match.
	 */
	public function reset() {
		$this->start = -1;
		$this->end = -1;
		$this->type = null;
		$this->image = null;
		$this->node = null;
	}

	/**
	 * Get the image of the token in the query, i.e. the substring of the
	 * query delimited by the token offsets. The value is computed once and
	 * cached until the offsets change.
	 *
	 * Fails the precondition when no offsets are set (after reset() or eof()).
	 *
	 * @return string
	 */
	public function getImage() {
		Assert::precondition( $this->start >= 0 && $this->end >= 0, 'Trying to get token image at offset -1' );
		if ( $this->image === null ) {
			$this->image = substr( $this->query, $this->start, $this->end - $this->start );
		}
		return $this->image;
	}

	/**
	 * The token type.
	 *
	 * @return int|null one of the token type constants, null if the token
	 *  has been reset and not initialized yet
	 */
	public function getType() {
		return $this->type;
	}

	/**
	 * Set the token type together with its offsets.
	 *
	 * @param int $type token type
	 * @param int $start offset
	 * @param int $end offset (exc)
	 */
	public function setType( $type, $start, $end ) {
		$this->type = $type;
		$this->setOffsets( $start, $end );
	}

	/**
	 * Set the token offsets.
	 *
	 * The offsets must delimit a non-empty range lying inside the query
	 * string: 0 <= $start < $end <= strlen( query ).
	 *
	 * @param int $start offset
	 * @param int $end offset (exc)
	 */
	public function setOffsets( $start, $end ) {
		$len = strlen( $this->query );
		// $start < $end && $end <= $len already implies $start < $len; the
		// lower bound is checked explicitly because getImage() rejects
		// negative offsets anyway and substr() would read them from the end.
		Assert::precondition( $start >= 0 && $start < $end && $end <= $len,
			'invalid $start and $end param' );
		$this->start = $start;
		$this->end = $end;
		// The cached image belongs to the previous offsets.
		$this->image = null;
	}

	/**
	 * Mark this token as the end of the query string: the offsets are unset
	 * and any cached image or attached node is dropped.
	 */
	public function eof() {
		$this->reset();
		$this->type = self::EOF;
	}

	/**
	 * Initialize the token from a parsed node. The token takes the
	 * PARSED_NODE type and the offsets of the node.
	 *
	 * @param ParsedNode $node
	 */
	public function node( ParsedNode $node ) {
		$this->setType( self::PARSED_NODE, $node->getStartOffset(), $node->getEndOffset() );
		$this->node = $node;
	}

	/**
	 * @return int start offset (-1 when unset)
	 */
	public function getStart() {
		return $this->start;
	}

	/**
	 * @return int end offset (excl) (-1 when unset)
	 */
	public function getEnd() {
		return $this->end;
	}

	/**
	 * @return bool true if this token can be ignored (it is a WHITESPACE token)
	 */
	public function ignorable() {
		return $this->type === self::WHITESPACE;
	}

	/**
	 * Get the node if the token was initialized from a pre-parsed
	 * node.
	 *
	 * @return ParsedNode|null
	 */
	public function getNode() {
		return $this->node;
	}

	/**
	 * Copy the whole state of this token (query, offsets, cached image,
	 * node and type) to the token argument.
	 *
	 * @param Token $lookBehind token receiving the state
	 */
	public function copyTo( Token $lookBehind ) {
		$lookBehind->query = $this->query;
		$lookBehind->start = $this->start;
		$lookBehind->end = $this->end;
		$lookBehind->image = $this->image;
		$lookBehind->node = $this->node;
		$lookBehind->type = $this->type;
	}

	/**
	 * Get the labels of one or several token types.
	 *
	 * @param int[]|int $types a token type or a list of token types
	 * @return string[] type labels, in the same order (and with the same
	 *  keys) as $types; always an array, even for a single type
	 */
	public static function getTypeLabels( $types ) {
		if ( is_int( $types ) ) {
			return [ self::getTypeLabel( $types ) ];
		}
		// Single lookup path: every label goes through getTypeLabel().
		return array_map( static function ( $type ) {
			return self::getTypeLabel( $type );
		}, $types );
	}

	/**
	 * Get the label of a token type.
	 *
	 * @param int $type one of the token type constants
	 * @return string type label
	 */
	public static function getTypeLabel( $type ) {
		return self::$TYPE_LABEL[$type];
	}
}