Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
100.00% |
93 / 93 |
|
100.00% |
9 / 9 |
CRAP | |
100.00% |
1 / 1 |
HasWbStatementFeature | |
100.00% |
93 / 93 |
|
100.00% |
9 / 9 |
24 | |
100.00% |
1 / 1 |
getKeywords | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
doApply | |
100.00% |
12 / 12 |
|
100.00% |
1 / 1 |
2 | |||
combineQueries | |
100.00% |
18 / 18 |
|
100.00% |
1 / 1 |
5 | |||
parseValue | |
100.00% |
36 / 36 |
|
100.00% |
1 / 1 |
7 | |||
isStatementStringValid | |
100.00% |
15 / 15 |
|
100.00% |
1 / 1 |
2 | |||
statementContainsPropertyOnly | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
2 | |||
statementEndsWithWildcard | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
2 | |||
statementContainsOnlyWildcard | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getFilterQuery | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
2 |
1 | <?php |
2 | |
3 | namespace Wikibase\Search\Elastic\Query; |
4 | |
5 | use CirrusSearch\Parser\AST\KeywordFeatureNode; |
6 | use CirrusSearch\Query\Builder\QueryBuildingContext; |
7 | use CirrusSearch\Query\FilterQueryFeature; |
8 | use CirrusSearch\Query\SimpleKeywordFeature; |
9 | use CirrusSearch\Search\SearchContext; |
10 | use CirrusSearch\WarningCollector; |
11 | use Elastica\Query\AbstractQuery; |
12 | use Elastica\Query\BoolQuery; |
13 | use Elastica\Query\Exists; |
14 | use Elastica\Query\MatchQuery; |
15 | use Elastica\Query\Prefix; |
16 | use Wikibase\DataModel\Entity\NumericPropertyId; |
17 | use Wikibase\Search\Elastic\Fields\StatementsField; |
18 | |
/**
 * Handles the search keyword 'haswbstatement:'
 *
 * Allows the user to search for pages/items that have wikibase properties or statements associated
 * with them.
 *
 * If a file page has ANY statement about property 'P180' ('depicts') then it can be found
 * by including 'haswbstatement:P180' in the search query.
 *
 * If a file page has the statement 'P180=Q527' (meaning 'depicts sky') associated
 * with it then it can be found by including 'haswbstatement:P180=Q527' in the
 * search query.
 *
 * If a file page has the statement 'P2014=79802' (meaning 'MoMA artwork id 79802')
 * associated with it then it can be found by including 'haswbstatement:P2014=79802' in the
 * search query.
 *
 * A '*' at the end of a 'haswbstatement' string triggers a prefix search. If different file pages
 * have the statements:
 * - P180=Q146[P462=Q23445] ('depicts cat, color black')
 * - P180=Q146[P462=Q23444] ('depicts cat, color white')
 * ... then both those pages will be found if 'P180=Q146[P462=*' is
 * included in the search query.
 *
 * A '*' as the whole statement triggers an existence search. Any page containing any statement
 * will be returned.
 *
 * Statements can be combined using logical OR by separating them with a | character in a single
 * haswbstatement query e.g. 'haswbstatement:P999=Q888|P999=Q777'
 *
 * Statements can be combined using logical AND by using two separate haswbstatement queries e.g.
 * 'haswbstatement:P999=Q888 haswbstatement:P999=Q777'
 *
 * Note that NOT ALL STATEMENTS ARE INDEXED. Searching for a statement about a property that has
 * not been indexed will give an empty result set.
 *
 * @uses CirrusSearch
 * @see https://phabricator.wikimedia.org/T190022
 */
class HasWbStatementFeature extends SimpleKeywordFeature implements FilterQueryFeature {

	/**
	 * Character that requests an existence search when it is the whole statement,
	 * or a prefix search when it is the last character of a statement.
	 */
	private const WILDCARD = '*';

	/**
	 * @var string|null Regex used to validate statement strings; built on first use
	 *  because it is derived from constants only and never changes between calls.
	 */
	private $validStatementStringPattern = null;

	/**
	 * @return string[]
	 */
	protected function getKeywords() {
		return [ 'haswbstatement' ];
	}

	/**
	 * @param SearchContext $context
	 * @param string $key The keyword
	 * @param string $value The value attached to the keyword with quotes stripped
	 * @param string $quotedValue The original value in the search string, including quotes if used
	 * @param bool $negated Is the search negated? Not used to generate the returned AbstractQuery,
	 *  that will be negated as necessary. Used for any other building/context necessary.
	 * @return array Two element array, first an AbstractQuery or null to apply to the
	 *  query. Second a boolean indicating if the quotedValue should be kept in the search
	 *  string.
	 */
	protected function doApply( SearchContext $context, $key, $value, $quotedValue, $negated ) {
		// The SearchContext doubles as the warning collector for parseValue().
		$queries = $this->parseValue(
			$key,
			$value,
			$quotedValue,
			'',
			'',
			$context
		);
		if ( $queries === [] ) {
			// Nothing valid to search for: mark the whole search as unable to yield results.
			$context->setResultsPossible( false );
			return [ null, false ];
		}

		return [ $this->combineQueries( $queries ), false ];
	}

	/**
	 * Builds an OR between many statements about the wikibase item
	 *
	 * Query specs whose 'class' is none of Prefix, MatchQuery or Exists are ignored.
	 *
	 * @param array[] $queries queries to combine. See parseValue() for fields.
	 * @return \Elastica\Query\AbstractQuery A BoolQuery of 'should' clauses, or a bare
	 *  Exists query as soon as an existence check is encountered.
	 */
	private function combineQueries( array $queries ) {
		$combined = new BoolQuery();
		foreach ( $queries as $spec ) {
			$class = $spec['class'];
			if ( $class === Exists::class ) {
				// In a boolean 'OR' having an existence check negates the
				// need for the remaining queries.
				return new Exists( $spec['field'] );
			}
			if ( $class === Prefix::class ) {
				$combined->addShould( new Prefix( [
					$spec['field'] => [
						'value' => $spec['string'],
						'rewrite' => 'top_terms_1024'
					]
				] ) );
			} elseif ( $class === MatchQuery::class ) {
				$combined->addShould( new MatchQuery(
					$spec['field'],
					[ 'query' => $spec['string'] ]
				) );
			}
		}
		return $combined;
	}

	/**
	 * Splits the keyword value on '|' and turns every valid statement string into a
	 * query spec. Invalid statement strings are skipped; if none is valid a warning is
	 * added to the collector and an empty array is returned.
	 *
	 * @param string $key
	 * @param string $value
	 * @param string $quotedValue
	 * @param string $valueDelimiter
	 * @param string $suffix
	 * @param WarningCollector $warningCollector
	 * @return array [
	 *     [
	 *         'class' => \Elastica\Query class name to be used to construct the query,
	 *         'field' => document field to run the query against,
	 *         'string' => string to search for (absent for Exists queries)
	 *     ],
	 *     ...
	 * ]
	 */
	public function parseValue(
		$key,
		$value,
		$quotedValue,
		$valueDelimiter,
		$suffix,
		WarningCollector $warningCollector
	) {
		$queries = [];
		foreach ( explode( '|', $value ) as $statementString ) {
			if ( !$this->isStatementStringValid( $statementString ) ) {
				// TODO: Add warning to avoid unexpected behaviour
				continue;
			}
			$queries[] = $this->buildQuerySpec( $statementString );
		}
		if ( $queries === [] ) {
			$warningCollector->addWarning(
				'wikibasecirrus-haswbstatement-feature-no-valid-statements',
				$key
			);
		}
		return $queries;
	}

	/**
	 * Maps one already validated statement string to its query spec.
	 *
	 * The checks are ordered from most to least specific: lone wildcard, property
	 * without value, trailing wildcard, and finally an exact statement match.
	 *
	 * @param string $statementString A string accepted by isStatementStringValid()
	 * @return array Query spec as described in parseValue()
	 */
	private function buildQuerySpec( $statementString ) {
		if ( $this->statementContainsOnlyWildcard( $statementString ) ) {
			return [
				'class' => Exists::class,
				'field' => StatementsField::NAME
			];
		}
		if ( $this->statementContainsPropertyOnly( $statementString ) ) {
			return [
				'class' => MatchQuery::class,
				'field' => StatementsField::NAME . '.property',
				'string' => $statementString,
			];
		}
		if ( $this->statementEndsWithWildcard( $statementString ) ) {
			return [
				'class' => Prefix::class,
				'field' => StatementsField::NAME,
				// Drop the trailing wildcard; the rest is the prefix to search for.
				'string' => substr( $statementString, 0, -1 ),
			];
		}
		return [
			'class' => MatchQuery::class,
			'field' => StatementsField::NAME,
			'string' => $statementString,
		];
	}

	/**
	 * Check that a statement string is valid. A valid string is either the lone
	 * wildcard '*', or a P-id that is followed by nothing at all or by the statement
	 * separator (and then anything).
	 *
	 * The following strings are valid:
	 *  P2014=79802
	 *  P999
	 *
	 * The following strings are invalid:
	 *  PrefixedId:P123=Q537
	 *  PA=Q888
	 *  PF=1234567
	 *
	 * @param string $statementString
	 * @return bool
	 */
	private function isStatementStringValid( $statementString ) {
		if ( $this->statementContainsOnlyWildcard( $statementString ) ) {
			// Simpler than integrating into basically unrelated regex
			return true;
		}

		return (bool)preg_match(
			$this->getValidStatementStringPattern(),
			$statementString
		);
	}

	/**
	 * Returns the regex that a non-wildcard statement string has to match, building
	 * and caching it on first use.
	 *
	 * @return string
	 */
	private function getValidStatementStringPattern() {
		if ( $this->validStatementStringPattern === null ) {
			// Strip delimiters, anchors and pattern modifiers from NumericPropertyId::PATTERN
			$propertyIdPattern = preg_replace(
				'/([^\sa-zA-Z0-9\\\])(\^|\\\A)?(.*?)(\$|\\\z|\\\Z)?\\1[a-zA-Z]*/',
				'$3',
				NumericPropertyId::PATTERN
			);
			// The separator is quoted so it is always matched literally, and \z is used
			// instead of $ because $ would also accept a P-id followed by a newline.
			$this->validStatementStringPattern = '/^' .
				$propertyIdPattern .
				'(' . preg_quote( StatementsField::STATEMENT_SEPARATOR, '/' ) . '|\z)' .
				'/i';
		}
		return $this->validStatementStringPattern;
	}

	/**
	 * Whether the statement string names a property without any value, i.e. it does
	 * not contain the statement separator.
	 *
	 * @param string $statementString
	 * @return bool
	 */
	private function statementContainsPropertyOnly( $statementString ) {
		// Uses the same separator constant as the validation regex so both stay in sync.
		return strpos( $statementString, StatementsField::STATEMENT_SEPARATOR ) === false;
	}

	/**
	 * Whether the last character of the statement string is the wildcard.
	 *
	 * @param string $statementString
	 * @return bool
	 */
	private function statementEndsWithWildcard( $statementString ) {
		return substr( $statementString, -1 ) === self::WILDCARD;
	}

	/**
	 * Whether the statement string is exactly the wildcard and nothing else.
	 *
	 * @param string $statementString
	 * @return bool
	 */
	private function statementContainsOnlyWildcard( $statementString ) {
		return $statementString === self::WILDCARD;
	}

	/**
	 * @param KeywordFeatureNode $node
	 * @param QueryBuildingContext $context
	 * @return AbstractQuery|null
	 */
	public function getFilterQuery( KeywordFeatureNode $node, QueryBuildingContext $context ) {
		// NOTE(review): presumably this is the array produced by parseValue() - confirm
		// against KeywordFeatureNode.
		$statements = $node->getParsedValue();
		if ( !$statements ) {
			// No usable statements (empty, or no parsed value at all): nothing to filter on.
			return null;
		}
		return $this->combineQueries( $statements );
	}

}