Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
84.34% |
70 / 83 |
|
77.78% |
7 / 9 |
CRAP | |
0.00% |
0 / 1 |
HasLicenseFeature | |
84.34% |
70 / 83 |
|
77.78% |
7 / 9 |
31.01 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
getKeywords | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
doApply | |
92.86% |
13 / 14 |
|
0.00% |
0 / 1 |
3.00 | |||
combineQueries | |
100.00% |
11 / 11 |
|
100.00% |
1 / 1 |
4 | |||
parseValue | |
100.00% |
11 / 11 |
|
100.00% |
1 / 1 |
3 | |||
licenseStringToQueries | |
100.00% |
12 / 12 |
|
100.00% |
1 / 1 |
4 | |||
getQueriesForOther | |
100.00% |
16 / 16 |
|
100.00% |
1 / 1 |
4 | |||
getFilterQuery | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
2 | |||
getConfiguredLicenseMap | |
0.00% |
0 / 12 |
|
0.00% |
0 / 1 |
42 |
1 | <?php |
2 | |
3 | namespace Wikibase\Search\Elastic\Query; |
4 | |
5 | use CirrusSearch\Parser\AST\KeywordFeatureNode; |
6 | use CirrusSearch\Query\Builder\QueryBuildingContext; |
7 | use CirrusSearch\Query\FilterQueryFeature; |
8 | use CirrusSearch\Query\SimpleKeywordFeature; |
9 | use CirrusSearch\Search\SearchContext; |
10 | use CirrusSearch\Util; |
11 | use CirrusSearch\WarningCollector; |
12 | use Elastica\Query\AbstractQuery; |
13 | use Elastica\Query\BoolQuery; |
14 | use Elastica\Query\MatchQuery; |
15 | use MediaWiki\Config\Config; |
16 | use Wikibase\Search\Elastic\Fields\StatementsField; |
17 | use Wikimedia\Assert\Assert; |
18 | |
/**
 * Handles the search keyword 'haslicense:'
 *
 * Allows the user to search for sets of licence statements in statement_keywords.
 *
 * A mapping between search strings and sets of statements is passed in the constructor in this
 * format:
 * [
 *   'cc-by-sa' => [
 *     'P275=Q18199165', // copyright licence = CC-BY-SA 4.0
 *   ],
 *   'cc-by' => [
 *     'P275=Q19125117', // copyright licence = CC-BY 2.0
 *   ],
 *   'unrestricted' => [
 *     'P275=Q6938433', // copyright licence = cc0
 *     'P6216=Q19652', // copyright status = public domain
 *   ]
 * ]
 *
 * So searching for `haslicense:cc-by` searches for documents with P275=Q19125117 in
 * statement_keywords.
 *
 * A search for `haslicense:other` will return pages that have ANY of the *properties*
 * from the licence mapping array AND NONE of the statements.
 *
 * So for the config above, searching for `haslicense:other` searches for documents with (P275 OR
 * P6216 in statement_keywords.property) AND NOT (P275=Q18199165 OR P275=Q19125117 OR P275=Q6938433
 * OR P6216=Q19652 in statement_keywords).
 *
 * A user can search for more than one type of licence by combining the search strings using the |
 * character (only the first 20 strings are honoured). Note that combining "other" with other
 * licence types will result in *only* "other" licences being returned, because "other"
 * specifically excludes all other licence types.
 *
 * @uses CirrusSearch
 * @see https://phabricator.wikimedia.org/T257938
 */
class HasLicenseFeature extends SimpleKeywordFeature implements FilterQueryFeature {

	/**
	 * Maximum number of '|'-separated license strings read from a single keyword value.
	 * Any further strings are silently ignored.
	 */
	private const MAX_LICENSE_STRINGS = 20;

	/**
	 * @var array[] Map of license search string => list of 'property=value' statement strings
	 */
	private $licenseMapping;

	/**
	 * @param array $licenseMapping Mapping between licence search strings and wikidata ids
	 * 	e.g. [
	 *		'cc-by-sa' => [
	 *			'P275=Q18199165', // copyright licence = CC-BY-SA 4.0
	 *		],
	 *		'cc-by' => [
	 *			'P275=Q19125117', // copyright licence = CC-BY 2.0
	 *		],
	 *		'unrestricted' => [
	 *			'P275=Q6938433', // copyright licence = cc0
	 *			'P6216=Q19652', // copyright status = public domain
	 *		]
	 *	]
	 */
	public function __construct( $licenseMapping ) {
		// Every element of the mapping must itself be an array (the list of statements).
		Assert::parameterElementType( 'array', $licenseMapping, 'licenseMapping' );
		$this->licenseMapping = $licenseMapping;
	}

	/**
	 * @return string[] The keywords handled by this feature
	 */
	protected function getKeywords() {
		return [ 'haslicense' ];
	}

	/**
	 * @param SearchContext $context
	 * @param string $key The keyword
	 * @param string $value The value attached to the keyword with quotes stripped
	 * @param string $quotedValue The original value in the search string, including quotes if used
	 * @param bool $negated Is the search negated? Not used to generate the returned AbstractQuery,
	 *  that will be negated as necessary. Used for any other building/context necessary.
	 * @return array Two element array, first an AbstractQuery or null to apply to the
	 *  query. Second a boolean indicating if the quotedValue should be kept in the search
	 *  string (always false for this class)
	 */
	protected function doApply( SearchContext $context, $key, $value, $quotedValue, $negated ) {
		if ( $value === '' ) {
			return [ null, false ];
		}

		// The search context doubles as the warning collector for parseValue().
		$queries = $this->parseValue( $key, $value, $quotedValue, '', '', $context );
		if ( count( $queries ) === 0 ) {
			// None of the requested license strings is known: nothing can match.
			$context->setResultsPossible( false );
			return [ null, false ];
		}

		return [ $this->combineQueries( $queries ), false ];
	}

	/**
	 * Builds one bool query out of the query descriptions produced by parseValue().
	 *
	 * 'should' entries become should-clauses (at least one of them has to match),
	 * 'must_not' entries become must_not-clauses. Entries with any other 'occur'
	 * value are ignored.
	 *
	 * @param string[][] $queries queries to combine. See parseValue() for fields.
	 * @return AbstractQuery
	 */
	private function combineQueries( array $queries ): AbstractQuery {
		$combined = new BoolQuery();
		$combined->setMinimumShouldMatch( 1 );
		foreach ( $queries as $query ) {
			$occur = $query['occur'];
			if ( $occur === 'must_not' ) {
				$combined->addMustNot(
					new MatchQuery( $query['field'], [ 'query' => $query['string'] ] )
				);
			} elseif ( $occur === 'should' ) {
				$combined->addShould(
					new MatchQuery( $query['field'], [ 'query' => $query['string'] ] )
				);
			}
		}
		return $combined;
	}

	/**
	 * Translates the keyword value into a list of query descriptions.
	 *
	 * The value is split on '|'; each part is looked up in the license mapping (or
	 * handled specially when it is 'other'). Unknown parts contribute nothing. If no
	 * part yields a query, a warning is added to the collector.
	 *
	 * @param string $key
	 * @param string $value
	 * @param string $quotedValue
	 * @param string $valueDelimiter
	 * @param string $suffix
	 * @param WarningCollector $warningCollector
	 * @return array [
	 *		[
	 *			'occur' => 'should' or 'must_not', how the clause is added to the bool query,
	 *			'field' => document field to run the query against,
	 *			'string' => string to search for
	 *		],
	 *		...
	 *	]
	 */
	public function parseValue(
		$key,
		$value,
		$quotedValue,
		$valueDelimiter,
		$suffix,
		WarningCollector $warningCollector
	) {
		$licenseStrings = array_slice( explode( '|', $value ), 0, self::MAX_LICENSE_STRINGS );

		$queries = [];
		foreach ( $licenseStrings as $licenseString ) {
			foreach ( $this->licenseStringToQueries( $licenseString ) as $query ) {
				$queries[] = $query;
			}
		}

		if ( count( $queries ) === 0 ) {
			$warningCollector->addWarning(
				'wikibasecirrus-haslicense-feature-no-valid-arguments',
				$key
			);
		}
		return $queries;
	}

	/**
	 * Returns the query descriptions for a single license search string.
	 *
	 * The literal string 'other' is always handled by getQueriesForOther(), even if
	 * the mapping happens to contain an 'other' key. A string that is not in the
	 * mapping yields an empty list.
	 *
	 * @param string $licenseString
	 * @return string[][] See parseValue() for the fields of each entry
	 */
	private function licenseStringToQueries( string $licenseString ): array {
		if ( $licenseString === 'other' ) {
			return $this->getQueriesForOther();
		}

		$queries = [];
		foreach ( $this->licenseMapping[$licenseString] ?? [] as $statementString ) {
			$queries[] = [
				'occur' => 'should',
				'field' => StatementsField::NAME,
				'string' => $statementString,
			];
		}
		return $queries;
	}

	/**
	 * For "other" licence types, search for results that match the properties
	 * but not the statements.
	 *
	 * Emits one 'should' entry per distinct property id (the part of a statement
	 * before the first '=') and one 'must_not' entry per configured statement.
	 *
	 * @return string[][] See parseValue() for the fields of each entry
	 */
	private function getQueriesForOther(): array {
		$queries = [];
		// Property ids already emitted, kept apart from $queries so that its
		// keys stay purely sequential.
		$seenProperties = [];
		foreach ( $this->licenseMapping as $statementStrings ) {
			foreach ( $statementStrings as $statementString ) {
				$propertyId = explode( '=', $statementString )[0];
				if ( !isset( $seenProperties[$propertyId] ) ) {
					$seenProperties[$propertyId] = true;
					$queries[] = [
						'occur' => 'should',
						'field' => StatementsField::NAME . '.property',
						'string' => $propertyId,
					];
				}
				$queries[] = [
					'occur' => 'must_not',
					'field' => StatementsField::NAME,
					'string' => $statementString,
				];
			}
		}
		return $queries;
	}

	/**
	 * @param KeywordFeatureNode $node
	 * @param QueryBuildingContext $context
	 * @return AbstractQuery|null null when the node carries no query descriptions
	 */
	public function getFilterQuery( KeywordFeatureNode $node, QueryBuildingContext $context ) {
		$statements = $node->getParsedValue();
		// Loose emptiness check on purpose: treats a missing (null) parsed value like
		// an empty one instead of passing it on to the array-typed combineQueries().
		if ( !$statements ) {
			return null;
		}
		return $this->combineQueries( $statements );
	}

	/**
	 * Returns the license mapping to use for this wiki.
	 *
	 * The mapping can come from code config ('LicenseMapping') or from the
	 * 'wikibasecirrus-license-mapping' message, which allows wiki-specific
	 * config/overrides controlled by users. A non-empty code config takes precedence;
	 * the message is only consulted when the config is empty and the message is not
	 * disabled.
	 *
	 * Each entry of the message has the form `searchstring|statement,statement,...`.
	 * Entries may be wrapped over several lines at a '|' or ',', and whitespace around
	 * the search string and the statements is ignored.
	 *
	 * @param Config $searchConfig
	 * @return array Map of license search string => list of statement strings
	 */
	public static function getConfiguredLicenseMap( Config $searchConfig ) {
		$licenseMapping = $searchConfig->get( 'LicenseMapping' ) ?: [];
		if ( $licenseMapping ) {
			// Code config overrides the message.
			return $licenseMapping;
		}

		$licenseMessage = wfMessage( 'wikibasecirrus-license-mapping' )->inContentLanguage();
		if ( $licenseMessage->isDisabled() ) {
			return $licenseMapping;
		}

		$lines = Util::parseSettingsInMessage( $licenseMessage->plain() );
		// Re-join entries that were wrapped at a separator, so that every entry
		// ends up on a single line.
		$joined = implode( "\n", $lines );
		$stripped = preg_replace( '/\n*?([|,])\n?(?![^\n]+\|)/', '$1', $joined );
		if ( $stripped === null ) {
			// preg_replace() signals a PCRE failure with null; fall back to the
			// unmodified text rather than handing null to explode().
			$stripped = $joined;
		}

		foreach ( explode( "\n", $stripped ) as $line ) {
			$data = explode( '|', $line );
			if ( count( $data ) !== 2 ) {
				// Not a "searchstring|statements" entry.
				continue;
			}
			$statements = array_map( 'trim', explode( ',', $data[1] ) );
			// Drop empty statements and re-index so the result is a plain list.
			$licenseMapping[trim( $data[0] )] = array_values( array_filter( $statements ) );
		}
		return $licenseMapping;
	}
}