Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
84.15% |
69 / 82 |
|
77.78% |
7 / 9 |
CRAP | |
0.00% |
0 / 1 |
| HasLicenseFeature | |
84.15% |
69 / 82 |
|
77.78% |
7 / 9 |
31.12 | |
0.00% |
0 / 1 |
| __construct | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| getKeywords | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| doApply | |
92.86% |
13 / 14 |
|
0.00% |
0 / 1 |
3.00 | |||
| combineQueries | |
100.00% |
11 / 11 |
|
100.00% |
1 / 1 |
4 | |||
| parseValue | |
100.00% |
11 / 11 |
|
100.00% |
1 / 1 |
3 | |||
| licenseStringToQueries | |
100.00% |
12 / 12 |
|
100.00% |
1 / 1 |
4 | |||
| getQueriesForOther | |
100.00% |
16 / 16 |
|
100.00% |
1 / 1 |
4 | |||
| getFilterQuery | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
2 | |||
| getConfiguredLicenseMap | |
0.00% |
0 / 12 |
|
0.00% |
0 / 1 |
42 | |||
| 1 | <?php |
| 2 | |
| 3 | namespace Wikibase\Search\Elastic\Query; |
| 4 | |
| 5 | use CirrusSearch\Parser\AST\KeywordFeatureNode; |
| 6 | use CirrusSearch\Query\Builder\QueryBuildingContext; |
| 7 | use CirrusSearch\Query\FilterQueryFeature; |
| 8 | use CirrusSearch\Query\SimpleKeywordFeature; |
| 9 | use CirrusSearch\Search\SearchContext; |
| 10 | use CirrusSearch\Util; |
| 11 | use CirrusSearch\WarningCollector; |
| 12 | use Elastica\Query\AbstractQuery; |
| 13 | use Elastica\Query\BoolQuery; |
| 14 | use Elastica\Query\MatchQuery; |
| 15 | use MediaWiki\Config\Config; |
| 16 | use Wikibase\Search\Elastic\Fields\StatementsField; |
| 17 | |
| 18 | /** |
| 19 | * Handles the search keyword 'haslicense:' |
| 20 | * |
| 21 | * Allows the user to search for sets of licence statements in statement_keywords |
| 22 | * |
| 23 | * A mapping between search strings and sets of statements is passed in the constructor in this |
| 24 | * format: |
| 25 | * [ |
| 26 | * 'cc-by-sa' => [ |
| 27 | * 'P275=Q18199165', // copyright licence = CC-BY-SA 4.0 |
| 28 | * ], |
| 29 | * 'cc-by' => [ |
| 30 | * 'P275=Q19125117', // copyright licence = CC-BY 2.0 |
| 31 | * ], |
| 32 | * 'unrestricted' => [ |
| 33 | * 'P275=Q6938433', // copyright licence = cc0 |
| 34 | * 'P6216=Q19652', // copyright status = public domain |
| 35 | * ] |
| 36 | * ] |
| 37 | * |
| 38 | * So searching for `haslicense:cc-by` searches for documents with P275=Q19125117 in |
| 39 | * statement_keywords |
| 40 | * |
| 41 | * A search for `haslicense:other` will return pages that have ANY of the *properties* |
| 42 | * from the licence mapping array AND NONE of the statements. |
| 43 | * |
| 44 | * A user can search for more than one type of licence by joining the search strings with the pipe |
| 45 | * character, e.g. `haslicense:cc-by|cc-by-sa`. Note that combining "other" with other licence types will result in *only* "other" |
| 46 | * licences being returned, because "other" specifically excludes all other licence types. |
| 47 | * |
| 48 | * So for the config above, searching for `haslicense:other` searches for documents with (P275 OR |
| 49 | * P6216 in statement_keywords.property) AND NOT (P275=Q18199165 OR P275=Q19125117 OR P275=Q6938433 |
| 50 | * OR P6216=Q19652 in statement_keywords) |
| 51 | * |
| 52 | * @uses CirrusSearch |
| 53 | * @see https://phabricator.wikimedia.org/T257938 |
| 54 | */ |
| 55 | class HasLicenseFeature extends SimpleKeywordFeature implements FilterQueryFeature { |
| 56 | |
| 57 | /** |
| 58 | * @param array $licenseMapping Mapping between licence search strings and lists of "property=item" statement strings |
| 59 | * e.g. [ |
| 60 | * 'cc-by-sa' => [ |
| 61 | * 'P275=Q18199165', // copyright licence = CC-BY-SA 4.0 |
| 62 | * ], |
| 63 | * 'cc-by' => [ |
| 64 | * 'P275=Q19125117', // copyright licence = CC-BY 2.0 |
| 65 | * ], |
| 66 | * 'unrestricted' => [ |
| 67 | * 'P275=Q6938433', // copyright licence = cc0 |
| 68 | * 'P6216=Q19652', // copyright status = public domain |
| 69 | * ] |
| 70 | * ] |
| 71 | */ |
| 72 | public function __construct( |
| 73 | private readonly array $licenseMapping, |
| 74 | ) { |
| 75 | } |
| 76 | |
| 77 | /** |
| 78 | * @return string[] |
| 79 | */ |
| 80 | protected function getKeywords() { |
| 81 | return [ 'haslicense' ]; |
| 82 | } |
| 83 | |
| 84 | /** |
| 85 | * @param SearchContext $context |
| 86 | * @param string $key The keyword |
| 87 | * @param string $value The value attached to the keyword with quotes stripped |
| 88 | * @param string $quotedValue The original value in the search string, including quotes if used |
| 89 | * @param bool $negated Is the search negated? Not used to generate the returned AbstractQuery, |
| 90 | * that will be negated as necessary. Used for any other building/context necessary. |
| 91 | * @return array Two element array, first an AbstractQuery or null to apply to the |
| 92 | * query. Second a boolean indicating if the quotedValue should be kept in the search |
| 93 | * string (always false for this class) |
| 94 | */ |
| 95 | protected function doApply( SearchContext $context, $key, $value, $quotedValue, $negated ) { |
| 96 | if ( $value === '' ) { |
| 97 | return [ null, false ]; |
| 98 | } |
| 99 | $queries = $this->parseValue( |
| 100 | $key, |
| 101 | $value, |
| 102 | $quotedValue, |
| 103 | '', |
| 104 | '', |
| 105 | $context |
| 106 | ); |
| 107 | if ( count( $queries ) == 0 ) { |
| 108 | $context->setResultsPossible( false ); |
| 109 | return [ null, false ]; |
| 110 | } |
| 111 | |
| 112 | return [ $this->combineQueries( $queries ), false ]; |
| 113 | } |
| 114 | |
| 115 | /** |
| 116 | * @param string[][] $queries queries to combine. See parseValue() for fields. |
| 117 | * @return \Elastica\Query\AbstractQuery |
| 118 | */ |
| 119 | private function combineQueries( array $queries ) { |
| 120 | $return = new BoolQuery(); |
| 121 | $return->setMinimumShouldMatch( 1 ); |
| 122 | foreach ( $queries as $query ) { |
| 123 | if ( $query['occur'] === 'must_not' ) { |
| 124 | $return->addMustNot( new MatchQuery( |
| 125 | $query['field'], |
| 126 | [ 'query' => $query['string'] ] |
| 127 | ) ); |
| 128 | } elseif ( $query['occur'] === 'should' ) { |
| 129 | $return->addShould( new MatchQuery( $query['field'], [ 'query' => $query['string'] ] ) ); |
| 130 | } |
| 131 | } |
| 132 | return $return; |
| 133 | } |
| 134 | |
| 135 | /** |
| 136 | * @param string $key |
| 137 | * @param string $value |
| 138 | * @param string $quotedValue |
| 139 | * @param string $valueDelimiter |
| 140 | * @param string $suffix |
| 141 | * @param WarningCollector $warningCollector |
| 142 | * @return array [ |
| 143 | * [ |
| 144 | * 'occur' => how the clause is added to the combined bool query ('should' or 'must_not'), |
| 145 | * 'field' => document field to run the query against, |
| 146 | * 'string' => string to search for |
| 147 | * ], |
| 148 | * ... |
| 149 | * ] |
| 150 | */ |
| 151 | public function parseValue( |
| 152 | $key, |
| 153 | $value, |
| 154 | $quotedValue, |
| 155 | $valueDelimiter, |
| 156 | $suffix, |
| 157 | WarningCollector $warningCollector |
| 158 | ) { |
| 159 | $queries = []; |
| 160 | $licenseStrings = explode( '|', $value ); |
| 161 | $licenseStrings = array_slice( $licenseStrings, 0, 20 ); |
| 162 | foreach ( $licenseStrings as $licenseString ) { |
| 163 | $queries = array_merge( $queries, $this->licenseStringToQueries( $licenseString ) ); |
| 164 | } |
| 165 | if ( count( $queries ) === 0 ) { |
| 166 | $warningCollector->addWarning( |
| 167 | 'wikibasecirrus-haslicense-feature-no-valid-arguments', |
| 168 | $key |
| 169 | ); |
| 170 | } |
| 171 | return $queries; |
| 172 | } |
| 173 | |
| 174 | private function licenseStringToQueries( string $licenseString ): array { |
| 175 | $queries = []; |
| 176 | if ( $licenseString === 'other' ) { |
| 177 | return $this->getQueriesForOther(); |
| 178 | } |
| 179 | if ( !isset( $this->licenseMapping[ $licenseString ] ) ) { |
| 180 | return $queries; |
| 181 | } |
| 182 | foreach ( $this->licenseMapping[ $licenseString ] as $statementString ) { |
| 183 | $queries[] = [ |
| 184 | 'occur' => 'should', |
| 185 | 'field' => StatementsField::NAME, |
| 186 | 'string' => $statementString, |
| 187 | ]; |
| 188 | } |
| 189 | return $queries; |
| 190 | } |
| 191 | |
| 192 | /** |
| 193 | * For "other" licence types, search for results that match the properties |
| 194 | * but not the statements |
| 195 | */ |
| 196 | private function getQueriesForOther(): array { |
| 197 | $queries = []; |
| 198 | foreach ( $this->licenseMapping as $mapping ) { |
| 199 | foreach ( $mapping as $statementString ) { |
| 200 | [ $propertyId, ] = explode( '=', $statementString ); |
| 201 | if ( !isset( $queries[$propertyId] ) ) { |
| 202 | $queries[$propertyId] = [ |
| 203 | 'occur' => 'should', |
| 204 | 'field' => StatementsField::NAME . '.property', |
| 205 | 'string' => $propertyId, |
| 206 | ]; |
| 207 | } |
| 208 | $queries[] = [ |
| 209 | 'occur' => 'must_not', |
| 210 | 'field' => StatementsField::NAME, |
| 211 | 'string' => $statementString, |
| 212 | ]; |
| 213 | } |
| 214 | } |
| 215 | return array_values( $queries ); |
| 216 | } |
| 217 | |
| 218 | /** |
| 219 | * @param KeywordFeatureNode $node |
| 220 | * @param QueryBuildingContext $context |
| 221 | * @return AbstractQuery|null |
| 222 | */ |
| 223 | public function getFilterQuery( KeywordFeatureNode $node, QueryBuildingContext $context ) { |
| 224 | $statements = $node->getParsedValue(); |
| 225 | if ( $statements === [] ) { |
| 226 | return null; |
| 227 | } |
| 228 | return $this->combineQueries( $statements ); |
| 229 | } |
| 230 | |
| 231 | /** |
| 232 | * License mapping can come from a message, allowing wiki-specific config/overrides, |
| 233 | * controlled by users, or from code config (which overrides messages) |
| 234 | * |
| 235 | * @param Config $searchConfig |
| 236 | * @return array |
| 237 | */ |
| 238 | public static function getConfiguredLicenseMap( Config $searchConfig ) { |
| 239 | // license mapping can come from a message, allowing wiki-specific config/overrides, |
| 240 | // controlled by users, or from code config (which overrides messages) |
| 241 | $licenseMapping = $searchConfig->get( 'LicenseMapping' ) ?: []; |
| 242 | $licenseMessage = wfMessage( 'wikibasecirrus-license-mapping' )->inContentLanguage(); |
| 243 | if ( !$licenseMapping && !$licenseMessage->isDisabled() ) { |
| 244 | $lines = Util::parseSettingsInMessage( $licenseMessage->plain() ); |
| 245 | // reformat lines to allow for whitespace in the license config |
| 246 | $joined = implode( "\n", $lines ); |
| 247 | $stripped = preg_replace( '/\n*?([|,])\n?(?![^\n]+\|)/', '$1', $joined ); |
| 248 | $lines = explode( "\n", $stripped ); |
| 249 | // parse message, add to license mapping |
| 250 | foreach ( $lines as $line ) { |
| 251 | $data = explode( '|', $line ); |
| 252 | if ( count( $data ) === 2 ) { |
| 253 | $licenseMapping[$data[0]] = array_filter( explode( ',', $data[1] ) ); |
| 254 | } |
| 255 | } |
| 256 | } |
| 257 | return $licenseMapping; |
| 258 | } |
| 259 | } |