Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
46.99% |
39 / 83 |
|
14.29% |
1 / 7 |
CRAP | |
0.00% |
0 / 1 |
| ElasticTermResult | |
46.99% |
39 / 83 |
|
14.29% |
1 / 7 |
79.59 | |
0.00% |
0 / 1 |
| __construct | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
| getSourceFiltering | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
6 | |||
| getFields | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| getHighlightingConfiguration | |
0.00% |
0 / 35 |
|
0.00% |
0 / 1 |
12 | |||
| transformElasticsearchResult | |
95.45% |
21 / 22 |
|
0.00% |
0 / 1 |
6 | |||
| getTermSearchResult | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
| extractTermFromHighlight | |
93.75% |
15 / 16 |
|
0.00% |
0 / 1 |
6.01 | |||
| createEmptyResult | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| 1 | <?php |
| 2 | |
| 3 | namespace Wikibase\Search\Elastic; |
| 4 | |
| 5 | use CirrusSearch\Search\BaseResultsType; |
| 6 | use Wikibase\DataModel\Term\Term; |
| 7 | use Wikibase\Lib\Interactors\TermSearchResult; |
| 8 | use Wikibase\Lib\TermLanguageFallbackChain; |
| 9 | use Wikibase\Search\Elastic\Fields\DescriptionsField; |
| 10 | use Wikibase\Search\Elastic\Fields\LabelsField; |
| 11 | |
/**
 * Results type for searching an entity by its {@link LabelsField label or alias},
 * which also carries the {@link DescriptionsField description} used for display.
 *
 * Fully implemented by {@link EntityElasticTermResult} for Wikibase entities.
 * May also be used by other extensions, provided they use those same fields
 * (via {@link \Wikibase\Search\Elastic\Fields\LabelsProviderFieldDefinitions LabelsProviderFieldDefinitions}
 * and {@link \Wikibase\Search\Elastic\Fields\DescriptionsProviderFieldDefinitions DescriptionsProviderFieldDefinitions}).
 *
 * @license GPL-2.0-or-later
 * @author Stas Malyshev
 */
abstract class ElasticTermResult extends BaseResultsType {

	/**
	 * Pattern for highlighter fragments that carry offsets.
	 * The highlighter can return offsets as: 1:1-XX:YY|Text Snippet
	 * or even SNIPPET_START:MATCH1_START-MATCH1_END,MATCH2_START-MATCH2_END,...:SNIPPET_END|Text
	 * Capture group 1 is the text after the "|".
	 */
	public const HIGHLIGHT_PATTERN = '/^\d+:\d+-\d+(?:,\d+-\d+)*:\d+\|(.+)/';

	/**
	 * List of language codes in the search fallback chain, the first
	 * is the preferred language.
	 * @var string[]
	 */
	private array $searchLanguageCodes;

	/**
	 * Display fallback chain.
	 */
	private TermLanguageFallbackChain $termFallbackChain;

	/**
	 * Sub-field of the labels field that is highlighted ('prefix' or 'plain').
	 */
	private string $highlightSubField;

	/**
	 * @param string[] $searchLanguageCodes Language fallback chain for search
	 * @param TermLanguageFallbackChain $displayFallbackChain Fallback chain for display
	 * @param string $highlightSubField 'prefix' or 'plain'
	 */
	public function __construct(
		array $searchLanguageCodes,
		TermLanguageFallbackChain $displayFallbackChain,
		string $highlightSubField = 'prefix'
	) {
		$this->searchLanguageCodes = $searchLanguageCodes;
		$this->termFallbackChain = $displayFallbackChain;
		$this->highlightSubField = $highlightSubField;
	}

	/**
	 * Get the source filtering to be used loading the result.
	 *
	 * Extends the parent's list with the label and description sub-fields of
	 * every language code the display fallback chain may need to fetch.
	 *
	 * @return string[]
	 */
	public function getSourceFiltering() {
		$fields = parent::getSourceFiltering();
		foreach ( $this->termFallbackChain->getFetchLanguageCodes() as $code ) {
			$fields[] = LabelsField::NAME . '.' . $code;
			$fields[] = DescriptionsField::NAME . '.' . $code;
		}
		return $fields;
	}

	/**
	 * Get the fields to load. This results type requests none.
	 *
	 * @return string[]
	 */
	public function getFields() {
		return [];
	}

	/**
	 * Get the highlighting configuration.
	 *
	 * Fields are registered in this order: the title, then the labels sub-field
	 * of each search language (in fallback-chain order), then a wildcard over
	 * all languages. The order is kept stable because the label fields set
	 * 'skip_if_last_matched'.
	 *
	 * @param array $highlightSource configuration for how to highlight the source.
	 *  Empty if source should be ignored. Not used by this implementation.
	 * @return array|null highlighting configuration for elasticsearch
	 */
	public function getHighlightingConfiguration( array $highlightSource ) {
		$labelsName = LabelsField::NAME;
		$subField = $this->highlightSubField;

		// Identical settings for every label field, so build them once.
		$labelHighlight = [
			'type' => 'experimental',
			'fragmenter' => 'none',
			'order' => $subField === 'plain' ? 'score' : 'none',
			'number_of_fragments' => 0,
			'options' => [
				'skip_if_last_matched' => true,
				'return_snippets_and_offsets' => true,
			],
		];

		$fields = [
			'title' => [
				'type' => 'experimental',
				'fragmenter' => 'none',
				'number_of_fragments' => 0,
				'matched_fields' => [ 'title.keyword' ],
			],
		];
		foreach ( $this->searchLanguageCodes as $code ) {
			$fields["$labelsName.$code.$subField"] = $labelHighlight;
		}
		$fields["$labelsName.*.$subField"] = $labelHighlight;

		return [
			// Empty tags: the matched text is returned without markup.
			'pre_tags' => [ '' ],
			'post_tags' => [ '' ],
			'fields' => $fields,
		];
	}

	/**
	 * Convert search result from ElasticSearch result set to TermSearchResult.
	 *
	 * Hits without usable highlighting data, and hits for which
	 * {@link getTermSearchResult} returns null, are left out.
	 *
	 * @param \Elastica\ResultSet $result
	 * @return TermSearchResult[] Search results keyed by entity ID serialization;
	 *  the concrete types vary by implementation.
	 */
	public function transformElasticsearchResult( \Elastica\ResultSet $result ) {
		$results = [];
		foreach ( $result->getResults() as $r ) {
			$sourceData = $r->getSource();

			// Highlight part contains information about what has actually been matched.
			$highlight = $r->getHighlights();
			if ( !$highlight ) {
				// Something went wrong, we don't have any highlighting data
				continue;
			}

			if ( !empty( $highlight['title'] ) ) {
				// If we matched title, this means it's a match by ID
				$matchedTermType = 'entityId';
				$matchedTerm = new Term( 'qid', $sourceData['title'] );
			} else {
				$extracted = $this->extractTermFromHighlight( $highlight, $sourceData );
				if ( $extracted === null ) {
					// The highlighter reported a field but no fragment for it
					continue;
				}
				[ $matchedTermType, $langCode, $term ] = $extracted;
				$matchedTerm = new Term( $langCode, $term );
			}

			// Only resolve display terms for hits that are actually kept.
			$displayLabel = EntitySearchUtils::findTermForDisplay(
				$sourceData, LabelsField::NAME, $this->termFallbackChain
			);
			$displayDescription = EntitySearchUtils::findTermForDisplay(
				$sourceData, DescriptionsField::NAME, $this->termFallbackChain
			);
			if ( !$displayLabel ) {
				// This should not happen, but just in case, it's better to return something
				$displayLabel = $matchedTerm;
			}

			$termSearchResult = $this->getTermSearchResult(
				$sourceData, $matchedTerm, $matchedTermType, $displayLabel, $displayDescription
			);
			if ( $termSearchResult !== null ) {
				$results[$termSearchResult->getEntityIdSerialization()] = $termSearchResult;
			}
		}

		return $results;
	}

	/**
	 * Turn the given result data into a {@link TermSearchResult}
	 * (or skip this result if null is returned).
	 */
	abstract protected function getTermSearchResult(
		array $sourceData,
		Term $matchedTerm,
		string $matchedTermType,
		?Term $displayLabel,
		?Term $displayDescription
	): ?TermSearchResult;

	/**
	 * Extract term, language and type from highlighter results.
	 *
	 * Only the first highlighted field and its first fragment are considered.
	 * Highlighter returns:
	 * {
	 *   labels.en.prefix: [
	 *     "metre" // or "0:0-5:5|metre"
	 *   ]
	 * }
	 *
	 * @param array $highlight Data from highlighter
	 * @param array[] $sourceData Data from _source
	 * @return array|null Array of: [string $termType, string $languageCode, string $term],
	 *  or null if the first highlighted field has no fragment.
	 */
	private function extractTermFromHighlight( array $highlight, array $sourceData ): ?array {
		$fragments = reset( $highlight ); // Take the first one
		$field = key( $highlight );
		if ( !is_array( $fragments ) || !isset( $fragments[0] ) ) {
			return null;
		}
		$term = $fragments[0]; // Highlighter returns array

		// Quote both dynamic parts, including the "/" delimiter, so that they
		// are always matched literally.
		$fieldPattern = '/^' . preg_quote( LabelsField::NAME, '/' )
			. '\.([^.]+)\.' . preg_quote( $this->highlightSubField, '/' ) . '$/';
		if ( !preg_match( $fieldPattern, $field, $match ) ) {
			// This is weird since we didn't ask to match anything else,
			// but we'll return it anyway for debugging.
			return [ 'label', 'unknown', $term ];
		}
		$langCode = $match[1];

		// Without offsets we cannot tell the position, so assume the first value.
		$isFirst = true;
		if ( preg_match( self::HIGHLIGHT_PATTERN, $term, $termMatch ) ) {
			// A leading "0" means the snippet starts at offset 0.
			$isFirst = ( $term[0] === '0' );
			$term = $termMatch[1];
		}

		if ( !empty( $sourceData[LabelsField::NAME][$langCode] ) ) {
			// Here we have match in one of the languages we asked for.
			// Primary label always comes first, so if it's not the first one,
			// it's an alias.
			$isLabel = ( $sourceData[LabelsField::NAME][$langCode][0] === $term );
		} else {
			// Here we have match in one of the "other" languages.
			// If it's the first one in the list, it's label, otherwise it is alias.
			$isLabel = $isFirst;
		}

		return [ $isLabel ? 'label' : 'alias', $langCode, $term ];
	}

	/**
	 * @return TermSearchResult[] Empty set of search results
	 */
	public function createEmptyResult() {
		return [];
	}

}