Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 34 |
|
0.00% |
0 / 4 |
CRAP | |
0.00% |
0 / 1 |
EntitySearchUtils | |
0.00% |
0 / 34 |
|
0.00% |
0 / 4 |
156 | |
0.00% |
0 / 1 |
makeConstScoreQuery | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
2 | |||
normalizeId | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
30 | |||
parseOrNull | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
findTermForDisplay | |
0.00% |
0 / 17 |
|
0.00% |
0 / 1 |
20 |
1 | <?php |
2 | namespace Wikibase\Search\Elastic; |
3 | |
4 | use Elastica\Query\ConstantScore; |
5 | use Elastica\Query\MatchQuery; |
6 | use Wikibase\DataModel\Entity\EntityIdParser; |
7 | use Wikibase\DataModel\Entity\EntityIdParsingException; |
8 | use Wikibase\DataModel\Term\Term; |
9 | use Wikibase\Lib\TermLanguageFallbackChain; |
10 | |
/**
 * Static helper routines shared by the entity search code.
 */
final class EntitySearchUtils {

	/**
	 * Build a constant-score query that matches $text against a single field.
	 *
	 * @param string $field Name of the field to match on
	 * @param string|double $boost Boost assigned to the resulting query
	 * @param string $text Text to look for in the field
	 * @return ConstantScore
	 */
	public static function makeConstScoreQuery( $field, $boost, $text ) {
		$query = new ConstantScore();
		$match = new MatchQuery( $field, $text );

		$query->setFilter( $match );
		$query->setBoost( $boost );

		return $query;
	}

	/**
	 * Turn text that looks like an entity ID into the serialized ID.
	 *
	 * The input is trimmed, stripped of parentheses and upper-cased before
	 * any parsing is attempted, so the following forms are recognized:
	 *  - q42
	 *  - (q42)
	 *  - leading/trailing spaces
	 *  - http://www.wikidata.org/entity/Q42 (via the trailing word)
	 *
	 * @param string $text
	 * @param EntityIdParser $idParser
	 * @return string Serialized ID if one could be parsed; otherwise the
	 *  cleaned-up (trimmed, parentheses removed, upper-cased) text
	 */
	public static function normalizeId( $text, EntityIdParser $idParser ) {
		// TODO: upper-casing here is a bit hacky; a better way would be to make the
		// field case-insensitive or to add a new subfield which is case-insensitive
		$trimmed = trim( $text );
		$withoutParens = str_replace( [ '(', ')' ], '', $trimmed );
		$candidate = strtoupper( $withoutParens );

		// First try the whole cleaned-up string as an ID.
		$entityId = self::parseOrNull( $candidate, $idParser );

		// Failing that, try only the last word of the string (covers URL-like input).
		if ( !$entityId && preg_match( '/\b(\w+)$/', $candidate, $tail ) && $tail[1] ) {
			$entityId = self::parseOrNull( $tail[1], $idParser );
		}

		return $entityId ? $entityId->getSerialization() : $candidate;
	}

	/**
	 * Parse an entity ID, mapping a parsing failure to null instead of an exception.
	 *
	 * @param string $text
	 * @param EntityIdParser $idParser
	 * @return null|\Wikibase\DataModel\Entity\EntityId
	 */
	public static function parseOrNull( $text, EntityIdParser $idParser ) {
		try {
			return $idParser->parse( $text );
		} catch ( EntityIdParsingException $parseFailure ) {
			return null;
		}
	}

	/**
	 * Pick the term to display from the source data, using the fallback chain.
	 *
	 * @param array $sourceData
	 * @param string $field Key of $sourceData holding the candidate values
	 * @param TermLanguageFallbackChain $termFallbackChain
	 * @return null|Term Null when the field is empty or the chain yields nothing
	 */
	public static function findTermForDisplay( $sourceData, $field, TermLanguageFallbackChain $termFallbackChain ) {
		if ( empty( $sourceData[$field] ) ) {
			return null;
		}

		$candidates = $sourceData[$field];

		// The shape of the first entry decides how all entries are treated.
		if ( is_array( reset( $candidates ) ) ) {
			// Each entry holds several values (like for labels): keep only the first of each.
			$firstValues = [];
			foreach ( $candidates as $key => $values ) {
				$firstValues[$key] = $values[0] ?? null;
			}
			$candidates = $firstValues;
		}

		// Drop empty ones
		$candidates = array_filter( $candidates );

		$preferred = $termFallbackChain->extractPreferredValueOrAny( $candidates );
		if ( !$preferred ) {
			return null;
		}

		return new Term( $preferred['language'], $preferred['value'] );
	}

}