Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
80.77% |
84 / 104 |
|
66.67% |
6 / 9 |
CRAP | |
0.00% |
0 / 1 |
SimpleSuggester | |
80.77% |
84 / 104 |
|
66.67% |
6 / 9 |
29.44 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
setDeprecatedPropertyIds | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
setClassifyingPropertyIds | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
setInitialSuggestions | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
2 | |||
setEventLogger | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getSuggestions | |
97.50% |
39 / 40 |
|
0.00% |
0 / 1 |
9 | |||
suggestByPropertyIds | |
100.00% |
11 / 11 |
|
100.00% |
1 / 1 |
1 | |||
suggestByItem | |
53.85% |
21 / 39 |
|
0.00% |
0 / 1 |
11.82 | |||
buildResult | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
2 |
1 | <?php |
2 | |
3 | namespace PropertySuggester\Suggesters; |
4 | |
5 | use InvalidArgumentException; |
6 | use LogicException; |
7 | use PropertySuggester\EventLogger; |
8 | use Wikibase\DataModel\Entity\EntityIdValue; |
9 | use Wikibase\DataModel\Entity\Item; |
10 | use Wikibase\DataModel\Entity\ItemId; |
11 | use Wikibase\DataModel\Entity\NumericPropertyId; |
12 | use Wikibase\DataModel\Snak\PropertyValueSnak; |
13 | use Wikimedia\Rdbms\ILoadBalancer; |
14 | use Wikimedia\Rdbms\IResultWrapper; |
15 | use Wikimedia\Rdbms\SelectQueryBuilder; |
16 | |
/**
 * A SuggesterEngine implementation that creates suggestions via SQL queries.
 * Needs the wbs_propertypairs table filled with pair probabilities.
 *
 * An EventLogger is optional: when none has been injected through
 * setEventLogger(), suggestions are computed without recording any metrics.
 *
 * @author BP2013N2
 * @license GPL-2.0-or-later
 */
class SimpleSuggester implements SuggesterEngine {

	/**
	 * @var int[] Numeric property ids that are excluded when only new suggestions are requested.
	 */
	private $deprecatedPropertyIds = [];

	/**
	 * @var array Numeric property ids as keys, values are meaningless.
	 */
	private $classifyingPropertyIds = [];

	/**
	 * @var Suggestion[] Returned as-is when there are no property ids to base suggestions on.
	 */
	private $initialSuggestions = [];

	/**
	 * @var ILoadBalancer
	 */
	private $lb;

	/**
	 * @var EventLogger|null Null until setEventLogger() has been called.
	 */
	private $eventLogger;

	/**
	 * @param ILoadBalancer $lb Used to obtain the replica connection for the suggestion query
	 */
	public function __construct( ILoadBalancer $lb ) {
		$this->lb = $lb;
	}

	/**
	 * @param int[] $deprecatedPropertyIds
	 */
	public function setDeprecatedPropertyIds( array $deprecatedPropertyIds ) {
		$this->deprecatedPropertyIds = $deprecatedPropertyIds;
	}

	/**
	 * @param int[] $classifyingPropertyIds
	 */
	public function setClassifyingPropertyIds( array $classifyingPropertyIds ) {
		// Flipped so that membership can be tested with isset() on the numeric id.
		$this->classifyingPropertyIds = array_flip( $classifyingPropertyIds );
	}

	/**
	 * Builds the fallback suggestions, each with a probability of 1.0.
	 *
	 * @param int[] $initialSuggestionIds
	 */
	public function setInitialSuggestions( array $initialSuggestionIds ) {
		$suggestions = [];
		foreach ( $initialSuggestionIds as $id ) {
			$suggestions[] = new Suggestion( NumericPropertyId::newFromNumber( $id ), 1.0 );
		}

		$this->initialSuggestions = $suggestions;
	}

	/**
	 * @param EventLogger $eventLogger
	 */
	public function setEventLogger( EventLogger $eventLogger ) {
		$this->eventLogger = $eventLogger;
	}

	/**
	 * Stores the milliseconds elapsed since $startTime on the event logger, if one is set.
	 *
	 * @param float $startTime Value of microtime( true ) taken when the request started
	 */
	private function recordRequestDuration( $startTime ) {
		if ( $this->eventLogger !== null ) {
			$this->eventLogger->setRequestDuration( (int)( ( microtime( true ) - $startTime ) * 1000 ) );
		}
	}

	/**
	 * Queries wbs_propertypairs for the properties that co-occur with the given ones.
	 *
	 * Returns the initial suggestions when $propertyIds is empty. Otherwise rows are
	 * matched either by the ( pid1, qid1 ) tuples or, when no tuples are given, by
	 * pid1 alone; the probability of each candidate is the sum of the matching pair
	 * probabilities divided by the number of given property ids.
	 *
	 * @param int[] $propertyIds
	 * @param array[] $idTuples Array of ( int property ID, int item ID ) tuples
	 * @param int $limit
	 * @param float $minProbability
	 * @param string $context
	 * @param string $include One of the self::SUGGEST_* constants
	 * @throws InvalidArgumentException
	 * @return Suggestion[]
	 */
	private function getSuggestions(
		array $propertyIds,
		array $idTuples,
		$limit,
		$minProbability,
		$context,
		$include
	) {
		// The logger is optional, so never dereference it without a null check.
		if ( $this->eventLogger !== null ) {
			$this->eventLogger->setPropertySuggesterName( 'PropertySuggester' );
		}
		$startTime = microtime( true );

		if ( !is_int( $limit ) ) {
			throw new InvalidArgumentException( '$limit must be int!' );
		}
		if ( !is_float( $minProbability ) ) {
			throw new InvalidArgumentException( '$minProbability must be float!' );
		}
		// Strict comparison: with loose comparison values such as true would pass.
		if ( !in_array( $include, [ self::SUGGEST_ALL, self::SUGGEST_NEW ], true ) ) {
			throw new InvalidArgumentException( '$include must be one of the SUGGEST_* constants!' );
		}
		if ( !$propertyIds ) {
			$this->recordRequestDuration( $startTime );
			return $this->initialSuggestions;
		}

		$excludedIds = [];
		if ( $include === self::SUGGEST_NEW ) {
			// Only new suggestions wanted: drop the given properties and the deprecated ones.
			$excludedIds = array_merge( $propertyIds, $this->deprecatedPropertyIds );
		}

		$count = count( $propertyIds );

		$dbr = $this->lb->getConnection( DB_REPLICA );

		$tupleConditions = [];
		foreach ( $idTuples as [ $pid, $qid ] ) {
			$tupleConditions[] = $dbr->expr( 'pid1', '=', (int)$pid )->and( 'qid1', '=', (int)$qid );
		}

		if ( !$tupleConditions ) {
			$condition = $dbr->expr( 'pid1', '=', $propertyIds );
		} else {
			$condition = $dbr->orExpr( $tupleConditions );
		}

		// $count is an int from count() and $minProbability was validated as float
		// above, so neither can carry arbitrary SQL into the query string.
		$res = $dbr->newSelectQueryBuilder()
			->select( [
				'pid' => 'pid2',
				'prob' => "sum(probability)/$count",
			] )
			->from( 'wbs_propertypairs' )
			->where( $condition )
			->andWhere( [ 'context' => $context ] )
			->andWhere( $excludedIds ? $dbr->expr( 'pid2', '!=', $excludedIds ) : [] )
			->groupBy( 'pid2' )
			->having( 'prob > ' . $minProbability )
			->orderBy( 'prob', SelectQueryBuilder::SORT_DESC )
			->limit( $limit )
			->caller( __METHOD__ )
			->fetchResultSet();

		$results = $this->buildResult( $res );
		$this->recordRequestDuration( $startTime );
		return $results;
	}

	/**
	 * @see SuggesterEngine::suggestByPropertyIds
	 * @param NumericPropertyId[] $propertyIds
	 * @param ItemId[] $typesIds Not used by this implementation
	 * @param int $limit
	 * @param float $minProbability
	 * @param string $context
	 * @param string $include One of the self::SUGGEST_* constants
	 * @throws InvalidArgumentException
	 * @return Suggestion[]
	 */
	public function suggestByPropertyIds(
		array $propertyIds,
		array $typesIds,
		$limit,
		$minProbability,
		$context,
		$include
	) {
		$numericIds = array_map( static function ( NumericPropertyId $propertyId ) {
			return $propertyId->getNumericId();
		}, $propertyIds );

		return $this->getSuggestions(
			$numericIds,
			[],
			$limit,
			$minProbability,
			$context,
			$include
		);
	}

	/**
	 * Suggests properties based on the main snaks of the item's statements.
	 *
	 * Statements of non-classifying properties contribute a ( property id, 0 ) tuple.
	 * Statements of classifying properties contribute a ( property id, item id ) tuple
	 * when they have a value, and no tuple at all otherwise.
	 *
	 * @see SuggesterEngine::suggestByItem
	 *
	 * @param Item $item
	 * @param int $limit
	 * @param float $minProbability
	 * @param string $context
	 * @param string $include One of the self::SUGGEST_* constants
	 * @throws LogicException
	 * @throws InvalidArgumentException
	 * @return Suggestion[]
	 */
	public function suggestByItem( Item $item, $limit, $minProbability, $context, $include ) {
		$ids = [];
		$idTuples = [];
		$types = [];

		foreach ( $item->getStatements()->toArray() as $statement ) {
			$mainSnak = $statement->getMainSnak();

			$id = $mainSnak->getPropertyId();
			if ( !( $id instanceof NumericPropertyId ) ) {
				throw new LogicException( 'PropertySuggester is incompatible with non-numeric Property IDs' );
			}

			$numericPropertyId = $id->getNumericId();
			$ids[] = $numericPropertyId;

			if ( !isset( $this->classifyingPropertyIds[$numericPropertyId] ) ) {
				$idTuples[] = [ $numericPropertyId, 0 ];
			} elseif ( $mainSnak instanceof PropertyValueSnak ) {
				$dataValue = $mainSnak->getDataValue();

				if ( !( $dataValue instanceof EntityIdValue ) ) {
					throw new LogicException(
						"Property $numericPropertyId in wgPropertySuggesterClassifyingPropertyIds"
						. ' does not have value type wikibase-entityid'
					);
				}

				$entityId = $dataValue->getEntityId();

				if ( !( $entityId instanceof ItemId ) ) {
					throw new LogicException(
						"PropertyValueSnak for $numericPropertyId, configured in " .
						' wgPropertySuggesterClassifyingPropertyIds, has an unexpected value ' .
						'and data type (not wikibase-item).'
					);
				}

				$numericEntityId = $entityId->getNumericId();
				$idTuples[] = [ $numericPropertyId, $numericEntityId ];
				$types[] = $numericEntityId;
			}
		}

		// The logger is optional, so never dereference it without a null check.
		if ( $this->eventLogger !== null ) {
			$this->eventLogger->setExistingProperties( array_map( 'strval', $ids ) );
			$this->eventLogger->setExistingTypes( array_map( 'strval', $types ) );
		}

		return $this->getSuggestions(
			$ids,
			$idTuples,
			$limit,
			$minProbability,
			$context,
			$include
		);
	}

	/**
	 * Converts the rows of the SQL result to Suggestion objects
	 *
	 * @param IResultWrapper $res Rows with the fields "pid" and "prob"
	 * @return Suggestion[]
	 */
	private function buildResult( IResultWrapper $res ) {
		$resultArray = [];
		foreach ( $res as $row ) {
			$pid = NumericPropertyId::newFromNumber( $row->pid );
			$suggestion = new Suggestion( $pid, $row->prob );
			$resultArray[] = $suggestion;
		}
		return $resultArray;
	}

}