Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 178 |
|
0.00% |
0 / 5 |
CRAP | |
0.00% |
0 / 1 |
QueryGeoSearchElastic | |
0.00% |
0 / 178 |
|
0.00% |
0 / 5 |
2070 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
run | |
0.00% |
0 / 139 |
|
0.00% |
0 / 1 |
812 | |||
makeCoord | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
12 | |||
filterCoord | |
0.00% |
0 / 11 |
|
0.00% |
0 / 1 |
110 | |||
addDebugInfo | |
0.00% |
0 / 19 |
|
0.00% |
0 / 1 |
12 |
1 | <?php |
2 | |
3 | namespace GeoData\Api; |
4 | |
5 | use ApiPageSet; |
6 | use ApiQuery; |
7 | use FormatJson; |
8 | use GeoData\Coord; |
9 | use GeoData\Searcher; |
10 | use MediaWiki\MediaWikiServices; |
11 | use MediaWiki\Title\Title; |
12 | |
/**
 * Geosearch API query module backed by Elasticsearch.
 *
 * Translates the request parameters into an Elastica query against the nested
 * "coordinates" field, runs it through GeoData\Searcher and then either adds
 * the matching coordinates to the API result or feeds the matching pages into
 * a generator page set.
 */
class QueryGeoSearchElastic extends QueryGeoSearch {
	/** @var array|null Request parameters; assigned at the start of run() */
	private $params;

	/**
	 * @param ApiQuery $query
	 * @param string $moduleName
	 */
	public function __construct( ApiQuery $query, $moduleName ) {
		parent::__construct( $query, $moduleName );
	}

	/**
	 * Builds and executes the Elasticsearch query and outputs the results.
	 *
	 * Relies on $this->coord, $this->bbox, $this->radius and $this->idToExclude,
	 * which are not assigned in this class (presumably populated by
	 * parent::run() - verify against QueryGeoSearch).
	 *
	 * @param ApiPageSet|null $resultPageSet Page set to populate when used as a
	 *  generator, or null to write the results directly into the API result
	 */
	protected function run( $resultPageSet = null ): void {
		global $wgDefaultGlobe;

		parent::run( $resultPageSet );
		// @fixme: refactor to make this unnecessary
		$this->resetQueryParams();

		$params = $this->params = $this->extractRequestParams();
		$namespaces = array_map( 'intval', $params['namespace'] );

		// $filter is the complete nested filter; $nestedPropsFilter holds only the
		// per-coordinate property conditions so that it can be reused for sorting below.
		$filter = new \Elastica\Query\BoolQuery();
		$nestedPropsFilter = new \Elastica\Query\BoolQuery();

		if ( $this->idToExclude ) {
			$filter->addMustNot( new \Elastica\Query\Term( [ '_id' => $this->idToExclude ] ) );
		}
		// Only Earth is supported
		$nestedPropsFilter->addFilter( new \Elastica\Query\Term( [ 'coordinates.globe' => 'earth' ] ) );
		if ( isset( $params['maxdim'] ) ) {
			$nestedPropsFilter->addFilter( new \Elastica\Query\Range( 'coordinates.dim',
				[ 'to' => $params['maxdim'] ] ) );
		}

		$primary = $params['primary'];
		if ( $primary !== 'all' ) {
			// Any value other than 'all' and 'primary' selects non-primary coordinates
			$nestedPropsFilter->addFilter( new \Elastica\Query\Term( [
				'coordinates.primary' => $primary === 'primary'
			] ) );
		}

		// Spatial restriction: bounding box when one was requested, radius around
		// the search point otherwise
		if ( $this->bbox ) {
			$distanceFilter = new \Elastica\Query\GeoBoundingBox( 'coordinates.coord', [
				[ 'lat' => $this->bbox->lat1, 'lon' => $this->bbox->lon1 ],
				[ 'lat' => $this->bbox->lat2, 'lon' => $this->bbox->lon2 ],
			] );
		} else {
			$distanceFilter =
				new \Elastica\Query\GeoDistance( 'coordinates.coord',
					[ 'lat' => $this->coord->lat, 'lon' => $this->coord->lon ],
					$this->radius . 'm' );
		}

		$filter->addFilter( $nestedPropsFilter );
		$filter->addFilter( $distanceFilter );

		$query = new \Elastica\Query();
		// Fetch only the coordinate fields needed to build the output
		$fields = array_map(
			static function ( $prop ) {
				return "coordinates.$prop";
			},
			array_merge(
				[ 'coord', 'primary' ],
				$params['prop']
			)
		);
		$query->setParam( '_source', $fields );

		$nested = new \Elastica\Query\Nested();
		$nested->setPath( 'coordinates' )->setQuery( $filter );
		// The namespace filter is added only when fewer namespaces were requested
		// than there are valid namespaces
		if ( count( $namespaces ) <
			count( MediaWikiServices::getInstance()->getNamespaceInfo()->getValidNamespaces() )
		) {
			$outerFilter = new \Elastica\Query\BoolQuery();
			$outerFilter->addFilter( $nested );
			$outerFilter->addFilter( new \Elastica\Query\Terms( 'namespace', $namespaces ) );
			$query->setPostFilter( $outerFilter );
		} else {
			$query->setPostFilter( $nested );
		}

		$searcher = new Searcher( $this->getUser() );

		if ( $params['sort'] === 'relevance' ) {
			// Should be in sync with
			// https://gerrit.wikimedia.org/g/mediawiki/extensions/CirrusSearch/+/ae9c7338/includes/Search/SearchRequestBuilder.php#97
			$rescores = $searcher->getRelevanceRescoreConfigurations( $namespaces );
			if ( $rescores !== [] ) {
				$query->setParam( 'rescore', $rescores );
			}
		} else {
			$query->addSort( [
				'_geo_distance' => [
					'nested' => [
						'path' => 'coordinates',
						'filter' => $nestedPropsFilter->toArray(),
					],
					'coordinates.coord' => [
						'lat' => $this->coord->lat,
						'lon' => $this->coord->lon
					],
					'order' => 'asc',
					'unit' => 'm'
				]
			] );
		}

		$query->setSize( $params['limit'] );

		$status = $searcher->performSearch( $query, $namespaces, 'GeoData_spatial_search' );
		if ( !$status->isOk() ) {
			$this->dieStatus( $status );
		}

		$this->addMessagesFromStatus( $status );
		$resultSet = $status->getValue();

		if ( !empty( $params['debug'] ) ) {
			$this->addDebugInfo( $resultSet, $query );
		}

		$data = $resultSet->getResponse()->getData();

		if ( !isset( $data['hits']['hits'] ) ) {
			wfDebugLog( 'CirrusSearch', 'Unexpected result set returned by Elasticsearch', 'all', [
				'elastic_query' => FormatJson::encode( $query->toArray() ),
				'content' => FormatJson::encode( $data ),
			] );
			$this->dieDebug( __METHOD__, 'Unexpected result set returned by Elasticsearch' );
		}

		// A page may carry several coordinates; each one is re-checked with
		// filterCoord() and only the ones that pass are kept.
		$ids = [];
		$coordinates = [];
		foreach ( $data['hits']['hits'] as $page ) {
			$id = $page['_id'];
			// Tolerate hits without a coordinates field instead of raising a warning
			foreach ( $page['_source']['coordinates'] ?? [] as $coordArray ) {
				$coord = $this->makeCoord( $coordArray );
				if ( !$this->filterCoord( $coord ) ) {
					continue;
				}
				$coord->pageId = $id;
				$coordinates[] = $coord;
				$ids[$id] = true;
			}
		}

		if ( $coordinates === [] ) {
			// No results, no point in doing anything else
			return;
		}

		if ( $params['sort'] === 'distance' ) {
			// Distances are floats: usort() casts the comparator result to int, so a
			// plain subtraction would treat coordinates less than 1 m apart as equal.
			usort( $coordinates, static function ( $coord1, $coord2 ) {
				return $coord1->distance <=> $coord2->distance;
			} );
		}

		// Look up the matching pages in the database
		$this->addWhere( [ 'page_id' => array_keys( $ids ) ] );
		$this->addTables( 'page' );
		if ( $resultPageSet === null ) {
			$this->addFields( [ 'page_id', 'page_title', 'page_namespace' ] );
		} else {
			$this->addFields( $resultPageSet->getPageTableFields() );
		}

		$res = $this->select( __METHOD__ );

		if ( $resultPageSet === null ) {
			/** @var Title[] $titles */
			$titles = [];
			foreach ( $res as $row ) {
				$titles[$row->page_id] = Title::newFromRow( $row );
			}

			$limit = $params['limit'];
			$result = $this->getResult();

			foreach ( $coordinates as $coord ) {
				// The limit counts coordinates examined, including ones skipped below
				if ( !$limit-- ) {
					break;
				}
				$id = $coord->pageId;
				if ( !isset( $titles[$id] ) ) {
					// Page found by the search but not returned by the database query
					continue;
				}
				$title = $titles[$id];
				$vals = [
					'pageid' => intval( $coord->pageId ),
					'ns' => $title->getNamespace(),
					'title' => $title->getPrefixedText(),
					'lat' => floatval( $coord->lat ),
					'lon' => floatval( $coord->lon ),
					'dist' => round( $coord->distance, 1 ),
					'primary' => boolval( $coord->primary ),
				];

				foreach ( $params['prop'] as $prop ) {
					// Don't output default globe
					if ( !( $prop === 'globe' && $coord->$prop === $wgDefaultGlobe ) ) {
						$vals[$prop] = $coord->$prop;
					}
				}
				$fit = $result->addValue( [ 'query', $this->getModuleName() ], null, $vals );
				if ( !$fit ) {
					break;
				}
			}
		} else {
			$db = MediaWikiServices::getInstance()->getDBLoadBalancer()->getConnection( DB_REPLICA );
			$resultPageSet->populateFromQueryResult( $db, $res );
			$res->rewind();
			// NOTE(review): the generator index comes from the position in the database
			// result, not from the order of $coordinates - confirm this is intended.
			foreach ( $res as $row ) {
				$title = Title::newFromRow( $row );
				$resultPageSet->setGeneratorData( $title, [ 'index' => $res->key() - 1 ] );
			}
		}
	}

	/**
	 * Creates a Coord class instance from an array returned by search
	 *
	 * Copies every field listed in Coord::FIELD_MAPPING that is present in the hit
	 * and sets the distance from the search point ($this->coord).
	 *
	 * @param array $hit Search hit; must contain 'coord' with 'lat' and 'lon'
	 *
	 * @return Coord
	 */
	private function makeCoord( array $hit ): Coord {
		$lat = $hit['coord']['lat'];
		$lon = $hit['coord']['lon'];
		$coord = new Coord( $lat, $lon );
		foreach ( Coord::FIELD_MAPPING as $field => $_ ) {
			if ( isset( $hit[$field] ) ) {
				$coord->$field = $hit[$field];
			}
		}
		$coord->distance = $this->coord->distanceTo( $coord );
		return $coord;
	}

	/**
	 * Checks whether given coordinates fall within the requested limits
	 *
	 * Checks the radius (only when no bounding box is in use), the globe, the
	 * 'maxdim' parameter and the 'primary' parameter.
	 *
	 * @param Coord $coord
	 *
	 * @return bool If false these coordinates should be discarded
	 */
	private function filterCoord( Coord $coord ): bool {
		if ( !$this->bbox && $coord->distance > $this->radius ) {
			return false;
		}
		// Only one globe is supported for search, this is future-proof
		if ( $coord->globe != $this->coord->globe ) {
			return false;
		}
		if ( isset( $this->params['maxdim'] ) && $coord->dim > $this->params['maxdim'] ) {
			return false;
		}
		// @phan-suppress-next-line PhanTypeArraySuspiciousNullable $params always set here
		$primary = $this->params['primary'];
		// Strict comparison, consistent with how run() tests the same parameter
		if ( ( $primary === 'primary' && !$coord->primary )
			|| ( $primary === 'secondary' && $coord->primary )
		) {
			return false;
		}
		return true;
	}

	/**
	 * Adds debug information to API result
	 *
	 * Outputs the JSON-encoded query plus selected entries of the response's
	 * transfer info under the 'geodata-debug' key.
	 *
	 * @param \Elastica\ResultSet $resultSet
	 * @param \Elastica\Query $query
	 */
	private function addDebugInfo( \Elastica\ResultSet $resultSet, \Elastica\Query $query ): void {
		$ti = $resultSet->getResponse()->getTransferInfo();
		// Transfer info entries to expose; ones missing from $ti are skipped
		$neededData = [
			'url',
			'total_time',
			'namelookup_time',
			'connect_time',
			'pretransfer_time',
			'size_upload',
			'size_download',
			'starttransfer_time',
			'redirect_time',
		];
		$debug = [
			'query' => FormatJson::encode( $query->toArray(), true, FormatJson::UTF8_OK ),
		];
		foreach ( $neededData as $name ) {
			if ( isset( $ti[$name] ) ) {
				$debug[$name] = $ti[$name];
			}
		}
		$this->getResult()->addValue( null, 'geodata-debug', $debug );
	}
}