Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
0.00% |
0 / 176 |
|
0.00% |
0 / 5 |
CRAP | |
0.00% |
0 / 1 |
| QueryGeoSearchElastic | |
0.00% |
0 / 176 |
|
0.00% |
0 / 5 |
2070 | |
0.00% |
0 / 1 |
| __construct | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| run | |
0.00% |
0 / 136 |
|
0.00% |
0 / 1 |
812 | |||
| makeCoord | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
12 | |||
| filterCoord | |
0.00% |
0 / 11 |
|
0.00% |
0 / 1 |
110 | |||
| addDebugInfo | |
0.00% |
0 / 19 |
|
0.00% |
0 / 1 |
12 | |||
| 1 | <?php |
| 2 | |
| 3 | namespace GeoData\Api; |
| 4 | |
| 5 | use Elastica\Query; |
| 6 | use Elastica\Query\BoolQuery; |
| 7 | use Elastica\Query\GeoBoundingBox; |
| 8 | use Elastica\Query\GeoDistance; |
| 9 | use Elastica\Query\Nested; |
| 10 | use Elastica\Query\Range; |
| 11 | use Elastica\Query\Term; |
| 12 | use Elastica\Query\Terms; |
| 13 | use Elastica\ResultSet; |
| 14 | use GeoData\Coord; |
| 15 | use GeoData\Globe; |
| 16 | use GeoData\Searcher; |
| 17 | use MediaWiki\Api\ApiPageSet; |
| 18 | use MediaWiki\Api\ApiQuery; |
| 19 | use MediaWiki\Json\FormatJson; |
| 20 | use MediaWiki\Title\NamespaceInfo; |
| 21 | use MediaWiki\Title\Title; |
| 22 | |
/**
 * Geosearch API module implementation that looks coordinates up in Elasticsearch
 * (through GeoData\Searcher) and then resolves the matching pages in the `page` table.
 */
class QueryGeoSearchElastic extends QueryGeoSearch {
	/** @var array|null Request parameters of the current run; read by filterCoord() */
	private $params;

	/**
	 * @param ApiQuery $query
	 * @param string $moduleName
	 * @param NamespaceInfo $namespaceInfo Used to find out whether every valid namespace was requested
	 */
	public function __construct(
		ApiQuery $query,
		string $moduleName,
		private readonly NamespaceInfo $namespaceInfo,
	) {
		parent::__construct( $query, $moduleName );
	}

	/**
	 * Runs the search and writes the outcome either into the API result or into the
	 * generator page set.
	 *
	 * @param ApiPageSet|null $resultPageSet Page set to fill when used as a generator,
	 *  null when results go straight into the API result
	 */
	protected function run( $resultPageSet = null ): void {
		parent::run( $resultPageSet );
		// @fixme: refactor to make this unnecessary
		$this->resetQueryParams();

		$params = $this->params = $this->extractRequestParams();
		$namespaces = array_map( 'intval', $params['namespace'] );

		$searcher = new Searcher( $this->getUser() );
		$query = $this->buildSearchQuery( $params, $namespaces, $searcher );

		$status = $searcher->performSearch( $query, $namespaces, 'GeoData_spatial_search' );
		if ( !$status->isOk() ) {
			$this->dieStatus( $status );
		}

		$this->addMessagesFromStatus( $status );
		/** @var ResultSet $resultSet */
		$resultSet = $status->getValue();

		if ( $params['debug'] ?? false ) {
			$this->addDebugInfo( $resultSet, $query );
		}

		$data = $resultSet->getResponse()->getData();

		if ( !isset( $data['hits']['hits'] ) ) {
			wfDebugLog( 'CirrusSearch', 'Unexpected result set returned by Elasticsearch', 'all', [
				'elastic_query' => FormatJson::encode( $query->toArray() ),
				'content' => FormatJson::encode( $data ),
			] );
			$this->dieDebug( __METHOD__, 'Unexpected result set returned by Elasticsearch' );
		}

		$coordinates = $this->collectCoordinates( $data['hits']['hits'] );
		if ( !$coordinates ) {
			// No results, no point in doing anything else
			return;
		}

		// Collect page IDs in hit order, before any re-sorting of the coordinates
		/** @var array<int,true> $ids */
		$ids = [];
		foreach ( $coordinates as $coord ) {
			$ids[$coord->pageId] = true;
		}

		if ( $params['sort'] === 'distance' ) {
			// Spaceship instead of subtraction: usort() casts the callback result to int,
			// so a float difference smaller than 1 would be treated as "equal".
			usort(
				$coordinates,
				static fn ( $coord1, $coord2 ) => $coord1->distance <=> $coord2->distance
			);
		}

		$this->addWhere( [ 'page_id' => array_keys( $ids ) ] );
		$this->addTables( 'page' );
		if ( $resultPageSet === null ) {
			$this->addFields( [ 'page_id', 'page_title', 'page_namespace' ] );
		} else {
			$this->addFields( $resultPageSet->getPageTableFields() );
		}

		$res = $this->select( __METHOD__ );

		if ( $resultPageSet === null ) {
			$this->addCoordinatesToResult( $coordinates, $res, $params );
		} else {
			$this->addPagesToPageSet( $resultPageSet, $res );
		}
	}

	/**
	 * Builds the Elasticsearch query: a nested filter on the `coordinates` field
	 * (optionally combined with a namespace filter), the requested source fields,
	 * the ordering and the result size.
	 *
	 * @param array $params Extracted request parameters
	 * @param int[] $namespaces Namespaces to search in
	 * @param Searcher $searcher Provides the rescore configuration for relevance sorting
	 *
	 * @return Query
	 */
	private function buildSearchQuery( array $params, array $namespaces, Searcher $searcher ): Query {
		$filter = new BoolQuery();
		$nestedPropsFilter = new BoolQuery();

		if ( $this->idToExclude ) {
			$filter->addMustNot( new Term( [ '_id' => $this->idToExclude ] ) );
		}
		$nestedPropsFilter->addFilter( new Term( [ 'coordinates.globe' => $params['globe'] ] ) );
		if ( isset( $params['maxdim'] ) ) {
			$nestedPropsFilter->addFilter( new Range( 'coordinates.dim',
				[ 'to' => $params['maxdim'] ] ) );
		}

		$primary = $params['primary'];
		if ( $primary !== 'all' ) {
			$nestedPropsFilter->addFilter( new Term( [
				'coordinates.primary' => $primary === 'primary'
			] ) );
		}

		if ( $this->bbox ) {
			$coord1 = $this->bbox->topLeft();
			$coord2 = $this->bbox->bottomRight();
			$distanceFilter = new GeoBoundingBox( 'coordinates.coord', [
				[ 'lat' => $coord1->lat, 'lon' => $coord1->lon ],
				[ 'lat' => $coord2->lat, 'lon' => $coord2->lon ],
			] );
		} else {
			$distanceFilter = new GeoDistance(
				'coordinates.coord',
				[ 'lat' => $this->coord->lat, 'lon' => $this->coord->lon ],
				$this->radius . 'm'
			);
		}

		$filter->addFilter( $nestedPropsFilter );
		$filter->addFilter( $distanceFilter );

		$query = new Query();
		$fields = array_map(
			static fn ( $prop ) => "coordinates.$prop",
			[ 'coord', 'primary', ...$params['prop'] ]
		);
		$query->setParam( '_source', $fields );

		$nested = new Nested();
		$nested->setPath( 'coordinates' )->setQuery( $filter );
		// The namespace filter is only added when fewer namespaces were requested
		// than there are valid ones.
		if ( count( $namespaces ) <
			count( $this->namespaceInfo->getValidNamespaces() )
		) {
			$outerFilter = new BoolQuery();
			$outerFilter->addFilter( $nested );
			$outerFilter->addFilter( new Terms( 'namespace', $namespaces ) );
			$query->setPostFilter( $outerFilter );
		} else {
			$query->setPostFilter( $nested );
		}

		if ( $params['sort'] === 'relevance' ) {
			// Should be in sync with
			// https://gerrit.wikimedia.org/g/mediawiki/extensions/CirrusSearch/+/ae9c7338/includes/Search/SearchRequestBuilder.php#97
			$rescores = $searcher->getRelevanceRescoreConfigurations( $namespaces );
			if ( $rescores ) {
				$query->setParam( 'rescore', $rescores );
			}
		} else {
			$query->addSort( [
				'_geo_distance' => [
					'nested' => [
						'path' => 'coordinates',
						'filter' => $nestedPropsFilter->toArray(),
					],
					'coordinates.coord' => [
						'lat' => $this->coord->lat,
						'lon' => $this->coord->lon
					],
					'order' => 'asc',
					'unit' => 'm'
				]
			] );
		}

		$query->setSize( $params['limit'] );

		return $query;
	}

	/**
	 * Turns raw search hits into Coord objects, dropping those rejected by filterCoord().
	 *
	 * @param array $hits Contents of `hits.hits` from the search response
	 *
	 * @return Coord[] Accepted coordinates in hit order, each with pageId set
	 */
	private function collectCoordinates( array $hits ): array {
		$coordinates = [];
		foreach ( $hits as $page ) {
			$id = (int)$page['_id'];
			// A hit without coordinates in its source is skipped rather than raising a warning
			foreach ( $page['_source']['coordinates'] ?? [] as $coordArray ) {
				$coord = $this->makeCoord( $coordArray );
				if ( !$this->filterCoord( $coord ) ) {
					continue;
				}
				$coord->pageId = $id;
				$coordinates[] = $coord;
			}
		}
		return $coordinates;
	}

	/**
	 * Writes one result entry per coordinate into the API result, up to the requested limit.
	 * Coordinates whose page was not returned by the database query are left out.
	 *
	 * @param Coord[] $coordinates Coordinates in output order
	 * @param \Wikimedia\Rdbms\IResultWrapper $res Rows selected from the page table
	 * @param array $params Extracted request parameters
	 */
	private function addCoordinatesToResult( array $coordinates, $res, array $params ): void {
		/** @var array<int,Title> $titles */
		$titles = [];
		foreach ( $res as $row ) {
			$titles[$row->page_id] = Title::newFromRow( $row );
		}

		$limit = $params['limit'];
		$result = $this->getResult();

		foreach ( $coordinates as $coord ) {
			// The limit counts every coordinate visited, including those skipped below
			if ( !$limit-- ) {
				break;
			}
			$id = $coord->pageId;
			if ( !isset( $titles[$id] ) ) {
				continue;
			}
			$title = $titles[$id];
			$vals = [
				'pageid' => $id,
				'ns' => $title->getNamespace(),
				'title' => $title->getPrefixedText(),
				'lat' => floatval( $coord->lat ),
				'lon' => floatval( $coord->lon ),
				'dist' => round( $coord->distance, 1 ),
				'primary' => boolval( $coord->primary ),
			];

			foreach ( $params['prop'] as $prop ) {
				// Don't output default globe
				if ( !( $prop === 'globe' && $coord->$prop === Globe::EARTH ) ) {
					$vals[$prop] = $coord->$prop;
				}
			}
			$fit = $result->addValue( [ 'query', $this->getModuleName() ], null, $vals );
			if ( !$fit ) {
				break;
			}
		}
	}

	/**
	 * Fills the generator page set from the selected rows and records each row's index
	 * as generator data.
	 *
	 * @param ApiPageSet $resultPageSet
	 * @param \Wikimedia\Rdbms\IResultWrapper $res Rows selected from the page table
	 */
	private function addPagesToPageSet( ApiPageSet $resultPageSet, $res ): void {
		$db = $this->getDB();
		$resultPageSet->populateFromQueryResult( $db, $res );
		// populateFromQueryResult() was handed the same result object, so start over
		$res->rewind();
		foreach ( $res as $row ) {
			$title = Title::newFromRow( $row );
			$resultPageSet->setGeneratorData( $title, [ 'index' => $res->key() - 1 ] );
		}
	}

	/**
	 * Creates a Coord class instance from an array returned by search
	 *
	 * @param array $hit One entry of a hit's `coordinates` source field
	 *
	 * @return Coord Coordinate with its distance to the search centre filled in
	 */
	private function makeCoord( array $hit ): Coord {
		$lat = $hit['coord']['lat'];
		$lon = $hit['coord']['lon'];
		// NOTE(review): the globe is read from inside the 'coord' sub-array, while the
		// query filters on 'coordinates.globe' - confirm which location the index uses.
		$globe = $hit['coord']['globe'] ?? Globe::EARTH;
		$coord = new Coord( $lat, $lon, $globe );
		// Copy over every mapped field that the hit actually carries
		foreach ( array_keys( Coord::FIELD_MAPPING ) as $field ) {
			if ( isset( $hit[$field] ) ) {
				$coord->$field = $hit[$field];
			}
		}
		$coord->distance = $this->coord->distanceTo( $coord );
		return $coord;
	}

	/**
	 * Checks whether given coordinates fall within the requested limits
	 *
	 * @param Coord $coord
	 *
	 * @return bool If false these coordinates should be discarded
	 */
	private function filterCoord( Coord $coord ): bool {
		// The radius only applies to point searches, not to bounding box searches
		if ( !$this->bbox && $coord->distance > $this->radius ) {
			return false;
		}
		// Only one globe is supported for search, this is future-proof
		if ( !$this->coord->sameGlobe( $coord ) ) {
			return false;
		}
		if ( isset( $this->params['maxdim'] ) && $coord->dim > $this->params['maxdim'] ) {
			return false;
		}
		// @phan-suppress-next-line PhanTypeArraySuspiciousNullable $params always set here
		$primary = $this->params['primary'];
		if ( ( $primary === 'primary' && !$coord->primary )
			|| ( $primary === 'secondary' && $coord->primary )
		) {
			return false;
		}
		return true;
	}

	/**
	 * Adds debug information to API result: the JSON-encoded query plus selected
	 * entries of the response's transfer info, under the `geodata-debug` key.
	 *
	 * @param ResultSet $resultSet
	 * @param Query $query
	 */
	private function addDebugInfo( ResultSet $resultSet, Query $query ): void {
		$ti = $resultSet->getResponse()->getTransferInfo();
		$neededData = [
			'url',
			'total_time',
			'namelookup_time',
			'connect_time',
			'pretransfer_time',
			'size_upload',
			'size_download',
			'starttransfer_time',
			'redirect_time',
		];
		$debug = [
			'query' => FormatJson::encode( $query->toArray(), true, FormatJson::UTF8_OK ),
		];
		// Only transfer info entries that are present are reported
		foreach ( $neededData as $name ) {
			if ( isset( $ti[$name] ) ) {
				$debug[$name] = $ti[$name];
			}
		}
		$this->getResult()->addValue( null, 'geodata-debug', $debug );
	}
}