Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
0.00% covered (danger)
0.00%
0 / 107
0.00% covered (danger)
0.00%
0 / 7
CRAP
0.00% covered (danger)
0.00%
0 / 1
ApiFeatureUsageQueryEngineElastica
0.00% covered (danger)
0.00%
0 / 107
0.00% covered (danger)
0.00%
0 / 7
506
0.00% covered (danger)
0.00%
0 / 1
 __construct
0.00% covered (danger)
0.00%
0 / 11
0.00% covered (danger)
0.00%
0 / 1
6
 getClient
0.00% covered (danger)
0.00%
0 / 4
0.00% covered (danger)
0.00%
0 / 1
6
 getIndexNames
0.00% covered (danger)
0.00%
0 / 11
0.00% covered (danger)
0.00%
0 / 1
12
 enumerate
0.00% covered (danger)
0.00%
0 / 66
0.00% covered (danger)
0.00%
0 / 1
110
 suggestDateRange
0.00% covered (danger)
0.00%
0 / 13
0.00% covered (danger)
0.00%
0 / 1
12
 record
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 prune
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
1<?php
2
3namespace MediaWiki\Extension\ApiFeatureUsage;
4
5use DateInterval;
6use Elastica\Aggregation\DateHistogram;
7use Elastica\Aggregation\Terms as AggregationTerms;
8use Elastica\Client;
9use Elastica\Query;
10use Elastica\Query\BoolQuery;
11use Elastica\Query\Prefix;
12use Elastica\Query\Range;
13use Elastica\Query\Terms as QueryTerms;
14use Elastica\Search;
15use MediaWiki\Registration\ExtensionRegistry;
16use MediaWiki\Status\Status;
17use MediaWiki\Utils\MWTimestamp;
18use RuntimeException;
19
/**
 * Query feature usage data from Elasticsearch.
 *
 * Config fields are:
 *  serverList: Array of servers to connect to
 *  maxConnectionAttempts: Maximum connection attempts
 *  indexPrefix: Index prefix (default: 'apifeatureusage-')
 *  indexFormat: Date format string for index (default: 'Y.m.d')
 *  featureField: Name of the field holding $feature (default: 'feature')
 *  featureFieldAggSize: Size given to the terms aggregation on featureField (default: 10000)
 *  timestampField: Name of the field holding the timestamp (default: '@timestamp')
 *  agentField: Name of the field holding the user agent (default: 'agent')
 */
class ApiFeatureUsageQueryEngineElastica extends ApiFeatureUsageQueryEngine {
    /** @var Client|null Lazily created by getClient() */
    private $client = null;
    /** @var string[]|null Lazily fetched by getIndexNames(); null means "not fetched yet" */
    private $indexNames = null;

    /**
     * @param array $options Engine configuration; missing keys are filled with the defaults
     *  listed in the class documentation.
     * @throws RuntimeException If the Elastica extension is not loaded
     */
    public function __construct( array $options ) {
        if ( !ExtensionRegistry::getInstance()->isLoaded( 'Elastica' ) ) {
            throw new RuntimeException( "Use of ElasticSearch requires the Elastica library" );
        }

        // Array union keeps caller-supplied values and only adds the missing keys.
        $options += [
            'indexPrefix' => 'apifeatureusage-',
            'indexFormat' => 'Y.m.d',
            'featureField' => 'feature',
            'featureFieldAggSize' => 10000,
            'timestampField' => '@timestamp',
            'agentField' => 'agent',
        ];

        parent::__construct( $options );
    }

    /**
     * Get the Elastica client, creating it on first use.
     *
     * @return Client
     */
    protected function getClient() {
        if ( $this->client === null ) {
            $connection = new ApiFeatureUsageQueryEngineElasticaConnection( $this->options );
            $this->client = $connection->getClient();
        }
        return $this->client;
    }

    /**
     * Get the names of all indexes matching the configured prefix.
     *
     * The list is fetched once per instance. An empty result is cached too, so a cluster
     * without any matching index does not trigger a new request on every call.
     *
     * @return string[]
     * @throws RuntimeException If the index list cannot be fetched
     */
    protected function getIndexNames() {
        if ( $this->indexNames !== null ) {
            return $this->indexNames;
        }

        $response = $this->getClient()->request(
            urlencode( $this->options['indexPrefix'] ) . '*/_alias'
        );
        if ( !$response->isOK() ) {
            throw new RuntimeException( __METHOD__ .
                ': Cannot fetch index names from elasticsearch: ' .
                $response->getError()
            );
        }

        // The response data is keyed by index name.
        $this->indexNames = array_keys( $response->getData() );
        return $this->indexNames;
    }

    /**
     * Build a lookup map keyed by the known index names.
     *
     * Testing membership with isset() on this map matches names exactly and in constant
     * time. A loose in_array() would treat numeric-looking names such as '1e3' and '1000'
     * as equal, while a strict in_array() would miss names that PHP turned into integer
     * keys; array-key lookup normalizes both sides the same way.
     *
     * @return array<string|int,bool>
     */
    private function getIndexLookup() {
        return array_fill_keys( $this->getIndexNames(), true );
    }

    /**
     * Build the search query used by enumerate().
     *
     * @param string $agent User agent prefix to match
     * @param MWTimestamp $start Inclusive start of the range
     * @param MWTimestamp $end Inclusive end of the range
     * @param string[]|null $features Features to restrict to, or null for no restriction
     * @return Query
     */
    private function buildEnumerateQuery(
        string $agent,
        MWTimestamp $start,
        MWTimestamp $end,
        ?array $features
    ) {
        $bools = new BoolQuery();

        $prefix = new Prefix();
        $prefix->setPrefix( $this->options['agentField'], $agent );
        $bools->addMust( $prefix );

        $bools->addMust( new Range( $this->options['timestampField'], [
            'gte' => $start->getTimestamp( TS_ISO_8601 ),
            'lte' => $end->getTimestamp( TS_ISO_8601 ),
        ] ) );

        if ( $features !== null ) {
            $bools->addMust( new QueryTerms( $this->options['featureField'], $features ) );
        }

        // One bucket per feature, each split into one bucket per day.
        $termsAgg = new AggregationTerms( 'feature' );
        $termsAgg->setField( $this->options['featureField'] );
        $termsAgg->setSize( $this->options['featureFieldAggSize'] );

        $datesAgg = new DateHistogram(
            'date', $this->options['timestampField'], 'day'
        );
        $datesAgg->setFormat( '8uuuu-MM-dd' );
        $termsAgg->addAggregation( $datesAgg );

        $query = new Query();
        $query->setQuery( $bools );
        $query->addAggregation( $termsAgg );

        return $query;
    }

    /** @inheritDoc */
    public function enumerate(
        string $agent,
        MWTimestamp $start,
        MWTimestamp $end,
        ?array $features = null
    ) {
        $status = Status::newGood( [] );

        // Force $start and $end to day boundaries. Both the wrapper and the wrapped
        // timestamp are cloned so the caller's objects are not modified.
        $oneDay = new DateInterval( 'P1D' );
        $start = clone $start;
        $start->timestamp = clone $start->timestamp;
        $start->timestamp->setTime( 0, 0, 0 );
        $end = clone $end;
        $end->timestamp = clone $end->timestamp;
        $end->timestamp->setTime( 0, 0, 0 );
        // Last second of the end day.
        $end->timestamp->add( $oneDay )->sub( new DateInterval( 'PT1S' ) );

        // Check which of the per-day indexes exist before building any query, so that
        // a range without data returns without doing further work.
        $knownIndexes = $this->getIndexLookup();
        $indexAvailable = false;
        $skippedAny = false;
        for ( $day = clone $start->timestamp; $day <= $end->timestamp; $day->add( $oneDay ) ) {
            $index = $this->options['indexPrefix'] . $day->format( $this->options['indexFormat'] );
            if ( isset( $knownIndexes[$index] ) ) {
                $indexAvailable = true;
            } else {
                $skippedAny = true;
            }
        }
        if ( !$indexAvailable ) {
            // No dates in range
            $status->warning( 'apifeatureusage-no-indexes' );
            return $status;
        }
        if ( $skippedAny ) {
            $status->warning( 'apifeatureusage-missing-indexes' );
        }

        $search = new Search( $this->getClient() );
        // Only the aggregations are needed, not the matching documents.
        $search->setOption( Search::OPTION_SIZE, 0 );
        $search->setQuery( $this->buildEnumerateQuery( $agent, $start, $end, $features ) );
        // Prefer the wildcard approach over using an explicit list of indices to avoid building a
        // list that might be too long to encode in the search URL.
        // This feature is rarely used so that it's probably fine to hit all these indices and let
        // the date filtering quickly skip unrelated ones.
        $search->addIndexByName( $this->options['indexPrefix'] . '*' );

        $res = $search->search();

        $response = $res->getResponse();
        if ( $response->hasError() ) {
            return Status::newFatal(
                'apifeatureusage-elasticsearch-error', $response->getError()
            );
        }

        // Flatten the feature => date buckets into one row per (feature, date) pair.
        $ret = [];
        $aggs = $res->getAggregations();
        if ( isset( $aggs['feature'] ) ) {
            foreach ( $aggs['feature']['buckets'] as $feature ) {
                foreach ( $feature['date']['buckets'] as $date ) {
                    $ret[] = [
                        'feature' => $feature['key'],
                        'date' => $date['key_as_string'],
                        'count' => $date['doc_count'],
                    ];
                }
            }
        }
        $status->value = $ret;

        return $status;
    }

    /**
     * Suggest a date range for which indexes exist.
     *
     * The end is the current time in UTC. The start is found by walking back from today
     * (UTC midnight) one day at a time for as long as an index exists for that day; if
     * there is no index for yesterday, the start is today.
     *
     * @return MWTimestamp[] Two-element list: [ start, end ]
     */
    public function suggestDateRange() {
        $start = new MWTimestamp();
        $start->setTimezone( 'UTC' );
        $start->timestamp->setTime( 0, 0, 0 );
        $end = new MWTimestamp();
        $end->setTimezone( 'UTC' );

        $oneDay = new DateInterval( 'P1D' );
        $knownIndexes = $this->getIndexLookup();
        // NOTE(review): the walk stops only when a generated name is missing from the
        // index list, so it relies on indexFormat producing names that change with the date.
        do {
            $start->timestamp->sub( $oneDay );
            $index = $this->options['indexPrefix'] . $start->format( $this->options['indexFormat'] );
        } while ( isset( $knownIndexes[$index] ) );

        // The loop overshoots by one day: step forward to the last day that had an index.
        $start->timestamp->add( $oneDay );

        return [ $start, $end ];
    }

    /** @inheritDoc */
    public function record(
        string $feature,
        string $agent,
        string $ipAddress
    ) {
        // no-op; rely on debug log entries being routed to elastic search
    }

    /** @inheritDoc */
    public function prune( $progressFn = null, $limit = INF ) {
        // no-op; rely on manual index deletion
        return false;
    }
}