Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
0.00% covered (danger)
0.00%
0 / 103
0.00% covered (danger)
0.00%
0 / 5
CRAP
0.00% covered (danger)
0.00%
0 / 1
ApiFeatureUsageQueryEngineElastica
0.00% covered (danger)
0.00%
0 / 103
0.00% covered (danger)
0.00%
0 / 5
380
0.00% covered (danger)
0.00%
0 / 1
 __construct
0.00% covered (danger)
0.00%
0 / 9
0.00% covered (danger)
0.00%
0 / 1
2
 getClient
0.00% covered (danger)
0.00%
0 / 4
0.00% covered (danger)
0.00%
0 / 1
6
 getIndexNames
0.00% covered (danger)
0.00%
0 / 11
0.00% covered (danger)
0.00%
0 / 1
12
 execute
0.00% covered (danger)
0.00%
0 / 66
0.00% covered (danger)
0.00%
0 / 1
110
 suggestDateRange
0.00% covered (danger)
0.00%
0 / 13
0.00% covered (danger)
0.00%
0 / 1
12
1<?php
2
3namespace MediaWiki\Extension\ApiFeatureUsage;
4
5use DateInterval;
6use Elastica\Aggregation\DateHistogram;
7use Elastica\Aggregation\Terms as AggregationTerms;
8use Elastica\Client;
9use Elastica\Query;
10use Elastica\Query\BoolQuery;
11use Elastica\Query\Prefix;
12use Elastica\Query\Range;
13use Elastica\Query\Terms as QueryTerms;
14use Elastica\Search;
15use MediaWiki\Status\Status;
16use MediaWiki\Utils\MWTimestamp;
17use RuntimeException;
18
/**
 * Query feature usage data from Elasticsearch.
 *
 * Config fields are:
 *  serverList: Array of servers to connect to
 *  maxConnectionAttempts: Maximum connection attempts
 *  indexPrefix: Index prefix (default 'apifeatureusage-')
 *  indexFormat: Date format string for index (default 'Y.m.d')
 *  featureField: Name of the field holding $feature (default 'feature')
 *  featureFieldAggSize: Bucket limit passed to the terms aggregation on
 *   featureField (default 10000)
 *  timestampField: Name of the field holding the timestamp (default '@timestamp')
 *  agentField: Name of the field holding the user agent (default 'agent')
 */
class ApiFeatureUsageQueryEngineElastica extends ApiFeatureUsageQueryEngine {
    /** @var Client|null Lazily created client, see getClient() */
    private $client = null;
    /** @var string[]|null Cached index names, null until first fetched */
    private $indexNames = null;

    /**
     * Fill in the defaults for any option the caller did not supply and
     * hand the result to the parent constructor.
     *
     * @param array $options Config fields as listed on the class
     */
    public function __construct( array $options ) {
        // Array union: keys already present in $options win over the defaults.
        $options += [
            'indexPrefix' => 'apifeatureusage-',
            'indexFormat' => 'Y.m.d',
            'featureField' => 'feature',
            'featureFieldAggSize' => 10000,
            'timestampField' => '@timestamp',
            'agentField' => 'agent',
        ];

        parent::__construct( $options );
    }

    /**
     * Get the Elastica client, creating it on first use.
     *
     * @return Client
     */
    protected function getClient() {
        if ( $this->client === null ) {
            $connection = new ApiFeatureUsageQueryEngineElasticaConnection( $this->options );
            $this->client = $connection->getClient();
        }
        return $this->client;
    }

    /**
     * Get the names of all indexes matching the configured prefix.
     *
     * The list is fetched once per instance and then cached, including when
     * the cluster reports no matching index at all.
     *
     * @return string[]
     * @throws RuntimeException If the alias request does not succeed
     */
    protected function getIndexNames() {
        // Compare against null rather than testing truthiness: an empty list
        // is a valid result and must not trigger a new request on every call.
        if ( $this->indexNames !== null ) {
            return $this->indexNames;
        }

        $response = $this->getClient()->request(
            urlencode( $this->options['indexPrefix'] ) . '*/_alias'
        );
        if ( !$response->isOK() ) {
            throw new RuntimeException( __METHOD__ .
                ': Cannot fetch index names from elasticsearch: ' .
                $response->getError()
            );
        }

        // The response data is keyed by index name.
        $this->indexNames = array_keys( $response->getData() );
        return $this->indexNames;
    }

    /**
     * Build the search query: filter by agent prefix, timestamp range and
     * (optionally) feature names, then aggregate by feature and by day.
     *
     * @param string $agent User agent prefix to match
     * @param MWTimestamp $start Inclusive lower bound
     * @param MWTimestamp $end Inclusive upper bound
     * @param string[]|null $features Features to restrict to, or null for all
     * @return Query
     */
    private function buildQuery( $agent, MWTimestamp $start, MWTimestamp $end, ?array $features ) {
        $bools = new BoolQuery();

        $prefix = new Prefix();
        $prefix->setPrefix( $this->options['agentField'], $agent );
        $bools->addMust( $prefix );

        $bools->addMust( new Range( $this->options['timestampField'], [
            'gte' => $start->getTimestamp( TS_ISO_8601 ),
            'lte' => $end->getTimestamp( TS_ISO_8601 ),
        ] ) );

        if ( $features !== null ) {
            $bools->addMust( new QueryTerms( $this->options['featureField'], $features ) );
        }

        $query = new Query();
        $query->setQuery( $bools );

        // One bucket per feature, each subdivided into one bucket per day.
        $termsAgg = new AggregationTerms( 'feature' );
        $termsAgg->setField( $this->options['featureField'] );
        $termsAgg->setSize( $this->options['featureFieldAggSize'] );

        $datesAgg = new DateHistogram(
            'date', $this->options['timestampField'], 'day'
        );
        $datesAgg->setFormat( '8uuuu-MM-dd' );

        $termsAgg->addAggregation( $datesAgg );
        $query->addAggregation( $termsAgg );

        return $query;
    }

    /** @inheritDoc */
    public function execute( $agent, MWTimestamp $start, MWTimestamp $end, ?array $features = null ) {
        $status = Status::newGood( [] );

        // Force $start and $end to day boundaries. Work on copies so the
        // caller's objects are untouched; "clone" is shallow, so the wrapped
        // timestamp object has to be cloned separately.
        $oneDay = new DateInterval( 'P1D' );
        $start = clone $start;
        $start->timestamp = clone $start->timestamp;
        $start->timestamp->setTime( 0, 0, 0 );
        $end = clone $end;
        $end->timestamp = clone $end->timestamp;
        $end->timestamp->setTime( 0, 0, 0 );
        // Last second of the end day
        $end->timestamp->add( $oneDay )->sub( new DateInterval( 'PT1S' ) );

        // Walk the range one day at a time and note which daily indexes exist.
        // A flipped array gives exact-key lookups instead of a loosely
        // comparing linear scan for every day.
        $knownIndexes = array_flip( $this->getIndexNames() );
        $indexAvailable = false;
        $skippedAny = false;
        $day = clone $start->timestamp;
        while ( $day <= $end->timestamp ) {
            $index = $this->options['indexPrefix'] . $day->format( $this->options['indexFormat'] );
            if ( isset( $knownIndexes[$index] ) ) {
                $indexAvailable = true;
            } else {
                $skippedAny = true;
            }
            $day->add( $oneDay );
        }
        if ( !$indexAvailable ) {
            // No dates in range
            $status->warning( 'apifeatureusage-no-indexes' );
            return $status;
        }
        if ( $skippedAny ) {
            $status->warning( 'apifeatureusage-missing-indexes' );
        }

        $search = new Search( $this->getClient() );
        // Only the aggregations are needed, not the matching documents.
        $search->setOption( Search::OPTION_SIZE, 0 );
        $search->setQuery( $this->buildQuery( $agent, $start, $end, $features ) );
        // Prefer the wildcard approach over using an explicit list of indices to avoid building a
        // list that might be too long to encode in the search URL.
        // This feature is rarely used so that it's probably fine to hit all these indices and let
        // the date filtering quickly skip unrelated ones.
        $search->addIndexByName( $this->options['indexPrefix'] . '*' );

        $res = $search->search();

        if ( $res->getResponse()->hasError() ) {
            return Status::newFatal(
                'apifeatureusage-elasticsearch-error', $res->getResponse()->getError()
            );
        }

        // Flatten the nested feature/date buckets into one row per pair.
        $ret = [];
        $aggs = $res->getAggregations();
        if ( isset( $aggs['feature'] ) ) {
            foreach ( $aggs['feature']['buckets'] as $feature ) {
                foreach ( $feature['date']['buckets'] as $date ) {
                    $ret[] = [
                        'feature' => $feature['key'],
                        'date' => $date['key_as_string'],
                        'count' => $date['doc_count'],
                    ];
                }
            }
        }
        $status->value = $ret;

        return $status;
    }

    /**
     * Suggest a date range to query, based on which daily indexes exist.
     *
     * The end of the range is the current time in UTC. The start is found by
     * walking backwards from yesterday for as long as a daily index exists;
     * if yesterday has no index, the start is today at 00:00:00 UTC.
     *
     * @return MWTimestamp[] Two-element array: [ start, end ]
     */
    public function suggestDateRange() {
        $start = new MWTimestamp();
        $start->setTimezone( 'UTC' );
        $start->timestamp->setTime( 0, 0, 0 );
        $end = new MWTimestamp();
        $end->setTimezone( 'UTC' );

        $oneDay = new DateInterval( 'P1D' );
        $knownIndexes = array_flip( $this->getIndexNames() );
        // Terminates because the index list is finite: some earlier day
        // eventually has no index.
        while ( true ) {
            $start->timestamp->sub( $oneDay );
            $index = $this->options['indexPrefix'] . $start->format( $this->options['indexFormat'] );
            if ( !isset( $knownIndexes[$index] ) ) {
                // Went one day too far; step forward to the last day that had an index.
                $start->timestamp->add( $oneDay );
                return [ $start, $end ];
            }
        }
    }
}