Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 103 |
|
0.00% |
0 / 5 |
CRAP | |
0.00% |
0 / 1 |
ApiFeatureUsageQueryEngineElastica | |
0.00% |
0 / 103 |
|
0.00% |
0 / 5 |
380 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
2 | |||
getClient | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
getIndexNames | |
0.00% |
0 / 11 |
|
0.00% |
0 / 1 |
12 | |||
execute | |
0.00% |
0 / 66 |
|
0.00% |
0 / 1 |
110 | |||
suggestDateRange | |
0.00% |
0 / 13 |
|
0.00% |
0 / 1 |
12 |
1 | <?php |
2 | |
3 | namespace MediaWiki\Extension\ApiFeatureUsage; |
4 | |
5 | use DateInterval; |
6 | use Elastica\Aggregation\DateHistogram; |
7 | use Elastica\Aggregation\Terms as AggregationTerms; |
8 | use Elastica\Client; |
9 | use Elastica\Query; |
10 | use Elastica\Query\BoolQuery; |
11 | use Elastica\Query\Prefix; |
12 | use Elastica\Query\Range; |
13 | use Elastica\Query\Terms as QueryTerms; |
14 | use Elastica\Search; |
15 | use MediaWiki\Status\Status; |
16 | use MediaWiki\Utils\MWTimestamp; |
17 | use RuntimeException; |
18 | |
/**
 * Query feature usage data from Elasticsearch.
 *
 * Config fields are:
 *  serverList: Array of servers to connect to
 *  maxConnectionAttempts: Maximum connection attempts
 *  indexPrefix: Index prefix
 *  indexFormat: Date format string for index
 *  featureField: Name of the field holding $feature
 *  featureFieldAggSize: Size passed to the terms aggregation on featureField
 *  timestampField: Name of the field holding the timestamp
 *  agentField: Name of the field holding the user agent
 */
class ApiFeatureUsageQueryEngineElastica extends ApiFeatureUsageQueryEngine {
	/** @var Client|null Lazily created Elastica client, see getClient() */
	private $client = null;
	/** @var string[]|null Cached index names, null until first fetched by getIndexNames() */
	private $indexNames = null;

	/**
	 * @param array $options Engine configuration; any field not supplied falls
	 *  back to the defaults listed below.
	 */
	public function __construct( array $options ) {
		// Array union keeps caller-supplied values and only fills in missing keys.
		$options += [
			'indexPrefix' => 'apifeatureusage-',
			'indexFormat' => 'Y.m.d',
			'featureField' => 'feature',
			'featureFieldAggSize' => 10000,
			'timestampField' => '@timestamp',
			'agentField' => 'agent',
		];

		parent::__construct( $options );
	}

	/**
	 * Get the Elastica client, creating it on first use.
	 *
	 * @return Client
	 */
	protected function getClient() {
		if ( $this->client === null ) {
			$connection = new ApiFeatureUsageQueryEngineElasticaConnection( $this->options );
			$this->client = $connection->getClient();
		}
		return $this->client;
	}

	/**
	 * Get the names of all indexes matching the configured prefix.
	 *
	 * The result is fetched once via the "<prefix>*&#47;_alias" endpoint and then
	 * cached for the lifetime of this object.
	 *
	 * @return string[]
	 * @throws RuntimeException If the index names cannot be fetched
	 */
	protected function getIndexNames() {
		// Compare against null rather than testing truthiness, so that an empty
		// result is cached too instead of triggering a new request on every call.
		if ( $this->indexNames === null ) {
			$response = $this->getClient()->request(
				urlencode( $this->options['indexPrefix'] ) . '*/_alias'
			);
			if ( !$response->isOK() ) {
				throw new RuntimeException( __METHOD__ .
					': Cannot fetch index names from elasticsearch: ' .
					$response->getError()
				);
			}
			$this->indexNames = array_keys( $response->getData() );
		}
		return $this->indexNames;
	}

	/**
	 * Count feature usage per feature and per day for agents matching a prefix.
	 *
	 * On success the status value is a list of rows, each an array with the keys
	 * 'feature', 'date' and 'count'. A warning is added when some of the daily
	 * indexes for the requested range are missing; when none of them exist the
	 * status is returned with an empty value and no search is performed.
	 *
	 * @inheritDoc
	 */
	public function execute( $agent, MWTimestamp $start, MWTimestamp $end, ?array $features = null ) {
		$status = Status::newGood( [] );

		# Force $start and $end to day boundaries.
		# Work on copies so that the caller's objects are left untouched.
		$oneDay = new DateInterval( 'P1D' );
		$start = clone $start;
		$start->timestamp = clone $start->timestamp;
		$start->timestamp->setTime( 0, 0, 0 );
		$end = clone $end;
		$end->timestamp = clone $end->timestamp;
		$end->timestamp->setTime( 0, 0, 0 );
		// Last second of the end day
		$end->timestamp->add( $oneDay )->sub( new DateInterval( 'PT1S' ) );

		// Check which of the per-day indexes in the range exist before building
		// the query, so nothing is constructed when there is nothing to search.
		// Flipping the list gives a constant-time lookup per day.
		$knownIndexes = array_flip( $this->getIndexNames() );
		$indexAvailable = false;
		$skippedAny = false;
		$day = clone $start->timestamp;
		while ( $day <= $end->timestamp ) {
			$index = $this->options['indexPrefix'] . $day->format( $this->options['indexFormat'] );
			if ( isset( $knownIndexes[$index] ) ) {
				$indexAvailable = true;
			} else {
				$skippedAny = true;
			}
			if ( $indexAvailable && $skippedAny ) {
				// Both outcomes are already known, further days cannot change them
				break;
			}
			$day->add( $oneDay );
		}
		if ( !$indexAvailable ) {
			// No dates in range
			$status->warning( 'apifeatureusage-no-indexes' );
			return $status;
		}
		if ( $skippedAny ) {
			$status->warning( 'apifeatureusage-missing-indexes' );
		}

		// Filter: agent prefix AND timestamp range AND (optionally) feature list
		$bools = new BoolQuery();

		$prefix = new Prefix();
		$prefix->setPrefix( $this->options['agentField'], $agent );
		$bools->addMust( $prefix );

		$bools->addMust( new Range( $this->options['timestampField'], [
			'gte' => $start->getTimestamp( TS_ISO_8601 ),
			'lte' => $end->getTimestamp( TS_ISO_8601 ),
		] ) );

		if ( $features !== null ) {
			$bools->addMust( new QueryTerms( $this->options['featureField'], $features ) );
		}

		$query = new Query();
		$query->setQuery( $bools );

		// Aggregation: one bucket per feature, each split into one bucket per day
		$termsAgg = new AggregationTerms( 'feature' );
		$termsAgg->setField( $this->options['featureField'] );
		$termsAgg->setSize( $this->options['featureFieldAggSize'] );

		$datesAgg = new DateHistogram(
			'date', $this->options['timestampField'], 'day'
		);
		$datesAgg->setFormat( '8uuuu-MM-dd' );

		$termsAgg->addAggregation( $datesAgg );
		$query->addAggregation( $termsAgg );

		$search = new Search( $this->getClient() );
		// Only the aggregations are needed, not the matching documents
		$search->setOption( Search::OPTION_SIZE, 0 );
		$search->setQuery( $query );
		// Prefer the wildcard approach over using an explicit list of indices to avoid building a
		// list that might be too long to encode in the search URL.
		// This feature is rarely used so that it's probably fine to hit all these indices and let
		// the date filtering quickly skip unrelated ones.
		$search->addIndexByName( $this->options['indexPrefix'] . '*' );

		$res = $search->search();

		if ( $res->getResponse()->hasError() ) {
			return Status::newFatal(
				'apifeatureusage-elasticsearch-error', $res->getResponse()->getError()
			);
		}

		// Flatten the nested feature/date buckets into one row per pair
		$ret = [];
		$aggs = $res->getAggregations();
		if ( isset( $aggs['feature'] ) ) {
			foreach ( $aggs['feature']['buckets'] as $feature ) {
				foreach ( $feature['date']['buckets'] as $date ) {
					$ret[] = [
						'feature' => $feature['key'],
						'date' => $date['key_as_string'],
						'count' => $date['doc_count'],
					];
				}
			}
		}
		$status->value = $ret;

		return $status;
	}

	/**
	 * Suggest a date range based on the indexes that exist.
	 *
	 * Starting from yesterday, walks back one day at a time for as long as the
	 * index for that day exists. The start of the range is midnight of the
	 * earliest day reached (today if yesterday's index is missing); the end of
	 * the range is the current time.
	 *
	 * @return MWTimestamp[] Two elements: start and end of the range
	 */
	public function suggestDateRange() {
		$start = new MWTimestamp();
		$start->setTimezone( 'UTC' );
		$start->timestamp->setTime( 0, 0, 0 );
		$end = new MWTimestamp();
		$end->setTimezone( 'UTC' );

		$oneDay = new DateInterval( 'P1D' );
		// Flipping the list gives a constant-time lookup per day
		$knownIndexes = array_flip( $this->getIndexNames() );
		while ( true ) {
			$start->timestamp->sub( $oneDay );
			$index = $this->options['indexPrefix'] . $start->format( $this->options['indexFormat'] );
			if ( !isset( $knownIndexes[$index] ) ) {
				// Stepped one day too far, go back to the last day that had an index
				$start->timestamp->add( $oneDay );
				return [ $start, $end ];
			}
		}
	}
}