Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 107 |
|
0.00% |
0 / 7 |
CRAP | |
0.00% |
0 / 1 |
ApiFeatureUsageQueryEngineElastica | |
0.00% |
0 / 107 |
|
0.00% |
0 / 7 |
506 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 11 |
|
0.00% |
0 / 1 |
6 | |||
getClient | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
getIndexNames | |
0.00% |
0 / 11 |
|
0.00% |
0 / 1 |
12 | |||
enumerate | |
0.00% |
0 / 66 |
|
0.00% |
0 / 1 |
110 | |||
suggestDateRange | |
0.00% |
0 / 13 |
|
0.00% |
0 / 1 |
12 | |||
record | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
prune | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 |
1 | <?php |
2 | |
3 | namespace MediaWiki\Extension\ApiFeatureUsage; |
4 | |
5 | use DateInterval; |
6 | use Elastica\Aggregation\DateHistogram; |
7 | use Elastica\Aggregation\Terms as AggregationTerms; |
8 | use Elastica\Client; |
9 | use Elastica\Query; |
10 | use Elastica\Query\BoolQuery; |
11 | use Elastica\Query\Prefix; |
12 | use Elastica\Query\Range; |
13 | use Elastica\Query\Terms as QueryTerms; |
14 | use Elastica\Search; |
15 | use MediaWiki\Registration\ExtensionRegistry; |
16 | use MediaWiki\Status\Status; |
17 | use MediaWiki\Utils\MWTimestamp; |
18 | use RuntimeException; |
19 | |
/**
 * Query feature usage data from Elasticsearch.
 *
 * Config fields are:
 *  serverList: Array of servers to connect to
 *  maxConnectionAttempts: Maximum connection attempts
 *  indexPrefix: Index prefix (default 'apifeatureusage-')
 *  indexFormat: Date format string for index (default 'Y.m.d')
 *  featureField: Name of the field holding $feature (default 'feature')
 *  featureFieldAggSize: Size passed to the terms aggregation over
 *    featureField (default 10000)
 *  timestampField: Name of the field holding the timestamp (default '@timestamp')
 *  agentField: Name of the field holding the user agent (default 'agent')
 */
class ApiFeatureUsageQueryEngineElastica extends ApiFeatureUsageQueryEngine {
	/** @var Client|null Lazily created by getClient() */
	private $client = null;
	/** @var string[]|null Lazily fetched by getIndexNames(); null means "not fetched yet" */
	private $indexNames = null;

	/**
	 * @param array $options Engine configuration; missing keys are filled
	 *  with the defaults listed in the class documentation.
	 * @throws RuntimeException If the Elastica extension is not loaded
	 */
	public function __construct( array $options ) {
		if ( !ExtensionRegistry::getInstance()->isLoaded( 'Elastica' ) ) {
			throw new RuntimeException( "Use of ElasticSearch requires the Elastica library" );
		}
		// Array union: caller-supplied values win, defaults only fill the gaps.
		$options += [
			'indexPrefix' => 'apifeatureusage-',
			'indexFormat' => 'Y.m.d',
			'featureField' => 'feature',
			'featureFieldAggSize' => 10000,
			'timestampField' => '@timestamp',
			'agentField' => 'agent',
		];

		parent::__construct( $options );
	}

	/**
	 * Get the Elastica client, creating the connection on first use.
	 *
	 * @return Client
	 */
	protected function getClient() {
		if ( $this->client === null ) {
			$connection = new ApiFeatureUsageQueryEngineElasticaConnection( $this->options );
			$this->client = $connection->getClient();
		}
		return $this->client;
	}

	/**
	 * Get the names of all indexes matching the configured prefix.
	 *
	 * The list is fetched once per instance. An empty list is cached too,
	 * so a cluster without matching indexes is not queried again on every call.
	 *
	 * @return string[]
	 * @throws RuntimeException If the index list cannot be fetched
	 */
	protected function getIndexNames() {
		if ( $this->indexNames === null ) {
			$response = $this->getClient()->request(
				urlencode( $this->options['indexPrefix'] ) . '*/_alias'
			);
			if ( !$response->isOK() ) {
				throw new RuntimeException( __METHOD__ .
					': Cannot fetch index names from elasticsearch: ' .
					$response->getError()
				);
			}
			// The response is keyed by index name
			$this->indexNames = array_keys( $response->getData() );
		}
		return $this->indexNames;
	}

	/** @inheritDoc */
	public function enumerate(
		string $agent,
		MWTimestamp $start,
		MWTimestamp $end,
		?array $features = null
	) {
		$status = Status::newGood( [] );

		// Force $start and $end to day boundaries. Work on copies (including the
		// wrapped timestamp objects) so the caller's objects are left untouched.
		$start = clone $start;
		$start->timestamp = clone $start->timestamp;
		$start->timestamp->setTime( 0, 0, 0 );
		$end = clone $end;
		$end->timestamp = clone $end->timestamp;
		$end->timestamp->setTime( 0, 0, 0 );
		// Last second of the end day
		$end->timestamp->add( new DateInterval( 'P1D' ) )->sub( new DateInterval( 'PT1S' ) );

		$query = $this->buildQuery( $agent, $start, $end, $features );

		$search = new Search( $this->getClient() );
		// Only the aggregations are of interest, not the matching documents
		$search->setOption( Search::OPTION_SIZE, 0 );

		[ $indexAvailable, $skippedAny ] = $this->checkIndexCoverage( $start, $end );
		if ( !$indexAvailable ) {
			// No dates in range
			$status->warning( 'apifeatureusage-no-indexes' );
			return $status;
		}
		if ( $skippedAny ) {
			$status->warning( 'apifeatureusage-missing-indexes' );
		}

		$search->setQuery( $query );
		// Prefer the wildcard approach over using an explicit list of indices to avoid building a
		// list that might be too long to encode in the search URL.
		// This feature is rarely used so that it's probably fine to hit all these indices and let
		// the date filtering quickly skip unrelated ones.
		$search->addIndexByName( $this->options['indexPrefix'] . '*' );

		$res = $search->search();

		if ( $res->getResponse()->hasError() ) {
			return Status::newFatal(
				'apifeatureusage-elasticsearch-error', $res->getResponse()->getError()
			);
		}

		// Flatten the feature -> date buckets into one row per (feature, date)
		$ret = [];
		$aggs = $res->getAggregations();
		if ( isset( $aggs['feature'] ) ) {
			foreach ( $aggs['feature']['buckets'] as $feature ) {
				foreach ( $feature['date']['buckets'] as $date ) {
					$ret[] = [
						'feature' => $feature['key'],
						'date' => $date['key_as_string'],
						'count' => $date['doc_count'],
					];
				}
			}
		}
		$status->value = $ret;

		return $status;
	}

	/**
	 * Build the search query for enumerate(): filter by agent prefix, time
	 * range and (optionally) feature names, aggregated per feature and per day.
	 *
	 * @param string $agent User agent prefix to match
	 * @param MWTimestamp $start Start of the range, already at a day boundary
	 * @param MWTimestamp $end End of the range, already at a day boundary
	 * @param string[]|null $features Features to restrict to, or null for all
	 * @return Query
	 */
	private function buildQuery(
		string $agent,
		MWTimestamp $start,
		MWTimestamp $end,
		?array $features
	): Query {
		$bools = new BoolQuery();

		$prefix = new Prefix();
		$prefix->setPrefix( $this->options['agentField'], $agent );
		$bools->addMust( $prefix );

		$bools->addMust( new Range( $this->options['timestampField'], [
			'gte' => $start->getTimestamp( TS_ISO_8601 ),
			'lte' => $end->getTimestamp( TS_ISO_8601 ),
		] ) );

		if ( $features !== null ) {
			$bools->addMust( new QueryTerms( $this->options['featureField'], $features ) );
		}

		$query = new Query();
		$query->setQuery( $bools );

		$termsAgg = new AggregationTerms( 'feature' );
		$termsAgg->setField( $this->options['featureField'] );
		$termsAgg->setSize( $this->options['featureFieldAggSize'] );

		$datesAgg = new DateHistogram(
			'date', $this->options['timestampField'], 'day'
		);
		$datesAgg->setFormat( '8uuuu-MM-dd' );

		$termsAgg->addAggregation( $datesAgg );
		$query->addAggregation( $termsAgg );

		return $query;
	}

	/**
	 * Check which of the per-day indexes between $start and $end exist.
	 *
	 * @param MWTimestamp $start First day of the range
	 * @param MWTimestamp $end Last day of the range
	 * @return bool[] Two-element list: whether at least one day has an index,
	 *  and whether at least one day has none
	 */
	private function checkIndexCoverage( MWTimestamp $start, MWTimestamp $end ): array {
		// Key lookup instead of a linear in_array() scan per day; also compares
		// names exactly rather than loosely.
		$known = array_flip( $this->getIndexNames() );
		$oneDay = new DateInterval( 'P1D' );

		$anyAvailable = false;
		$anyMissing = false;
		$day = clone $start->timestamp;
		// Once both flags are set nothing can change any more, so stop early
		// rather than walking the rest of a potentially very long range.
		while ( $day <= $end->timestamp && !( $anyAvailable && $anyMissing ) ) {
			$index = $this->options['indexPrefix'] . $day->format( $this->options['indexFormat'] );
			if ( isset( $known[$index] ) ) {
				$anyAvailable = true;
			} else {
				$anyMissing = true;
			}
			$day->add( $oneDay );
		}

		return [ $anyAvailable, $anyMissing ];
	}

	/**
	 * Suggest a date range based on the indexes that currently exist.
	 *
	 * Starting from today's midnight (UTC), steps back one day at a time for
	 * as long as an index exists for that earlier day. Today's own index is
	 * not checked.
	 *
	 * @return MWTimestamp[] Two-element list: the start of the earliest day in
	 *  that unbroken run (today's midnight if yesterday has no index), and the
	 *  current time
	 */
	public function suggestDateRange() {
		$start = new MWTimestamp();
		$start->setTimezone( 'UTC' );
		$start->timestamp->setTime( 0, 0, 0 );
		$end = new MWTimestamp();
		$end->setTimezone( 'UTC' );

		$oneDay = new DateInterval( 'P1D' );
		$known = array_flip( $this->getIndexNames() );
		do {
			$start->timestamp->sub( $oneDay );
			$index = $this->options['indexPrefix'] . $start->format( $this->options['indexFormat'] );
		} while ( isset( $known[$index] ) );

		// The loop stopped on the first day without an index; step forward again
		$start->timestamp->add( $oneDay );
		return [ $start, $end ];
	}

	/** @inheritDoc */
	public function record(
		string $feature,
		string $agent,
		string $ipAddress
	) {
		// no-op; rely on debug log entries being routed to elastic search
	}

	/** @inheritDoc */
	public function prune( $progressFn = null, $limit = INF ) {
		// no-op; rely on manual index deletion
		return false;
	}
}