Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
53.77% |
107 / 199 |
|
0.00% |
0 / 7 |
CRAP | |
0.00% |
0 / 1 |
ElasticaErrorHandler | |
53.77% |
107 / 199 |
|
0.00% |
0 / 7 |
275.66 | |
0.00% |
0 / 1 |
logRequestResponse | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
6 | |||
extractMessage | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
extractFullError | |
50.91% |
28 / 55 |
|
0.00% |
0 / 1 |
37.19 | |||
classifyError | |
98.48% |
65 / 66 |
|
0.00% |
0 / 1 |
10 | |||
isParseError | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
12 | |||
extractMessageAndStatus | |
13.73% |
7 / 51 |
|
0.00% |
0 / 1 |
104.47 | |||
formatMessage | |
46.67% |
7 / 15 |
|
0.00% |
0 / 1 |
11.46 |
1 | <?php |
2 | |
3 | namespace CirrusSearch; |
4 | |
5 | use Elastica\Exception\Bulk\ResponseException as BulkResponseException; |
6 | use Elastica\Exception\Connection\HttpException; |
7 | use Elastica\Exception\PartialShardFailureException; |
8 | use Elastica\Exception\ResponseException; |
9 | use MediaWiki\Logger\LoggerFactory; |
10 | use MediaWiki\Status\Status; |
11 | |
12 | /** |
13 | * Generic functions for extracting and reporting on errors/exceptions |
14 | * from Elastica. |
15 | */ |
16 | class ElasticaErrorHandler { |
17 | |
/**
 * Log, at info level on the 'CirrusSearch' channel, the last request and
 * response recorded by the client of the given connection.
 *
 * @param Connection $conn connection whose client and cluster name are reported
 * @param mixed $message message handed to the logger as-is
 * @param array $context additional log context; because the arrays are combined
 *  with the union operator, keys supplied here win over the ones added below
 */
public static function logRequestResponse( Connection $conn, $message, array $context = [] ) {
	$client = $conn->getClient();

	$details = [ 'cluster' => $conn->getClusterName() ];
	$details['elasticsearch_request'] = (string)$client->getLastRequest();

	// The literal string "NULL" stands in when the client has no response recorded.
	$lastResponse = $client->getLastResponse();
	$details['elasticsearch_response'] = $lastResponse === null
		? "NULL"
		: json_encode( $lastResponse->getData() );

	LoggerFactory::getInstance( 'CirrusSearch' )->info( $message, $context + $details );
}
26 | |
/**
 * Build a textual description of an exception thrown by Elastica.
 *
 * @param \Elastica\Exception\ExceptionInterface $exception
 * @return string the structured error from extractFullError() rendered by formatMessage()
 */
public static function extractMessage( \Elastica\Exception\ExceptionInterface $exception ) {
	return self::formatMessage( self::extractFullError( $exception ) );
}
35 | |
/**
 * Extract a structured error from an exception thrown by Elastica.
 *
 * Bulk, HTTP, non-response and partial-shard-failure exceptions are converted
 * into a synthesized array holding at least 'type' and 'reason'. For any other
 * ResponseException the value of the response's getFullError() is returned;
 * string and null values are wrapped into the same 'type'/'reason' structure.
 *
 * @param \Elastica\Exception\ExceptionInterface $exception exception from which to extract the error
 * @return array structured error from the exception
 */
public static function extractFullError( \Elastica\Exception\ExceptionInterface $exception ): array {
	if ( $exception instanceof BulkResponseException ) {
		$actionReasons = [];
		foreach ( $exception->getActionExceptions() as $actionException ) {
			$actionError = $actionException->getResponse()->getFullError();
			if ( !is_array( $actionError ) ) {
				// getFullError() is treated below as possibly returning a string
				// or null; apply the same tolerance here because formatMessage()
				// only accepts arrays.
				$actionError = [
					'type' => 'unknown',
					'reason' => (string)$actionError,
				];
			}
			$actionReasons[] = $actionException->getMessage() . ': '
				. self::formatMessage( $actionError );
		}
		return [
			'type' => 'bulk',
			'reason' => $exception->getMessage(),
			'actionReasons' => $actionReasons,
		];
	} elseif ( $exception instanceof HttpException ) {
		return [
			'type' => 'http_exception',
			'reason' => $exception->getMessage()
		];
	} elseif ( !( $exception instanceof ResponseException ) ) {
		// simulate the basic full error structure
		return [
			'type' => 'unknown',
			'reason' => $exception->getMessage()
		];
	}
	if ( $exception instanceof PartialShardFailureException ) {
		// @todo still needs to be fixed, need a way to trigger this
		// failure
		$shardStats = $exception->getResponse()->getShardsStatistics();
		$message = [];
		$type = null;
		// Tolerate shard statistics that carry no 'failures' list.
		foreach ( $shardStats['failures'] ?? [] as $failure ) {
			$message[] = $failure['reason']['reason'];
			// The reported type is the one of the first failure only.
			if ( $type === null ) {
				$type = $failure['reason']['type'];
			}
		}

		return [
			'type' => $type,
			'reason' => 'Partial failure: ' . implode( ',', $message ),
			'partial' => true
		];
	}

	$response = $exception->getResponse();
	$error = $response->getFullError();
	if ( is_string( $error ) ) {
		$error = [
			'type' => 'unknown',
			'reason' => $error,
		];
	} elseif ( $error === null ) {
		// response wasn't json or didn't contain 'error' key
		// in this case elastica reports nothing.
		$data = $response->getData();
		$parts = [];
		if ( $response->getStatus() !== null ) {
			$parts[] = 'Status code ' . $response->getStatus();
		}
		if ( isset( $data['message'] ) ) {
			// Client puts non-json responses here
			$parts[] = substr( $data['message'], 0, 200 );
		} elseif ( is_string( $data ) && $data !== "" ) {
			// pre-6.0.3 versions of Elastica
			$parts[] = substr( $data, 0, 200 );
		}
		$reason = implode( "; ", $parts );

		$error = [
			'type' => 'unknown',
			'reason' => $reason,
		];
	}

	return $error;
}
117 | |
/**
 * Sort an Elastica exception into a coarse category by matching the type and
 * reason of its error (the first root cause when one is reported) against a
 * fixed table of regular expressions. Categories are tried in table order and
 * the first one whose type or message patterns match is returned.
 *
 * @param \Elastica\Exception\ExceptionInterface|null $exception
 * @return string One of 'rejected', 'failed', 'config_issue', 'memory_issue'
 *  or 'unknown'
 */
public static function classifyError( ?\Elastica\Exception\ExceptionInterface $exception = null ) {
	if ( $exception === null ) {
		return 'unknown';
	}

	$details = self::extractFullError( $exception );
	if ( isset( $details['root_cause'][0]['type'] ) ) {
		// Classify on the first root cause rather than on the top level error.
		$details = reset( $details['root_cause'] );
	} elseif ( !isset( $details['type'], $details['reason'] ) ) {
		return 'unknown';
	}

	$rules = [
		'rejected' => [
			'type_regexes' => [
				'(^|_)regex_',
				'^too_complex_to_determinize_exception$',
				'^elasticsearch_parse_exception$',
				'^search_parse_exception$',
				'^query_shard_exception$',
				'^illegal_argument_exception$',
				'^too_many_clauses$',
				'^parsing_exception$',
				'^parse_exception$',
				'^script_exception$',
			],
			'msg_regexes' => [
			],
		],
		'failed' => [
			'type_regexes' => [
				'^es_rejected_execution_exception$',
				'^search_phase_execution_exception',
				'^remote_transport_exception$',
				'^search_context_missing_exception$',
				'^null_pointer_exception$',
				'^elasticsearch_timeout_exception$',
				'^retry_on_primary_exception$',
				// These are exceptions thrown by elastica itself
				// (generally connectivity issues in cURL)
				'^http_exception$',
			],
			'msg_regexes' => [
				// ClientException thrown by Elastica
				'^No enabled connection',
				// These are problems raised by the http intermediary layers (nginx/envoy)
				'^Status code 503',
				'^\Qupstream connect error or disconnect/reset\E',
				'^upstream request timeout',
				// see \CirrusSearch\Query\CompSuggestQueryBuilder::postProcess, not ideal to rely
				// on our own exception message for error classification...
				'^\QInvalid response returned from the backend (probable shard failure during the fetch phase)\E',
			],
		],
		'config_issue' => [
			'type_regexes' => [
				'^index_not_found_exception$',
			],
			'msg_regexes' => [
				// for 'bulk' errors index_not_found_exception is set
				// in message and not type
				'index_not_found_exception',
			],
		],
		'memory_issue' => [
			'type_regexes' => [
				'^circuit_breaking_exception$',
			],
			'msg_regexes' => [],
		],
	];

	// Which field of the error each list of patterns is matched against;
	// the type patterns of a category are tried before its message patterns.
	$subjects = [
		'type_regexes' => 'type',
		'msg_regexes' => 'reason',
	];

	foreach ( $rules as $classification => $rule ) {
		foreach ( $subjects as $ruleKey => $field ) {
			$pattern = implode( '|', $rule[$ruleKey] );
			// An empty pattern list yields an empty alternation, which is skipped.
			if ( $pattern !== '' && preg_match( "#$pattern#", $details[$field] ) ) {
				return $classification;
			}
		}
	}

	return "unknown";
}
209 | |
/**
 * Tell whether a status carries the 'cirrussearch-parse-error' message.
 *
 * @param Status $status Status to inspect
 * @return bool true when at least one message of the status has that key
 */
public static function isParseError( Status $status ) {
	$messageKeys = array_map(
		static function ( $message ) {
			return $message->getKey();
		},
		$status->getMessages()
	);

	return in_array( 'cirrussearch-parse-error', $messageKeys, true );
}
223 | |
/**
 * Convert an Elastica exception into a Status suitable for reporting plus a
 * textual message describing the failure.
 *
 * The Status key depends on the type of the first root cause:
 * 'cirrussearch-parse-error' for query_shard_exception,
 * 'cirrussearch-regex-too-complex-error' for too_complex_to_determinize_exception,
 * 'cirrussearch-regex-syntax-error' for types matching /(^|_)regex_/ and
 * 'cirrussearch-backend-error' for everything else, including a missing
 * exception and errors without a root cause.
 *
 * @param \Elastica\Exception\ExceptionInterface|null $exception
 * @return array Two elements, first is Status object, second is string.
 */
public static function extractMessageAndStatus( ?\Elastica\Exception\ExceptionInterface $exception = null ) {
	if ( !$exception ) {
		return [ Status::newFatal( 'cirrussearch-backend-error' ), '' ];
	}

	// Lots of times these are the same as getFullError(), but sometimes
	// they're not. I'm looking at you PartialShardFailureException.
	$error = self::extractFullError( $exception );

	// Errors without a typed root cause: top level errors and the structures
	// synthesized by extractFullError() (bulk, http, unknown and partial
	// shard failures).
	if ( !isset( $error['root_cause'][0]['type'] ) ) {
		return [
			Status::newFatal( 'cirrussearch-backend-error' ),
			self::formatMessage( $error )
		];
	}

	// We can have multiple root causes if the error is not the
	// same on different shards. Errors will be deduplicated based
	// on their type. Currently we display only the first one if
	// it happens.
	$cause = reset( $error['root_cause'] );

	if ( $cause['type'] === 'query_shard_exception' ) {
		// The important part of the parse error message is embedded a few levels down
		// and comes before the next new line so lets slurp it up and log it rather than
		// the huge clump of error.
		// reset() needs a real array: copy into a local and default to an empty
		// list so a response without 'failed_shards' does not raise an error.
		$failedShards = $error['failed_shards'] ?? [];
		$shardFailure = is_array( $failedShards ) ? reset( $failedShards ) : false;
		$reason = is_array( $shardFailure ) ? ( $shardFailure['reason'] ?? null ) : null;
		if ( is_array( $reason ) ) {
			// Prefer the nested cause, then the reason itself, then the placeholder.
			$message = $reason['caused_by']['reason'] ?? $reason['reason'] ?? '???';
		} elseif ( is_string( $reason ) && $reason !== '' ) {
			$message = $reason;
		} else {
			$message = "???";
		}
		// Keep only the text before the first new line.
		$parseError = explode( "\n", $message, 2 )[0];

		return [
			Status::newFatal( 'cirrussearch-parse-error' ),
			'Parse error on ' . $parseError
		];
	}

	if ( $cause['type'] === 'too_complex_to_determinize_exception' ) {
		return [ Status::newFatal(
			'cirrussearch-regex-too-complex-error' ),
			$cause['reason'] ?? ''
		];
	}

	if ( $cause['type'] === 'script_exception' ) {
		// do not use $cause which won't contain the caused_by chain
		// Fall back to the top level error when no caused_by chain is present.
		$chain = $error['caused_by'] ?? null;
		$formattedMessage = self::formatMessage( is_array( $chain ) ? $chain : $error );
		// A missing script stack is rendered as an empty one.
		$scriptStack = (array)( $cause['script_stack'] ?? [] );
		$formattedMessage .= "\n\t" . implode( "\n\t", $scriptStack ) . "\n";
		return [
			Status::newFatal( 'cirrussearch-backend-error' ),
			$formattedMessage
		];
	}

	if ( preg_match( '/(^|_)regex_/', $cause['type'] ) ) {
		$syntaxError = $cause['reason'] ?? '';
		$errorMessage = 'unknown';
		$position = 'unknown';
		// Note: we support only error coming from the extra plugin
		// In the case Cirrus is installed without the plugin and
		// is using the Groovy script to do regex then a generic backend error
		// will be displayed.

		$matches = [];
		// In some cases elastic will serialize the exception by adding
		// an extra message prefix with the exception type.
		// If the exception is serialized through Transport:
		// invalid_regex_exception: expected ']' at position 2
		// Or if the exception is thrown locally by the node receiving the query:
		// expected ']' at position 2
		if ( preg_match( '/(?:[a-z_]+: )?(.+) at position (\d+)/', $syntaxError, $matches ) ) {
			[ , $errorMessage, $position ] = $matches;
		} elseif ( $syntaxError === 'unexpected end-of-string' ) {
			$errorMessage = 'regex too short to be correct';
		}
		$status = Status::newFatal( 'cirrussearch-regex-syntax-error', $errorMessage, $position );

		return [ $status, 'Regex syntax error: ' . $syntaxError ];
	}

	return [
		Status::newFatal( 'cirrussearch-backend-error' ),
		self::formatMessage( $cause )
	];
}
327 | |
/**
 * Takes an error and converts it into a useful message. Mostly this is to deal with
 * errors where the useful part is hidden inside a caused_by chain.
 * WARNING: In some circumstances, like bulk update failures, this could be multiple
 * megabytes.
 *
 * The message starts with "<type>: <reason>". When the error has an
 * 'actionReasons' list each entry is appended as " - <entry>\n"; otherwise the
 * non-empty reasons found along the caused_by chain are appended in
 * parentheses, deepest cause first. A missing type is rendered as 'unknown'
 * and a missing reason as an empty string.
 *
 * @param array $error An error array, such as the one returned by extractFullError().
 * @return string
 */
protected static function formatMessage( array $error ) {
	$message = ( $error['type'] ?? 'unknown' ) . ': ' . ( $error['reason'] ?? '' );

	if ( isset( $error['actionReasons'] ) ) {
		foreach ( $error['actionReasons'] as $actionReason ) {
			$message .= " - $actionReason\n";
		}
		return $message;
	}

	$causeChain = [];
	$errorCursor = $error;
	// Stop descending as soon as caused_by is missing or is not a nested structure.
	while ( isset( $errorCursor['caused_by'] ) && is_array( $errorCursor['caused_by'] ) ) {
		$errorCursor = $errorCursor['caused_by'];
		// Links without a usable reason are skipped rather than reported.
		if ( !empty( $errorCursor['reason'] ) ) {
			$causeChain[] = $errorCursor['reason'];
		}
	}
	if ( $causeChain ) {
		$message .= ' (' . implode( ' -> ', array_reverse( $causeChain ) ) . ')';
	}
	return $message;
}
360 | |
361 | } |