Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
54.04% |
107 / 198 |
|
0.00% |
0 / 7 |
CRAP | |
0.00% |
0 / 1 |
| ElasticaErrorHandler | |
54.04% |
107 / 198 |
|
0.00% |
0 / 7 |
261.45 | |
0.00% |
0 / 1 |
| logRequestResponse | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
6 | |||
| extractMessage | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
| extractFullError | |
51.85% |
28 / 54 |
|
0.00% |
0 / 1 |
31.86 | |||
| classifyError | |
98.48% |
65 / 66 |
|
0.00% |
0 / 1 |
10 | |||
| isParseError | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
12 | |||
| extractMessageAndStatus | |
13.73% |
7 / 51 |
|
0.00% |
0 / 1 |
104.47 | |||
| formatMessage | |
46.67% |
7 / 15 |
|
0.00% |
0 / 1 |
11.46 | |||
| 1 | <?php |
| 2 | |
| 3 | namespace CirrusSearch; |
| 4 | |
| 5 | use Elastica\Exception\Bulk\ResponseException as BulkResponseException; |
| 6 | use Elastica\Exception\Connection\HttpException; |
| 7 | use Elastica\Exception\PartialShardFailureException; |
| 8 | use Elastica\Exception\ResponseException; |
| 9 | use MediaWiki\Logger\LoggerFactory; |
| 10 | use MediaWiki\Status\Status; |
| 11 | |
/**
 * Generic functions for extracting and reporting on errors/exceptions
 * from Elastica.
 *
 * Every method is static. Errors are passed around as plain arrays that carry
 * at least a 'type' and a 'reason' key and, optionally, 'root_cause',
 * 'caused_by', 'failed_shards', 'actionReasons' or 'partial'.
 */
class ElasticaErrorHandler {

	/**
	 * Heuristics used by classifyError(), keyed by the classification returned.
	 *
	 * Each entry holds regex fragments (without delimiters) that are joined with
	 * '|' and matched against the error type ('type_regexes') or the error
	 * reason ('msg_regexes'). Entries are evaluated in declaration order and the
	 * first one that matches wins.
	 */
	private const CLASSIFICATION_HEURISTICS = [
		'rejected' => [
			'type_regexes' => [
				'(^|_)regex_',
				'^too_complex_to_determinize_exception$',
				'^elasticsearch_parse_exception$',
				'^search_parse_exception$',
				'^query_shard_exception$',
				'^illegal_argument_exception$',
				'^too_many_clauses$',
				'^parsing_exception$',
				'^parse_exception$',
				'^script_exception$',
			],
			'msg_regexes' => [
			],
		],
		'failed' => [
			'type_regexes' => [
				'^es_rejected_execution_exception$',
				'^search_phase_execution_exception',
				'^remote_transport_exception$',
				'^search_context_missing_exception$',
				'^null_pointer_exception$',
				'^elasticsearch_timeout_exception$',
				'^retry_on_primary_exception$',
				// These are exceptions thrown by elastica itself
				// (generally connectivity issues in cURL)
				'^http_exception$',
			],
			'msg_regexes' => [
				// ClientException thrown by Elastica
				'^No enabled connection',
				// These are problems raised by the http intermediary layers (nginx/envoy)
				'^Status code 503',
				'^\Qupstream connect error or disconnect/reset\E',
				'^upstream request timeout',
				// see \CirrusSearch\Query\CompSuggestQueryBuilder::postProcess, not ideal to rely
				// on our own exception message for error classification...
				'^\QInvalid response returned from the backend (probable shard failure during the fetch phase)\E',
			],
		],
		'config_issue' => [
			'type_regexes' => [
				'^index_not_found_exception$',
			],
			'msg_regexes' => [
				// for 'bulk' errors index_not_found_exception is set
				// in message and not type
				'index_not_found_exception',
			],
		],
		'memory_issue' => [
			'type_regexes' => [
				'^circuit_breaking_exception$',
			],
			'msg_regexes' => [],
		],
	];

	/**
	 * Log the last request sent and the last response received on a connection.
	 *
	 * Writes an info-level entry to the 'CirrusSearch' log channel. Keys that
	 * are already present in $context take precedence over the ones added here
	 * ('cluster', 'elasticsearch_request', 'elasticsearch_response').
	 *
	 * @param Connection $conn Connection whose client is inspected
	 * @param string $message Log message
	 * @param array $context Additional log context
	 */
	public static function logRequestResponse( Connection $conn, string $message, array $context = [] ) {
		$client = $conn->getClient();
		$lastResponse = $client->getLastResponse();
		LoggerFactory::getInstance( 'CirrusSearch' )->info( $message, $context + [
			'cluster' => $conn->getClusterName(),
			'elasticsearch_request' => (string)$client->getLastRequest(),
			'elasticsearch_response' => $lastResponse !== null
				? json_encode( $lastResponse->getData() )
				: "NULL",
		] );
	}

	/**
	 * Extract a one-line, human readable message from an Elastica exception.
	 *
	 * @param \Elastica\Exception\ExceptionInterface $exception
	 * @return string
	 */
	public static function extractMessage( \Elastica\Exception\ExceptionInterface $exception ) {
		return self::formatMessage( self::extractFullError( $exception ) );
	}

	/**
	 * Extract a structured error from an exception thrown by Elastica.
	 *
	 * The returned array always contains 'type' and 'reason'. Bulk failures
	 * additionally carry 'actionReasons' (one formatted string per failed
	 * action) and partial shard failures carry 'partial' => true. For plain
	 * response exceptions the error reported by the backend is returned as is
	 * when it is already an array.
	 *
	 * @param \Elastica\Exception\ExceptionInterface $exception exception from which to extract a message
	 * @return array structured error from the exception
	 */
	public static function extractFullError( \Elastica\Exception\ExceptionInterface $exception ): array {
		if ( $exception instanceof BulkResponseException ) {
			$actionReasons = [];
			foreach ( $exception->getActionExceptions() as $actionException ) {
				// The per-action error may be a string or missing entirely, normalise
				// it before handing it to formatMessage() which only accepts arrays.
				$actionReasons[] = $actionException->getMessage() . ': '
					. self::formatMessage( self::normalizeResponseError( $actionException->getResponse() ) );
			}
			return [
				'type' => 'bulk',
				'reason' => $exception->getMessage(),
				'actionReasons' => $actionReasons,
			];
		}

		if ( $exception instanceof HttpException ) {
			return [
				'type' => 'http_exception',
				'reason' => $exception->getMessage()
			];
		}

		if ( !( $exception instanceof ResponseException ) ) {
			// simulate the basic full error structure
			return [
				'type' => 'unknown',
				'reason' => $exception->getMessage()
			];
		}

		if ( $exception instanceof PartialShardFailureException ) {
			// @todo still needs to be fixed, need a way to trigger this
			// failure
			$shardStats = $exception->getResponse()->getShardsStatistics();
			$failures = $shardStats['failures'] ?? [];
			$reasons = [];
			$type = null;
			foreach ( is_array( $failures ) ? $failures : [] as $failure ) {
				$reasons[] = $failure['reason']['reason'] ?? '';
				// Report the type of the first failure that has one.
				$type ??= $failure['reason']['type'] ?? null;
			}

			return [
				'type' => $type ?? 'unknown',
				'reason' => 'Partial failure: ' . implode( ',', $reasons ),
				'partial' => true
			];
		}

		return self::normalizeResponseError( $exception->getResponse() );
	}

	/**
	 * Turn whatever a response reports as its error into the array structure
	 * used throughout this class.
	 *
	 * @param \Elastica\Response $response
	 * @return array Error array, with 'type' and 'reason' set whenever it had
	 *  to be synthesised here.
	 */
	private static function normalizeResponseError( \Elastica\Response $response ): array {
		$error = $response->getFullError();
		if ( is_array( $error ) ) {
			return $error;
		}
		if ( is_string( $error ) ) {
			return [
				'type' => 'unknown',
				'reason' => $error,
			];
		}

		// response wasn't json or didn't contain 'error' key
		// in this case elastica reports nothing.
		$data = $response->getData();
		$status = $response->getStatus();
		$parts = [];
		if ( $status !== null ) {
			$parts[] = 'Status code ' . $status;
		}
		if ( is_array( $data ) && isset( $data['message'] ) ) {
			// Client puts non-json responses here
			if ( is_scalar( $data['message'] ) ) {
				$parts[] = substr( (string)$data['message'], 0, 200 );
			}
		} elseif ( is_string( $data ) && $data !== "" ) {
			// pre-6.0.3 versions of Elastica
			$parts[] = substr( $data, 0, 200 );
		}

		return [
			'type' => 'unknown',
			'reason' => implode( "; ", $parts ),
		];
	}

	/**
	 * Broadly classify the error message into failures where
	 * we decided to not serve the query, and failures where
	 * we just failed to answer
	 *
	 * When the error has a root cause, the first root cause is classified
	 * instead of the top level error.
	 *
	 * @param \Elastica\Exception\ExceptionInterface|null $exception
	 * @return string One of 'rejected', 'failed', 'config_issue', 'memory_issue'
	 *  or 'unknown'
	 */
	public static function classifyError( ?\Elastica\Exception\ExceptionInterface $exception = null ) {
		if ( $exception === null ) {
			return 'unknown';
		}
		$error = self::extractFullError( $exception );
		if ( isset( $error['root_cause'][0]['type'] ) ) {
			$error = $error['root_cause'][0];
		} elseif ( !( isset( $error['type'] ) && isset( $error['reason'] ) ) ) {
			return 'unknown';
		}

		// A root cause is only guaranteed to have a type, its reason may be absent.
		$errorType = $error['type'];
		$errorReason = $error['reason'] ?? '';
		if ( !is_string( $errorType ) || !is_string( $errorReason ) ) {
			return 'unknown';
		}

		foreach ( self::CLASSIFICATION_HEURISTICS as $classification => $heuristic ) {
			$typeRegex = implode( '|', $heuristic['type_regexes'] );
			if ( $typeRegex && preg_match( "#$typeRegex#", $errorType ) ) {
				return $classification;
			}
			$msgRegex = implode( '|', $heuristic['msg_regexes'] );
			if ( $msgRegex && preg_match( "#$msgRegex#", $errorReason ) ) {
				return $classification;
			}
		}
		return "unknown";
	}

	/**
	 * Does this status represent an Elasticsearch parse error?
	 * @param Status $status Status to check
	 * @return bool is this a parse error?
	 */
	public static function isParseError( Status $status ) {
		foreach ( $status->getMessages() as $statusMessage ) {
			if ( $statusMessage->getKey() === 'cirrussearch-parse-error' ) {
				return true;
			}
		}
		return false;
	}

	/**
	 * Convert an exception into a fatal Status plus a message describing it.
	 *
	 * @param \Elastica\Exception\ExceptionInterface|null $exception
	 * @return array Two elements, first is Status object, second is string.
	 */
	public static function extractMessageAndStatus( ?\Elastica\Exception\ExceptionInterface $exception = null ) {
		if ( !$exception ) {
			return [ Status::newFatal( 'cirrussearch-backend-error' ), '' ];
		}

		// Lots of times these are the same as getFullError(), but sometimes
		// they're not. I'm looking at you PartialShardFailureException.
		$error = self::extractFullError( $exception );

		// These can be top level errors, or exceptions that don't extend from
		// ResponseException like PartialShardFailureException or errors
		// contacting the cluster.
		if ( !isset( $error['root_cause'][0]['type'] ) ) {
			return [
				Status::newFatal( 'cirrussearch-backend-error' ),
				self::formatMessage( $error )
			];
		}

		// We can have multiple root causes if the error is not the
		// same on different shards. Errors will be deduplicated based
		// on their type. Currently we display only the first one if
		// it happens.
		$cause = $error['root_cause'][0];
		$causeType = $cause['type'];
		$causeReason = is_string( $cause['reason'] ?? null ) ? $cause['reason'] : '';

		if ( $causeType === 'query_shard_exception' ) {
			// The important part of the parse error message is embedded a few levels down
			// and comes before the next new line so lets slurp it up and log it rather than
			// the huge clump of error.
			$parseError = explode( "\n", self::extractShardFailureMessage( $error ), 2 )[0];

			return [
				Status::newFatal( 'cirrussearch-parse-error' ),
				'Parse error on ' . $parseError
			];
		}

		if ( $causeType === 'too_complex_to_determinize_exception' ) {
			return [
				Status::newFatal( 'cirrussearch-regex-too-complex-error' ),
				$causeReason
			];
		}

		if ( $causeType === 'script_exception' ) {
			// do not use $cause which won't contain the caused_by chain,
			// unless the top level error has no chain at all.
			$causedBy = $error['caused_by'] ?? null;
			$formattedMessage = self::formatMessage( is_array( $causedBy ) ? $causedBy : $cause );
			$scriptStack = $cause['script_stack'] ?? null;
			if ( is_array( $scriptStack ) ) {
				$formattedMessage .= "\n\t" . implode( "\n\t", $scriptStack ) . "\n";
			}
			return [
				Status::newFatal( 'cirrussearch-backend-error' ),
				$formattedMessage
			];
		}

		if ( is_string( $causeType ) && preg_match( '/(^|_)regex_/', $causeType ) ) {
			$syntaxError = $causeReason;
			$errorMessage = 'unknown';
			$position = 'unknown';
			// Note: we support only error coming from the extra plugin
			// In the case Cirrus is installed without the plugin and
			// is using the Groovy script to do regex then a generic backend error
			// will be displayed.

			$matches = [];
			// In some cases elastic will serialize the exception by adding
			// an extra message prefix with the exception type.
			// If the exception is serialized through Transport:
			// invalid_regex_exception: expected ']' at position 2
			// Or if the exception is thrown locally by the node receiving the query:
			// expected ']' at position 2
			if ( preg_match( '/(?:[a-z_]+: )?(.+) at position (\d+)/', $syntaxError, $matches ) ) {
				[ , $errorMessage, $position ] = $matches;
			} elseif ( $syntaxError === 'unexpected end-of-string' ) {
				$errorMessage = 'regex too short to be correct';
			}
			$status = Status::newFatal( 'cirrussearch-regex-syntax-error', $errorMessage, $position );

			return [ $status, 'Regex syntax error: ' . $syntaxError ];
		}

		return [
			Status::newFatal( 'cirrussearch-backend-error' ),
			self::formatMessage( $cause )
		];
	}

	/**
	 * Find the most specific failure reason of the first failed shard.
	 *
	 * Prefers the reason found in the shard failure's 'caused_by' entry and
	 * falls back to the reason of the failure itself.
	 *
	 * @param array $error Full error array
	 * @return string The reason, or "???" when none could be found
	 */
	private static function extractShardFailureMessage( array $error ): string {
		$failedShards = $error['failed_shards'] ?? null;
		$shardFailure = is_array( $failedShards ) ? reset( $failedShards ) : false;
		$reason = is_array( $shardFailure ) ? ( $shardFailure['reason'] ?? null ) : null;

		if ( is_string( $reason ) && $reason !== '' ) {
			return $reason;
		}
		if ( is_array( $reason ) ) {
			$message = $reason['caused_by']['reason'] ?? $reason['reason'] ?? null;
			if ( is_string( $message ) ) {
				return $message;
			}
		}
		return "???";
	}

	/**
	 * Takes an error and converts it into a useful message. Mostly this is to deal with
	 * errors where the useful part is hidden inside a caused_by chain.
	 * WARNING: In some circumstances, like bulk update failures, this could be multiple
	 * megabytes.
	 *
	 * Errors carrying 'actionReasons' are rendered as "type: reason" followed by
	 * one " - <action reason>\n" per action. Any other error is rendered as
	 * "type: reason", followed by the non-empty reasons of its caused_by chain
	 * (deepest first) in parentheses.
	 *
	 * @param array $error An error array, such as the one returned by extractFullError().
	 * @return string
	 */
	protected static function formatMessage( array $error ) {
		$message = ( $error['type'] ?? 'unknown' ) . ': ' . ( $error['reason'] ?? '' );

		if ( isset( $error['actionReasons'] ) ) {
			foreach ( $error['actionReasons'] as $actionReason ) {
				$message .= " - $actionReason\n";
			}
			return $message;
		}

		$causeChain = [];
		$errorCursor = $error;
		// Stop as soon as the chain is broken by something that is not an array.
		while ( isset( $errorCursor['caused_by'] ) && is_array( $errorCursor['caused_by'] ) ) {
			$errorCursor = $errorCursor['caused_by'];
			if ( !empty( $errorCursor['reason'] ) ) {
				$causeChain[] = $errorCursor['reason'];
			}
		}
		if ( $causeChain ) {
			$message .= ' (' . implode( ' -> ', array_reverse( $causeChain ) ) . ')';
		}
		return $message;
	}

}