Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | n/a |
0 / 0 |
n/a |
0 / 0 |
CRAP | n/a |
0 / 0 |
|||
| OrchestratorRequest | n/a |
0 / 0 |
n/a |
0 / 0 |
16 | n/a |
0 / 0 |
|||
| __construct | n/a |
0 / 0 |
n/a |
0 / 0 |
2 | |||||
| orchestrate | n/a |
0 / 0 |
n/a |
0 / 0 |
7 | |||||
| handleGuzzleRequestForEvaluate | n/a |
0 / 0 |
n/a |
0 / 0 |
5 | |||||
| getSupportedProgrammingLanguages | n/a |
0 / 0 |
n/a |
0 / 0 |
1 | |||||
| persistToCache | n/a |
0 / 0 |
n/a |
0 / 0 |
1 | |||||
| 1 | <?php |
| 2 | /** |
| 3 | * WikiLambda Orchestrator Interface base class |
| 4 | * |
| 5 | * @file |
| 6 | * @ingroup Extensions |
| 7 | * @copyright 2020– Abstract Wikipedia team; see AUTHORS.txt |
| 8 | * @license MIT |
| 9 | */ |
| 10 | |
| 11 | namespace MediaWiki\Extension\WikiLambda; |
| 12 | |
| 13 | use GuzzleHttp\Client; |
| 14 | use GuzzleHttp\Exception\ConnectException; |
| 15 | use GuzzleHttp\Exception\TooManyRedirectsException; |
| 16 | use JsonException; |
| 17 | use MediaWiki\Extension\WikiLambda\Cache\MemcachedWrapper; |
| 18 | use MediaWiki\Extension\WikiLambda\Registry\ZErrorTypeRegistry; |
| 19 | use MediaWiki\Extension\WikiLambda\ZObjects\ZResponseEnvelope; |
| 20 | use MediaWiki\Json\FormatJson; |
| 21 | use MediaWiki\Logger\LoggerFactory; |
| 22 | use MediaWiki\MediaWikiServices; |
| 23 | use MediaWiki\Utils\GitInfo; |
| 24 | use Psr\Http\Message\ResponseInterface; |
| 25 | use Psr\Log\LoggerInterface; |
| 26 | use stdClass; |
| 27 | use Wikimedia\Telemetry\TracerInterface; |
| 28 | |
/**
 * Client wrapper for all MediaWiki-side network access to the function-orchestrator.
 *
 * Every request goes through the injected Guzzle client, carries the request tracing
 * headers and a WikiLambda-specific User-Agent, and (for function evaluation) is backed
 * by a MediaWiki-side cache of previous responses.
 *
 * @codeCoverageIgnore
 */
class OrchestratorRequest {

	protected string $userAgentString;
	protected MemcachedWrapper $objectCache;
	protected TracerInterface $tracer;
	protected LoggerInterface $logger;

	public const FUNCTIONCALL_CACHE_KEY_PREFIX = 'WikiLambdaFunctionCall';

	/**
	 * Maximum number of bytes of an arbitrary payload to put into a single log field or
	 * exception message, to avoid over-loading the logging system.
	 */
	private const LOG_PAYLOAD_LIMIT = 1000;

	/**
	 * The specialised request interface to control all network access to the function-orchestrator.
	 *
	 * @param Client $guzzleClient GuzzleHttp Client used for requests
	 */
	public function __construct( protected readonly Client $guzzleClient ) {
		// We generate a user agent string for better traceability of requests:
		// "wikifunctions-request/<MW version>", plus "-WL<short git hash>" when the
		// extension's git HEAD can be read.
		$this->userAgentString = 'wikifunctions-request/' . MW_VERSION;
		$gitInfo = new GitInfo( MW_INSTALL_PATH . '/extensions/WikiLambda' );
		$gitHash = $gitInfo->getHeadSHA1();
		if ( $gitHash !== false ) {
			$this->userAgentString .= '-WL' . substr( $gitHash, 0, 8 );
		}

		// Non-injected items
		$this->tracer = MediaWikiServices::getInstance()->getTracer();
		$this->objectCache = WikiLambdaServices::getMemcachedWrapper();

		$this->logger = LoggerFactory::getInstance( 'WikiLambda' );
	}

	/**
	 * Build the headers sent with every orchestrator request.
	 *
	 * @return array Tracing headers from the tracer, plus our User-Agent
	 */
	private function buildRequestHeaders(): array {
		// (T365053) Propagate request tracing headers
		$requestHeaders = $this->tracer->getRequestHeaders();
		$requestHeaders['User-Agent'] = $this->userAgentString;
		return $requestHeaders;
	}

	/**
	 * Ask the function-orchestrator to evaluate a function call and saves the
	 * response in the cache (if not yet cached).
	 *
	 * * If bypassCache is true, sends the request directly to the orchestrator and
	 *   does not read or update the cached value.
	 *
	 * @param stdClass|array $query
	 * @param bool $bypassCache Whether to bypass the MediaWiki-side function call cache; this is
	 *   only to be used for special circumstances, as it's potentially expensive.
	 * @return array with two keys: 'result', the Response object (Z22) returned by the orchestrator,
	 *   down-cast to a string (or null if a corrupt cache entry had to be discarded), and
	 *   'httpStatusCode', the actual http status code from the Orchestrator
	 * @throws ConnectException If the request fails to connect
	 * @throws TooManyRedirectsException If the request exceeds the allowed number of redirects
	 */
	public function orchestrate( $query, $bypassCache = false ): array {
		$requestHeaders = $this->buildRequestHeaders();

		if ( $bypassCache ) {
			return $this->handleGuzzleRequestForEvaluate( $query, $requestHeaders );
		}

		$requestKey = $this->objectCache->makeKey(
			self::FUNCTIONCALL_CACHE_KEY_PREFIX,
			ZObjectUtils::makeCacheKeyFromZObject( $query )
		);

		$response = $this->objectCache->get( $requestKey );
		if ( $response !== false ) {
			$this->logger->debug( __METHOD__ . ' cache hit for {key}', [ 'key' => $requestKey ] );
		} else {
			$this->logger->info( __METHOD__ . ' cache miss for {key}', [ 'key' => $requestKey ] );
			$response = $this->handleGuzzleRequestForEvaluate( $query, $requestHeaders );
			$httpStatus = $response['httpStatusCode'] ?? HttpStatus::INTERNAL_SERVER_ERROR;

			// (T338243) Set TTL conditionally, so that:
			// * success (http 200) TTL_MONTH
			// * bad request (http 400-422) TTL_WEEK
			// * too many requests (http 429) TTL_MINUTE
			// * server error (http >= 500) TTL_MINUTE
			// So if the request fails due to 400, we can still cache for
			// a week, but if it fails due to system outages or timeouts,
			// we would benefit from reducing the TTL to something very short.

			// Default: All possible bad request status, set to TTL_WEEK
			$exptime = $this->objectCache::TTL_WEEK;

			if (
				( $httpStatus >= HttpStatus::INTERNAL_SERVER_ERROR ) ||
				( $httpStatus === HttpStatus::TOO_MANY_REQUESTS )
			) {
				// Recoverable system errors: set to TTL_MINUTE
				$exptime = $this->objectCache::TTL_MINUTE;
				$this->logger->warning(
					__METHOD__ . ' evaluated response for {key} returned HTTP {status}',
					[ 'key' => $requestKey, 'status' => $httpStatus ]
				);
			} else {
				// Successful value: set to TTL_MONTH
				$exptime = $httpStatus === HttpStatus::OK ? $this->objectCache::TTL_MONTH : $exptime;
				$this->logger->info(
					__METHOD__ . ' evaluated response for {key} returned HTTP {status}',
					[ 'key' => $requestKey, 'status' => $httpStatus ]
				);
			}

			$this->logger->debug(
				__METHOD__ . ' cache store for {key}, TTL {ttl}', [ 'key' => $requestKey, 'ttl' => $exptime ]
			);
			$this->objectCache->set( $requestKey, $response, $exptime );
		}

		// (T398410) Check that the response is an array
		if ( is_array( $response ) ) {
			return $response;
		}

		// … if not, delete from cache and return an empty response.
		$this->logger->error(
			'Cached orchestrator response was somehow not an array',
			[
				'requestKey' => $requestKey,
				// Shortened to avoid over-loading the logging system
				'response' => substr( var_export( $response, true ), 0, self::LOG_PAYLOAD_LIMIT )
			]
		);

		$this->objectCache->delete( $requestKey );
		return [ 'result' => null, 'httpStatusCode' => 500 ];
	}

	/**
	 * Helper function to handle client-side HTTP error codes.
	 *
	 * Guzzle by default throws an exception on any 4xx or 5xx status, in this case returned from the
	 * Orchestrator. By calling the post method with `http_errors => false`, 400 and 500 errors are
	 * returned in the response payload instead of being thrown as ClientException or ServerException.
	 *
	 * Guzzle throws four types of exception, all extend TransferException (implements GuzzleException):
	 * * ConnectException: in case of networking error
	 * * ClientException: for http 400 errors; with `http_errors => false` these are not thrown.
	 * * ServerException: for http 500 errors; with `http_errors => false` these are not thrown.
	 * * TooManyRedirectsException: in case of too many redirects followed
	 *
	 * See: https://docs.guzzlephp.org/en/stable/quickstart.html#exceptions
	 *
	 * If the response body is not valid JSON, or is JSON but not a Z22, a replacement Z22 carrying an
	 * "invalid orchestrator result" error is returned with status 500 instead of the raw body.
	 *
	 * @param stdClass|array $query
	 * @param array $requestHeaders
	 * @return array with two keys: 'result', the Response object (Z22) returned by the orchestrator,
	 *   down-cast to a string, and 'httpStatusCode', the actual http status code from the Orchestrator
	 * @throws ConnectException If the request fails to connect
	 * @throws TooManyRedirectsException If the request exceeds the allowed number of redirects
	 */
	private function handleGuzzleRequestForEvaluate( $query, $requestHeaders ): array {
		// TODO (T338242): Use postAsync here.
		$response = $this->guzzleClient->post( '/1/v2/evaluate/', [
			'json' => $query,
			'headers' => $requestHeaders,
			// http 400/500 errors from Orchestrator will be suppressed so that they will not throw exceptions
			'http_errors' => false
		] );
		$httpStatusCode = $response->getStatusCode();
		$responseBody = $response->getBody()->getContents();

		try {
			// (T414062) Check if the response body is a valid JSON string, and a Z22 as expected.
			$responseBodyObject = json_decode( $responseBody, true, 512, JSON_THROW_ON_ERROR );
			if (
				!is_array( $responseBodyObject ) ||
				!isset( $responseBodyObject['Z1K1'] ) ||
				$responseBodyObject['Z1K1'] !== 'Z22'
			) {
				// The message ends up in the log entry below, so it is shortened just like the
				// 'responseBody' field; otherwise a huge body would be logged in full regardless.
				throw new JsonException(
					'Response is not a Z22: ' .
					substr( var_export( $responseBody, true ), 0, self::LOG_PAYLOAD_LIMIT )
				);
			}
		} catch ( JsonException $e ) {
			$this->logger->warning(
				'Orchestrator response was either not JSON, or somehow not a Z22',
				[
					// Shortened to avoid over-loading the logging system
					'responseBody' => substr( $responseBody, 0, self::LOG_PAYLOAD_LIMIT ),
					'httpStatusCode' => $httpStatusCode,
					'exceptionMessage' => $e->getMessage(),
				]
			);

			// Make an actual Z22 response for the user of a Z24 with a Z577 error inside, quoting the bad response
			$responseError = ZErrorFactory::createZErrorInstance(
				ZErrorTypeRegistry::Z_ERROR_INVALID_ORCHESTRATOR_RESULT,
				[ 'request' => $query, 'response' => $responseBody ]
			);
			$badResponse = new ZResponseEnvelope( null, ZResponseEnvelope::wrapErrorInResponseMap( $responseError ) );
			return [ 'result' => FormatJson::encode( $badResponse->getSerialized() ), 'httpStatusCode' => 500 ];
		}

		return [ 'result' => $responseBody, 'httpStatusCode' => $httpStatusCode ];
	}

	/**
	 * Ask the function-orchestrator for the list of programming languages with evaluators currently configured.
	 *
	 * Unlike the other calls in this class, `http_errors` is left at Guzzle's default here, so
	 * 4xx/5xx responses are thrown as exceptions rather than returned.
	 *
	 * @return ResponseInterface Response interface returned by orchestrator network call.
	 * @throws \GuzzleHttp\Exception\GuzzleException On connection failure or an http error status
	 */
	public function getSupportedProgrammingLanguages(): ResponseInterface {
		// TODO (T338242): Use getAsync here.
		return $this->guzzleClient->get( '/1/v1/supported-programming-languages/', [
			// Sent for the same traceability reasons as on the other orchestrator calls
			'headers' => $this->buildRequestHeaders(),
		] );
	}

	/**
	 * Ask the function-orchestrator to store a Persistent ZObject (Z2) in its cache.
	 *
	 * The ZID sent alongside the object is read from the object's own Z2K1.Z6K1 field.
	 * Http error statuses are returned in the response rather than thrown.
	 *
	 * @param stdClass $Z2 The ZObject to persist to cache.
	 * @return ResponseInterface Response interface returned by orchestrator network call.
	 */
	public function persistToCache( $Z2 ): ResponseInterface {
		$query = [
			'ZObject' => $Z2,
			'ZID' => $Z2->{ 'Z2K1' }->{ 'Z6K1' }
		];

		// TODO (T338242): Use postAsync here.
		return $this->guzzleClient->post( '/1/v1/persist-to-cache', [
			'json' => $query,
			'headers' => $this->buildRequestHeaders(),
			'http_errors' => false
		] );
	}

}