Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
53.06% |
156 / 294 |
|
31.58% |
6 / 19 |
CRAP | |
0.00% |
0 / 1 |
| SpeechoidConnector | |
53.06% |
156 / 294 |
|
31.58% |
6 / 19 |
656.73 | |
0.00% |
0 / 1 |
| __construct | |
86.36% |
19 / 22 |
|
0.00% |
0 / 1 |
5.06 | |||
| synthesize | |
72.97% |
27 / 37 |
|
0.00% |
0 / 1 |
10.60 | |||
| synthesizeText | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
2 | |||
| listDefaultVoicePerLanguage | |
91.67% |
11 / 12 |
|
0.00% |
0 / 1 |
3.01 | |||
| requestDefaultVoices | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
12 | |||
| requestLexicons | |
0.00% |
0 / 17 |
|
0.00% |
0 / 1 |
20 | |||
| findLexiconByLocale | |
0.00% |
0 / 18 |
|
0.00% |
0 / 1 |
30 | |||
| requestTextProcessors | |
0.00% |
0 / 17 |
|
0.00% |
0 / 1 |
20 | |||
| findLexiconByLanguage | |
75.00% |
15 / 20 |
|
0.00% |
0 / 1 |
9.00 | |||
| lookupLexiconEntries | |
84.62% |
11 / 13 |
|
0.00% |
0 / 1 |
3.03 | |||
| updateLexiconEntry | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
2 | |||
| deleteLexiconEntry | |
100.00% |
12 / 12 |
|
100.00% |
1 / 1 |
2 | |||
| addLexiconEntry | |
0.00% |
0 / 28 |
|
0.00% |
0 / 1 |
56 | |||
| toIpa | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| map | |
60.00% |
24 / 40 |
|
0.00% |
0 / 1 |
12.10 | |||
| fromIpa | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| isQueueOverloaded | |
100.00% |
13 / 13 |
|
100.00% |
1 / 1 |
1 | |||
| getAvailableNonQueuedConnectionSlots | |
100.00% |
12 / 12 |
|
100.00% |
1 / 1 |
1 | |||
| unparseUrl | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
8 | |||
| 1 | <?php |
| 2 | |
| 3 | namespace MediaWiki\Wikispeech; |
| 4 | |
| 5 | /** |
| 6 | * @file |
| 7 | * @ingroup Extensions |
| 8 | * @license GPL-2.0-or-later |
| 9 | */ |
| 10 | |
| 11 | use Config; |
| 12 | use FormatJson; |
| 13 | use InvalidArgumentException; |
| 14 | use MediaWiki\Http\HttpRequestFactory; |
| 15 | use MediaWiki\Status\Status; |
| 16 | |
| 17 | /** |
| 18 | * Provide Speechoid access. |
| 19 | * |
| 20 | * @since 0.1.5 |
| 21 | */ |
| 22 | class SpeechoidConnector { |
| 23 | |
| 24 | /** @var Config */ |
| 25 | private $config; |
| 26 | |
| 27 | /** @var string Speechoid URL, without trailing slash. For non queued (non-TTS) operations. */ |
| 28 | private $url; |
| 29 | |
| 30 | /** @var string Speechoid queue URL, without trailing slash. For queued (TTS) operations. */ |
| 31 | private $haproxyQueueUrl; |
| 32 | |
| 33 | /** @var string Speechoid queue status URL, without trailing slash. */ |
| 34 | private $haproxyStatsUrl; |
| 35 | |
| 36 | /** @var string Speechoid symbol set URL, without trailing slash. */ |
| 37 | private $symbolSetUrl; |
| 38 | |
| 39 | /** @var int Default timeout awaiting HTTP response in seconds. */ |
| 40 | private $defaultHttpResponseTimeoutSeconds; |
| 41 | |
| 42 | /** @var HttpRequestFactory */ |
| 43 | private $requestFactory; |
| 44 | |
/**
 * Set up the connector from configuration.
 *
 * The symbol set, HAProxy queue and HAProxy stats URLs are read from
 * configuration. Whenever one of them is falsy it is derived from
 * WikispeechSpeechoidUrl by replacing the port with 8771, 10001 and
 * 10002 respectively.
 *
 * @since 0.1.5
 * @param Config $config
 * @param HttpRequestFactory $requestFactory
 */
public function __construct( $config, $requestFactory ) {
	$this->config = $config;
	$this->requestFactory = $requestFactory;
	$this->url = rtrim( $config->get( 'WikispeechSpeechoidUrl' ), '/' );

	// Config key => port to use when the URL must be derived from $this->url.
	$fallbackPorts = [
		'WikispeechSymbolSetUrl' => 8771,
		'WikispeechSpeechoidHaproxyQueueUrl' => 10001,
		'WikispeechSpeechoidHaproxyStatsUrl' => 10002
	];
	$serviceUrls = [];
	foreach ( $fallbackPorts as $configKey => $fallbackPort ) {
		$serviceUrl = rtrim( $config->get( $configKey ), '/' );
		if ( !$serviceUrl ) {
			$urlParts = parse_url( $this->url );
			$urlParts['port'] = $fallbackPort;
			$serviceUrl = $this->unparseUrl( $urlParts );
		}
		$serviceUrls[$configKey] = $serviceUrl;
	}
	$this->symbolSetUrl = $serviceUrls['WikispeechSymbolSetUrl'];
	$this->haproxyQueueUrl = $serviceUrls['WikispeechSpeechoidHaproxyQueueUrl'];
	$this->haproxyStatsUrl = $serviceUrls['WikispeechSpeechoidHaproxyStatsUrl'];

	// The default timeout is left unset when the config value is falsy.
	if ( $config->get( 'WikispeechSpeechoidResponseTimeoutSeconds' ) ) {
		$this->defaultHttpResponseTimeoutSeconds = intval(
			$config->get( 'WikispeechSpeechoidResponseTimeoutSeconds' )
		);
	}
}
| 78 | |
/**
 * Make a request to Speechoid to synthesize the provided text, IPA or SSML string.
 *
 * @since 0.1.5
 * @param string $language
 * @param string $voice
 * @param array $parameters Should contain either 'text', 'ipa' or 'ssml'.
 *  Determines input string and type. If several are set, 'ipa' is
 *  preferred over 'text', which is preferred over 'ssml'.
 * @param int|null $responseTimeoutSeconds Seconds before timing out awaiting response.
 *  Falsy value defaults to config value WikispeechSpeechoidResponseTimeoutSeconds,
 *  which if falsy (e.g. 0) means that no timeout option is passed along
 *  with the request.
 * @return array Response from Speechoid, parsed as associative array.
 * @throws InvalidArgumentException If $parameters contains none of
 *  'text', 'ipa' or 'ssml'.
 * @throws SpeechoidConnectorException On Speechoid I/O errors, or if the
 *  response is not JSON that decodes to an array.
 */
public function synthesize(
	$language,
	$voice,
	$parameters,
	$responseTimeoutSeconds = null
): array {
	$postData = [
		'lang' => $language,
		'voice' => $voice
	];
	if ( isset( $parameters['ipa'] ) ) {
		$postData['input'] = $parameters['ipa'];
		$postData['input_type'] = 'ipa';
	} elseif ( isset( $parameters['text'] ) ) {
		// Plain text is sent without an explicit input type.
		$postData['input'] = $parameters['text'];
	} elseif ( isset( $parameters['ssml'] ) ) {
		$postData['input'] = $parameters['ssml'];
		$postData['input_type'] = 'ssml';
	} else {
		throw new InvalidArgumentException(
			'$parameters must contain one of "text", "ipa" or "ssml".'
		);
	}

	// Build the options in one place so the timeout is not lost. Previously
	// the array was re-created after the timeout had been set, which
	// silently discarded it.
	$options = [ 'postData' => $postData ];
	if ( $responseTimeoutSeconds ) {
		$options['timeout'] = $responseTimeoutSeconds;
	} elseif ( $this->defaultHttpResponseTimeoutSeconds ) {
		$options['timeout'] = $this->defaultHttpResponseTimeoutSeconds;
	}

	$responseString = $this->requestFactory->post( $this->haproxyQueueUrl, $options, __METHOD__ );
	if ( !$responseString ) {
		throw new SpeechoidConnectorException(
			'Unable to communicate with Speechoid. ' .
			$this->haproxyQueueUrl . var_export( $options, true )
		);
	}
	$status = FormatJson::parse(
		$responseString,
		FormatJson::FORCE_ASSOC
	);
	if ( !$status->isOK() ) {
		throw new SpeechoidConnectorException( $responseString );
	}
	$value = $status->getValue();
	if ( !is_array( $value ) ) {
		throw new SpeechoidConnectorException( 'Unexpected non-array response' );
	}
	return $value;
}
| 143 | |
/**
 * Synthesize plain text through Speechoid.
 *
 * Convenience wrapper around {@link synthesize} that passes the text as
 * the 'text' parameter.
 *
 * @since 0.1.8
 * @param string $language
 * @param string $voice
 * @param string $text
 * @param int|null $responseTimeoutSeconds Passed on unchanged to {@link synthesize}.
 * @return array Whatever {@link synthesize} returns.
 */
public function synthesizeText(
	$language,
	$voice,
	$text,
	$responseTimeoutSeconds = null
): array {
	$parameters = [ 'text' => $text ];
	return $this->synthesize( $language, $voice, $parameters, $responseTimeoutSeconds );
}
| 167 | |
/**
 * Retrieve and parse default voices per language from Speechoid.
 *
 * Each item of the response is expected to carry the keys 'lang' and
 * 'default_voice'.
 *
 * @since 0.1.5
 * @return array Map language => voice
 * @throws SpeechoidConnectorException On Speechoid I/O- or JSON parse errors,
 *  or if the response does not decode to an array.
 */
public function listDefaultVoicePerLanguage(): array {
	$defaultVoicesJson = $this->requestDefaultVoices();
	$status = FormatJson::parse(
		$defaultVoicesJson,
		FormatJson::FORCE_ASSOC
	);
	if ( !$status->isOK() ) {
		throw new SpeechoidConnectorException( 'Unexpected response from Speechoid.' );
	}
	$defaultVoices = $status->getValue();
	// Valid JSON may still be a scalar; refuse it rather than iterating it,
	// in line with the other request methods of this class.
	if ( !is_array( $defaultVoices ) ) {
		throw new SpeechoidConnectorException( 'Unexpected non-array response' );
	}
	$defaultVoicePerLanguage = [];
	foreach ( $defaultVoices as $voice ) {
		$defaultVoicePerLanguage[ $voice['lang'] ] = $voice['default_voice'];
	}
	return $defaultVoicePerLanguage;
}
| 191 | |
/**
 * Retrieve default voices per language from Speechoid.
 *
 * Issues a GET request for '/default_voices' below the Speechoid URL and
 * returns the raw response body.
 *
 * @since 0.1.6
 * @return string JSON response
 * @throws SpeechoidConnectorException On Speechoid I/O error or
 *  if URL is invalid.
 */
public function requestDefaultVoices(): string {
	$hasValidUrl = (bool)filter_var( $this->url, FILTER_VALIDATE_URL );
	if ( !$hasValidUrl ) {
		throw new SpeechoidConnectorException( 'No Speechoid URL provided.' );
	}
	$endpoint = $this->url . '/default_voices';
	$body = $this->requestFactory->get( $endpoint, [], __METHOD__ );
	if ( $body ) {
		return $body;
	}
	throw new SpeechoidConnectorException( 'Unable to communicate with Speechoid.' );
}
| 210 | |
/**
 * Request the list of lexicons registered in Speechoid.
 *
 * The response is an array of items such as:
 * {
 *   "name": "sv_se_nst_lex:sv-se.nst",
 *   "symbolSetName": "sv-se_ws-sampa",
 *   "locale": "sv_SE",
 *   "entryCount": 919476
 * }
 *
 * This list includes all registered lexicons,
 * including those that are not in use by any voice.
 *
 * @since 0.1.8
 * @return array Parsed JSON response as an associative array
 * @throws SpeechoidConnectorException On empty response, on JSON parse
 *  errors, or if the decoded response is not an array.
 */
public function requestLexicons(): array {
	$endpoint = $this->url . '/lexserver/lexicon/list';
	$responseBody = $this->requestFactory->get( $endpoint, [], __METHOD__ );
	if ( !$responseBody ) {
		throw new SpeechoidConnectorException( 'Unable to communicate with Speechoid.' );
	}
	$parseStatus = FormatJson::parse( $responseBody, FormatJson::FORCE_ASSOC );
	if ( !$parseStatus->isOK() ) {
		throw new SpeechoidConnectorException( 'Unexpected response from Speechoid.' );
	}
	$lexicons = $parseStatus->getValue();
	if ( is_array( $lexicons ) ) {
		return $lexicons;
	}
	throw new SpeechoidConnectorException( 'Unexpected non-array response' );
}
| 249 | |
/**
 * Find the name of the single lexicon whose locale starts with $locale.
 *
 * This includes all registered lexicons,
 * including those that are not in use by any voice.
 *
 * Case insensitive prefix matching query.
 * I.e. $locale 'en' will match both 'en_US' and 'en_NZ'.
 *
 * @see requestLexicons
 * @since 0.1.8
 * @param string $locale
 * @return string|null Name of lexicon, or null if not found.
 * @throws SpeechoidConnectorException If more than one lexicon matches,
 *  or if requesting the lexicons fails.
 */
public function findLexiconByLocale(
	string $locale
): ?string {
	$prefix = strtolower( $locale );
	$candidates = array_values( array_filter(
		$this->requestLexicons(),
		static function ( $lexicon ) use ( $prefix ) {
			return str_starts_with( strtolower( $lexicon['locale'] ), $prefix );
		}
	) );
	$candidateCount = count( $candidates );
	if ( $candidateCount > 1 ) {
		throw new SpeechoidConnectorException(
			'Multiple lexicons matches locale:' .
			FormatJson::encode( $candidates, true )
		);
	}
	return $candidateCount === 1 ? $candidates[0]['name'] : null;
}
| 288 | |
/**
 * Request the list of text processors registered in Speechoid.
 *
 * The response is an array of items such as:
 * {
 *   "components": [
 *     {
 *       "call": "marytts_preproc",
 *       "mapper": {
 *         "from": "sv-se_ws-sampa",
 *         "to": "sv-se_sampa_mary"
 *       },
 *       "module": "adapters.marytts_adapter"
 *     },
 *     {
 *       "call": "lexLookup",
 *       "lexicon": "sv_se_nst_lex:sv-se.nst",
 *       "module": "adapters.lexicon_client"
 *     }
 *   ],
 *   "config_file": "wikispeech_server/conf/voice_config_marytts.json",
 *   "default": true,
 *   "lang": "sv",
 *   "name": "marytts_textproc_sv"
 * }
 *
 * This list includes the lexicons for all registered voices,
 * even if the voice is currently unavailable.
 *
 * @since 0.1.8
 * @return array Parsed JSON response as associative array
 * @throws SpeechoidConnectorException On empty response, on JSON parse
 *  errors, or if the decoded response is not an array.
 */
public function requestTextProcessors(): array {
	$endpoint = $this->url . '/textprocessing/textprocessors';
	$responseBody = $this->requestFactory->get( $endpoint, [], __METHOD__ );
	if ( !$responseBody ) {
		throw new SpeechoidConnectorException( 'Unable to communicate with Speechoid.' );
	}
	$parseStatus = FormatJson::parse( $responseBody, FormatJson::FORCE_ASSOC );
	if ( !$parseStatus->isOK() ) {
		throw new SpeechoidConnectorException( 'Unexpected response from Speechoid.' );
	}
	$textProcessors = $parseStatus->getValue();
	if ( is_array( $textProcessors ) ) {
		return $textProcessors;
	}
	throw new SpeechoidConnectorException( 'Unexpected non-array response' );
}
| 342 | |
/**
 * Find the lexicon used by the text processor registered for a language.
 *
 * This includes the lexicons for all registered voices,
 * even if the voice is currently unavailable.
 * Response is in form such as 'sv_se_nst_lex:sv-se.nst',
 * where prefix and suffix split by : is used differently throughout Speechoid
 * e.g combined, prefix only or suffix only, for identifying items.
 *
 * The lexicon is taken from the first component of the matching text
 * processor whose 'call' is 'lexLookup'.
 *
 * @see requestTextProcessors
 * @since 0.1.8
 * @param string $language Case insensitive language code, e.g. 'en'.
 * @return string|null Name of lexicon, or null if not found.
 * @throws SpeechoidConnectorException If more than one text processor
 *  matches the language, or if requesting the text processors fails.
 */
public function findLexiconByLanguage(
	string $language
): ?string {
	$language = strtolower( $language );
	$lexicons = $this->requestTextProcessors();
	$matches = [];
	foreach ( $lexicons as $lexicon ) {
		$lexiconLang = strtolower( $lexicon['lang'] );
		// Strict comparison: loose == would compare numeric-looking
		// strings as numbers.
		if ( $lexiconLang === $language ) {
			$matches[] = $lexicon;
		}
	}
	$numberOfMatches = count( $matches );
	if ( $numberOfMatches === 0 ) {
		return null;
	} elseif ( $numberOfMatches > 1 ) {
		throw new SpeechoidConnectorException(
			'Multiple lexicon matches language' .
			FormatJson::encode( $matches, true )
		);
	}
	foreach ( $matches[0]['components'] as $component ) {
		if ( ( $component['call'] ?? null ) === 'lexLookup' ) {
			// A lexLookup component without a 'lexicon' key yields null
			// rather than an undefined index warning.
			return $component['lexicon'] ?? null;
		}
	}
	return null;
}
| 387 | |
/**
 * Look up the entries for a set of words in a lexicon.
 *
 * The response is an array of items such as:
 * {
 *   "id": 808498,
 *   "lexRef": {
 *     "dbRef": "sv_se_nst_lex",
 *     "lexName": "sv-se.nst"
 *   },
 *   "strn": "tomten",
 *   "language": "sv-se",
 *   "partOfSpeech": "NN",
 *   "morphology": "SIN|DEF|NOM|UTR",
 *   "wordParts": "tomten",
 *   "lemma": {
 *     "id": 92909,
 *     "strn": "tomte",
 *     "paradigm": "s2b-båge"
 *   },
 *   "transcriptions": [
 *     {
 *       "id": 814660,
 *       "entryId": 808498,
 *       "strn": "\"\" t O m . t e n",
 *       "language": "sv-se",
 *       "sources": [
 *         "nst"
 *       ]
 *     }
 *   ],
 *   "status": {
 *     "id": 808498,
 *     "name": "imported",
 *     "source": "nst",
 *     "timestamp": "2018-06-18T08:51:25Z",
 *     "current": true
 *   }
 * }
 *
 * The words are sent comma separated in a single query parameter.
 *
 * @since 0.1.8
 * @param string $lexicon
 * @param string[] $words
 * @return Status If successful, value contains deserialized json response.
 * @throws SpeechoidConnectorException If the response is empty.
 * @throws InvalidArgumentException If words array is empty.
 */
public function lookupLexiconEntries(
	string $lexicon,
	array $words
): Status {
	if ( !$words ) {
		throw new InvalidArgumentException( 'Must contain at least one word' );
	}
	$query = [
		'lexicons' => $lexicon,
		'words' => implode( ',', $words )
	];
	$url = wfAppendQuery( $this->url . '/lexserver/lexicon/lookup', $query );
	$responseString = $this->requestFactory->get( $url, [], __METHOD__ );
	if ( $responseString ) {
		return FormatJson::parse( $responseString );
	}
	throw new SpeechoidConnectorException( "Unable to communicate with Speechoid. '$url'" );
}
| 453 | |
/**
 * Send an updated lexicon entry to Speechoid.
 *
 * The entry is passed as the 'entry' query parameter of a GET request.
 *
 * @since 0.1.8
 * @param string $json A single entry object item.
 *  I.e. not an array as returned by {@link lookupLexiconEntries}.
 * @return Status If successful, value contains deserialized json response (updated entry item)
 */
public function updateLexiconEntry(
	string $json
): Status {
	$endpoint = wfAppendQuery(
		$this->url . '/lexserver/lexicon/updateentry',
		[ 'entry' => $json ]
	);
	$responseBody = $this->requestFactory->get( $endpoint, [], __METHOD__ );
	return FormatJson::parse( $responseBody, FormatJson::FORCE_ASSOC );
}
| 473 | |
/**
 * Deletes a lexicon entry item.
 *
 * Success is detected by matching the start of the response text
 * against the confirmation message Speechoid is expected to send.
 *
 * @since 0.1.8
 * @param string $lexiconName
 * @param int $identity
 * @return Status Good with the response text as value if the response
 *  confirms the deletion. Fatal otherwise, including when no response
 *  was received.
 */
public function deleteLexiconEntry(
	string $lexiconName,
	int $identity
): Status {
	$responseString = $this->requestFactory->get(
		$this->url . '/lexserver/lexicon/delete_entry/' .
		urlencode( $lexiconName ) . '/' . $identity,
		[],
		__METHOD__
	);
	// Without a response there is nothing to match against, and passing
	// null on to mb_ereg_match() or Status::newFatal() is not meaningful.
	if ( !$responseString ) {
		return Status::newFatal( 'Unable to communicate with Speechoid.' );
	}
	// If successful, returns something like:
	// deleted entry id '11' from lexicon 'sv'
	// where the lexicon is the second part of the lexicon name:lang.
	if ( mb_ereg_match(
		"deleted entry id '(.+)' from lexicon '(.+)'",
		$responseString
	) ) {
		return Status::newGood( $responseString );
	}
	return Status::newFatal( $responseString );
}
| 503 | |
/**
 * Add a new entry to a lexicon.
 *
 * Example of an entry:
 * {
 *   "strn": "flesk",
 *   "language": "sv-se",
 *   "partOfSpeech": "NN",
 *   "morphology": "SIN-PLU|IND|NOM|NEU",
 *   "wordParts": "flesk",
 *   "lemma": {
 *     "strn": "flesk",
 *     "reading": "",
 *     "paradigm": "s7n-övriga ex träd"
 *   },
 *   "transcriptions": [
 *     {
 *       "strn": "\" f l E s k",
 *       "language": "sv-se"
 *     }
 *   ]
 * }
 *
 * The response is expected to be a JSON object with an 'ids' array
 * holding exactly one integer.
 *
 * @since 0.1.8
 * @param string $lexiconName E.g. 'wikispeech_lexserver_testdb:sv'
 * @param string $json A single entry object item.
 *  I.e. not an array as returned by {@link lookupLexiconEntries}.
 * @return Status value set to int identity of newly created entry.
 *  Fatal if the response does not have the expected shape.
 * @throws SpeechoidConnectorException If no response was received or
 *  the response is not valid JSON.
 */
public function addLexiconEntry(
	string $lexiconName,
	string $json
): Status {
	$responseString = $this->requestFactory->get(
		wfAppendQuery(
			$this->url . '/lexserver/lexicon/addentry',
			[
				'lexicon_name' => $lexiconName,
				'entry' => $json
			]
		),
		[],
		__METHOD__
	);
	// @todo how do we know if this was successful? Always return 200

	// Do not hand null to the JSON parser; report the missing response.
	if ( $responseString === null ) {
		throw new SpeechoidConnectorException( 'Unable to communicate with Speechoid.' );
	}
	$deserializedStatus = FormatJson::parse( $responseString, FormatJson::FORCE_ASSOC );
	if ( !$deserializedStatus->isOK() ) {
		throw new SpeechoidConnectorException( "Failed to parse response as JSON: $responseString" );
	}
	$deserializedResponse = $deserializedStatus->getValue();
	if ( !is_array( $deserializedResponse ) ) {
		return Status::newFatal( 'Unexpected response: Not an array.' );
	}
	if ( !array_key_exists( 'ids', $deserializedResponse ) ) {
		return Status::newFatal( 'Unexpected Speechoid response. No `ids` field.' );
	}
	$ids = $deserializedResponse['ids'];
	// count() throws a TypeError on non-countable values in PHP 8.
	if ( !is_array( $ids ) ) {
		return Status::newFatal( 'Unexpected Speechoid response. `ids` is not an array.' );
	}
	$numberOfIdentities = count( $ids );
	if ( $numberOfIdentities === 0 ) {
		return Status::newFatal( 'Unexpected Speechoid response. No `ids` values.' );
	} elseif ( $numberOfIdentities > 1 ) {
		return Status::newFatal( 'Unexpected Speechoid response. Multiple `ids` values.' );
	}
	// A JSON object with a single non-zero key would not have index 0.
	$identity = $ids[0] ?? null;
	if ( !is_int( $identity ) ) {
		return Status::newFatal( 'Unexpected Speechoid response. Ids[0] is a non integer value.' );
	}
	return Status::newGood( $identity );
}
| 573 | |
/**
 * Convert a string to IPA from the symbol set used for the given language.
 *
 * Delegates to {@link map} with the direction set to "to IPA".
 *
 * @since 0.1.10
 * @param string $string
 * @param string $language Tell Speechoid to use the symbol set
 *  for this language.
 * @return Status
 */
public function toIpa( string $string, string $language ): Status {
	$convertToIpa = true;
	return $this->map( $string, $language, $convertToIpa );
}
| 586 | |
/**
 * Convert a string to or from IPA.
 *
 * First looks up the lexicon for the language and asks Speechoid for
 * that lexicon's symbol set name, then asks the symbol set mapper to
 * convert the string between that symbol set and 'ipa'.
 *
 * @since 0.1.8
 * @param string $string
 * @param string $language Tell Speechoid to use the symbol set
 *  for this language.
 * @param bool $toIpa Converts to IPA if true, otherwise from IPA
 * @return Status Good with the mapper's 'Result' as value. Fatal if no
 *  lexicon is found for the language or if either response is not JSON
 *  of the expected shape.
 * @throws SpeechoidConnectorException If looking up the lexicon fails.
 */
private function map( string $string, string $language, bool $toIpa ): Status {
	// Get the symbol set to convert to
	$lexicon = $this->findLexiconByLanguage( $language );
	if ( $lexicon === null ) {
		// Without a lexicon the info URL below would be requested with an
		// empty name; report the actual cause instead.
		return Status::newFatal( "No lexicon found for language '$language'." );
	}
	$symbolsetRequestUrl = "$this->url/lexserver/lexicon/info/$lexicon";
	$symbolSetResponse = $this->requestFactory->get( $symbolsetRequestUrl, [], __METHOD__ );
	$symbolSetStatus = FormatJson::parse(
		$symbolSetResponse,
		FormatJson::FORCE_ASSOC
	);
	if ( !$symbolSetStatus->isOK() ) {
		return Status::newFatal(
			"Failed to parse response from $symbolsetRequestUrl as JSON: " .
			$symbolSetResponse
		);
	}
	$symbolSetValue = $symbolSetStatus->getValue();
	if ( !is_array( $symbolSetValue ) || !isset( $symbolSetValue['symbolSetName'] ) ) {
		return Status::newFatal(
			"Missing 'symbolSetName' in response from $symbolsetRequestUrl: " .
			FormatJson::encode( $symbolSetValue, true )
		);
	}
	$symbolSet = $symbolSetValue['symbolSetName'];

	if ( $toIpa ) {
		$from = $symbolSet;
		$to = 'ipa';
	} else {
		$from = 'ipa';
		$to = $symbolSet;
	}
	// The string to convert is sent as the last path segment.
	$mapRequestUrl = "$this->symbolSetUrl/mapper/map/$from/$to/" .
		rawurlencode( $string );
	$mapResponse = $this->requestFactory->get( $mapRequestUrl, [], __METHOD__ );
	$mapStatus = FormatJson::parse( $mapResponse, FormatJson::FORCE_ASSOC );
	if ( !$mapStatus->isOK() ) {
		return Status::newFatal(
			"Failed to parse response from $mapRequestUrl as JSON: " .
			$mapResponse
		);
	}
	$mapValue = $mapStatus->getValue();
	if ( !is_array( $mapValue ) || !isset( $mapValue['Result'] ) ) {
		return Status::newFatal(
			"Missing 'Result' in response from $mapRequestUrl: " .
			FormatJson::encode( $mapValue, true )
		);
	}
	return Status::newGood( $mapValue['Result'] );
}
| 647 | |
/**
 * Convert a string from IPA to the symbol set used for the given language.
 *
 * Delegates to {@link map} with the direction set to "from IPA".
 *
 * @since 0.1.10
 * @param string $string
 * @param string $language Tell Speechoid to use the symbol set
 *  for this language.
 * @return Status
 */
public function fromIpa( string $string, string $language ): Status {
	$convertToIpa = false;
	return $this->map( $string, $language, $convertToIpa );
}
| 660 | |
/**
 * Tell whether the HAProxy connection queue is overloaded.
 *
 * Queue is overloaded if there are already the maximum number of current
 * connections processed by the backend at the same time as the queue
 * contains more than X connections waiting for their turn,
 * where X =
 * WikispeechSpeechoidHaproxyOverloadFactor multiplied with
 * the maximum number of current connections to the backend.
 *
 * The HAProxy statistics are fetched as CSV and evaluated by
 * HaproxyStatusParser.
 *
 * @see HaproxyStatusParser::isQueueOverloaded()
 * @since 0.1.10
 * @return bool Whether or not connection queue is overloaded
 */
public function isQueueOverloaded(): bool {
	$statsUrl = $this->haproxyStatsUrl . '/stats;csv;norefresh';
	$statsCsv = $this->requestFactory->get( $statsUrl, [], __METHOD__ );
	$statusParser = new HaproxyStatusParser( $statsCsv );

	$frontendPxName = $this->config->get( 'WikispeechSpeechoidHaproxyFrontendPxName' );
	$frontendSvName = $this->config->get( 'WikispeechSpeechoidHaproxyFrontendSvName' );
	$backendPxName = $this->config->get( 'WikispeechSpeechoidHaproxyBackendPxName' );
	$backendSvName = $this->config->get( 'WikispeechSpeechoidHaproxyBackendSvName' );
	$overloadFactor = floatval(
		$this->config->get( 'WikispeechSpeechoidHaproxyOverloadFactor' )
	);

	return $statusParser->isQueueOverloaded(
		$frontendPxName,
		$frontendSvName,
		$backendPxName,
		$backendSvName,
		$overloadFactor
	);
}
| 688 | |
/**
 * Counts number of requests that currently could be sent to the queue
 * and immediately would be passed down to backend.
 *
 * If this value is greater than 0, then the next request sent via the queue
 * will be immediately processed by the backend.
 *
 * If this value is less than 1, then the next connection will be queued,
 * given that the currently processing requests will not have had time to
 * finish by then. In that case the value is the inverse size of the known
 * queue.
 *
 * Note that the OS on the HAProxy server might be buffering connections in
 * the TCP-stack and that HAProxy will not be aware of such connections.
 * A negative number might therefore not represent a perfect count of
 * current connections lined up in the queue.
 *
 * The idea with this function is to see if there are available resources
 * that could be used for pre-synthesis of utterances during otherwise
 * idle time.
 *
 * @see HaproxyStatusParser::getAvailableNonQueuedConnectionSlots()
 * @since 0.1.10
 * @return int Positive number if available slots, else inverted size of queue.
 */
public function getAvailableNonQueuedConnectionSlots(): int {
	$statsUrl = $this->haproxyStatsUrl . '/stats;csv;norefresh';
	$statsCsv = $this->requestFactory->get( $statsUrl, [], __METHOD__ );
	$statusParser = new HaproxyStatusParser( $statsCsv );

	$frontendPxName = $this->config->get( 'WikispeechSpeechoidHaproxyFrontendPxName' );
	$frontendSvName = $this->config->get( 'WikispeechSpeechoidHaproxyFrontendSvName' );
	$backendPxName = $this->config->get( 'WikispeechSpeechoidHaproxyBackendPxName' );
	$backendSvName = $this->config->get( 'WikispeechSpeechoidHaproxyBackendSvName' );

	return $statusParser->getAvailableNonQueuedConnectionSlots(
		$frontendPxName,
		$frontendSvName,
		$backendPxName,
		$backendSvName
	);
}
| 725 | |
/**
 * Converts the output from {@link parse_url} to an URL.
 *
 * Components that are missing from the array are left out of the result.
 *
 * @since 0.1.10
 * @param array $parsedUrl Components keyed as by {@link parse_url}:
 *  scheme, host, port, user, pass, path, query, fragment.
 * @return string
 */
private function unparseUrl( array $parsedUrl ): string {
	$scheme = isset( $parsedUrl['scheme'] ) ? $parsedUrl['scheme'] . '://' : '';
	$host = $parsedUrl['host'] ?? '';
	$port = isset( $parsedUrl['port'] ) ? ':' . $parsedUrl['port'] : '';
	$user = $parsedUrl['user'] ?? '';
	$pass = isset( $parsedUrl['pass'] ) ? ':' . $parsedUrl['pass'] : '';
	// Compare against the empty string rather than relying on truthiness:
	// a user name of "0" is falsy and would otherwise lose its "@" separator.
	$pass = ( $user !== '' || $pass !== '' ) ? "$pass@" : '';
	$path = $parsedUrl['path'] ?? '';
	$query = isset( $parsedUrl['query'] ) ? '?' . $parsedUrl['query'] : '';
	$fragment = isset( $parsedUrl['fragment'] ) ? '#' . $parsedUrl['fragment'] : '';
	return "$scheme$user$pass$host$port$path$query$fragment";
}
| 745 | |
| 746 | } |