Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
55.35% |
150 / 271 |
|
31.58% |
6 / 19 |
CRAP | |
0.00% |
0 / 1 |
SpeechoidConnector | |
55.35% |
150 / 271 |
|
31.58% |
6 / 19 |
466.57 | |
0.00% |
0 / 1 |
__construct | |
86.36% |
19 / 22 |
|
0.00% |
0 / 1 |
5.06 | |||
synthesize | |
73.53% |
25 / 34 |
|
0.00% |
0 / 1 |
9.19 | |||
synthesizeText | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
2 | |||
listDefaultVoicePerLanguage | |
91.67% |
11 / 12 |
|
0.00% |
0 / 1 |
3.01 | |||
requestDefaultVoices | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
12 | |||
requestLexicons | |
0.00% |
0 / 14 |
|
0.00% |
0 / 1 |
12 | |||
findLexiconByLocale | |
0.00% |
0 / 18 |
|
0.00% |
0 / 1 |
30 | |||
requestTextProcessors | |
0.00% |
0 / 14 |
|
0.00% |
0 / 1 |
12 | |||
findLexiconByLanguage | |
75.00% |
15 / 20 |
|
0.00% |
0 / 1 |
9.00 | |||
lookupLexiconEntries | |
84.62% |
11 / 13 |
|
0.00% |
0 / 1 |
3.03 | |||
updateLexiconEntry | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
2 | |||
deleteLexiconEntry | |
100.00% |
12 / 12 |
|
100.00% |
1 / 1 |
2 | |||
addLexiconEntry | |
0.00% |
0 / 26 |
|
0.00% |
0 / 1 |
42 | |||
toIpa | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
map | |
71.43% |
20 / 28 |
|
0.00% |
0 / 1 |
4.37 | |||
fromIpa | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
isQueueOverloaded | |
100.00% |
13 / 13 |
|
100.00% |
1 / 1 |
1 | |||
getAvailableNonQueuedConnectionSlots | |
100.00% |
12 / 12 |
|
100.00% |
1 / 1 |
1 | |||
unparseUrl | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
8 |
1 | <?php |
2 | |
3 | namespace MediaWiki\Wikispeech; |
4 | |
5 | /** |
6 | * @file |
7 | * @ingroup Extensions |
8 | * @license GPL-2.0-or-later |
9 | */ |
10 | |
11 | use Config; |
12 | use FormatJson; |
13 | use InvalidArgumentException; |
14 | use MediaWiki\Http\HttpRequestFactory; |
15 | use MediaWiki\Status\Status; |
16 | |
17 | /** |
18 | * Provide Speechoid access. |
19 | * |
20 | * @since 0.1.5 |
21 | */ |
22 | class SpeechoidConnector { |
23 | |
24 | /** @var Config */ |
25 | private $config; |
26 | |
27 | /** @var string Speechoid URL, without trailing slash. For non queued (non-TTS) operations. */ |
28 | private $url; |
29 | |
30 | /** @var string Speechoid queue URL, without trailing slash. For queued (TTS) operations. */ |
31 | private $haproxyQueueUrl; |
32 | |
33 | /** @var string Speechoid queue status URL, without trailing slash. */ |
34 | private $haproxyStatsUrl; |
35 | |
36 | /** @var string Speechoid symbol set URL, without trailing slash. */ |
37 | private $symbolSetUrl; |
38 | |
39 | /** @var int Default timeout awaiting HTTP response in seconds. */ |
40 | private $defaultHttpResponseTimeoutSeconds; |
41 | |
42 | /** @var HttpRequestFactory */ |
43 | private $requestFactory; |
44 | |
/**
 * Read the Speechoid endpoints and the default response timeout from config.
 *
 * The symbol set, HAProxy queue and HAProxy stats URLs each fall back to
 * the Speechoid URL with its port replaced (8771, 10001 and 10002
 * respectively) when their own config value is empty.
 *
 * @since 0.1.5
 * @param Config $config
 * @param HttpRequestFactory $requestFactory
 */
public function __construct( $config, $requestFactory ) {
	$this->config = $config;
	$this->requestFactory = $requestFactory;
	$this->url = rtrim( $config->get( 'WikispeechSpeechoidUrl' ), '/' );

	// Resolves one endpoint: the configured URL without trailing slash or,
	// when that is empty, the Speechoid URL with the port replaced.
	$resolveUrl = function ( string $configKey, int $fallbackPort ) use ( $config ): string {
		$configuredUrl = rtrim( $config->get( $configKey ), '/' );
		if ( $configuredUrl ) {
			return $configuredUrl;
		}
		// parse_url() returns false for seriously malformed URLs. Start from
		// an empty array in that case rather than relying on auto-vivification
		// from false, which is deprecated since PHP 8.1.
		$parsedUrl = parse_url( $this->url ) ?: [];
		$parsedUrl['port'] = $fallbackPort;
		return $this->unparseUrl( $parsedUrl );
	};

	$this->symbolSetUrl = $resolveUrl( 'WikispeechSymbolSetUrl', 8771 );
	$this->haproxyQueueUrl = $resolveUrl( 'WikispeechSpeechoidHaproxyQueueUrl', 10001 );
	$this->haproxyStatsUrl = $resolveUrl( 'WikispeechSpeechoidHaproxyStatsUrl', 10002 );

	// A falsy timeout (e.g. 0) leaves the property unset, so that no explicit
	// timeout option is derived from it later.
	$timeoutSeconds = $config->get( 'WikispeechSpeechoidResponseTimeoutSeconds' );
	if ( $timeoutSeconds ) {
		$this->defaultHttpResponseTimeoutSeconds = intval( $timeoutSeconds );
	}
}
78 | |
/**
 * Make a request to Speechoid to synthesize the provided text, IPA or SSML string.
 *
 * The request is posted to the HAProxy queue URL.
 *
 * @since 0.1.5
 * @param string $language
 * @param string $voice
 * @param array $parameters Should contain either 'text', 'ipa' or 'ssml'.
 *  Determines input string and type. If several are set, 'ipa' takes
 *  precedence over 'text', which takes precedence over 'ssml'.
 * @param int|null $responseTimeoutSeconds Seconds before timing out awaiting response.
 *  Falsy value defaults to config value WikispeechSpeechoidResponseTimeoutSeconds,
 *  which if falsy (e.g. 0) defaults to MediaWiki default.
 * @return array Response from Speechoid, parsed as associative array.
 * @throws InvalidArgumentException If $parameters contains none of the supported keys.
 * @throws SpeechoidConnectorException On Speechoid I/O errors or if the
 *  response is not valid JSON.
 */
public function synthesize(
	$language,
	$voice,
	$parameters,
	$responseTimeoutSeconds = null
): array {
	$postData = [
		'lang' => $language,
		'voice' => $voice
	];
	if ( isset( $parameters['ipa'] ) ) {
		$postData['input'] = $parameters['ipa'];
		$postData['input_type'] = 'ipa';
	} elseif ( isset( $parameters['text'] ) ) {
		// Plain text is sent without an explicit input_type.
		$postData['input'] = $parameters['text'];
	} elseif ( isset( $parameters['ssml'] ) ) {
		$postData['input'] = $parameters['ssml'];
		$postData['input_type'] = 'ssml';
	} else {
		throw new InvalidArgumentException(
			'$parameters must contain one of "text", "ipa" or "ssml".'
		);
	}

	// Build the options incrementally. Assigning a fresh array here would
	// silently discard the timeout and fall back to the MediaWiki default.
	$options = [ 'postData' => $postData ];
	if ( $responseTimeoutSeconds ) {
		$options['timeout'] = $responseTimeoutSeconds;
	} elseif ( $this->defaultHttpResponseTimeoutSeconds ) {
		$options['timeout'] = $this->defaultHttpResponseTimeoutSeconds;
	}

	$responseString = $this->requestFactory->post( $this->haproxyQueueUrl, $options, __METHOD__ );
	if ( !$responseString ) {
		throw new SpeechoidConnectorException(
			'Unable to communicate with Speechoid. ' .
			$this->haproxyQueueUrl . var_export( $options, true )
		);
	}
	$status = FormatJson::parse(
		$responseString,
		FormatJson::FORCE_ASSOC
	);
	if ( !$status->isOK() ) {
		throw new SpeechoidConnectorException( $responseString );
	}
	return $status->getValue();
}
139 | |
/**
 * Synthesize plain text through Speechoid.
 *
 * Convenience wrapper that calls {@link synthesize} with the text
 * wrapped as the 'text' parameter.
 *
 * @since 0.1.8
 * @param string $language
 * @param string $voice
 * @param string $text
 * @param int|null $responseTimeoutSeconds
 * @return array
 */
public function synthesizeText(
	$language,
	$voice,
	$text,
	$responseTimeoutSeconds = null
): array {
	$parameters = [ 'text' => $text ];
	return $this->synthesize( $language, $voice, $parameters, $responseTimeoutSeconds );
}
163 | |
/**
 * Fetch the default voices from Speechoid and index them by language.
 *
 * @since 0.1.5
 * @return array Map language => voice
 * @throws SpeechoidConnectorException On Speechoid I/O- or JSON parse errors.
 */
public function listDefaultVoicePerLanguage(): array {
	$parseStatus = FormatJson::parse(
		$this->requestDefaultVoices(),
		FormatJson::FORCE_ASSOC
	);
	if ( !$parseStatus->isOK() ) {
		throw new SpeechoidConnectorException( 'Unexpected response from Speechoid.' );
	}
	$voiceByLanguage = [];
	// Each item carries its language in 'lang' and its voice in 'default_voice'.
	foreach ( $parseStatus->getValue() as $item ) {
		$voiceByLanguage[ $item['lang'] ] = $item['default_voice'];
	}
	return $voiceByLanguage;
}
187 | |
/**
 * Retrieve default voices per language from Speechoid.
 *
 * @since 0.1.6
 * @return string Raw JSON response
 * @throws SpeechoidConnectorException On Speechoid I/O error or
 *  if the configured Speechoid URL is invalid.
 */
public function requestDefaultVoices(): string {
	$validatedUrl = filter_var( $this->url, FILTER_VALIDATE_URL );
	if ( !$validatedUrl ) {
		throw new SpeechoidConnectorException( 'No Speechoid URL provided.' );
	}
	$endpoint = $this->url . '/default_voices';
	$body = $this->requestFactory->get( $endpoint, [], __METHOD__ );
	if ( $body ) {
		return $body;
	}
	throw new SpeechoidConnectorException( 'Unable to communicate with Speechoid.' );
}
206 | |
/**
 * Request the list of lexicons registered in Speechoid.
 *
 * The response is an array of items such as:
 * {
 *   "name": "sv_se_nst_lex:sv-se.nst",
 *   "symbolSetName": "sv-se_ws-sampa",
 *   "locale": "sv_SE",
 *   "entryCount": 919476
 * }
 *
 * The list contains all registered lexicons,
 * including those that are not in use by any voice.
 *
 * @since 0.1.8
 * @return array Parsed JSON response as an associative array
 * @throws SpeechoidConnectorException On empty response or JSON parse error.
 */
public function requestLexicons(): array {
	$endpoint = $this->url . '/lexserver/lexicon/list';
	$body = $this->requestFactory->get( $endpoint, [], __METHOD__ );
	if ( !$body ) {
		throw new SpeechoidConnectorException( 'Unable to communicate with Speechoid.' );
	}
	$parseStatus = FormatJson::parse( $body, FormatJson::FORCE_ASSOC );
	if ( $parseStatus->isOK() ) {
		return $parseStatus->getValue();
	}
	throw new SpeechoidConnectorException( 'Unexpected response from Speechoid.' );
}
241 | |
/**
 * Find the name of the single lexicon whose locale starts with the given prefix.
 *
 * All registered lexicons are considered,
 * including those that are not in use by any voice.
 *
 * Matching is a case insensitive prefix match,
 * i.e. $locale 'en' will match both 'en_US' and 'en_NZ'.
 *
 * @see requestLexicons
 * @since 0.1.8
 * @param string $locale
 * @return string|null Name of lexicon, or null if not found.
 * @throws SpeechoidConnectorException On request failure, or if more
 *  than one lexicon matches the locale.
 */
public function findLexiconByLocale(
	string $locale
): ?string {
	$prefix = strtolower( $locale );
	$candidates = [];
	foreach ( $this->requestLexicons() as $candidate ) {
		if ( str_starts_with( strtolower( $candidate['locale'] ), $prefix ) ) {
			$candidates[] = $candidate;
		}
	}
	switch ( count( $candidates ) ) {
		case 0:
			return null;
		case 1:
			return $candidates[0]['name'];
		default:
			// Ambiguous prefix: report every lexicon that matched.
			throw new SpeechoidConnectorException(
				'Multiple lexicons matches locale:' .
				FormatJson::encode( $candidates, true )
			);
	}
}
280 | |
/**
 * Request the list of text processors registered in Speechoid.
 *
 * The response is an array of items such as:
 * {
 *   "components": [
 *     {
 *       "call": "marytts_preproc",
 *       "mapper": {
 *         "from": "sv-se_ws-sampa",
 *         "to": "sv-se_sampa_mary"
 *       },
 *       "module": "adapters.marytts_adapter"
 *     },
 *     {
 *       "call": "lexLookup",
 *       "lexicon": "sv_se_nst_lex:sv-se.nst",
 *       "module": "adapters.lexicon_client"
 *     }
 *   ],
 *   "config_file": "wikispeech_server/conf/voice_config_marytts.json",
 *   "default": true,
 *   "lang": "sv",
 *   "name": "marytts_textproc_sv"
 * }
 *
 * The list contains the lexicons for all registered voices,
 * even if the voice is currently unavailable.
 *
 * @since 0.1.8
 * @return array Parsed JSON response as associative array
 * @throws SpeechoidConnectorException On empty response or JSON parse error.
 */
public function requestTextProcessors(): array {
	$endpoint = $this->url . '/textprocessing/textprocessors';
	$body = $this->requestFactory->get( $endpoint, [], __METHOD__ );
	if ( !$body ) {
		throw new SpeechoidConnectorException( 'Unable to communicate with Speechoid.' );
	}
	$parseStatus = FormatJson::parse( $body, FormatJson::FORCE_ASSOC );
	if ( $parseStatus->isOK() ) {
		return $parseStatus->getValue();
	}
	throw new SpeechoidConnectorException( 'Unexpected response from Speechoid.' );
}
330 | |
/**
 * Find the lexicon used by the text processor registered for a language.
 *
 * This includes the lexicons for all registered voices,
 * even if the voice is currently unavailable.
 * Response is in form such as 'sv_se_nst_lex:sv-se.nst',
 * where prefix and suffix split by : is used differently throughout Speechoid
 * e.g combined, prefix only or suffix only, for identifying items.
 *
 * @see requestTextProcessors
 * @since 0.1.8
 * @param string $language Case insensitive language code, e.g. 'en'.
 * @return string|null Name of lexicon, or null if no text processor matches
 *  the language or the matching text processor has no 'lexLookup' component.
 * @throws SpeechoidConnectorException On request failure, or if more than
 *  one text processor matches the language.
 */
public function findLexiconByLanguage(
	string $language
): ?string {
	$language = strtolower( $language );
	$matches = [];
	foreach ( $this->requestTextProcessors() as $textProcessor ) {
		// Both operands are strings; compare strictly so that numeric-looking
		// codes are never considered equal through type juggling.
		if ( strtolower( $textProcessor['lang'] ) === $language ) {
			$matches[] = $textProcessor;
		}
	}
	$numberOfMatches = count( $matches );
	if ( $numberOfMatches === 0 ) {
		return null;
	}
	if ( $numberOfMatches > 1 ) {
		throw new SpeechoidConnectorException(
			'Multiple lexicon matches language' .
			FormatJson::encode( $matches, true )
		);
	}
	// The lexicon name is carried by the component whose call is 'lexLookup'.
	// A text processor without components simply has no lexicon.
	foreach ( $matches[0]['components'] ?? [] as $component ) {
		if ( ( $component['call'] ?? null ) === 'lexLookup' ) {
			return $component['lexicon'] ?? null;
		}
	}
	return null;
}
375 | |
/**
 * Look up the entries for one or more words in a lexicon.
 *
 * The response is an array of items such as:
 * {
 *   "id": 808498,
 *   "lexRef": {
 *     "dbRef": "sv_se_nst_lex",
 *     "lexName": "sv-se.nst"
 *   },
 *   "strn": "tomten",
 *   "language": "sv-se",
 *   "partOfSpeech": "NN",
 *   "morphology": "SIN|DEF|NOM|UTR",
 *   "wordParts": "tomten",
 *   "lemma": {
 *     "id": 92909,
 *     "strn": "tomte",
 *     "paradigm": "s2b-båge"
 *   },
 *   "transcriptions": [
 *     {
 *       "id": 814660,
 *       "entryId": 808498,
 *       "strn": "\"\" t O m . t e n",
 *       "language": "sv-se",
 *       "sources": [
 *         "nst"
 *       ]
 *     }
 *   ],
 *   "status": {
 *     "id": 808498,
 *     "name": "imported",
 *     "source": "nst",
 *     "timestamp": "2018-06-18T08:51:25Z",
 *     "current": true
 *   }
 * }
 *
 * @since 0.1.8
 * @param string $lexicon
 * @param string[] $words Sent to Speechoid as one comma separated list.
 * @return Status If successful, value contains deserialized json response.
 * @throws SpeechoidConnectorException If the response is empty.
 * @throws InvalidArgumentException If words array is empty.
 */
public function lookupLexiconEntries(
	string $lexicon,
	array $words
): Status {
	if ( count( $words ) === 0 ) {
		throw new InvalidArgumentException( 'Must contain at least one word' );
	}
	$query = [
		'lexicons' => $lexicon,
		'words' => implode( ',', $words )
	];
	$url = wfAppendQuery( $this->url . '/lexserver/lexicon/lookup', $query );
	$body = $this->requestFactory->get( $url, [], __METHOD__ );
	if ( $body ) {
		return FormatJson::parse( $body );
	}
	throw new SpeechoidConnectorException( "Unable to communicate with Speechoid. '$url'" );
}
441 | |
/**
 * Update an existing lexicon entry item.
 *
 * @since 0.1.8
 * @param string $json A single entry object item.
 *  I.e. not an array as returned by {@link lookupLexiconEntries}.
 * @return Status If successful, value contains deserialized json response (updated entry item).
 *  Fatal if Speechoid could not be reached or the response is not valid JSON.
 */
public function updateLexiconEntry(
	string $json
): Status {
	$url = wfAppendQuery(
		$this->url . '/lexserver/lexicon/updateentry',
		[ 'entry' => $json ]
	);
	$responseString = $this->requestFactory->get( $url, [], __METHOD__ );
	// The request yields null when it fails. Report that explicitly
	// instead of handing null to the JSON parser.
	if ( $responseString === null ) {
		return Status::newFatal( 'Unable to communicate with Speechoid.' );
	}
	return FormatJson::parse( $responseString, FormatJson::FORCE_ASSOC );
}
461 | |
/**
 * Deletes a lexicon entry item.
 *
 * @since 0.1.8
 * @param string $lexiconName
 * @param int $identity
 * @return Status Good with the raw response as value if Speechoid confirmed
 *  the deletion. Fatal if Speechoid could not be reached or responded
 *  with anything else.
 */
public function deleteLexiconEntry(
	string $lexiconName,
	int $identity
): Status {
	$responseString = $this->requestFactory->get(
		$this->url . '/lexserver/lexicon/delete_entry/' .
			urlencode( $lexiconName ) . '/' . $identity,
		[],
		__METHOD__
	);
	// The request yields null when it fails; neither mb_ereg_match() nor
	// Status::newFatal() should be handed null.
	if ( $responseString === null ) {
		return Status::newFatal( 'Unable to communicate with Speechoid.' );
	}
	// If successful, returns something like:
	// deleted entry id '11' from lexicon 'sv'
	// where the lexicon is the second part of the lexicon name:lang.
	if ( mb_ereg_match(
		"deleted entry id '(.+)' from lexicon '(.+)'",
		$responseString
	) ) {
		return Status::newGood( $responseString );
	}
	return Status::newFatal( $responseString );
}
491 | |
/**
 * Add a new entry item to a lexicon.
 *
 * Example of an entry:
 * {
 *   "strn": "flesk",
 *   "language": "sv-se",
 *   "partOfSpeech": "NN",
 *   "morphology": "SIN-PLU|IND|NOM|NEU",
 *   "wordParts": "flesk",
 *   "lemma": {
 *     "strn": "flesk",
 *     "reading": "",
 *     "paradigm": "s7n-övriga ex träd"
 *   },
 *   "transcriptions": [
 *     {
 *       "strn": "\" f l E s k",
 *       "language": "sv-se"
 *     }
 *   ]
 * }
 *
 * @since 0.1.8
 * @param string $lexiconName E.g. 'wikispeech_lexserver_testdb:sv'
 * @param string $json A single entry object item.
 *  I.e. not an array as returned by {@link lookupLexiconEntries}.
 * @return Status value set to int identity of newly created entry.
 *  Fatal if the response does not contain exactly one integer in `ids`.
 * @throws SpeechoidConnectorException If Speechoid could not be reached
 *  or the response is not valid JSON.
 */
public function addLexiconEntry(
	string $lexiconName,
	string $json
): Status {
	$responseString = $this->requestFactory->get(
		wfAppendQuery(
			$this->url . '/lexserver/lexicon/addentry',
			[
				'lexicon_name' => $lexiconName,
				'entry' => $json
			]
		),
		[],
		__METHOD__
	);
	// The request yields null when it fails; do not hand that to the JSON parser.
	if ( $responseString === null ) {
		throw new SpeechoidConnectorException( 'Unable to communicate with Speechoid.' );
	}
	// @todo how do we know if this was successful? Always return 200

	$deserializedStatus = FormatJson::parse( $responseString, FormatJson::FORCE_ASSOC );
	if ( !$deserializedStatus->isOK() ) {
		throw new SpeechoidConnectorException( "Failed to parse response as JSON: $responseString" );
	}
	$deserializedResponse = $deserializedStatus->getValue();
	// Valid JSON is not necessarily an object (e.g. `null` or a string), so
	// make sure there is an array before looking for the `ids` field.
	if (
		!is_array( $deserializedResponse ) ||
		!array_key_exists( 'ids', $deserializedResponse )
	) {
		return Status::newFatal( 'Unexpected Speechoid response. No `ids` field.' );
	}
	$ids = $deserializedResponse['ids'];
	if ( !is_array( $ids ) || $ids === [] ) {
		return Status::newFatal( 'Unexpected Speechoid response. No `ids` values.' );
	}
	if ( count( $ids ) > 1 ) {
		return Status::newFatal( 'Unexpected Speechoid response. Multiple `ids` values.' );
	}
	// `ids` may have been a JSON object, in which case there is no index 0.
	$identity = $ids[0] ?? null;
	if ( !is_int( $identity ) ) {
		return Status::newFatal( 'Unexpected Speechoid response. Ids[0] is a non integer value.' );
	}
	return Status::newGood( $identity );
}
558 | |
/**
 * Convert a string from the symbol set used for the given language to IPA.
 *
 * @since 0.1.10
 * @param string $string
 * @param string $language Tell Speechoid to use the symbol set
 *  for this language.
 * @return Status
 */
public function toIpa( string $string, string $language ): Status {
	$convertToIpa = true;
	return $this->map( $string, $language, $convertToIpa );
}
571 | |
/**
 * Convert a string to or from IPA.
 *
 * Two requests are made: one to look up the name of the symbol set of the
 * lexicon used for the language, and one to the symbol set service to do
 * the actual mapping.
 *
 * @since 0.1.8
 * @param string $string
 * @param string $language Tell Speechoid to use the symbol set
 *  for this language.
 * @param bool $toIpa Converts to IPA if true, otherwise from IPA
 * @return Status Good with the converted string as value. Fatal if no
 *  lexicon is found for the language or if either response is not the
 *  expected JSON.
 * @throws SpeechoidConnectorException If looking up the lexicon fails.
 */
private function map( string $string, string $language, bool $toIpa ): Status {
	// Get the symbol set to convert to
	$lexicon = $this->findLexiconByLanguage( $language );
	if ( $lexicon === null ) {
		// Without a lexicon there is no symbol set to ask for; do not
		// request an info URL ending in an empty lexicon name.
		return Status::newFatal( "No lexicon found for language $language" );
	}
	$symbolsetRequestUrl = "$this->url/lexserver/lexicon/info/$lexicon";
	$symbolSetResponse = $this->requestFactory->get( $symbolsetRequestUrl, [], __METHOD__ );
	// A failed request yields null; parse it as an empty (invalid) document.
	$symbolSetStatus = FormatJson::parse(
		$symbolSetResponse ?? '',
		FormatJson::FORCE_ASSOC
	);
	if ( !$symbolSetStatus->isOK() ) {
		return Status::newFatal(
			"Failed to parse response from $symbolsetRequestUrl as JSON: " .
			$symbolSetResponse
		);
	}
	$lexiconInfo = $symbolSetStatus->getValue();
	if ( !is_array( $lexiconInfo ) || !isset( $lexiconInfo['symbolSetName'] ) ) {
		return Status::newFatal(
			"Unexpected response from $symbolsetRequestUrl: no symbolSetName."
		);
	}
	$symbolSet = $lexiconInfo['symbolSetName'];

	if ( $toIpa ) {
		$from = $symbolSet;
		$to = 'ipa';
	} else {
		$from = 'ipa';
		$to = $symbolSet;
	}
	$mapRequestUrl = "$this->symbolSetUrl/mapper/map/$from/$to/" .
		rawurlencode( $string );
	$mapResponse = $this->requestFactory->get( $mapRequestUrl, [], __METHOD__ );
	$mapStatus = FormatJson::parse( $mapResponse ?? '', FormatJson::FORCE_ASSOC );
	if ( !$mapStatus->isOK() ) {
		return Status::newFatal(
			"Failed to parse response from $mapRequestUrl as JSON: " .
			$mapResponse
		);
	}
	$mapResult = $mapStatus->getValue();
	if ( !is_array( $mapResult ) || !array_key_exists( 'Result', $mapResult ) ) {
		return Status::newFatal(
			"Unexpected response from $mapRequestUrl: no Result."
		);
	}
	return Status::newGood( $mapResult['Result'] );
}
618 | |
/**
 * Convert a string from IPA to the symbol set used for the given language.
 *
 * @since 0.1.10
 * @param string $string
 * @param string $language Tell Speechoid to use the symbol set
 *  for this language.
 * @return Status
 */
public function fromIpa( string $string, string $language ): Status {
	$convertToIpa = false;
	return $this->map( $string, $language, $convertToIpa );
}
631 | |
/**
 * Tell whether the Speechoid connection queue is overloaded.
 *
 * The queue is overloaded if the backend is already processing its
 * maximum number of concurrent connections while the queue holds more
 * than X connections waiting for their turn, where X is
 * WikispeechSpeechoidHaproxyOverloadFactor multiplied with
 * the maximum number of concurrent connections to the backend.
 *
 * The HAProxy statistics are fetched as CSV from the stats URL and
 * evaluated by {@link HaproxyStatusParser}.
 *
 * @see HaproxyStatusParser::isQueueOverloaded()
 * @since 0.1.10
 * @return bool Whether or not connection queue is overloaded
 */
public function isQueueOverloaded(): bool {
	$statsUrl = $this->haproxyStatsUrl . '/stats;csv;norefresh';
	$statsCsv = $this->requestFactory->get( $statsUrl, [], __METHOD__ );
	$statusParser = new HaproxyStatusParser( $statsCsv );

	$frontendPxName = $this->config->get( 'WikispeechSpeechoidHaproxyFrontendPxName' );
	$frontendSvName = $this->config->get( 'WikispeechSpeechoidHaproxyFrontendSvName' );
	$backendPxName = $this->config->get( 'WikispeechSpeechoidHaproxyBackendPxName' );
	$backendSvName = $this->config->get( 'WikispeechSpeechoidHaproxyBackendSvName' );
	$overloadFactor = floatval( $this->config->get( 'WikispeechSpeechoidHaproxyOverloadFactor' ) );

	return $statusParser->isQueueOverloaded(
		$frontendPxName,
		$frontendSvName,
		$backendPxName,
		$backendSvName,
		$overloadFactor
	);
}
659 | |
/**
 * Count the requests that could currently be sent to the queue
 * and would immediately be passed down to the backend.
 *
 * If this value is greater than 0, then the next request sent via the queue
 * will be immediately processed by the backend.
 *
 * If this value is less than 1, then the next connection will be queued,
 * given that the currently processing requests will not have had time to finish by then.
 * In that case the value is the inverse size of the known queue.
 * Note that the OS on the HAProxy server might be buffering connections in the TCP-stack
 * and that HAProxy will not be aware of such connections. A negative number might therefore
 * not represent a perfect count of current connections lined up in the queue.
 *
 * The idea with this function is to see if there are available resources that could
 * be used for pre-synthesis of utterances during otherwise idle time.
 *
 * @see HaproxyStatusParser::getAvailableNonQueuedConnectionSlots()
 * @since 0.1.10
 * @return int Positive number if available slots, else inverted size of queue.
 */
public function getAvailableNonQueuedConnectionSlots(): int {
	$statsUrl = $this->haproxyStatsUrl . '/stats;csv;norefresh';
	$statsCsv = $this->requestFactory->get( $statsUrl, [], __METHOD__ );
	$statusParser = new HaproxyStatusParser( $statsCsv );

	$frontendPxName = $this->config->get( 'WikispeechSpeechoidHaproxyFrontendPxName' );
	$frontendSvName = $this->config->get( 'WikispeechSpeechoidHaproxyFrontendSvName' );
	$backendPxName = $this->config->get( 'WikispeechSpeechoidHaproxyBackendPxName' );
	$backendSvName = $this->config->get( 'WikispeechSpeechoidHaproxyBackendSvName' );

	return $statusParser->getAvailableNonQueuedConnectionSlots(
		$frontendPxName,
		$frontendSvName,
		$backendPxName,
		$backendSvName
	);
}
696 | |
/**
 * Converts the output from {@link parse_url} to an URL.
 *
 * Components missing from the array are left out of the result.
 *
 * @since 0.1.10
 * @param array $parsedUrl Components keyed as by parse_url(): scheme, host,
 *  port, user, pass, path, query and fragment.
 * @return string
 */
private function unparseUrl( array $parsedUrl ): string {
	$scheme = isset( $parsedUrl['scheme'] ) ? $parsedUrl['scheme'] . '://' : '';
	$host = $parsedUrl['host'] ?? '';
	$port = isset( $parsedUrl['port'] ) ? ':' . $parsedUrl['port'] : '';
	$user = $parsedUrl['user'] ?? '';
	$pass = isset( $parsedUrl['pass'] ) ? ':' . $parsedUrl['pass'] : '';
	// Compare against the empty string explicitly: a plain truthiness test
	// treats the user name "0" as absent and would drop the "@" separator.
	$credentials = ( $user !== '' || $pass !== '' ) ? "$user$pass@" : '';
	$path = $parsedUrl['path'] ?? '';
	$query = isset( $parsedUrl['query'] ) ? '?' . $parsedUrl['query'] : '';
	$fragment = isset( $parsedUrl['fragment'] ) ? '#' . $parsedUrl['fragment'] : '';
	return "$scheme$credentials$host$port$path$query$fragment";
}
716 | |
717 | } |