Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
69.66% |
202 / 290 |
|
55.56% |
5 / 9 |
CRAP | |
0.00% |
0 / 1 |
ApiWikispeechListen | |
69.66% |
202 / 290 |
|
55.56% |
5 / 9 |
87.97 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
22 / 22 |
|
100.00% |
1 / 1 |
1 | |||
execute | |
25.00% |
15 / 60 |
|
0.00% |
0 / 1 |
43.17 | |||
getUtteranceForRevisionAndSegment | |
47.73% |
21 / 44 |
|
0.00% |
0 / 1 |
8.57 | |||
validateText | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
2 | |||
getUtterance | |
85.07% |
57 / 67 |
|
0.00% |
0 / 1 |
10.33 | |||
validateParameters | |
100.00% |
43 / 43 |
|
100.00% |
1 / 1 |
10 | |||
makeValuesString | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
2 | |||
getAllowedParams | |
100.00% |
31 / 31 |
|
100.00% |
1 / 1 |
1 | |||
getExamplesMessages | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
2 |
1 | <?php |
2 | |
3 | namespace MediaWiki\Wikispeech\Api; |
4 | |
5 | /** |
6 | * @file |
7 | * @ingroup API |
8 | * @ingroup Extensions |
9 | * @license GPL-2.0-or-later |
10 | */ |
11 | |
12 | use ApiBase; |
13 | use ApiMain; |
14 | use ApiUsageException; |
15 | use Config; |
16 | use ConfigException; |
17 | use ExternalStoreException; |
18 | use FormatJson; |
19 | use InvalidArgumentException; |
20 | use MediaWiki\Http\HttpRequestFactory; |
21 | use MediaWiki\Logger\LoggerFactory; |
22 | use MediaWiki\MediaWikiServices; |
23 | use MediaWiki\Revision\RevisionStore; |
24 | use MediaWiki\Wikispeech\Segment\DeletedRevisionException; |
25 | use MediaWiki\Wikispeech\Segment\RemoteWikiPageProviderException; |
26 | use MediaWiki\Wikispeech\Segment\Segment; |
27 | use MediaWiki\Wikispeech\Segment\SegmentPageFactory; |
28 | use MediaWiki\Wikispeech\Segment\TextFilter\Sv\SwedishFilter; |
29 | use MediaWiki\Wikispeech\SpeechoidConnector; |
30 | use MediaWiki\Wikispeech\SpeechoidConnectorException; |
31 | use MediaWiki\Wikispeech\Utterance\UtteranceStore; |
32 | use MediaWiki\Wikispeech\VoiceHandler; |
33 | use MWException; |
34 | use MWTimestamp; |
35 | use Psr\Log\LoggerInterface; |
36 | use Throwable; |
37 | use WANObjectCache; |
38 | use Wikimedia\ParamValidator\ParamValidator; |
39 | |
40 | /** |
41 | * API module to synthesize text as sounds.
42 | * |
43 | * Segments referenced by client are expected to have been created using |
44 | * the default configuration settings for segmentBreakingTags and removeTags. |
45 | * If not, segments might be incompatible, causing this API to not find |
46 | * the requested corresponding utterances. |
47 | * |
48 | * @since 0.1.3 |
49 | */ |
50 | class ApiWikispeechListen extends ApiBase { |
51 | |
52 | /** @var Config */ |
53 | private $config; |
54 | |
55 | /** @var WANObjectCache */ |
56 | private $cache; |
57 | |
58 | /** @var RevisionStore */ |
59 | private $revisionStore; |
60 | |
61 | /** @var HttpRequestFactory */ |
62 | private $requestFactory; |
63 | |
64 | /** @var LoggerInterface */ |
65 | private $logger; |
66 | |
67 | /** @var SpeechoidConnector */ |
68 | private $speechoidConnector; |
69 | |
70 | /** @var UtteranceStore */ |
71 | private $utteranceStore; |
72 | |
73 | /** @var VoiceHandler */ |
74 | private $voiceHandler; |
75 | |
76 | /** @var ListenMetricsEntry */ |
77 | private $listenMetricEntry; |
78 | |
/**
 * Set up the module and the collaborators it uses to serve requests.
 *
 * The extension configuration is taken from the 'wikispeech' entry of the
 * config factory. The injected cache is used both by this module and by
 * the voice handler, so that a caller supplying its own cache instance
 * (e.g. a test) gets consistent behavior.
 *
 * @since 0.1.5
 * @param ApiMain $mainModule
 * @param string $moduleName
 * @param WANObjectCache $cache
 * @param RevisionStore $revisionStore
 * @param HttpRequestFactory $requestFactory
 * @param string $modulePrefix
 */
public function __construct(
	ApiMain $mainModule,
	string $moduleName,
	WANObjectCache $cache,
	RevisionStore $revisionStore,
	HttpRequestFactory $requestFactory,
	string $modulePrefix = ''
) {
	$this->cache = $cache;
	$this->revisionStore = $revisionStore;
	$this->requestFactory = $requestFactory;
	$this->logger = LoggerFactory::getInstance( 'Wikispeech' );
	// Note: $this->getConfig() must not be used here; the context is not
	// available until the parent constructor has run, and the extension
	// specific config is what the collaborators below need anyway.
	$this->config = MediaWikiServices::getInstance()
		->getConfigFactory()
		->makeConfig( 'wikispeech' );
	$this->speechoidConnector = new SpeechoidConnector(
		$this->config,
		$requestFactory
	);
	$this->utteranceStore = new UtteranceStore();
	$this->voiceHandler = new VoiceHandler(
		$this->logger,
		$this->config,
		$this->speechoidConnector,
		// Reuse the injected cache rather than looking up a second one.
		$this->cache
	);
	$this->listenMetricEntry = new ListenMetricsEntry();
	parent::__construct( $mainModule, $moduleName, $modulePrefix );
}
119 | |
/**
 * Execute an API request.
 *
 * Validates the request parameters, resolves the voice (falling back to
 * the default voice of the language), and then either looks up/synthesizes
 * the utterance of a page segment (when 'revision' is set) or synthesizes
 * the supplied input directly. The response, containing 'audio' and
 * 'tokens', is added to the API result under the module name.
 *
 * After the result has been added, listen metrics are collected and, unless
 * disabled by request parameter or configuration, appended to the metrics
 * journal. Failures while journaling are logged and never reported to
 * the user.
 *
 * @since 0.1.3
 * @throws ConfigException If no voice was requested and no default voice
 *  is available for the language.
 * @throws ApiUsageException On invalid parameters or failed synthesis.
 */
public function execute() {
	$started = microtime( true );
	$this->listenMetricEntry->setTimestamp( MWTimestamp::getInstance() );

	$inputParameters = $this->extractRequestParams();
	$this->validateParameters( $inputParameters );

	$language = $inputParameters['lang'];
	$voice = $inputParameters['voice'];
	if ( !$voice ) {
		$voice = $this->voiceHandler->getDefaultVoice( $language );
		if ( !$voice ) {
			throw new ConfigException( 'Invalid default voice configuration.' );
		}
	}
	if ( isset( $inputParameters['revision'] ) ) {
		$response = $this->getUtteranceForRevisionAndSegment(
			$voice,
			$language,
			$inputParameters['revision'],
			$inputParameters['segment'],
			$inputParameters['consumer-url']
		);
	} else {
		try {
			$speechoidResponse = $this->speechoidConnector->synthesize(
				$language,
				$voice,
				$inputParameters
			);
		} catch ( Throwable $exception ) {
			$this->dieWithException( $exception );
		}
		$response = [
			// @phan-suppress-next-line PhanTypeArraySuspiciousNullable Phan doesn't understand dieWithException()
			'audio' => $speechoidResponse['audio_data'],
			// @phan-suppress-next-line PhanTypeArraySuspiciousNullable Phan doesn't understand dieWithException()
			'tokens' => $speechoidResponse['tokens']
		];
	}
	$this->getResult()->addValue(
		null,
		$this->getModuleName(),
		$response
	);

	// The result has been delivered at this point; metrics collection
	// below must cope with a missing or empty token list.
	$tokens = $response['tokens'] ?? [];

	$charactersInSegment = 0;
	foreach ( $tokens as $token ) {
		$charactersInSegment += mb_strlen( $token['orth'] );
		// whitespace and sentence ends counts too
		$charactersInSegment += 1;
	}
	$this->listenMetricEntry->setCharactersInSegment( $charactersInSegment );
	$this->listenMetricEntry->setLanguage( $inputParameters['lang'] );
	$this->listenMetricEntry->setVoice( $voice );
	$this->listenMetricEntry->setPageRevisionId( $inputParameters['revision'] );
	$this->listenMetricEntry->setSegmentHash( $inputParameters['segment'] );
	$this->listenMetricEntry->setConsumerUrl( $inputParameters['consumer-url'] );
	$this->listenMetricEntry->setRemoteWikiHash(
		UtteranceStore::evaluateRemoteWikiHash( $inputParameters['consumer-url'] )
	);
	// The end time of the last token is the length of the utterance.
	// end() returns false for an empty list, in which case there is no
	// last token to read and the metric is left untouched.
	$lastToken = end( $tokens );
	if ( $lastToken !== false ) {
		$this->listenMetricEntry->setMillisecondsSpeechInUtterance(
			$lastToken['endtime']
		);
	}
	$this->listenMetricEntry->setMicrosecondsSpent( intval( 1000000 * ( microtime( true ) - $started ) ) );

	// All other metrics fields have been set in other functions of this class.

	if ( !$inputParameters['skip-journal-metrics']
		&& $this->config->get( 'WikispeechListenDoJournalMetrics' ) ) {
		$metricsJournal = new ListenMetricsEntryFileJournal( $this->config );
		try {
			$metricsJournal->appendEntry( $this->listenMetricEntry );
		} catch ( Throwable $exception ) {
			// Catch everything. This should not bother the user!
			$this->logger->warning(
				'Exception caught while appending to metrics journal {exception}',
				[ 'exception' => $exception ]
			);
		}
	}
}
207 | |
/**
 * Look up the utterance that belongs to a segment of a page revision.
 *
 * The page of the revision is segmented using the default settings for
 * segment breaking tags and removed tags, the segment with the given hash
 * is located, and the utterance for that segment is returned. Segment
 * index, page id and page title are recorded in the listen metrics entry.
 *
 * @since 0.1.5
 * @param string $voice
 * @param string $language
 * @param int $revisionId
 * @param string $segmentHash
 * @param string|null $consumerUrl URL to the script path on the consumer, if used as a producer.
 * @return array An utterance
 * @throws MWException If no segment matches the hash, or if no page id
 *  was retrieved for the revision.
 * @throws ApiUsageException If the page could not be fetched from the
 *  consumer, or if the revision is deleted.
 */
private function getUtteranceForRevisionAndSegment(
	string $voice,
	string $language,
	int $revisionId,
	string $segmentHash,
	?string $consumerUrl = null
): array {
	// todo inject config factory
	$configFactory = MediaWikiServices::getInstance()->getConfigFactory();
	$pageFactory = new SegmentPageFactory( $this->cache, $configFactory );
	try {
		$segmentedPage = $pageFactory
			->setSegmentBreakingTags( null )
			->setRemoveTags( null )
			->setUseSegmentsCache( true )
			->setUseRevisionPropertiesCache( true )
			->setContextSource( $this->getContext() )
			->setRevisionStore( $this->revisionStore )
			->setHttpRequestFactory( $this->requestFactory )
			->setConsumerUrl( $consumerUrl )
			->setRequirePageRevisionProperties( true )
			->segmentPage( null, $revisionId );
	} catch ( RemoteWikiPageProviderException $providerFailure ) {
		$errorMessage = [
			'apierror-wikispeech-listen-failed-getting-page-from-consumer',
			$revisionId,
			$consumerUrl
		];
		$this->dieWithError( $errorMessage );
	} catch ( DeletedRevisionException $deletedRevision ) {
		$this->dieWithError( 'apierror-wikispeech-listen-deleted-revision' );
	}

	$segment = $segmentedPage->getSegments()->findFirstItemByHash( $segmentHash );
	if ( $segment === null ) {
		throw new MWException( 'No such segment. ' .
			'Did you perhaps reference a segment that was created using incompatible settings ' .
			'for segmentBreakingTags and/or removeTags?' );
	}

	$pageId = $segmentedPage->getPageId();
	if ( $pageId === null ) {
		throw new MWException( 'Did not retrieve page id for the given revision id.' );
	}

	// Remember where in the page the segment was found, for the metrics.
	$segmentIndex = $segmentedPage->getSegments()->indexOf( $segment );
	$this->listenMetricEntry->setSegmentIndex( $segmentIndex );
	$this->listenMetricEntry->setPageId( $pageId );
	$pageTitle = $segmentedPage->getTitle()->getText();
	$this->listenMetricEntry->setPageTitle( $pageTitle );

	return $this->getUtterance( $consumerUrl, $voice, $language, $pageId, $segment );
}
278 | |
/**
 * Reject input text that is longer than the configured limit.
 *
 * The length is measured in characters (not bytes) and compared with the
 * 'WikispeechListenMaximumInputCharacters' configuration value.
 *
 * @since 0.1.5
 * @param string $text
 * @throws ApiUsageException If the text exceeds the limit.
 */
private function validateText( $text ) {
	$length = mb_strlen( $text );
	$limit = $this->config->get( 'WikispeechListenMaximumInputCharacters' );
	if ( $length <= $limit ) {
		// Within bounds, nothing to report.
		return;
	}
	$this->dieWithError( [
		'apierror-wikispeech-listen-invalid-input-too-long',
		$limit,
		$length
	] );
}
298 | |
/**
 * Return the utterance corresponding to the request.
 *
 * The utterance is looked up in the utterance store. If it is not found,
 * the content of the segment is synthesized by Speechoid and the result is
 * stored before it is returned. For Swedish, the segment text is first
 * passed through a text filter producing SSML; other languages are
 * synthesized from the plain segment text.
 *
 * Whether the utterance had to be synthesized is recorded in the listen
 * metrics entry.
 *
 * @since 0.1.5
 * @param string|null $consumerUrl
 * @param string $voice Voice to use; if empty, the default voice of the
 *  language is used.
 * @param string $language
 * @param int $pageId
 * @param Segment $segment
 * @return array Containing base64 'audio' and synthesisMetadata 'tokens'.
 * @throws ExternalStoreException
 * @throws ConfigException If no voice is given and there is no default
 *  voice for the language.
 * @throws InvalidArgumentException If the segment has no hash.
 * @throws SpeechoidConnectorException
 */
private function getUtterance(
	?string $consumerUrl,
	string $voice,
	string $language,
	int $pageId,
	Segment $segment
) {
	// No check of $pageId is needed here: the int type declaration
	// already guarantees that a page id has been supplied.
	$segmentHash = $segment->getHash();
	if ( $segmentHash === null ) {
		throw new InvalidArgumentException( 'Segment hash must be set.' );
	}
	if ( !$voice ) {
		$voice = $this->voiceHandler->getDefaultVoice( $language );
		if ( !$voice ) {
			throw new ConfigException( "Invalid default voice configuration." );
		}
	}
	$utterance = $this->utteranceStore->findUtterance(
		$consumerUrl,
		$pageId,
		$language,
		$voice,
		$segmentHash
	);

	$this->listenMetricEntry->setUtteranceSynthesized( $utterance === null );

	if ( !$utterance ) {
		$this->logger->debug( __METHOD__ . ': Creating new utterance for {pageId} {segmentHash}', [
			'pageId' => $pageId,
			'segmentHash' => $segmentHash
		] );

		// Make a string of all the segment contents.
		$segmentText = '';
		foreach ( $segment->getContent() as $content ) {
			$segmentText .= $content->getString();
		}
		$this->validateText( $segmentText );

		/** @var string $ssml text/xml Speech Synthesis Markup Language */
		$ssml = null;
		if ( $language === 'sv' ) {
			// @todo implement a per language selecting content text filter facade
			$textFilter = new SwedishFilter( $segmentText );
			$ssml = $textFilter->process();
		}
		if ( $ssml !== null ) {
			$speechoidResponse = $this->speechoidConnector->synthesize(
				$language,
				$voice,
				[ 'ssml' => $ssml ]
			);
		} else {
			$speechoidResponse = $this->speechoidConnector->synthesizeText(
				$language,
				$voice,
				$segmentText
			);
		}
		// Store the newly synthesized utterance so that it can be
		// found by the lookup above on later requests.
		$this->utteranceStore->createUtterance(
			$consumerUrl,
			$pageId,
			$language,
			$voice,
			$segmentHash,
			$speechoidResponse['audio_data'],
			FormatJson::encode(
				$speechoidResponse['tokens']
			)
		);
		return [
			'audio' => $speechoidResponse['audio_data'],
			'tokens' => $speechoidResponse['tokens']
		];
	}
	$this->logger->debug( __METHOD__ . ': Using cached utterance for {pageId} {segmentHash}', [
		'pageId' => $pageId,
		'segmentHash' => $segmentHash
	] );
	return [
		'audio' => $utterance->getAudio(),
		// The synthesis metadata is stored as JSON; decode it to the same
		// associative array form as a fresh Speechoid response.
		'tokens' => FormatJson::parse(
			// @phan-suppress-next-line PhanTypeMismatchArgumentNullable synthesis metadata is set
			$utterance->getSynthesisMetadata(),
			FormatJson::FORCE_ASSOC
		)->getValue()
	];
}
408 | |
/**
 * Validate the request parameters.
 *
 * Checks, in order, that:
 * - 'consumer-url' is only used when the wiki runs in producer mode,
 * - 'revision' and 'segment' are used together,
 * - exactly one of 'revision', 'text' and 'ipa' is given,
 * - the language is one of the configured languages,
 * - the voice, if given, is configured for the language,
 * - the input text, if given, is not too long.
 *
 * The language and voice values are checked against the extension
 * configuration. These may differ from what is actually running
 * on the Speechoid service.
 *
 * @since 0.1.3
 * @param array $parameters Request parameters.
 * @throws ApiUsageException
 */
private function validateParameters( $parameters ) {
	if (
		isset( $parameters['consumer-url'] ) &&
		!$this->config->get( 'WikispeechProducerMode' ) ) {
		$this->dieWithError( 'apierror-wikispeech-consumer-not-allowed' );
	}
	if (
		isset( $parameters['revision'] ) &&
		!isset( $parameters['segment'] )
	) {
		$this->dieWithError( [
			'apierror-invalidparammix-mustusewith',
			'revision',
			'segment'
		] );
	}
	if (
		isset( $parameters['segment'] ) &&
		!isset( $parameters['revision'] )
	) {
		$this->dieWithError( [
			'apierror-invalidparammix-mustusewith',
			'segment',
			'revision'
		] );
	}
	$this->requireOnlyOneParameter(
		$parameters,
		'revision',
		'text',
		'ipa'
	);
	$voices = $this->config->get( 'WikispeechVoices' );
	$language = $parameters['lang'];

	// Validate language. Strict comparison, so that request input is
	// never matched against a configured value through type juggling.
	$validLanguages = array_keys( $voices );
	if ( !in_array( $language, $validLanguages, true ) ) {
		$this->dieWithError( [
			'apierror-wikispeech-listen-invalid-language',
			$language,
			self::makeValuesString( $validLanguages )
		] );
	}

	// Validate voice. An omitted voice is fine; the default voice of the
	// language is selected later on.
	$voice = $parameters['voice'];
	if ( $voice ) {
		$validVoices = $voices[$language];
		if ( !in_array( $voice, $validVoices, true ) ) {
			$this->dieWithError( [
				'apierror-wikispeech-listen-invalid-voice',
				$voice,
				self::makeValuesString( $validVoices )
			] );
		}
	}

	// Validate input text.
	$input = $parameters['text'] ?? '';
	$this->validateText( $input );
}
482 | |
/**
 * Format a list of values for use in messages.
 *
 * @since 0.1.3
 * @param array $values Values as strings.
 * @return string Each input value wrapped in a <kbd> tag, with the
 *  wrapped values separated by a comma and a space.
 */
private static function makeValuesString( $values ) {
	$wrapped = array_map(
		static function ( $item ) {
			return "<kbd>$item</kbd>";
		},
		$values
	);
	return implode( ', ', $wrapped );
}
498 | |
/**
 * Declare the parameters accepted by this module.
 *
 * The parameters of the parent class are kept; the ones declared here are
 * merged on top of them. Only 'lang' is required. 'skip-journal-metrics'
 * is a boolean defaulting to false, 'revision' is an integer and all
 * remaining parameters are optional strings.
 *
 * @since 0.1.3
 * @return array
 */
public function getAllowedParams() {
	$optionalString = [
		ParamValidator::PARAM_TYPE => 'string'
	];
	$moduleParams = [
		'lang' => [
			ParamValidator::PARAM_TYPE => 'string',
			ParamValidator::PARAM_REQUIRED => true
		],
		'text' => $optionalString,
		'ipa' => $optionalString,
		'revision' => [
			ParamValidator::PARAM_TYPE => 'integer'
		],
		'segment' => $optionalString,
		'voice' => $optionalString,
		'consumer-url' => $optionalString,
		'skip-journal-metrics' => [
			ParamValidator::PARAM_TYPE => 'boolean',
			ParamValidator::PARAM_DEFAULT => false
		]
	];
	return array_merge( parent::getAllowedParams(), $moduleParams );
}
538 | |
/**
 * List example requests for the API help.
 *
 * @since 0.1.3
 * @return array Message keys describing the examples, keyed by the
 *  example query string.
 */
public function getExamplesMessages() {
	$examples = [];

	// Synthesize text, with the default and with an explicit voice.
	$examples['action=wikispeech-listen&format=json&lang=en&text=Read this'] =
		'apihelp-wikispeech-listen-example-1';
	$examples['action=wikispeech-listen&format=json&lang=en&text=Read this&voice=cmu-slt-hsmm'] =
		'apihelp-wikispeech-listen-example-2';

	// Listen to a segment of a page revision, locally and for a consumer wiki.
	$examples['action=wikispeech-listen&format=json&lang=en&revision=1&segment=hash1234'] =
		'apihelp-wikispeech-listen-example-3';
	// phpcs:ignore Generic.Files.LineLength
	$examples['action=wikispeech-listen&format=json&lang=en&revision=1&segment=hash1234&consumer-url=https://consumer.url/w'] =
		'apihelp-wikispeech-listen-example-4';

	return $examples;
}
558 | } |