Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
69.66% covered (warning)
69.66%
202 / 290
55.56% covered (warning)
55.56%
5 / 9
CRAP
0.00% covered (danger)
0.00%
0 / 1
ApiWikispeechListen
69.66% covered (warning)
69.66%
202 / 290
55.56% covered (warning)
55.56%
5 / 9
87.97
0.00% covered (danger)
0.00%
0 / 1
 __construct
100.00% covered (success)
100.00%
22 / 22
100.00% covered (success)
100.00%
1 / 1
1
 execute
25.00% covered (danger)
25.00%
15 / 60
0.00% covered (danger)
0.00%
0 / 1
43.17
 getUtteranceForRevisionAndSegment
47.73% covered (danger)
47.73%
21 / 44
0.00% covered (danger)
0.00%
0 / 1
8.57
 validateText
100.00% covered (success)
100.00%
9 / 9
100.00% covered (success)
100.00%
1 / 1
2
 getUtterance
85.07% covered (warning)
85.07%
57 / 67
0.00% covered (danger)
0.00%
0 / 1
10.33
 validateParameters
100.00% covered (success)
100.00%
43 / 43
100.00% covered (success)
100.00%
1 / 1
10
 makeValuesString
100.00% covered (success)
100.00%
4 / 4
100.00% covered (success)
100.00%
1 / 1
2
 getAllowedParams
100.00% covered (success)
100.00%
31 / 31
100.00% covered (success)
100.00%
1 / 1
1
 getExamplesMessages
0.00% covered (danger)
0.00%
0 / 10
0.00% covered (danger)
0.00%
0 / 1
2
1<?php
2
3namespace MediaWiki\Wikispeech\Api;
4
5/**
6 * @file
7 * @ingroup API
8 * @ingroup Extensions
9 * @license GPL-2.0-or-later
10 */
11
12use ApiBase;
13use ApiMain;
14use ApiUsageException;
15use Config;
16use ConfigException;
17use ExternalStoreException;
18use FormatJson;
19use InvalidArgumentException;
20use MediaWiki\Http\HttpRequestFactory;
21use MediaWiki\Logger\LoggerFactory;
22use MediaWiki\MediaWikiServices;
23use MediaWiki\Revision\RevisionStore;
24use MediaWiki\Wikispeech\Segment\DeletedRevisionException;
25use MediaWiki\Wikispeech\Segment\RemoteWikiPageProviderException;
26use MediaWiki\Wikispeech\Segment\Segment;
27use MediaWiki\Wikispeech\Segment\SegmentPageFactory;
28use MediaWiki\Wikispeech\Segment\TextFilter\Sv\SwedishFilter;
29use MediaWiki\Wikispeech\SpeechoidConnector;
30use MediaWiki\Wikispeech\SpeechoidConnectorException;
31use MediaWiki\Wikispeech\Utterance\UtteranceStore;
32use MediaWiki\Wikispeech\VoiceHandler;
33use MWException;
34use MWTimestamp;
35use Psr\Log\LoggerInterface;
36use Throwable;
37use WANObjectCache;
38use Wikimedia\ParamValidator\ParamValidator;
39
40/**
41 * API module to synthesize text as sounds.
42 *
43 * Segments referenced by client are expected to have been created using
44 * the default configuration settings for segmentBreakingTags and removeTags.
45 * If not, segments might be incompatible, causing this API to not find
46 * the requested corresponding utterances.
47 *
48 * @since 0.1.3
49 */
50class ApiWikispeechListen extends ApiBase {
51
52    /** @var Config */
53    private $config;
54
55    /** @var WANObjectCache */
56    private $cache;
57
58    /** @var RevisionStore */
59    private $revisionStore;
60
61    /** @var HttpRequestFactory */
62    private $requestFactory;
63
64    /** @var LoggerInterface */
65    private $logger;
66
67    /** @var SpeechoidConnector */
68    private $speechoidConnector;
69
70    /** @var UtteranceStore */
71    private $utteranceStore;
72
73    /** @var VoiceHandler */
74    private $voiceHandler;
75
76    /** @var ListenMetricsEntry */
77    private $listenMetricEntry;
78
/**
 * Set up the collaborators used to look up, synthesize and store utterances.
 *
 * The base module is constructed first. The Wikispeech configuration is
 * fetched once from the config factory, and the injected cache is shared
 * with the voice handler.
 *
 * @since 0.1.5
 * @param ApiMain $mainModule
 * @param string $moduleName
 * @param WANObjectCache $cache
 * @param RevisionStore $revisionStore
 * @param HttpRequestFactory $requestFactory
 * @param string $modulePrefix
 */
public function __construct(
    ApiMain $mainModule,
    string $moduleName,
    WANObjectCache $cache,
    RevisionStore $revisionStore,
    HttpRequestFactory $requestFactory,
    string $modulePrefix = ''
) {
    // Construct the base module before touching anything else, so that no
    // inherited accessor is ever called on a half-initialised instance.
    parent::__construct( $mainModule, $moduleName, $modulePrefix );

    $this->cache = $cache;
    $this->revisionStore = $revisionStore;
    $this->requestFactory = $requestFactory;
    $this->logger = LoggerFactory::getInstance( 'Wikispeech' );
    // The extension's own config; this is the only config this class reads.
    $this->config = MediaWikiServices::getInstance()
        ->getConfigFactory()
        ->makeConfig( 'wikispeech' );
    $this->speechoidConnector = new SpeechoidConnector(
        $this->config,
        $requestFactory
    );
    $this->utteranceStore = new UtteranceStore();
    // Reuse the injected cache instead of looking the service up again.
    $this->voiceHandler = new VoiceHandler(
        $this->logger,
        $this->config,
        $this->speechoidConnector,
        $cache
    );
    $this->listenMetricEntry = new ListenMetricsEntry();
}
119
/**
 * Execute an API request.
 *
 * Validates the request parameters and resolves the voice (falling back to
 * the default voice for the language). When `revision` is set, the utterance
 * for that revision and segment is fetched; otherwise the request parameters
 * are sent to Speechoid for synthesis. The audio and tokens are added to the
 * API result. A listen metrics entry is then filled in and, unless disabled
 * by request parameter or configuration, appended to the metrics journal.
 *
 * @since 0.1.3
 * @throws ApiUsageException If the request parameters are invalid.
 * @throws ConfigException If no voice was requested and no default voice
 *  is available for the language.
 */
public function execute() {
    $started = microtime( true );
    $this->listenMetricEntry->setTimestamp( MWTimestamp::getInstance() );

    $inputParameters = $this->extractRequestParams();
    $this->validateParameters( $inputParameters );

    $language = $inputParameters['lang'];
    $voice = $inputParameters['voice'];
    if ( !$voice ) {
        $voice = $this->voiceHandler->getDefaultVoice( $language );
        if ( !$voice ) {
            throw new ConfigException( 'Invalid default voice configuration.' );
        }
    }
    if ( isset( $inputParameters['revision'] ) ) {
        $response = $this->getUtteranceForRevisionAndSegment(
            $voice,
            $language,
            $inputParameters['revision'],
            $inputParameters['segment'],
            $inputParameters['consumer-url']
        );
    } else {
        try {
            $speechoidResponse = $this->speechoidConnector->synthesize(
                $language,
                $voice,
                $inputParameters
            );
        } catch ( Throwable $exception ) {
            $this->dieWithException( $exception );
        }
        $response = [
            // @phan-suppress-next-line PhanTypeArraySuspiciousNullable Phan doesn't understand dieWithException()
            'audio' => $speechoidResponse['audio_data'],
            // @phan-suppress-next-line PhanTypeArraySuspiciousNullable Phan doesn't understand dieWithException()
            'tokens' => $speechoidResponse['tokens']
        ];
    }
    $this->getResult()->addValue(
        null,
        $this->getModuleName(),
        $response
    );

    $tokens = $response['tokens'];
    $charactersInSegment = 0;
    foreach ( $tokens as $token ) {
        $charactersInSegment += mb_strlen( $token['orth'] );
        // whitespace and sentence ends count too
        $charactersInSegment += 1;
    }
    // The end time of the last token is the length of the utterance. With no
    // tokens at all there is no last element to read, so report zero rather
    // than indexing offset -1.
    $numberOfTokens = count( $tokens );
    $millisecondsSpeechInUtterance = $numberOfTokens > 0
        ? $tokens[$numberOfTokens - 1]['endtime']
        : 0;

    $this->listenMetricEntry->setCharactersInSegment( $charactersInSegment );
    $this->listenMetricEntry->setLanguage( $inputParameters['lang'] );
    $this->listenMetricEntry->setVoice( $voice );
    $this->listenMetricEntry->setPageRevisionId( $inputParameters['revision'] );
    $this->listenMetricEntry->setSegmentHash( $inputParameters['segment'] );
    $this->listenMetricEntry->setConsumerUrl( $inputParameters['consumer-url'] );
    $this->listenMetricEntry->setRemoteWikiHash(
        UtteranceStore::evaluateRemoteWikiHash( $inputParameters['consumer-url'] )
    );
    $this->listenMetricEntry->setMillisecondsSpeechInUtterance( $millisecondsSpeechInUtterance );
    $this->listenMetricEntry->setMicrosecondsSpent( intval( 1000000 * ( microtime( true ) - $started ) ) );

    // All other metrics fields have been set in other functions of this class.

    if ( !$inputParameters['skip-journal-metrics']
        && $this->config->get( 'WikispeechListenDoJournalMetrics' ) ) {
        $metricsJournal = new ListenMetricsEntryFileJournal( $this->config );
        try {
            $metricsJournal->appendEntry( $this->listenMetricEntry );
        } catch ( Throwable $exception ) {
            // Catch everything. This should not bother the user!
            $this->logger->warning(
                'Exception caught while appending to metrics journal {exception}',
                [ 'exception' => $exception ]
            );
        }
    }
}
207
/**
 * Retrieves the matching utterance for a given revision id and segment hash.
 *
 * The page revision is segmented using the default settings for
 * segmentBreakingTags and removeTags, the segment with the requested hash
 * is looked up, and page details are recorded on the metrics entry before
 * the utterance is fetched.
 *
 * @since 0.1.5
 * @param string $voice
 * @param string $language
 * @param int $revisionId
 * @param string $segmentHash
 * @param string|null $consumerUrl URL to the script path on the consumer, if used as a producer.
 * @return array An utterance
 * @throws ApiUsageException If the page could not be fetched from the
 *  consumer, or the revision is deleted.
 * @throws MWException If no segment matches the hash, or no page id was
 *  retrieved for the revision.
 */
private function getUtteranceForRevisionAndSegment(
    string $voice,
    string $language,
    int $revisionId,
    string $segmentHash,
    ?string $consumerUrl = null
): array {
    $segmentPageFactory = new SegmentPageFactory(
        $this->cache,
        // todo inject config factory
        MediaWikiServices::getInstance()->getConfigFactory()
    );
    try {
        $segmentPageResponse = $segmentPageFactory
            ->setSegmentBreakingTags( null )
            ->setRemoveTags( null )
            ->setUseSegmentsCache( true )
            ->setUseRevisionPropertiesCache( true )
            ->setContextSource( $this->getContext() )
            ->setRevisionStore( $this->revisionStore )
            ->setHttpRequestFactory( $this->requestFactory )
            ->setConsumerUrl( $consumerUrl )
            ->setRequirePageRevisionProperties( true )
            ->segmentPage(
                null,
                $revisionId
            );
    } catch ( RemoteWikiPageProviderException $remoteWikiPageProviderException ) {
        $this->dieWithError( [
            'apierror-wikispeech-listen-failed-getting-page-from-consumer',
            $revisionId,
            $consumerUrl
        ] );
    } catch ( DeletedRevisionException $deletedRevisionException ) {
        $this->dieWithError( 'apierror-wikispeech-listen-deleted-revision' );
    }
    $segment = $segmentPageResponse->getSegments()->findFirstItemByHash( $segmentHash );
    if ( $segment === null ) {
        throw new MWException( 'No such segment. ' .
            'Did you perhaps reference a segment that was created using incompatible settings ' .
            'for segmentBreakingTags and/or removeTags?' );
    }
    $pageId = $segmentPageResponse->getPageId();
    if ( $pageId === null ) {
        throw new MWException( 'Did not retrieve page id for the given revision id.' );
    }

    // Record where in the page the segment sits, for the metrics entry.
    $this->listenMetricEntry->setSegmentIndex( $segmentPageResponse->getSegments()->indexOf( $segment ) );
    $this->listenMetricEntry->setPageId( $pageId );
    $this->listenMetricEntry->setPageTitle( $segmentPageResponse->getTitle()->getText() );

    return $this->getUtterance(
        $consumerUrl,
        $voice,
        $language,
        $pageId,
        $segment
    );
}
278
/**
 * Reject input text that exceeds the configured maximum length.
 *
 * Length is counted in characters, not bytes, and compared against
 * the WikispeechListenMaximumInputCharacters setting.
 *
 * @since 0.1.5
 * @param string $text
 * @throws ApiUsageException
 */
private function validateText( $text ) {
    $limit = $this->config->get( 'WikispeechListenMaximumInputCharacters' );
    $length = mb_strlen( $text );
    if ( !( $length > $limit ) ) {
        // Within bounds, nothing to report.
        return;
    }
    $this->dieWithError( [
        'apierror-wikispeech-listen-invalid-input-too-long',
        $limit,
        $length
    ] );
}
298
/**
 * Return the utterance corresponding to the request.
 *
 * The utterance store is consulted first. On a hit the stored audio and
 * decoded synthesis metadata are returned. On a miss the segment text is
 * validated and synthesized (via SSML for Swedish, as plain text
 * otherwise), the result is stored, and then returned. Whether synthesis
 * was needed is recorded on the metrics entry.
 *
 * @since 0.1.5
 * @param string|null $consumerUrl
 * @param string $voice
 * @param string $language
 * @param int $pageId
 * @param Segment $segment
 * @return array Containing base64 'audio' and synthesisMetadata 'tokens'.
 * @throws ExternalStoreException
 * @throws ConfigException
 * @throws InvalidArgumentException
 * @throws SpeechoidConnectorException
 */
private function getUtterance(
    ?string $consumerUrl,
    string $voice,
    string $language,
    int $pageId,
    Segment $segment
) {
    // $pageId is declared as a non-nullable int, so it can never arrive
    // "unset"; only the segment hash needs checking.
    $segmentHash = $segment->getHash();
    if ( $segmentHash === null ) {
        throw new InvalidArgumentException( 'Segment hash must be set.' );
    }
    if ( !$voice ) {
        $voice = $this->voiceHandler->getDefaultVoice( $language );
        if ( !$voice ) {
            throw new ConfigException( "Invalid default voice configuration." );
        }
    }
    $utterance = $this->utteranceStore->findUtterance(
        $consumerUrl,
        $pageId,
        $language,
        $voice,
        $segmentHash
    );

    $this->listenMetricEntry->setUtteranceSynthesized( $utterance === null );

    if ( $utterance ) {
        $this->logger->debug( __METHOD__ . ': Using cached utterance for {pageId} {segmentHash}', [
            'pageId' => $pageId,
            'segmentHash' => $segmentHash
        ] );
        return [
            'audio' => $utterance->getAudio(),
            'tokens' => FormatJson::parse(
                // @phan-suppress-next-line PhanTypeMismatchArgumentNullable synthesis metadata is set
                $utterance->getSynthesisMetadata(),
                FormatJson::FORCE_ASSOC
            )->getValue()
        ];
    }

    $this->logger->debug( __METHOD__ . ': Creating new utterance for {pageId} {segmentHash}', [
        'pageId' => $pageId,
        'segmentHash' => $segmentHash
    ] );

    // Make a string of all the segment contents.
    $segmentText = '';
    foreach ( $segment->getContent() as $content ) {
        $segmentText .= $content->getString();
    }
    $this->validateText( $segmentText );

    /** @var string|null $ssml text/xml Speech Synthesis Markup Language */
    $ssml = null;
    if ( $language === 'sv' ) {
        // @todo implement a per language selecting content text filter facade
        $textFilter = new SwedishFilter( $segmentText );
        $ssml = $textFilter->process();
    }
    if ( $ssml !== null ) {
        $speechoidResponse = $this->speechoidConnector->synthesize(
            $language,
            $voice,
            [ 'ssml' => $ssml ]
        );
    } else {
        // No filter applied, or the filter produced nothing: send plain text.
        $speechoidResponse = $this->speechoidConnector->synthesizeText(
            $language,
            $voice,
            $segmentText
        );
    }
    $this->utteranceStore->createUtterance(
        $consumerUrl,
        $pageId,
        $language,
        $voice,
        $segmentHash,
        $speechoidResponse['audio_data'],
        FormatJson::encode(
            $speechoidResponse['tokens']
        )
    );
    return [
        'audio' => $speechoidResponse['audio_data'],
        'tokens' => $speechoidResponse['tokens']
    ];
}
408
/**
 * Check the request parameters before any synthesis is attempted.
 *
 * Checks, in order: that `consumer-url` is only used in producer mode;
 * that `revision` and `segment` are given together; that only one of
 * `revision`, `text` and `ipa` is given; that the language and (if given)
 * the voice are present in the extension configuration; and that the
 * input text is not too long. The configured values may differ from
 * what is actually running on the Speechoid service.
 *
 * @since 0.1.3
 * @param array $parameters Request parameters.
 * @throws ApiUsageException
 */
private function validateParameters( $parameters ) {
    if (
        isset( $parameters['consumer-url'] ) &&
        !$this->config->get( 'WikispeechProducerMode' )
    ) {
        $this->dieWithError( 'apierror-wikispeech-consumer-not-allowed' );
    }

    // revision and segment only make sense as a pair.
    $companions = [
        [ 'revision', 'segment' ],
        [ 'segment', 'revision' ]
    ];
    foreach ( $companions as [ $given, $alsoNeeded ] ) {
        if ( isset( $parameters[$given] ) && !isset( $parameters[$alsoNeeded] ) ) {
            $this->dieWithError( [
                'apierror-invalidparammix-mustusewith',
                $given,
                $alsoNeeded
            ] );
        }
    }

    $this->requireOnlyOneParameter( $parameters, 'revision', 'text', 'ipa' );

    $configuredVoices = $this->config->get( 'WikispeechVoices' );

    // The language must be one of the configured ones.
    $requestedLanguage = $parameters['lang'];
    $configuredLanguages = array_keys( $configuredVoices );
    if ( !in_array( $requestedLanguage, $configuredLanguages ) ) {
        $this->dieWithError( [
            'apierror-wikispeech-listen-invalid-language',
            $requestedLanguage,
            self::makeValuesString( $configuredLanguages )
        ] );
    }

    // A voice is optional, but if given it must belong to the language.
    $requestedVoice = $parameters['voice'];
    if ( $requestedVoice && !in_array( $requestedVoice, $configuredVoices[$requestedLanguage] ) ) {
        $this->dieWithError( [
            'apierror-wikispeech-listen-invalid-voice',
            $requestedVoice,
            self::makeValuesString( $configuredVoices[$requestedLanguage] )
        ] );
    }

    // A missing text is treated as empty for the length check.
    $this->validateText( $parameters['text'] ?? '' );
}
482
/**
 * Format a list of values for display in a message.
 *
 * @since 0.1.3
 * @param array $values Values as strings.
 * @return string Each value wrapped in <kbd> tags, with the
 *  results joined by a comma and a space.
 */
private static function makeValuesString( $values ) {
    $wrapped = array_map(
        static function ( $value ) {
            return "<kbd>$value</kbd>";
        },
        $values
    );
    return implode( ', ', $wrapped );
}
498
/**
 * Declare the parameters this module accepts.
 *
 * `lang` is the only required parameter. `skip-journal-metrics` is a
 * boolean defaulting to false, `revision` is an integer, and the
 * remaining parameters are optional strings. The module's parameters
 * are merged on top of those of the parent.
 *
 * @since 0.1.3
 * @return array
 */
public function getAllowedParams() {
    // Descriptor shared by all the optional string parameters.
    $optionalString = [
        ParamValidator::PARAM_TYPE => 'string'
    ];
    $moduleParams = [
        'lang' => [
            ParamValidator::PARAM_TYPE => 'string',
            ParamValidator::PARAM_REQUIRED => true
        ],
        'text' => $optionalString,
        'ipa' => $optionalString,
        'revision' => [
            ParamValidator::PARAM_TYPE => 'integer'
        ],
        'segment' => $optionalString,
        'voice' => $optionalString,
        'consumer-url' => $optionalString,
        'skip-journal-metrics' => [
            ParamValidator::PARAM_TYPE => 'boolean',
            ParamValidator::PARAM_DEFAULT => false
        ]
    ];
    return array_merge( parent::getAllowedParams(), $moduleParams );
}
538
/**
 * Provide example queries for the API help.
 *
 * @since 0.1.3
 * @return array Example query strings mapped to the message key
 *  describing each of them.
 */
public function getExamplesMessages() {
    // Every example shares the same action, format and language.
    $common = 'action=wikispeech-listen&format=json&lang=en';
    $bySegment = $common . '&revision=1&segment=hash1234';

    $examples = [];
    $examples[$common . '&text=Read this'] = 'apihelp-wikispeech-listen-example-1';
    $examples[$common . '&text=Read this&voice=cmu-slt-hsmm'] = 'apihelp-wikispeech-listen-example-2';
    $examples[$bySegment] = 'apihelp-wikispeech-listen-example-3';
    $examples[$bySegment . '&consumer-url=https://consumer.url/w'] = 'apihelp-wikispeech-listen-example-4';
    return $examples;
}
558}