Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
0.00% covered (danger)
0.00%
0 / 279
0.00% covered (danger)
0.00%
0 / 7
CRAP
0.00% covered (danger)
0.00%
0 / 1
Benchmark
0.00% covered (danger)
0.00%
0 / 273
0.00% covered (danger)
0.00%
0 / 7
1056
0.00% covered (danger)
0.00%
0 / 1
 __construct
0.00% covered (danger)
0.00%
0 / 34
0.00% covered (danger)
0.00%
0 / 1
2
 signalHandler
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 executeSetUp
0.00% covered (danger)
0.00%
0 / 20
0.00% covered (danger)
0.00%
0 / 1
20
 executeValidateInput
0.00% covered (danger)
0.00%
0 / 30
0.00% covered (danger)
0.00%
0 / 1
56
 executeSegmenting
0.00% covered (danger)
0.00%
0 / 22
0.00% covered (danger)
0.00%
0 / 1
6
 executeSynthesizing
0.00% covered (danger)
0.00%
0 / 81
0.00% covered (danger)
0.00%
0 / 1
182
 execute
0.00% covered (danger)
0.00%
0 / 85
0.00% covered (danger)
0.00%
0 / 1
20
1<?php
2
3namespace MediaWiki\Wikispeech;
4
5/**
6 * @file
7 * @ingroup Extensions
8 * @license GPL-2.0-or-later
9 */
10
11use EmptyBagOStuff;
12use Maintenance;
13use MediaWiki\MediaWikiServices;
14use Mediawiki\Title\Title;
15use MediaWiki\Wikispeech\Segment\SegmentList;
16use MediaWiki\Wikispeech\Segment\SegmentPageFactory;
17use RequestContext;
18use RuntimeException;
19use WANObjectCache;
20
/** @var string $IP Path to the root of the MediaWiki installation. */
$IP = getenv( 'MW_INSTALL_PATH' );
// No environment override: fall back to the directory three levels above this script.
$IP = $IP !== false ? $IP : __DIR__ . '/../../..';
require_once $IP . '/maintenance/Maintenance.php';
27
/**
 * Maintenance script to evaluate interesting resource use metrics
 * related to executing Wikispeech and Speechoid on a page.
 *
 * The page is segmented, every segment is sent to Speechoid for
 * synthesis, and timing/size statistics are printed at the end.
 *
 * php extensions/Wikispeech/maintenance/benchmark.php -p Barack_Obama
 *
 * @since 0.1.8
 */
class Benchmark extends Maintenance {

    /** @var VoiceHandler */
    private $voiceHandler;

    /** @var SegmentPageFactory */
    private $segmentPageFactory;

    /** @var SpeechoidConnector */
    private $speechoidConnector;

    /** @var bool Whether or not ctrl-c has been pressed. */
    private $caughtSigInt;

    /** @var SegmentList */
    private $segments;

    /** @var int */
    private $synthesizeResponseTimeoutSeconds;

    /** @var float|int */
    private $millisecondsSpentSegmenting;

    /** @var int */
    private $numberOfSuccessfullySynthesizedSegments;

    /** @var int|float */
    private $totalMillisecondsSpentSynthesizing;

    /** @var int|float */
    private $totalMillisecondsSynthesizedVoice;

    /** @var int */
    private $totalNumberOfTokensSynthesizedVoice;

    /** @var int */
    private $totalBytesSynthesizedVoice;

    /** @var int */
    private $totalNumberOfTokenCharactersSynthesizedVoice;

    /** @var string */
    private $language;

    /** @var string */
    private $voice;

    /** @var Title */
    private $title;

    /**
     * Benchmark constructor.
     *
     * Registers the command line options and, when the pcntl extension
     * is available, a SIGINT handler so that ctrl-c aborts synthesizing
     * but still prints results for what has been evaluated so far.
     *
     * @since 0.1.8
     */
    public function __construct() {
        parent::__construct();
        $this->requireExtension( 'Wikispeech' );
        $this->addDescription( 'Benchmark use of resources.' );
        $this->addOption(
            'language',
            'Synthesized language. If not set, page language is selected.',
            false,
            true,
            'l'
        );
        $this->addOption(
            'voice',
            'Synthesized voice. If not set, default voice for language is selected.',
            false,
            true,
            'v'
        );
        $this->addOption(
            'page',
            'Title of page to be segmented and synthesized.',
            true,
            true,
            'p'
        );
        $this->addOption(
            'timeout',
            'Maximum number of seconds to await Speechoid synthesize HTTP response. Defaults to 240.',
            false,
            true,
            't'
        );

        $this->caughtSigInt = false;
        declare( ticks = 1 );
        // pcntl is an optional extension; without it the script still works,
        // ctrl-c then simply terminates the process the default way.
        if ( function_exists( 'pcntl_async_signals' ) && function_exists( 'pcntl_signal' ) ) {
            pcntl_async_signals( true );
            pcntl_signal( SIGINT, [ $this, 'signalHandler' ] );
        }
    }

    /**
     * Clean ctrl-c: only flags the interrupt, the synthesizing loop
     * checks the flag before starting on the next segment.
     */
    public function signalHandler() {
        $this->caughtSigInt = true;
    }

    /**
     * Creates the collaborators that have not already been set.
     *
     * Non PHP core classes aren't available prior to this point,
     * i.e. we can't initialize the fields in the constructor,
     * and we have to be lenient for mocked instances set by tests.
     */
    private function executeSetUp(): void {
        $services = MediaWikiServices::getInstance();
        $config = $services->getConfigFactory()->makeConfig( 'wikispeech' );
        $requestFactory = $services->getHttpRequestFactory();

        if ( !$this->speechoidConnector ) {
            $this->speechoidConnector = new SpeechoidConnector( $config, $requestFactory );
        }
        if ( !$this->voiceHandler ) {
            $this->voiceHandler = WikispeechServices::getVoiceHandler();
        }
        if ( !$this->segmentPageFactory ) {
            $revisionStore = $services->getRevisionStore();
            // An empty cache plus disabled segment caches makes sure that
            // the segmenting work is actually executed and can be timed.
            $this->segmentPageFactory = new SegmentPageFactory(
                new WANObjectCache( [ 'cache' => new EmptyBagOStuff() ] ),
                $config,
                $revisionStore,
                $requestFactory
            );
            $this->segmentPageFactory
                ->setUseSegmentsCache( false )
                ->setUseRevisionPropertiesCache( false )
                ->setContextSource( new RequestContext() )
                ->setRevisionStore( $revisionStore );
        }
    }

    /**
     * Resolves title, language, voice and timeout from the options.
     *
     * Problems are reported on the script output.
     *
     * @return bool false if the input can not be used for benchmarking.
     */
    private function executeValidateInput(): bool {
        $this->language = '';
        $this->voice = '';

        // Title::newFromText returns null for text that is not a valid title.
        $title = Title::newFromText( $this->getOption( 'page' ) );
        if ( $title === null ) {
            $this->output( "Error: Page title is invalid.\n" );
            return false;
        }
        $this->title = $title;
        if ( !$this->title->isKnown() ) {
            $this->output( "Error: Page is not known.\n" );
            return false;
        }
        if ( $this->title->isSpecialPage() ) {
            $this->output( "Error: Page is a SpecialPage.\n" );
            return false;
        }

        if ( $this->getOption( 'language', false ) ) {
            $this->language = $this->getOption( 'language' );
            $this->output( "Language $this->language set from option.\n" );
            // todo validate language
        } else {
            $language = $this->title->getPageLanguage();
            if ( !$language ) {
                $this->output( "Error: Unable to read language for page. Use parameter language.\n" );
                return false;
            }
            $this->language = $language->getCode();
            $this->output( "Language $this->language set from page default.\n" );
        }

        if ( $this->getOption( 'voice', false ) ) {
            $this->voice = $this->getOption( 'voice' );
            $this->output( "Voice $this->voice set from option.\n" );
            // todo validate voice of language
        } else {
            $this->voice = $this->voiceHandler->getDefaultVoice( $this->language );
            if ( !$this->voice ) {
                // This will never occur unless underlying default voice logic change.
                // I.e. if the default voice cannot be found
                // then your language must not be defined (in Speechoid or locally)
                $this->output( "Error: No default voice for language $this->language. Use parameter voice.\n" );
                return false;
            }
            $this->output( "Voice $this->voice set from default for language $this->language.\n" );
        }

        $this->synthesizeResponseTimeoutSeconds = intval(
            $this->getOption( 'timeout', 240 )
        );

        return true;
    }

    /**
     * Segments the page and records the time spent doing so.
     *
     * @throws RuntimeException If the segmenter produced no segment list.
     */
    private function executeSegmenting(): void {
        // @todo consider adding revision as script parameter.
        // Setting null requests the most recent revision for the title.
        $revisionId = null;

        $this->output(
            "Benchmarking page {$this->title->getText()} using language " .
            "$this->language and voice $this->voice.\n"
        );

        // We don't want to count time spent rendering to segmenting time,
        // so we call the segmenter twice. Segmenting cache is turned off.
        $this->output( "Allowing for MediaWiki to render page...\n" );
        $this->segmentPageFactory->segmentPage( $this->title, $revisionId );

        $this->output( "Segmenting...\n" );
        $startSegmenting = microtime( true ) * 1000;
        $segments = $this->segmentPageFactory
            ->segmentPage( $this->title, $revisionId )
            ->getSegments();
        $endSegmenting = microtime( true ) * 1000;
        if ( $segments === null ) {
            throw new RuntimeException( 'Segments is null!' );
        }
        $this->segments = $segments;
        $this->millisecondsSpentSegmenting = $endSegmenting - $startSegmenting;
    }

    /**
     * Synthesizes all segments, one at a time, accumulating statistics.
     *
     * Each segment is attempted up to three times. A progress character
     * is printed per segment: '.' for success at first attempt, the
     * attempt number for success after retries and 'E' for a segment
     * that was given up on. Failure details are printed after the loop.
     * The loop stops before the next segment once ctrl-c has been caught.
     */
    private function executeSynthesizing(): void {
        $this->numberOfSuccessfullySynthesizedSegments = 0;
        $this->totalBytesSynthesizedVoice = 0;
        $this->totalNumberOfTokenCharactersSynthesizedVoice = 0;
        $this->totalNumberOfTokensSynthesizedVoice = 0;
        $this->totalMillisecondsSynthesizedVoice = 0;
        $this->totalMillisecondsSpentSynthesizing = 0;

        $segments = $this->segments->getSegments();
        $numberOfSegments = count( $segments );

        $this->output( 'Synthesizing ' . $numberOfSegments . " segments... \n" );
        $this->output( "Press ^C to abort and calculate on evaluated state.\n" );

        $failures = '';

        // Number of progress characters per line of output.
        $progressCounterLength = 40;
        $maximumAttempts = 3;
        $segmentCounter = 0;
        $progressCounter = 0;
        foreach ( $segments as $segment ) {
            if ( $this->caughtSigInt ) {
                break;
            }
            $segmentCounter++;

            $segmentText = '';
            foreach ( $segment->getContent() as $content ) {
                $segmentText .= $content->getString();
            }

            $synthesized = false;
            $attempt = 0;
            while ( !$synthesized && $attempt < $maximumAttempts ) {
                $attempt++;
                $startSynthesizing = microtime( true ) * 1000;
                try {
                    $speechoidResponse = $this->speechoidConnector->synthesizeText(
                        $this->language, $this->voice, $segmentText, $this->synthesizeResponseTimeoutSeconds
                    );
                    $this->totalMillisecondsSpentSynthesizing +=
                        ( microtime( true ) * 1000 ) - $startSynthesizing;
                    $this->recordSynthesizedSegment( $speechoidResponse );
                    $synthesized = true;
                } catch ( SpeechoidConnectorException $speechoidConnectorException ) {
                    $millisecondsSpentBeforeException = ( microtime( true ) * 1000 ) - $startSynthesizing;
                    $failures .= "\nException $millisecondsSpentBeforeException milliseconds after request.\n";
                    $failures .= $speechoidConnectorException->getMessage() . "\n";
                }
            }

            if ( $synthesized ) {
                $this->output( $attempt > 1 ? strval( $attempt ) : '.' );
            } else {
                $failures .= "Giving up after attempt #$attempt. Segment ignored.\n";
                $failures .= $segmentText;
                $failures .= "\n";
                $this->output( 'E' );
            }

            $progressCounter++;
            if ( $progressCounter === $progressCounterLength ) {
                $progressCounter = 0;
                $this->output(
                    ' ' .
                    $segmentCounter . ' / ' . $numberOfSegments .
                    $this->formatEta( $numberOfSegments - $segmentCounter ) . "\n"
                );
            }
        }

        if ( $failures ) {
            $this->output( "\n" );
            $this->output( $failures );
            $this->output( "\n" );
        }
    }

    /**
     * Adds the metrics of one successful Speechoid response to the totals.
     *
     * @param array $speechoidResponse Response from the synthesize call;
     *  read keys are 'audio_data' and 'tokens' (each token with
     *  'orth' and 'endtime').
     */
    private function recordSynthesizedSegment( $speechoidResponse ): void {
        $tokens = $speechoidResponse['tokens'];
        $numberOfTokensInSegment = count( $tokens );

        // Size is measured in bytes, hence strlen rather than mb_strlen.
        // NOTE(review): audio_data is presumably base64 encoded, in which case
        // this is the size of the encoded payload - confirm.
        $this->totalBytesSynthesizedVoice += strlen( $speechoidResponse['audio_data'] );
        $this->totalNumberOfTokensSynthesizedVoice += $numberOfTokensInSegment;

        // The end time of the last token is used as the length of the utterance.
        // A response without tokens contributes no voice time.
        if ( $numberOfTokensInSegment > 0 ) {
            $this->totalMillisecondsSynthesizedVoice +=
                $tokens[ $numberOfTokensInSegment - 1 ]['endtime'];
        }

        $charactersInSegmentTokens = 0;
        foreach ( $tokens as $token ) {
            $charactersInSegmentTokens += mb_strlen( $token['orth'] );
        }
        $this->totalNumberOfTokenCharactersSynthesizedVoice += $charactersInSegmentTokens;

        $this->numberOfSuccessfullySynthesizedSegments++;
    }

    /**
     * Builds the estimated-time-left suffix of a progress line.
     *
     * The estimate is the mean time spent per successfully synthesized
     * segment multiplied by the number of segments left.
     *
     * @param int $remainingSegments Number of segments not yet processed.
     * @return string Text starting with ', ETA'.
     */
    private function formatEta( int $remainingSegments ): string {
        if ( $this->numberOfSuccessfullySynthesizedSegments === 0 ) {
            // Nothing has succeeded yet, there is no mean to extrapolate from.
            return ', ETA unknown';
        }

        $meanMillisecondsPerSegment =
            $this->totalMillisecondsSpentSynthesizing / $this->numberOfSuccessfullySynthesizedSegments;
        // intdiv() requires integers; passing floats with a fractional part
        // is deprecated as of PHP 8.1, so truncate explicitly.
        $millisecondsEta = (int)( $remainingSegments * $meanMillisecondsPerSegment );

        if ( $millisecondsEta < 1000 ) {
            $eta = $millisecondsEta . ' ms';
        } elseif ( $millisecondsEta < 1000 * 60 ) {
            $eta = intdiv( $millisecondsEta, 1000 ) . ' seconds';
        } else {
            $eta = intdiv( $millisecondsEta, 1000 * 60 ) . ' minutes';
        }

        return ', ETA ~' . $eta .
            ' (~' . intdiv( (int)$meanMillisecondsPerSegment, 1000 ) . 's/seg)';
    }

    /**
     * Division that yields 0 rather than failing when there is nothing
     * to calculate a mean from.
     *
     * @param int|float $total
     * @param int $count
     * @return int|float
     */
    private static function mean( $total, int $count ) {
        return $count > 0 ? $total / $count : 0;
    }

    /**
     * Runs the benchmark and prints the results.
     *
     * @since 0.1.8
     * @return bool success; false on invalid input or if nothing was synthesized.
     */
    public function execute() {
        $this->executeSetUp();
        if ( !$this->executeValidateInput() ) {
            return false;
        }
        $this->executeSegmenting();
        $this->executeSynthesizing();

        $numberOfSegments = count( $this->segments->getSegments() );
        $synthesizedSegments = $this->numberOfSuccessfullySynthesizedSegments;
        $synthesizedTokens = $this->totalNumberOfTokensSynthesizedVoice;
        $synthesizedTokenCharacters = $this->totalNumberOfTokenCharactersSynthesizedVoice;

        $this->output( "\n\n" );
        $this->output( "Benchmark results\n" );
        $this->output( "-----------------\n" );
        $this->output( "\n" );

        $this->output( 'Number of segments: ' . $numberOfSegments . "\n" );
        $this->output( "Milliseconds spent segmenting: $this->millisecondsSpentSegmenting\n" );

        // A page may produce no segments at all, hence the guarded division.
        $meanMillisecondsSpentSegmentingPerSegment =
            self::mean( $this->millisecondsSpentSegmenting, $numberOfSegments );

        $this->output( 'Mean milliseconds spent segmenting per segment: ' .
            "$meanMillisecondsSpentSegmentingPerSegment\n" );

        if ( $synthesizedSegments === 0 ) {
            $this->output( "Nothing synthesized, no further metrics available.\n" );
            return false;
        }

        $this->totalMillisecondsSpentSynthesizing = intval( $this->totalMillisecondsSpentSynthesizing );
        $this->totalMillisecondsSynthesizedVoice = intval( $this->totalMillisecondsSynthesizedVoice );

        // NOTE(review): these two are calculated from the length of the synthesized
        // voice, not from the time spent synthesizing - confirm that is the intention.
        $meanMillisecondsSynthesizingPerToken =
            self::mean( $this->totalMillisecondsSynthesizedVoice, $synthesizedTokens );
        $meanMillisecondsSynthesizingPerCharacter =
            self::mean( $this->totalMillisecondsSynthesizedVoice, $synthesizedTokenCharacters );
        $meanBytesSynthesizedVoicePerToken =
            self::mean( $this->totalBytesSynthesizedVoice, $synthesizedTokens );
        $meanBytesSynthesizedVoicePerCharacter =
            self::mean( $this->totalBytesSynthesizedVoice, $synthesizedTokenCharacters );

        $meanTokensPerSegment = $synthesizedTokens / $synthesizedSegments;
        $meanTokenCharactersPerSegment = $synthesizedTokenCharacters / $synthesizedSegments;

        // Segmenting time attributed to the segments that were synthesized.
        $millisecondsSegmentingSynthesizedSegments =
            $meanMillisecondsSpentSegmentingPerSegment * $synthesizedSegments;
        $meanMillisecondsSpentSegmentingPerToken =
            self::mean( $millisecondsSegmentingSynthesizedSegments, $synthesizedTokens );
        $meanMillisecondsSpentSegmentingPerTokenCharacter =
            self::mean( $millisecondsSegmentingSynthesizedSegments, $synthesizedTokenCharacters );

        $this->output( 'Mean milliseconds spent segmenting per token synthesized: ' .
            "$meanMillisecondsSpentSegmentingPerToken\n" );
        $this->output( 'Mean milliseconds spent segmenting per token character synthesized: ' .
            "$meanMillisecondsSpentSegmentingPerTokenCharacter\n" );

        if ( $synthesizedSegments !== $numberOfSegments ) {
            $this->output( 'Warning! Not all segments synthesized, ' .
                "mean segmenting per token values might be slightly off.\n" );
        }

        $this->output( "\n" );

        $this->output( 'Number of synthesized segments: ' .
            "$synthesizedSegments\n" );
        $this->output( "Number of synthesized tokens: $synthesizedTokens\n" );
        $this->output( 'Number of synthesized token characters: ' .
            "$synthesizedTokenCharacters\n" );

        $this->output( "\n" );

        $this->output( "Mean number of tokens per synthesized segment: $meanTokensPerSegment\n" );
        $this->output( 'Mean number of token characters per synthesized segment: ' .
            "$meanTokenCharactersPerSegment\n" );

        $this->output( "\n" );

        $this->output( 'Mean milliseconds synthesizing per token: ' .
            "$meanMillisecondsSynthesizingPerToken\n" );
        $this->output( 'Mean milliseconds synthesizing per token character: ' .
            "$meanMillisecondsSynthesizingPerCharacter\n" );

        $this->output( 'Mean bytes synthesized voice per token: ' .
            intval( $meanBytesSynthesizedVoicePerToken ) . "\n" );
        $this->output( 'Mean bytes synthesized voice per token character: ' .
            intval( $meanBytesSynthesizedVoicePerCharacter ) . "\n" );

        $this->output( "\n" );

        $this->output( "Milliseconds of synthesized voice: $this->totalMillisecondsSynthesizedVoice\n" );
        $this->output( 'Seconds of synthesized voice: ' .
            intdiv( $this->totalMillisecondsSynthesizedVoice, 1000 ) . "\n" );
        $this->output( 'Minutes of synthesized voice: ' .
            intdiv( $this->totalMillisecondsSynthesizedVoice, 1000 * 60 ) . "\n" );

        $this->output( "\n" );

        $this->output( "Milliseconds spent synthesizing: $this->totalMillisecondsSpentSynthesizing\n" );
        $this->output( 'Seconds spent synthesizing: ' .
            intdiv( $this->totalMillisecondsSpentSynthesizing, 1000 ) . "\n" );
        $this->output( 'Minutes spent synthesizing: ' .
            intdiv( $this->totalMillisecondsSpentSynthesizing, 1000 * 60 ) . "\n" );

        $this->output( "\n" );

        $this->output( "Synthesized voice bytes: $this->totalBytesSynthesizedVoice\n" );
        $this->output( 'Synthesized voice kilobytes: ' .
            intdiv( $this->totalBytesSynthesizedVoice, 1024 ) . "\n" );
        $this->output( 'Synthesized voice megabytes: ' .
            intdiv( $this->totalBytesSynthesizedVoice, 1024 * 1024 ) . "\n" );

        return true;
    }

}
484
/** @var string $maintClass Name of this class, required to start via Maintenance. */
$maintClass = Benchmark::class;

require_once RUN_MAINTENANCE_IF_MAIN;