Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
0.00% covered (danger)
0.00%
0 / 285
0.00% covered (danger)
0.00%
0 / 7
CRAP
0.00% covered (danger)
0.00%
0 / 1
Benchmark
0.00% covered (danger)
0.00%
0 / 279
0.00% covered (danger)
0.00%
0 / 7
1056
0.00% covered (danger)
0.00%
0 / 1
 __construct
0.00% covered (danger)
0.00%
0 / 34
0.00% covered (danger)
0.00%
0 / 1
2
 signalHandler
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 executeSetUp
0.00% covered (danger)
0.00%
0 / 26
0.00% covered (danger)
0.00%
0 / 1
20
 executeValidateInput
0.00% covered (danger)
0.00%
0 / 30
0.00% covered (danger)
0.00%
0 / 1
56
 executeSegmenting
0.00% covered (danger)
0.00%
0 / 22
0.00% covered (danger)
0.00%
0 / 1
6
 executeSynthesizing
0.00% covered (danger)
0.00%
0 / 81
0.00% covered (danger)
0.00%
0 / 1
182
 execute
0.00% covered (danger)
0.00%
0 / 85
0.00% covered (danger)
0.00%
0 / 1
20
1<?php
2
3namespace MediaWiki\Wikispeech;
4
5/**
6 * @file
7 * @ingroup Extensions
8 * @license GPL-2.0-or-later
9 */
10
11use EmptyBagOStuff;
12use Maintenance;
13use MediaWiki\Logger\LoggerFactory;
14use MediaWiki\MediaWikiServices;
15use MediaWiki\Wikispeech\Segment\SegmentList;
16use MediaWiki\Wikispeech\Segment\SegmentPageFactory;
17use MWException;
18use RequestContext;
19use Title;
20use WANObjectCache;
21
/**
 * @var string MediaWiki installation path.
 *
 * Read from the MW_INSTALL_PATH environment variable. When that variable
 * is not set, getenv() returns false and the path falls back to three
 * directories above the directory of this file.
 */
$IP = getenv( 'MW_INSTALL_PATH' );
if ( $IP === false ) {
    $IP = __DIR__ . '/../../..';
}
// Must be loaded before the class declaration below;
// presumably this is what defines the Maintenance base class — confirm against MediaWiki core.
require_once "$IP/maintenance/Maintenance.php";
28
29/**
30 * Maintenance script to evaluate interesting resource use metrics
31 * related to executing Wikispeech and Speechoid on a page.
32 *
33 * php extensions/Wikispeech/maintenance/benchmark.php -p Barack_Obama
34 *
35 * @since 0.1.8
36 */
class Benchmark extends Maintenance {

    /** @var VoiceHandler Used to look up the default voice of a language. */
    private $voiceHandler;

    /** @var SegmentPageFactory Used to segment the benchmarked page. Caches are turned off. */
    private $segmentPageFactory;

    /** @var SpeechoidConnector Used to synthesize the text of each segment. */
    private $speechoidConnector;

    /** @var bool Whether or not ctrl-c has been pressed. */
    private $caughtSigInt;

    /** @var SegmentList Segments of the benchmarked page, set by executeSegmenting(). */
    private $segments;

    /** @var int Value of option 'timeout', 240 unless set. Passed on to each synthesize request. */
    private $synthesizeResponseTimeoutSeconds;

    /** @var float|int Wall clock time of the second (measured) segmenting pass. */
    private $millisecondsSpentSegmenting;

    /** @var int Number of segments for which a synthesize request succeeded. */
    private $numberOfSuccessfullySynthesizedSegments;

    /** @var int|float Sum of wall clock time of all successful synthesize requests. */
    private $totalMillisecondsSpentSynthesizing;

    /** @var int Sum of 'endtime' of the last token in each successful response. */
    private $totalMillisecondsSynthesizedVoice;

    /** @var int Sum of number of tokens in all successful responses. */
    private $totalNumberOfTokensSynthesizedVoice;

    /** @var int Sum of byte length of 'audio_data' in all successful responses. */
    private $totalBytesSynthesizedVoice;

    /** @var int Sum of character length of token 'orth' in all successful responses. */
    private $totalNumberOfTokenCharactersSynthesizedVoice;

    /** @var string Language code to synthesize in, from option or page language. */
    private $language;

    /** @var string Voice to synthesize with, from option or default voice of language. */
    private $voice;

    /** @var Title Title of the page to benchmark, from option 'page'. */
    private $title;

    /**
     * Benchmark constructor.
     *
     * Registers the command line options (language, voice, page, timeout)
     * and, when possible, a SIGINT handler that lets the user abort
     * synthesizing with ctrl-c and still get a report on what was
     * evaluated so far.
     *
     * @since 0.1.8
     */
    public function __construct() {
        parent::__construct();
        $this->requireExtension( 'Wikispeech' );
        $this->addDescription( 'Benchmark use of resources.' );
        $this->addOption(
            'language',
            'Synthesized language. If not set, page language is selected.',
            false,
            true,
            'l'
        );
        $this->addOption(
            'voice',
            'Synthesized voice. If not set, default voice for language is selected.',
            false,
            true,
            'v'
        );
        $this->addOption(
            'page',
            'Title of page to be segmented and synthesized.',
            true,
            true,
            'p'
        );
        $this->addOption(
            'timeout',
            'Maximum number of seconds to await Speechoid synthesize HTTP response. Defaults to 240.',
            false,
            true,
            't'
        );

        $this->caughtSigInt = false;
        // The pcntl extension is optional (and its functions can be disabled),
        // so only install the handler when it is actually available.
        // Without it ctrl-c simply terminates the script the default way.
        // Asynchronous signal handling makes declare( ticks ) unnecessary.
        if ( function_exists( 'pcntl_async_signals' ) && function_exists( 'pcntl_signal' ) ) {
            pcntl_async_signals( true );
            pcntl_signal( SIGINT, [ $this, 'signalHandler' ] );
        }
    }

    /**
     * Clean ctrl-c
     *
     * Registered as SIGINT handler by the constructor. Only raises a flag;
     * the synthesizing loop checks the flag before starting on the next
     * segment and then stops, so that the report is still produced.
     */
    public function signalHandler() {
        $this->caughtSigInt = true;
    }

    /**
     * Creates the collaborators that have not already been set.
     *
     * Both the voice handler and the segment page factory are given a
     * cache backed by EmptyBagOStuff, and the segment page factory has
     * its segment and revision properties caches switched off.
     */
    private function executeSetUp(): void {
        // Non PHP core classes aren't available prior to this point,
        // i.e. we can't initialize the fields in the constructor,
        // and we have to be lenient for mocked instances set by tests.

        $services = MediaWikiServices::getInstance();
        $configFactory = $services->getConfigFactory();
        $config = $configFactory->makeConfig( 'wikispeech' );
        $requestFactory = $services->getHttpRequestFactory();

        $emptyWanCache = new WANObjectCache( [ 'cache' => new EmptyBagOStuff() ] );

        $logger = LoggerFactory::getInstance( 'Wikispeech' );

        if ( !$this->speechoidConnector ) {
            $this->speechoidConnector = new SpeechoidConnector( $config, $requestFactory );
        }
        if ( !$this->voiceHandler ) {
            $this->voiceHandler = new VoiceHandler(
                $logger,
                $config,
                $this->speechoidConnector,
                $emptyWanCache
            );
        }
        if ( !$this->segmentPageFactory ) {
            $this->segmentPageFactory = new SegmentPageFactory(
                $emptyWanCache,
                $configFactory
            );
            $this->segmentPageFactory
                ->setUseSegmentsCache( false )
                ->setUseRevisionPropertiesCache( false )
                ->setContextSource( new RequestContext() )
                ->setRevisionStore( $services->getRevisionStore() );
        }
    }

    /**
     * Reads the command line options into title, language, voice and timeout.
     *
     * An error message is written to the output for each reason of failure.
     *
     * @return bool false if the page title is invalid, the page is unknown
     *  or a special page, the page language can not be read,
     *  or no default voice exists for the language. Otherwise true.
     */
    private function executeValidateInput(): bool {
        $this->language = '';
        $this->voice = '';

        // Title::newFromText returns null rather than a Title
        // when the text can not be parsed as a title.
        $title = Title::newFromText( $this->getOption( 'page' ) );
        if ( !$title ) {
            $this->output( "Error: Page title is invalid.\n" );
            return false;
        }
        $this->title = $title;
        if ( !$this->title->isKnown() ) {
            $this->output( "Error: Page is not known.\n" );
            return false;
        }
        if ( $this->title->isSpecialPage() ) {
            $this->output( "Error: Page is a SpecialPage.\n" );
            return false;
        }

        if ( !$this->getOption( 'language', false ) ) {
            $language = $this->title->getPageLanguage();
            if ( !$language ) {
                $this->output( "Error: Unable to read language for page. Use parameter language.\n" );
                return false;
            }
            $this->language = $language->getCode();
            $this->output( "Language $this->language set from page default.\n" );
        } else {
            $this->language = $this->getOption( 'language' );
            $this->output( "Language $this->language set from option.\n" );
            // todo validate language
        }

        if ( !$this->getOption( 'voice', false ) ) {
            $this->voice = $this->voiceHandler->getDefaultVoice( $this->language );
            if ( !$this->voice ) {
                // This will never occur unless underlying default voice logic change.
                // I.e. if the default voice cannot be found
                // then your language must not be defined (in Speechoid or locally)
                $this->output( "Error: No default voice for language $this->language. Use parameter voice.\n" );
                return false;
            }
            $this->output( "Voice $this->voice set from default for language $this->language.\n" );
        } else {
            $this->voice = $this->getOption( 'voice' );
            $this->output( "Voice $this->voice set from option.\n" );
            // todo validate voice of language
        }

        $this->synthesizeResponseTimeoutSeconds = intval(
            $this->getOption( 'timeout', 240 )
        );

        return true;
    }

    /**
     * Segments the page and measures the time spent doing so.
     *
     * The page is segmented twice and only the second pass is timed.
     * The result is stored in the fields segments and
     * millisecondsSpentSegmenting.
     *
     * @throws MWException If the segmented page has no segments list.
     */
    private function executeSegmenting(): void {
        // @todo consider adding revision as script parameter.
        // Setting null will request the most recent for the title.
        $revisionId = null;

        $this->output( 'Benchmarking page ' .
            "{$this->title->getText()} using language " .
            "$this->language and voice " .
            "$this->voice.\n"
        );

        // We don't want to count time spent rendering to segmenting time,
        // so we call the segmenter twice. Segmenting cache is turned off.
        $this->output( "Allowing for MediaWiki to render page...\n" );
        $this->segmentPageFactory->segmentPage(
            $this->title,
            $revisionId
        );

        $this->output( "Segmenting...\n" );
        $startSegmenting = microtime( true ) * 1000;
        $segments = $this->segmentPageFactory->segmentPage(
            $this->title,
            $revisionId
        )->getSegments();
        if ( $segments === null ) {
            throw new MWException( 'Segments is null!' );
        }
        $this->segments = $segments;
        $endSegmenting = microtime( true ) * 1000;
        $this->millisecondsSpentSegmenting = $endSegmenting - $startSegmenting;
    }

    /**
     * Synthesizes all segments, one at a time, and accumulates metrics.
     *
     * Each segment is attempted up to three times. One character is
     * written to the output per segment: '.' on success at first attempt,
     * the attempt number on success after retries, or 'E' when giving up.
     * After every 40 segments a progress line with an estimated time of
     * arrival is written. Details on failed requests are collected and
     * written once the loop is done.
     *
     * The loop ends prematurely, before starting on the next segment,
     * if ctrl-c has been caught.
     */
    private function executeSynthesizing(): void {
        $segments = $this->segments->getSegments();
        $numberOfSegments = count( $segments );

        $this->numberOfSuccessfullySynthesizedSegments = 0;
        $this->totalBytesSynthesizedVoice = 0;
        $this->totalNumberOfTokenCharactersSynthesizedVoice = 0;
        $this->totalNumberOfTokensSynthesizedVoice = 0;
        $this->totalMillisecondsSynthesizedVoice = 0;
        $this->totalMillisecondsSpentSynthesizing = 0;

        $this->output( 'Synthesizing ' . $numberOfSegments . " segments... \n" );
        $this->output( "Press ^C to abort and calculate on evaluated state.\n" );

        $failures = '';

        $maximumAttempts = 3;
        $progressCounterLength = 40;
        $progressCounter = 0;
        $segmentCounter = 0;
        foreach ( $segments as $segment ) {
            if ( $this->caughtSigInt ) {
                break;
            }
            $segmentCounter++;

            $segmentText = '';
            foreach ( $segment->getContent() as $content ) {
                $segmentText .= $content->getString();
            }

            for ( $attempt = 1; $attempt <= $maximumAttempts; $attempt++ ) {
                $startSynthesizing = microtime( true ) * 1000;
                try {
                    $speechoidResponse = $this->speechoidConnector->synthesizeText(
                        $this->language, $this->voice, $segmentText, $this->synthesizeResponseTimeoutSeconds
                    );
                } catch ( SpeechoidConnectorException $speechoidConnectorException ) {
                    $millisecondsSpentBeforeException = ( microtime( true ) * 1000 ) - $startSynthesizing;
                    $failures .= "\nException $millisecondsSpentBeforeException milliseconds after request.\n";
                    $failures .= $speechoidConnectorException->getMessage() . "\n";
                    if ( $attempt === $maximumAttempts ) {
                        $failures .= "Giving up after attempt #$attempt. Segment ignored.\n";
                        $failures .= $segmentText;
                        $failures .= "\n";
                        $this->output( 'E' );
                    }
                    // Either retries, or ends the loop if this was the last attempt.
                    continue;
                }
                // Time spent on failed attempts is deliberately not counted.
                $this->totalMillisecondsSpentSynthesizing +=
                    ( microtime( true ) * 1000 ) - $startSynthesizing;
                $this->recordSynthesizedSegment( $speechoidResponse );
                $this->output( $attempt > 1 ? strval( $attempt ) : '.' );
                break;
            }

            // Segments given up on also count as progress.
            $progressCounter++;
            if ( $progressCounter === $progressCounterLength ) {
                $progressCounter = 0;
                $this->output(
                    ' ' .
                    $segmentCounter . ' / ' . $numberOfSegments .
                    $this->formatEta( $numberOfSegments - $segmentCounter ) . "\n"
                );
            }
        }

        if ( $failures ) {
            $this->output( "\n" );
            $this->output( $failures );
            $this->output( "\n" );
        }
    }

    /**
     * Adds the metrics of one successful synthesize response to the totals.
     *
     * @param array $speechoidResponse Response from the Speechoid connector.
     *  Read keys are 'audio_data' (string) and 'tokens' (list of arrays
     *  with keys 'orth' and 'endtime').
     */
    private function recordSynthesizedSegment( $speechoidResponse ): void {
        // strlen counts bytes no matter what multi byte encoding is configured.
        $this->totalBytesSynthesizedVoice += strlen( $speechoidResponse['audio_data'] );

        $tokens = $speechoidResponse['tokens'];
        $numberOfTokensInSegment = count( $tokens );
        $this->totalNumberOfTokensSynthesizedVoice += $numberOfTokensInSegment;

        // End time of the last token is used as the duration of the voice.
        // A response without tokens contributes no duration.
        if ( $numberOfTokensInSegment > 0 ) {
            $this->totalMillisecondsSynthesizedVoice +=
                $tokens[ $numberOfTokensInSegment - 1 ]['endtime'];
        }

        $charactersInSegmentTokens = 0;
        foreach ( $tokens as $token ) {
            $charactersInSegmentTokens += mb_strlen( $token['orth'] );
        }
        $this->totalNumberOfTokenCharactersSynthesizedVoice += $charactersInSegmentTokens;

        $this->numberOfSuccessfullySynthesizedSegments++;
    }

    /**
     * Formats the estimated time left, based on the mean time spent
     * per successfully synthesized segment so far.
     *
     * @param int $remainingSegments Number of segments not yet processed.
     * @return string Suffix for the progress line, starting with ', ETA'.
     */
    private function formatEta( int $remainingSegments ): string {
        if ( $this->numberOfSuccessfullySynthesizedSegments === 0 ) {
            // Every segment so far failed, there is no mean to estimate from.
            return ', ETA unknown';
        }
        $meanMillisecondsSpentSynthesizingPerSegment =
            $this->totalMillisecondsSpentSynthesizing / $this->numberOfSuccessfullySynthesizedSegments;
        // Explicit casts; implicitly passing a fractional float to intdiv is deprecated.
        $millisecondsEta = (int)( $remainingSegments * $meanMillisecondsSpentSynthesizingPerSegment );
        if ( $millisecondsEta < 1000 ) {
            $eta = $millisecondsEta . ' ms';
        } elseif ( $millisecondsEta < 1000 * 60 ) {
            $eta = intdiv( $millisecondsEta, 1000 ) . ' seconds';
        } else {
            $eta = intdiv( $millisecondsEta, 1000 * 60 ) . ' minutes';
        }
        return ', ETA ~' . $eta .
            ' (~' . intdiv( (int)$meanMillisecondsSpentSynthesizingPerSegment, 1000 ) . 's/seg)';
    }

    /**
     * Division that evaluates to 0 rather than failing on a zero divisor.
     *
     * @param int|float $dividend
     * @param int|float $divisor
     * @return int|float
     */
    private static function divideOrZero( $dividend, $divisor ) {
        return $divisor ? $dividend / $divisor : 0;
    }

    /**
     * Runs the benchmark and writes the report to the output.
     *
     * Sets up, validates the options, segments the page, synthesizes
     * all segments and finally reports totals and means.
     * The script exits with status 1 if not a single segment was
     * successfully synthesized.
     *
     * @since 0.1.8
     * @return bool success
     */
    public function execute() {
        $this->executeSetUp();
        if ( !$this->executeValidateInput() ) {
            return false;
        }
        $this->executeSegmenting();
        $this->executeSynthesizing();

        $numberOfSegments = count( $this->segments->getSegments() );

        $this->output( "\n\n" );
        $this->output( "Benchmark results\n" );
        $this->output( "-----------------\n" );
        $this->output( "\n" );

        $this->output( 'Number of segments: ' .
            $numberOfSegments . "\n" );
        $this->output( "Milliseconds spent segmenting: $this->millisecondsSpentSegmenting\n" );

        // A page might produce no segments at all.
        $meanMillisecondsSpentSegmentingPerSegment =
            self::divideOrZero( $this->millisecondsSpentSegmenting, $numberOfSegments );

        $this->output( 'Mean milliseconds spent segmenting per segment: ' .
            "$meanMillisecondsSpentSegmentingPerSegment\n" );

        if ( $this->numberOfSuccessfullySynthesizedSegments === 0 ) {
            $this->output( "Nothing synthesized, no further metrics available.\n" );
            exit( 1 );
        }

        $this->totalMillisecondsSpentSynthesizing = intval( $this->totalMillisecondsSpentSynthesizing );
        $this->totalMillisecondsSynthesizedVoice = intval( $this->totalMillisecondsSynthesizedVoice );

        // Token and character totals are zero if all responses lacked tokens,
        // hence the guarded divisions.
        $totalTokens = $this->totalNumberOfTokensSynthesizedVoice;
        $totalCharacters = $this->totalNumberOfTokenCharactersSynthesizedVoice;

        $meanMillisecondsSynthesizingPerToken =
            self::divideOrZero( $this->totalMillisecondsSynthesizedVoice, $totalTokens );
        $meanMillisecondsSynthesizingPerCharacter =
            self::divideOrZero( $this->totalMillisecondsSynthesizedVoice, $totalCharacters );
        $meanBytesSynthesizedVoicePerToken =
            self::divideOrZero( $this->totalBytesSynthesizedVoice, $totalTokens );
        $meanBytesSynthesizedVoicePerCharacter =
            self::divideOrZero( $this->totalBytesSynthesizedVoice, $totalCharacters );

        $meanTokensPerSegment =
            $totalTokens / $this->numberOfSuccessfullySynthesizedSegments;
        $meanTokenCharactersPerSegment =
            $totalCharacters / $this->numberOfSuccessfullySynthesizedSegments;

        // Segmenting time attributed to the segments that were synthesized.
        $millisecondsSpentSegmentingSynthesizedSegments =
            $meanMillisecondsSpentSegmentingPerSegment * $this->numberOfSuccessfullySynthesizedSegments;
        $meanMillisecondsSpentSegmentingPerToken =
            self::divideOrZero( $millisecondsSpentSegmentingSynthesizedSegments, $totalTokens );
        $meanMillisecondsSpentSegmentingPerTokenCharacter =
            self::divideOrZero( $millisecondsSpentSegmentingSynthesizedSegments, $totalCharacters );

        $this->output( 'Mean milliseconds spent segmenting per token synthesized: ' .
            "$meanMillisecondsSpentSegmentingPerToken\n" );
        $this->output( 'Mean milliseconds spent segmenting per token character synthesized: ' .
            "$meanMillisecondsSpentSegmentingPerTokenCharacter\n" );

        if ( $this->numberOfSuccessfullySynthesizedSegments !== $numberOfSegments ) {
            $this->output( 'Warning! Not all segments synthesized, ' .
                "mean segmenting per token values might be slightly off.\n" );
        }

        $this->output( "\n" );

        $this->output( 'Number of synthesized segments: ' .
            "$this->numberOfSuccessfullySynthesizedSegments\n" );
        $this->output( "Number of synthesized tokens: $this->totalNumberOfTokensSynthesizedVoice\n" );
        $this->output( 'Number of synthesized token characters: ' .
            "$this->totalNumberOfTokenCharactersSynthesizedVoice\n" );

        $this->output( "\n" );

        $this->output( "Mean number of tokens per synthesized segment: $meanTokensPerSegment\n" );
        $this->output( 'Mean number of token characters per synthesized segment: ' .
            "$meanTokenCharactersPerSegment\n" );

        $this->output( "\n" );

        $this->output( 'Mean milliseconds synthesizing per token: ' .
            "$meanMillisecondsSynthesizingPerToken\n" );
        $this->output( 'Mean milliseconds synthesizing per token character: ' .
            "$meanMillisecondsSynthesizingPerCharacter\n" );

        $this->output( 'Mean bytes synthesized voice per token: ' .
            intval( $meanBytesSynthesizedVoicePerToken ) . "\n" );
        $this->output( 'Mean bytes synthesized voice per token character: ' .
            intval( $meanBytesSynthesizedVoicePerCharacter ) . "\n" );

        $this->output( "\n" );

        $this->output( "Milliseconds of synthesized voice: $this->totalMillisecondsSynthesizedVoice\n" );
        $this->output( 'Seconds of synthesized voice: ' .
            intdiv( $this->totalMillisecondsSynthesizedVoice, 1000 ) . "\n" );
        $this->output( 'Minutes of synthesized voice: ' .
            intdiv( $this->totalMillisecondsSynthesizedVoice, 1000 * 60 ) . "\n" );

        $this->output( "\n" );

        $this->output( "Milliseconds spent synthesizing: $this->totalMillisecondsSpentSynthesizing\n" );
        $this->output( 'Seconds spent synthesizing: ' .
            intdiv( $this->totalMillisecondsSpentSynthesizing, 1000 ) . "\n" );
        $this->output( 'Minutes spent synthesizing: ' .
            intdiv( $this->totalMillisecondsSpentSynthesizing, 1000 * 60 ) . "\n" );

        $this->output( "\n" );

        $this->output( "Synthesized voice bytes: $this->totalBytesSynthesizedVoice\n" );
        $this->output( 'Synthesized voice kilobytes: ' .
            intdiv( $this->totalBytesSynthesizedVoice, 1024 ) . "\n" );
        $this->output( 'Synthesized voice megabytes: ' .
            intdiv( $this->totalBytesSynthesizedVoice, 1024 * 1024 ) . "\n" );

        return true;
    }

}
492
/**
 * @var string This class, required to start via Maintenance.
 *
 * Holds the fully qualified class name of Benchmark.
 */
$maintClass = Benchmark::class;

// RUN_MAINTENANCE_IF_MAIN is not defined in this file;
// presumably it comes from the Maintenance.php required at the top and
// resolves to the script that instantiates and runs $maintClass — confirm against MediaWiki core.
require_once RUN_MAINTENANCE_IF_MAIN;