Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 285 |
|
0.00% |
0 / 7 |
CRAP | |
0.00% |
0 / 1 |
Benchmark | |
0.00% |
0 / 279 |
|
0.00% |
0 / 7 |
1056 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 34 |
|
0.00% |
0 / 1 |
2 | |||
signalHandler | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
executeSetUp | |
0.00% |
0 / 26 |
|
0.00% |
0 / 1 |
20 | |||
executeValidateInput | |
0.00% |
0 / 30 |
|
0.00% |
0 / 1 |
56 | |||
executeSegmenting | |
0.00% |
0 / 22 |
|
0.00% |
0 / 1 |
6 | |||
executeSynthesizing | |
0.00% |
0 / 81 |
|
0.00% |
0 / 1 |
182 | |||
execute | |
0.00% |
0 / 85 |
|
0.00% |
0 / 1 |
20 |
1 | <?php |
2 | |
3 | namespace MediaWiki\Wikispeech; |
4 | |
5 | /** |
6 | * @file |
7 | * @ingroup Extensions |
8 | * @license GPL-2.0-or-later |
9 | */ |
10 | |
11 | use EmptyBagOStuff; |
12 | use Maintenance; |
13 | use MediaWiki\Logger\LoggerFactory; |
14 | use MediaWiki\MediaWikiServices; |
15 | use Mediawiki\Title\Title; |
16 | use MediaWiki\Wikispeech\Segment\SegmentList; |
17 | use MediaWiki\Wikispeech\Segment\SegmentPageFactory; |
18 | use MWException; |
19 | use RequestContext; |
20 | use WANObjectCache; |
21 | |
// Locate the MediaWiki installation: prefer the MW_INSTALL_PATH environment
// variable and otherwise fall back to the directory three levels above this one.
/** @var string MediaWiki installation path */
$IP = getenv( 'MW_INSTALL_PATH' );
$IP = ( $IP === false ) ? __DIR__ . '/../../..' : $IP;

// Load the Maintenance base class that the script below extends.
require_once $IP . '/maintenance/Maintenance.php';
28 | |
/**
 * Maintenance script to evaluate interesting resource use metrics
 * related to executing Wikispeech and Speechoid on a page.
 *
 * The script segments the page, sends each segment to Speechoid for
 * synthesis and finally prints totals and means for time, tokens,
 * token characters and synthesized voice size.
 *
 * php extensions/Wikispeech/maintenance/benchmark.php -p Barack_Obama
 *
 * @since 0.1.8
 */
class Benchmark extends Maintenance {

	/** Number of times a segment is sent to Speechoid before it is given up on. */
	private const MAXIMUM_SYNTHESIZE_ATTEMPTS = 3;

	/** Number of processed segments between each progress/ETA line. */
	private const PROGRESS_LINE_LENGTH = 40;

	/** Timeout used when the 'timeout' option is not supplied. */
	private const DEFAULT_TIMEOUT_SECONDS = 240;

	/** @var VoiceHandler */
	private $voiceHandler;

	/** @var SegmentPageFactory */
	private $segmentPageFactory;

	/** @var SpeechoidConnector */
	private $speechoidConnector;

	/** @var bool Whether or not ctrl-c has been pressed. */
	private $caughtSigInt;

	/** @var SegmentList */
	private $segments;

	/** @var int */
	private $synthesizeResponseTimeoutSeconds;

	/** @var float|int */
	private $millisecondsSpentSegmenting;

	/** @var int */
	private $numberOfSuccessfullySynthesizedSegments;

	/** @var int|float */
	private $totalMillisecondsSpentSynthesizing;

	/** @var int */
	private $totalMillisecondsSynthesizedVoice;

	/** @var int */
	private $totalNumberOfTokensSynthesizedVoice;

	/** @var int */
	private $totalBytesSynthesizedVoice;

	/** @var int */
	private $totalNumberOfTokenCharactersSynthesizedVoice;

	/** @var string */
	private $language;

	/** @var string */
	private $voice;

	/** @var Title */
	private $title;

	/**
	 * Benchmark constructor.
	 *
	 * Registers the command line options and, when the pcntl functions
	 * are available, a SIGINT handler so that ctrl-c aborts synthesizing
	 * but still lets the script report on what was evaluated so far.
	 *
	 * @since 0.1.8
	 */
	public function __construct() {
		parent::__construct();
		$this->requireExtension( 'Wikispeech' );
		$this->addDescription( 'Benchmark use of resources.' );
		$this->addOption(
			'language',
			'Synthesized language. If not set, page language is selected.',
			false,
			true,
			'l'
		);
		$this->addOption(
			'voice',
			'Synthesized voice. If not set, default voice for language is selected.',
			false,
			true,
			'v'
		);
		$this->addOption(
			'page',
			'Title of page to be segmented and synthesized.',
			true,
			true,
			'p'
		);
		$this->addOption(
			'timeout',
			'Maximum number of seconds to await Speechoid synthesize HTTP response. Defaults to 240.',
			false,
			true,
			't'
		);

		$this->caughtSigInt = false;
		// NOTE(review): ticks are likely redundant once async signals are
		// enabled below; kept to preserve the original behavior.
		declare( ticks = 1 );
		// The pcntl functions may be unavailable; without them the script
		// still works, ctrl-c then simply terminates it.
		if ( function_exists( 'pcntl_async_signals' ) && function_exists( 'pcntl_signal' ) ) {
			pcntl_async_signals( true );
			pcntl_signal( SIGINT, [ $this, 'signalHandler' ] );
		}
	}

	/**
	 * Clean ctrl-c: only flags that SIGINT was received, the
	 * synthesizing loop checks the flag before starting on the next segment.
	 */
	public function signalHandler() {
		$this->caughtSigInt = true;
	}

	/**
	 * Creates the collaborators that were not already set on the instance.
	 */
	private function executeSetUp(): void {
		// Non PHP core classes aren't available prior to this point,
		// i.e. we can't initialize the fields in the constructor,
		// and we have to be lenient for mocked instances set by tests.

		$services = MediaWikiServices::getInstance();
		$config = $services
			->getConfigFactory()
			->makeConfig( 'wikispeech' );
		$requestFactory = $services->getHttpRequestFactory();

		// A cache backed by EmptyBagOStuff is handed to the collaborators below.
		$emptyWanCache = new WANObjectCache( [ 'cache' => new EmptyBagOStuff() ] );

		$logger = LoggerFactory::getInstance( 'Wikispeech' );

		if ( !$this->speechoidConnector ) {
			$this->speechoidConnector = new SpeechoidConnector( $config, $requestFactory );
		}
		if ( !$this->voiceHandler ) {
			$this->voiceHandler = new VoiceHandler(
				$logger,
				$config,
				$this->speechoidConnector,
				$emptyWanCache
			);
		}
		if ( !$this->segmentPageFactory ) {
			$this->segmentPageFactory = new SegmentPageFactory(
				$emptyWanCache,
				$services->getConfigFactory()
			);
			$this->segmentPageFactory
				->setUseSegmentsCache( false )
				->setUseRevisionPropertiesCache( false )
				->setContextSource( new RequestContext() )
				->setRevisionStore( $services->getRevisionStore() );
		}
	}

	/**
	 * Resolves title, language, voice and timeout from the script options.
	 *
	 * Prints an error message and returns false when the page cannot be
	 * used or when language or voice cannot be resolved.
	 *
	 * @return bool Whether the input is valid and the fields are populated.
	 */
	private function executeValidateInput(): bool {
		$this->language = '';
		$this->voice = '';

		$title = Title::newFromText( $this->getOption( 'page' ) );
		if ( !$title ) {
			// No Title object could be created from the page option.
			$this->output( "Error: Invalid page title.\n" );
			return false;
		}
		$this->title = $title;
		if ( !$this->title->isKnown() ) {
			$this->output( "Error: Page is not known.\n" );
			return false;
		}
		if ( $this->title->isSpecialPage() ) {
			$this->output( "Error: Page is a SpecialPage.\n" );
			return false;
		}

		if ( !$this->getOption( 'language', false ) ) {
			$language = $this->title->getPageLanguage();
			if ( !$language ) {
				$this->output( "Error: Unable to read language for page. Use parameter language.\n" );
				return false;
			}
			$this->language = $language->getCode();
			$this->output( "Language $this->language set from page default.\n" );
		} else {
			$this->language = $this->getOption( 'language' );
			$this->output( "Language $this->language set from option.\n" );
			// todo validate language
		}

		if ( !$this->getOption( 'voice', false ) ) {
			$this->voice = $this->voiceHandler->getDefaultVoice( $this->language );
			if ( !$this->voice ) {
				// This will never occur unless underlying default voice logic change.
				// I.e. if the default voice cannot be found
				// then your language must not be defined (in Speechoid or locally)
				$this->output( "Error: No default voice for language $this->language. Use parameter voice.\n" );
				return false;
			}
			$this->output( "Voice $this->voice set from default for language $this->language.\n" );
		} else {
			$this->voice = $this->getOption( 'voice' );
			$this->output( "Voice $this->voice set from option.\n" );
			// todo validate voice of language
		}

		$this->synthesizeResponseTimeoutSeconds = intval(
			$this->getOption( 'timeout', self::DEFAULT_TIMEOUT_SECONDS )
		);

		return true;
	}

	/**
	 * Segments the page and records the time spent doing so.
	 *
	 * @throws MWException If the segmenter reports null segments.
	 */
	private function executeSegmenting(): void {
		// @todo consider adding revision as script parameter.
		// Setting null requests the most recent revision for the title.
		$revisionId = null;

		$this->output( 'Benchmarking page ' .
			"{$this->title->getText()} using language " .
			"$this->language and voice " .
			"$this->voice.\n"
		);

		// We don't want to count time spent rendering to segmenting time,
		// so we call the segmenter twice. Segmenting cache is turned off.
		$this->output( "Allowing for MediaWiki to render page...\n" );
		$this->segmentPageFactory->segmentPage(
			$this->title,
			$revisionId
		);

		$this->output( "Segmenting...\n" );
		$startSegmenting = microtime( true ) * 1000;
		$segments = $this->segmentPageFactory->segmentPage(
			$this->title,
			$revisionId
		)->getSegments();
		$endSegmenting = microtime( true ) * 1000;
		if ( $segments === null ) {
			throw new MWException( 'Segments is null!' );
		}
		$this->segments = $segments;
		$this->millisecondsSpentSegmenting = $endSegmenting - $startSegmenting;
	}

	/**
	 * Sends every segment to Speechoid and accumulates the metrics.
	 *
	 * Each segment is attempted up to MAXIMUM_SYNTHESIZE_ATTEMPTS times.
	 * One character is printed per segment: '.' on success at first
	 * attempt, the attempt number on success after retries and 'E' when
	 * given up on. Collected failure messages are printed at the end.
	 * The loop stops before the next segment once ctrl-c has been caught.
	 */
	private function executeSynthesizing(): void {
		$this->numberOfSuccessfullySynthesizedSegments = 0;
		$this->totalBytesSynthesizedVoice = 0;
		$this->totalNumberOfTokenCharactersSynthesizedVoice = 0;
		$this->totalNumberOfTokensSynthesizedVoice = 0;
		$this->totalMillisecondsSynthesizedVoice = 0;
		$this->totalMillisecondsSpentSynthesizing = 0;

		$segments = $this->segments->getSegments();
		$numberOfSegments = count( $segments );

		$this->output( 'Synthesizing ' . $numberOfSegments . " segments... \n" );
		$this->output( "Press ^C to abort and calculate on evaluated state.\n" );

		$failures = '';
		$segmentCounter = 0;
		$progressCounter = 0;
		foreach ( $segments as $segment ) {
			if ( $this->caughtSigInt ) {
				break;
			}
			$segmentCounter++;

			$segmentText = '';
			foreach ( $segment->getContent() as $content ) {
				$segmentText .= $content->getString();
			}

			for ( $attempt = 1; $attempt <= self::MAXIMUM_SYNTHESIZE_ATTEMPTS; $attempt++ ) {
				$startSynthesizing = microtime( true ) * 1000;
				try {
					$speechoidResponse = $this->speechoidConnector->synthesizeText(
						$this->language, $this->voice, $segmentText, $this->synthesizeResponseTimeoutSeconds
					);
					$this->recordSynthesizedSegment(
						$speechoidResponse,
						( microtime( true ) * 1000 ) - $startSynthesizing
					);
					$this->output( $attempt > 1 ? strval( $attempt ) : '.' );
					break;
				} catch ( SpeechoidConnectorException $speechoidConnectorException ) {
					$millisecondsSpentBeforeException = ( microtime( true ) * 1000 ) - $startSynthesizing;
					$failures .= "\nException $millisecondsSpentBeforeException milliseconds after request.\n";
					$failures .= $speechoidConnectorException->getMessage() . "\n";
					if ( $attempt === self::MAXIMUM_SYNTHESIZE_ATTEMPTS ) {
						$failures .= "Giving up after attempt #$attempt. Segment ignored.\n";
						$failures .= $segmentText;
						$failures .= "\n";
						$this->output( 'E' );
					}
				}
			}

			$progressCounter++;
			if ( $progressCounter === self::PROGRESS_LINE_LENGTH ) {
				$progressCounter = 0;
				$this->output(
					' ' .
					$segmentCounter . ' / ' . $numberOfSegments .
					$this->formatEta( $numberOfSegments - $segmentCounter ) . "\n"
				);
			}
		}

		if ( $failures ) {
			$this->output( "\n" );
			$this->output( $failures );
			$this->output( "\n" );
		}
	}

	/**
	 * Adds the metrics of one successful Speechoid response to the totals.
	 *
	 * @param array $speechoidResponse Response from synthesizeText(); read
	 *  here as having an 'audio_data' string and a 'tokens' list whose items
	 *  have 'orth' and 'endtime'.
	 * @param float $millisecondsSpentSynthesizing Time spent awaiting the response.
	 */
	private function recordSynthesizedSegment(
		$speechoidResponse,
		float $millisecondsSpentSynthesizing
	): void {
		$this->totalMillisecondsSpentSynthesizing += $millisecondsSpentSynthesizing;

		// strlen counts bytes, which is what this metric reports.
		$this->totalBytesSynthesizedVoice += strlen( $speechoidResponse['audio_data'] );

		$tokens = $speechoidResponse['tokens'];
		$numberOfTokensInSegment = count( $tokens );
		$this->totalNumberOfTokensSynthesizedVoice += $numberOfTokensInSegment;

		// The end time of the last token is used as the voice duration of
		// the segment. A response without tokens adds no duration.
		if ( $numberOfTokensInSegment > 0 ) {
			$this->totalMillisecondsSynthesizedVoice +=
				$tokens[ $numberOfTokensInSegment - 1 ]['endtime'];
		}

		foreach ( $tokens as $token ) {
			$this->totalNumberOfTokenCharactersSynthesizedVoice += mb_strlen( $token['orth'] );
		}

		$this->numberOfSuccessfullySynthesizedSegments++;
	}

	/**
	 * Builds the ETA suffix of a progress line from the mean time spent
	 * synthesizing the segments that succeeded so far.
	 *
	 * @param int $numberOfSegmentsLeft Segments not yet processed.
	 * @return string Text starting with ', ETA'.
	 */
	private function formatEta( int $numberOfSegmentsLeft ): string {
		if ( $this->numberOfSuccessfullySynthesizedSegments === 0 ) {
			// No mean can be calculated when every segment so far failed.
			return ', ETA unknown';
		}

		$meanMillisecondsSpentSynthesizingPerSegment =
			$this->totalMillisecondsSpentSynthesizing / $this->numberOfSuccessfullySynthesizedSegments;
		// intdiv() requires integers; cast explicitly rather than relying
		// on implicit float to int conversion.
		$millisecondsEta = (int)( $numberOfSegmentsLeft * $meanMillisecondsSpentSynthesizingPerSegment );

		$eta = ', ETA ~';
		if ( $millisecondsEta < 1000 ) {
			$eta .= $millisecondsEta . ' ms';
		} elseif ( $millisecondsEta < 1000 * 60 ) {
			$eta .= intdiv( $millisecondsEta, 1000 ) . ' seconds';
		} else {
			$eta .= intdiv( $millisecondsEta, 1000 * 60 ) . ' minutes';
		}
		$eta .= ' (~' . intdiv( (int)$meanMillisecondsSpentSynthesizingPerSegment, 1000 ) . 's/seg)';
		return $eta;
	}

	/**
	 * Division that yields 0 rather than failing when the divisor is zero.
	 *
	 * @param int|float $dividend
	 * @param int|float $divisor
	 * @return int|float
	 */
	private static function safeDivide( $dividend, $divisor ) {
		if ( !$divisor ) {
			return 0;
		}
		return $dividend / $divisor;
	}

	/**
	 * Runs the benchmark and prints the results.
	 *
	 * @since 0.1.8
	 * @return bool success; false when the input is invalid or when
	 *  nothing was synthesized.
	 */
	public function execute() {
		$this->executeSetUp();
		if ( !$this->executeValidateInput() ) {
			return false;
		}
		$this->executeSegmenting();
		$this->executeSynthesizing();

		$numberOfSegments = count( $this->segments->getSegments() );

		$this->output( "\n\n" );
		$this->output( "Benchmark results\n" );
		$this->output( "-----------------\n" );
		$this->output( "\n" );

		$this->output( 'Number of segments: ' .
			$numberOfSegments . "\n" );
		$this->output( "Milliseconds spent segmenting: $this->millisecondsSpentSegmenting\n" );

		// A page may produce no segments at all, hence the guarded division.
		$meanMillisecondsSpentSegmentingPerSegment =
			self::safeDivide( $this->millisecondsSpentSegmenting, $numberOfSegments );

		$this->output( 'Mean milliseconds spent segmenting per segment: ' .
			"$meanMillisecondsSpentSegmentingPerSegment\n" );

		if ( $this->numberOfSuccessfullySynthesizedSegments === 0 ) {
			$this->output( "Nothing synthesized, no further metrics available.\n" );
			// Report failure to the caller rather than terminating the process here.
			return false;
		}

		$this->totalMillisecondsSpentSynthesizing = intval( $this->totalMillisecondsSpentSynthesizing );
		$this->totalMillisecondsSynthesizedVoice = intval( $this->totalMillisecondsSynthesizedVoice );

		$numberOfTokens = $this->totalNumberOfTokensSynthesizedVoice;
		$numberOfTokenCharacters = $this->totalNumberOfTokenCharactersSynthesizedVoice;
		$numberOfSynthesizedSegments = $this->numberOfSuccessfullySynthesizedSegments;

		// Token and character counts are zero if all responses lacked tokens.
		// NOTE(review): these two means are calculated from the duration of the
		// synthesized voice, not from the time spent synthesizing. Confirm that
		// this is what the "synthesizing per token" labels intend.
		$meanMillisecondsSynthesizingPerToken =
			self::safeDivide( $this->totalMillisecondsSynthesizedVoice, $numberOfTokens );
		$meanMillisecondsSynthesizingPerCharacter =
			self::safeDivide( $this->totalMillisecondsSynthesizedVoice, $numberOfTokenCharacters );
		$meanBytesSynthesizedVoicePerToken =
			self::safeDivide( $this->totalBytesSynthesizedVoice, $numberOfTokens );
		$meanBytesSynthesizedVoicePerCharacter =
			self::safeDivide( $this->totalBytesSynthesizedVoice, $numberOfTokenCharacters );

		$meanTokensPerSegment = $numberOfTokens / $numberOfSynthesizedSegments;
		$meanTokenCharactersPerSegment = $numberOfTokenCharacters / $numberOfSynthesizedSegments;

		// Segmenting time is scaled to the segments that were synthesized,
		// since tokens are only known for those.
		$millisecondsSpentSegmentingSynthesizedSegments =
			$meanMillisecondsSpentSegmentingPerSegment * $numberOfSynthesizedSegments;
		$meanMillisecondsSpentSegmentingPerToken =
			self::safeDivide( $millisecondsSpentSegmentingSynthesizedSegments, $numberOfTokens );
		$meanMillisecondsSpentSegmentingPerTokenCharacter =
			self::safeDivide( $millisecondsSpentSegmentingSynthesizedSegments, $numberOfTokenCharacters );

		$this->output( 'Mean milliseconds spent segmenting per token synthesized: ' .
			"$meanMillisecondsSpentSegmentingPerToken\n" );
		$this->output( 'Mean milliseconds spent segmenting per token character synthesized: ' .
			"$meanMillisecondsSpentSegmentingPerTokenCharacter\n" );

		if ( $numberOfSynthesizedSegments !== $numberOfSegments ) {
			$this->output( 'Warning! Not all segments synthesized, ' .
				"mean segmenting per token values might be slightly off.\n" );
		}

		$this->output( "\n" );

		$this->output( 'Number of synthesized segments: ' .
			"$this->numberOfSuccessfullySynthesizedSegments\n" );
		$this->output( "Number of synthesized tokens: $this->totalNumberOfTokensSynthesizedVoice\n" );
		$this->output( 'Number of synthesized token characters: ' .
			"$this->totalNumberOfTokenCharactersSynthesizedVoice\n" );

		$this->output( "\n" );

		$this->output( "Mean number of tokens per synthesized segment: $meanTokensPerSegment\n" );
		$this->output( 'Mean number of token characters per synthesized segment: ' .
			"$meanTokenCharactersPerSegment\n" );

		$this->output( "\n" );

		$this->output( 'Mean milliseconds synthesizing per token: ' .
			"$meanMillisecondsSynthesizingPerToken\n" );
		$this->output( 'Mean milliseconds synthesizing per token character: ' .
			"$meanMillisecondsSynthesizingPerCharacter\n" );

		$this->output( 'Mean bytes synthesized voice per token: ' .
			intval( $meanBytesSynthesizedVoicePerToken ) . "\n" );
		$this->output( 'Mean bytes synthesized voice per token character: ' .
			intval( $meanBytesSynthesizedVoicePerCharacter ) . "\n" );

		$this->output( "\n" );

		$this->output( "Milliseconds of synthesized voice: $this->totalMillisecondsSynthesizedVoice\n" );
		$this->output( 'Seconds of synthesized voice: ' .
			intdiv( $this->totalMillisecondsSynthesizedVoice, 1000 ) . "\n" );
		$this->output( 'Minutes of synthesized voice: ' .
			intdiv( $this->totalMillisecondsSynthesizedVoice, 1000 * 60 ) . "\n" );

		$this->output( "\n" );

		$this->output( "Milliseconds spent synthesizing: $this->totalMillisecondsSpentSynthesizing\n" );
		$this->output( 'Seconds spent synthesizing: ' .
			intdiv( $this->totalMillisecondsSpentSynthesizing, 1000 ) . "\n" );
		$this->output( 'Minutes spent synthesizing: ' .
			intdiv( $this->totalMillisecondsSpentSynthesizing, 1000 * 60 ) . "\n" );

		$this->output( "\n" );

		$this->output( "Synthesized voice bytes: $this->totalBytesSynthesizedVoice\n" );
		$this->output( 'Synthesized voice kilobytes: ' .
			intdiv( $this->totalBytesSynthesizedVoice, 1024 ) . "\n" );
		$this->output( 'Synthesized voice megabytes: ' .
			intdiv( $this->totalBytesSynthesizedVoice, 1024 * 1024 ) . "\n" );

		return true;
	}

}
492 | |
/**
 * @var string Fully qualified name of the maintenance class declared in
 *  this file, required to start via Maintenance.
 */
$maintClass = Benchmark::class;

// RUN_MAINTENANCE_IF_MAIN is not defined in this file; presumably it is
// provided by the Maintenance.php required at the top.
require_once RUN_MAINTENANCE_IF_MAIN;