Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
78.90% covered (warning)
78.90%
258 / 327
50.00% covered (danger)
50.00%
11 / 22
CRAP
0.00% covered (danger)
0.00%
0 / 1
UtteranceStore
78.90% covered (warning)
78.90%
258 / 327
50.00% covered (danger)
50.00%
11 / 22
93.82
0.00% covered (danger)
0.00%
0 / 1
 __construct
61.54% covered (warning)
61.54%
8 / 13
0.00% covered (danger)
0.00%
0 / 1
2.23
 getFileBackend
8.00% covered (danger)
8.00%
2 / 25
0.00% covered (danger)
0.00%
0 / 1
16.46
 findUtterance
80.00% covered (warning)
80.00%
24 / 30
0.00% covered (danger)
0.00%
0 / 1
5.20
 retrieveUtteranceMetadata
100.00% covered (success)
100.00%
31 / 31
100.00% covered (success)
100.00%
1 / 1
3
 retrieveFileContents
62.50% covered (warning)
62.50%
5 / 8
0.00% covered (danger)
0.00%
0 / 1
2.21
 createUtterance
100.00% covered (success)
100.00%
35 / 35
100.00% covered (success)
100.00%
1 / 1
1
 storeFile
72.22% covered (warning)
72.22%
13 / 18
0.00% covered (danger)
0.00%
0 / 1
5.54
 flushUtterancesByExpirationDate
100.00% covered (success)
100.00%
6 / 6
100.00% covered (success)
100.00%
1 / 1
1
 flushUtterancesByPage
100.00% covered (success)
100.00%
10 / 10
100.00% covered (success)
100.00%
1 / 1
1
 flushUtterancesByLanguageAndVoice
100.00% covered (success)
100.00%
10 / 10
100.00% covered (success)
100.00%
1 / 1
2
 flushUtterances
85.00% covered (warning)
85.00%
34 / 40
0.00% covered (danger)
0.00%
0 / 1
9.27
 deleteFileBackendFile
42.86% covered (danger)
42.86%
9 / 21
0.00% covered (danger)
0.00%
0 / 1
4.68
 urlPathFactory
100.00% covered (success)
100.00%
6 / 6
100.00% covered (success)
100.00%
1 / 1
2
 audioUrlPrefixFactory
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 audioUrlFactory
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 synthesisMetadataUrlFactory
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 flushUtterancesByExpirationDateOnFile
88.89% covered (warning)
88.89%
8 / 9
0.00% covered (danger)
0.00%
0 / 1
2.01
 recurseFlushUtterancesByExpirationDateOnFile
91.11% covered (success)
91.11%
41 / 45
0.00% covered (danger)
0.00%
0 / 1
7.03
 getWikispeechUtteranceExpirationTimestamp
0.00% covered (danger)
0.00%
0 / 3
0.00% covered (danger)
0.00%
0 / 1
2
 getWikispeechUtteranceTimeToLiveDays
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 isWikispeechUtteranceUseSwiftFileBackendExpiring
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 evaluateRemoteWikiHash
100.00% covered (success)
100.00%
11 / 11
100.00% covered (success)
100.00%
1 / 1
5
1<?php
2
3namespace MediaWiki\Wikispeech\Utterance;
4
5/**
6 * @file
7 * @ingroup Extensions
8 * @license GPL-2.0-or-later
9 */
10
11use Config;
12use ExternalStoreException;
13use FileBackend;
14use FSFileBackend;
15use MediaWiki\Logger\LoggerFactory;
16use MediaWiki\MediaWikiServices;
17use MWTimestamp;
18use Psr\Log\LoggerInterface;
19use SwiftFileBackend;
20use WikiMap;
21use Wikimedia\Rdbms\IDatabase;
22use Wikimedia\Rdbms\ILoadBalancer;
23use Wikimedia\Rdbms\IResultWrapper;
24
25/**
26 * Keeps track of utterances in persistent layers.
27 *
28 * Utterance metadata (i.e. segment hash, page id, language, etc) is stored in a database table.
29 * Utterance audio (synthesised voice audio) is stored as an opus file in file backend.
30 * Synthesis metadata (tokens, etc) is stored as a JSON file in file backend.
31 *
32 * (.opus and .json suffixes are added in the file backend store although this class is agnostic
33 * regarding the actual data encoding and formats.)
34 *
35 * @since 0.1.5
36 */
37class UtteranceStore {
38
39    /** @var string Name of database table that keeps track of utterance metadata. */
40    public const UTTERANCE_TABLE = 'wikispeech_utterance';
41
42    /** @var LoggerInterface */
43    private $logger;
44
45    /**
46     * Don't use this directly, access @see getFileBackend
47     * @var FileBackend Used to store utterance audio and synthesis metadata.
48     */
49    private $fileBackend;
50
51    /**
52     * @var ILoadBalancer
53     */
54    private $dbLoadBalancer;
55
56    /** @var string Name of container (sort of path prefix) used for files in backend. */
57    private $fileBackendContainerName;
58
59    /** @var Config */
60    private $config;
61
62    public function __construct() {
63        $this->logger = LoggerFactory::getInstance( 'Wikispeech' );
64
65        // @todo don't create, add as constructor parameter
66        // Refer to https://phabricator.wikimedia.org/T264165
67        $this->config = MediaWikiServices::getInstance()
68            ->getConfigFactory()
69            ->makeConfig( 'wikispeech' );
70
71        $this->fileBackendContainerName = $this->config
72            ->get( 'WikispeechUtteranceFileBackendContainerName' );
73        if ( !$this->fileBackendContainerName ) {
74            $this->fileBackendContainerName = "wikispeech-utterances";
75            $this->logger->info( __METHOD__ . ': ' .
76                'Falling back on container name {containerName}', [
77                    'containerName' => $this->fileBackendContainerName
78            ] );
79        }
80
81        $this->dbLoadBalancer = MediaWikiServices::getInstance()->getDBLoadBalancer();
82    }
83
84    /**
85     * @since 0.1.5
86     * @return FileBackend
87     * @throws ExternalStoreException If defined file backend group does not exists.
88     */
89    private function getFileBackend() {
90        global $wgUploadDirectory;
91        if ( !$this->fileBackend ) {
92
93            /** @var string Name of file backend group in LocalSettings.php to use. */
94            $fileBackendName = $this->config->get( 'WikispeechUtteranceFileBackendName' );
95            if ( !$fileBackendName ) {
96                $fileBackendName = 'wikispeech-backend';
97                $fallbackDir = "$wgUploadDirectory/wikispeech_utterances";
98                $this->logger->info( __METHOD__ . ': ' .
99                    'No file backend defined in LocalSettings.php. Falling back ' .
100                    'on FS storage backend named {name} in {dir}.', [
101                        'name' => $fileBackendName,
102                        'dir' => $fallbackDir
103                ] );
104                $this->fileBackend = new FSFileBackend( [
105                    'name' => $fileBackendName,
106                    'wikiId' => WikiMap::getCurrentWikiId(),
107                    'basePath' => $fallbackDir
108                ] );
109            } else {
110                $fileBackend = MediaWikiServices::getInstance()
111                    ->getFileBackendGroup()
112                    ->get( $fileBackendName );
113                if ( $fileBackend ) {
114                    $this->fileBackend = $fileBackend;
115                } else {
116                    throw new ExternalStoreException(
117                        "No file backend group in LocalSettings.php named $fileBackendName."
118                    );
119                }
120            }
121        }
122        return $this->fileBackend;
123    }
124
125    /**
126     * Retrieves an utterance for a given segment in a page, using a specific
127     * voice and language.
128     *
129     * @since 0.1.5
130     * @param string|null $consumerUrl Remote wiki where page is located, or null if local.
131     * @param int $pageId Mediawiki page ID.
132     * @param string $language ISO-639.
133     * @param string $voice Name of synthesis voice.
134     * @param string $segmentHash Hash of segment representing utterance.
135     * @param bool $omitAudio If true, then no audio is returned.
136     * @return Utterance|null Utterance found, or null if non-existing.
137     */
138    public function findUtterance(
139        ?string $consumerUrl,
140        int $pageId,
141        string $language,
142        string $voice,
143        string $segmentHash,
144        bool $omitAudio = false
145    ): ?Utterance {
146        $utterance = $this->retrieveUtteranceMetadata(
147            $consumerUrl,
148            $pageId,
149            $language,
150            $voice,
151            $segmentHash
152        );
153        if ( !$utterance ) {
154            return null;
155        }
156
157        // load utterance audio and synthesis metadata
158        $utteranceId = $utterance->getUtteranceId();
159
160        // @note We might want to keep this as separate function calls,
161        // allowing the user to request when needed, and perhaps
162        // pass a stream straight down from file backend to user
163        // rather than bouncing it via RAM.
164        // Not sure if this is an existing thing in PHP though.
165
166        if ( !$omitAudio ) {
167            $audioSrc = $this->audioUrlFactory( $utteranceId );
168            try {
169                $utterance->setAudio( $this->retrieveFileContents(
170                    $audioSrc,
171                    $utteranceId,
172                    'audio file'
173                ) );
174            } catch ( ExternalStoreException $e ) {
175                $this->logger->warning( __METHOD__ . ': ' . $e->getMessage() );
176                return null;
177            }
178        }
179
180        $synthesisMetadataSrc = $this->synthesisMetadataUrlFactory( $utteranceId );
181        try {
182            $utterance->setSynthesisMetadata( $this->retrieveFileContents(
183                $synthesisMetadataSrc,
184                $utteranceId,
185                'synthesis metadata file'
186            ) );
187        } catch ( ExternalStoreException $e ) {
188            $this->logger->warning( __METHOD__ . ': ' . $e->getMessage() );
189            return null;
190        }
191
192        return $utterance;
193    }
194
195    /**
196     * Retrieves the utterance metadata from the database for a given segment in a page,
197     * using a specific voice and language.
198     *
199     * @since 0.1.5
200     * @param string|null $consumerUrl Remote wiki where page is located, or null if local.
201     * @param int $pageId Mediawiki page ID.
202     * @param string $language ISO-639.
203     * @param string $voice Name of synthesis voice.
204     * @param string $segmentHash Hash of segment representing utterance.
205     * @return Utterance|null Utterance or null if not found in database
206     */
207    public function retrieveUtteranceMetadata(
208        ?string $consumerUrl,
209        int $pageId,
210        string $language,
211        string $voice,
212        string $segmentHash
213    ): ?Utterance {
214        $remoteWikiHash = self::evaluateRemoteWikiHash( $consumerUrl );
215        $dbr = $this->dbLoadBalancer->getConnection( DB_REPLICA );
216        $row = $dbr->selectRow( self::UTTERANCE_TABLE, [
217            'wsu_utterance_id',
218            'wsu_remote_wiki_hash',
219            'wsu_page_id',
220            'wsu_lang',
221            'wsu_voice',
222            'wsu_seg_hash',
223            'wsu_date_stored'
224        ], [
225            'wsu_remote_wiki_hash' => $remoteWikiHash,
226            'wsu_page_id' => $pageId,
227            'wsu_lang' => $language,
228            'wsu_voice' => $voice,
229            'wsu_seg_hash' => $segmentHash
230        ], __METHOD__, [
231            'ORDER BY date_stored DESC',
232        ] );
233        if ( !$row ) {
234            return null;
235        }
236        $utterance = new Utterance(
237            intval( $row->wsu_utterance_id ),
238            $row->wsu_remote_wiki_hash === null ? null : strval( $row->wsu_remote_wiki_hash ),
239            intval( $row->wsu_page_id ),
240            strval( $row->wsu_lang ),
241            strval( $row->wsu_voice ),
242            strval( $row->wsu_seg_hash ),
243            MWTimestamp::getInstance( $row->wsu_date_stored )
244        );
245        return $utterance;
246    }
247
248    /**
249     * Retrieve the file contents from the backend.
250     *
251     * @since 0.1.5
252     * @param string $src
253     * @param int $utteranceId
254     * @param string $type
255     * @return mixed File contents
256     * @throws ExternalStoreException
257     */
258    public function retrieveFileContents( $src, $utteranceId, $type ) {
259        $content = $this->getFileBackend()->getFileContents( [
260            'src' => $src
261        ] );
262        if ( $content == FileBackend::CONTENT_FAIL ) {
263            // @note Consider queuing job to flush inconsistencies from database.
264            throw new ExternalStoreException(
265                "Inconsistency! Database contains utterance with ID $utteranceId " .
266                "that does not exist as $type named $src in file backend." );
267        }
268        return $content;
269    }
270
271    /**
272     * Creates an utterance in the database.
273     *
274     * @since 0.1.5
275     * @param string|null $consumerUrl
276     * @param int $pageId Mediawiki page ID.
277     * @param string $language ISO 639.
278     * @param string $voice Name of synthesis voice.
279     * @param string $segmentHash Hash of segment representing utterance.
280     * @param string $audio Utterance audio.
281     * @param string $synthesisMetadata JSON form metadata about the audio.
282     * @return Utterance Inserted utterance.
283     * @throws ExternalStoreException If unable to prepare or create files in file backend.
284     */
285    public function createUtterance(
286        ?string $consumerUrl,
287        int $pageId,
288        string $language,
289        string $voice,
290        string $segmentHash,
291        string $audio,
292        string $synthesisMetadata
293    ): Utterance {
294        $remoteWikiHash = self::evaluateRemoteWikiHash( $consumerUrl );
295        $dbw = MediaWikiServices::getInstance()->getConnectionProvider()->getPrimaryDatabase();
296        $rows = [
297            'wsu_remote_wiki_hash' => $remoteWikiHash,
298            'wsu_page_id' => $pageId,
299            'wsu_lang' => $language,
300            'wsu_voice' => $voice,
301            'wsu_seg_hash' => $segmentHash,
302            'wsu_date_stored' => $dbw->timestamp()
303        ];
304        $dbw->insert( self::UTTERANCE_TABLE, $rows );
305        $utterance = new Utterance(
306            intval( $dbw->insertId() ),
307            $remoteWikiHash,
308            $pageId,
309            $language,
310            $voice,
311            $segmentHash,
312            MWTimestamp::getInstance( $rows['wsu_date_stored'] )
313        );
314
315        // create audio file
316        $this->storeFile(
317            $this->audioUrlFactory( $utterance->getUtteranceId() ),
318            $audio,
319            'audio file'
320        );
321        $utterance->setAudio( $audio );
322
323        // create synthesis metadata file
324        $this->storeFile(
325            $this->synthesisMetadataUrlFactory( $utterance->getUtteranceId() ),
326            $synthesisMetadata,
327            'synthesis metadata file'
328        );
329        $utterance->setSynthesisMetadata( $synthesisMetadata );
330
331        $jobQueue = new FlushUtterancesFromStoreByExpirationJobQueue();
332        $jobQueue->maybeQueueJob();
333
334        return $utterance;
335    }
336
337    /**
338     * Store a file in the backend.
339     *
340     * @since 0.1.5
341     * @param string $fileUrl
342     * @param mixed $content
343     * @param string $type
344     * @throws ExternalStoreException
345     */
346    public function storeFile( $fileUrl, $content, $type ) {
347        $fileBackend = $this->getFileBackend();
348        if ( !$fileBackend->prepare( [
349            'dir' => dirname( $fileUrl ),
350            'noAccess' => 1,
351            'noListing' => 1
352        ] )->isOK() ) {
353            throw new ExternalStoreException( "Failed to prepare $type$fileUrl." );
354        }
355        $opts = [
356            'dst' => $fileUrl,
357            'content' => $content
358        ];
359        if ( $this->isWikispeechUtteranceUseSwiftFileBackendExpiring() &&
360            $fileBackend instanceof SwiftFileBackend ) {
361            // Mark files in Swift for automatic removal after TTL.
362            // See $this->flushUtterances for code that skips forced removal if backend is Swift.
363            $opts['headers'] = [
364                // number of seconds from now
365                'X-Delete-After' => $this->getWikispeechUtteranceTimeToLiveDays() * 60 * 60 * 24
366            ];
367        }
368        if ( !$fileBackend->create( $opts )->isOK() ) {
369            throw new ExternalStoreException( "Failed to create $type$fileUrl." );
370        }
371    }
372
373    /**
374     * Clears database and file backend of utterances older than a given age.
375     *
376     * @since 0.1.5
377     * @param MWTimestamp $expirationDate
378     * @return int Number of utterances flushed.
379     */
380    public function flushUtterancesByExpirationDate( $expirationDate ) {
381        $dbw = MediaWikiServices::getInstance()->getConnectionProvider()->getPrimaryDatabase();
382        $results = $dbw->select( self::UTTERANCE_TABLE,
383            [ 'wsu_utterance_id' ],
384            [ 1 => 'wsu_date_stored <= ' . $expirationDate->getTimestamp( TS_MW ) ]
385        );
386        return $this->flushUtterances( $dbw, $results );
387    }
388
389    /**
390     * Clears database and file backend of all utterances for a given page.
391     *
392     * @since 0.1.5
393     * @param string|null $consumerUrl
394     * @param int $pageId Mediawiki page ID.
395     * @return int Number of utterances flushed.
396     */
397    public function flushUtterancesByPage(
398        ?string $consumerUrl,
399        int $pageId
400    ): int {
401        $remoteWikiHash = self::evaluateRemoteWikiHash( $consumerUrl );
402        $dbw = MediaWikiServices::getInstance()->getConnectionProvider()->getPrimaryDatabase();
403        $results = $dbw->select( self::UTTERANCE_TABLE,
404            [ 'wsu_utterance_id' ],
405            [
406                'wsu_remote_wiki_hash' => $remoteWikiHash,
407                'wsu_page_id' => $pageId
408            ]
409        );
410        return $this->flushUtterances( $dbw, $results );
411    }
412
413    /**
414     * Clears database and file backend of all utterances for a given language and voice.
415     * If no voice is set, then all voices will be removed.
416     *
417     * @since 0.1.5
418     * @param string $language ISO 639.
419     * @param string|null $voice Optional name of synthesis voice to limit flush to.
420     * @return int Number of utterances flushed.
421     */
422    public function flushUtterancesByLanguageAndVoice( $language, $voice = null ) {
423        $conditions = [
424            'wsu_lang' => $language
425        ];
426        if ( $voice != null ) {
427            $conditions['wsu_voice'] = $voice;
428        }
429        $dbw = MediaWikiServices::getInstance()->getConnectionProvider()->getPrimaryDatabase();
430        $results = $dbw->select( self::UTTERANCE_TABLE,
431            [ 'wsu_utterance_id' ], $conditions
432        );
433        return $this->flushUtterances( $dbw, $results );
434    }
435
436    /**
437     * Flushes utterances listed in a result set containing
438     * at least the wsu_utterance_id column.
439     *
440     * In order for return value to increase, the utterance must have been
441     * successfully deleted in all layers, i.e. utterance metadata database row,
442     * utterance audio and synthesis metadata from file store.
443     * E.g. if the utterance audio file is missing and thus not explicitly removed,
444     * but at the same time we managed to remove the utterance metadata from database
445     * and also removed the synthesis metadata file, this will not count as a
446     * successfully removed utterance. It would however be removed from all layers
447     * and it would also cause an out-of-sync warning in the log.
448     *
449     * @note Consider if database should be flushing within a transaction.
450     *
451     * @since 0.1.5
452     * @param IDatabase $dbw Writable database connection.
453     * @param IResultWrapper $results Result set.
454     * @return int Number of utterances that were successfully flushed in all layers.
455     */
456    private function flushUtterances( $dbw, $results ) {
457        if ( !$results ) {
458            return 0;
459        }
460
461        // TTL is set when creating files in Swift, so no need to invoke any delete I/O operations.
462        $flushInFileBackend = !(
463            $this->isWikispeechUtteranceUseSwiftFileBackendExpiring() &&
464            $this->getFileBackend() instanceof SwiftFileBackend
465        );
466
467        $successfullyFlushedCounter = 0;
468        foreach ( $results as $row ) {
469            $utteranceId = $row->wsu_utterance_id;
470
471            // 1. delete in database
472            $successfullyDeletedTableRow = $dbw->delete(
473                self::UTTERANCE_TABLE,
474                [ 'wsu_utterance_id' => $utteranceId ],
475                __METHOD__
476            );
477            if ( !$successfullyDeletedTableRow ) {
478                $this->logger->warning( __METHOD__ . ': ' .
479                    'Failed to delete utterance {utteranceId} from database.', [
480                        'utteranceId' => $utteranceId
481                ] );
482            } else {
483                $this->logger->debug( __METHOD__ . ': ' .
484                    'Flushed out utterance with id {utteranceId} from database', [
485                        'utteranceId' => $utteranceId
486                ] );
487            }
488
489            // 2. delete in file store.
490            if ( $flushInFileBackend ) {
491                $successfullyDeletedAudioFile = $this->deleteFileBackendFile(
492                    $this->audioUrlFactory( $utteranceId ),
493                    $utteranceId,
494                    'audio file'
495                );
496                $successfullyDeletedSynthesisMetadataFile = $this->deleteFileBackendFile(
497                    $this->synthesisMetadataUrlFactory( $utteranceId ),
498                    $utteranceId,
499                    'synthesis metadata file'
500                );
501                $successfullyDeletedFiles =
502                    $successfullyDeletedAudioFile && $successfullyDeletedSynthesisMetadataFile;
503            } else {
504                // The files were marked for automatic deletion using TTL in the Swift create operation.
505                $successfullyDeletedFiles = true;
506            }
507
508            if ( $successfullyDeletedTableRow && $successfullyDeletedFiles ) {
509                $successfullyFlushedCounter++;
510            }
511        }
512
513        return $successfullyFlushedCounter;
514    }
515
516    /**
517     * @since 0.1.5
518     * @param string $src
519     * @param int $utteranceId
520     * @param string $type
521     * @return bool If successfully deleted
522     */
523    private function deleteFileBackendFile( $src, $utteranceId, $type ) {
524        $synthesisMetadataFile = [
525            'src' => $src
526        ];
527        if ( $this->getFileBackend()->fileExists( $synthesisMetadataFile ) ) {
528            if ( !$this->getFileBackend()->delete( $synthesisMetadataFile )->isOK() ) {
529                $this->logger->warning( __METHOD__ . ': ' .
530                    'Unable to delete {type} for utterance with identity {utteranceId}.', [
531                        'utteranceId' => $utteranceId,
532                        'type' => $type
533                ] );
534                return false;
535            } else {
536                $this->getFileBackend()->clean( [ 'dir' => $this->urlPathFactory( $utteranceId ) ] );
537            }
538        } else {
539            $this->logger->warning( __METHOD__ . ': ' .
540                'Attempted to delete non existing {type} for utterance {utteranceId}.', [
541                    'utteranceId' => $utteranceId,
542                    'type' => $type
543            ] );
544            return false;
545        }
546        $this->logger->debug( __METHOD__ . ': ' .
547            'Flushed out file {src}', [ 'src' => $src ] );
548        return true;
549    }
550
551    /**
552     * Creates a deterministic path based on utterance identity,
553     * causing no more than 1000 files and 10 subdirectories per directory.
554     * (Actually, 2000 files, as we store both .json and .opus)
555     *
556     * Overloading a directory with files often cause performance problems.
557     *
558     * 1 -> /
559     * 12 -> /
560     * 123 -> /
561     * 1234 -> /1/
562     * 12345 -> /1/2/
563     * 123456 -> /1/2/3/
564     * 1234567 -> /1/2/3/4/
565     *
566     * @since 0.1.5
567     * @param int $utteranceId
568     * @return string Path
569     */
570    private function urlPathFactory( $utteranceId ) {
571        $path = '/';
572        $utteranceIdText = strval( $utteranceId );
573        $utteranceIdTextLength = strlen( $utteranceIdText );
574        for ( $index = 0; $index < $utteranceIdTextLength - 3; $index++ ) {
575            $path .= substr( $utteranceIdText, $index, 1 ) . '/';
576        }
577        return $path;
578    }
579
580    /**
581     * @since 0.1.5
582     * @param int $utteranceId Utterance identity.
583     * @return string url used to access object in file store
584     */
585    private function audioUrlPrefixFactory( $utteranceId ) {
586        return $this->getFileBackend()->getContainerStoragePath( $this->fileBackendContainerName )
587            . $this->urlPathFactory( $utteranceId ) . $utteranceId;
588    }
589
590    /**
591     * @since 0.1.5
592     * @param int $utteranceId Utterance identity.
593     * @return string url used to access object in file store
594     */
595    private function audioUrlFactory( $utteranceId ) {
596        return $this->audioUrlPrefixFactory( $utteranceId ) . '.opus';
597    }
598
599    /**
600     * @since 0.1.5
601     * @param int $utteranceId Utterance identity.
602     * @return string url used to access object in file store
603     */
604    private function synthesisMetadataUrlFactory( $utteranceId ) {
605        return $this->audioUrlPrefixFactory( $utteranceId ) . '.json';
606    }
607
608    /**
609     * Removes expired utterance and synthesis metadata from the file backend.
610     *
611     * @since 0.1.7
612     * @param MWTimestamp|null $expiredTimestamp File timestamp <= to this value is orphaned.
613     *  Defaults to config value.
614     * @return int Number of expired files flushed
615     */
616    public function flushUtterancesByExpirationDateOnFile( $expiredTimestamp = null ) {
617        // @note Either this method, or the job,
618        // should probably call `flushUtterancesByExpirationDate`
619        // to ensure we are not deleting a bunch of files
620        // which were scheduled to be deleted together with their db-entries anyway.
621
622        if ( !$expiredTimestamp ) {
623            $expiredTimestamp = $this->getWikispeechUtteranceExpirationTimestamp();
624        }
625        $fileBackend = $this->getFileBackend();
626        return $this->recurseFlushUtterancesByExpirationDateOnFile(
627            $fileBackend,
628            $this->getFileBackend()
629                ->getContainerStoragePath( $this->fileBackendContainerName ),
630            $expiredTimestamp
631        );
632    }
633
634    /**
635     * @since 0.1.7
636     * @param FileBackend $fileBackend
637     * @param string $directory
638     * @param MWTimestamp $expiredTimestamp
639     * @return int Number of expired files flushed
640     */
641    private function recurseFlushUtterancesByExpirationDateOnFile(
642        $fileBackend,
643        $directory,
644        $expiredTimestamp
645    ) {
646        $this->logger->debug( __METHOD__ . ': ' .
647            'Processing directory {directory}', [ 'directory' => $directory ] );
648        $removedFilesCounter = 0;
649        $subdirectories = $fileBackend->getDirectoryList( [
650            'dir' => $directory,
651            'topOnly' => true,
652        ] );
653        if ( $subdirectories ) {
654            foreach ( $subdirectories as $subdirectory ) {
655                $removedFilesCounter += $this->recurseFlushUtterancesByExpirationDateOnFile(
656                    $fileBackend,
657                    $directory . '/' . $subdirectory,
658                    $expiredTimestamp
659                );
660            }
661        }
662        $files = $fileBackend->getFileList( [
663            'dir' => $directory,
664            'topOnly' => true,
665            'adviseStat' => false
666        ] );
667        if ( $files ) {
668            foreach ( $files as $file ) {
669                $src = [ 'src' => $directory . '/' . $file ];
670                $timestamp = new MWTimestamp( $fileBackend->getFileTimestamp( $src ) );
671                $this->logger->debug( __METHOD__ . ': ' .
672                    'Processing file {src} with timestamp {timestamp}', [
673                    'src' => $file,
674                    'timestamp' => $timestamp,
675                    'expiredTimestamp' => $expiredTimestamp
676                ] );
677                if ( $timestamp <= $expiredTimestamp ) {
678                    if ( $fileBackend->delete( $src )->isOK() ) {
679                        $removedFilesCounter++;
680                        $this->logger->debug( __METHOD__ . ': ' .
681                            'Deleted expired file {file} #{num}', [
682                                'file' => $file,
683                                'num' => $removedFilesCounter
684                            ]
685                        );
686                    } else {
687                        $this->logger->warning( __METHOD__ . ': ' .
688                            'Unable to delete expired file {file}',
689                            [ 'file' => $file ]
690                        );
691                    }
692                }
693                unset( $timestamp );
694            }
695        }
696        $this->getFileBackend()->clean( [ 'dir' => $directory ] );
697        return $removedFilesCounter;
698    }
699
700    /**
701     * Calculates historic timestamp on now-WikispeechUtteranceTimeToLiveDays
702     *
703     * @return MWTimestamp Utterance parts with timestamp <= this is expired.
704     */
705    public function getWikispeechUtteranceExpirationTimestamp(): MWTimestamp {
706        return MWTimestamp::getInstance(
707            strtotime( '-' . $this->getWikispeechUtteranceTimeToLiveDays() . 'days' )
708        );
709    }
710
711    /**
712     * @return int Number of days an utterance is to exist before being flushed out.
713     */
714    private function getWikispeechUtteranceTimeToLiveDays(): int {
715        return intval( $this->config->get( 'WikispeechUtteranceTimeToLiveDays' ) );
716    }
717
718    /**
719     * @return bool
720     */
721    private function isWikispeechUtteranceUseSwiftFileBackendExpiring(): bool {
722        return $this->config->get( 'WikispeechUtteranceUseSwiftFileBackendExpiring' );
723    }
724
725    /**
726     * Used to evaluate hash of gadget consumer URL,
727     * the remote wiki where the page is located.
728     *
729     * Making changes to this function will probably invalidate all existing cached utterances.
730     *
731     * @since 0.1.9
732     * @param string|null $consumerUrl
733     * @return string|null SHA256 message digest
734     */
735    public static function evaluateRemoteWikiHash( ?string $consumerUrl ): ?string {
736        if ( $consumerUrl === null ) {
737            return null;
738        }
739        $context = hash_init( 'sha256' );
740        $urlParts = parse_url( $consumerUrl );
741        if ( isset( $urlParts['host'] ) ) {
742            hash_update( $context, mb_strtolower( $urlParts['host'] ) );
743        }
744        if ( isset( $urlParts['port'] ) ) {
745            hash_update( $context, strval( $urlParts['port'] ) );
746        }
747        if ( isset( $urlParts['path'] ) ) {
748            hash_update( $context, $urlParts['path'] );
749        }
750        return hash_final( $context );
751    }
752
753}