Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 180 |
|
0.00% |
0 / 13 |
CRAP | |
0.00% |
0 / 1 |
TextHandler | |
0.00% |
0 / 180 |
|
0.00% |
0 / 13 |
2756 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
getTracks | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
20 | |||
getTimedTextNamespace | |
0.00% |
0 / 25 |
|
0.00% |
0 / 1 |
110 | |||
getTextPagesFromDb | |
0.00% |
0 / 20 |
|
0.00% |
0 / 1 |
12 | |||
getRemoteTextPagesQuery | |
0.00% |
0 / 18 |
|
0.00% |
0 / 1 |
6 | |||
getRemoteTextSources | |
0.00% |
0 / 24 |
|
0.00% |
0 / 1 |
20 | |||
getForeignDbTextSources | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
getLocalDbTextSources | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
getTextTracksFromRows | |
0.00% |
0 / 25 |
|
0.00% |
0 / 1 |
30 | |||
getTextTracksFromData | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
42 | |||
getContentType | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
12 | |||
getFullURL | |
0.00% |
0 / 18 |
|
0.00% |
0 / 1 |
6 | |||
convertSubtitles | |
0.00% |
0 / 20 |
|
0.00% |
0 / 1 |
72 |
1 | <?php |
2 | /** |
3 | * Timed Text handling for MediaWiki
4 | * |
5 | * Timed text support is presently fairly limited. Unlike Ogg and WebM handlers, |
6 | * timed text does not extend the TimedMediaHandler class. |
7 | * |
8 | * TODO On "new" timedtext language save purge all pages where file exists |
9 | */ |
10 | |
11 | namespace MediaWiki\TimedMediaHandler\Handlers\TextHandler; |
12 | |
13 | use Exception; |
14 | use File; |
15 | use ForeignDBFile; |
16 | use IForeignRepoWithDB; |
17 | use IForeignRepoWithMWApi; |
18 | use LocalRepo; |
19 | use MediaWiki\MediaWikiServices; |
20 | use MediaWiki\TimedMediaHandler\TimedText\ParseError; |
21 | use MediaWiki\TimedMediaHandler\TimedText\SrtReader; |
22 | use MediaWiki\TimedMediaHandler\TimedText\SrtWriter; |
23 | use MediaWiki\TimedMediaHandler\TimedText\VttWriter; |
24 | use MediaWiki\TimedMediaHandler\TimedTextPage; |
25 | use MediaWiki\Title\Title; |
26 | use RuntimeException; |
27 | use Wikimedia\Rdbms\IExpression; |
28 | use Wikimedia\Rdbms\IResultWrapper; |
29 | use Wikimedia\Rdbms\LikeValue; |
30 | |
31 | class TextHandler { |
32 | /** @var int|null lazy init remote Namespace number */ |
33 | public $remoteNs; |
34 | /** @var string|null lazy init remote Namespace name */ |
35 | public $remoteNsName; |
36 | |
37 | /** |
38 | * @var File |
39 | */ |
40 | protected $file; |
41 | |
42 | /** |
43 | * @var array of string keys for subtitle formats |
44 | */ |
45 | protected $formats; |
46 | |
/**
 * Remember the file whose timed text is being handled, together with the
 * subtitle formats that tracks should be offered in.
 *
 * @param File $file File to look up timed text tracks for
 * @param array $formats Subtitle format keys; defaults to the VTT and SRT
 *  format constants declared on TimedTextPage
 */
public function __construct(
	$file,
	$formats = [ TimedTextPage::VTT_SUBTITLE_FORMAT, TimedTextPage::SRT_SUBTITLE_FORMAT ]
) {
	$this->formats = $formats;
	$this->file = $file;
}
58 | |
/**
 * Collect the timed text track descriptions for this handler's file.
 *
 * The lookup strategy depends on where the file lives: local files and
 * ForeignDBFile instances are resolved through the database, files whose
 * repo implements IForeignRepoWithMWApi are resolved through the remote
 * API, and anything else yields no tracks.
 *
 * @return array[] List of associative arrays, one per track
 */
public function getTracks() {
	$file = $this->file;

	if ( $file->isLocal() ) {
		$tracks = $this->getLocalDbTextSources();
	} elseif ( $file instanceof ForeignDBFile ) {
		$tracks = $this->getForeignDbTextSources();
	} elseif ( $file->getRepo() instanceof IForeignRepoWithMWApi ) {
		$tracks = $this->getRemoteTextSources( $file );
	} else {
		$tracks = [];
	}

	return $tracks;
}
75 | |
/**
 * Determine the number of the TimedText namespace on the wiki that hosts
 * this handler's file.
 *
 * - Local files use NS_TIMEDTEXT.
 * - Repos implementing IForeignRepoWithDB look up their DB domain ID in the
 *   'TimedTextForeignNamespaces' config setting, falling back to NS_TIMEDTEXT.
 * - Repos implementing IForeignRepoWithMWApi are asked for their namespace
 *   list through the API; a successful answer is memoized in $remoteNs and
 *   $remoteNsName so the request is made at most once per handler.
 *
 * @return false|int Namespace number, or false if it could not be determined
 */
public function getTimedTextNamespace() {
	if ( $this->file->isLocal() ) {
		return NS_TIMEDTEXT;
	}

	$repo = $this->file->getRepo();

	if ( $repo instanceof IForeignRepoWithDB ) {
		$foreignNamespaces = MediaWikiServices::getInstance()
			->getMainConfig()
			->get( 'TimedTextForeignNamespaces' );
		$wikiID = $repo->getReplicaDB()->getDomainID();
		// No explicit mapping configured for this wiki: fall back to NS_TIMEDTEXT
		return $foreignNamespaces[$wikiID] ?? NS_TIMEDTEXT;
	}

	if ( !( $repo instanceof IForeignRepoWithMWApi ) ) {
		return false;
	}

	if ( $this->remoteNs !== null ) {
		// Already discovered by an earlier call
		return $this->remoteNs;
	}

	// Get the namespace data from the image api repo
	// (the original author notes that fetchImageQuery caches its results)
	$data = $repo->fetchImageQuery( [
		'meta' => 'siteinfo',
		'siprop' => 'namespaces'
	] );

	// Use the first namespace whose canonical name is "TimedText"
	foreach ( $data['query']['namespaces'] ?? [] as $ns ) {
		if ( ( $ns['canonical'] ?? null ) !== 'TimedText' || !isset( $ns['id'] ) ) {
			continue;
		}
		$this->remoteNs = $ns['id'];
		// The localized name is under '*' in the legacy JSON format and under
		// 'name' in formatversion=2; tolerate either, or neither.
		$this->remoteNsName = $ns['*'] ?? $ns['name'] ?? null;
		wfDebug( "Discovered remoteNs: $this->remoteNs and name: $this->remoteNsName \n" );
		break;
	}

	return $this->remoteNs ?? false;
}
124 | |
/**
 * Look up the pages in the TimedText namespace whose title begins with the
 * DB key of this handler's file.
 *
 * The query runs against the replica DB of the file's repo, so it is only
 * attempted when that repo is a LocalRepo instance. At most 300 rows are
 * returned, ordered by page title.
 *
 * @return IResultWrapper|false Rows with page_namespace and page_title, or
 *  false when there is no TimedText namespace or the repo cannot be queried
 */
private function getTextPagesFromDb() {
	$timedTextNs = $this->getTimedTextNamespace();
	if ( $timedTextNs === false ) {
		// No timed text namespace, so there is nothing to search
		wfDebug( 'Repo: ' . $this->file->getRepoName() . " does not have a TimedText namespace \n" );
		return false;
	}

	$repo = $this->file->getRepo();
	if ( !( $repo instanceof LocalRepo ) ) {
		return false;
	}

	$db = $repo->getReplicaDB();
	// Prefix match: "<file DB key>" followed by anything
	$titlePrefix = new LikeValue( $this->file->getTitle()->getDBkey(), $db->anyString() );
	$conditions = [
		'page_namespace' => $timedTextNs,
		$db->expr( 'page_title', IExpression::LIKE, $titlePrefix ),
	];

	return $db->newSelectQueryBuilder()
		->select( [ 'page_namespace', 'page_title' ] )
		->from( 'page' )
		->where( $conditions )
		->orderBy( 'page_title' )
		->limit( 300 )
		->caller( __METHOD__ )
		->fetchResultSet();
}
160 | |
/**
 * Assemble the API query parameters that ask a remote wiki for the timed
 * text tracks (videoinfo / timedtext) of this handler's file.
 *
 * @return array|false Query parameters, or false when the file's repo has
 *  no TimedText namespace
 */
public function getRemoteTextPagesQuery() {
	if ( $this->getTimedTextNamespace() === false ) {
		// No timed text namespace, don't try to look up timed text tracks
		wfDebug( 'Repo: ' . $this->file->getRepoName() . " does not have a TimedText namespace \n" );
		return false;
	}

	$fileTitle = $this->file->getTitle();
	// For a remote wiki, we need to always use canonical namespace names,
	// hence the trailing `true` argument.
	$canonicalTitle = Title::makeName( $fileTitle->getNamespace(), $fileTitle->getDBkey(), '', '', true );

	$query = [];
	$query['action'] = 'query';
	$query['titles'] = $canonicalTitle;
	$query['prop'] = 'videoinfo';
	$query['viprop'] = 'timedtext';
	$query['formatversion'] = '2';

	return $query;
}
188 | |
/**
 * Fetch the text tracks of a file hosted on a remote, API-accessible repo.
 *
 * When the repo info reports a positive 'descriptionCacheExpiry', that value
 * is used as the TTL for caching the result in the main WAN object cache,
 * keyed on the repo name and file name of this handler's file. Otherwise the
 * remote API is queried on every call.
 *
 * @param File $file The File's repo must implement IForeignRepoWithMWApi
 * @return array[] Track descriptions; empty when the query cannot be built
 */
private function getRemoteTextSources( File $file ) {
	$fetchTracks = function () use ( $file ) {
		/** @var IForeignRepoWithMWApi $repo */
		$repo = $file->getRepo();
		'@phan-var IForeignRepoWithMWApi $repo';
		wfDebug( "Get text tracks from remote api \n" );
		$query = $this->getRemoteTextPagesQuery();
		// No usable query (missing namespace) or wrong repo type: nothing to fetch
		if ( $query === false || !( $repo instanceof IForeignRepoWithMWApi ) ) {
			return [];
		}

		return $this->getTextTracksFromData( $repo->fetchImageQuery( $query ) );
	};

	$ttl = $file->getRepo()->getInfo()['descriptionCacheExpiry'] ?? 0;

	if ( $ttl > 0 ) {
		$cache = MediaWikiServices::getInstance()->getMainWANObjectCache();
		$cacheKey = $cache->makeKey(
			'RemoteTextTracks-url',
			$this->file->getRepoName(),
			$this->file->getName()
		);

		return $cache->getWithSetCallback( $cacheKey, $ttl, $fetchTracks );
	}

	// Caching disabled for this repo: always ask the remote wiki
	return $fetchTracks();
}
231 | |
/**
 * Get the text tracks for a file living on a foreign repo whose database
 * can be queried directly.
 *
 * @return array[] Track descriptions; empty when the page lookup fails
 */
public function getForeignDbTextSources() {
	$rows = $this->getTextPagesFromDb();

	return $rows === false ? [] : $this->getTextTracksFromRows( $rows );
}
243 | |
/**
 * Get the text tracks for a file stored on the local wiki.
 *
 * @return array[] Track descriptions; empty when the page lookup fails
 */
public function getLocalDbTextSources() {
	$pages = $this->getTextPagesFromDb();
	if ( $pages === false ) {
		return [];
	}

	return $this->getTextTracksFromRows( $pages );
}
255 | |
/**
 * Build an array of track information using a Database result
 * Handles both local and foreign Db results
 *
 * Page titles are expected to end in ".<language>.<extension>". Rows with
 * fewer than three dot-separated parts, or whose language part is not a
 * known language code, are skipped. For every remaining language one track
 * is produced per configured format, whatever extension the page itself
 * carries. Several pages for the same language (for example ".en.srt" and
 * ".en.vtt") therefore yield a single set of tracks rather than duplicates.
 *
 * @param IResultWrapper $data Database result with page titles
 * @return array[] Each entry has the keys src, kind, type, srclang, dir, label
 */
public function getTextTracksFromRows( IResultWrapper $data ) {
	$textTracks = [];
	// Language keys already emitted, used to suppress duplicate tracks
	$seenLanguages = [];

	$services = MediaWikiServices::getInstance();
	$langNames = $services->getLanguageNameUtils()->getLanguageNames();
	$languageFactory = $services->getLanguageFactory();

	foreach ( $data as $row ) {
		$titleParts = explode( '.', $row->page_title );
		$partCount = count( $titleParts );
		if ( $partCount < 3 ) {
			continue;
		}

		// The last part is the extension, the one before it the language
		$languageKey = $titleParts[ $partCount - 2 ];

		// If there is no valid language, or it was handled already, continue:
		if ( !isset( $langNames[ $languageKey ] ) || isset( $seenLanguages[ $languageKey ] ) ) {
			continue;
		}
		$seenLanguages[ $languageKey ] = true;

		// These values are the same for every format, so compute them once
		$language = $languageFactory->getLanguage( $languageKey );
		$srcLang = $language->getHtmlCode();
		$dir = $language->getDir();
		$label = wfMessage(
			'timedmedia-subtitle-language',
			$langNames[ $languageKey ],
			$languageKey
		)->text();

		foreach ( $this->formats as $format ) {
			$textTracks[] = [
				'src' => $this->getFullURL( $languageKey, $format ),
				'kind' => 'subtitles',
				'type' => $this->getContentType( $format ),
				'srclang' => $srcLang,
				'dir' => $dir,
				'label' => $label,
			];
		}
	}

	return $textTracks;
}
299 | |
/**
 * Extract the usable tracks from a remote API response.
 *
 * Walks query → pages → videoinfo → timedtext and keeps every track whose
 * 'type' equals the content type of one of the configured formats. Missing
 * levels in the response are treated as empty.
 *
 * @param mixed $data JSON decoded result from a query API request
 * @return array[] The matching track entries, as delivered by the API
 */
public function getTextTracksFromData( $data ) {
	// Content types we are willing to accept, one entry per configured format
	$acceptedTypes = [];
	foreach ( $this->formats as $format ) {
		$acceptedTypes[] = $this->getContentType( $format );
	}

	$matches = [];
	foreach ( $data['query']['pages'] ?? [] as $pageInfo ) {
		foreach ( $pageInfo['videoinfo'] ?? [] as $videoInfo ) {
			foreach ( $videoInfo['timedtext'] ?? [] as $candidate ) {
				$candidateType = $candidate['type'] ?? '';
				foreach ( $acceptedTypes as $acceptedType ) {
					if ( $candidateType === $acceptedType ) {
						// Add validation ?
						$matches[] = $candidate;
					}
				}
			}
		}
	}

	return $matches;
}
321 | |
/**
 * Map a subtitle format key to its MIME content type.
 *
 * @param string $timedTextExtension One of the TimedTextPage subtitle
 *  format constants
 *
 * @return string 'text/x-srt' for SRT, 'text/vtt' for VTT, and an empty
 *  string for anything else
 */
public function getContentType( $timedTextExtension ) {
	if ( $timedTextExtension === TimedTextPage::SRT_SUBTITLE_FORMAT ) {
		$contentType = 'text/x-srt';
	} elseif ( $timedTextExtension === TimedTextPage::VTT_SUBTITLE_FORMAT ) {
		$contentType = 'text/vtt';
	} else {
		$contentType = '';
	}

	return $contentType;
}
336 | |
/**
 * Build the API URL that serves the raw subtitle track of this handler's
 * file in the given language and format.
 * Only use for local and foreignDb requests
 *
 * @param string $lang Language key of the track
 * @param string $format Subtitle format key, passed as "trackformat"
 * @return string|false The URL as produced by the repo's makeUrl(); per the
 *  note below it is false when scriptDirUrl is not set for the repo
 */
public function getFullURL( $lang, $format ) {
	$fileTitle = $this->file->getTitle();

	$params = [
		'action' => 'timedtext',
		// Note we need to use the canonical namespace in case this is a
		// foreign DB repo (Wikimedia Commons style) in a different language
		// than the current site.
		'title' => Title::makeName( $fileTitle->getNamespace(), $fileTitle->getDBkey(), '', '', true ),
		'lang' => $lang,
		'trackformat' => $format,
	];
	if ( !$this->file->isLocal() ) {
		// Non-local files additionally get origin=*
		$params += [ 'origin' => '*' ];
	}

	// Note: This will return false if scriptDirUrl is not set for repo.
	return $this->file->getRepo()->makeUrl( wfArrayToCgi( $params ), 'api' );
}
371 | |
/**
 * Convert subtitles between SubRIP (SRT) and WebVTT, laxly.
 *
 * The conversion is performed even when source and destination format are
 * the same, to ensure data format integrity. Only SRT is currently accepted
 * as a source format; VTT sources are rejected with an exception.
 *
 * @todo cache the conversion in memcached
 *
 * @param string $from source format, one of TimedTextPage::SRT_SUBTITLE_FORMAT
 *   or TimedTextPage::VTT_SUBTITLE_FORMAT
 * @param string $to destination format, one of TimedTextPage::SRT_SUBTITLE_FORMAT
 *   or TimedTextPage::VTT_SUBTITLE_FORMAT
 * @param string $data source-formatted subtitles
 * @param ParseError[] &$errors optional outparam to capture errors
 * @return string destination-formatted subtitles
 * @throws RuntimeException if either format is unsupported or the conversion
 *   fails; in the latter case the underlying exception is attached as the
 *   previous exception
 */
public static function convertSubtitles( $from, $to, $data, &$errors = [] ) {
	switch ( $from ) {
		case TimedTextPage::SRT_SUBTITLE_FORMAT:
			$reader = new SrtReader();
			break;
		case TimedTextPage::VTT_SUBTITLE_FORMAT:
			// @todo once VttReader is implemented, use it.
			// For now throw an exception rather than a fatal error.
			throw new RuntimeException( 'vtt source pages are not yet supported' );
		default:
			throw new RuntimeException( 'Unsupported timedtext filetype' );
	}

	switch ( $to ) {
		case TimedTextPage::SRT_SUBTITLE_FORMAT:
			$writer = new SrtWriter();
			break;
		case TimedTextPage::VTT_SUBTITLE_FORMAT:
			$writer = new VttWriter();
			break;
		default:
			throw new RuntimeException( 'Unsupported timedtext filetype' );
	}

	try {
		$reader->read( $data );
		$cues = $reader->getCues();
		$errors = $reader->getErrors();

		return $writer->write( $cues );
	} catch ( Exception $e ) {
		// Chain the original exception so its type and stack trace stay
		// available to whoever handles or logs the failure.
		throw new RuntimeException(
			'Timed text track conversion failed: ' . $e->getMessage(),
			0,
			$e
		);
	}
}
420 | } |