Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
36.69% |
62 / 169 |
|
17.39% |
4 / 23 |
CRAP | |
0.00% |
0 / 1 |
DjVuHandler | |
36.69% |
62 / 169 |
|
17.39% |
4 / 23 |
1037.61 | |
0.00% |
0 / 1 |
isEnabled | |
66.67% |
4 / 6 |
|
0.00% |
0 / 1 |
3.33 | |||
mustRender | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
isExpensiveToThumbnail | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
isMultiPage | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getParamMap | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
2 | |||
validateParam | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
20 | |||
makeParamString | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
parseParamString | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
getScriptParams | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
2 | |||
doTransform | |
0.00% |
0 / 62 |
|
0.00% |
0 / 1 |
90 | |||
getDjVuImage | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
2 | |||
getMetadataInternal | |
77.78% |
7 / 9 |
|
0.00% |
0 / 1 |
4.18 | |||
getMetaTree | |
70.00% |
7 / 10 |
|
0.00% |
0 / 1 |
8.32 | |||
getThumbType | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
6 | |||
getSizeAndMetadata | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
2 | |||
getMetadataType | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
isFileMetadataValid | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
6 | |||
pageCount | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
2 | |||
getPageDimensions | |
80.00% |
4 / 5 |
|
0.00% |
0 / 1 |
3.07 | |||
getDimensionInfo | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
1 | |||
getDimensionInfoFromMetaTree | |
81.25% |
13 / 16 |
|
0.00% |
0 / 1 |
6.24 | |||
getPageText | |
66.67% |
4 / 6 |
|
0.00% |
0 / 1 |
4.59 | |||
useSplitMetadata | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 |
1 | <?php |
2 | /** |
3 | * Handler for DjVu images. |
4 | * |
5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by |
7 | * the Free Software Foundation; either version 2 of the License, or |
8 | * (at your option) any later version. |
9 | * |
10 | * This program is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | * GNU General Public License for more details. |
14 | * |
15 | * You should have received a copy of the GNU General Public License along |
16 | * with this program; if not, write to the Free Software Foundation, Inc., |
17 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
18 | * http://www.gnu.org/copyleft/gpl.html |
19 | * |
20 | * @file |
21 | * @ingroup Media |
22 | */ |
23 | |
24 | use MediaWiki\FileRepo\File\File; |
25 | use MediaWiki\MainConfigNames; |
26 | use MediaWiki\MediaWikiServices; |
27 | use MediaWiki\PoolCounter\PoolCounterWorkViaCallback; |
28 | use MediaWiki\Shell\Shell; |
29 | |
30 | /** |
31 | * Handler for DjVu images |
32 | * |
33 | * @ingroup Media |
34 | */ |
35 | class DjVuHandler extends ImageHandler { |
36 | private const EXPENSIVE_SIZE_LIMIT = 10_485_760; // 10MiB |
37 | |
38 | // Constants for getHandlerState |
39 | private const STATE_DJVU_IMAGE = 'djvuImage'; |
40 | private const STATE_TEXT_TREE = 'djvuTextTree'; |
41 | private const STATE_META_TREE = 'djvuMetaTree'; |
42 | private const CACHE_VERSION = 'v2'; |
43 | |
/**
 * Report whether DjVu handling is usable with the current configuration.
 *
 * Both the DjvuRenderer and DjvuDump settings must be truthy; when either
 * is not, a debug message is logged and the handler reports itself disabled.
 *
 * @return bool
 */
public function isEnabled() {
	$config = MediaWikiServices::getInstance()->getMainConfig();
	$renderer = $config->get( MainConfigNames::DjvuRenderer );
	$dumper = $config->get( MainConfigNames::DjvuDump );

	if ( $renderer && $dumper ) {
		return true;
	}

	wfDebug( "DjVu is disabled, please set \$wgDjvuRenderer and \$wgDjvuDump" );
	return false;
}
57 | |
/**
 * Whether files of this type must always be rendered.
 *
 * @param File $file Unused; the answer does not depend on the file.
 * @return bool Always true for this handler.
 */
public function mustRender( $file ) {
	return true;
}
65 | |
/**
 * Whether creating thumbnails for the file is considered expensive.
 *
 * A file is treated as expensive when its size is strictly greater than
 * EXPENSIVE_SIZE_LIMIT.
 *
 * @param File $file
 * @return bool
 */
public function isExpensiveToThumbnail( $file ) {
	// self:: rather than static:: — the constant is private, so late static
	// binding could resolve to a subclass constant that is not accessible
	// from this scope.
	return $file->getSize() > self::EXPENSIVE_SIZE_LIMIT;
}
74 | |
/**
 * Whether files of this type are treated as multi-page documents.
 *
 * @param File $file Unused; the answer does not depend on the file.
 * @return bool Always true for this handler.
 */
public function isMultiPage( $file ) {
	return true;
}
82 | |
/**
 * Map of 'img_*' names to the handler parameter names they correspond to.
 *
 * @return array<string,string> 'img_width' => 'width', 'img_page' => 'page'
 */
public function getParamMap() {
	$map = [];
	$map['img_width'] = 'width';
	$map['img_page'] = 'page';

	return $map;
}
92 | |
/**
 * Validate a single handler parameter.
 *
 * Accepts only 'width', 'height' and 'page', and only when the value
 * compares greater than zero. A 'page' value must additionally be a plain
 * integer once surrounding whitespace is trimmed.
 *
 * @param string $name Parameter name
 * @param mixed $value Parameter value
 * @return bool True when the parameter is acceptable
 */
public function validateParam( $name, $value ) {
	if ( $name === 'page' && trim( $value ) !== (string)intval( $value ) ) {
		// Extra junk on the end of page, probably actually a caption
		// e.g. [[File:Foo.djvu|thumb|Page 3 of the document shows foo]]
		return false;
	}

	// Strict comparison so that a non-string name can never loosely match
	// one of the accepted parameter names.
	return in_array( $name, [ 'width', 'height', 'page' ], true ) && $value > 0;
}
106 | |
/**
 * Build the parameter string "page<N>-<W>px" from handler parameters.
 *
 * @param array $params Must contain 'width'; 'page' defaults to 1
 * @return string|false The parameter string, or false when no width is set
 */
public function makeParamString( $params ) {
	if ( !isset( $params['width'] ) ) {
		return false;
	}

	$pageNumber = $params['page'] ?? 1;

	return 'page' . $pageNumber . '-' . $params['width'] . 'px';
}
119 | |
/**
 * Parse a parameter string of the form "page<N>-<W>px".
 *
 * @param string $str
 * @return array|false [ 'width' => string, 'page' => string ] holding the
 *  matched digit strings, or false when $str does not match the format
 */
public function parseParamString( $str ) {
	if ( !preg_match( '/^page(\d+)-(\d+)px$/', $str, $matches ) ) {
		return false;
	}

	// $matches[0] is the whole match; the two groups are page and width.
	[ , $pageNumber, $width ] = $matches;

	return [ 'width' => $width, 'page' => $pageNumber ];
}
131 | |
/**
 * Reduce the handler parameters to the 'width' and 'page' entries.
 *
 * @param array $params Must contain 'width' and 'page'
 * @return array [ 'width' => mixed, 'page' => mixed ]
 */
protected function getScriptParams( $params ) {
	$scriptParams = [];
	foreach ( [ 'width', 'page' ] as $key ) {
		$scriptParams[$key] = $params[$key];
	}

	return $scriptParams;
}
142 | |
/**
 * Render one page of a DjVu file to a thumbnail by running the configured
 * renderer (optionally piped through the configured post-processor).
 *
 * @param File $image Source file
 * @param string $dstPath Filesystem path the thumbnail is written to
 * @param string $dstUrl URL of the thumbnail
 * @param array $params Transform parameters; validated via normaliseParams()
 * @param int $flags Bitfield; with TRANSFORM_LATER set nothing is rendered
 * @return MediaTransformError|ThumbnailImage|TransformParameterError
 */
public function doTransform( $image, $dstPath, $dstUrl, $params, $flags = 0 ) {
	$config = MediaWikiServices::getInstance()->getMainConfig();
	$renderer = $config->get( MainConfigNames::DjvuRenderer );
	$postProcessor = $config->get( MainConfigNames::DjvuPostProcessor );

	if ( !$this->normaliseParams( $image, $params ) ) {
		return new TransformParameterError( $params );
	}

	$width = $params['width'];
	$height = $params['height'];
	$page = $params['page'];
	// Parameters handed to the resulting ThumbnailImage, deferred or not.
	$thumbParams = [
		'width' => $width,
		'height' => $height,
		'page' => $page
	];

	if ( $flags & self::TRANSFORM_LATER ) {
		return new ThumbnailImage( $image, $dstUrl, $dstPath, $thumbParams );
	}

	$dstDir = dirname( $dstPath );
	if ( !wfMkdirParents( $dstDir, null, __METHOD__ ) ) {
		return new MediaTransformError(
			'thumbnail_error',
			$width,
			$height,
			wfMessage( 'thumbnail_dest_directory' )
		);
	}

	// The shell command needs a local copy of the source file.
	$fetchLocalCopy = static function () use ( $image ) {
		return $image->getLocalRefPath();
	};
	if ( $image->getSize() >= 1e7 ) { // 10 MB
		// Thumbnail extraction is very inefficient for large files, so
		// the fetch goes through a pool counter keyed on the file name
		// to limit the number of concurrent downloaders.
		$work = new PoolCounterWorkViaCallback(
			'GetLocalFileCopy',
			sha1( $image->getName() ),
			[ 'doWork' => $fetchLocalCopy ]
		);
		$srcPath = $work->execute();
	} else {
		$srcPath = $fetchLocalCopy();
	}

	if ( $srcPath === false ) {
		// Failed to get local copy
		$logLine = sprintf(
			'Thumbnail failed on %s: could not get local copy of "%s"',
			wfHostname(),
			$image->getName()
		);
		wfDebugLog( 'thumbnail', $logLine );

		return new MediaTransformError(
			'thumbnail_error',
			$width,
			$height,
			wfMessage( 'filemissing' )
		);
	}

	// The pipeline is wrapped in a subshell (brackets) so that stderr of
	// every stage is aggregated before being redirected to stdout.
	$pipeline = Shell::escape(
		$renderer,
		"-format=ppm",
		"-page={$page}",
		"-size={$params['physicalWidth']}x{$params['physicalHeight']}",
		$srcPath
	);
	if ( $postProcessor ) {
		$pipeline .= " | {$postProcessor}";
	}
	$cmd = '(' . $pipeline . ' > ' . Shell::escape( $dstPath ) . ') 2>&1';

	wfDebug( __METHOD__ . ": $cmd" );
	$retval = 0;
	$err = wfShellExec( $cmd, $retval );

	$removed = $this->removeBadFile( $dstPath, $retval );
	if ( $retval === 0 && !$removed ) {
		return new ThumbnailImage( $image, $dstUrl, $dstPath, $thumbParams );
	}

	$this->logErrorForExternalProcess( $retval, $err, $cmd );

	return new MediaTransformError( 'thumbnail_error', $width, $height, $err );
}
237 | |
/**
 * Fetch the DjVuImage kept in the given handler state, creating and
 * storing a new one for $path when the state holds none yet.
 *
 * @param MediaHandlerState $state
 * @param string $path Path passed to the DjVuImage constructor
 * @return DjVuImage
 */
private function getDjVuImage( $state, $path ) {
	$stored = $state->getHandlerState( self::STATE_DJVU_IMAGE );
	if ( $stored ) {
		return $stored;
	}

	$djvuImage = new DjVuImage( $path );
	$state->setHandlerState( self::STATE_DJVU_IMAGE, $djvuImage );

	return $djvuImage;
}
254 | |
/**
 * Load the metadata items this handler needs from the file.
 *
 * Requests the 'error', '_error' and 'data' items, plus 'text' when
 * $gettext is truthy.
 *
 * @param File $file The DjVu file in question
 * @param bool $gettext Whether to request the 'text' item as well
 * @return string|false|array The value returned by getMetadataItems(), or
 *  false when it has an 'error' or '_error' item set
 */
private function getMetadataInternal( File $file, $gettext ) {
	$wanted = $gettext
		? [ 'error', '_error', 'data', 'text' ]
		: [ 'error', '_error', 'data' ];

	$items = $file->getMetadataItems( $wanted );
	$hasError = isset( $items['error'] ) || isset( $items['_error'] );

	return $hasError ? false : $items;
}
277 | |
/**
 * Get the metadata tree for a DjVu file.
 *
 * A truthy value already present in the file's handler state (text-tree
 * slot when $gettext is set, meta-tree slot otherwise) is returned as is.
 * Otherwise the metadata is loaded through getMetadataInternal(); this
 * method itself does not write anything to the handler state.
 *
 * @param File $image
 * @param bool $gettext Whether the 'text' item is wanted (Default: false)
 * @return false|array False when no usable metadata is available
 */
public function getMetaTree( $image, $gettext = false ) {
	$stateKey = $gettext ? self::STATE_TEXT_TREE : self::STATE_META_TREE;
	$stored = $image->getHandlerState( $stateKey );
	if ( $stored ) {
		return $stored;
	}

	$tree = $this->getMetadataInternal( $image, $gettext );
	if ( !$tree ) {
		return false;
	}

	if ( !$gettext ) {
		// Callers that did not ask for text never see a 'text' entry.
		unset( $tree['text'] );
	}

	return $tree;
}
302 | |
/**
 * Get the extension and MIME type used for thumbnails.
 *
 * The extension comes from the DjvuOutputExtension setting. The MIME type
 * looked up for it is memoised in a function-static variable once a
 * non-null value has been obtained.
 *
 * @param string $ext Unused
 * @param string $mime Unused
 * @param array|null $params Unused
 * @return array [ extension, MIME type or null ]
 */
public function getThumbType( $ext, $mime, $params = null ) {
	static $djvuMime = null;

	$services = MediaWikiServices::getInstance();
	$outputExtension = $services->getMainConfig()->get( MainConfigNames::DjvuOutputExtension );

	$djvuMime ??= MediaWikiServices::getInstance()
		->getMimeAnalyzer()
		->getMimeTypeFromExtensionOrNull( $outputExtension );

	return [ $outputExtension, $djvuMime ];
}
314 | |
/**
 * Extract metadata and image size information from the file at $path.
 *
 * @param MediaHandlerState $state State object holding the DjVuImage
 * @param string $path
 * @return array The entries of DjVuImage::getImageSize() plus a 'metadata'
 *  entry; 'metadata' takes precedence over a same-named size entry
 */
public function getSizeAndMetadata( $state, $path ) {
	wfDebug( "Getting DjVu metadata for $path" );

	$djvuImage = $this->getDjVuImage( $state, $path );
	$extracted = $djvuImage->retrieveMetaData();

	// Special value so that we don't repetitively try and decode a broken file.
	$result = [
		'metadata' => $extracted === false
			? [ 'error' => 'Error extracting metadata' ]
			: $extracted,
	];

	return $result + $djvuImage->getImageSize();
}
326 | |
/**
 * Get the metadata type identifier for this handler.
 *
 * @param File $image Unused
 * @return string Always 'djvuxml'
 */
public function getMetadataType( $image ) {
	// The identifier is kept as is for historical reasons.
	return 'djvuxml';
}
331 | |
/**
 * Check whether the file's stored metadata is usable.
 *
 * @param File $image
 * @return mixed METADATA_GOOD when getMetadataArray() is truthy,
 *  METADATA_BAD otherwise
 */
public function isFileMetadataValid( $image ) {
	if ( $image->getMetadataArray() ) {
		return self::METADATA_GOOD;
	}

	return self::METADATA_BAD;
}
335 | |
/**
 * Get the number of pages in the document.
 *
 * @param File $image
 * @return int|false Page count, or false when no dimension info is available
 */
public function pageCount( File $image ) {
	$info = $this->getDimensionInfo( $image );
	if ( !$info ) {
		return false;
	}

	return $info['pageCount'];
}
341 | |
/**
 * Get the dimensions recorded for one page of the document.
 *
 * @param File $image
 * @param int $page 1-based page number
 * @return array|false The 'dimensionsByPage' entry for the page, or false
 *  when there is no dimension info or no non-null entry for that page
 */
public function getPageDimensions( File $image, $page ) {
	// MW starts pages at 1, the dimension list is 0-based.
	$pageIndex = $page - 1;

	$info = $this->getDimensionInfo( $image );
	if ( !$info ) {
		return false;
	}

	return $info['dimensionsByPage'][$pageIndex] ?? false;
}
352 | |
/**
 * Get page count and per-page dimensions for a file through the main WAN
 * object cache.
 *
 * The cache key is built from CACHE_VERSION and the file's SHA-1; on a miss
 * the value is computed from the file's metadata tree.
 *
 * @param File $file
 * @return array|false See getDimensionInfoFromMetaTree()
 */
protected function getDimensionInfo( File $file ) {
	$cache = MediaWikiServices::getInstance()->getMainWANObjectCache();
	$key = $cache->makeKey( 'file-djvu', 'dimensions', self::CACHE_VERSION, $file->getSha1() );

	return $cache->getWithSetCallback(
		$key,
		$cache::TTL_INDEFINITE,
		fn () => $this->getDimensionInfoFromMetaTree( $this->getMetaTree( $file ) ),
		[ 'pcTTL' => $cache::TTL_INDEFINITE ]
	);
}
365 | |
/**
 * Given the metadata, returns dimension information about the document
 *
 * @param false|array $metatree The file's metadata tree
 * @return array|false [ 'pageCount' => int, 'dimensionsByPage' => array ]
 *  where each 'dimensionsByPage' entry is either
 *  [ 'width' => int, 'height' => int ] or false for a falsy page entry.
 *  False when the tree is empty, has no 'data', or 'data' carries no page
 *  list.
 */
protected function getDimensionInfoFromMetaTree( $metatree ) {
	if ( !$metatree || empty( $metatree['data'] ) ) {
		return false;
	}

	// Metadata without a usable page list is treated like missing metadata
	// instead of being fed to foreach/count(), which would raise a warning
	// and, on PHP 8, a TypeError.
	$pages = $metatree['data']['pages'] ?? null;
	if ( !is_array( $pages ) ) {
		return false;
	}

	$dimsByPage = [];
	foreach ( $pages as $page ) {
		$dimsByPage[] = $page
			? [
				'width' => (int)$page['width'],
				'height' => (int)$page['height'],
			]
			: false;
	}

	return [
		'pageCount' => count( $pages ),
		'dimensionsByPage' => $dimsByPage
	];
}
395 | |
/**
 * Get the text stored in the metadata for one page.
 *
 * @param File $image
 * @param int $page 1-based page number to get information for
 * @return string|false Page text, or false when the metadata tree is
 *  unavailable or holds no text entry for that page
 */
public function getPageText( File $image, $page ) {
	$tree = $this->getMetaTree( $image, true );
	if ( !$tree ) {
		return false;
	}

	$texts = $tree['text'] ?? null;
	if ( $texts === null ) {
		return false;
	}

	// The text list is 0-based while page numbers start at 1.
	return $texts[$page - 1] ?? false;
}
411 | |
/**
 * Whether this handler uses split metadata.
 *
 * @return bool Always true for this handler.
 */
public function useSplitMetadata() {
	return true;
}
415 | } |