Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
36.69% |
62 / 169 |
|
17.39% |
4 / 23 |
CRAP | |
0.00% |
0 / 1 |
DjVuHandler | |
36.69% |
62 / 169 |
|
17.39% |
4 / 23 |
1037.61 | |
0.00% |
0 / 1 |
isEnabled | |
66.67% |
4 / 6 |
|
0.00% |
0 / 1 |
3.33 | |||
mustRender | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
isExpensiveToThumbnail | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
isMultiPage | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getParamMap | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
2 | |||
validateParam | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
20 | |||
makeParamString | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
parseParamString | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
getScriptParams | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
2 | |||
doTransform | |
0.00% |
0 / 62 |
|
0.00% |
0 / 1 |
90 | |||
getDjVuImage | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
2 | |||
getMetadataInternal | |
77.78% |
7 / 9 |
|
0.00% |
0 / 1 |
4.18 | |||
getMetaTree | |
70.00% |
7 / 10 |
|
0.00% |
0 / 1 |
8.32 | |||
getThumbType | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
6 | |||
getSizeAndMetadata | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
2 | |||
getMetadataType | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
isFileMetadataValid | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
6 | |||
pageCount | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
2 | |||
getPageDimensions | |
80.00% |
4 / 5 |
|
0.00% |
0 / 1 |
3.07 | |||
getDimensionInfo | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
1 | |||
getDimensionInfoFromMetaTree | |
81.25% |
13 / 16 |
|
0.00% |
0 / 1 |
6.24 | |||
getPageText | |
66.67% |
4 / 6 |
|
0.00% |
0 / 1 |
4.59 | |||
useSplitMetadata | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 |
1 | <?php |
2 | /** |
3 | * Handler for DjVu images. |
4 | * |
5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by |
7 | * the Free Software Foundation; either version 2 of the License, or |
8 | * (at your option) any later version. |
9 | * |
10 | * This program is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | * GNU General Public License for more details. |
14 | * |
15 | * You should have received a copy of the GNU General Public License along |
16 | * with this program; if not, write to the Free Software Foundation, Inc., |
17 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
18 | * http://www.gnu.org/copyleft/gpl.html |
19 | * |
20 | * @file |
21 | * @ingroup Media |
22 | */ |
23 | |
24 | use MediaWiki\MainConfigNames; |
25 | use MediaWiki\MediaWikiServices; |
26 | use MediaWiki\PoolCounter\PoolCounterWorkViaCallback; |
27 | use MediaWiki\Shell\Shell; |
28 | |
29 | /** |
30 | * Handler for DjVu images |
31 | * |
32 | * @ingroup Media |
33 | */ |
34 | class DjVuHandler extends ImageHandler { |
35 | private const EXPENSIVE_SIZE_LIMIT = 10_485_760; // 10MiB |
36 | |
37 | // Constants for getHandlerState |
38 | private const STATE_DJVU_IMAGE = 'djvuImage'; |
39 | private const STATE_TEXT_TREE = 'djvuTextTree'; |
40 | private const STATE_META_TREE = 'djvuMetaTree'; |
41 | private const CACHE_VERSION = 'v2'; |
42 | |
/**
 * Report whether DjVu handling is configured.
 *
 * Both the DjvuRenderer and the DjvuDump settings must be truthy. When
 * either one is missing a debug message is logged and false is returned.
 *
 * @return bool
 */
public function isEnabled() {
	$config = MediaWikiServices::getInstance()->getMainConfig();
	$renderer = $config->get( MainConfigNames::DjvuRenderer );
	$dumper = $config->get( MainConfigNames::DjvuDump );

	if ( $renderer && $dumper ) {
		return true;
	}

	wfDebug( "DjVu is disabled, please set \$wgDjvuRenderer and \$wgDjvuDump" );

	return false;
}
56 | |
/**
 * Unconditionally answers true, whatever file is passed in.
 *
 * @param File $file Not inspected.
 * @return bool Always true.
 */
public function mustRender( $file ) {
	return true;
}
64 | |
/**
 * True if the file is large enough that creating thumbnails from it is
 * considered resource-intensive.
 *
 * The threshold is EXPENSIVE_SIZE_LIMIT (10 MiB); only files strictly larger
 * than that count as expensive.
 *
 * @param File $file
 * @return bool
 */
public function isExpensiveToThumbnail( $file ) {
	// The constant is private, so it is resolved with self:: like every other
	// constant in this class. Late static binding would look the name up on the
	// called class and fail if a subclass declared a constant of the same name.
	return $file->getSize() > self::EXPENSIVE_SIZE_LIMIT;
}
73 | |
/**
 * Unconditionally answers true, whatever file is passed in.
 *
 * @param File $file Not inspected.
 * @return bool Always true.
 */
public function isMultiPage( $file ) {
	return true;
}
81 | |
/**
 * Get the mapping from the keys 'img_width' and 'img_page' to the handler
 * parameter names 'width' and 'page'.
 *
 * @return array
 */
public function getParamMap() {
	$paramMap = [];
	$paramMap['img_width'] = 'width';
	$paramMap['img_page'] = 'page';

	return $paramMap;
}
91 | |
/**
 * Check whether a single handler parameter is acceptable.
 *
 * Only 'width', 'height' and 'page' are recognised, and the value has to
 * compare greater than zero. A 'page' value must additionally be a plain
 * integer once surrounding whitespace is trimmed.
 *
 * @param string $name
 * @param mixed $value
 * @return bool
 */
public function validateParam( $name, $value ) {
	if ( $name === 'page' && trim( $value ) !== (string)intval( $value ) ) {
		// Extra junk on the end of page, probably actually a caption
		// e.g. [[File:Foo.djvu|thumb|Page 3 of the document shows foo]]
		return false;
	}

	// Strict membership test: a non-string name must never match the whitelist
	// through loose comparison.
	return in_array( $name, [ 'width', 'height', 'page' ], true ) && $value > 0;
}
105 | |
/**
 * Build the "page<N>-<W>px" parameter string for a set of parameters.
 *
 * @param array $params Must contain 'width'; 'page' falls back to 1.
 * @return string|false False when no width is set.
 */
public function makeParamString( $params ) {
	if ( !isset( $params['width'] ) ) {
		return false;
	}

	$pageNumber = $params['page'] ?? 1;

	return "page{$pageNumber}-{$params['width']}px";
}
118 | |
/**
 * Parse a "page<N>-<W>px" parameter string back into its parts.
 *
 * @param string $str
 * @return array|false Array with 'width' and 'page' (both as the matched digit
 *  strings), or false when the string does not have the expected form.
 */
public function parseParamString( $str ) {
	if ( !preg_match( '/^page(\d+)-(\d+)px$/', $str, $matches ) ) {
		return false;
	}

	[ , $page, $width ] = $matches;

	return [ 'width' => $width, 'page' => $page ];
}
130 | |
/**
 * Reduce a parameter array to just its 'width' and 'page' entries.
 *
 * @param array $params Read at the keys 'width' and 'page'.
 * @return array
 */
protected function getScriptParams( $params ) {
	$scriptParams = [];
	foreach ( [ 'width', 'page' ] as $key ) {
		$scriptParams[$key] = $params[$key];
	}

	return $scriptParams;
}
141 | |
/**
 * Render one page of a DjVu file to a thumbnail by running the configured
 * renderer (optionally piped through the configured post-processor) in a shell.
 *
 * Steps, in order: normalise the parameters, return early for deferred
 * transforms, make sure the destination directory exists, obtain a local copy
 * of the source, run the shell pipeline, and discard bad output.
 *
 * @param File $image
 * @param string $dstPath Path the thumbnail is written to
 * @param string $dstUrl URL handed to the resulting ThumbnailImage
 * @param array $params Normalised in place; 'width', 'height', 'page',
 *  'physicalWidth' and 'physicalHeight' are read afterwards
 * @param int $flags Bitfield; only self::TRANSFORM_LATER is checked here
 * @return MediaTransformError|ThumbnailImage|TransformParameterError
 */
public function doTransform( $image, $dstPath, $dstUrl, $params, $flags = 0 ) {
	$config = MediaWikiServices::getInstance()->getMainConfig();
	$renderer = $config->get( MainConfigNames::DjvuRenderer );
	$postProcessor = $config->get( MainConfigNames::DjvuPostProcessor );

	if ( !$this->normaliseParams( $image, $params ) ) {
		return new TransformParameterError( $params );
	}

	[ 'width' => $width, 'height' => $height, 'page' => $page ] = $params;

	// Parameters handed to the resulting thumbnail object, both for deferred
	// and for completed transforms.
	$thumbParams = [
		'width' => $width,
		'height' => $height,
		'page' => $page
	];

	if ( $flags & self::TRANSFORM_LATER ) {
		return new ThumbnailImage( $image, $dstUrl, $dstPath, $thumbParams );
	}

	$dstDirReady = wfMkdirParents( dirname( $dstPath ), null, __METHOD__ );
	if ( !$dstDirReady ) {
		return new MediaTransformError(
			'thumbnail_error',
			$width,
			$height,
			wfMessage( 'thumbnail_dest_directory' )
		);
	}

	// Get local copy source for shell scripts.
	// Thumbnail extraction is very inefficient for large files, so fetching
	// the copy of a big file goes through a pool counter to limit the number
	// of concurrent downloaders.
	$fetchLocalCopy = static function () use ( $image ) {
		return $image->getLocalRefPath();
	};
	if ( $image->getSize() >= 1e7 ) { // 10 MB
		$work = new PoolCounterWorkViaCallback(
			'GetLocalFileCopy',
			sha1( $image->getName() ),
			[ 'doWork' => $fetchLocalCopy ]
		);
		$srcPath = $work->execute();
	} else {
		$srcPath = $fetchLocalCopy();
	}

	if ( $srcPath === false ) { // Failed to get local copy
		$failure = sprintf(
			'Thumbnail failed on %s: could not get local copy of "%s"',
			wfHostname(),
			$image->getName()
		);
		wfDebugLog( 'thumbnail', $failure );

		return new MediaTransformError(
			'thumbnail_error',
			$width,
			$height,
			wfMessage( 'filemissing' )
		);
	}

	# Use a subshell (brackets) to aggregate stderr from both pipeline commands
	# before redirecting it to the overall stdout. This works in both Linux and Windows XP.
	$renderCmd = Shell::escape(
		$renderer,
		"-format=ppm",
		"-page={$page}",
		"-size={$params['physicalWidth']}x{$params['physicalHeight']}",
		$srcPath
	);
	$cmd = '(' . $renderCmd;
	if ( $postProcessor ) {
		// The post-processor setting is appended as a raw shell fragment.
		$cmd .= " | {$postProcessor}";
	}
	$cmd .= ' > ' . Shell::escape( $dstPath ) . ') 2>&1';
	wfDebug( __METHOD__ . ": $cmd" );

	$retval = 0;
	$err = wfShellExec( $cmd, $retval );

	// A removed output file is treated as a failure, same as a non-zero exit.
	$removed = $this->removeBadFile( $dstPath, $retval );
	if ( $retval !== 0 || $removed ) {
		$this->logErrorForExternalProcess( $retval, $err, $cmd );

		return new MediaTransformError( 'thumbnail_error', $width, $height, $err );
	}

	return new ThumbnailImage( $image, $dstUrl, $dstPath, $thumbParams );
}
236 | |
/**
 * Fetch the DjVuImage stored in the given handler state, creating and
 * storing a new one for $path when the state holds none yet.
 *
 * @param MediaHandlerState $state
 * @param string $path Only used when a new DjVuImage has to be created
 * @return DjVuImage
 */
private function getDjVuImage( $state, $path ) {
	$cached = $state->getHandlerState( self::STATE_DJVU_IMAGE );
	if ( $cached ) {
		return $cached;
	}

	$djvuImage = new DjVuImage( $path );
	$state->setHandlerState( self::STATE_DJVU_IMAGE, $djvuImage );

	return $djvuImage;
}
253 | |
/**
 * Load the metadata items this handler works with from a file.
 *
 * Always requests 'error', '_error' and 'data'; 'text' is requested too when
 * $gettext is set. If either error item is present the metadata is treated
 * as unusable.
 *
 * @param File $file The DjVu file in question
 * @param bool $gettext Whether to also request the 'text' item
 * @return array|false The requested items, or false when an error item is set
 */
private function getMetadataInternal( File $file, $gettext ) {
	$wanted = $gettext
		? [ 'error', '_error', 'data', 'text' ]
		: [ 'error', '_error', 'data' ];

	$items = $file->getMetadataItems( $wanted );

	if ( isset( $items['error'] ) || isset( $items['_error'] ) ) {
		return false;
	}

	return $items;
}
276 | |
/**
 * Get the metadata tree of a DjVu file.
 *
 * A truthy value found in the file's handler state (under the text-tree key
 * when $gettext is set, under the meta-tree key otherwise) is returned as is.
 * Note that nothing in this class stores a value under those two keys.
 * Otherwise the tree is loaded from the file's metadata items.
 *
 * @param File $image
 * @param bool $gettext Whether the 'text' item should be part of the result
 *  (Default: false)
 * @return false|array False when the metadata is empty or flagged as an error
 */
public function getMetaTree( $image, $gettext = false ) {
	$stateKey = $gettext ? self::STATE_TEXT_TREE : self::STATE_META_TREE;
	$storedTree = $image->getHandlerState( $stateKey );
	if ( $storedTree ) {
		return $storedTree;
	}

	$tree = $this->getMetadataInternal( $image, $gettext );
	if ( !$tree ) {
		return false;
	}

	if ( !$gettext ) {
		unset( $tree['text'] );
	}

	return $tree;
}
301 | |
/**
 * Get the file extension and MIME type used for thumbnails.
 *
 * The extension comes from the DjvuOutputExtension setting; the source
 * file's own extension and MIME type are not consulted.
 *
 * @param string $ext Not used
 * @param string $mime Not used
 * @param array|null $params Not used
 * @return array Two elements: the output extension and its MIME type (null
 *  when the MIME analyzer knows no type for the extension)
 */
public function getThumbType( $ext, $mime, $params = null ) {
	$outputExtension = MediaWikiServices::getInstance()->getMainConfig()
		->get( MainConfigNames::DjvuOutputExtension );

	// The lookup result is memoised per extension. A single unkeyed static
	// would keep returning the MIME type of whichever extension was configured
	// on the first call, even after the setting changed.
	static $mimeByExtension = [];
	if ( !isset( $mimeByExtension[$outputExtension] ) ) {
		$magic = MediaWikiServices::getInstance()->getMimeAnalyzer();
		$mimeByExtension[$outputExtension] =
			$magic->getMimeTypeFromExtensionOrNull( $outputExtension );
	}

	return [ $outputExtension, $mimeByExtension[$outputExtension] ];
}
313 | |
/**
 * Extract metadata and image size information from a DjVu file on disk.
 *
 * @param MediaHandlerState $state State used to share the DjVuImage instance
 * @param string $path Path of the file to inspect
 * @return array The 'metadata' entry combined with whatever the DjVuImage
 *  reports as image size; the 'metadata' entry set here takes precedence
 */
public function getSizeAndMetadata( $state, $path ) {
	wfDebug( "Getting DjVu metadata for $path" );

	$djvuImage = $this->getDjVuImage( $state, $path );

	$extracted = $djvuImage->retrieveMetaData();
	$result = [
		// Special value so that we don't repetitively try and decode a broken file.
		'metadata' => $extracted === false
			? [ 'error' => 'Error extracting metadata' ]
			: $extracted
	];

	return $result + $djvuImage->getImageSize();
}
325 | |
/**
 * Get the metadata type identifier of this handler.
 *
 * @param File $image Not inspected
 * @return string Always 'djvuxml'
 */
public function getMetadataType( $image ) {
	// The identifier is kept for historical reasons.
	return 'djvuxml';
}
330 | |
/**
 * Judge the stored metadata of a file: good when its metadata array is
 * non-empty, bad otherwise.
 *
 * @param File $image
 * @return mixed self::METADATA_GOOD or self::METADATA_BAD
 */
public function isFileMetadataValid( $image ) {
	if ( $image->getMetadataArray() ) {
		return self::METADATA_GOOD;
	}

	return self::METADATA_BAD;
}
334 | |
/**
 * Get the number of pages in a DjVu file.
 *
 * @param File $image
 * @return int|false False when no dimension information is available
 */
public function pageCount( File $image ) {
	$info = $this->getDimensionInfo( $image );
	if ( !$info ) {
		return false;
	}

	return $info['pageCount'];
}
340 | |
/**
 * Get the dimensions recorded for one page of a DjVu file.
 *
 * @param File $image
 * @param int $page Page number, counted from 1
 * @return array|false The entry stored for that page, or false when there is
 *  no dimension information or no entry for the page
 */
public function getPageDimensions( File $image, $page ) {
	// MW starts pages at 1, the stored list starts at 0
	$index = $page - 1;

	$info = $this->getDimensionInfo( $image );
	if ( !$info ) {
		return false;
	}

	return $info['dimensionsByPage'][$index] ?? false;
}
351 | |
/**
 * Get page count and per-page dimensions for a file, going through the main
 * WAN object cache.
 *
 * The cache key is built from CACHE_VERSION and the file's SHA-1. On a miss
 * the information is derived from the file's metadata tree.
 *
 * @param File $file
 * @return array|false See getDimensionInfoFromMetaTree()
 */
protected function getDimensionInfo( File $file ) {
	$cache = MediaWikiServices::getInstance()->getMainWANObjectCache();

	$key = $cache->makeKey( 'file-djvu', 'dimensions', self::CACHE_VERSION, $file->getSha1() );
	$regenerate = fn () => $this->getDimensionInfoFromMetaTree( $this->getMetaTree( $file ) );
	$options = [ 'pcTTL' => $cache::TTL_INDEFINITE ];

	return $cache->getWithSetCallback( $key, $cache::TTL_INDEFINITE, $regenerate, $options );
}
364 | |
/**
 * Given the metadata, returns dimension information about the document
 *
 * The result has two entries: 'pageCount', the number of entries in the
 * tree's page list, and 'dimensionsByPage', a list holding for each page
 * either false (empty page entry) or an array with integer 'width' and
 * 'height'.
 *
 * @param false|array $metatree The file's metadata tree
 * @return array|false False when the tree is empty, has no 'data', or its
 *  'data' carries no page list
 */
protected function getDimensionInfoFromMetaTree( $metatree ) {
	if ( !$metatree || empty( $metatree['data'] ) ) {
		return false;
	}

	// Without a page list there is nothing to measure. Bail out here instead
	// of passing a missing or non-array value on to foreach and count().
	$pages = $metatree['data']['pages'] ?? null;
	if ( !is_array( $pages ) ) {
		return false;
	}

	$dimsByPage = [];
	foreach ( $pages as $page ) {
		$dimsByPage[] = $page
			? [
				'width' => (int)$page['width'],
				'height' => (int)$page['height'],
			]
			: false;
	}

	return [
		'pageCount' => count( $pages ),
		'dimensionsByPage' => $dimsByPage
	];
}
394 | |
/**
 * Get the text stored for one page of a DjVu file.
 *
 * @param File $image
 * @param int $page Page number to get information for, counted from 1
 * @return string|false Page text or false when no text found.
 */
public function getPageText( File $image, $page ) {
	$tree = $this->getMetaTree( $image, true );

	// The page index is only computed once a text list is known to exist.
	if ( $tree && isset( $tree['text'], $tree['text'][$page - 1] ) ) {
		return $tree['text'][$page - 1];
	}

	return false;
}
410 | |
/**
 * Unconditionally answers true.
 *
 * @return bool Always true.
 */
public function useSplitMetadata() {
	return true;
}
414 | } |