52 $this->mFilename = $filename;
62 return $info !==
false;
72 if ( $data !==
false ) {
74 'width' => $data[
'width'],
75 'height' => $data[
'height']
88 $file = fopen( $this->mFilename,
'rb' );
90 $arr = unpack(
'a4magic/a4chunk/NchunkLength',
$header );
91 $chunk = $arr[
'chunk'];
92 $chunkLength = $arr[
'chunkLength'];
93 echo
"$chunk $chunkLength\n";
99 $start = ftell(
$file );
100 $secondary = fread(
$file, 4 );
101 echo str_repeat(
' ', $indent * 4 ) .
"($secondary)\n";
102 while ( ftell(
$file ) - $start < $length ) {
103 $chunkHeader = fread(
$file, 8 );
104 if ( $chunkHeader ==
'' ) {
107 $arr = unpack(
'a4chunk/NchunkLength', $chunkHeader );
108 $chunk = $arr[
'chunk'];
109 $chunkLength = $arr[
'chunkLength'];
110 echo str_repeat(
' ', $indent * 4 ) .
"$chunk $chunkLength\n";
112 if ( $chunk ==
'FORM' ) {
115 fseek(
$file, $chunkLength, SEEK_CUR );
116 if ( $chunkLength & 1 ) {
118 fseek(
$file, 1, SEEK_CUR );
125 Wikimedia\suppressWarnings();
126 $file = fopen( $this->mFilename,
'rb' );
127 Wikimedia\restoreWarnings();
128 if (
$file ===
false ) {
129 wfDebug( __METHOD__ .
": missing or failed file read" );
137 if ( strlen(
$header ) < 16 ) {
138 wfDebug( __METHOD__ .
": too short file header" );
140 $arr = unpack(
'a4magic/a4form/NformLength/a4subtype',
$header );
142 $subtype = $arr[
'subtype'];
143 if ( $arr[
'magic'] !=
'AT&T' ) {
144 wfDebug( __METHOD__ .
": not a DjVu file" );
145 } elseif ( $subtype ==
'DJVU' ) {
148 } elseif ( $subtype ==
'DJVM' ) {
152 wfDebug( __METHOD__ .
": unrecognized DJVU file type '{$arr['subtype']}'" );
165 $arr = unpack(
'a4chunk/Nlength',
$header );
167 return [ $arr[
'chunk'], $arr[
'length'] ];
172 fseek(
$file, $chunkLength, SEEK_CUR );
174 if ( ( $chunkLength & 1 ) && !feof(
$file ) ) {
176 fseek(
$file, 1, SEEK_CUR );
183 $start = ftell(
$file );
190 if ( $chunk ==
'FORM' ) {
191 $subtype = fread(
$file, 4 );
192 if ( $subtype ==
'DJVU' ) {
193 wfDebug( __METHOD__ .
": found first subpage" );
199 wfDebug( __METHOD__ .
": skipping '$chunk' chunk" );
202 }
while ( $length != 0 && !feof(
$file ) && ftell(
$file ) - $start < $formLength );
204 wfDebug( __METHOD__ .
": multi-page DJVU file contained no pages" );
211 if ( $chunk !=
'INFO' ) {
212 wfDebug( __METHOD__ .
": expected INFO chunk, got '$chunk'" );
218 wfDebug( __METHOD__ .
": INFO should be 9 or 10 bytes, found $length" );
222 $data = fread(
$file, $length );
223 if ( strlen( $data ) < $length ) {
224 wfDebug( __METHOD__ .
": INFO chunk cut off" );
237 # Newer files have rotation info in byte 10, but we don't use it yet.
240 'width' => $arr[
'width'],
241 'height' => $arr[
'height'],
242 'version' =>
"{$arr['major']}.{$arr['minor']}",
243 'resolution' => $arr[
'resolution'],
244 'gamma' => $arr[
'gamma'] / 10.0 ];
259 # djvudump is faster than djvutoxml (now abandoned) as of version 3.5
261 $cmd = Shell::escape(
$wgDjvuDump ) .
' ' . Shell::escape( $this->mFilename );
269 $cmd = Shell::escape(
$wgDjvuTxt ) .
' --detail=page ' . Shell::escape( $this->mFilename );
270 wfDebug( __METHOD__ .
": $cmd" );
272 $txt =
wfShellExec( $cmd, $retval, [], [
'memory' => self::DJVUTXT_MEMORY_LIMIT ] );
273 if ( $retval == 0 ) {
274 # Strip some control characters
275 # Ignore carriage returns
276 $txt = preg_replace(
"/\\\\013/",
"", $txt );
277 # Replace runs of OCR region separators with a single extra line break
278 $txt = preg_replace(
"/(?:\\\\(035|037))+/",
"\n", $txt );
281 /\(page\s[\d-]*\s[\d-]*\s[\d-]*\s[\d-]*\s*
"
282 ((?> # Text to match is composed of atoms of either:
283 \\\\. # - any escaped character
284 | # - any character different from " and \
288 | # Or page can be empty ; in
this case, djvutxt dumps ()
291 $txt = preg_replace_callback( $reg, [ $this,
'pageTextCallback' ], $txt );
292 $txt =
"<DjVuTxt>\n<HEAD></HEAD>\n<BODY>\n" . $txt .
"</BODY>\n</DjVuTxt>\n";
293 $xml = preg_replace(
"/<DjVuXML>/",
"<mw-djvu><DjVuXML>", $xml, 1 ) .
303 # Get rid of invalid UTF-8, strip control characters
304 $val = htmlspecialchars( UtfNormal\Validator::cleanUp( stripcslashes(
$matches[1] ) ) );
305 $val = str_replace( [
"\n",
'�' ], [
' ',
'' ], $val );
306 return '<PAGE value="' . $val .
'" />';
315 if ( strval( $dump ) ==
'' ) {
320<?xml version=
"1.0" ?>
321<!DOCTYPE DjVuXML PUBLIC
"-//W3C//DTD DjVuXML 1.1//EN" "pubtext/DjVuXML-s.dtd">
327 $dump = str_replace(
"\r",
'', $dump );
328 $line = strtok( $dump,
"\n" );
331 if ( preg_match(
'/^( *)FORM:DJVU/',
$line, $m ) ) {
338 } elseif ( preg_match(
'/^( *)FORM:DJVM/',
$line, $m ) ) {
340 $parentLevel = strlen( $m[1] );
342 $line = strtok(
"\n" );
343 while (
$line !==
false ) {
344 $childLevel = strspn(
$line,
' ' );
345 if ( $childLevel <= $parentLevel ) {
350 if ( preg_match(
'/^ *DIRM.*indirect/',
$line ) ) {
351 wfDebug(
"Indirect multi-page DjVu document, bad for server!" );
355 if ( preg_match(
'/^ *FORM:DJVU/',
$line ) ) {
363 $line = strtok(
"\n" );
370 $xml .=
"</BODY>\n</DjVuXML>\n";
376 $parentLevel = strspn(
$line,
' ' );
377 $line = strtok(
"\n" );
380 while (
$line !==
false ) {
381 $childLevel = strspn(
$line,
' ' );
382 if ( $childLevel <= $parentLevel ) {
388 '/^ *INFO *\[\d*\] *DjVu *(\d+)x(\d+), *\w*, *(\d+) *dpi, *gamma=([0-9.-]+)/',
396 #
'type' =>
'image/x.djvu',
402 Xml::element(
'PARAM', [
'name' =>
'DPI',
'value' => $m[3] ] ) .
"\n" .
403 Xml::element(
'PARAM', [
'name' =>
'GAMMA',
'value' => $m[4] ] ) .
"\n"
408 $line = strtok(
"\n" );
$wgDjvuTxt
Path of the djvutxt DJVU text extraction utility Enable this and $wgDjvuDump to enable text layer ext...
$wgDjvuDump
Path of the djvudump executable Enable this and $wgDjvuRenderer to enable djvu rendering example: $wg...
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
wfShellExec( $cmd, &$retval=null, $environ=[], $limits=[], $options=[])
Execute a shell command, with time and memory limits mirrored from the PHP configuration if supported...
Support for detecting/validating DjVu image files and getting some basic file metadata (resolution et...
convertDumpToXML( $dump)
Hack to temporarily work around djvutoxml bug.
parseFormDjvu( $line, &$xml)
retrieveMetaData()
Return an XML string describing the DjVu image.
const DJVUTXT_MEMORY_LIMIT
Memory limit for the DjVu description software.
skipChunk( $file, $chunkLength)
dumpForm( $file, $length, $indent)
pageTextCallback( $matches)
dump()
For debugging; dump the IFF chunk structure.
isValid()
Check if the given file is indeed a valid DjVu image file.
getMultiPageInfo( $file, $formLength)
getImageSize()
Return width and height.
if(PHP_SAPI !='cli-server') if(!isset( $_SERVER['SCRIPT_FILENAME'])) $file
Item class for a filearchive table row.