24 use UtfNormal\Validator;
25 use Wikimedia\XMPReader\Reader as XMPReader;
43 $this->mFilename = $filename;
57 $data = $this->retrieveMetadata();
61 $width = $size[
'width'];
62 $height = $size[
'height'];
63 return [ $width, $height,
'Pdf',
64 "width=\"$width\" height=\"$height\"" ];
75 global $wgPdfHandlerDpi;
77 if ( isset( $data[
'pages'][$page][
'Page size'] ) ) {
78 $o = $data[
'pages'][$page][
'Page size'];
79 } elseif ( isset( $data[
'Page size'] ) ) {
80 $o = $data[
'Page size'];
86 if ( isset( $data[
'pages'][$page][
'Page rot'] ) ) {
87 $r = $data[
'pages'][$page][
'Page rot'];
88 } elseif ( isset( $data[
'Page rot'] ) ) {
89 $r = $data[
'Page rot'];
93 $size = explode(
'x', $o, 2 );
96 $width = intval( trim( $size[0] ) / 72 * $wgPdfHandlerDpi );
97 $height = explode(
' ', trim( $size[1] ), 2 );
98 $height = intval( trim( $height[0] ) / 72 * $wgPdfHandlerDpi );
99 if ( ( $r / 90 ) & 1 ) {
120 global $wgPdfInfo, $wgPdftoText;
128 '-enc',
'UTF-8', # Report metadata as UTF-8 text...
129 '-meta', # Report XMP metadata
132 $resultMeta = Shell::command( $cmdMeta )
137 '-enc',
'UTF-8', # Report metadata as UTF-8 text...
138 '-l',
'9999999', # Report page sizes
for all pages
141 $resultPages = Shell::command( $cmdPages )
144 $dump = $resultMeta->getStdout() . $resultPages->getStdout();
151 if ( isset( $wgPdftoText ) ) {
153 $result = Shell::command( $cmd )
155 $retval = $result->getExitCode();
156 $txt = $result->getStdout();
157 if ( $retval == 0 ) {
158 $txt = str_replace(
"\r\n",
"\n", $txt );
159 $pages = explode(
"\f", $txt );
160 foreach ( $pages as $page => $pageText ) {
163 $pages[$page] = Validator::cleanUp( $pageText );
165 $data[
'text'] = $pages;
176 if ( strval( $dump ) ==
'' ) {
180 $lines = explode(
"\n", $dump );
193 $data[
'xmp'] .=
"\n$line";
196 $bits = explode(
':',
$line, 2 );
197 if ( count( $bits ) > 1 ) {
198 $key = trim( $bits[0] );
199 if ( $key ===
'Metadata' ) {
204 $value = trim( $bits[1] );
208 if ( preg_match(
'/^Page +(\d+) (size|rot)$/', $key,
$matches ) ) {
209 $data[
'pages'][
$matches[1]][
$matches[2] ==
'size' ?
'Page size' :
'Page rot'] = $value;
211 $data[$key] = $value;
231 foreach ( $data as $key => $val ) {
234 $items[
'ObjectName'] = $val;
237 $items[
'ImageDescription'] = $val;
243 $keyList = array_filter( explode(
' ', $val ) );
244 if ( count( $keyList ) > 0 ) {
245 $items[
'Keywords'] = $keyList;
249 $items[
'Artist'] = $val;
254 $items[
'Software'] = $val;
258 $items[
'pdf-Producer'] = $val;
264 $items[
'DateTime'] = $timestamp;
270 $items[
'DateTimeDigitized'] = $timestamp;
280 $items[
'pdf-Version'] = $val;
288 $items[
'pdf-Encrypted'] = $val;
299 foreach ( $val as $page ) {
300 if ( isset( $page[
'Page size'] ) ) {
301 $pageSizes[$page[
'Page size']] =
true;
305 $pageSizeArray = array_keys( $pageSizes );
306 if ( count( $pageSizeArray ) > 0 ) {
307 $items[
'pdf-PageSize'] = $pageSizeArray;
313 $meta->addMetadata( $items,
'native' );
315 if ( isset( $data[
'xmp'] ) && XMPReader::isSupported() ) {
318 $xmp =
new XMPReader( LoggerFactory::getInstance(
'XMP' ) );
319 $xmp->parse( $data[
'xmp'] );
320 $xmpRes = $xmp->getResults();
321 foreach ( $xmpRes as
$type => $xmpSection ) {
322 $meta->addMetadata( $xmpSection,
$type );
325 unset( $data[
'xmp'] );
326 $data[
'mergedMetadata'] = $meta->getMetadataArray();