120 global $wgPdfInfo, $wgPdftoText;
128 '-enc',
'UTF-8', # Report metadata as UTF-8 text...
129 '-meta', # Report XMP metadata
132 $resultMeta = Shell::command( $cmdMeta )
137 '-enc',
'UTF-8', # Report metadata as UTF-8 text...
138 '-l',
'9999999', # Report page sizes
for all pages
141 $resultPages = Shell::command( $cmdPages )
144 $dump = $resultMeta->getStdout() . $resultPages->getStdout();
151 if ( isset( $wgPdftoText ) ) {
153 $result = Shell::command( $cmd )
155 $retval = $result->getExitCode();
156 $txt = $result->getStdout();
157 if ( $retval == 0 ) {
158 $txt = str_replace(
"\r\n",
"\n", $txt );
159 $pages = explode(
"\f", $txt );
160 foreach ( $pages as $page => $pageText ) {
163 $pages[$page] = Validator::cleanUp( $pageText );
165 $data[
'text'] = $pages;
231 foreach ( $data as $key => $val ) {
234 $items[
'ObjectName'] = $val;
237 $items[
'ImageDescription'] = $val;
243 $keyList = array_filter( explode(
' ', $val ) );
244 if ( count( $keyList ) > 0 ) {
245 $items[
'Keywords'] = $keyList;
249 $items[
'Artist'] = $val;
254 $items[
'Software'] = $val;
258 $items[
'pdf-Producer'] = $val;
264 $items[
'DateTime'] = $timestamp;
270 $items[
'DateTimeDigitized'] = $timestamp;
280 $items[
'pdf-Version'] = $val;
288 $items[
'pdf-Encrypted'] = $val;
299 foreach ( $val as $page ) {
300 if ( isset( $page[
'Page size'] ) ) {
301 $pageSizes[$page[
'Page size']] =
true;
305 $pageSizeArray = array_keys( $pageSizes );
306 if ( count( $pageSizeArray ) > 0 ) {
307 $items[
'pdf-PageSize'] = $pageSizeArray;
313 $meta->addMetadata( $items,
'native' );
315 if ( isset( $data[
'xmp'] ) && XMPReader::isSupported() ) {
318 $xmp =
new XMPReader( LoggerFactory::getInstance(
'XMP' ) );
319 $xmp->parse( $data[
'xmp'] );
320 $xmpRes = $xmp->getResults();
321 foreach ( $xmpRes as
$type => $xmpSection ) {
322 $meta->addMetadata( $xmpSection,
$type );
325 unset( $data[
'xmp'] );
326 $data[
'mergedMetadata'] = $meta->getMetadataArray();