Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 61 |
|
0.00% |
0 / 3 |
CRAP | |
0.00% |
0 / 1 |
ImportOpenAlex | |
0.00% |
0 / 58 |
|
0.00% |
0 / 3 |
240 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
2 | |||
execute | |
0.00% |
0 / 36 |
|
0.00% |
0 / 1 |
56 | |||
readline | |
0.00% |
0 / 17 |
|
0.00% |
0 / 1 |
56 |
1 | <?php |
2 | |
3 | /** |
4 | * This program is free software; you can redistribute it and/or modify |
5 | * it under the terms of the GNU General Public License as published by |
6 | * the Free Software Foundation; either version 2 of the License, or |
7 | * (at your option) any later version. |
8 | * |
9 | * This program is distributed in the hope that it will be useful, |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
12 | * GNU General Public License for more details. |
13 | * |
14 | * You should have received a copy of the GNU General Public License along |
15 | * with this program; if not, write to the Free Software Foundation, Inc., |
16 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
17 | * http://www.gnu.org/copyleft/gpl.html |
18 | * |
19 | * @ingroup Maintenance |
20 | */ |
21 | |
22 | use MediaWiki\Extension\MathSearch\Graph\Map; |
23 | |
24 | require_once __DIR__ . '/../../../maintenance/Maintenance.php'; |
25 | |
/**
 * Maintenance script that reads a CSV file and queues its rows, grouped into
 * numbered segments, as OpenAlex graph jobs via the MathSearch graph Map.
 *
 * The first CSV row is treated as the header; every following row is converted
 * by readline() and collected until a segment of the configured batch size is
 * full.
 */
class ImportOpenAlex extends Maintenance {

	/** Class name of the job that receives each pushed segment. */
	private const JOB_CLASS = 'MediaWiki\Extension\MathSearch\Graph\Job\OpenAlex';

	/** @var string Path of the CSV file, taken from the first script argument. */
	private string $filename;

	/**
	 * Declares the script description, the mandatory file argument, the default
	 * batch size of 100 and the dependency on the MathSearch extension.
	 */
	public function __construct() {
		parent::__construct();
		$this->addDescription( "Batch imports OpenAlex data from a CSV file." );
		$this->addArg( 'file', 'The file to be read', true );
		$this->setBatchSize( 100 );
		$this->requireExtension( 'MathSearch' );
	}

	/**
	 * Reads the CSV file row by row and pushes the collected rows as jobs.
	 *
	 * Rows that cannot be processed are reported on the output and skipped, so
	 * one bad row does not abort the whole import.
	 *
	 * @throws Exception if the header row cannot be read
	 */
	public function execute() {
		$this->filename = $this->getArg( 0 );
		if ( !is_file( $this->filename ) ) {
			$this->fatalError( "{$this->filename} is not a file." );
		}
		$handle = fopen( $this->filename, 'r' );
		if ( $handle === false ) {
			$this->fatalError( "Could not open {$this->filename} for reading." );
		}

		try {
			$columns = $this->readCsvRow( $handle );
			// fgetcsv() signals failure/EOF with false and a blank line with [ null ].
			if ( !is_array( $columns ) || $columns === [ null ] ) {
				throw new Exception( "Problem processing the csv file." );
			}

			$graphMap = new Map();
			$segment = 0;
			// 'H' = 24h hour, 'i' = minutes, 's' = seconds ('m' would repeat the month).
			$jobname = 'openalex' . date( 'ymdHis' );
			$table = [];

			while ( ( $line = $this->readCsvRow( $handle ) ) !== false ) {
				if ( $line === [ null ] ) {
					// Blank line in the file: nothing to import.
					continue;
				}
				try {
					// Array union: the first row seen for a document id wins.
					$table += $this->readline( $line, $columns );
					if ( count( $table ) >= $this->getBatchSize() ) {
						$this->output( "Push jobs to segment $segment.\n" );
						$graphMap->pushJob(
							$table,
							$segment++,
							self::JOB_CLASS,
							[ 'jobname' => $jobname ] );
						$table = [];
					}
				} catch ( Throwable $e ) {
					// Best effort: report the offending row and carry on.
					$this->output( "Error processing line: \n" .
						var_export( implode( ',', $line ), true ) . "\nError:" .
						$e->getMessage() . "\n" );
				}
			}

			if ( count( $table ) ) {
				$graphMap->pushJob(
					$table,
					$segment,
					self::JOB_CLASS,
					[ 'jobname' => $jobname ] );
			}
			$this->output( "Pushed last $segment.\n" );
		} finally {
			// Release the handle even if pushing the final segment throws.
			fclose( $handle );
		}
	}

	/**
	 * Reads one CSV record with the same dialect for header and data rows:
	 * comma separated, double-quote enclosed, no escape character.
	 *
	 * @param resource $handle Open file handle
	 * @return array|false Parsed fields, or false on EOF/failure
	 */
	private function readCsvRow( $handle ) {
		return fgetcsv( $handle, 0, ',', '"', '' );
	}

	/**
	 * Converts one CSV row into a single-entry array keyed by the document id.
	 *
	 * The keys of $wgMathOpenAlexQIdMap are looked up as CSV column names; the
	 * mapped value (presumably a property id - confirm against the extension
	 * configuration) becomes the key in the result. Empty fields are skipped.
	 * Values starting with "https://" are reduced to their URL path without the
	 * leading slash, except for the 'prime_landing_page_url' entry, which keeps
	 * the full URL.
	 *
	 * @param array $line Field values of the current row
	 * @param array $columns Header row, used as field names
	 * @return array [ (int)document id => [ mapped key => value, ... ] ]
	 * @throws Exception if the row does not match the header or has no document field
	 */
	private function readline( array $line, array $columns ): array {
		global $wgMathOpenAlexQIdMap;
		$pDe = $wgMathOpenAlexQIdMap['document'];
		$pUrl = $wgMathOpenAlexQIdMap['prime_landing_page_url'];

		if ( count( $columns ) !== count( $line ) ) {
			throw new Exception( "Row has " . count( $line ) .
				" fields, but the header has " . count( $columns ) . "." );
		}
		$fields = array_combine( $columns, $line );
		$data = [];
		foreach ( $wgMathOpenAlexQIdMap as $oa_name => $pid ) {
			if ( array_key_exists( $pid, $data ) ) {
				// Several columns may map to the same key; the first one wins.
				continue;
			}
			// A mapped column may be absent from this CSV export.
			$field = $fields[$oa_name] ?? null;
			if ( !$field ) {
				continue;
			}
			if ( str_starts_with( $field, 'https://' ) && $pid !== $pUrl ) {
				// parse_url() yields null/false when there is no (valid) path.
				$data[$pid] = ltrim( (string)parse_url( $field, PHP_URL_PATH ), '/' );
			} else {
				$data[$pid] = $field;
			}
		}
		if ( !array_key_exists( $pDe, $data ) ) {
			throw new Exception( "No document field found." );
		}
		$de = (int)$data[$pDe];
		// save some bytes
		unset( $data[$pDe] );
		return [ $de => $data ];
	}

}
115 | |
// Name the class defined in this file as the maintenance script to run, then
// include the runner referenced by RUN_MAINTENANCE_IF_MAIN (presumably it only
// executes the script when this file is the entry point - confirm in core).
116 | $maintClass = ImportOpenAlex::class; |
117 | require_once RUN_MAINTENANCE_IF_MAIN; |