Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
0.00% covered (danger)
0.00%
0 / 35
0.00% covered (danger)
0.00%
0 / 5
CRAP
0.00% covered (danger)
0.00%
0 / 1
DumpReader
0.00% covered (danger)
0.00%
0 / 35
0.00% covered (danger)
0.00%
0 / 5
182
0.00% covered (danger)
0.00%
0 / 1
 getLog
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 __construct
0.00% covered (danger)
0.00%
0 / 4
0.00% covered (danger)
0.00%
0 / 1
2
 normalizeFilename
0.00% covered (danger)
0.00%
0 / 4
0.00% covered (danger)
0.00%
0 / 1
2
 run
0.00% covered (danger)
0.00%
0 / 16
0.00% covered (danger)
0.00%
0 / 1
72
 addJob
0.00% covered (danger)
0.00%
0 / 10
0.00% covered (danger)
0.00%
0 / 1
6
1<?php
2
3namespace MediaWiki\Extension\MathSearch\StackExchange;
4
5use MediaWiki\Logger\LoggerFactory;
6use MediaWiki\MediaWikiServices;
7
/**
 * Streams a StackExchange XML dump and queues its <row> elements as batched
 * LineReaderJob jobs.
 *
 * Every <row> element that has attributes is converted into an associative
 * array (attribute name => attribute value). The rows are collected into
 * batches and each batch is pushed to the job queue as one LineReaderJob.
 */
class DumpReader {

    /** Number of rows collected before a job is pushed to the queue. */
    private const BATCH_SIZE = 1000;

    /**
     * @var \XMLReader reader opened on the dump file
     */
    private $file;
    /**
     * @var string normalized (lower-cased, version suffix stripped) file name
     */
    private $fileName;
    /** @var string directory part used to build the per-job error file path */
    private $errPath;
    /** @var int counter of the jobs created so far; used in job titles and error file names */
    private $part = 0;

    /**
     * @return \Psr\Log\LoggerInterface logger of the 'MathSearch' channel
     */
    private static function getLog() {
        return LoggerFactory::getInstance( 'MathSearch' );
    }

    /**
     * @param \SplFileObject $file dump file to read
     * @param string $errPath path prefix under which the jobs' error files are placed
     * @throws \RuntimeException if the dump file cannot be opened for reading
     */
    public function __construct( $file, $errPath ) {
        $path = $file->getRealPath();
        // The class lives in a namespace, so the global XMLReader class must be
        // referenced with a leading backslash.
        $this->file = new \XMLReader();
        // open() has to be called on the instance: a static call returns a new,
        // unrelated reader and would leave $this->file without any data source.
        if ( $path === false || !$this->file->open( $path ) ) {
            throw new \RuntimeException(
                "Unable to open dump file '" . $file->getFilename() . "' for reading."
            );
        }
        $this->normalizeFilename( $file->getFilename() );
        $this->errPath = $errPath;
    }

    /**
     * Stores the lower-cased part of the file name that precedes the first '.' or '_'.
     *
     * @param string $fileName
     */
    private function normalizeFilename( $fileName ) {
        // some posts file from arq20 math task were modified with additional version
        // information by appending either .V1.0 or _V1_0
        $fileparts = preg_split( "/[._]/", $fileName );
        $normalized_fn = strtolower( $fileparts[0] );
        $this->fileName = $normalized_fn;
        self::getLog()->debug( "'$fileName' is normalized to '$normalized_fn'." );
    }

    /**
     * Reads the whole dump and pushes one job per batch of rows.
     *
     * Elements other than <row> are logged and skipped. <row> elements without
     * attributes are ignored.
     */
    public function run() {
        $rows = [];
        $xml = $this->file;
        while ( $xml->read() ) {
            if ( $xml->nodeType !== \XMLReader::ELEMENT ) {
                continue;
            }
            if ( $xml->name !== 'row' ) {
                self::getLog()->info( "Skip element: {line}", [ 'line' => $xml->name ] );
                continue;
            }
            if ( !$xml->hasAttributes ) {
                continue;
            }
            $attribs = [];
            while ( $xml->moveToNextAttribute() ) {
                $attribs[$xml->name] = $xml->value;
            }
            $rows[] = $attribs;
            if ( count( $rows ) >= self::BATCH_SIZE ) {
                $this->addJob( $rows );
                $rows = [];
            }
        }
        // Flush the remaining rows; do not queue a job that has nothing to process.
        if ( $rows !== [] ) {
            $this->addJob( $rows );
        }
    }

    /**
     * Pushes a LineReaderJob for the given batch of rows to the job queue.
     *
     * @param array $rows list of attribute maps, one per <row> element
     */
    private function addJob( array $rows ) {
        $part = ++$this->part;
        // Title is referenced from the global namespace (leading backslash), like
        // \JobQueueGroup below.
        $title = \Title::newFromText( "SE reader '$this->fileName' part $part" );
        $job = new LineReaderJob( $title, [
            'rows' => $rows,
            'fileName' => $this->fileName,
            'errFile' => $this->errPath . "/$this->fileName-$part-err.xml",
        ] );
        if ( method_exists( MediaWikiServices::class, 'getJobQueueGroup' ) ) {
            // MW 1.37+
            MediaWikiServices::getInstance()->getJobQueueGroup()->push( $job );
        } else {
            \JobQueueGroup::singleton()->push( $job );
        }
    }
}