Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
86.21% |
50 / 58 |
|
33.33% |
1 / 3 |
CRAP | |
0.00% |
0 / 1 |
PopulateDatabase | |
94.34% |
50 / 53 |
|
33.33% |
1 / 3 |
11.02 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
1 | |||
execute | |
94.59% |
35 / 37 |
|
0.00% |
0 / 1 |
9.01 | |||
processScores | |
88.89% |
8 / 9 |
|
0.00% |
0 / 1 |
1.00 |
1 | <?php |
2 | |
3 | namespace ORES\Maintenance; |
4 | |
5 | use MediaWiki\Maintenance\Maintenance; |
6 | use ORES\Services\ORESServices; |
7 | use ORES\Services\ScoreFetcher; |
8 | use ORES\Storage\ScoreStorage; |
9 | |
10 | require_once getenv( 'MW_INSTALL_PATH' ) !== false |
11 | ? getenv( 'MW_INSTALL_PATH' ) . '/maintenance/Maintenance.php' |
12 | : __DIR__ . '/../../../maintenance/Maintenance.php'; |
13 | |
14 | /** |
15 | * @ingroup Maintenance |
16 | */ |
/**
 * Maintenance script that fills the ores_classification table.
 *
 * Walks the recentchanges table from the highest rc_id downwards, selects
 * rows of type RC_EDIT or RC_NEW that have no matching ores_classification
 * row, fetches scores for their revisions through ScoreFetcher and stores
 * them through ScoreStorage.
 *
 * @ingroup Maintenance
 */
class PopulateDatabase extends Maintenance {

	/**
	 * Number of revision IDs handed to the score fetcher per call.
	 *
	 * @var int|null
	 */
	private $apiBatchSize;

	/**
	 * Upper bound on the number of revisions selected during one run.
	 *
	 * @var int|null
	 */
	private $revisionLimit;

	/**
	 * Registers the script description, its options and the default
	 * database batch size (5000 rows per query).
	 */
	public function __construct() {
		parent::__construct();

		$this->requireExtension( 'ORES' );
		$this->addDescription( 'Populate ores_classification table by scoring ' .
			'the latest edits in recentchanges table that are not scored' );
		$this->addOption( 'number', 'Number of revisions to be scored', false, true, 'n' );
		$this->addOption( 'apibatch', 'Batch size for the API request', false, true );
		$this->setBatchSize( 5000 );
	}

	/**
	 * Selects unscored recent changes in batches and scores them.
	 *
	 * At most `--number` revisions (default 1000) are selected in total.
	 * Each database query returns at most the configured batch size, and the
	 * revisions of a query are forwarded to the score fetcher in chunks of
	 * `--apibatch` IDs (default: $wgOresRevisionsPerBatch, or 30 if unset).
	 */
	public function execute() {
		global $wgOresExcludeBots, $wgOresRevisionsPerBatch;

		$scoreFetcher = ScoreFetcher::instance();
		/** @var ScoreStorage $scoreStorage */
		$scoreStorage = ORESServices::getScoreStorage();

		// Option values may not be integers; normalise them once up front.
		// A batch size below 1 would make the loop below spin forever (nothing
		// fetched, nothing counted), so clamp it.
		$batchSize = max( 1, (int)$this->getBatchSize() );
		$this->revisionLimit = (int)$this->getOption( 'number', 1000 );
		// Clamp to 1 so that a zero or negative --apibatch cannot break chunking.
		$this->apiBatchSize = max(
			1,
			(int)$this->getOption( 'apibatch', $wgOresRevisionsPerBatch ?: 30 )
		);

		$latestRcId = 0;
		$dbr = $this->getReplicaDB();

		$count = 0;
		while ( $count < $this->revisionLimit ) {
			// Never request more rows than --number still allows.
			$limit = min( $batchSize, $this->revisionLimit - $count );

			// oresc_id is NULL when the LEFT JOIN found no classification row.
			$conditions = [ 'oresc_id' => null, 'rc_type' => [ RC_EDIT, RC_NEW ] ];

			if ( $wgOresExcludeBots === true ) {
				$conditions['rc_bot'] = 0;
			}
			if ( $latestRcId ) {
				// Continue below the lowest rc_id seen in the previous batch.
				$conditions[] = $dbr->expr( 'rc_id', '<', $latestRcId );
			}

			$res = $dbr->newSelectQueryBuilder()
				->select( [ 'rc_id', 'rc_this_oldid' ] )
				->from( 'recentchanges' )
				->leftJoin( 'ores_classification', null, 'oresc_rev = rc_this_oldid' )
				->where( $conditions )
				->orderBy( 'rc_id DESC' )
				->limit( $limit )
				->caller( __METHOD__ )
				->fetchResultSet();

			$pack = [];
			foreach ( $res as $row ) {
				$pack[] = $row->rc_this_oldid;
				if ( count( $pack ) >= $this->apiBatchSize ) {
					$this->processScores( $pack, $scoreFetcher, $scoreStorage );
					$pack = [];
				}
				$latestRcId = $row->rc_id;
			}
			// Flush the final, partially filled chunk of this batch.
			if ( $pack !== [] ) {
				$this->processScores( $pack, $scoreFetcher, $scoreStorage );
			}

			// Count what was actually fetched, not what was requested.
			$fetched = $res->numRows();
			$count += $fetched;
			$this->waitForReplication();

			// A short batch means there are no more matching rows.
			if ( $fetched < $limit ) {
				break;
			}
		}

		$this->output( "Finished processing the revisions\n" );
	}

	/**
	 * Process several edits and store the scores in the database
	 *
	 * Errors reported by the storage callback are written to the script
	 * output, one line per affected revision.
	 *
	 * @param int[] $revs Array of revision IDs
	 * @param ScoreFetcher $scoreFetcher
	 * @param ScoreStorage $scoreStorage service to store scores in persistence layer
	 */
	private function processScores(
		array $revs,
		ScoreFetcher $scoreFetcher,
		ScoreStorage $scoreStorage
	) {
		$size = count( $revs );
		$this->output( "Processing $size revisions\n" );

		$scores = $scoreFetcher->getScores( $revs );
		$scoreStorage->storeScores(
			$scores,
			function ( $mssg, $revision ) {
				$this->output( "ScoreFetcher errored for $revision: $mssg\n" );
			}
		);
	}

}
123 | |
124 | $maintClass = PopulateDatabase::class; |
125 | require_once RUN_MAINTENANCE_IF_MAIN; |