Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
100.00% |
54 / 54 |
|
100.00% |
5 / 5 |
CRAP | |
100.00% |
1 / 1 |
SaneitizeLoop | |
100.00% |
54 / 54 |
|
100.00% |
5 / 5 |
13 | |
100.00% |
1 / 1 |
__construct | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
1 | |||
run | |
100.00% |
37 / 37 |
|
100.00% |
1 / 1 |
7 | |||
createCheckerJob | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
checkMinLoopDuration | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
3 | |||
log | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 |
1 | <?php |
2 | |
3 | namespace CirrusSearch\Maintenance; |
4 | |
5 | use CirrusSearch\Job\CheckerJob; |
6 | use Elastica\Document; |
7 | use MediaWiki\MediaWikiServices; |
8 | use MediaWiki\Utils\MWTimestamp; |
9 | |
10 | /** |
11 | * Create saneitize jobs for a single execution of a saneitizer loop |
12 | * |
13 | * Maintains state in the job info pertaining to current position in |
14 | * the loop. The job info must be persisted between runs. |
15 | * |
16 | * This program is free software; you can redistribute it and/or modify |
17 | * it under the terms of the GNU General Public License as published by |
18 | * the Free Software Foundation; either version 2 of the License, or |
19 | * (at your option) any later version. |
20 | * |
21 | * This program is distributed in the hope that it will be useful, |
22 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
23 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
24 | * GNU General Public License for more details. |
25 | * |
26 | * You should have received a copy of the GNU General Public License along |
27 | * with this program; if not, write to the Free Software Foundation, Inc., |
28 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
29 | * http://www.gnu.org/copyleft/gpl.html |
30 | */ |
31 | |
class SaneitizeLoop {
	/** @var string Name of the saneitizer profile to use in created jobs */
	private $profileName;

	/** @var int The frequency, in seconds, that the saneitize loop is executed */
	private $pushJobFreq;

	/** @var int The number of pages to include per job */
	private $chunkSize;

	/** @var int Minimum number of seconds between loop restarts */
	private $minLoopDuration;

	/** @var callable Invoked as ( string $msg, string|null $channel ) for every log line */
	private $logger;

	/** @var \JobQueueGroup Queue group handed to every CheckerJob that is built */
	private $jobQueueGroup;

	/**
	 * @param string $profileName Name of the saneitizer profile to use in created jobs
	 * @param int $pushJobFreq The frequency, in seconds, that the saneitize loop is executed
	 * @param int $chunkSize The number of pages to include per job
	 * @param int $minLoopDuration Minimum number of seconds between loop restarts
	 * @param callable|null $logger Callable accepting 2 arguments, first a log
	 *  message and second either a channel name or null. When null, log
	 *  messages are discarded.
	 * @param \JobQueueGroup|null $jobQueueGroup When null, the group is fetched
	 *  from MediaWikiServices.
	 */
	public function __construct(
		$profileName,
		$pushJobFreq,
		$chunkSize,
		$minLoopDuration,
		$logger = null,
		?\JobQueueGroup $jobQueueGroup = null
	) {
		$this->profileName = $profileName;
		$this->pushJobFreq = $pushJobFreq;
		$this->chunkSize = $chunkSize;
		$this->minLoopDuration = $minLoopDuration;
		// No-op logger so log() never has to null-check.
		$this->logger = $logger ?? static function ( $msg, $channel = null ) {
		};
		$this->jobQueueGroup = $jobQueueGroup ?? MediaWikiServices::getInstance()->getJobQueueGroup();
	}

	/**
	 * Generate jobs for one run of a saneitize loop
	 *
	 * Reads the current position and counters from $jobInfo, builds up to
	 * $numJobs jobs that each cover an inclusive range of at most chunkSize
	 * page ids, and writes the updated position and counters back to
	 * $jobInfo when at least one job was built. $jobInfo is left untouched
	 * when no job was built.
	 *
	 * @param Document $jobInfo
	 * @param int $numJobs The number of jobs to create
	 * @param int $minId Minimum page_id on the wiki
	 * @param int $maxId Maximum page_id on the wiki
	 * @return CheckerJob[] The created jobs. May be less than requested.
	 */
	public function run( Document $jobInfo, $numJobs, $minId, $maxId ) {
		/** @var int $from First page id of the next job to build */
		$from = $jobInfo->get( 'sanitize_job_id_offset' );
		$lastLoop = $jobInfo->get( 'sanitize_job_last_loop' );
		// ternary is BC for when loop_id didn't exist.
		$loopId = $jobInfo->has( 'sanitize_job_loop_id' ) ? $jobInfo->get( 'sanitize_job_loop_id' ) : 0;
		// Per-loop counters, reset each time a new loop starts.
		$jobsSent = $jobInfo->get( 'sanitize_job_jobs_sent' );
		$idsSent = $jobInfo->get( 'sanitize_job_ids_sent' );
		// Running totals, never reset here.
		$jobsSentTotal = $jobInfo->get( 'sanitize_job_jobs_sent_total' );
		$idsSentTotal = $jobInfo->get( 'sanitize_job_ids_sent_total' );
		// Jobs built by this call only; used for the summary log line.
		$jobsSentCurLoop = 0;
		$jobs = [];

		for ( $i = 0; $i < $numJobs; $i++ ) {
			// NOTE(review): when $from === $maxId the loop restarts without
			// building a job for page $maxId - confirm this is intended.
			if ( $from <= $minId || $from >= $maxId ) {
				// The previous loop has completed. Wait until that loop
				// has taken the minimum required duration before starting
				// the next one.
				if ( !$this->checkMinLoopDuration( $lastLoop ) ) {
					break;
				}
				$from = $minId;
				$idsSent = 0;
				$jobsSent = 0;
				$lastLoop = MWTimestamp::time();
				$loopId += 1;
			}

			// Both ends of the range are inclusive: the next job starts at $to + 1.
			$to = min( $from + $this->chunkSize - 1, $maxId );
			$jobs[] = $this->createCheckerJob( $from, $to, $jobInfo->get( 'sanitize_job_cluster' ), $loopId );

			// An inclusive range [from, to] holds to - from + 1 ids.
			$idCount = $to - $from + 1;
			$jobsSent++;
			$jobsSentTotal++;
			$jobsSentCurLoop++;
			$idsSent += $idCount;
			$idsSentTotal += $idCount;
			$from = $to + 1;
		}

		if ( $jobs ) {
			$jobInfo->set( 'sanitize_job_loop_id', $loopId );
			$jobInfo->set( 'sanitize_job_last_loop', $lastLoop );
			$jobInfo->set( 'sanitize_job_id_offset', $from );
			$jobInfo->set( 'sanitize_job_jobs_sent', $jobsSent );
			$jobInfo->set( 'sanitize_job_jobs_sent_total', $jobsSentTotal );
			$jobInfo->set( 'sanitize_job_ids_sent', $idsSent );
			$jobInfo->set( 'sanitize_job_ids_sent_total', $idsSentTotal );
			$this->log( "Created $jobsSentCurLoop jobs, setting from offset to $from.\n" );
		} else {
			$this->log( "No jobs created.\n" );
		}

		return $jobs;
	}

	/**
	 * Build one CheckerJob for the inclusive page id range [$from, $to].
	 *
	 * The job is given a random delay between 0 and pushJobFreq seconds.
	 *
	 * @param int $from
	 * @param int $to
	 * @param string|null $cluster
	 * @param int $loopId
	 * @return CheckerJob
	 */
	private function createCheckerJob( $from, $to, $cluster, $loopId ) {
		$delay = mt_rand( 0, $this->pushJobFreq );
		$this->log( "Creating CheckerJob( $from, $to, $delay, {$this->profileName}, $cluster, $loopId )\n" );
		return CheckerJob::build( $from, $to, $delay, $this->profileName, $cluster, $loopId, $this->jobQueueGroup );
	}

	/**
	 * Decide whether a new loop is allowed to start now.
	 *
	 * @param int|null $lastLoop last loop start time
	 * @return bool false while fewer than minLoopDuration seconds have passed
	 *  since $lastLoop (a message is logged in that case), true otherwise,
	 *  including when $lastLoop is null.
	 */
	private function checkMinLoopDuration( $lastLoop ) {
		if ( $lastLoop !== null && ( MWTimestamp::time() - $lastLoop ) < $this->minLoopDuration ) {
			// NOTE(review): the message says "ended" but $lastLoop is recorded
			// when a loop starts (see run()); text kept as-is.
			$date = date( 'Y-m-d H:i:s', $lastLoop );
			$newLoop = date( 'Y-m-d H:i:s', $lastLoop + $this->minLoopDuration );
			$this->log( "Last loop ended at $date, new jobs will be sent when min_loop_duration is reached at $newLoop\n" );
			return false;
		}
		return true;
	}

	/**
	 * Forward a message to the configured logger callable.
	 *
	 * @param string $msg
	 * @param string|null $channel
	 */
	private function log( $msg, $channel = null ) {
		( $this->logger )( $msg, $channel );
	}
}