MediaWiki REL1_28
JobRunner.php
Go to the documentation of this file.
1<?php
26use Liuggio\StatsdClient\Factory\StatsdDataFactory;
27use Psr\Log\LoggerAwareInterface;
28use Psr\Log\LoggerInterface;
29use Wikimedia\ScopedCallback;
30
/**
 * Job queue runner utility methods.
 *
 * @ingroup JobQueue
 * @since 1.24
 */
class JobRunner implements LoggerAwareInterface {
	/** @var callable|null Debug output handler */
	protected $debug;

	/** @var LoggerInterface */
	protected $logger;

	const MAX_ALLOWED_LAG = 3; // abort if more than this much DB lag is present
	const LAG_CHECK_PERIOD = 1.0; // check replica DB lag this many seconds
	const ERROR_BACKOFF_TTL = 1; // seconds to back off a queue due to errors

	/**
	 * @param callable $debug Optional debug output handler
	 */
	public function setDebugHandler( $debug ) {
		$this->debug = $debug;
	}

	/**
	 * @param LoggerInterface $logger
	 * @return void
	 */
	public function setLogger( LoggerInterface $logger ) {
		$this->logger = $logger;
	}

	/**
	 * @param LoggerInterface $logger Defaults to the 'runJobs' channel logger
	 */
	public function __construct( LoggerInterface $logger = null ) {
		if ( $logger === null ) {
			$logger = LoggerFactory::getInstance( 'runJobs' );
		}
		$this->setLogger( $logger );
	}

	/**
	 * Run jobs of the specified number/type for the specified time.
	 *
	 * The response map has a 'jobs' field that lists the status of each job, including:
	 *   - type   : the job type
	 *   - status : ok/failed
	 *   - error  : any error message string
	 *   - time   : the job run time in ms
	 * The response map also has:
	 *   - backoffs : the (job type => seconds) map of backoff times
	 *   - elapsed  : the total time spent running tasks in ms
	 *   - reached  : the reason the script finished, one of (none-ready, job-limit,
	 *                time-limit, memory-limit, replica-lag-limit, read-only, none-possible)
	 *
	 * This method outputs status information only if a debug handler was set.
	 * Any exceptions are caught and logged, but are not reported as output.
	 *
	 * @param array $options Map of parameters:
	 *    - type     : the job type (or false for the default types)
	 *    - maxJobs  : maximum number of jobs to run
	 *    - maxTime  : maximum time in seconds before stopping
	 *    - throttle : whether to respect job backoff configuration
	 * @return array Summary response that can easily be JSON serialized
	 */
	public function run( array $options ) {
		global $wgJobClasses, $wgTrxProfilerLimits;

		$response = [ 'jobs' => [], 'reached' => 'none-ready' ];

		$type = isset( $options['type'] ) ? $options['type'] : false;
		$maxJobs = isset( $options['maxJobs'] ) ? $options['maxJobs'] : false;
		$maxTime = isset( $options['maxTime'] ) ? $options['maxTime'] : false;
		$noThrottle = isset( $options['throttle'] ) && !$options['throttle'];

		// Bail if job type is invalid
		if ( $type !== false && !isset( $wgJobClasses[$type] ) ) {
			$response['reached'] = 'none-possible';
			return $response;
		}
		// Bail out if DB is in read-only mode
		if ( wfReadOnly() ) {
			$response['reached'] = 'read-only';
			return $response;
		}

		$lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
		// Bail out if there is too much DB lag.
		// This check should not block as we want to try other wiki queues.
		list( , $maxLag ) = $lbFactory->getMainLB( wfWikiID() )->getMaxLag();
		if ( $maxLag >= self::MAX_ALLOWED_LAG ) {
			$response['reached'] = 'replica-lag-limit';
			return $response;
		}

		// Flush any pending DB writes for sanity
		$lbFactory->commitAll( __METHOD__ );

		// Catch huge single updates that lead to replica DB lag
		$trxProfiler = Profiler::instance()->getTransactionProfiler();
		$trxProfiler->setLogger( LoggerFactory::getInstance( 'DBPerformance' ) );
		$trxProfiler->setExpectations( $wgTrxProfilerLimits['JobRunner'], __METHOD__ );

		// Some jobs types should not run until a certain timestamp
		$backoffs = []; // map of (type => UNIX expiry)
		$backoffDeltas = []; // map of (type => seconds)
		$wait = 'wait'; // block to read backoffs the first time

		$group = JobQueueGroup::singleton();
		$stats = MediaWikiServices::getInstance()->getStatsdDataFactory();
		$jobsPopped = 0;
		$timeMsTotal = 0;
		$startTime = microtime( true ); // time since jobs started running
		$lastCheckTime = 1; // timestamp of last replica DB check
		do {
			// Sync the persistent backoffs with concurrent runners
			$backoffs = $this->syncBackoffDeltas( $backoffs, $backoffDeltas, $wait );
			$blacklist = $noThrottle ? [] : array_keys( $backoffs );
			$wait = 'nowait'; // less important now

			if ( $type === false ) {
				// Job of any default type, excluding the backed-off ones
				$job = $group->pop(
					JobQueueGroup::TYPE_DEFAULT,
					JobQueueGroup::USE_CACHE,
					$blacklist
				);
			} elseif ( in_array( $type, $blacklist ) ) {
				$job = false; // requested queue in backoff state
			} else {
				$job = $group->pop( $type ); // job from a single queue
			}
			$lbFactory->commitMasterChanges( __METHOD__ ); // flush any JobQueueDB writes

			if ( $job ) { // found a job
				++$jobsPopped;
				$popTime = time();
				$jType = $job->getType();

				WebRequest::overrideRequestId( $job->getRequestId() );

				// Back off of certain jobs for a while (for throttling and for errors)
				$ttw = $this->getBackoffTimeToWait( $job );
				if ( $ttw > 0 ) {
					// Always add the delta for other runners in case the time running the
					// job negated the backoff for each individually but not collectively.
					$backoffDeltas[$jType] = isset( $backoffDeltas[$jType] )
						? $backoffDeltas[$jType] + $ttw
						: $ttw;
					$backoffs = $this->syncBackoffDeltas( $backoffs, $backoffDeltas, $wait );
				}

				$info = $this->executeJob( $job, $lbFactory, $stats, $popTime );
				if ( $info['status'] !== false || !$job->allowRetries() ) {
					$group->ack( $job ); // succeeded or job cannot be retried
					$lbFactory->commitMasterChanges( __METHOD__ ); // flush any JobQueueDB writes
				}

				// Back off of certain jobs for a while (for throttling and for errors)
				if ( $info['status'] === false && mt_rand( 0, 49 ) == 0 ) {
					$ttw = max( $ttw, self::ERROR_BACKOFF_TTL ); // too many errors
					$backoffDeltas[$jType] = isset( $backoffDeltas[$jType] )
						? $backoffDeltas[$jType] + $ttw
						: $ttw;
				}

				$response['jobs'][] = [
					'type' => $jType,
					'status' => ( $info['status'] === false ) ? 'failed' : 'ok',
					'error' => $info['error'],
					'time' => $info['timeMs']
				];
				$timeMsTotal += $info['timeMs'];

				// Break out if we hit the job count or wall time limits...
				if ( $maxJobs && $jobsPopped >= $maxJobs ) {
					$response['reached'] = 'job-limit';
					break;
				} elseif ( $maxTime && ( microtime( true ) - $startTime ) > $maxTime ) {
					$response['reached'] = 'time-limit';
					break;
				}

				// Don't let any of the main DB replica DBs get backed up.
				// This only waits for so long before exiting and letting
				// other wikis in the farm (on different masters) get a chance.
				$timePassed = microtime( true ) - $lastCheckTime;
				if ( $timePassed >= self::LAG_CHECK_PERIOD || $timePassed < 0 ) {
					try {
						$lbFactory->waitForReplication( [
							'ifWritesSince' => $lastCheckTime,
							'timeout' => self::MAX_ALLOWED_LAG
						] );
					} catch ( DBReplicationWaitError $e ) {
						$response['reached'] = 'replica-lag-limit';
						break;
					}
					$lastCheckTime = microtime( true );
				}
				// Don't let any queue replica DBs/backups fall behind
				if ( $jobsPopped > 0 && ( $jobsPopped % 100 ) == 0 ) {
					$group->waitForBackups();
				}

				// Bail if near-OOM instead of in a job
				if ( !$this->checkMemoryOK() ) {
					$response['reached'] = 'memory-limit';
					break;
				}
			}
		} while ( $job ); // stop when there are no jobs

		// Sync the persistent backoffs for the next runJobs.php pass
		if ( $backoffDeltas ) {
			$this->syncBackoffDeltas( $backoffs, $backoffDeltas, 'wait' );
		}

		$response['backoffs'] = $backoffs;
		$response['elapsed'] = $timeMsTotal;

		return $response;
	}

	/**
	 * Run a single job, committing/rolling back DB changes and collecting stats.
	 *
	 * @param Job $job
	 * @param LBFactory $lbFactory
	 * @param StatsdDataFactory $stats
	 * @param float $popTime UNIX timestamp when the job was popped
	 * @return array Map of status/error/timeMs
	 */
	private function executeJob( Job $job, LBFactory $lbFactory, $stats, $popTime ) {
		$jType = $job->getType();
		$msg = $job->toString() . " STARTING";
		$this->logger->debug( $msg );
		$this->debugCallback( $msg );

		// Run the job...
		$rssStart = $this->getMaxRssKb();
		$jobStartTime = microtime( true );
		try {
			$fnameTrxOwner = get_class( $job ) . '::run'; // give run() outer scope
			$lbFactory->beginMasterChanges( $fnameTrxOwner );
			$status = $job->run();
			$error = $job->getLastError();
			$this->commitMasterChanges( $lbFactory, $job, $fnameTrxOwner );
			// Important: this must be the last deferred update added (T100085, T154425)
			DeferredUpdates::addCallableUpdate( [ JobQueueGroup::class, 'pushLazyJobs' ] );
			// Run any deferred update tasks; doUpdates() manages transactions itself
			DeferredUpdates::doUpdates();
		} catch ( Exception $e ) {
			MWExceptionHandler::rollbackMasterChangesAndLog( $e );
			$status = false;
			$error = get_class( $e ) . ': ' . $e->getMessage();
		}
		// Always attempt to call teardown() even if Job throws exception.
		try {
			$job->teardown( $status );
		} catch ( Exception $e ) {
			MWExceptionHandler::logException( $e );
		}

		// Commit all outstanding connections that are in a transaction
		// to get a fresh repeatable read snapshot on every connection.
		// Note that jobs are still responsible for handling replica DB lag.
		$lbFactory->flushReplicaSnapshots( __METHOD__ );
		// Clear out title cache data from prior snapshots
		MediaWikiServices::getInstance()->getLinkCache()->clear();
		$timeMs = intval( ( microtime( true ) - $jobStartTime ) * 1000 );
		$rssEnd = $this->getMaxRssKb();

		// Record how long jobs wait before getting popped
		$readyTs = $job->getReadyTimestamp();
		if ( $readyTs ) {
			$pickupDelay = max( 0, $popTime - $readyTs );
			$stats->timing( 'jobqueue.pickup_delay.all', 1000 * $pickupDelay );
			$stats->timing( "jobqueue.pickup_delay.$jType", 1000 * $pickupDelay );
		}
		// Record root job age for jobs being run
		$rootTimestamp = $job->getRootJobParams()['rootJobTimestamp'];
		if ( $rootTimestamp ) {
			$age = max( 0, $popTime - wfTimestamp( TS_UNIX, $rootTimestamp ) );
			$stats->timing( "jobqueue.pickup_root_age.$jType", 1000 * $age );
		}
		// Track the execution time for jobs
		$stats->timing( "jobqueue.run.$jType", $timeMs );
		// Track RSS increases for jobs (in case of memory leaks)
		if ( $rssStart && $rssEnd ) {
			$stats->updateCount( "jobqueue.rss_delta.$jType", $rssEnd - $rssStart );
		}

		if ( $status === false ) {
			$msg = $job->toString() . " t=$timeMs error={$error}";
			$this->logger->error( $msg );
			$this->debugCallback( $msg );
		} else {
			$msg = $job->toString() . " t=$timeMs good";
			$this->logger->info( $msg );
			$this->debugCallback( $msg );
		}

		return [ 'status' => $status, 'error' => $error, 'timeMs' => $timeMs ];
	}

	/**
	 * @return int|null Max memory RSS in kilobytes, or null if unavailable
	 */
	private function getMaxRssKb() {
		$info = wfGetRusage() ?: [];
		// see http://linux.die.net/man/2/getrusage
		return isset( $info['ru_maxrss'] ) ? (int)$info['ru_maxrss'] : null;
	}

	/**
	 * Get the number of seconds to back off this job's queue, per
	 * $wgJobBackoffThrottling, using randomized rounding on the fraction.
	 *
	 * @param Job $job
	 * @return int Seconds for this runner to avoid doing more jobs of this type
	 * @see $wgJobBackoffThrottling
	 */
	private function getBackoffTimeToWait( Job $job ) {
		global $wgJobBackoffThrottling;

		if ( !isset( $wgJobBackoffThrottling[$job->getType()] ) ||
			$job instanceof DuplicateJob // no work was done
		) {
			return 0; // not throttled
		}

		$itemsPerSecond = $wgJobBackoffThrottling[$job->getType()];
		if ( $itemsPerSecond <= 0 ) {
			return 0; // not throttled
		}

		$seconds = 0;
		if ( $job->workItemCount() > 0 ) {
			$exactSeconds = $job->workItemCount() / $itemsPerSecond;
			// use randomized rounding
			$seconds = floor( $exactSeconds );
			$remainder = $exactSeconds - $seconds;
			$seconds += ( mt_rand() / mt_getrandmax() < $remainder ) ? 1 : 0;
		}

		return (int)$seconds;
	}

	/**
	 * Get the previous backoff expiries from persistent storage.
	 * On I/O or lock acquisition failure this returns the original $backoffs.
	 *
	 * @param array $backoffs Map of (job type => UNIX timestamp)
	 * @param string $mode Lock wait mode - "wait" or "nowait"
	 * @return array Map of (job type => backoff expiry timestamp)
	 */
	private function loadBackoffs( array $backoffs, $mode = 'wait' ) {
		$file = wfTempDir() . '/mw-runJobs-backoffs.json';
		if ( is_file( $file ) ) {
			$noblock = ( $mode === 'nowait' ) ? LOCK_NB : 0;
			$handle = fopen( $file, 'rb' );
			if ( !flock( $handle, LOCK_SH | $noblock ) ) {
				fclose( $handle );
				return $backoffs; // don't wait on lock
			}
			$content = stream_get_contents( $handle );
			flock( $handle, LOCK_UN );
			fclose( $handle );
			$ctime = microtime( true );
			$cBackoffs = json_decode( $content, true ) ?: [];
			foreach ( $cBackoffs as $type => $timestamp ) {
				if ( $timestamp < $ctime ) {
					unset( $cBackoffs[$type] ); // drop expired entries
				}
			}
		} else {
			$cBackoffs = [];
		}

		return $cBackoffs;
	}

	/**
	 * Merge the current backoff expiries from persistent storage with $deltas,
	 * write them back, and return the merged map. The $deltas map is set to
	 * an empty array on success. On I/O or lock acquisition failure this
	 * returns the original $backoffs with $deltas still intact.
	 *
	 * @param array $backoffs Map of (job type => UNIX timestamp)
	 * @param array &$deltas Map of (job type => seconds)
	 * @param string $mode Lock wait mode - "wait" or "nowait"
	 * @return array The new backoffs account for $backoffs and the latest file data
	 */
	private function syncBackoffDeltas( array $backoffs, array &$deltas, $mode = 'wait' ) {
		if ( !$deltas ) {
			return $this->loadBackoffs( $backoffs, $mode );
		}

		$noblock = ( $mode === 'nowait' ) ? LOCK_NB : 0;
		$file = wfTempDir() . '/mw-runJobs-backoffs.json';
		$handle = fopen( $file, 'wb+' );
		if ( !flock( $handle, LOCK_EX | $noblock ) ) {
			fclose( $handle );
			return $backoffs; // don't wait on lock
		}
		$ctime = microtime( true );
		$content = stream_get_contents( $handle );
		$cBackoffs = json_decode( $content, true ) ?: [];
		foreach ( $deltas as $type => $seconds ) {
			$cBackoffs[$type] = isset( $cBackoffs[$type] ) && $cBackoffs[$type] >= $ctime
				? $cBackoffs[$type] + $seconds
				: $ctime + $seconds;
		}
		foreach ( $cBackoffs as $type => $timestamp ) {
			if ( $timestamp < $ctime ) {
				unset( $cBackoffs[$type] ); // drop expired entries
			}
		}
		ftruncate( $handle, 0 );
		fwrite( $handle, json_encode( $cBackoffs ) );
		flock( $handle, LOCK_UN );
		fclose( $handle );

		$deltas = [];

		return $cBackoffs;
	}

	/**
	 * Make sure that this script is not too close to the memory usage limit.
	 * It is better to die in between jobs than OOM right in the middle of one.
	 *
	 * @return bool
	 */
	private function checkMemoryOK() {
		static $maxBytes = null;
		if ( $maxBytes === null ) {
			$m = [];
			if ( preg_match( '!^(\d+)(k|m|g|)$!i', ini_get( 'memory_limit' ), $m ) ) {
				list( , $num, $unit ) = $m;
				$conv = [ 'g' => 1073741824, 'm' => 1048576, 'k' => 1024, '' => 1 ];
				$maxBytes = $num * $conv[strtolower( $unit )];
			} else {
				$maxBytes = 0; // no limit (or unparsable), never bail
			}
		}
		$usedBytes = memory_get_usage();
		if ( $maxBytes && $usedBytes >= 0.95 * $maxBytes ) {
			$msg = "Detected excessive memory usage ($usedBytes/$maxBytes).";
			$this->debugCallback( $msg );
			$this->logger->error( $msg );

			return false;
		}

		return true;
	}

	/**
	 * Log the job message via the debug handler, if one was set.
	 *
	 * @param string $msg The message to log
	 */
	private function debugCallback( $msg ) {
		if ( $this->debug ) {
			call_user_func_array( $this->debug, [ wfTimestamp( TS_DB ) . " $msg\n" ] );
		}
	}

	/**
	 * Issue a commit on all masters who are currently in a transaction and have
	 * made changes to the database. It also supports sometimes waiting for the
	 * local wiki's replica DBs to catch up. See the documentation for
	 * $wgJobSerialCommitThreshold for more.
	 *
	 * @param LBFactory $lbFactory
	 * @param Job $job
	 * @param string $fnameTrxOwner
	 * @throws DBError
	 */
	private function commitMasterChanges( LBFactory $lbFactory, Job $job, $fnameTrxOwner ) {
		global $wgJobSerialCommitThreshold;

		$time = false;
		$lb = $lbFactory->getMainLB( wfWikiID() );
		if ( $wgJobSerialCommitThreshold !== false && $lb->getServerCount() > 1 ) {
			// Generally, there is one master connection to the local DB
			$dbwSerial = $lb->getAnyOpenConnection( $lb->getWriterIndex() );
			// We need natively blocking fast locks
			if ( $dbwSerial && $dbwSerial->namedLocksEnqueue() ) {
				$time = $dbwSerial->pendingWriteQueryDuration( $dbwSerial::ESTIMATE_DB_APPLY );
				if ( $time < $wgJobSerialCommitThreshold ) {
					$dbwSerial = false; // writes were fast enough; no serialization needed
				}
			} else {
				$dbwSerial = false;
			}
		} else {
			// There are no replica DBs or writes are all to foreign DB (we don't handle that)
			$dbwSerial = false;
		}

		if ( !$dbwSerial ) {
			$lbFactory->commitMasterChanges( $fnameTrxOwner );
			return;
		}

		$ms = intval( 1000 * $time );
		$msg = $job->toString() . " COMMIT ENQUEUED [{$ms}ms of writes]";
		$this->logger->info( $msg );
		$this->debugCallback( $msg );

		// Wait for an exclusive lock to commit
		if ( !$dbwSerial->lock( 'jobrunner-serial-commit', __METHOD__, 30 ) ) {
			// This will trigger a rollback in the main loop
			throw new DBError( $dbwSerial, "Timed out waiting on commit queue." );
		}
		$unlocker = new ScopedCallback( function () use ( $dbwSerial ) {
			$dbwSerial->unlock( 'jobrunner-serial-commit', __METHOD__ );
		} );

		// Wait for the replica DBs to catch up
		$pos = $lb->getMasterPos();
		if ( $pos ) {
			$lb->waitForAll( $pos );
		}

		// Actually commit the DB master changes
		$lbFactory->commitMasterChanges( $fnameTrxOwner );
		ScopedCallback::consume( $unlocker );
	}
}
Apache License, Version 2.0, January 2004 — TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION. 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution.
$wgTrxProfilerLimits
Performance expectations for DB usage.
float[] $wgJobBackoffThrottling
Map of job types to how many job "work items" should be run per second on each job runner process.
$wgJobClasses
Maps jobs to their handling classes; extensions can add to this to provide custom jobs.
float bool $wgJobSerialCommitThreshold
Make job runners commit changes for replica DB-lag prone jobs one job at a time.
wfTempDir()
Tries to get the system directory for temporary files.
wfReadOnly()
Check whether the wiki is in read-only mode.
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
wfWikiID()
Get an ASCII string identifying this wiki This is used as a prefix in memcached keys.
wfGetRusage()
Get system resource usage of current request context.
Database error base class.
Definition DBError.php:26
Exception class for replica DB wait timeouts.
No-op job that does nothing.
static singleton( $wiki=false)
Job queue runner utility methods.
Definition JobRunner.php:37
const LAG_CHECK_PERIOD
Definition JobRunner.php:47
setDebugHandler( $debug)
Definition JobRunner.php:53
run(array $options)
Run jobs of the specified number/type for the specified time.
Definition JobRunner.php:99
const MAX_ALLOWED_LAG
Definition JobRunner.php:46
__construct(LoggerInterface $logger=null)
Definition JobRunner.php:68
callable null $debug
Debug output handler.
Definition JobRunner.php:39
setLogger(LoggerInterface $logger)
Definition JobRunner.php:61
syncBackoffDeltas(array $backoffs, array &$deltas, $mode='wait')
Merge the current backoff expiries from persistent storage.
executeJob(Job $job, LBFactory $lbFactory, $stats, $popTime)
debugCallback( $msg)
Log the job message.
commitMasterChanges(LBFactory $lbFactory, Job $job, $fnameTrxOwner)
Issue a commit on all masters who are currently in a transaction and have made changes to the databas...
getBackoffTimeToWait(Job $job)
checkMemoryOK()
Make sure that this script is not too close to the memory usage limit.
const ERROR_BACKOFF_TTL
Definition JobRunner.php:48
loadBackoffs(array $backoffs, $mode='wait')
Get the previous backoff expiries from persistent storage On I/O or lock acquisition failure this ret...
Class to both describe a background job and handle jobs.
Definition Job.php:31
An interface for generating database load balancers.
Definition LBFactory.php:31
PSR-3 logger instance factory.
MediaWikiServices is the service locator for the application scope of MediaWiki.
static instance()
Singleton.
Definition Profiler.php:61
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global list
Definition deferred.txt:11
when a variable name is used in a it is silently declared as a new local masking the global
Definition design.txt:95
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
$lbFactory
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set $status
Definition hooks.txt:1049
the array() calling protocol came about after MediaWiki 1.4rc1.
namespace are movable Hooks may change this value to override the return value of MWNamespace::isMovable(). 'NewDifferenceEngine' do that in ParserLimitReportFormat instead use this to modify the parameters of the image and a DIV can begin in one section and end in another Make sure your code can handle that case gracefully See the EditSectionClearerLink extension for an example zero but section is usually empty its values are the globals values before the output is cached one of or reset my talk my contributions etc etc otherwise the built in rate limiting checks are if enabled allows for interception of redirect as a string mapping parameter names to values & $type
Definition hooks.txt:2568
see documentation in includes Linker php for Linker::makeImageLink & $time
Definition hooks.txt:1752
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set and then return false from the hook function Ensure you consume the ChangeTagAfterDelete hook to carry out custom deletion actions as context called by AbstractContent::getParserOutput May be used to override the normal model specific rendering of page content as context as context $options
Definition hooks.txt:1096
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set and then return false from the hook function Ensure you consume the ChangeTagAfterDelete hook to carry out custom deletion actions as context called by AbstractContent::getParserOutput May be used to override the normal model specific rendering of page content $content
Definition hooks.txt:1094
this hook is for auditing only $response
Definition hooks.txt:805
processing should stop and the error should be shown to the user * false
Definition hooks.txt:189
returning false will NOT prevent logging $e
Definition hooks.txt:2110
if( $limit) $timestamp
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition injection.txt:37
if(count( $args)< 1) $job
const TS_DB
MySQL DATETIME (YYYY-MM-DD HH:MM:SS)
Definition defines.php:16
const TS_UNIX
Unix time - the number of seconds since 1970-01-01 00:00:00 UTC.
Definition defines.php:6