Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
0.00% |
0 / 80 |
|
0.00% |
0 / 3 |
CRAP | |
0.00% |
0 / 1 |
| InitImageData | |
0.00% |
0 / 74 |
|
0.00% |
0 / 3 |
110 | |
0.00% |
0 / 1 |
| __construct | |
0.00% |
0 / 14 |
|
0.00% |
0 / 1 |
2 | |||
| execute | |
0.00% |
0 / 49 |
|
0.00% |
0 / 1 |
42 | |||
| waitForMaxPressure | |
0.00% |
0 / 11 |
|
0.00% |
0 / 1 |
12 | |||
| 1 | <?php |
| 2 | |
| 3 | $IP = getenv( 'MW_INSTALL_PATH' ); |
| 4 | if ( $IP === false ) { |
| 5 | $IP = __DIR__ . '/../../..'; |
| 6 | } |
| 7 | require_once "$IP/maintenance/Maintenance.php"; |
| 8 | |
| 9 | use MediaWiki\Deferred\LinksUpdate\ImageLinksTable; |
| 10 | use MediaWiki\JobQueue\JobQueueGroup; |
| 11 | use MediaWiki\Maintenance\Maintenance; |
| 12 | use PageImages\Job\InitImageDataJob; |
| 13 | |
| 14 | /** |
| 15 | * @license WTFPL |
| 16 | * @author Max Semenik |
| 17 | */ |
class InitImageData extends Maintenance {

	/**
	 * Registers the command-line options of the script, sets the default
	 * batch size to 100 and declares the dependency on the PageImages extension.
	 */
	public function __construct() {
		parent::__construct();
		$this->addDescription( 'Initializes PageImages data' );
		$this->addOption( 'namespaces',
			'Comma-separated list of namespace(s) to refresh', false, true );
		$this->addOption( 'earlier-than',
			'Run only on pages touched earlier than this timestamp', false, true );
		$this->addOption( 'later-than',
			'Run only on pages touched later than this timestamp', false, true );
		$this->addOption( 'start', 'Starting page ID', false, true );
		$this->addOption( 'queue-pressure', 'Maximum number of jobs to enqueue at a time. ' .
			'If not provided or 0 will be run in-process.', false, true );
		$this->addOption( 'quiet', "Don't report on job queue pressure" );
		$this->setBatchSize( 100 );

		$this->requireExtension( 'PageImages' );
	}

	/**
	 * Do the actual work of filling out page images.
	 *
	 * Walks the non-redirect pages that have at least one imagelinks row in
	 * ascending page_id order, one batch at a time. Each batch of page IDs is
	 * wrapped in an InitImageDataJob that is either run in-process or, when
	 * --queue-pressure is a positive number, pushed to the job queue. The last
	 * page ID of every batch is printed, followed by "done" once no pages remain.
	 */
	public function execute() {
		$lastId = (int)$this->getOption( 'start', 0 );
		$isQuiet = $this->hasOption( 'quiet' );
		// Cast explicitly: the option arrives as a string from the command line and a
		// non-numeric value must not accidentally switch the script to queue mode.
		$maxPressure = (int)$this->getOption( 'queue-pressure', 0 );
		$queue = null;
		if ( $maxPressure > 0 ) {
			$queue = $this->getServiceContainer()->getJobQueueGroup();
		}

		// The namespace filter does not change between batches, so resolve it once.
		if ( $this->hasOption( 'namespaces' ) ) {
			$namespaces = explode( ',', $this->getOption( 'namespaces' ) );
		} else {
			$namespaces = $this->getServiceContainer()->getMainConfig()->get( 'PageImagesNamespaces' );
		}

		while ( true ) {
			$dbr = $this->getServiceContainer()
				->getConnectionProvider()
				->getReplicaDatabase( ImageLinksTable::VIRTUAL_DOMAIN );
			$queryBuilder = $dbr->newSelectQueryBuilder()
				->select( 'page_id' )
				->from( 'page' )
				->leftJoin( 'imagelinks', null, 'page_id = il_from' )
				->where( [
					$dbr->expr( 'page_id', '>', $lastId ),
					$dbr->expr( 'il_from', '!=', null ),
					'page_is_redirect' => 0,
					'page_namespace' => $namespaces,
				] )
				->orderBy( 'page_id' )
				->groupBy( 'page_id' )
				->limit( $this->getBatchSize() )
				->caller( __METHOD__ );
			if ( $this->hasOption( 'earlier-than' ) ) {
				$queryBuilder->andWhere(
					$dbr->expr( 'page_touched', '<', $dbr->timestamp( $this->getOption( 'earlier-than' ) ) )
				);
			}
			if ( $this->hasOption( 'later-than' ) ) {
				$queryBuilder->andWhere(
					$dbr->expr( 'page_touched', '>', $dbr->timestamp( $this->getOption( 'later-than' ) ) )
				);
			}

			$pageIds = $queryBuilder->fetchFieldValues();
			if ( !$pageIds ) {
				// Nothing left: stop before creating a job for an empty batch and
				// before printing an empty "last ID" line.
				break;
			}

			$job = new InitImageDataJob(
				[ 'page_ids' => $pageIds ],
				$this->getServiceContainer()->getDBLoadBalancerFactory()
			);
			if ( $queue === null ) {
				$job->run();
			} else {
				$queue->push( $job );
				$this->waitForMaxPressure( $queue, $maxPressure, $isQuiet );
			}

			// Rows are ordered by page_id, so the final element is the resume point.
			$lastId = (int)end( $pageIds );
			$this->output( "$lastId\n" );
		}
		$this->output( "done\n" );
	}

	/**
	 * Blocks until the pressure of the InitImageDataJob queue, computed as
	 * queued + running - abandoned, drops below $maxPressure. The queue is
	 * polled once per second and always at least once.
	 *
	 * @param JobQueueGroup $queue The job queue to fetch pressure from
	 * @param int $maxPressure The maximum number of queued + active
	 *  jobs that can exist when returning
	 * @param bool $isQuiet When false report on job queue pressure every 10s
	 */
	private function waitForMaxPressure( JobQueueGroup $queue, int $maxPressure, bool $isQuiet ): void {
		$group = $queue->get( 'InitImageDataJob' );
		$i = 0;
		do {
			sleep( 1 );
			$queued = $group->getSize();
			$running = $group->getAcquiredCount();
			$abandoned = $group->getAbandonedCount();

			// Every tenth poll (each poll sleeps one second) emit a status line.
			if ( !$isQuiet && ++$i % 10 === 0 ) {
				$now = date( 'Y-m-d H:i:s T' );
				$this->output( "[$now] Queued: $queued Running: $running " .
					"Abandoned: $abandoned Max: $maxPressure\n" );
			}
		} while ( $queued + $running - $abandoned >= $maxPressure );
	}
}
| 125 | |
| 126 | $maintClass = InitImageData::class; |
| 127 | require_once RUN_MAINTENANCE_IF_MAIN; |