Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
0.00% |
0 / 119 |
|
0.00% |
0 / 4 |
CRAP | |
0.00% |
0 / 1 |
| PersistRevisionThreadItems | |
0.00% |
0 / 113 |
|
0.00% |
0 / 4 |
506 | |
0.00% |
0 / 1 |
| __construct | |
0.00% |
0 / 12 |
|
0.00% |
0 / 1 |
2 | |||
| execute | |
0.00% |
0 / 49 |
|
0.00% |
0 / 1 |
90 | |||
| process | |
0.00% |
0 / 42 |
|
0.00% |
0 / 1 |
90 | |||
| processRow | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
12 | |||
| 1 | <?php |
| 2 | |
| 3 | namespace MediaWiki\Extension\DiscussionTools\Maintenance; |
| 4 | |
| 5 | use MediaWiki\Exception\MWExceptionRenderer; |
| 6 | use MediaWiki\Extension\DiscussionTools\Hooks\HookUtils; |
| 7 | use MediaWiki\Extension\DiscussionTools\ThreadItemStore; |
| 8 | use MediaWiki\Language\Language; |
| 9 | use MediaWiki\Maintenance\Maintenance; |
| 10 | use MediaWiki\Revision\RevisionStore; |
| 11 | use MediaWiki\Shell\Shell; |
| 12 | use MediaWiki\Title\Title; |
| 13 | use stdClass; |
| 14 | use Throwable; |
| 15 | use Wikimedia\Rdbms\IReadableDatabase; |
| 16 | use Wikimedia\Rdbms\SelectQueryBuilder; |
| 17 | |
// Resolve the MediaWiki installation directory: use the MW_INSTALL_PATH
// environment variable when it is set, otherwise fall back to the directory
// three levels above this script.
$IP = getenv( 'MW_INSTALL_PATH' );
$IP = $IP === false ? __DIR__ . '/../../..' : $IP;
require_once $IP . '/maintenance/Maintenance.php';
| 23 | |
/**
 * Maintenance script that parses the selected revisions and persists their
 * thread items through the DiscussionTools ThreadItemStore.
 *
 * The set of revisions is chosen with exactly one of --all, --namespace,
 * --page or --rev (checked in that order), and is always restricted to pages
 * that are either in a signature namespace or carry the 'newsectionlink'
 * page property.
 */
class PersistRevisionThreadItems extends Maintenance {

	private IReadableDatabase $dbr;
	private ThreadItemStore $itemStore;
	private RevisionStore $revStore;
	private Language $lang;

	/**
	 * Declare the script's description, command line options and batch size.
	 */
	public function __construct() {
		parent::__construct();
		$this->requireExtension( 'DiscussionTools' );
		$this->addDescription( 'Persist thread item information for the given pages/revisions' );
		$this->addOption( 'rev', 'Revision ID to process', false, true, false, true );
		$this->addOption( 'page', 'Page title to process', false, true, false, true );
		$this->addOption( 'namespace', 'Namespace number to process', false, true, false, true );
		$this->addOption( 'all', 'Process the whole wiki' );
		$this->addOption( 'current', 'Process current revisions only' );
		$this->addOption( 'start', 'Restart from this position (as printed by the script)', false, true );
		$this->addOption( 'touched-after', 'Only process pages touched after this timestamp', false, true );
		$this->addOption( 'touched-before', 'Only process pages touched before this timestamp', false, true );
		$this->setBatchSize( 100 );
	}

	/**
	 * Build the revision query from the command line options and hand it to process().
	 *
	 * Note that 'touched-after' and 'touched-before' are only applied together
	 * with 'current'; without it they have no effect on the query.
	 */
	public function execute() {
		$services = $this->getServiceContainer();

		$this->dbr = $dbr = $this->getDB( DB_REPLICA );
		$this->itemStore = $services->getService( 'DiscussionTools.ThreadItemStore' );
		$this->revStore = $services->getRevisionStore();
		$this->lang = $services->getLanguageFactory()->getLanguage( 'en' );

		$qb = $dbr->newSelectQueryBuilder();

		$qb->queryInfo( $this->revStore->getQueryInfo( [ 'page' ] ) );

		// Exactly one selector is honoured; the first one present wins.
		if ( $this->getOption( 'all' ) ) {
			// Do nothing

		} elseif ( $this->getOption( 'namespace' ) ) {
			$qb->where( [ 'page_namespace' => $this->getOption( 'namespace' ) ] );

		} elseif ( $this->getOption( 'page' ) ) {
			// Resolve the given titles to page IDs in a single batch lookup.
			$linkBatch = $services->getLinkBatchFactory()->newLinkBatch();
			foreach ( $this->getOption( 'page' ) as $page ) {
				$linkBatch->addObj( Title::newFromText( $page ) );
			}
			$pageIds = array_map( static function ( $page ) {
				return $page->getId();
			}, $linkBatch->getPageIdentities() );

			$qb->where( [ 'rev_page' => $pageIds ] );

		} elseif ( $this->getOption( 'rev' ) ) {
			$qb->where( [ 'rev_id' => $this->getOption( 'rev' ) ] );
		} else {
			$this->error( "One of 'all', 'namespace', 'page', or 'rev' required" );
			$this->maybeHelp( true );
			return;
		}

		// Add conditions from HookUtils::isAvailableForTitle().
		// Keep this in sync with that method.
		$nsInfo = $services->getNamespaceInfo();
		$signatureNamespaces = array_values( array_filter(
			$nsInfo->getValidNamespaces(),
			[ $nsInfo, 'wantSignatures' ]
		) );
		$qb->leftJoin( 'page_props', null, [
			'pp_propname' => 'newsectionlink',
			'pp_page = page_id',
		] );
		$qb->where(
			$dbr->expr( 'page_namespace', '=', $signatureNamespaces )
				->or( 'pp_propname', '!=', null )
		);

		if ( $this->getOption( 'current' ) ) {
			// One row per page, so the page ID alone identifies a batch position.
			$qb->where( 'rev_id = page_latest' );
			$index = [ 'page_id' ];

			if ( $this->getOption( 'touched-after' ) ) {
				$qb->where( $dbr->expr(
					'page_touched', '>', $dbr->timestamp( $this->getOption( 'touched-after' ) )
				) );
			}
			if ( $this->getOption( 'touched-before' ) ) {
				$qb->where( $dbr->expr(
					'page_touched', '<', $dbr->timestamp( $this->getOption( 'touched-before' ) )
				) );
			}

		} else {
			// Process in order by page and time to avoid confusing results while the script is running
			$index = [ 'rev_page', 'rev_timestamp', 'rev_id' ];
		}

		$this->process( $qb, $index );
	}

	/**
	 * Decode and validate the 'start' option against the batching index.
	 *
	 * The value must be a JSON array holding one integer or string per index
	 * column, in the format this script prints as "--start ...". Anything else
	 * aborts the script instead of silently restarting from the beginning.
	 *
	 * @param string[] $index Field names used for ordering and batching
	 * @return array|null Position to resume after, or null when 'start' was not given
	 */
	private function parseStartOption( array $index ): ?array {
		if ( !$this->hasOption( 'start' ) ) {
			return null;
		}

		$decoded = json_decode( $this->getOption( 'start' ) );

		// A JSON object decodes to stdClass and a bare scalar to a scalar; only a
		// JSON array with one value per index column can be combined with $index.
		$valid = is_array( $decoded ) && count( $decoded ) === count( $index );
		if ( $valid ) {
			foreach ( $decoded as $value ) {
				if ( !is_int( $value ) && !is_string( $value ) ) {
					$valid = false;
					break;
				}
			}
		}

		if ( !$valid ) {
			$this->fatalError(
				"Invalid 'start': expected a JSON array of " . count( $index ) .
				" value(s), as printed by the script"
			);
		}

		return array_values( $decoded );
	}

	/**
	 * Run the query in batches ordered by $index and process every returned row.
	 *
	 * Before each batch that has a start position, the position is printed in
	 * a form that can be passed back through the 'start' option to resume.
	 *
	 * @param SelectQueryBuilder $qb Query selecting the revisions to process
	 * @param string[] $index Field names used for ordering and batching
	 */
	private function process( SelectQueryBuilder $qb, array $index ): void {
		$start = microtime( true );

		$qb->caller( __METHOD__ );

		// estimateRowCount() refuses to work when fields are set, so we can't just call it on $qb
		$countQueryInfo = $qb->getQueryInfo();
		$count = $qb->newSubquery()
			->rawTables( $countQueryInfo['tables'] )
			->where( $countQueryInfo['conds'] )
			->options( $countQueryInfo['options'] )
			->joinConds( $countQueryInfo['join_conds'] )
			->caller( __METHOD__ )
			->estimateRowCount();
		$this->output( "Processing... (estimated $count rows)\n" );

		$processed = 0;
		$updated = 0;

		$qb->orderBy( $index );
		$batchSize = $this->getBatchSize();
		$qb->limit( $batchSize );

		$batchStart = $this->parseStartOption( $index );

		while ( true ) {
			$qbForBatch = clone $qb;
			if ( $batchStart ) {
				$batchStartCond = $this->dbr->buildComparison( '>', array_combine( $index, $batchStart ) );
				$qbForBatch->where( $batchStartCond );

				$batchStartOutput = Shell::escape( json_encode( $batchStart ) );
				$this->output( "--start $batchStartOutput\n" );
			}

			$res = $qbForBatch->fetchResultSet();
			$lastRow = null;
			foreach ( $res as $row ) {
				$updated += (int)$this->processRow( $row );
				$lastRow = $row;
			}
			$processed += $res->numRows();

			$this->output( "Processed $processed (updated $updated) of $count rows\n" );

			$this->waitForReplication();

			if ( $res->numRows() < $batchSize || $lastRow === null ) {
				// Done
				break;
			}

			// Update the conditions to select the next batch.
			$batchStart = [];
			foreach ( $index as $field ) {
				$batchStart[] = $lastRow->$field;
			}
		}

		$duration = microtime( true ) - $start;
		$durationFormatted = $this->lang->formatTimePeriod( $duration );
		$this->output( "Finished in $durationFormatted\n" );
	}

	/**
	 * Parse one revision and store its thread items.
	 *
	 * Any error is reported on the output and swallowed, so that a single bad
	 * revision does not stop the whole run.
	 *
	 * @param stdClass $row Database table row
	 * @return bool The result of ThreadItemStore::insertThreadItems(), or false
	 *  when the title is not eligible or an error occurred
	 */
	private function processRow( stdClass $row ): bool {
		$changed = false;
		try {
			$rev = $this->revStore->newRevisionFromRow( $row );
			$title = Title::newFromPageIdentity( $rev->getPage() );
			if ( HookUtils::isAvailableForTitle( $title ) ) {
				$threadItemSet = HookUtils::parseRevisionParsoidHtml( $rev, false )->getValueOrThrow();

				// Store permalink data (even when store is disabled - T334258)
				$changed = $this->itemStore->insertThreadItems( $rev, $threadItemSet );
			}
		} catch ( Throwable $e ) {
			$this->output( "Error while processing revid=$row->rev_id, pageid=$row->rev_page\n" );
			MWExceptionRenderer::output( $e, MWExceptionRenderer::AS_RAW );
		}
		return $changed;
	}
}
| 211 | |
// Name the maintenance class defined in this file, then load the file named by
// the RUN_MAINTENANCE_IF_MAIN constant.
$maintClass = PersistRevisionThreadItems::class;
require_once RUN_MAINTENANCE_IF_MAIN;