Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
0.00% |
0 / 197 |
|
0.00% |
0 / 12 |
CRAP | |
0.00% |
0 / 1 |
| FlowRemoveOldTopics | |
0.00% |
0 / 191 |
|
0.00% |
0 / 12 |
1122 | |
0.00% |
0 / 1 |
| __construct | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
2 | |||
| execute | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
2 | |||
| removeHeader | |
0.00% |
0 / 57 |
|
0.00% |
0 / 1 |
72 | |||
| removeTopics | |
0.00% |
0 / 24 |
|
0.00% |
0 / 1 |
6 | |||
| removeTopicsWithFlowUpdates | |
0.00% |
0 / 35 |
|
0.00% |
0 / 1 |
6 | |||
| removeWorkflows | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
12 | |||
| removeTopicList | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
| removeSummary | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
12 | |||
| sortSubtree | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
6 | |||
| removePosts | |
0.00% |
0 / 15 |
|
0.00% |
0 / 1 |
30 | |||
| removeReferences | |
0.00% |
0 / 16 |
|
0.00% |
0 / 1 |
12 | |||
| multiRemove | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| 1 | <?php |
| 2 | |
| 3 | namespace Flow\Maintenance; |
| 4 | |
| 5 | use Flow\Container; |
| 6 | use Flow\Data\ManagerGroup; |
| 7 | use Flow\DbFactory; |
| 8 | use Flow\Exception\FlowException; |
| 9 | use Flow\Model\AbstractRevision; |
| 10 | use Flow\Model\Header; |
| 11 | use Flow\Model\PostRevision; |
| 12 | use Flow\Model\UUID; |
| 13 | use Flow\Model\Workflow; |
| 14 | use Flow\OccupationController; |
| 15 | use Flow\Repository\TreeRepository; |
| 16 | use MediaWiki\Maintenance\Maintenance; |
| 17 | use MediaWiki\MediaWikiServices; |
| 18 | use MediaWiki\WikiMap\WikiMap; |
| 19 | use Wikimedia\Rdbms\DBUnexpectedError; |
| 20 | |
// Resolve the MediaWiki installation directory: use the MW_INSTALL_PATH
// environment variable when it is set, otherwise fall back to the directory
// three levels above this script.
$IP = getenv( 'MW_INSTALL_PATH' );
$IP = $IP === false ? __DIR__ . '/../../..' : $IP;

require_once "$IP/maintenance/Maintenance.php";
| 27 | |
| 28 | /** |
| 29 | * @ingroup Maintenance |
| 30 | */ |
| 31 | class FlowRemoveOldTopics extends Maintenance { |
/**
 * True when the script was started with --dryrun; transactions are then
 * rolled back instead of committed.
 *
 * @var bool
 */
protected $dryRun = false;

/**
 * Flow storage, fetched from the Flow container ('storage') in execute().
 *
 * @var ManagerGroup
 */
protected $storage;

/**
 * Tree repository, fetched from the Flow container ('repository.tree') in
 * execute().
 *
 * @var TreeRepository
 */
protected $treeRepo;

/**
 * Database factory, fetched from the Flow container ('db.factory') in
 * execute().
 *
 * @var DbFactory
 */
protected $dbFactory;
| 51 | |
/**
 * Declares the script description, the --date and --dryrun options, the
 * default batch size and the dependency on the Flow extension.
 */
public function __construct() {
	parent::__construct();

	$this->addDescription( "Deletes old topics" );

	$dateHelp = 'Date cutoff (in any format understood by wfTimestamp), topics ' .
		'older than this date will be deleted.';
	$this->addOption( 'date', $dateHelp, true, true );

	$dryRunHelp = 'Simulate script run, without actually deleting anything';
	$this->addOption( 'dryrun', $dryRunHelp );

	$this->setBatchSize( 10 );

	$this->requireExtension( 'Flow' );
}
| 65 | |
/**
 * Script entry point.
 *
 * Fetches the Flow services used by the removal helpers, validates the
 * --date cutoff and then runs the three removal passes in order: headers,
 * topics older than the cutoff, and topics whose only recent updates come
 * from the Flow talk page manager.
 */
public function execute() {
	// 'dryrun' is a value-less flag, so its mere presence is what matters
	$this->dryRun = $this->hasOption( 'dryrun' );
	$this->storage = Container::get( 'storage' );
	$this->treeRepo = Container::get( 'repository.tree' );
	$this->dbFactory = Container::get( 'db.factory' );

	// This script deletes data, so never continue with a cutoff that could
	// not be parsed: wfTimestamp() returns false for invalid input.
	// NOTE(review): an empty/falsy value appears to be treated as "now" by
	// wfTimestamp() - it is rejected up front here; confirm.
	$date = $this->getOption( 'date' );
	$timestamp = $date ? wfTimestamp( TS_MW, $date ) : false;
	if ( $timestamp === false ) {
		$this->fatalError(
			'Invalid --date value: expected a date in a format understood by wfTimestamp.'
		);
	}

	$this->removeHeader( $timestamp );
	// remove topics that are older than the given timestamp
	$this->removeTopics( $timestamp );
	// remove topics that have more recent updates, but only from Flow talk
	// page manager
	$this->removeTopicsWithFlowUpdates( $timestamp );
}
| 81 | |
/**
 * Removes headers (and the references recorded for them) whose first
 * revision predates the cutoff and which have no revision at or after it.
 *
 * First revisions are fetched in batches ordered by rev_id. Each batch is
 * deleted inside its own transaction, which is rolled back instead of
 * committed when running with --dryrun.
 *
 * @param string $timestamp Timestamp in TS_MW format
 */
protected function removeHeader( $timestamp ) {
	$dbr = $this->dbFactory->getDB( DB_REPLICA );
	$batchSize = $this->getBatchSize();
	$wikiId = WikiMap::getCurrentWikiId();

	// we don't store a timestamp with revisions - the id also holds date
	// info, so that's what we should compare against
	$endId = UUID::getComparisonUUID( $timestamp );

	// start from around unix epoch - there can be no Flow data before that
	$startId = UUID::getComparisonUUID( '1' );

	// The loop ends only when a batch comes back empty. Every variable below
	// has a distinct name so that no inner loop can clobber the batch state.
	while ( true ) {
		/** @var Header[] $firstRevisions */
		$firstRevisions = $this->storage->find(
			'Header',
			[
				'rev_user_wiki' => $wikiId,
				'rev_type' => 'header',
				$dbr->expr( 'rev_id', '>', $startId->getBinary() ),
				$dbr->expr( 'rev_id', '<', $endId->getBinary() ),
				// only fetch original post at this point: we still need to
				// narrow down the results
				'rev_parent_id' => null,
			],
			[
				'limit' => $batchSize,
				'sort' => 'rev_id',
				'order' => 'ASC',
			]
		);

		if ( !$firstRevisions ) {
			break;
		}

		// prepare for next batch, which will start at this
		/** @var UUID $startId */
		$startId = end( $firstRevisions )->getRevisionId();

		// we've now found all first revisions prior to a certain date, but we
		// don't want to remove those that have revisions after that date cutoff
		// (we don't want to break history)
		// let's see if any has revisions more recent than timestamp
		$conds = [];
		$uuids = [];
		foreach ( $firstRevisions as $firstRevision ) {
			$collectionId = $firstRevision->getCollectionId();

			// keep track of UUIDs we may want to delete
			$uuids[$collectionId->getAlphadecimal()] = $collectionId;

			$conds[] = [
				'rev_user_wiki' => $wikiId,
				'rev_type' => 'header',
				$dbr->expr( 'rev_id', '>=', $endId->getBinary() ),
				'rev_type_id' => $collectionId->getBinary(),
			];
		}

		/** @var Header[][] $recent */
		$recent = $this->storage->findMulti( 'Header', $conds, [ 'limit' => 1 ] );

		// now exclude collection ids where there's a revision that is more
		// recent than the timestamp cutoff
		foreach ( $recent as $recentRevisions ) {
			foreach ( $recentRevisions as $recentRevision ) {
				unset( $uuids[$recentRevision->getCollectionId()->getAlphadecimal()] );
			}
		}

		// by now, there may be nothing left to remove, so move on to the
		// next batch...
		if ( !$uuids ) {
			continue;
		}

		// fetch every revision of the headers that are safe to delete
		$revisionsToRemove = $this->storage->find(
			'Header',
			[
				'rev_user_wiki' => $wikiId,
				'rev_type' => 'header',
				'rev_type_id' => UUID::convertUUIDs( $uuids ),
			]
		);

		$this->output( 'Removing ' . count( $revisionsToRemove ) . ' header revisions from ' .
			count( $uuids ) . ' headers (up to ' . $startId->getTimestamp() . ")\n" );

		$this->dbFactory->getDB( DB_PRIMARY )->begin( __METHOD__ );

		foreach ( $revisionsToRemove as $revision ) {
			$this->removeReferences( $revision );
		}

		$this->multiRemove( $revisionsToRemove );

		if ( $this->dryRun ) {
			$this->dbFactory->getDB( DB_PRIMARY )->rollback( __METHOD__ );
		} else {
			$this->dbFactory->getDB( DB_PRIMARY )->commit( __METHOD__ );
			$this->dbFactory->waitForReplicas();
		}
	}
}
| 183 | |
/**
 * Removes, batch by batch, every topic workflow of the current wiki whose
 * last update timestamp lies before the cutoff.
 *
 * @param string $timestamp Timestamp in TS_MW format
 * @throws \Flow\Exception\FlowException
 */
protected function removeTopics( $timestamp ) {
	$dbr = $this->dbFactory->getDB( DB_REPLICA );
	$batchSize = $this->getBatchSize();
	$batchOptions = [
		'limit' => $batchSize,
		'sort' => 'workflow_id',
		'order' => 'ASC',
	];

	// start from around unix epoch - there can be no Flow data before that
	$startId = UUID::getComparisonUUID( '1' );

	while ( true ) {
		$batchConds = [
			$dbr->expr( 'workflow_id', '>', $startId->getBinary() ),
			'workflow_wiki' => WikiMap::getCurrentWikiId(),
			'workflow_type' => 'topic',
			$dbr->expr( 'workflow_last_update_timestamp', '<', $dbr->timestamp( $timestamp ) ),
		];
		$workflows = $this->storage->find( 'Workflow', $batchConds, $batchOptions );

		// an empty batch means everything has been processed
		if ( !$workflows ) {
			break;
		}

		// the next batch resumes after the last workflow of this one
		/** @var UUID $startId */
		$startId = end( $workflows )->getId();

		$message = 'Removing ' . count( $workflows ) .
			' topic workflows (up to ' . $startId->getTimestamp() . ")\n";
		$this->output( $message );
		$this->removeWorkflows( $workflows );
	}
}
| 223 | |
/**
 * Removes topics that were updated at or after the cutoff, but whose
 * revisions since the cutoff were all made by the Flow talk page manager
 * user.
 *
 * @param string $timestamp Timestamp in TS_MW format
 * @throws DBUnexpectedError
 * @throws FlowException
 */
protected function removeTopicsWithFlowUpdates( $timestamp ) {
	$dbr = $this->dbFactory->getDB( DB_REPLICA );
	$batchSize = $this->getBatchSize();
	/** @var OccupationController $occupationController */
	$occupationController = MediaWikiServices::getInstance()->getService( 'FlowTalkpageManager' );
	$talkpageManager = $occupationController->getTalkpageManager();

	// start from around unix epoch - there can be no Flow data before that
	$batchStartId = UUID::getComparisonUUID( '1' );

	// we only care about revisions since cutoff here
	$cutoffStartId = UUID::getComparisonUUID( $timestamp );

	while ( true ) {
		// fetchFieldValues() yields a plain array of workflow_id values; a
		// result set object would always be truthy (defeating the empty
		// check below) and is not a list of ids that getMulti() can use
		$workflowIds = $dbr->newSelectQueryBuilder()
			->select( 'workflow_id' )
			->from( 'flow_workflow' )
			->join( 'flow_tree_node', null, 'tree_ancestor_id = workflow_id' )
			->join( 'flow_revision', null, 'rev_type_id = tree_descendant_id' )
			->where( [
				// revisions more recent than cutoff time
				$dbr->expr( 'rev_id', '>', $cutoffStartId->getBinary() ),
				// workflow_id condition is only used to batch, the exact
				// $batchStartId otherwise doesn't matter (unlike rev_id)
				$dbr->expr( 'workflow_id', '>', $batchStartId->getBinary() ),
				'workflow_wiki' => WikiMap::getCurrentWikiId(),
				'workflow_type' => 'topic',
				$dbr->expr( 'workflow_last_update_timestamp', '>=', $dbr->timestamp( $timestamp ) ),
			] )
			->limit( $batchSize )
			->orderBy( 'workflow_id' )
			// we only want to find topics that were only altered by talk
			// page manager: as long as anyone else edited any post, we're
			// not interested in it
			->groupBy( 'workflow_id' )
			->having( 'GROUP_CONCAT(DISTINCT rev_user_id) = ' . $talkpageManager->getId() )
			->caller( __METHOD__ )
			->fetchFieldValues();

		if ( !$workflowIds ) {
			break;
		}

		$workflows = $this->storage->getMulti( 'Workflow', $workflowIds );

		// end() returns false on an empty array, so without this guard the
		// getId() call below would fail; with nothing loaded there is also
		// no id to resume the batching from
		if ( !$workflows ) {
			break;
		}

		// prepare for next batch
		/** @var UUID $batchStartId */
		$batchStartId = end( $workflows )->getId();

		$this->output( 'Removing ' . count( $workflows ) . ' topic workflows with recent ' .
			'Flow updates (up to ' . $batchStartId->getTimestamp() . ")\n" );
		$this->removeWorkflows( $workflows );
	}
}
| 283 | |
/**
 * Deletes the given topic workflows together with their summaries, posts
 * and topic list entries, all within a single transaction. With --dryrun
 * the transaction is rolled back instead of committed.
 *
 * @param Workflow[] $workflows
 * @throws DBUnexpectedError
 */
protected function removeWorkflows( array $workflows ) {
	$this->dbFactory->getDB( DB_PRIMARY )->begin( __METHOD__ );

	// dependent data goes first, the workflows themselves last
	foreach ( $workflows as $topicWorkflow ) {
		$this->removeSummary( $topicWorkflow );
		$this->removePosts( $topicWorkflow );
		$this->removeTopicList( $topicWorkflow );
	}
	$this->multiRemove( $workflows );

	if ( $this->dryRun ) {
		// simulation only: undo everything done in this transaction
		$this->dbFactory->getDB( DB_PRIMARY )->rollback( __METHOD__ );
		return;
	}

	$this->dbFactory->getDB( DB_PRIMARY )->commit( __METHOD__ );
	$this->dbFactory->waitForReplicas();
}
| 306 | |
/**
 * Removes the topic list entries that point at the given topic workflow.
 *
 * @param Workflow $workflow
 */
protected function removeTopicList( Workflow $workflow ) {
	$topicListEntries = $this->storage->find(
		'TopicListEntry',
		[ 'topic_id' => $workflow->getId() ]
	);
	if ( !$topicListEntries ) {
		return;
	}

	$this->output( 'Removing ' . count( $topicListEntries ) . " topiclist entries.\n" );
	$this->multiRemove( $topicListEntries );
}
| 314 | |
/**
 * Removes all summary revisions of the given topic workflow, after first
 * removing the references recorded for each of them.
 *
 * @param Workflow $workflow
 */
protected function removeSummary( Workflow $workflow ) {
	$summaryRevisions = $this->storage->find(
		'PostSummary',
		[ 'rev_type_id' => $workflow->getId() ]
	);
	if ( !$summaryRevisions ) {
		return;
	}

	foreach ( $summaryRevisions as $summaryRevision ) {
		$this->removeReferences( $summaryRevision );
	}

	$total = count( $summaryRevisions );
	$this->output( 'Removing ' . $total . " summary revisions from 1 topic.\n" );
	$this->multiRemove( $summaryRevisions );
}
| 326 | |
/**
 * Flattens a subtree into a list of ids in which every node appears after
 * all of its descendants, i.e. children first and the parent last.
 *
 * @param UUID $parentId Id of the node at the root of $subtree
 * @param array $subtree Node data; its 'children' entry maps child ids to
 *  their own subtree data
 * @return array List of UUIDs, with $parentId as the last element
 */
protected function sortSubtree( UUID $parentId, array $subtree ) {
	$ordered = [];

	// descend into every child first, appending its flattened subtree
	foreach ( $subtree['children'] as $childId => $childSubtree ) {
		$descendants = $this->sortSubtree( UUID::create( $childId ), $childSubtree );
		foreach ( $descendants as $descendantId ) {
			$ordered[] = $descendantId;
		}
	}

	// the parent goes in only after all of its descendants
	$ordered[] = $parentId;

	return $ordered;
}
| 348 | |
/**
 * Removes every post revision in the given topic's tree, along with the
 * references of each revision and the tree data of the posts.
 *
 * @param Workflow $workflow
 */
protected function removePosts( Workflow $workflow ) {
	// fetch the whole post tree of the topic and flatten it so that the
	// deepest-nested children come first and parents last
	$subtree = $this->treeRepo->fetchSubtree( $workflow->getId() );
	$orderedIds = $this->sortSubtree( $workflow->getId(), $subtree );

	// one query per post id
	$queries = array_map(
		static fn ( $postId ) => [ 'rev_type_id' => $postId ],
		$orderedIds
	);

	$revisionsByPost = $this->storage->findMulti( 'PostRevision', $queries );
	$removedRevisions = 0;
	foreach ( $revisionsByPost as $postRevisions ) {
		/** @var PostRevision[] $postRevisions */
		foreach ( $postRevisions as $postRevision ) {
			$this->removeReferences( $postRevision );
		}

		$removedRevisions += count( $postRevisions );
		$this->multiRemove( $postRevisions );

		// tree data is deleted only after the revisions themselves are gone
		foreach ( $postRevisions as $postRevision ) {
			$this->treeRepo->delete( $postRevision->getCollectionId() );
		}
	}

	$this->output(
		'Removing ' . $removedRevisions . ' post revisions from ' .
		count( $revisionsByPost ) . " posts.\n"
	);
}
| 377 | |
/**
 * Removes the wiki references and then the URL references stored for the
 * collection the given revision belongs to.
 *
 * @param AbstractRevision $revision
 */
protected function removeReferences( AbstractRevision $revision ) {
	// storage class => word used in the progress message; wiki references
	// are handled before URL references
	$referenceKinds = [
		'WikiReference' => 'wiki',
		'URLReference' => 'url',
	];

	foreach ( $referenceKinds as $storageClass => $label ) {
		$references = $this->storage->find( $storageClass, [
			'ref_src_wiki' => WikiMap::getCurrentWikiId(),
			'ref_src_object_type' => $revision->getRevisionType(),
			'ref_src_object_id' => $revision->getCollectionId(),
		] );
		if ( !$references ) {
			continue;
		}

		$this->output(
			'Removing ' . count( $references ) . " $label references from 1 revision.\n"
		);
		$this->multiRemove( $references );
	}
}
| 399 | |
/**
 * Hands the given objects to Flow storage for removal. All deletions in
 * this script go through this single method.
 *
 * @param array $objects
 */
protected function multiRemove( array $objects ) {
	$this->storage->multiRemove(
		$objects
	);
}
| 403 | } |
| 404 | |
// Name the class to run, then include the maintenance runner.
$maintClass = FlowRemoveOldTopics::class;

require_once RUN_MAINTENANCE_IF_MAIN;