Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
0.00% covered (danger)
0.00%
0 / 197
0.00% covered (danger)
0.00%
0 / 12
CRAP
0.00% covered (danger)
0.00%
0 / 1
FlowRemoveOldTopics
0.00% covered (danger)
0.00%
0 / 191
0.00% covered (danger)
0.00%
0 / 12
1122
0.00% covered (danger)
0.00%
0 / 1
 __construct
0.00% covered (danger)
0.00%
0 / 7
0.00% covered (danger)
0.00%
0 / 1
2
 execute
0.00% covered (danger)
0.00%
0 / 8
0.00% covered (danger)
0.00%
0 / 1
2
 removeHeader
0.00% covered (danger)
0.00%
0 / 57
0.00% covered (danger)
0.00%
0 / 1
72
 removeTopics
0.00% covered (danger)
0.00%
0 / 24
0.00% covered (danger)
0.00%
0 / 1
6
 removeTopicsWithFlowUpdates
0.00% covered (danger)
0.00%
0 / 35
0.00% covered (danger)
0.00%
0 / 1
6
 removeWorkflows
0.00% covered (danger)
0.00%
0 / 10
0.00% covered (danger)
0.00%
0 / 1
12
 removeTopicList
0.00% covered (danger)
0.00%
0 / 4
0.00% covered (danger)
0.00%
0 / 1
6
 removeSummary
0.00% covered (danger)
0.00%
0 / 6
0.00% covered (danger)
0.00%
0 / 1
12
 sortSubtree
0.00% covered (danger)
0.00%
0 / 8
0.00% covered (danger)
0.00%
0 / 1
6
 removePosts
0.00% covered (danger)
0.00%
0 / 15
0.00% covered (danger)
0.00%
0 / 1
30
 removeReferences
0.00% covered (danger)
0.00%
0 / 16
0.00% covered (danger)
0.00%
0 / 1
12
 multiRemove
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
1<?php
2
3namespace Flow\Maintenance;
4
5use Flow\Container;
6use Flow\Data\ManagerGroup;
7use Flow\DbFactory;
8use Flow\Exception\FlowException;
9use Flow\Model\AbstractRevision;
10use Flow\Model\Header;
11use Flow\Model\PostRevision;
12use Flow\Model\UUID;
13use Flow\Model\Workflow;
14use Flow\OccupationController;
15use Flow\Repository\TreeRepository;
16use Maintenance;
17use MediaWiki\MediaWikiServices;
18use MediaWiki\WikiMap\WikiMap;
19use Wikimedia\Rdbms\DBUnexpectedError;
20
// Resolve the MediaWiki installation directory: the MW_INSTALL_PATH
// environment variable wins; when it is not set, fall back to the directory
// three levels above this script.
$IP = getenv( 'MW_INSTALL_PATH' );
$IP = $IP === false ? __DIR__ . '/../../..' : $IP;

require_once "$IP/maintenance/Maintenance.php";
27
28/**
29 * @ingroup Maintenance
30 */
31class FlowRemoveOldTopics extends Maintenance {
32    /**
33     * @var bool
34     */
35    protected $dryRun = false;
36
37    /**
38     * @var ManagerGroup
39     */
40    protected $storage;
41
42    /**
43     * @var TreeRepository
44     */
45    protected $treeRepo;
46
47    /**
48     * @var DbFactory
49     */
50    protected $dbFactory;
51
52    public function __construct() {
53        parent::__construct();
54
55        $this->addDescription( "Deletes old topics" );
56
57        $this->addOption( 'date', 'Date cutoff (in any format understood by wfTimestamp), topics ' .
58            'older than this date will be deleted.', true, true );
59        $this->addOption( 'dryrun', 'Simulate script run, without actually deleting anything' );
60
61        $this->setBatchSize( 10 );
62
63        $this->requireExtension( 'Flow' );
64    }
65
66    public function execute() {
67        $this->dryRun = $this->getOption( 'dryrun', false );
68        $this->storage = Container::get( 'storage' );
69        $this->treeRepo = Container::get( 'repository.tree' );
70        $this->dbFactory = Container::get( 'db.factory' );
71
72        $timestamp = wfTimestamp( TS_MW, $this->getOption( 'date' ) );
73
74        $this->removeHeader( $timestamp );
75        // remove topics that are older than the given timestamp
76        $this->removeTopics( $timestamp );
77        // remove topics that have more recent updates, but only from Flow talk
78        // page manager
79        $this->removeTopicsWithFlowUpdates( $timestamp );
80    }
81
82    protected function removeHeader( $timestamp ) {
83        $dbr = $this->dbFactory->getDB( DB_REPLICA );
84        $batchSize = $this->getBatchSize();
85
86        // we don't store a timestamp with revisions - the id also holds date
87        // info, so that's what we should compare against
88        $endId = UUID::getComparisonUUID( $timestamp );
89
90        // start from around unix epoch - there can be no Flow data before that
91        $startId = UUID::getComparisonUUID( '1' );
92        do {
93            /** @var Header[] $revisions */
94            $revisions = $this->storage->find(
95                'Header',
96                [
97                    'rev_user_wiki' => WikiMap::getCurrentWikiId(),
98                    'rev_type' => 'header',
99                    $dbr->expr( 'rev_id', '>', $startId->getBinary() ),
100                    $dbr->expr( 'rev_id', '<', $endId->getBinary() ),
101                    // only fetch original post at this point: we still need to
102                    // narrow down the results
103                    'rev_parent_id' => null,
104                ],
105                [
106                    'limit' => $batchSize,
107                    'sort' => 'rev_id',
108                    'order' => 'ASC',
109                ]
110            );
111
112            if ( !$revisions ) {
113                break;
114            }
115
116            // prepare for next batch, which will start at this
117            /** @var UUID $startId */
118            $startId = end( $revisions )->getRevisionId();
119
120            // we've now found all first revisions prior to a certain date, but we
121            // don't want to remove those that have revisions after that date cutoff
122            // (we don't want to break history)
123            // let's see if any has revisions more recent than timestamp
124            $conds = [];
125            $uuids = [];
126            foreach ( $revisions as $revision ) {
127                // keep track of UUIDs we may want to delete
128                $uuids[$revision->getCollectionId()->getAlphadecimal()] = $revision->getCollectionId();
129
130                $conds[] = [
131                    'rev_user_wiki' => WikiMap::getCurrentWikiId(),
132                    'rev_type' => 'header',
133                    $dbr->expr( 'rev_id', '>=', $endId->getBinary() ),
134                    'rev_type_id' => $revision->getCollectionId()->getBinary(),
135                ];
136            }
137
138            /** @var Header[] $recent */
139            $recent = $this->storage->findMulti( 'Header', $conds, [ 'limit' => 1 ] );
140
141            // now exclude collection ids where there's a revision that is more
142            // recent than the timestamp cutoff
143            foreach ( $recent as $revisions ) {
144                foreach ( $revisions as $revision ) {
145                    unset( $uuids[$revision->getCollectionId()->getAlphadecimal()] );
146                }
147            }
148
149            // by now, there may be nothing left to remove, so move on to the
150            // next batch...
151            if ( !$uuids ) {
152                continue;
153            }
154
155            $revisions = $this->storage->find(
156                'Header',
157                [
158                    'rev_user_wiki' => WikiMap::getCurrentWikiId(),
159                    'rev_type' => 'header',
160                    'rev_type_id' => UUID::convertUUIDs( $uuids ),
161                ]
162            );
163
164            $this->output( 'Removing ' . count( $revisions ) . ' header revisions from ' .
165                count( $uuids ) . ' headers (up to ' . $startId->getTimestamp() . ")\n" );
166
167            $this->dbFactory->getDB( DB_PRIMARY )->begin( __METHOD__ );
168
169            foreach ( $revisions as $revision ) {
170                $this->removeReferences( $revision );
171            }
172
173            $this->multiRemove( $revisions );
174
175            if ( $this->dryRun ) {
176                $this->dbFactory->getDB( DB_PRIMARY )->rollback( __METHOD__ );
177            } else {
178                $this->dbFactory->getDB( DB_PRIMARY )->commit( __METHOD__ );
179                $this->dbFactory->waitForReplicas();
180            }
181        } while ( $revisions );
182    }
183
184    /**
185     * @param string $timestamp Timestamp in TS_MW format
186     * @throws \Flow\Exception\FlowException
187     */
188    protected function removeTopics( $timestamp ) {
189        $dbr = $this->dbFactory->getDB( DB_REPLICA );
190        $batchSize = $this->getBatchSize();
191
192        // start from around unix epoch - there can be no Flow data before that
193        $startId = UUID::getComparisonUUID( '1' );
194        do {
195            $workflows = $this->storage->find(
196                'Workflow',
197                [
198                    $dbr->expr( 'workflow_id', '>', $startId->getBinary() ),
199                    'workflow_wiki' => WikiMap::getCurrentWikiId(),
200                    'workflow_type' => 'topic',
201                    $dbr->expr( 'workflow_last_update_timestamp', '<', $dbr->timestamp( $timestamp ) ),
202                ],
203                [
204                    'limit' => $batchSize,
205                    'sort' => 'workflow_id',
206                    'order' => 'ASC',
207                ]
208            );
209
210            if ( !$workflows ) {
211                break;
212            }
213
214            // prepare for next batch
215            /** @var UUID $startId */
216            $startId = end( $workflows )->getId();
217
218            $this->output( 'Removing ' . count( $workflows ) .
219                ' topic workflows (up to ' . $startId->getTimestamp() . ")\n" );
220            $this->removeWorkflows( $workflows );
221        } while ( $workflows );
222    }
223
224    /**
225     * @param string $timestamp Timestamp in TS_MW format
226     * @throws DBUnexpectedError
227     * @throws FlowException
228     */
229    protected function removeTopicsWithFlowUpdates( $timestamp ) {
230        $dbr = $this->dbFactory->getDB( DB_REPLICA );
231        $batchSize = $this->getBatchSize();
232        /** @var OccupationController $occupationController */
233        $occupationController = MediaWikiServices::getInstance()->getService( 'FlowTalkpageManager' );
234        $talkpageManager = $occupationController->getTalkpageManager();
235
236        // start from around unix epoch - there can be no Flow data before that
237        $batchStartId = UUID::getComparisonUUID( '1' );
238
239        // we only care about revisions since cutoff here
240        $cutoffStartId = UUID::getComparisonUUID( $timestamp );
241
242        do {
243            $workflowIds = $dbr->newSelectQueryBuilder()
244                ->select( 'workflow_id' )
245                ->from( 'flow_workflow' )
246                ->join( 'flow_tree_node', null, 'tree_ancestor_id = workflow_id' )
247                ->join( 'flow_revision', null, 'rev_type_id = tree_descendant_id' )
248                ->where( [
249                    // revisions more recent than cutoff time
250                    $dbr->expr( 'rev_id', '>', $cutoffStartId->getBinary() ),
251                    // workflow_id condition is only used to batch, the exact
252                    // $batchStartId otherwise doesn't matter (unlike rev_id)
253                    $dbr->expr( 'workflow_id', '>', $batchStartId->getBinary() ),
254                    'workflow_wiki' => WikiMap::getCurrentWikiId(),
255                    'workflow_type' => 'topic',
256                    $dbr->expr( 'workflow_last_update_timestamp', '>=', $dbr->timestamp( $timestamp ) ),
257                ] )
258                ->limit( $batchSize )
259                ->orderBy( 'workflow_id' )
260                // we only want to find topics that were only altered by talk
261                // page manager: as long as anyone else edited any post, we're
262                // not interested in it
263                ->groupBy( 'workflow_id' )
264                ->having( 'GROUP_CONCAT(DISTINCT rev_user_id) = ' . $talkpageManager->getId() )
265                ->caller( __METHOD__ )
266                ->fetchResultSet();
267
268            if ( !$workflowIds ) {
269                break;
270            }
271
272            $workflows = $this->storage->getMulti( 'Workflow', $workflowIds );
273
274            // prepare for next batch
275            /** @var UUID $batchStartId */
276            $batchStartId = end( $workflows )->getId();
277
278            $this->output( 'Removing ' . count( $workflows ) . ' topic workflows with recent ' .
279                'Flow updates (up to ' . $batchStartId->getTimestamp() . ")\n" );
280            $this->removeWorkflows( $workflows );
281        } while ( $workflows );
282    }
283
284    /**
285     * @param Workflow[] $workflows
286     * @throws DBUnexpectedError
287     */
288    protected function removeWorkflows( array $workflows ) {
289        $this->dbFactory->getDB( DB_PRIMARY )->begin( __METHOD__ );
290
291        foreach ( $workflows as $workflow ) {
292            $this->removeSummary( $workflow );
293            $this->removePosts( $workflow );
294            $this->removeTopicList( $workflow );
295        }
296
297        $this->multiRemove( $workflows );
298
299        if ( $this->dryRun ) {
300            $this->dbFactory->getDB( DB_PRIMARY )->rollback( __METHOD__ );
301        } else {
302            $this->dbFactory->getDB( DB_PRIMARY )->commit( __METHOD__ );
303            $this->dbFactory->waitForReplicas();
304        }
305    }
306
307    protected function removeTopicList( Workflow $workflow ) {
308        $entries = $this->storage->find( 'TopicListEntry', [ 'topic_id' => $workflow->getId() ] );
309        if ( $entries ) {
310            $this->output( 'Removing ' . count( $entries ) . " topiclist entries.\n" );
311            $this->multiRemove( $entries );
312        }
313    }
314
315    protected function removeSummary( Workflow $workflow ) {
316        $revisions = $this->storage->find( 'PostSummary', [ 'rev_type_id' => $workflow->getId() ] );
317        if ( $revisions ) {
318            foreach ( $revisions as $revision ) {
319                $this->removeReferences( $revision );
320            }
321
322            $this->output( 'Removing ' . count( $revisions ) . " summary revisions from 1 topic.\n" );
323            $this->multiRemove( $revisions );
324        }
325    }
326
327    /**
328     * @param UUID $parentId
329     * @param array $subtree
330     * @return array
331     */
332    protected function sortSubtree( UUID $parentId, array $subtree ) {
333        $flat = [];
334
335        // first recursively process all children, so they come first in $flat
336        foreach ( $subtree['children'] as $id => $data ) {
337            $flat = array_merge(
338                $flat,
339                $this->sortSubtree( UUID::create( $id ), $data )
340            );
341        }
342
343        // then add parent, which should come last in $flat
344        $flat[] = $parentId;
345
346        return $flat;
347    }
348
349    protected function removePosts( Workflow $workflow ) {
350        // fetch all children (posts) from a topic & reverse-sort all the posts:
351        // deepest-nested children should come first, parents last
352        $subtree = $this->treeRepo->fetchSubtree( $workflow->getId() );
353        $uuids = $this->sortSubtree( $workflow->getId(), $subtree );
354
355        $conds = [];
356        foreach ( $uuids as $id ) {
357            $conds[] = [ 'rev_type_id' => $id ];
358        }
359
360        $posts = $this->storage->findMulti( 'PostRevision', $conds );
361        $count = 0;
362        foreach ( $posts as $revisions ) {
363            /** @var PostRevision[] $revisions */
364            foreach ( $revisions as $revision ) {
365                $this->removeReferences( $revision );
366            }
367
368            $count += count( $revisions );
369            $this->multiRemove( $revisions );
370
371            foreach ( $revisions as $revision ) {
372                $this->treeRepo->delete( $revision->getCollectionId() );
373            }
374        }
375        $this->output( 'Removing ' . $count . ' post revisions from ' . count( $posts ) . " posts.\n" );
376    }
377
378    protected function removeReferences( AbstractRevision $revision ) {
379        $wikiReferences = $this->storage->find( 'WikiReference', [
380            'ref_src_wiki' => WikiMap::getCurrentWikiId(),
381            'ref_src_object_type' => $revision->getRevisionType(),
382            'ref_src_object_id' => $revision->getCollectionId(),
383        ] );
384        if ( $wikiReferences ) {
385            $this->output( 'Removing ' . count( $wikiReferences ) . " wiki references from 1 revision.\n" );
386            $this->multiRemove( $wikiReferences );
387        }
388
389        $urlReferences = $this->storage->find( 'URLReference', [
390            'ref_src_wiki' => WikiMap::getCurrentWikiId(),
391            'ref_src_object_type' => $revision->getRevisionType(),
392            'ref_src_object_id' => $revision->getCollectionId(),
393        ] );
394        if ( $urlReferences ) {
395            $this->output( 'Removing ' . count( $urlReferences ) . " url references from 1 revision.\n" );
396            $this->multiRemove( $urlReferences );
397        }
398    }
399
400    protected function multiRemove( array $objects ) {
401        $this->storage->multiRemove( $objects );
402    }
403}
404
// Name the maintenance class for the runner, then load the file named by the
// RUN_MAINTENANCE_IF_MAIN constant.
$maintClass = FlowRemoveOldTopics::class;
require_once RUN_MAINTENANCE_IF_MAIN;