Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 197 |
|
0.00% |
0 / 12 |
CRAP | |
0.00% |
0 / 1 |
FlowRemoveOldTopics | |
0.00% |
0 / 191 |
|
0.00% |
0 / 12 |
1122 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
2 | |||
execute | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
2 | |||
removeHeader | |
0.00% |
0 / 57 |
|
0.00% |
0 / 1 |
72 | |||
removeTopics | |
0.00% |
0 / 24 |
|
0.00% |
0 / 1 |
6 | |||
removeTopicsWithFlowUpdates | |
0.00% |
0 / 35 |
|
0.00% |
0 / 1 |
6 | |||
removeWorkflows | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
12 | |||
removeTopicList | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
removeSummary | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
12 | |||
sortSubtree | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
6 | |||
removePosts | |
0.00% |
0 / 15 |
|
0.00% |
0 / 1 |
30 | |||
removeReferences | |
0.00% |
0 / 16 |
|
0.00% |
0 / 1 |
12 | |||
multiRemove | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 |
1 | <?php |
2 | |
3 | namespace Flow\Maintenance; |
4 | |
5 | use Flow\Container; |
6 | use Flow\Data\ManagerGroup; |
7 | use Flow\DbFactory; |
8 | use Flow\Exception\FlowException; |
9 | use Flow\Model\AbstractRevision; |
10 | use Flow\Model\Header; |
11 | use Flow\Model\PostRevision; |
12 | use Flow\Model\UUID; |
13 | use Flow\Model\Workflow; |
14 | use Flow\OccupationController; |
15 | use Flow\Repository\TreeRepository; |
16 | use MediaWiki\Maintenance\Maintenance; |
17 | use MediaWiki\MediaWikiServices; |
18 | use MediaWiki\WikiMap\WikiMap; |
19 | use Wikimedia\Rdbms\DBUnexpectedError; |
20 | |
// Resolve the MediaWiki root: use MW_INSTALL_PATH when the environment
// provides it, otherwise fall back to three directories above this file.
$IP = getenv( 'MW_INSTALL_PATH' );
$IP = ( $IP === false ) ? __DIR__ . '/../../..' : $IP;

require_once $IP . '/maintenance/Maintenance.php';
27 | |
28 | /** |
29 | * @ingroup Maintenance |
30 | */ |
31 | class FlowRemoveOldTopics extends Maintenance { |
/**
 * Whether the script only simulates the run. When true, the transactions
 * opened by removeHeader() and removeWorkflows() are rolled back instead
 * of committed.
 *
 * @var bool
 */
protected $dryRun = false;

/**
 * Storage layer used for all lookups and removals; populated in execute()
 * from the 'storage' container entry.
 *
 * @var ManagerGroup
 */
protected $storage;

/**
 * Tree repository used to fetch a topic's post subtree and to delete tree
 * nodes; populated in execute() from the 'repository.tree' container entry.
 *
 * @var TreeRepository
 */
protected $treeRepo;

/**
 * Source of replica/primary database handles; populated in execute() from
 * the 'db.factory' container entry.
 *
 * @var DbFactory
 */
protected $dbFactory;
51 | |
/**
 * Declares the script description, its command line options, the default
 * batch size and the dependency on the Flow extension.
 */
public function __construct() {
	parent::__construct();

	$this->addDescription( "Deletes old topics" );

	$dateHelp = 'Date cutoff (in any format understood by wfTimestamp), topics '
		. 'older than this date will be deleted.';
	$dryRunHelp = 'Simulate script run, without actually deleting anything';

	$this->addOption( 'date', $dateHelp, true, true );
	$this->addOption( 'dryrun', $dryRunHelp );

	$this->setBatchSize( 10 );

	$this->requireExtension( 'Flow' );
}
65 | |
/**
 * Script entry point.
 *
 * Validates the --date option, resolves the Flow services used by the
 * removal helpers and then runs the three removal passes (headers, old
 * topics, topics only touched by the Flow talk page manager).
 */
public function execute() {
	// Validate the cutoff before touching anything. wfTimestamp() yields
	// false for input it cannot parse, and an empty/false timestamp is
	// interpreted as "now" further down the line - which would turn a typo
	// in --date into "delete everything".
	$date = $this->getOption( 'date' );
	$timestamp = $date ? wfTimestamp( TS_MW, $date ) : false;
	if ( $timestamp === false ) {
		$this->fatalError( 'Invalid --date value: expected a timestamp understood by wfTimestamp.' );
	}

	// 'dryrun' is a flag without a value; store it as a real boolean
	$this->dryRun = $this->hasOption( 'dryrun' );
	$this->storage = Container::get( 'storage' );
	$this->treeRepo = Container::get( 'repository.tree' );
	$this->dbFactory = Container::get( 'db.factory' );

	$this->removeHeader( $timestamp );
	// remove topics that are older than the given timestamp
	$this->removeTopics( $timestamp );
	// remove topics that have more recent updates, but only from Flow talk
	// page manager
	$this->removeTopicsWithFlowUpdates( $timestamp );
}
81 | |
/**
 * Deletes headers whose entire revision history predates the cutoff.
 *
 * Works in batches: first revisions (no parent) with an id below the
 * cutoff are looked up, headers that also have a revision at or after the
 * cutoff are skipped so their history is not broken, and every revision of
 * the remaining headers is removed together with its references. In
 * dry-run mode the transaction is rolled back instead of committed.
 *
 * @param string $timestamp Timestamp in TS_MW format
 */
protected function removeHeader( $timestamp ) {
	$dbr = $this->dbFactory->getDB( DB_REPLICA );
	$batchSize = $this->getBatchSize();
	$wikiId = WikiMap::getCurrentWikiId();

	// we don't store a timestamp with revisions - the id also holds date
	// info, so that's what we should compare against
	$endId = UUID::getComparisonUUID( $timestamp );

	// start from around unix epoch - there can be no Flow data before that
	$startId = UUID::getComparisonUUID( '1' );

	// Each pass handles one batch of first revisions; the loop ends only
	// when a batch comes back empty. The batch is kept in its own variable
	// so that later lookups in the same pass cannot influence termination.
	while ( true ) {
		/** @var Header[] $firstRevisions */
		$firstRevisions = $this->storage->find(
			'Header',
			[
				'rev_user_wiki' => $wikiId,
				'rev_type' => 'header',
				$dbr->expr( 'rev_id', '>', $startId->getBinary() ),
				$dbr->expr( 'rev_id', '<', $endId->getBinary() ),
				// only fetch original post at this point: we still need to
				// narrow down the results
				'rev_parent_id' => null,
			],
			[
				'limit' => $batchSize,
				'sort' => 'rev_id',
				'order' => 'ASC',
			]
		);

		if ( !$firstRevisions ) {
			break;
		}

		// prepare for next batch, which will start at this
		/** @var UUID $startId */
		$startId = end( $firstRevisions )->getRevisionId();

		// we've now found all first revisions prior to a certain date, but we
		// don't want to remove those that have revisions after that date cutoff
		// (we don't want to break history)
		// let's see if any has revisions more recent than timestamp
		$conds = [];
		$uuids = [];
		foreach ( $firstRevisions as $revision ) {
			$collectionId = $revision->getCollectionId();

			// keep track of UUIDs we may want to delete
			$uuids[$collectionId->getAlphadecimal()] = $collectionId;

			$conds[] = [
				'rev_user_wiki' => $wikiId,
				'rev_type' => 'header',
				$dbr->expr( 'rev_id', '>=', $endId->getBinary() ),
				'rev_type_id' => $collectionId->getBinary(),
			];
		}

		/** @var Header[][] $recent */
		$recent = $this->storage->findMulti( 'Header', $conds, [ 'limit' => 1 ] );

		// now exclude collection ids where there's a revision that is more
		// recent than the timestamp cutoff
		foreach ( $recent as $recentRevisions ) {
			foreach ( $recentRevisions as $recentRevision ) {
				unset( $uuids[$recentRevision->getCollectionId()->getAlphadecimal()] );
			}
		}

		// by now, there may be nothing left to remove, so move on to the
		// next batch...
		if ( !$uuids ) {
			continue;
		}

		// fetch every revision (not just the first) of the remaining headers
		$revisions = $this->storage->find(
			'Header',
			[
				'rev_user_wiki' => $wikiId,
				'rev_type' => 'header',
				'rev_type_id' => UUID::convertUUIDs( $uuids ),
			]
		);

		$this->output( 'Removing ' . count( $revisions ) . ' header revisions from ' .
			count( $uuids ) . ' headers (up to ' . $startId->getTimestamp() . ")\n" );

		$this->dbFactory->getDB( DB_PRIMARY )->begin( __METHOD__ );

		foreach ( $revisions as $revision ) {
			$this->removeReferences( $revision );
		}

		$this->multiRemove( $revisions );

		if ( $this->dryRun ) {
			$this->dbFactory->getDB( DB_PRIMARY )->rollback( __METHOD__ );
		} else {
			$this->dbFactory->getDB( DB_PRIMARY )->commit( __METHOD__ );
			$this->dbFactory->waitForReplicas();
		}
	}
}
183 | |
/**
 * Removes, batch by batch, every topic workflow of the current wiki whose
 * last update timestamp lies before the cutoff.
 *
 * @param string $timestamp Timestamp in TS_MW format
 * @throws \Flow\Exception\FlowException
 */
protected function removeTopics( $timestamp ) {
	$dbr = $this->dbFactory->getDB( DB_REPLICA );
	$limit = $this->getBatchSize();
	$queryOptions = [
		'limit' => $limit,
		'sort' => 'workflow_id',
		'order' => 'ASC',
	];

	// batching cursor: begins around unix epoch, as no Flow data can
	// predate that
	$cursor = UUID::getComparisonUUID( '1' );

	for ( ; ; ) {
		$batch = $this->storage->find(
			'Workflow',
			[
				$dbr->expr( 'workflow_id', '>', $cursor->getBinary() ),
				'workflow_wiki' => WikiMap::getCurrentWikiId(),
				'workflow_type' => 'topic',
				$dbr->expr( 'workflow_last_update_timestamp', '<', $dbr->timestamp( $timestamp ) ),
			],
			$queryOptions
		);

		// an empty batch means everything has been processed
		if ( !$batch ) {
			return;
		}

		// the next batch resumes after the last workflow of this one
		/** @var UUID $cursor */
		$cursor = end( $batch )->getId();

		$this->output( 'Removing ' . count( $batch ) .
			' topic workflows (up to ' . $cursor->getTimestamp() . ")\n" );
		$this->removeWorkflows( $batch );
	}
}
223 | |
/**
 * Removes topics that were updated at or after the cutoff, but whose
 * revisions since the cutoff were all authored by the Flow talk page
 * manager user.
 *
 * @param string $timestamp Timestamp in TS_MW format
 * @throws DBUnexpectedError
 * @throws FlowException
 */
protected function removeTopicsWithFlowUpdates( $timestamp ) {
	$dbr = $this->dbFactory->getDB( DB_REPLICA );
	$batchSize = $this->getBatchSize();
	/** @var OccupationController $occupationController */
	$occupationController = MediaWikiServices::getInstance()->getService( 'FlowTalkpageManager' );
	$talkpageManager = $occupationController->getTalkpageManager();
	// the id is concatenated into a raw HAVING clause below: force an integer
	$talkpageManagerId = (int)$talkpageManager->getId();

	// start from around unix epoch - there can be no Flow data before that
	$batchStartId = UUID::getComparisonUUID( '1' );

	// we only care about revisions since cutoff here
	$cutoffStartId = UUID::getComparisonUUID( $timestamp );

	while ( true ) {
		// fetchFieldValues() gives a plain array of workflow_id values, so
		// an empty batch is falsy and ends the loop (a result set object
		// would always be truthy)
		$rawIds = $dbr->newSelectQueryBuilder()
			->select( 'workflow_id' )
			->from( 'flow_workflow' )
			->join( 'flow_tree_node', null, 'tree_ancestor_id = workflow_id' )
			->join( 'flow_revision', null, 'rev_type_id = tree_descendant_id' )
			->where( [
				// revisions more recent than cutoff time
				$dbr->expr( 'rev_id', '>', $cutoffStartId->getBinary() ),
				// workflow_id condition is only used to batch, the exact
				// $batchStartId otherwise doesn't matter (unlike rev_id)
				$dbr->expr( 'workflow_id', '>', $batchStartId->getBinary() ),
				'workflow_wiki' => WikiMap::getCurrentWikiId(),
				'workflow_type' => 'topic',
				$dbr->expr( 'workflow_last_update_timestamp', '>=', $dbr->timestamp( $timestamp ) ),
			] )
			->limit( $batchSize )
			->orderBy( 'workflow_id' )
			// we only want to find topics that were only altered by talk
			// page manager: as long as anyone else edited any post, we're
			// not interested in it
			->groupBy( 'workflow_id' )
			->having( 'GROUP_CONCAT(DISTINCT rev_user_id) = ' . $talkpageManagerId )
			->caller( __METHOD__ )
			->fetchFieldValues();

		if ( !$rawIds ) {
			break;
		}

		/** @var UUID[] $workflowIds */
		$workflowIds = [];
		foreach ( $rawIds as $rawId ) {
			$workflowIds[] = UUID::create( $rawId );
		}

		// prepare for next batch: the query is ordered by workflow_id, so
		// the last id fetched is where the next batch resumes, regardless
		// of what storage manages to load below
		$batchStartId = end( $workflowIds );

		$workflows = $this->storage->getMulti( 'Workflow', $workflowIds );
		if ( !$workflows ) {
			// nothing could be loaded for these ids; move on to the next batch
			continue;
		}

		$this->output( 'Removing ' . count( $workflows ) . ' topic workflows with recent ' .
			'Flow updates (up to ' . $batchStartId->getTimestamp() . ")\n" );
		$this->removeWorkflows( $workflows );
	}
}
283 | |
/**
 * Deletes the given topic workflows together with their summaries, posts
 * and topic list entries inside a single transaction on the primary
 * database. A dry run rolls that transaction back; a real run commits it
 * and then waits for the replicas.
 *
 * @param Workflow[] $workflows
 * @throws DBUnexpectedError
 */
protected function removeWorkflows( array $workflows ) {
	// resolves the primary handle afresh on every use
	$primary = fn () => $this->dbFactory->getDB( DB_PRIMARY );

	$primary()->begin( __METHOD__ );

	foreach ( $workflows as $topic ) {
		$this->removeSummary( $topic );
		$this->removePosts( $topic );
		$this->removeTopicList( $topic );
	}

	$this->multiRemove( $workflows );

	if ( !$this->dryRun ) {
		$primary()->commit( __METHOD__ );
		$this->dbFactory->waitForReplicas();
		return;
	}

	$primary()->rollback( __METHOD__ );
}
306 | |
/**
 * Removes the topic list entries that point at the given topic workflow.
 *
 * @param Workflow $workflow
 */
protected function removeTopicList( Workflow $workflow ) {
	$topicListEntries = $this->storage->find(
		'TopicListEntry',
		[ 'topic_id' => $workflow->getId() ]
	);

	// nothing stored for this topic
	if ( !$topicListEntries ) {
		return;
	}

	$this->output( 'Removing ' . count( $topicListEntries ) . " topiclist entries.\n" );
	$this->multiRemove( $topicListEntries );
}
314 | |
/**
 * Removes all summary revisions of the given topic workflow, after first
 * removing the references recorded for each of them.
 *
 * @param Workflow $workflow
 */
protected function removeSummary( Workflow $workflow ) {
	$summaryRevisions = $this->storage->find(
		'PostSummary',
		[ 'rev_type_id' => $workflow->getId() ]
	);

	// topic has no summary
	if ( !$summaryRevisions ) {
		return;
	}

	foreach ( $summaryRevisions as $summaryRevision ) {
		$this->removeReferences( $summaryRevision );
	}

	$this->output( 'Removing ' . count( $summaryRevisions ) . " summary revisions from 1 topic.\n" );
	$this->multiRemove( $summaryRevisions );
}
326 | |
/**
 * Flattens a subtree into a list of UUIDs in which every node appears
 * after all of its descendants (children first, parent last).
 *
 * @param UUID $parentId Id of the node $subtree belongs to
 * @param array $subtree Node data; its 'children' entry maps child ids to
 *  their own node data
 * @return UUID[]
 */
protected function sortSubtree( UUID $parentId, array $subtree ) {
	$ordered = [];

	// depth-first: each child contributes its whole (already ordered)
	// subtree before the parent itself is appended
	foreach ( $subtree['children'] as $childId => $childTree ) {
		foreach ( $this->sortSubtree( UUID::create( $childId ), $childTree ) as $uuid ) {
			$ordered[] = $uuid;
		}
	}

	// the parent closes the list
	$ordered[] = $parentId;

	return $ordered;
}
348 | |
/**
 * Removes every post revision below the given topic workflow, the
 * references of those revisions and the tree nodes of the posts.
 *
 * @param Workflow $workflow
 */
protected function removePosts( Workflow $workflow ) {
	// fetch all children (posts) from a topic & reverse-sort all the posts:
	// deepest-nested children should come first, parents last
	$subtree = $this->treeRepo->fetchSubtree( $workflow->getId() );
	$uuids = $this->sortSubtree( $workflow->getId(), $subtree );

	$conds = [];
	foreach ( $uuids as $id ) {
		$conds[] = [ 'rev_type_id' => $id ];
	}

	$posts = $this->storage->findMulti( 'PostRevision', $conds );
	$count = 0;
	foreach ( $posts as $revisions ) {
		// collection ids seen in this group, keyed by their alphadecimal
		// form so that each post's tree node is deleted only once instead
		// of once per revision
		$collectionIds = [];

		/** @var PostRevision[] $revisions */
		foreach ( $revisions as $revision ) {
			$this->removeReferences( $revision );

			$collectionId = $revision->getCollectionId();
			$collectionIds[$collectionId->getAlphadecimal()] = $collectionId;
		}

		$count += count( $revisions );
		$this->multiRemove( $revisions );

		foreach ( $collectionIds as $collectionId ) {
			$this->treeRepo->delete( $collectionId );
		}
	}
	$this->output( 'Removing ' . $count . ' post revisions from ' . count( $posts ) . " posts.\n" );
}
377 | |
/**
 * Removes the wiki and URL references stored for the collection the given
 * revision belongs to (wiki references first, then URL references).
 *
 * @param AbstractRevision $revision
 */
protected function removeReferences( AbstractRevision $revision ) {
	// storage type => word used in the progress message
	$referenceTypes = [
		'WikiReference' => 'wiki',
		'URLReference' => 'url',
	];

	foreach ( $referenceTypes as $storageType => $label ) {
		$references = $this->storage->find( $storageType, [
			'ref_src_wiki' => WikiMap::getCurrentWikiId(),
			'ref_src_object_type' => $revision->getRevisionType(),
			'ref_src_object_id' => $revision->getCollectionId(),
		] );

		if ( !$references ) {
			continue;
		}

		$this->output( 'Removing ' . count( $references ) . " $label references from 1 revision.\n" );
		$this->multiRemove( $references );
	}
}
399 | |
/**
 * Single funnel through which every removal in this script reaches the
 * storage layer.
 *
 * @param array $objects Objects to hand to the storage layer for removal
 */
protected function multiRemove( array $objects ) {
	$this->storage->multiRemove( $objects );
}
403 | } |
404 | |
// Name the maintenance class defined in this file, then include the file
// referenced by the RUN_MAINTENANCE_IF_MAIN constant.
$maintClass = FlowRemoveOldTopics::class;
require_once RUN_MAINTENANCE_IF_MAIN;