Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 197 |
|
0.00% |
0 / 12 |
CRAP | |
0.00% |
0 / 1 |
FlowRemoveOldTopics | |
0.00% |
0 / 191 |
|
0.00% |
0 / 12 |
1122 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
2 | |||
execute | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
2 | |||
removeHeader | |
0.00% |
0 / 57 |
|
0.00% |
0 / 1 |
72 | |||
removeTopics | |
0.00% |
0 / 24 |
|
0.00% |
0 / 1 |
6 | |||
removeTopicsWithFlowUpdates | |
0.00% |
0 / 35 |
|
0.00% |
0 / 1 |
6 | |||
removeWorkflows | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
12 | |||
removeTopicList | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
removeSummary | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
12 | |||
sortSubtree | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
6 | |||
removePosts | |
0.00% |
0 / 15 |
|
0.00% |
0 / 1 |
30 | |||
removeReferences | |
0.00% |
0 / 16 |
|
0.00% |
0 / 1 |
12 | |||
multiRemove | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 |
1 | <?php |
2 | |
3 | namespace Flow\Maintenance; |
4 | |
5 | use Flow\Container; |
6 | use Flow\Data\ManagerGroup; |
7 | use Flow\Data\Utils\RawSql; |
8 | use Flow\DbFactory; |
9 | use Flow\Exception\FlowException; |
10 | use Flow\Hooks; |
11 | use Flow\Model\AbstractRevision; |
12 | use Flow\Model\Header; |
13 | use Flow\Model\PostRevision; |
14 | use Flow\Model\UUID; |
15 | use Flow\Model\Workflow; |
16 | use Flow\Repository\TreeRepository; |
17 | use Maintenance; |
18 | use MediaWiki\WikiMap\WikiMap; |
19 | use Wikimedia\Rdbms\DBUnexpectedError; |
20 | |
// Locate the MediaWiki installation: use MW_INSTALL_PATH when it is set,
// otherwise fall back to the directory three levels above this script.
$IP = getenv( 'MW_INSTALL_PATH' );
$IP = $IP === false ? __DIR__ . '/../../..' : $IP;

require_once "$IP/maintenance/Maintenance.php";
27 | |
28 | /** |
29 | * @ingroup Maintenance |
30 | */ |
31 | class FlowRemoveOldTopics extends Maintenance { |
32 | /** |
33 | * @var bool |
34 | */ |
35 | protected $dryRun = false; |
36 | |
37 | /** |
38 | * @var ManagerGroup |
39 | */ |
40 | protected $storage; |
41 | |
42 | /** |
43 | * @var TreeRepository |
44 | */ |
45 | protected $treeRepo; |
46 | |
47 | /** |
48 | * @var DbFactory |
49 | */ |
50 | protected $dbFactory; |
51 | |
/**
 * Declares the script description, its command line options, the default
 * batch size and the dependency on the Flow extension.
 */
public function __construct() {
	parent::__construct();

	$this->addDescription( "Deletes old topics" );

	// help texts are built up front so the option declarations stay short
	$dateHelp = 'Date cutoff (in any format understood by wfTimestamp), topics ' .
		'older than this date will be deleted.';
	$dryRunHelp = 'Simulate script run, without actually deleting anything';

	$this->addOption( 'date', $dateHelp, true, true );
	$this->addOption( 'dryrun', $dryRunHelp );

	$this->setBatchSize( 10 );

	$this->requireExtension( 'Flow' );
}
65 | |
/**
 * Script entry point: resolves the cutoff date, then removes old headers,
 * old topics and topics whose only recent updates came from the Flow talk
 * page manager.
 *
 * Aborts before deleting anything when the --date option cannot be parsed.
 */
public function execute() {
	// Validate the cutoff first. wfTimestamp() returns false for input it
	// cannot parse; handing that to the removal routines would leave the
	// effective cutoff up to how each downstream helper interprets `false`,
	// which is not acceptable for a script that deletes data.
	$date = $this->getOption( 'date' );
	$timestamp = wfTimestamp( TS_MW, $date );
	if ( $timestamp === false ) {
		$this->fatalError(
			"Invalid date '$date': expected a format understood by wfTimestamp."
		);
	}

	$this->dryRun = (bool)$this->getOption( 'dryrun', false );
	$this->storage = Container::get( 'storage' );
	$this->treeRepo = Container::get( 'repository.tree' );
	$this->dbFactory = Container::get( 'db.factory' );

	$this->removeHeader( $timestamp );
	// remove topics that are older than the given timestamp
	$this->removeTopics( $timestamp );
	// remove topics that have more recent updates, but only from Flow talk
	// page manager
	$this->removeTopicsWithFlowUpdates( $timestamp );
}
81 | |
/**
 * Removes header collections that have no revision at or after the cutoff.
 *
 * Works in batches: fetches the first revision of headers created before
 * the cutoff, drops from the batch every header that also has a revision
 * at or after the cutoff (so history is never cut in half), then deletes
 * all revisions of the remaining headers together with their references.
 * In dry-run mode the transaction is rolled back instead of committed.
 *
 * @param string $timestamp Timestamp in TS_MW format
 */
protected function removeHeader( $timestamp ) {
	$dbr = $this->dbFactory->getDB( DB_REPLICA );
	$batchSize = $this->getBatchSize();
	$wikiId = WikiMap::getCurrentWikiId();

	// we don't store a timestamp with revisions - the id also holds date
	// info, so that's what we should compare against
	$endId = UUID::getComparisonUUID( $timestamp );
	$quotedEndId = $dbr->addQuotes( $endId->getBinary() );

	// start from around unix epoch - there can be no Flow data before that
	$startId = UUID::getComparisonUUID( '1' );

	// The loop ends only when a batch comes back empty. Each result set has
	// its own variable so that neither the "continue" below nor the later
	// lookups can influence when the loop stops.
	while ( true ) {
		/** @var Header[] $firstRevisions */
		$firstRevisions = $this->storage->find(
			'Header',
			[
				'rev_user_wiki' => $wikiId,
				'rev_type' => 'header',
				new RawSql( 'rev_id > ' . $dbr->addQuotes( $startId->getBinary() ) ),
				new RawSql( 'rev_id < ' . $quotedEndId ),
				// only fetch original post at this point: we still need to
				// narrow down the results
				'rev_parent_id' => null,
			],
			[
				'limit' => $batchSize,
				'sort' => 'rev_id',
				'order' => 'ASC',
			]
		);

		if ( !$firstRevisions ) {
			break;
		}

		// prepare for next batch, which will start at this
		/** @var UUID $startId */
		$startId = end( $firstRevisions )->getRevisionId();

		// we've now found all first revisions prior to a certain date, but we
		// don't want to remove those that have revisions after that date cutoff
		// (we don't want to break history)
		// let's see if any has revisions more recent than timestamp
		$conds = [];
		$uuids = [];
		foreach ( $firstRevisions as $firstRevision ) {
			$collectionId = $firstRevision->getCollectionId();

			// keep track of UUIDs we may want to delete
			$uuids[$collectionId->getAlphadecimal()] = $collectionId;

			$conds[] = [
				'rev_user_wiki' => $wikiId,
				'rev_type' => 'header',
				new RawSql( 'rev_id >= ' . $quotedEndId ),
				'rev_type_id' => $collectionId->getBinary(),
			];
		}

		/** @var Header[][] $recent */
		$recent = $this->storage->findMulti( 'Header', $conds, [ 'limit' => 1 ] );

		// now exclude collection ids where there's a revision that is more
		// recent than the timestamp cutoff
		foreach ( $recent as $recentRevisions ) {
			foreach ( $recentRevisions as $recentRevision ) {
				unset( $uuids[$recentRevision->getCollectionId()->getAlphadecimal()] );
			}
		}

		// by now, there may be nothing left to remove, so move on to the
		// next batch...
		if ( !$uuids ) {
			continue;
		}

		// fetch every revision of the headers that are left
		$doomed = $this->storage->find(
			'Header',
			[
				'rev_user_wiki' => $wikiId,
				'rev_type' => 'header',
				'rev_type_id' => UUID::convertUUIDs( $uuids ),
			]
		);

		// nothing could be loaded for this batch: skip it rather than open
		// an empty transaction, and keep going with the next batch
		if ( !$doomed ) {
			continue;
		}

		$this->output( 'Removing ' . count( $doomed ) . ' header revisions from ' .
			count( $uuids ) . ' headers (up to ' . $startId->getTimestamp() . ")\n" );

		$this->dbFactory->getDB( DB_PRIMARY )->begin( __METHOD__ );

		foreach ( $doomed as $revision ) {
			$this->removeReferences( $revision );
		}

		$this->multiRemove( $doomed );

		if ( $this->dryRun ) {
			$this->dbFactory->getDB( DB_PRIMARY )->rollback( __METHOD__ );
		} else {
			$this->dbFactory->getDB( DB_PRIMARY )->commit( __METHOD__ );
			$this->dbFactory->waitForReplicas();
		}
	}
}
183 | |
/**
 * Removes, one batch at a time, the topic workflows of the current wiki
 * whose last update is older than the cutoff.
 *
 * @param string $timestamp Timestamp in TS_MW format
 * @throws FlowException
 */
protected function removeTopics( $timestamp ) {
	$dbr = $this->dbFactory->getDB( DB_REPLICA );
	$limit = $this->getBatchSize();

	// start from around unix epoch - there can be no Flow data before that
	$cursor = UUID::getComparisonUUID( '1' );

	while ( true ) {
		$conditions = [
			new RawSql( 'workflow_id > ' . $dbr->addQuotes( $cursor->getBinary() ) ),
			'workflow_wiki' => WikiMap::getCurrentWikiId(),
			'workflow_type' => 'topic',
			new RawSql( 'workflow_last_update_timestamp < ' . $dbr->addQuotes( $dbr->timestamp( $timestamp ) ) ),
		];
		$options = [
			'limit' => $limit,
			'sort' => 'workflow_id',
			'order' => 'ASC',
		];

		$batch = $this->storage->find( 'Workflow', $conditions, $options );
		if ( !$batch ) {
			// no more matching topics
			break;
		}

		// the next batch resumes after the last workflow of this one
		/** @var UUID $cursor */
		$cursor = end( $batch )->getId();

		$this->output( 'Removing ' . count( $batch ) .
			' topic workflows (up to ' . $cursor->getTimestamp() . ")\n" );
		$this->removeWorkflows( $batch );
	}
}
223 | |
/**
 * Removes topics that were updated after the cutoff, but whose revisions
 * since the cutoff were all authored by the Flow talk page manager.
 *
 * Topic ids are selected in batches ordered by workflow_id. The batch
 * cursor is taken from the last selected id rather than from the loaded
 * Workflow objects, so batching still advances when the objects come back
 * in a different order, or not at all.
 *
 * @param string $timestamp Timestamp in TS_MW format
 * @throws DBUnexpectedError
 * @throws FlowException
 */
protected function removeTopicsWithFlowUpdates( $timestamp ) {
	$dbr = $this->dbFactory->getDB( DB_REPLICA );
	$batchSize = $this->getBatchSize();
	$talkpageManager = Hooks::getOccupationController()->getTalkpageManager();

	// start from around unix epoch - there can be no Flow data before that
	$batchStartId = UUID::getComparisonUUID( '1' );

	// we only care about revisions since cutoff here
	$cutoffStartId = UUID::getComparisonUUID( $timestamp );

	while ( true ) {
		$workflowIds = $dbr->selectFieldValues(
			[ 'flow_workflow', 'flow_tree_node', 'flow_revision' ],
			'workflow_id',
			[
				// revisions more recent than cutoff time
				'rev_id > ' . $dbr->addQuotes( $cutoffStartId->getBinary() ),
				// workflow_id condition is only used to batch, the exact
				// $batchStartId otherwise doesn't matter (unlike rev_id)
				'workflow_id > ' . $dbr->addQuotes( $batchStartId->getBinary() ),
				'workflow_wiki' => WikiMap::getCurrentWikiId(),
				'workflow_type' => 'topic',
				'workflow_last_update_timestamp >= ' . $dbr->addQuotes( $dbr->timestamp( $timestamp ) ),
			],
			__METHOD__,
			[
				'LIMIT' => $batchSize,
				'ORDER BY' => 'workflow_id ASC',
				// we only want to find topics that were only altered by talk
				// page manager: as long as anyone else edited any post, we're
				// not interested in it
				'GROUP BY' => 'workflow_id',
				'HAVING' => [ 'GROUP_CONCAT(DISTINCT rev_user_id)' => $talkpageManager->getId() ],
			],
			[
				'flow_tree_node' => [ 'INNER JOIN', [ 'tree_ancestor_id = workflow_id' ] ],
				'flow_revision' => [ 'INNER JOIN', [ 'rev_type_id = tree_descendant_id' ] ],
			]
		);

		if ( !$workflowIds ) {
			break;
		}

		// prepare for next batch: the ids are ordered ascending, so the
		// last one is the highest workflow_id handled so far
		/** @var UUID $batchStartId */
		$batchStartId = UUID::create( end( $workflowIds ) );

		$workflows = $this->storage->getMulti( 'Workflow', $workflowIds );
		if ( !$workflows ) {
			// ids were selected but no object could be loaded: nothing to
			// remove in this batch, carry on with the next one
			continue;
		}

		$this->output( 'Removing ' . count( $workflows ) . ' topic workflows with recent ' .
			'Flow updates (up to ' . $batchStartId->getTimestamp() . ")\n" );
		$this->removeWorkflows( $workflows );
	}
}
285 | |
/**
 * Deletes the given topic workflows and everything attached to them
 * (summaries, posts, topic list entries) inside a single transaction.
 * The transaction is rolled back when the script runs in dry-run mode.
 *
 * @param Workflow[] $workflows
 * @throws DBUnexpectedError
 */
protected function removeWorkflows( array $workflows ) {
	$this->dbFactory->getDB( DB_PRIMARY )->begin( __METHOD__ );

	// dependent data goes first, the workflows themselves last
	foreach ( $workflows as $topic ) {
		$this->removeSummary( $topic );
		$this->removePosts( $topic );
		$this->removeTopicList( $topic );
	}
	$this->multiRemove( $workflows );

	if ( !$this->dryRun ) {
		$this->dbFactory->getDB( DB_PRIMARY )->commit( __METHOD__ );
		$this->dbFactory->waitForReplicas();
		return;
	}

	// dry run: undo everything that was done above
	$this->dbFactory->getDB( DB_PRIMARY )->rollback( __METHOD__ );
}
308 | |
/**
 * Removes the topic list entries that point at the given topic.
 *
 * @param Workflow $workflow
 */
protected function removeTopicList( Workflow $workflow ) {
	$topicListEntries = $this->storage->find(
		'TopicListEntry',
		[ 'topic_id' => $workflow->getId() ]
	);
	if ( !$topicListEntries ) {
		return;
	}

	$this->output( 'Removing ' . count( $topicListEntries ) . " topiclist entries.\n" );
	$this->multiRemove( $topicListEntries );
}
316 | |
/**
 * Removes all summary revisions of the given topic, along with the
 * references recorded for them.
 *
 * @param Workflow $workflow
 */
protected function removeSummary( Workflow $workflow ) {
	$summaries = $this->storage->find(
		'PostSummary',
		[ 'rev_type_id' => $workflow->getId() ]
	);
	if ( !$summaries ) {
		return;
	}

	// references first, then the revisions they belong to
	foreach ( $summaries as $summary ) {
		$this->removeReferences( $summary );
	}

	$this->output( 'Removing ' . count( $summaries ) . " summary revisions from 1 topic.\n" );
	$this->multiRemove( $summaries );
}
328 | |
/**
 * Flattens a subtree into a list of ids in which every node comes after
 * all of its descendants (children first, parent last).
 *
 * @param UUID $parentId Id of the node at the root of $subtree
 * @param array $subtree Node data; its 'children' entry maps child id to
 *  the child's own subtree
 * @return UUID[]
 */
protected function sortSubtree( UUID $parentId, array $subtree ) {
	$ordered = [];

	// descend into each child first, appending its flattened ids in order
	foreach ( $subtree['children'] as $childId => $childSubtree ) {
		$descendants = $this->sortSubtree( UUID::create( $childId ), $childSubtree );
		foreach ( $descendants as $descendantId ) {
			$ordered[] = $descendantId;
		}
	}

	// the parent always closes the list
	$ordered[] = $parentId;

	return $ordered;
}
350 | |
/**
 * Removes every post revision of a topic, the references recorded for
 * those revisions, and the matching tree nodes.
 *
 * Each tree node is deleted once per group of revisions instead of once
 * per revision, which avoids repeating the same delete for posts that
 * have several revisions.
 *
 * @param Workflow $workflow
 */
protected function removePosts( Workflow $workflow ) {
	// fetch all children (posts) from a topic & reverse-sort all the posts:
	// deepest-nested children should come first, parents last
	$topicId = $workflow->getId();
	$subtree = $this->treeRepo->fetchSubtree( $topicId );
	$uuids = $this->sortSubtree( $topicId, $subtree );

	$conds = [];
	foreach ( $uuids as $id ) {
		$conds[] = [ 'rev_type_id' => $id ];
	}

	$posts = $this->storage->findMulti( 'PostRevision', $conds );
	$count = 0;
	foreach ( $posts as $revisions ) {
		/** @var PostRevision[] $revisions */
		// distinct collection ids seen in this group, keyed by their
		// alphadecimal form so each tree node is only deleted once
		$collectionIds = [];
		foreach ( $revisions as $revision ) {
			$this->removeReferences( $revision );

			$collectionId = $revision->getCollectionId();
			$collectionIds[$collectionId->getAlphadecimal()] = $collectionId;
		}

		$count += count( $revisions );
		$this->multiRemove( $revisions );

		foreach ( $collectionIds as $collectionId ) {
			$this->treeRepo->delete( $collectionId );
		}
	}
	$this->output( 'Removing ' . $count . ' post revisions from ' . count( $posts ) . " posts.\n" );
}
379 | |
/**
 * Removes the wiki and URL references stored for the collection the given
 * revision belongs to.
 *
 * @param AbstractRevision $revision
 */
protected function removeReferences( AbstractRevision $revision ) {
	// storage class => tail of the progress message for that kind of
	// reference; wiki references are handled before URL references
	$referenceKinds = [
		'WikiReference' => " wiki references from 1 revision.\n",
		'URLReference' => " url references from 1 revision.\n",
	];

	foreach ( $referenceKinds as $referenceClass => $messageTail ) {
		$references = $this->storage->find( $referenceClass, [
			'ref_src_wiki' => WikiMap::getCurrentWikiId(),
			'ref_src_object_type' => $revision->getRevisionType(),
			'ref_src_object_id' => $revision->getCollectionId(),
		] );
		if ( !$references ) {
			continue;
		}

		$this->output( 'Removing ' . count( $references ) . $messageTail );
		$this->multiRemove( $references );
	}
}
401 | |
/**
 * Removes the given objects through the storage manager group. Every
 * removal in this script goes through this one method.
 *
 * @param array $objects
 */
protected function multiRemove( array $objects ) {
	$storage = $this->storage;
	$storage->multiRemove( $objects );
}
405 | } |
406 | |
// Name the maintenance class defined in this file, then include the file
// referenced by RUN_MAINTENANCE_IF_MAIN.
$maintClass = FlowRemoveOldTopics::class;
require_once RUN_MAINTENANCE_IF_MAIN;