Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
0.00% covered (danger)
0.00%
0 / 202
0.00% covered (danger)
0.00%
0 / 13
CRAP
0.00% covered (danger)
0.00%
0 / 1
Exporter
0.00% covered (danger)
0.00%
0 / 202
0.00% covered (danger)
0.00%
0 / 13
2256
0.00% covered (danger)
0.00%
0 / 1
 __construct
0.00% covered (danger)
0.00%
0 / 19
0.00% covered (danger)
0.00%
0 / 1
6
 schemaVersion
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 openStream
0.00% covered (danger)
0.00%
0 / 14
0.00% covered (danger)
0.00%
0 / 1
2
 getWorkflowIterator
0.00% covered (danger)
0.00%
0 / 28
0.00% covered (danger)
0.00%
0 / 1
56
 dump
0.00% covered (danger)
0.00%
0 / 9
0.00% covered (danger)
0.00%
0 / 1
20
 formatWorkflow
0.00% covered (danger)
0.00%
0 / 15
0.00% covered (danger)
0.00%
0 / 1
20
 formatTopic
0.00% covered (danger)
0.00%
0 / 14
0.00% covered (danger)
0.00%
0 / 1
12
 formatHeader
0.00% covered (danger)
0.00%
0 / 9
0.00% covered (danger)
0.00%
0 / 1
6
 formatPost
0.00% covered (danger)
0.00%
0 / 16
0.00% covered (danger)
0.00%
0 / 1
20
 formatSummary
0.00% covered (danger)
0.00%
0 / 9
0.00% covered (danger)
0.00%
0 / 1
6
 formatRevisions
0.00% covered (danger)
0.00%
0 / 34
0.00% covered (danger)
0.00%
0 / 1
110
 formatRevision
0.00% covered (danger)
0.00%
0 / 30
0.00% covered (danger)
0.00%
0 / 1
42
 isAllowed
0.00% covered (danger)
0.00%
0 / 4
0.00% covered (danger)
0.00%
0 / 1
2
1<?php
2
3namespace Flow\Dump;
4
5use BatchRowIterator;
6use Exception;
7use Flow\Collection\PostSummaryCollection;
8use Flow\Container;
9use Flow\Data\ManagerGroup;
10use Flow\Model\AbstractRevision;
11use Flow\Model\Header;
12use Flow\Model\PostRevision;
13use Flow\Model\PostSummary;
14use Flow\Model\UUID;
15use Flow\Model\Workflow;
16use Flow\RevisionActionPermissions;
17use Flow\Search\Iterators\AbstractIterator;
18use Flow\Search\Iterators\HeaderIterator;
19use Flow\Search\Iterators\TopicIterator;
20use MediaWiki\CommentStore\CommentStore;
21use MediaWiki\HookContainer\HookContainer;
22use MediaWiki\MediaWikiServices;
23use MediaWiki\Revision\RevisionStore;
24use MediaWiki\Title\Title;
25use MediaWiki\Title\TitleParser;
26use MediaWiki\User\CentralId\CentralIdLookup;
27use MediaWiki\User\User;
28use MediaWiki\WikiMap\WikiMap;
29use MediaWiki\Xml\Xml;
30use ReflectionProperty;
31use WikiExporter;
32use Wikimedia\Rdbms\IReadableDatabase;
33use Wikimedia\Timestamp\TimestampException;
34
35class Exporter extends WikiExporter {
36    /**
37     * Map of [db column name => xml attribute name]
38     *
39     * @var array
40     */
41    public static $map = [
42        'rev_id' => 'id',
43        'rev_user_id' => 'userid',
44        'rev_user_ip' => 'userip',
45        'rev_user_wiki' => 'userwiki',
46        'rev_parent_id' => 'parentid',
47        'rev_change_type' => 'changetype',
48        'rev_type' => 'type',
49        'rev_type_id' => 'typeid',
50        'rev_content' => 'content',
51        'rev_content_url' => 'contenturl',
52        'rev_flags' => 'flags',
53        'rev_mod_state' => 'modstate',
54        'rev_mod_user_id' => 'moduserid',
55        'rev_mod_user_ip' => 'moduserip',
56        'rev_mod_user_wiki' => 'moduserwiki',
57        'rev_mod_timestamp' => 'modtimestamp',
58        'rev_mod_reason' => 'modreason',
59        'rev_last_edit_id' => 'lasteditid',
60        'rev_edit_user_id' => 'edituserid',
61        'rev_edit_user_ip' => 'edituserip',
62        'rev_edit_user_wiki' => 'edituserwiki',
63        'rev_content_length' => 'contentlength',
64        'rev_previous_content_length' => 'previouscontentlength',
65
66        'tree_parent_id' => 'treeparentid',
67        'tree_rev_descendant_id' => 'treedescendantid',
68        'tree_rev_id' => 'treerevid',
69        'tree_orig_user_id' => 'treeoriguserid',
70        'tree_orig_user_ip' => 'treeoriguserip',
71        'tree_orig_user_wiki' => 'treeoriguserwiki',
72    ];
73
74    /**
75     * @var ReflectionProperty Previous revision property
76     */
77    protected $prevRevisionProperty;
78
79    /**
80     * @var ReflectionProperty Change type property
81     */
82    protected $changeTypeProperty;
83
84    /**
85     * To convert between local and global user ids
86     *
87     * @var CentralIdLookup|null
88     */
89    protected $lookup;
90
91    /**
92     * @inheritDoc
93     */
94    public function __construct(
95        $db,
96        CommentStore $commentStore,
97        HookContainer $hookContainer,
98        RevisionStore $revisionStore,
99        TitleParser $titleParser,
100        $history = WikiExporter::CURRENT,
101        $text = WikiExporter::TEXT,
102        $limitNamespaces = null
103    ) {
104        parent::__construct(
105            $db,
106            $commentStore,
107            $hookContainer,
108            $revisionStore,
109            $titleParser,
110            $history,
111            $text,
112            $limitNamespaces
113        );
114        $this->prevRevisionProperty = new ReflectionProperty( AbstractRevision::class, 'prevRevision' );
115
116        $this->changeTypeProperty = new ReflectionProperty( AbstractRevision::class, 'changeType' );
117
118        try {
119            $this->lookup = MediaWikiServices::getInstance()
120                ->getCentralIdLookupFactory()
121                ->getLookup( 'CentralAuth' );
122        } catch ( \Throwable $unused ) {
123            $this->lookup = null;
124        }
125    }
126
127    public static function schemaVersion() {
128        /*
129         * Be sure to also update the schema/namespace on mediawiki.org when
130         * making any changes:
131         * @see https://gerrit.wikimedia.org/r/#/c/281640/
132         */
133        return '1.0';
134    }
135
136    public function openStream() {
137        global $wgLanguageCode;
138        $version = static::schemaVersion();
139
140        $output = Xml::openElement(
141            'mediawiki',
142            [
143                'xmlns' => "http://www.mediawiki.org/xml/flow-$version/",
144                'xmlns:xsi' => 'http://www.w3.org/2001/XMLSchema-instance',
145                'xsi:schemaLocation' => "http://www.mediawiki.org/xml/flow-$version" .
146                    "https://www.mediawiki.org/xml/flow-$version.xsd",
147                'version' => $version,
148                'xml:lang' => $wgLanguageCode
149            ]
150        ) . "\n";
151        $this->sink->write( $output );
152    }
153
154    /**
155     * @param string[]|null $pages Array of DB-prefixed page titles
156     * @param int|null $startId page_id to start from (inclusive)
157     * @param int|null $endId page_id to end (exclusive)
158     * @param string|null $workflowStartId workflow_id, b36-encoded, to start from (inclusive)
159     * @param string|null $workflowEndId wokflow_id, b36-encoded, to end (exclusive)
160     * @return BatchRowIterator
161     */
162    public function getWorkflowIterator( ?array $pages = null, $startId = null, $endId = null,
163        $workflowStartId = null, $workflowEndId = null ) {
164        /** @var IReadableDatabase $dbr */
165        $dbr = Container::get( 'db.factory' )->getDB( DB_REPLICA );
166
167        $iterator = new BatchRowIterator( $dbr, 'flow_workflow', 'workflow_id', 300 );
168        $iterator->setFetchColumns( [ '*' ] );
169        $iterator->addConditions( [ 'workflow_wiki' => WikiMap::getCurrentWikiId() ] );
170        $iterator->addConditions( [ 'workflow_type' => 'discussion' ] );
171        $iterator->setCaller( __METHOD__ );
172
173        if ( $pages ) {
174            $pageConds = [];
175            foreach ( $pages as $page ) {
176                $title = Title::newFromDBkey( $page );
177                $pageConds[] = $dbr->andExpr( [
178                    'workflow_namespace' => $title->getNamespace(),
179                    'workflow_title_text' => $title->getDBkey()
180                ] );
181            }
182
183            $iterator->addConditions( [ $dbr->orExpr( $pageConds ) ] );
184        }
185        if ( $startId ) {
186            $iterator->addConditions( [ $dbr->expr( 'workflow_page_id', '>=', $startId ) ] );
187        }
188        if ( $endId ) {
189            $iterator->addConditions( [ $dbr->expr( 'workflow_page_id', '<', $endId ) ] );
190        }
191
192        if ( $workflowStartId ) {
193            $tempUUID = UUID::create( $workflowStartId );
194            $decodedId = $tempUUID->getBinary();
195            $iterator->addConditions( [ $dbr->expr( 'workflow_id', '>=', $decodedId ) ] );
196        }
197        if ( $workflowEndId ) {
198            $tempUUID = UUID::create( $workflowEndId );
199            $decodedId = $tempUUID->getBinary();
200            $iterator->addConditions( [ $dbr->expr( 'workflow_id', '<', $decodedId ) ] );
201        }
202        return $iterator;
203    }
204
205    /**
206     * @param BatchRowIterator $workflowIterator
207     * @throws Exception
208     * @throws TimestampException
209     * @throws \Flow\Exception\InvalidInputException
210     */
211    public function dump( BatchRowIterator $workflowIterator ) {
212        foreach ( $workflowIterator as $rows ) {
213            foreach ( $rows as $row ) {
214                $workflow = Workflow::fromStorageRow( (array)$row );
215
216                $headerIterator = Container::get( 'search.index.iterators.header' );
217                $topicIterator = Container::get( 'search.index.iterators.topic' );
218                $topicIterator->orderByUUID = true;
219                /** @var AbstractIterator $iterator */
220                foreach ( [ $headerIterator, $topicIterator ] as $iterator ) {
221                    $iterator->setPage( $row->workflow_page_id );
222                }
223
224                $this->formatWorkflow( $workflow, $headerIterator, $topicIterator );
225            }
226        }
227    }
228
229    protected function formatWorkflow( Workflow $workflow, HeaderIterator $headerIterator, TopicIterator $topicIterator ) {
230        if ( $workflow->isDeleted() ) {
231            return;
232        }
233
234        $output = Xml::openElement( 'board', [
235            'id' => $workflow->getId()->getAlphadecimal(),
236            'title' => $workflow->getOwnerTitle()->getPrefixedDBkey(),
237        ] ) . "\n";
238        $this->sink->write( $output );
239
240        foreach ( $headerIterator as $revision ) {
241            /** @var Header $revision */
242            '@phan-var Header $revision';
243            $this->formatHeader( $revision );
244        }
245        foreach ( $topicIterator as $revision ) {
246            /** @var PostRevision $revision */
247            '@phan-var PostRevision $revision';
248            $this->formatTopic( $revision );
249        }
250
251        $output = Xml::closeElement( 'board' ) . "\n";
252        $this->sink->write( $output );
253    }
254
255    protected function formatTopic( PostRevision $revision ) {
256        if ( !$this->isAllowed( $revision ) ) {
257            return;
258        }
259
260        $output = Xml::openElement( 'topic', [
261            'id' => $revision->getCollectionId()->getAlphadecimal(),
262        ] ) . "\n";
263        $this->sink->write( $output );
264
265        $this->formatPost( $revision );
266
267        // find summary for this topic & add it as revision
268        $summaryCollection = PostSummaryCollection::newFromId( $revision->getCollectionId() );
269        try {
270            /** @var PostSummary $summary */
271            $summary = $summaryCollection->getLastRevision();
272            '@phan-var PostSummary $summary';
273            $this->formatSummary( $summary );
274        } catch ( \Exception ) {
275            // no summary - that's ok!
276        }
277
278        $output = Xml::closeElement( 'topic' ) . "\n";
279        $this->sink->write( $output );
280    }
281
282    protected function formatHeader( Header $revision ) {
283        if ( !$this->isAllowed( $revision ) ) {
284            return;
285        }
286
287        $output = Xml::openElement( 'description', [
288            'id' => $revision->getCollectionId()->getAlphadecimal()
289        ] ) . "\n";
290        $this->sink->write( $output );
291
292        $this->formatRevisions( $revision );
293
294        $output = Xml::closeElement( 'description' ) . "\n";
295        $this->sink->write( $output );
296    }
297
298    protected function formatPost( PostRevision $revision ) {
299        if ( !$this->isAllowed( $revision ) ) {
300            return;
301        }
302
303        $output = Xml::openElement( 'post', [
304            'id' => $revision->getCollectionId()->getAlphadecimal()
305        ] ) . "\n";
306        $this->sink->write( $output );
307
308        $this->formatRevisions( $revision );
309
310        if ( $revision->getChildren() ) {
311            $output = Xml::openElement( 'children' ) . "\n";
312            $this->sink->write( $output );
313
314            foreach ( $revision->getChildren() as $child ) {
315                $this->formatPost( $child );
316            }
317
318            $output = Xml::closeElement( 'children' ) . "\n";
319            $this->sink->write( $output );
320        }
321
322        $output = Xml::closeElement( 'post' ) . "\n";
323        $this->sink->write( $output );
324    }
325
326    protected function formatSummary( PostSummary $revision ) {
327        if ( !$this->isAllowed( $revision ) ) {
328            return;
329        }
330
331        $output = Xml::openElement( 'summary', [
332            'id' => $revision->getCollectionId()->getAlphadecimal()
333        ] ) . "\n";
334        $this->sink->write( $output );
335
336        $this->formatRevisions( $revision );
337
338        $output = Xml::closeElement( 'summary' ) . "\n";
339        $this->sink->write( $output );
340    }
341
342    protected function formatRevisions( AbstractRevision $revision ) {
343        $output = Xml::openElement( 'revisions' ) . "\n";
344        $this->sink->write( $output );
345
346        $collection = $revision->getCollection();
347        if ( $this->history === WikiExporter::FULL ) {
348            /** @var AbstractRevision[] $revisions */
349            $revisions = array_reverse( $collection->getAllRevisions() );
350            $prevId = null;
351
352            foreach ( $revisions as $revision ) {
353                if ( $this->isAllowed( $revision ) ) {
354                    if ( $prevId !== null ) {
355                        // override parent id: this is used to get rid of gaps
356                        // that are caused by moderated items, where the
357                        // revision tree would be incorrect
358                        $this->prevRevisionProperty->setValue( $revision, $prevId );
359
360                        // Since $prevId is set, we know
361                        // there was a gap, and the original
362                        // hide-topic/delete-topic/suppress-topic
363                        // was removed. Since that is used for
364                        // listeners in FlowActions.php, we replace
365                        // restore-topic with edit-title and make a
366                        // null edit (we don't do null edits in the
367                        // normal application flow, but this
368                        // provides a way to replace restore).
369                        $oldChangeType = $revision->getChangeType();
370
371                        if ( $oldChangeType === 'restore-topic' ) {
372                            $this->changeTypeProperty->setValue( $revision, 'edit-title' );
373                        }
374
375                        if ( $oldChangeType === 'restore-post' ) {
376                            $this->changeTypeProperty->setValue( $revision, 'edit-post' );
377                        }
378
379                        $prevId = null;
380                    }
381                    $this->formatRevision( $revision );
382                } elseif ( $prevId === null ) {
383                    // if revision can't be dumped, store its parent id so we
384                    // can re-apply it to the next one that can be displayed, so
385                    // we don't have gaps
386                    $prevId = $revision->getPrevRevisionId();
387                }
388            }
389        } elseif ( $this->history === WikiExporter::CURRENT ) {
390            $first = $collection->getFirstRevision();
391
392            // storing only last revision won't work (it'll reference non-existing
393            // parents): we'll construct a bogus revision with most of the original
394            // metadata, but with the current content & id (= timestamp)
395            $first = $first->toStorageRow( $first );
396            $last = $revision->toStorageRow( $revision );
397            $first['rev_id'] = $last['rev_id'];
398            $first['rev_content'] = $last['rev_content'];
399            $first['rev_flags'] = $last['rev_flags'];
400            if ( isset( $first['tree_rev_id'] ) ) {
401                // PostRevision-only: tree_rev_id must match rev_id
402                $first['tree_rev_id'] = $first['rev_id'];
403            }
404
405            // clear buffered cache, to make sure it doesn't serve the existing (already
406            // loaded) revision when trying to turn our bogus mixed data into a revision
407            /** @var ManagerGroup $storage */
408            $storage = Container::get( 'storage' );
409            $storage->clear();
410
411            $mix = $revision->fromStorageRow( $first );
412
413            $this->formatRevision( $mix );
414        }
415
416        $output = Xml::closeElement( 'revisions' ) . "\n";
417        $this->sink->write( $output );
418    }
419
420    /**
421     * @param AbstractRevision $revision
422     * @suppress SecurityCheck-DoubleEscaped
423     */
424    protected function formatRevision( AbstractRevision $revision ) {
425        if ( !$this->isAllowed( $revision ) ) {
426            return;
427        }
428
429        $attribs = $revision->toStorageRow( $revision );
430
431        // make sure there are no leftover key columns (unknown to $attribs)
432        $keys = array_intersect_key( static::$map, $attribs );
433        // now make sure $values columns are in the same order as $keys are
434        // (array_merge) and there are no leftover columns (array_intersect_key)
435        $values = array_intersect_key( array_merge( $keys, $attribs ), $keys );
436        // combine them
437        $attribs = array_combine( $keys, $values );
438        // and get rid of columns with null values
439        $attribs = array_filter( $attribs, static function ( $value ) {
440            return $value !== null;
441        } );
442
443        // references to external store etc. are useless; we'll include the real
444        // content as node text
445        unset( $attribs['content'], $attribs['contenturl'] );
446        $format = $revision->getContentFormat();
447        $attribs['flags'] = 'utf-8,' . $format;
448
449        if ( $this->lookup ) {
450            $userIdFields = [ 'userid', 'treeoriguserid', 'moduserid', 'edituserid' ];
451            foreach ( $userIdFields as $userIdField ) {
452                if ( isset( $attribs[ $userIdField ] ) ) {
453                    $user = User::newFromId( (int)$attribs[ $userIdField ] );
454                    $globalUserId = $this->lookup->centralIdFromLocalUser(
455                        $user,
456                        CentralIdLookup::AUDIENCE_RAW
457                    );
458                    if ( $globalUserId ) {
459                        $attribs[ 'global' . $userIdField ] = $globalUserId;
460                    }
461                }
462            }
463        }
464
465        $output = Xml::element(
466            'revision',
467            $attribs,
468            $revision->getContent( $format )
469        ) . "\n";
470        // filter out bad characters that may have crept into old revisions
471        $output = preg_replace( '/[^\x{0009}\x{000a}\x{000d}\x{0020}-\x{D7FF}\x{E000}-\x{FFFD}]+/u', ' ', $output );
472        $this->sink->write( $output );
473    }
474
475    /**
476     * Test if anon users are allowed to view a particular revision.
477     *
478     * @param AbstractRevision $revision
479     * @return bool
480     */
481    protected function isAllowed( AbstractRevision $revision ) {
482        $user = User::newFromId( 0 );
483        $actions = Container::get( 'flow_actions' );
484        $permissions = new RevisionActionPermissions( $actions, $user );
485
486        return $permissions->isAllowed( $revision, 'view' );
487    }
488}