Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
0.00% covered (danger)
0.00%
0 / 206
0.00% covered (danger)
0.00%
0 / 13
CRAP
0.00% covered (danger)
0.00%
0 / 1
Exporter
0.00% covered (danger)
0.00%
0 / 206
0.00% covered (danger)
0.00%
0 / 13
2256
0.00% covered (danger)
0.00%
0 / 1
 __construct
0.00% covered (danger)
0.00%
0 / 21
0.00% covered (danger)
0.00%
0 / 1
6
 schemaVersion
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 openStream
0.00% covered (danger)
0.00%
0 / 13
0.00% covered (danger)
0.00%
0 / 1
2
 getWorkflowIterator
0.00% covered (danger)
0.00%
0 / 31
0.00% covered (danger)
0.00%
0 / 1
56
 dump
0.00% covered (danger)
0.00%
0 / 9
0.00% covered (danger)
0.00%
0 / 1
20
 formatWorkflow
0.00% covered (danger)
0.00%
0 / 15
0.00% covered (danger)
0.00%
0 / 1
20
 formatTopic
0.00% covered (danger)
0.00%
0 / 14
0.00% covered (danger)
0.00%
0 / 1
12
 formatHeader
0.00% covered (danger)
0.00%
0 / 9
0.00% covered (danger)
0.00%
0 / 1
6
 formatPost
0.00% covered (danger)
0.00%
0 / 16
0.00% covered (danger)
0.00%
0 / 1
20
 formatSummary
0.00% covered (danger)
0.00%
0 / 9
0.00% covered (danger)
0.00%
0 / 1
6
 formatRevisions
0.00% covered (danger)
0.00%
0 / 34
0.00% covered (danger)
0.00%
0 / 1
110
 formatRevision
0.00% covered (danger)
0.00%
0 / 30
0.00% covered (danger)
0.00%
0 / 1
42
 isAllowed
0.00% covered (danger)
0.00%
0 / 4
0.00% covered (danger)
0.00%
0 / 1
2
1<?php
2
3namespace Flow\Dump;
4
5use BatchRowIterator;
6use Exception;
7use Flow\Collection\PostSummaryCollection;
8use Flow\Container;
9use Flow\Data\ManagerGroup;
10use Flow\Model\AbstractRevision;
11use Flow\Model\Header;
12use Flow\Model\PostRevision;
13use Flow\Model\PostSummary;
14use Flow\Model\UUID;
15use Flow\Model\Workflow;
16use Flow\RevisionActionPermissions;
17use Flow\Search\Iterators\AbstractIterator;
18use Flow\Search\Iterators\HeaderIterator;
19use Flow\Search\Iterators\TopicIterator;
20use MediaWiki\CommentStore\CommentStore;
21use MediaWiki\HookContainer\HookContainer;
22use MediaWiki\MediaWikiServices;
23use MediaWiki\Revision\RevisionStore;
24use MediaWiki\Title\Title;
25use MediaWiki\Title\TitleParser;
26use MediaWiki\User\CentralId\CentralIdLookup;
27use MediaWiki\User\User;
28use MediaWiki\WikiMap\WikiMap;
29use ReflectionProperty;
30use WikiExporter;
31use Wikimedia\Rdbms\IDatabase;
32use Wikimedia\Timestamp\TimestampException;
33use Xml;
34
/**
 * Writes Flow boards (headers, topics, posts and summaries, with their
 * revisions) as XML to the sink inherited from WikiExporter.
 *
 * Typical use: obtain a BatchRowIterator from getWorkflowIterator() and pass
 * it to dump(). Only revisions that an anonymous user may view are written
 * (see isAllowed()).
 */
class Exporter extends WikiExporter {
    /**
     * Map of [db column name => xml attribute name]
     *
     * Only columns listed here end up as attributes of a <revision> element,
     * and they are emitted in the order given here.
     *
     * @var array<string,string>
     */
    public static $map = [
        'rev_id' => 'id',
        'rev_user_id' => 'userid',
        'rev_user_ip' => 'userip',
        'rev_user_wiki' => 'userwiki',
        'rev_parent_id' => 'parentid',
        'rev_change_type' => 'changetype',
        'rev_type' => 'type',
        'rev_type_id' => 'typeid',
        'rev_content' => 'content',
        'rev_content_url' => 'contenturl',
        'rev_flags' => 'flags',
        'rev_mod_state' => 'modstate',
        'rev_mod_user_id' => 'moduserid',
        'rev_mod_user_ip' => 'moduserip',
        'rev_mod_user_wiki' => 'moduserwiki',
        'rev_mod_timestamp' => 'modtimestamp',
        'rev_mod_reason' => 'modreason',
        'rev_last_edit_id' => 'lasteditid',
        'rev_edit_user_id' => 'edituserid',
        'rev_edit_user_ip' => 'edituserip',
        'rev_edit_user_wiki' => 'edituserwiki',
        'rev_content_length' => 'contentlength',
        'rev_previous_content_length' => 'previouscontentlength',

        'tree_parent_id' => 'treeparentid',
        'tree_rev_descendant_id' => 'treedescendantid',
        'tree_rev_id' => 'treerevid',
        'tree_orig_user_id' => 'treeoriguserid',
        'tree_orig_user_ip' => 'treeoriguserip',
        'tree_orig_user_wiki' => 'treeoriguserwiki',
    ];

    /**
     * @var ReflectionProperty Previous revision property
     */
    protected $prevRevisionProperty;

    /**
     * @var ReflectionProperty Change type property
     */
    protected $changeTypeProperty;

    /**
     * To convert between local and global user ids
     *
     * @var CentralIdLookup|null
     */
    protected $lookup;

    /**
     * All arguments are forwarded unchanged to WikiExporter::__construct().
     * In addition, this sets up reflection access to two AbstractRevision
     * properties that formatRevisions() needs to rewrite, and tries to obtain
     * the 'CentralAuth' central id lookup.
     *
     * @inheritDoc
     */
    public function __construct(
        $db,
        CommentStore $commentStore,
        HookContainer $hookContainer,
        RevisionStore $revisionStore,
        TitleParser $titleParser,
        $history = WikiExporter::CURRENT,
        $text = WikiExporter::TEXT,
        $limitNamespaces = null
    ) {
        parent::__construct(
            $db,
            $commentStore,
            $hookContainer,
            $revisionStore,
            $titleParser,
            $history,
            $text,
            $limitNamespaces
        );

        // These properties are not publicly writable; reflection lets
        // formatRevisions() patch them on in-memory revision objects.
        $this->prevRevisionProperty = new ReflectionProperty( AbstractRevision::class, 'prevRevision' );
        $this->prevRevisionProperty->setAccessible( true );

        $this->changeTypeProperty = new ReflectionProperty( AbstractRevision::class, 'changeType' );
        $this->changeTypeProperty->setAccessible( true );

        // The lookup is optional: when the 'CentralAuth' provider cannot be
        // obtained for whatever reason, formatRevision() simply omits the
        // global user id attributes.
        try {
            $this->lookup = MediaWikiServices::getInstance()
                ->getCentralIdLookupFactory()
                ->getLookup( 'CentralAuth' );
        } catch ( \Throwable $unused ) {
            $this->lookup = null;
        }
    }

    /**
     * Version of the Flow dump XML schema; used by openStream() for the
     * namespace URI, the schema location and the `version` attribute.
     *
     * @return string
     */
    public static function schemaVersion() {
        /*
         * Be sure to also update the schema/namespace on mediawiki.org when
         * making any changes:
         * @see https://gerrit.wikimedia.org/r/#/c/281640/
         */
        return '1.0';
    }

    /**
     * Write the opening <mediawiki> root element to the sink.
     */
    public function openStream() {
        global $wgLanguageCode;
        $version = static::schemaVersion();

        $output = Xml::openElement(
            'mediawiki',
            [
                'xmlns' => "http://www.mediawiki.org/xml/flow-$version/",
                'xmlns:xsi' => 'http://www.w3.org/2001/XMLSchema-instance',
                'xsi:schemaLocation' => "http://www.mediawiki.org/xml/flow-$version/ https://www.mediawiki.org/xml/flow-$version.xsd",
                'version' => $version,
                'xml:lang' => $wgLanguageCode
            ]
        ) . "\n";
        $this->sink->write( $output );
    }

    /**
     * Build a batched iterator over the 'discussion' workflows of the
     * current wiki, optionally narrowed by page title, page id range and/or
     * workflow id range. All given restrictions are combined.
     *
     * @param string[]|null $pages Array of DB-prefixed page titles
     * @param int|null $startId page_id to start from (inclusive)
     * @param int|null $endId page_id to end (exclusive)
     * @param string|null $workflowStartId workflow_id, b36-encoded, to start from (inclusive)
     * @param string|null $workflowEndId workflow_id, b36-encoded, to end (exclusive)
     * @return BatchRowIterator
     * @throws \InvalidArgumentException When an entry of $pages is not a valid title
     */
    public function getWorkflowIterator( ?array $pages = null, $startId = null, $endId = null,
        $workflowStartId = null, $workflowEndId = null ) {
        /** @var IDatabase $dbr */
        $dbr = Container::get( 'db.factory' )->getDB( DB_REPLICA );

        $iterator = new BatchRowIterator( $dbr, 'flow_workflow', 'workflow_id', 300 );
        $iterator->setFetchColumns( [ '*' ] );
        $iterator->addConditions( [ 'workflow_wiki' => WikiMap::getCurrentWikiId() ] );
        $iterator->addConditions( [ 'workflow_type' => 'discussion' ] );
        $iterator->setCaller( __METHOD__ );

        if ( $pages ) {
            $pageConds = [];
            foreach ( $pages as $page ) {
                $title = Title::newFromDBkey( $page );
                if ( $title === null ) {
                    // An unparsable key yields no Title object; report it
                    // clearly instead of failing on a null dereference.
                    throw new \InvalidArgumentException( "Invalid page title: $page" );
                }

                // (namespace AND title) per page ...
                $pageConds[] = $dbr->makeList(
                    [
                        'workflow_namespace' => $title->getNamespace(),
                        'workflow_title_text' => $title->getDBkey()
                    ],
                    LIST_AND
                );
            }

            // ... and any of the requested pages may match
            $iterator->addConditions( [ $dbr->makeList( $pageConds, LIST_OR ) ] );
        }
        if ( $startId ) {
            $iterator->addConditions( [ 'workflow_page_id >= ' . $dbr->addQuotes( $startId ) ] );
        }
        if ( $endId ) {
            $iterator->addConditions( [ 'workflow_page_id < ' . $dbr->addQuotes( $endId ) ] );
        }

        // workflow ids are compared in their binary form
        if ( $workflowStartId ) {
            $tempUUID = UUID::create( $workflowStartId );
            $decodedId = $tempUUID->getBinary();
            $iterator->addConditions( [ 'workflow_id >= ' . $dbr->addQuotes( $decodedId ) ] );
        }
        if ( $workflowEndId ) {
            $tempUUID = UUID::create( $workflowEndId );
            $decodedId = $tempUUID->getBinary();
            $iterator->addConditions( [ 'workflow_id < ' . $dbr->addQuotes( $decodedId ) ] );
        }
        return $iterator;
    }

    /**
     * Write every workflow yielded by the iterator (see
     * getWorkflowIterator()) to the sink as a <board> element.
     *
     * @param BatchRowIterator $workflowIterator
     * @throws Exception
     * @throws TimestampException
     * @throws \Flow\Exception\InvalidInputException
     */
    public function dump( BatchRowIterator $workflowIterator ) {
        // the iterator yields batches of rows, hence the nested loops
        foreach ( $workflowIterator as $rows ) {
            foreach ( $rows as $row ) {
                $workflow = Workflow::fromStorageRow( (array)$row );

                $headerIterator = Container::get( 'search.index.iterators.header' );
                $topicIterator = Container::get( 'search.index.iterators.topic' );
                $topicIterator->orderByUUID = true;
                // restrict both iterators to the page this workflow lives on
                /** @var AbstractIterator $iterator */
                foreach ( [ $headerIterator, $topicIterator ] as $iterator ) {
                    $iterator->setPage( $row->workflow_page_id );
                }

                $this->formatWorkflow( $workflow, $headerIterator, $topicIterator );
            }
        }
    }

    /**
     * Write one <board> element holding the board's description(s) followed
     * by its topics. Deleted workflows produce no output at all.
     *
     * @param Workflow $workflow
     * @param HeaderIterator $headerIterator Yields the Header revisions to write
     * @param TopicIterator $topicIterator Yields the topic PostRevisions to write
     */
    protected function formatWorkflow( Workflow $workflow, HeaderIterator $headerIterator, TopicIterator $topicIterator ) {
        if ( $workflow->isDeleted() ) {
            return;
        }

        $output = Xml::openElement( 'board', [
            'id' => $workflow->getId()->getAlphadecimal(),
            'title' => $workflow->getOwnerTitle()->getPrefixedDBkey(),
        ] ) . "\n";
        $this->sink->write( $output );

        foreach ( $headerIterator as $revision ) {
            /** @var Header $revision */
            '@phan-var Header $revision';
            $this->formatHeader( $revision );
        }
        foreach ( $topicIterator as $revision ) {
            /** @var PostRevision $revision */
            '@phan-var PostRevision $revision';
            $this->formatTopic( $revision );
        }

        $output = Xml::closeElement( 'board' ) . "\n";
        $this->sink->write( $output );
    }

    /**
     * Write one <topic> element: the topic's post tree followed by its
     * summary, if it has one. Nothing is written when the revision may not
     * be viewed.
     *
     * @param PostRevision $revision
     */
    protected function formatTopic( PostRevision $revision ) {
        if ( !$this->isAllowed( $revision ) ) {
            return;
        }

        $output = Xml::openElement( 'topic', [
            'id' => $revision->getCollectionId()->getAlphadecimal(),
        ] ) . "\n";
        $this->sink->write( $output );

        $this->formatPost( $revision );

        // find summary for this topic & add it as revision
        $summaryCollection = PostSummaryCollection::newFromId( $revision->getCollectionId() );
        $summary = null;
        try {
            /** @var PostSummary $summary */
            $summary = $summaryCollection->getLastRevision();
            '@phan-var PostSummary $summary';
        } catch ( \Exception $e ) {
            // no summary - that's ok!
        }
        // Written outside of the try block on purpose: a failure halfway
        // through writing the summary must not be mistaken for "no summary"
        // and silently leave unclosed elements in the output.
        if ( $summary !== null ) {
            $this->formatSummary( $summary );
        }

        $output = Xml::closeElement( 'topic' ) . "\n";
        $this->sink->write( $output );
    }

    /**
     * Write one <description> element wrapping the header's revisions.
     * Nothing is written when the revision may not be viewed.
     *
     * @param Header $revision
     */
    protected function formatHeader( Header $revision ) {
        if ( !$this->isAllowed( $revision ) ) {
            return;
        }

        $output = Xml::openElement( 'description', [
            'id' => $revision->getCollectionId()->getAlphadecimal()
        ] ) . "\n";
        $this->sink->write( $output );

        $this->formatRevisions( $revision );

        $output = Xml::closeElement( 'description' ) . "\n";
        $this->sink->write( $output );
    }

    /**
     * Write one <post> element: the post's revisions and, recursively inside
     * a <children> element, its replies. Nothing is written when the
     * revision may not be viewed (its replies are skipped along with it).
     *
     * @param PostRevision $revision
     */
    protected function formatPost( PostRevision $revision ) {
        if ( !$this->isAllowed( $revision ) ) {
            return;
        }

        $output = Xml::openElement( 'post', [
            'id' => $revision->getCollectionId()->getAlphadecimal()
        ] ) . "\n";
        $this->sink->write( $output );

        $this->formatRevisions( $revision );

        $children = $revision->getChildren();
        if ( $children ) {
            $output = Xml::openElement( 'children' ) . "\n";
            $this->sink->write( $output );

            foreach ( $children as $child ) {
                $this->formatPost( $child );
            }

            $output = Xml::closeElement( 'children' ) . "\n";
            $this->sink->write( $output );
        }

        $output = Xml::closeElement( 'post' ) . "\n";
        $this->sink->write( $output );
    }

    /**
     * Write one <summary> element wrapping the summary's revisions.
     * Nothing is written when the revision may not be viewed.
     *
     * @param PostSummary $revision
     */
    protected function formatSummary( PostSummary $revision ) {
        if ( !$this->isAllowed( $revision ) ) {
            return;
        }

        $output = Xml::openElement( 'summary', [
            'id' => $revision->getCollectionId()->getAlphadecimal()
        ] ) . "\n";
        $this->sink->write( $output );

        $this->formatRevisions( $revision );

        $output = Xml::closeElement( 'summary' ) . "\n";
        $this->sink->write( $output );
    }

    /**
     * Write a <revisions> element for the collection $revision belongs to.
     *
     * With WikiExporter::FULL history every viewable revision is written;
     * with WikiExporter::CURRENT a single synthetic revision is written
     * instead. Any other history setting yields an empty <revisions>
     * element.
     *
     * @param AbstractRevision $revision
     */
    protected function formatRevisions( AbstractRevision $revision ) {
        $output = Xml::openElement( 'revisions' ) . "\n";
        $this->sink->write( $output );

        $collection = $revision->getCollection();
        if ( $this->history === WikiExporter::FULL ) {
            /** @var AbstractRevision[] $revisions */
            $revisions = array_reverse( $collection->getAllRevisions() );
            $prevId = null;

            foreach ( $revisions as $current ) {
                if ( $this->isAllowed( $current ) ) {
                    if ( $prevId !== null ) {
                        // override parent id: this is used to get rid of gaps
                        // that are caused by moderated items, where the
                        // revision tree would be incorrect
                        $this->prevRevisionProperty->setValue( $current, $prevId );

                        // Since $prevId is set, we know
                        // there was a gap, and the original
                        // hide-topic/delete-topic/suppress-topic
                        // was removed. Since that is used for
                        // listeners in FlowActions.php, we replace
                        // restore-topic with edit-title and make a
                        // null edit (we don't do null edits in the
                        // normal application flow, but this
                        // provides a way to replace restore).
                        $oldChangeType = $current->getChangeType();

                        if ( $oldChangeType === 'restore-topic' ) {
                            $this->changeTypeProperty->setValue( $current, 'edit-title' );
                        }

                        if ( $oldChangeType === 'restore-post' ) {
                            $this->changeTypeProperty->setValue( $current, 'edit-post' );
                        }

                        $prevId = null;
                    }
                    $this->formatRevision( $current );
                } elseif ( $prevId === null ) {
                    // if revision can't be dumped, store its parent id so we
                    // can re-apply it to the next one that can be displayed, so
                    // we don't have gaps
                    $prevId = $current->getPrevRevisionId();
                }
            }
        } elseif ( $this->history === WikiExporter::CURRENT ) {
            $first = $collection->getFirstRevision();

            // storing only last revision won't work (it'll reference non-existing
            // parents): we'll construct a bogus revision with most of the original
            // metadata, but with the current content & id (= timestamp)
            $first = $first->toStorageRow( $first );
            $last = $revision->toStorageRow( $revision );
            $first['rev_id'] = $last['rev_id'];
            $first['rev_content'] = $last['rev_content'];
            $first['rev_flags'] = $last['rev_flags'];
            if ( isset( $first['tree_rev_id'] ) ) {
                // PostRevision-only: tree_rev_id must match rev_id
                $first['tree_rev_id'] = $first['rev_id'];
            }

            // clear buffered cache, to make sure it doesn't serve the existing (already
            // loaded) revision when trying to turn our bogus mixed data into a revision
            /** @var ManagerGroup $storage */
            $storage = Container::get( 'storage' );
            $storage->clear();

            $mix = $revision->fromStorageRow( $first );

            $this->formatRevision( $mix );
        }

        $output = Xml::closeElement( 'revisions' ) . "\n";
        $this->sink->write( $output );
    }

    /**
     * Write a single <revision> element. Its attributes are the revision's
     * storage columns renamed through static::$map (null values dropped),
     * plus global user ids when a central id lookup is available; its text
     * is the revision content. Nothing is written when the revision may not
     * be viewed.
     *
     * @param AbstractRevision $revision
     * @suppress SecurityCheck-DoubleEscaped
     */
    protected function formatRevision( AbstractRevision $revision ) {
        if ( !$this->isAllowed( $revision ) ) {
            return;
        }

        $attribs = $revision->toStorageRow( $revision );

        // make sure there are no leftover key columns (unknown to $attribs)
        $keys = array_intersect_key( static::$map, $attribs );
        // now make sure $values columns are in the same order as $keys are
        // (array_merge) and there are no leftover columns (array_intersect_key)
        $values = array_intersect_key( array_merge( $keys, $attribs ), $keys );
        // combine them
        $attribs = array_combine( $keys, $values );
        // and get rid of columns with null values
        $attribs = array_filter( $attribs, static function ( $value ) {
            return $value !== null;
        } );

        // references to external store etc. are useless; we'll include the real
        // content as node text
        unset( $attribs['content'], $attribs['contenturl'] );
        $format = $revision->getContentFormat();
        $attribs['flags'] = 'utf-8,' . $format;

        if ( $this->lookup ) {
            // add a "global<field>" attribute next to every local user id
            // for which the lookup returns a central id
            $userIdFields = [ 'userid', 'treeoriguserid', 'moduserid', 'edituserid' ];
            foreach ( $userIdFields as $userIdField ) {
                if ( isset( $attribs[ $userIdField ] ) ) {
                    $user = User::newFromId( (int)$attribs[ $userIdField ] );
                    $globalUserId = $this->lookup->centralIdFromLocalUser(
                        $user,
                        CentralIdLookup::AUDIENCE_RAW
                    );
                    if ( $globalUserId ) {
                        $attribs[ 'global' . $userIdField ] = $globalUserId;
                    }
                }
            }
        }

        $output = Xml::element(
            'revision',
            $attribs,
            $revision->getContent( $format )
        ) . "\n";
        // filter out bad characters that may have crept into old revisions
        $output = preg_replace( '/[^\x{0009}\x{000a}\x{000d}\x{0020}-\x{D7FF}\x{E000}-\x{FFFD}]+/u', ' ', $output );
        $this->sink->write( $output );
    }

    /**
     * Test if anon users are allowed to view a particular revision.
     *
     * The check is made for the user with id 0, never for whoever runs the
     * dump.
     *
     * @param AbstractRevision $revision
     * @return bool
     */
    protected function isAllowed( AbstractRevision $revision ) {
        $user = User::newFromId( 0 );
        $actions = Container::get( 'flow_actions' );
        $permissions = new RevisionActionPermissions( $actions, $user );

        return $permissions->isAllowed( $revision, 'view' );
    }
}