Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
0.00% covered (danger)
0.00%
0 / 203
0.00% covered (danger)
0.00%
0 / 13
CRAP
0.00% covered (danger)
0.00%
0 / 1
Exporter
0.00% covered (danger)
0.00%
0 / 203
0.00% covered (danger)
0.00%
0 / 13
2256
0.00% covered (danger)
0.00%
0 / 1
 __construct
0.00% covered (danger)
0.00%
0 / 21
0.00% covered (danger)
0.00%
0 / 1
6
 schemaVersion
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 openStream
0.00% covered (danger)
0.00%
0 / 13
0.00% covered (danger)
0.00%
0 / 1
2
 getWorkflowIterator
0.00% covered (danger)
0.00%
0 / 28
0.00% covered (danger)
0.00%
0 / 1
56
 dump
0.00% covered (danger)
0.00%
0 / 9
0.00% covered (danger)
0.00%
0 / 1
20
 formatWorkflow
0.00% covered (danger)
0.00%
0 / 15
0.00% covered (danger)
0.00%
0 / 1
20
 formatTopic
0.00% covered (danger)
0.00%
0 / 14
0.00% covered (danger)
0.00%
0 / 1
12
 formatHeader
0.00% covered (danger)
0.00%
0 / 9
0.00% covered (danger)
0.00%
0 / 1
6
 formatPost
0.00% covered (danger)
0.00%
0 / 16
0.00% covered (danger)
0.00%
0 / 1
20
 formatSummary
0.00% covered (danger)
0.00%
0 / 9
0.00% covered (danger)
0.00%
0 / 1
6
 formatRevisions
0.00% covered (danger)
0.00%
0 / 34
0.00% covered (danger)
0.00%
0 / 1
110
 formatRevision
0.00% covered (danger)
0.00%
0 / 30
0.00% covered (danger)
0.00%
0 / 1
42
 isAllowed
0.00% covered (danger)
0.00%
0 / 4
0.00% covered (danger)
0.00%
0 / 1
2
1<?php
2
3namespace Flow\Dump;
4
5use BatchRowIterator;
6use Exception;
7use Flow\Collection\PostSummaryCollection;
8use Flow\Container;
9use Flow\Data\ManagerGroup;
10use Flow\Model\AbstractRevision;
11use Flow\Model\Header;
12use Flow\Model\PostRevision;
13use Flow\Model\PostSummary;
14use Flow\Model\UUID;
15use Flow\Model\Workflow;
16use Flow\RevisionActionPermissions;
17use Flow\Search\Iterators\AbstractIterator;
18use Flow\Search\Iterators\HeaderIterator;
19use Flow\Search\Iterators\TopicIterator;
20use MediaWiki\CommentStore\CommentStore;
21use MediaWiki\HookContainer\HookContainer;
22use MediaWiki\MediaWikiServices;
23use MediaWiki\Revision\RevisionStore;
24use MediaWiki\Title\Title;
25use MediaWiki\Title\TitleParser;
26use MediaWiki\User\CentralId\CentralIdLookup;
27use MediaWiki\User\User;
28use MediaWiki\WikiMap\WikiMap;
29use MediaWiki\Xml\Xml;
30use ReflectionProperty;
31use WikiExporter;
32use Wikimedia\Rdbms\IReadableDatabase;
33use Wikimedia\Timestamp\TimestampException;
34
35class Exporter extends WikiExporter {
36    /**
37     * Map of [db column name => xml attribute name]
38     *
39     * @var array
40     */
41    public static $map = [
42        'rev_id' => 'id',
43        'rev_user_id' => 'userid',
44        'rev_user_ip' => 'userip',
45        'rev_user_wiki' => 'userwiki',
46        'rev_parent_id' => 'parentid',
47        'rev_change_type' => 'changetype',
48        'rev_type' => 'type',
49        'rev_type_id' => 'typeid',
50        'rev_content' => 'content',
51        'rev_content_url' => 'contenturl',
52        'rev_flags' => 'flags',
53        'rev_mod_state' => 'modstate',
54        'rev_mod_user_id' => 'moduserid',
55        'rev_mod_user_ip' => 'moduserip',
56        'rev_mod_user_wiki' => 'moduserwiki',
57        'rev_mod_timestamp' => 'modtimestamp',
58        'rev_mod_reason' => 'modreason',
59        'rev_last_edit_id' => 'lasteditid',
60        'rev_edit_user_id' => 'edituserid',
61        'rev_edit_user_ip' => 'edituserip',
62        'rev_edit_user_wiki' => 'edituserwiki',
63        'rev_content_length' => 'contentlength',
64        'rev_previous_content_length' => 'previouscontentlength',
65
66        'tree_parent_id' => 'treeparentid',
67        'tree_rev_descendant_id' => 'treedescendantid',
68        'tree_rev_id' => 'treerevid',
69        'tree_orig_user_id' => 'treeoriguserid',
70        'tree_orig_user_ip' => 'treeoriguserip',
71        'tree_orig_user_wiki' => 'treeoriguserwiki',
72    ];
73
74    /**
75     * @var ReflectionProperty Previous revision property
76     */
77    protected $prevRevisionProperty;
78
79    /**
80     * @var ReflectionProperty Change type property
81     */
82    protected $changeTypeProperty;
83
84    /**
85     * To convert between local and global user ids
86     *
87     * @var CentralIdLookup|null
88     */
89    protected $lookup;
90
91    /**
92     * @inheritDoc
93     */
94    public function __construct(
95        $db,
96        CommentStore $commentStore,
97        HookContainer $hookContainer,
98        RevisionStore $revisionStore,
99        TitleParser $titleParser,
100        $history = WikiExporter::CURRENT,
101        $text = WikiExporter::TEXT,
102        $limitNamespaces = null
103    ) {
104        parent::__construct(
105            $db,
106            $commentStore,
107            $hookContainer,
108            $revisionStore,
109            $titleParser,
110            $history,
111            $text,
112            $limitNamespaces
113        );
114        $this->prevRevisionProperty = new ReflectionProperty( AbstractRevision::class, 'prevRevision' );
115        $this->prevRevisionProperty->setAccessible( true );
116
117        $this->changeTypeProperty = new ReflectionProperty( AbstractRevision::class, 'changeType' );
118        $this->changeTypeProperty->setAccessible( true );
119
120        try {
121            $this->lookup = MediaWikiServices::getInstance()
122                ->getCentralIdLookupFactory()
123                ->getLookup( 'CentralAuth' );
124        } catch ( \Throwable $unused ) {
125            $this->lookup = null;
126        }
127    }
128
129    public static function schemaVersion() {
130        /*
131         * Be sure to also update the schema/namespace on mediawiki.org when
132         * making any changes:
133         * @see https://gerrit.wikimedia.org/r/#/c/281640/
134         */
135        return '1.0';
136    }
137
138    public function openStream() {
139        global $wgLanguageCode;
140        $version = static::schemaVersion();
141
142        $output = Xml::openElement(
143            'mediawiki',
144            [
145                'xmlns' => "http://www.mediawiki.org/xml/flow-$version/",
146                'xmlns:xsi' => 'http://www.w3.org/2001/XMLSchema-instance',
147                'xsi:schemaLocation' => "http://www.mediawiki.org/xml/flow-$version/ https://www.mediawiki.org/xml/flow-$version.xsd",
148                'version' => $version,
149                'xml:lang' => $wgLanguageCode
150            ]
151        ) . "\n";
152        $this->sink->write( $output );
153    }
154
155    /**
156     * @param string[]|null $pages Array of DB-prefixed page titles
157     * @param int|null $startId page_id to start from (inclusive)
158     * @param int|null $endId page_id to end (exclusive)
159     * @param string|null $workflowStartId workflow_id, b36-encoded, to start from (inclusive)
160     * @param string|null $workflowEndId wokflow_id, b36-encoded, to end (exclusive)
161     * @return BatchRowIterator
162     */
163    public function getWorkflowIterator( ?array $pages = null, $startId = null, $endId = null,
164        $workflowStartId = null, $workflowEndId = null ) {
165        /** @var IReadableDatabase $dbr */
166        $dbr = Container::get( 'db.factory' )->getDB( DB_REPLICA );
167
168        $iterator = new BatchRowIterator( $dbr, 'flow_workflow', 'workflow_id', 300 );
169        $iterator->setFetchColumns( [ '*' ] );
170        $iterator->addConditions( [ 'workflow_wiki' => WikiMap::getCurrentWikiId() ] );
171        $iterator->addConditions( [ 'workflow_type' => 'discussion' ] );
172        $iterator->setCaller( __METHOD__ );
173
174        if ( $pages ) {
175            $pageConds = [];
176            foreach ( $pages as $page ) {
177                $title = Title::newFromDBkey( $page );
178                $pageConds[] = $dbr->andExpr( [
179                    'workflow_namespace' => $title->getNamespace(),
180                    'workflow_title_text' => $title->getDBkey()
181                ] );
182            }
183
184            $iterator->addConditions( [ $dbr->orExpr( $pageConds ) ] );
185        }
186        if ( $startId ) {
187            $iterator->addConditions( [ $dbr->expr( 'workflow_page_id', '>=', $startId ) ] );
188        }
189        if ( $endId ) {
190            $iterator->addConditions( [ $dbr->expr( 'workflow_page_id', '<', $endId ) ] );
191        }
192
193        if ( $workflowStartId ) {
194            $tempUUID = UUID::create( $workflowStartId );
195            $decodedId = $tempUUID->getBinary();
196            $iterator->addConditions( [ $dbr->expr( 'workflow_id', '>=', $decodedId ) ] );
197        }
198        if ( $workflowEndId ) {
199            $tempUUID = UUID::create( $workflowEndId );
200            $decodedId = $tempUUID->getBinary();
201            $iterator->addConditions( [ $dbr->expr( 'workflow_id', '<', $decodedId ) ] );
202        }
203        return $iterator;
204    }
205
206    /**
207     * @param BatchRowIterator $workflowIterator
208     * @throws Exception
209     * @throws TimestampException
210     * @throws \Flow\Exception\InvalidInputException
211     */
212    public function dump( BatchRowIterator $workflowIterator ) {
213        foreach ( $workflowIterator as $rows ) {
214            foreach ( $rows as $row ) {
215                $workflow = Workflow::fromStorageRow( (array)$row );
216
217                $headerIterator = Container::get( 'search.index.iterators.header' );
218                $topicIterator = Container::get( 'search.index.iterators.topic' );
219                $topicIterator->orderByUUID = true;
220                /** @var AbstractIterator $iterator */
221                foreach ( [ $headerIterator, $topicIterator ] as $iterator ) {
222                    $iterator->setPage( $row->workflow_page_id );
223                }
224
225                $this->formatWorkflow( $workflow, $headerIterator, $topicIterator );
226            }
227        }
228    }
229
230    protected function formatWorkflow( Workflow $workflow, HeaderIterator $headerIterator, TopicIterator $topicIterator ) {
231        if ( $workflow->isDeleted() ) {
232            return;
233        }
234
235        $output = Xml::openElement( 'board', [
236            'id' => $workflow->getId()->getAlphadecimal(),
237            'title' => $workflow->getOwnerTitle()->getPrefixedDBkey(),
238        ] ) . "\n";
239        $this->sink->write( $output );
240
241        foreach ( $headerIterator as $revision ) {
242            /** @var Header $revision */
243            '@phan-var Header $revision';
244            $this->formatHeader( $revision );
245        }
246        foreach ( $topicIterator as $revision ) {
247            /** @var PostRevision $revision */
248            '@phan-var PostRevision $revision';
249            $this->formatTopic( $revision );
250        }
251
252        $output = Xml::closeElement( 'board' ) . "\n";
253        $this->sink->write( $output );
254    }
255
256    protected function formatTopic( PostRevision $revision ) {
257        if ( !$this->isAllowed( $revision ) ) {
258            return;
259        }
260
261        $output = Xml::openElement( 'topic', [
262            'id' => $revision->getCollectionId()->getAlphadecimal(),
263        ] ) . "\n";
264        $this->sink->write( $output );
265
266        $this->formatPost( $revision );
267
268        // find summary for this topic & add it as revision
269        $summaryCollection = PostSummaryCollection::newFromId( $revision->getCollectionId() );
270        try {
271            /** @var PostSummary $summary */
272            $summary = $summaryCollection->getLastRevision();
273            '@phan-var PostSummary $summary';
274            $this->formatSummary( $summary );
275        } catch ( \Exception $e ) {
276            // no summary - that's ok!
277        }
278
279        $output = Xml::closeElement( 'topic' ) . "\n";
280        $this->sink->write( $output );
281    }
282
283    protected function formatHeader( Header $revision ) {
284        if ( !$this->isAllowed( $revision ) ) {
285            return;
286        }
287
288        $output = Xml::openElement( 'description', [
289            'id' => $revision->getCollectionId()->getAlphadecimal()
290        ] ) . "\n";
291        $this->sink->write( $output );
292
293        $this->formatRevisions( $revision );
294
295        $output = Xml::closeElement( 'description' ) . "\n";
296        $this->sink->write( $output );
297    }
298
299    protected function formatPost( PostRevision $revision ) {
300        if ( !$this->isAllowed( $revision ) ) {
301            return;
302        }
303
304        $output = Xml::openElement( 'post', [
305            'id' => $revision->getCollectionId()->getAlphadecimal()
306        ] ) . "\n";
307        $this->sink->write( $output );
308
309        $this->formatRevisions( $revision );
310
311        if ( $revision->getChildren() ) {
312            $output = Xml::openElement( 'children' ) . "\n";
313            $this->sink->write( $output );
314
315            foreach ( $revision->getChildren() as $child ) {
316                $this->formatPost( $child );
317            }
318
319            $output = Xml::closeElement( 'children' ) . "\n";
320            $this->sink->write( $output );
321        }
322
323        $output = Xml::closeElement( 'post' ) . "\n";
324        $this->sink->write( $output );
325    }
326
327    protected function formatSummary( PostSummary $revision ) {
328        if ( !$this->isAllowed( $revision ) ) {
329            return;
330        }
331
332        $output = Xml::openElement( 'summary', [
333            'id' => $revision->getCollectionId()->getAlphadecimal()
334        ] ) . "\n";
335        $this->sink->write( $output );
336
337        $this->formatRevisions( $revision );
338
339        $output = Xml::closeElement( 'summary' ) . "\n";
340        $this->sink->write( $output );
341    }
342
343    protected function formatRevisions( AbstractRevision $revision ) {
344        $output = Xml::openElement( 'revisions' ) . "\n";
345        $this->sink->write( $output );
346
347        $collection = $revision->getCollection();
348        if ( $this->history === WikiExporter::FULL ) {
349            /** @var AbstractRevision[] $revisions */
350            $revisions = array_reverse( $collection->getAllRevisions() );
351            $prevId = null;
352
353            foreach ( $revisions as $revision ) {
354                if ( $this->isAllowed( $revision ) ) {
355                    if ( $prevId !== null ) {
356                        // override parent id: this is used to get rid of gaps
357                        // that are caused by moderated items, where the
358                        // revision tree would be incorrect
359                        $this->prevRevisionProperty->setValue( $revision, $prevId );
360
361                        // Since $prevId is set, we know
362                        // there was a gap, and the original
363                        // hide-topic/delete-topic/suppress-topic
364                        // was removed. Since that is used for
365                        // listeners in FlowActions.php, we replace
366                        // restore-topic with edit-title and make a
367                        // null edit (we don't do null edits in the
368                        // normal application flow, but this
369                        // provides a way to replace restore).
370                        $oldChangeType = $revision->getChangeType();
371
372                        if ( $oldChangeType === 'restore-topic' ) {
373                            $this->changeTypeProperty->setValue( $revision, 'edit-title' );
374                        }
375
376                        if ( $oldChangeType === 'restore-post' ) {
377                            $this->changeTypeProperty->setValue( $revision, 'edit-post' );
378                        }
379
380                        $prevId = null;
381                    }
382                    $this->formatRevision( $revision );
383                } elseif ( $prevId === null ) {
384                    // if revision can't be dumped, store its parent id so we
385                    // can re-apply it to the next one that can be displayed, so
386                    // we don't have gaps
387                    $prevId = $revision->getPrevRevisionId();
388                }
389            }
390        } elseif ( $this->history === WikiExporter::CURRENT ) {
391            $first = $collection->getFirstRevision();
392
393            // storing only last revision won't work (it'll reference non-existing
394            // parents): we'll construct a bogus revision with most of the original
395            // metadata, but with the current content & id (= timestamp)
396            $first = $first->toStorageRow( $first );
397            $last = $revision->toStorageRow( $revision );
398            $first['rev_id'] = $last['rev_id'];
399            $first['rev_content'] = $last['rev_content'];
400            $first['rev_flags'] = $last['rev_flags'];
401            if ( isset( $first['tree_rev_id'] ) ) {
402                // PostRevision-only: tree_rev_id must match rev_id
403                $first['tree_rev_id'] = $first['rev_id'];
404            }
405
406            // clear buffered cache, to make sure it doesn't serve the existing (already
407            // loaded) revision when trying to turn our bogus mixed data into a revision
408            /** @var ManagerGroup $storage */
409            $storage = Container::get( 'storage' );
410            $storage->clear();
411
412            $mix = $revision->fromStorageRow( $first );
413
414            $this->formatRevision( $mix );
415        }
416
417        $output = Xml::closeElement( 'revisions' ) . "\n";
418        $this->sink->write( $output );
419    }
420
421    /**
422     * @param AbstractRevision $revision
423     * @suppress SecurityCheck-DoubleEscaped
424     */
425    protected function formatRevision( AbstractRevision $revision ) {
426        if ( !$this->isAllowed( $revision ) ) {
427            return;
428        }
429
430        $attribs = $revision->toStorageRow( $revision );
431
432        // make sure there are no leftover key columns (unknown to $attribs)
433        $keys = array_intersect_key( static::$map, $attribs );
434        // now make sure $values columns are in the same order as $keys are
435        // (array_merge) and there are no leftover columns (array_intersect_key)
436        $values = array_intersect_key( array_merge( $keys, $attribs ), $keys );
437        // combine them
438        $attribs = array_combine( $keys, $values );
439        // and get rid of columns with null values
440        $attribs = array_filter( $attribs, static function ( $value ) {
441            return $value !== null;
442        } );
443
444        // references to external store etc. are useless; we'll include the real
445        // content as node text
446        unset( $attribs['content'], $attribs['contenturl'] );
447        $format = $revision->getContentFormat();
448        $attribs['flags'] = 'utf-8,' . $format;
449
450        if ( $this->lookup ) {
451            $userIdFields = [ 'userid', 'treeoriguserid', 'moduserid', 'edituserid' ];
452            foreach ( $userIdFields as $userIdField ) {
453                if ( isset( $attribs[ $userIdField ] ) ) {
454                    $user = User::newFromId( (int)$attribs[ $userIdField ] );
455                    $globalUserId = $this->lookup->centralIdFromLocalUser(
456                        $user,
457                        CentralIdLookup::AUDIENCE_RAW
458                    );
459                    if ( $globalUserId ) {
460                        $attribs[ 'global' . $userIdField ] = $globalUserId;
461                    }
462                }
463            }
464        }
465
466        $output = Xml::element(
467            'revision',
468            $attribs,
469            $revision->getContent( $format )
470        ) . "\n";
471        // filter out bad characters that may have crept into old revisions
472        $output = preg_replace( '/[^\x{0009}\x{000a}\x{000d}\x{0020}-\x{D7FF}\x{E000}-\x{FFFD}]+/u', ' ', $output );
473        $this->sink->write( $output );
474    }
475
476    /**
477     * Test if anon users are allowed to view a particular revision.
478     *
479     * @param AbstractRevision $revision
480     * @return bool
481     */
482    protected function isAllowed( AbstractRevision $revision ) {
483        $user = User::newFromId( 0 );
484        $actions = Container::get( 'flow_actions' );
485        $permissions = new RevisionActionPermissions( $actions, $user );
486
487        return $permissions->isAllowed( $revision, 'view' );
488    }
489}