Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 206 |
|
0.00% |
0 / 13 |
CRAP | |
0.00% |
0 / 1 |
Exporter | |
0.00% |
0 / 206 |
|
0.00% |
0 / 13 |
2256 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 21 |
|
0.00% |
0 / 1 |
6 | |||
schemaVersion | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
openStream | |
0.00% |
0 / 13 |
|
0.00% |
0 / 1 |
2 | |||
getWorkflowIterator | |
0.00% |
0 / 31 |
|
0.00% |
0 / 1 |
56 | |||
dump | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
20 | |||
formatWorkflow | |
0.00% |
0 / 15 |
|
0.00% |
0 / 1 |
20 | |||
formatTopic | |
0.00% |
0 / 14 |
|
0.00% |
0 / 1 |
12 | |||
formatHeader | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
6 | |||
formatPost | |
0.00% |
0 / 16 |
|
0.00% |
0 / 1 |
20 | |||
formatSummary | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
6 | |||
formatRevisions | |
0.00% |
0 / 34 |
|
0.00% |
0 / 1 |
110 | |||
formatRevision | |
0.00% |
0 / 30 |
|
0.00% |
0 / 1 |
42 | |||
isAllowed | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
2 |
1 | <?php |
2 | |
3 | namespace Flow\Dump; |
4 | |
5 | use BatchRowIterator; |
6 | use Exception; |
7 | use Flow\Collection\PostSummaryCollection; |
8 | use Flow\Container; |
9 | use Flow\Data\ManagerGroup; |
10 | use Flow\Model\AbstractRevision; |
11 | use Flow\Model\Header; |
12 | use Flow\Model\PostRevision; |
13 | use Flow\Model\PostSummary; |
14 | use Flow\Model\UUID; |
15 | use Flow\Model\Workflow; |
16 | use Flow\RevisionActionPermissions; |
17 | use Flow\Search\Iterators\AbstractIterator; |
18 | use Flow\Search\Iterators\HeaderIterator; |
19 | use Flow\Search\Iterators\TopicIterator; |
20 | use MediaWiki\CommentStore\CommentStore; |
21 | use MediaWiki\HookContainer\HookContainer; |
22 | use MediaWiki\MediaWikiServices; |
23 | use MediaWiki\Revision\RevisionStore; |
24 | use MediaWiki\Title\Title; |
25 | use MediaWiki\Title\TitleParser; |
26 | use MediaWiki\User\CentralId\CentralIdLookup; |
27 | use MediaWiki\User\User; |
28 | use MediaWiki\WikiMap\WikiMap; |
29 | use ReflectionProperty; |
30 | use WikiExporter; |
31 | use Wikimedia\Rdbms\IDatabase; |
32 | use Wikimedia\Timestamp\TimestampException; |
33 | use Xml; |
34 | |
35 | class Exporter extends WikiExporter { |
/**
 * Map of [db column name => xml attribute name]
 *
 * formatRevision() uses this to translate the storage row of a revision
 * into the attributes of its <revision> element: only columns listed here
 * are considered, and attributes are emitted in the order of this map.
 * The 'content' and 'contenturl' attributes are removed again there,
 * because the content itself is written as the text of the element.
 *
 * @var array
 */
public static $map = [
	'rev_id' => 'id',
	'rev_user_id' => 'userid',
	'rev_user_ip' => 'userip',
	'rev_user_wiki' => 'userwiki',
	'rev_parent_id' => 'parentid',
	'rev_change_type' => 'changetype',
	'rev_type' => 'type',
	'rev_type_id' => 'typeid',
	'rev_content' => 'content',
	'rev_content_url' => 'contenturl',
	'rev_flags' => 'flags',
	'rev_mod_state' => 'modstate',
	'rev_mod_user_id' => 'moduserid',
	'rev_mod_user_ip' => 'moduserip',
	'rev_mod_user_wiki' => 'moduserwiki',
	'rev_mod_timestamp' => 'modtimestamp',
	'rev_mod_reason' => 'modreason',
	'rev_last_edit_id' => 'lasteditid',
	'rev_edit_user_id' => 'edituserid',
	'rev_edit_user_ip' => 'edituserip',
	'rev_edit_user_wiki' => 'edituserwiki',
	'rev_content_length' => 'contentlength',
	'rev_previous_content_length' => 'previouscontentlength',

	'tree_parent_id' => 'treeparentid',
	'tree_rev_descendant_id' => 'treedescendantid',
	'tree_rev_id' => 'treerevid',
	'tree_orig_user_id' => 'treeoriguserid',
	'tree_orig_user_ip' => 'treeoriguserip',
	'tree_orig_user_wiki' => 'treeoriguserwiki',
];

/**
 * Reflection handle on AbstractRevision::$prevRevision, made accessible in
 * the constructor. formatRevisions() uses it to overwrite the parent id of
 * a revision when revisions before it could not be dumped.
 *
 * @var ReflectionProperty Previous revision property
 */
protected $prevRevisionProperty;

/**
 * Reflection handle on AbstractRevision::$changeType, made accessible in
 * the constructor. formatRevisions() uses it to rewrite 'restore-topic' and
 * 'restore-post' change types after a gap in the dumped history.
 *
 * @var ReflectionProperty Change type property
 */
protected $changeTypeProperty;

/**
 * To convert between local and global user ids
 *
 * Null when the 'CentralAuth' lookup could not be obtained in the
 * constructor; formatRevision() then emits no global user id attributes.
 *
 * @var CentralIdLookup|null
 */
protected $lookup;
90 | |
/**
 * Sets up the parent exporter, the reflection handles used to rewrite
 * revision metadata, and the (optional) central id lookup.
 *
 * @inheritDoc
 */
public function __construct(
	$db,
	CommentStore $commentStore,
	HookContainer $hookContainer,
	RevisionStore $revisionStore,
	TitleParser $titleParser,
	$history = WikiExporter::CURRENT,
	$text = WikiExporter::TEXT,
	$limitNamespaces = null
) {
	parent::__construct(
		$db,
		$commentStore,
		$hookContainer,
		$revisionStore,
		$titleParser,
		$history,
		$text,
		$limitNamespaces
	);

	// Both properties are not public on AbstractRevision, but the dump
	// needs to overwrite them: open them up through reflection.
	$openProperty = static function ( $name ) {
		$property = new ReflectionProperty( AbstractRevision::class, $name );
		$property->setAccessible( true );
		return $property;
	};
	$this->prevRevisionProperty = $openProperty( 'prevRevision' );
	$this->changeTypeProperty = $openProperty( 'changeType' );

	// The central id lookup is optional: when it cannot be obtained for
	// whatever reason, the dump simply carries no global user ids.
	$centralLookup = null;
	try {
		$services = MediaWikiServices::getInstance();
		$centralLookup = $services->getCentralIdLookupFactory()->getLookup( 'CentralAuth' );
	} catch ( \Throwable $unused ) {
		$centralLookup = null;
	}
	$this->lookup = $centralLookup;
}
128 | |
/**
 * Version of the Flow dump XML schema; it is part of the namespace and
 * schema location written by openStream().
 *
 * Be sure to also update the schema/namespace on mediawiki.org when
 * making any changes:
 * @see https://gerrit.wikimedia.org/r/#/c/281640/
 *
 * @return string
 */
public static function schemaVersion() {
	$version = '1.0';

	return $version;
}
137 | |
/**
 * Write the opening <mediawiki> root tag of the dump to the sink.
 *
 * The tag carries the Flow schema namespace and location for the current
 * schemaVersion(), that version itself, and the wiki's language code.
 */
public function openStream() {
	global $wgLanguageCode;

	$version = static::schemaVersion();

	$rootAttributes = [
		'xmlns' => "http://www.mediawiki.org/xml/flow-$version/",
		'xmlns:xsi' => 'http://www.w3.org/2001/XMLSchema-instance',
		'xsi:schemaLocation' => "http://www.mediawiki.org/xml/flow-$version/ https://www.mediawiki.org/xml/flow-$version.xsd",
		'version' => $version,
		'xml:lang' => $wgLanguageCode
	];
	$rootTag = Xml::openElement( 'mediawiki', $rootAttributes );

	$this->sink->write( $rootTag . "\n" );
}
154 | |
/**
 * Build a batched iterator over the 'discussion' workflows of the current
 * wiki, optionally restricted to given pages, a page_id range and/or a
 * workflow_id range.
 *
 * @param string[]|null $pages Array of DB-prefixed page titles
 * @param int|null $startId page_id to start from (inclusive)
 * @param int|null $endId page_id to end (exclusive)
 * @param string|null $workflowStartId workflow_id, b36-encoded, to start from (inclusive)
 * @param string|null $workflowEndId workflow_id, b36-encoded, to end (exclusive)
 * @return BatchRowIterator
 * @throws \InvalidArgumentException When an entry of $pages is not a valid title
 */
public function getWorkflowIterator( ?array $pages = null, $startId = null, $endId = null,
	$workflowStartId = null, $workflowEndId = null ) {
	/** @var IDatabase $dbr */
	$dbr = Container::get( 'db.factory' )->getDB( DB_REPLICA );

	$iterator = new BatchRowIterator( $dbr, 'flow_workflow', 'workflow_id', 300 );
	$iterator->setFetchColumns( [ '*' ] );
	$iterator->addConditions( [ 'workflow_wiki' => WikiMap::getCurrentWikiId() ] );
	$iterator->addConditions( [ 'workflow_type' => 'discussion' ] );
	$iterator->setCaller( __METHOD__ );

	if ( $pages ) {
		$pageConds = [];
		foreach ( $pages as $page ) {
			$title = Title::newFromDBkey( $page );
			if ( $title === null ) {
				// An unparsable title yields no Title object; fail with a
				// clear message instead of a fatal "call on null" below.
				throw new \InvalidArgumentException( "Invalid page title: $page" );
			}
			$pageConds[] = $dbr->makeList(
				[
					'workflow_namespace' => $title->getNamespace(),
					'workflow_title_text' => $title->getDBkey()
				],
				LIST_AND
			);
		}

		// a workflow matches when it belongs to any one of the given pages
		$iterator->addConditions( [ $dbr->makeList( $pageConds, LIST_OR ) ] );
	}
	if ( $startId ) {
		$iterator->addConditions( [ 'workflow_page_id >= ' . $dbr->addQuotes( $startId ) ] );
	}
	if ( $endId ) {
		$iterator->addConditions( [ 'workflow_page_id < ' . $dbr->addQuotes( $endId ) ] );
	}

	// workflow ids are passed in their encoded form; the conditions compare
	// against the binary form returned by UUID::getBinary()
	if ( $workflowStartId ) {
		$decodedId = UUID::create( $workflowStartId )->getBinary();
		$iterator->addConditions( [ 'workflow_id >= ' . $dbr->addQuotes( $decodedId ) ] );
	}
	if ( $workflowEndId ) {
		$decodedId = UUID::create( $workflowEndId )->getBinary();
		$iterator->addConditions( [ 'workflow_id < ' . $dbr->addQuotes( $decodedId ) ] );
	}
	return $iterator;
}
208 | |
/**
 * Dump every workflow produced by the given iterator: each row is turned
 * into a Workflow and written out together with the header and topics of
 * its page.
 *
 * @param BatchRowIterator $workflowIterator
 * @throws Exception
 * @throws TimestampException
 * @throws \Flow\Exception\InvalidInputException
 */
public function dump( BatchRowIterator $workflowIterator ) {
	foreach ( $workflowIterator as $batch ) {
		foreach ( $batch as $workflowRow ) {
			$workflow = Workflow::fromStorageRow( (array)$workflowRow );

			$headers = Container::get( 'search.index.iterators.header' );
			$topics = Container::get( 'search.index.iterators.topic' );
			$topics->orderByUUID = true;

			// point both iterators at the page this workflow belongs to
			$pageId = $workflowRow->workflow_page_id;
			$headers->setPage( $pageId );
			$topics->setPage( $pageId );

			$this->formatWorkflow( $workflow, $headers, $topics );
		}
	}
}
232 | |
/**
 * Write one <board> element: the headers first, then the topics.
 * Deleted workflows are skipped entirely.
 *
 * @param Workflow $workflow
 * @param HeaderIterator $headerIterator
 * @param TopicIterator $topicIterator
 */
protected function formatWorkflow( Workflow $workflow, HeaderIterator $headerIterator, TopicIterator $topicIterator ) {
	if ( !$workflow->isDeleted() ) {
		$boardAttributes = [
			'id' => $workflow->getId()->getAlphadecimal(),
			'title' => $workflow->getOwnerTitle()->getPrefixedDBkey(),
		];
		$this->sink->write( Xml::openElement( 'board', $boardAttributes ) . "\n" );

		foreach ( $headerIterator as $revision ) {
			/** @var Header $revision */
			'@phan-var Header $revision';
			$this->formatHeader( $revision );
		}
		foreach ( $topicIterator as $revision ) {
			/** @var PostRevision $revision */
			'@phan-var PostRevision $revision';
			$this->formatTopic( $revision );
		}

		$this->sink->write( Xml::closeElement( 'board' ) . "\n" );
	}
}
258 | |
/**
 * Write one <topic> element: the topic's post tree, followed by its
 * summary when one exists. Topics that are not viewable are skipped.
 *
 * @param PostRevision $revision Topic title revision
 */
protected function formatTopic( PostRevision $revision ) {
	if ( !$this->isAllowed( $revision ) ) {
		return;
	}

	$output = Xml::openElement( 'topic', [
		'id' => $revision->getCollectionId()->getAlphadecimal(),
	] ) . "\n";
	$this->sink->write( $output );

	$this->formatPost( $revision );

	// find summary for this topic & add it as revision
	$summaryCollection = PostSummaryCollection::newFromId( $revision->getCollectionId() );
	$summary = null;
	try {
		$summary = $summaryCollection->getLastRevision();
	} catch ( \Exception $unused ) {
		// no summary - that's ok!
	}

	// Only the lookup above is allowed to fail silently. Writing the
	// summary happens outside the try block, so that a failure halfway
	// through is not swallowed, leaving unbalanced XML behind unnoticed.
	if ( $summary instanceof PostSummary ) {
		$this->formatSummary( $summary );
	}

	$output = Xml::closeElement( 'topic' ) . "\n";
	$this->sink->write( $output );
}
285 | |
/**
 * Write a <description> element holding the revisions of a board header.
 * Nothing is written when the header is not viewable.
 *
 * @param Header $revision
 */
protected function formatHeader( Header $revision ) {
	if ( $this->isAllowed( $revision ) ) {
		$attributes = [
			'id' => $revision->getCollectionId()->getAlphadecimal()
		];
		$this->sink->write( Xml::openElement( 'description', $attributes ) . "\n" );

		$this->formatRevisions( $revision );

		$this->sink->write( Xml::closeElement( 'description' ) . "\n" );
	}
}
301 | |
/**
 * Write a <post> element: the post's revisions, then (recursively) its
 * replies wrapped in a <children> element. Posts that are not viewable
 * are skipped, together with everything below them.
 *
 * @param PostRevision $revision
 */
protected function formatPost( PostRevision $revision ) {
	if ( $this->isAllowed( $revision ) ) {
		$attributes = [
			'id' => $revision->getCollectionId()->getAlphadecimal()
		];
		$this->sink->write( Xml::openElement( 'post', $attributes ) . "\n" );

		$this->formatRevisions( $revision );

		if ( $revision->getChildren() ) {
			$this->sink->write( Xml::openElement( 'children' ) . "\n" );

			foreach ( $revision->getChildren() as $reply ) {
				$this->formatPost( $reply );
			}

			$this->sink->write( Xml::closeElement( 'children' ) . "\n" );
		}

		$this->sink->write( Xml::closeElement( 'post' ) . "\n" );
	}
}
329 | |
/**
 * Write a <summary> element holding the revisions of a topic summary.
 * Nothing is written when the summary is not viewable.
 *
 * @param PostSummary $revision
 */
protected function formatSummary( PostSummary $revision ) {
	if ( $this->isAllowed( $revision ) ) {
		$attributes = [
			'id' => $revision->getCollectionId()->getAlphadecimal()
		];
		$this->sink->write( Xml::openElement( 'summary', $attributes ) . "\n" );

		$this->formatRevisions( $revision );

		$this->sink->write( Xml::closeElement( 'summary' ) . "\n" );
	}
}
345 | |
/**
 * Write a <revisions> element for the collection the given revision
 * belongs to.
 *
 * What ends up inside depends on the export mode ($this->history):
 * - WikiExporter::FULL: every viewable revision of the collection, with
 *   parent ids (and restore change types) patched up where revisions that
 *   can't be viewed had to be left out;
 * - WikiExporter::CURRENT: a single synthetic revision, combining the
 *   metadata of the first revision with the id, content & flags of the
 *   given one;
 * - any other mode: nothing, the element is left empty.
 *
 * @param AbstractRevision $revision
 */
protected function formatRevisions( AbstractRevision $revision ) {
	$output = Xml::openElement( 'revisions' ) . "\n";
	$this->sink->write( $output );

	$collection = $revision->getCollection();
	if ( $this->history === WikiExporter::FULL ) {
		// NOTE(review): reversing presumably turns newest-first into
		// oldest-first, so parents are dumped before children - confirm
		// against getAllRevisions()
		/** @var AbstractRevision[] $revisions */
		$revisions = array_reverse( $collection->getAllRevisions() );
		// parent id of the first revision of the current run of skipped
		// revisions; null while there is no gap to bridge
		$prevId = null;

		// note: the loop variable intentionally reuses (and overwrites)
		// the $revision parameter
		foreach ( $revisions as $revision ) {
			if ( $this->isAllowed( $revision ) ) {
				if ( $prevId !== null ) {
					// override parent id: this is used to get rid of gaps
					// that are caused by moderated items, where the
					// revision tree would be incorrect
					$this->prevRevisionProperty->setValue( $revision, $prevId );

					// Since $prevId is set, we know
					// there was a gap, and the original
					// hide-topic/delete-topic/suppress-topic
					// was removed. Since that is used for
					// listeners in FlowActions.php, we replace
					// restore-topic with edit-title and make a
					// null edit (we don't do null edits in the
					// normal application flow, but this
					// provides a way to replace restore).
					$oldChangeType = $revision->getChangeType();

					if ( $oldChangeType === 'restore-topic' ) {
						$this->changeTypeProperty->setValue( $revision, 'edit-title' );
					}

					if ( $oldChangeType === 'restore-post' ) {
						$this->changeTypeProperty->setValue( $revision, 'edit-post' );
					}

					// gap has been bridged
					$prevId = null;
				}
				$this->formatRevision( $revision );
			} elseif ( $prevId === null ) {
				// if revision can't be dumped, store its parent id so we
				// can re-apply it to the next one that can be displayed, so
				// we don't have gaps
				$prevId = $revision->getPrevRevisionId();
			}
		}
	} elseif ( $this->history === WikiExporter::CURRENT ) {
		$first = $collection->getFirstRevision();

		// storing only last revision won't work (it'll reference non-existing
		// parents): we'll construct a bogus revision with most of the original
		// metadata, but with the current content & id (= timestamp)
		// (from here on, $first is a storage row array, no longer a revision)
		$first = $first->toStorageRow( $first );
		$last = $revision->toStorageRow( $revision );
		$first['rev_id'] = $last['rev_id'];
		$first['rev_content'] = $last['rev_content'];
		$first['rev_flags'] = $last['rev_flags'];
		if ( isset( $first['tree_rev_id'] ) ) {
			// PostRevision-only: tree_rev_id must match rev_id
			$first['tree_rev_id'] = $first['rev_id'];
		}

		// clear buffered cache, to make sure it doesn't serve the existing (already
		// loaded) revision when trying to turn our bogus mixed data into a revision
		/** @var ManagerGroup $storage */
		$storage = Container::get( 'storage' );
		$storage->clear();

		// called on $revision, so the mixed row is turned into a revision
		// through the same class as the revision that was passed in
		$mix = $revision->fromStorageRow( $first );

		$this->formatRevision( $mix );
	}

	$output = Xml::closeElement( 'revisions' ) . "\n";
	$this->sink->write( $output );
}
423 | |
/**
 * Write a single <revision> element: the revision's storage columns (as
 * renamed by static::$map) become attributes, its content becomes the
 * text of the element. Revisions that are not viewable are skipped.
 *
 * @param AbstractRevision $revision
 * @throws \RuntimeException When the element can't be cleaned of characters
 *  that are not allowed in XML
 * @suppress SecurityCheck-DoubleEscaped
 */
protected function formatRevision( AbstractRevision $revision ) {
	if ( !$this->isAllowed( $revision ) ) {
		return;
	}

	$row = $revision->toStorageRow( $revision );

	// Rename the db columns to their xml attribute names, in the order of
	// static::$map. Columns unknown to the map are dropped, and so are
	// columns without a value (isset() is false for null).
	$attribs = [];
	foreach ( static::$map as $column => $attribute ) {
		if ( isset( $row[$column] ) ) {
			$attribs[$attribute] = $row[$column];
		}
	}

	// references to external store etc. are useless; we'll include the real
	// content as node text
	unset( $attribs['content'], $attribs['contenturl'] );
	$format = $revision->getContentFormat();
	$attribs['flags'] = 'utf-8,' . $format;

	if ( $this->lookup ) {
		// complement every local user id with its global counterpart
		$userIdFields = [ 'userid', 'treeoriguserid', 'moduserid', 'edituserid' ];
		foreach ( $userIdFields as $userIdField ) {
			if ( isset( $attribs[ $userIdField ] ) ) {
				$user = User::newFromId( (int)$attribs[ $userIdField ] );
				$globalUserId = $this->lookup->centralIdFromLocalUser(
					$user,
					CentralIdLookup::AUDIENCE_RAW
				);
				if ( $globalUserId ) {
					$attribs[ 'global' . $userIdField ] = $globalUserId;
				}
			}
		}
	}

	$output = Xml::element(
		'revision',
		$attribs,
		$revision->getContent( $format )
	) . "\n";

	// filter out bad characters that may have crept into old revisions:
	// everything outside of the XML 1.0 "Char" production. The range
	// U+10000-U+10FFFF is part of that production (it holds e.g. emoji)
	// and must be left alone.
	$badCharacters = '/[^\x{0009}\x{000a}\x{000d}\x{0020}-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}]+/u';
	$clean = preg_replace( $badCharacters, ' ', $output );
	if ( $clean === null ) {
		// With the /u modifier, preg_replace() gives up (returns null) on
		// input that is not valid UTF-8. Writing that null would silently
		// lose the revision: repair the encoding and try again instead.
		$clean = preg_replace( $badCharacters, ' ', mb_scrub( $output, 'UTF-8' ) );
	}
	if ( $clean === null ) {
		throw new \RuntimeException(
			'Failed to filter invalid characters from revision (PCRE error code ' .
			preg_last_error() . ')'
		);
	}
	$this->sink->write( $clean );
}
478 | |
/**
 * Test if anon users are allowed to view a particular revision.
 *
 * The check is performed on behalf of the user with id 0, through a
 * RevisionActionPermissions built on the 'flow_actions' container entry.
 *
 * @param AbstractRevision $revision
 * @return bool
 */
protected function isAllowed( AbstractRevision $revision ) {
	$anonymous = User::newFromId( 0 );
	$permissions = new RevisionActionPermissions( Container::get( 'flow_actions' ), $anonymous );

	return $permissions->isAllowed( $revision, 'view' );
}
492 | } |