Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
2.62% |
8 / 305 |
|
0.00% |
0 / 24 |
CRAP | |
0.00% |
0 / 1 |
XmlDumpWriter | |
2.62% |
8 / 305 |
|
0.00% |
0 / 24 |
5841.67 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 17 |
|
0.00% |
0 / 1 |
2 | |||
openStream | |
0.00% |
0 / 11 |
|
0.00% |
0 / 1 |
2 | |||
siteInfo | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
2 | |||
sitename | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
dbname | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
generator | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
homelink | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
caseSetting | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
namespaces | |
0.00% |
0 / 13 |
|
0.00% |
0 / 1 |
12 | |||
closeStream | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
openPage | |
0.00% |
0 / 22 |
|
0.00% |
0 / 1 |
20 | |||
closePage | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
getRevisionStore | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getBlobStore | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
invokeLenient | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
12 | |||
writeRevision | |
0.00% |
0 / 56 |
|
0.00% |
0 / 1 |
210 | |||
writeSlot | |
0.00% |
0 / 64 |
|
0.00% |
0 / 1 |
272 | |||
writeText | |
88.89% |
8 / 9 |
|
0.00% |
0 / 1 |
2.01 | |||
writeLogItem | |
0.00% |
0 / 22 |
|
0.00% |
0 / 1 |
30 | |||
writeTimestamp | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
writeContributor | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
12 | |||
writeUploads | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
30 | |||
writeUpload | |
0.00% |
0 / 31 |
|
0.00% |
0 / 1 |
42 | |||
canonicalTitle | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
12 |
1 | <?php |
2 | /** |
3 | * XmlDumpWriter |
4 | * |
5 | * Copyright © 2003, 2005, 2006 Brooke Vibber <bvibber@wikimedia.org> |
6 | * https://www.mediawiki.org/ |
7 | * |
8 | * This program is free software; you can redistribute it and/or modify |
9 | * it under the terms of the GNU General Public License as published by |
10 | * the Free Software Foundation; either version 2 of the License, or |
11 | * (at your option) any later version. |
12 | * |
13 | * This program is distributed in the hope that it will be useful, |
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
16 | * GNU General Public License for more details. |
17 | * |
18 | * You should have received a copy of the GNU General Public License along |
19 | * with this program; if not, write to the Free Software Foundation, Inc., |
20 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
21 | * http://www.gnu.org/copyleft/gpl.html |
22 | * |
23 | * @file |
24 | */ |
25 | |
26 | use MediaWiki\CommentStore\CommentStore; |
27 | use MediaWiki\HookContainer\HookContainer; |
28 | use MediaWiki\HookContainer\HookRunner; |
29 | use MediaWiki\MainConfigNames; |
30 | use MediaWiki\MediaWikiServices; |
31 | use MediaWiki\Revision\RevisionAccessException; |
32 | use MediaWiki\Revision\RevisionRecord; |
33 | use MediaWiki\Revision\RevisionStore; |
34 | use MediaWiki\Revision\SlotRecord; |
35 | use MediaWiki\Revision\SuppressedDataException; |
36 | use MediaWiki\Storage\SqlBlobStore; |
37 | use MediaWiki\Title\Title; |
38 | use Wikimedia\Assert\Assert; |
39 | use Wikimedia\IPUtils; |
40 | |
41 | /** |
42 | * @ingroup Dump |
43 | */ |
44 | class XmlDumpWriter { |
45 | |
46 | /** Output serialized revision content. */ |
47 | public const WRITE_CONTENT = 0; |
48 | |
49 | /** Only output stubs for revision content. */ |
50 | public const WRITE_STUB = 1; |
51 | |
52 | /** |
53 | * Only output stubs for revision content, indicating that the content has been |
54 | * deleted/suppressed. |
55 | */ |
56 | private const WRITE_STUB_DELETED = 2; |
57 | |
58 | /** |
59 | * @var string[] the schema versions supported for output |
60 | * @final |
61 | */ |
62 | public static $supportedSchemas = [ |
63 | XML_DUMP_SCHEMA_VERSION_10, |
64 | XML_DUMP_SCHEMA_VERSION_11 |
65 | ]; |
66 | |
67 | /** |
68 | * @var string which schema version the generated XML should comply to. |
69 | * One of the values from self::$supportedSchemas, using the XML_DUMP_SCHEMA_VERSION_XX |
70 | * constants. |
71 | */ |
72 | private $schemaVersion; |
73 | |
74 | /** |
75 | * Title of the currently processed page |
76 | * |
77 | * @var Title|null |
78 | */ |
79 | private $currentTitle = null; |
80 | |
81 | /** |
82 | * @var int Whether to output revision content or just stubs. WRITE_CONTENT or WRITE_STUB. |
83 | */ |
84 | private $contentMode; |
85 | |
86 | /** @var HookRunner */ |
87 | private $hookRunner; |
88 | |
89 | /** @var CommentStore */ |
90 | private $commentStore; |
91 | |
/**
 * Checks the requested output options and stores the collaborators used while dumping.
 *
 * Both $contentMode and $schemaVersion are checked with Assert::parameter() before
 * anything is stored.
 *
 * @param int $contentMode WRITE_CONTENT or WRITE_STUB.
 * @param string $schemaVersion which schema version the generated XML should comply to.
 *   One of the values from self::$supportedSchemas, using the XML_DUMP_SCHEMA_VERSION_XX
 *   constants.
 * @param HookContainer|null $hookContainer Container wrapped in a HookRunner; when null,
 *   the container from MediaWikiServices is used.
 * @param CommentStore|null $commentStore When null, the store from MediaWikiServices is used.
 */
public function __construct(
	$contentMode = self::WRITE_CONTENT,
	$schemaVersion = XML_DUMP_SCHEMA_VERSION_11,
	?HookContainer $hookContainer = null,
	?CommentStore $commentStore = null
) {
	$acceptedModes = [ self::WRITE_CONTENT, self::WRITE_STUB ];
	Assert::parameter(
		in_array( $contentMode, $acceptedModes, true ),
		'$contentMode',
		'must be one of the following constants: WRITE_CONTENT or WRITE_STUB.'
	);

	$schemaIsSupported = in_array( $schemaVersion, self::$supportedSchemas, true );
	Assert::parameter(
		$schemaIsSupported,
		'$schemaVersion',
		'must be one of the following schema versions: '
			. implode( ',', self::$supportedSchemas )
	);

	$this->contentMode = $contentMode;
	$this->schemaVersion = $schemaVersion;

	// Fall back to the global service container only for collaborators that were not injected.
	if ( $hookContainer === null ) {
		$hookContainer = MediaWikiServices::getInstance()->getHookContainer();
	}
	$this->hookRunner = new HookRunner( $hookContainer );

	if ( $commentStore === null ) {
		$commentStore = MediaWikiServices::getInstance()->getCommentStore();
	}
	$this->commentStore = $commentStore;
}
126 | |
/**
 * Opens the XML output stream's root "<mediawiki>" element and appends
 * the "<siteinfo>" section.
 *
 * This does not include an xml directive, so is safe to include
 * as a subelement in a larger XML stream. Namespace and XML Schema
 * references are included.
 *
 * Output will be encoded in UTF-8.
 *
 * @return string
 */
public function openStream() {
	$version = $this->schemaVersion;
	$exportNamespace = "http://www.mediawiki.org/xml/export-{$version}/";

	/*
	 * When a new version of the schema is created, it needs staging on mediawiki.org.
	 * This requires a change in the operations/mediawiki-config git repo.
	 *
	 * Create a changeset like https://gerrit.wikimedia.org/r/#/c/149643/ in which
	 * you copy in the new xsd file.
	 *
	 * After it is reviewed, merged and deployed (sync-docroot), the index.html needs purging.
	 * echo "https://www.mediawiki.org/xml/index.html" | mwscript purgeList.php --wiki=aawiki
	 */
	$schemaLocation = $exportNamespace . ' ' .
		"http://www.mediawiki.org/xml/export-{$version}.xsd";

	$rootAttribs = [
		'xmlns' => $exportNamespace,
		'xmlns:xsi' => 'http://www.w3.org/2001/XMLSchema-instance',
		'xsi:schemaLocation' => $schemaLocation,
		'version' => $version,
		'xml:lang' => MediaWikiServices::getInstance()->getContentLanguage()->getHtmlCode(),
	];

	// The content argument is null here; the matching end tag comes from closeStream().
	$rootTag = Xml::element( 'mediawiki', $rootAttribs, null );

	return $rootTag . "\n" . $this->siteInfo();
}
160 | |
/**
 * Assembles the "<siteinfo>" section from the individual site-level elements.
 *
 * @return string
 */
private function siteInfo() {
	// Order matters: this is the order in which the child elements are emitted.
	$children = [];
	$children[] = $this->sitename();
	$children[] = $this->dbname();
	$children[] = $this->homelink();
	$children[] = $this->generator();
	$children[] = $this->caseSetting();
	$children[] = $this->namespaces();

	$body = implode( "\n ", $children );

	return " <siteinfo>\n " . $body . "\n </siteinfo>\n";
}
176 | |
/**
 * Wraps the configured site name (MainConfigNames::Sitename) in a "<sitename>" element.
 *
 * @return string
 */
private function sitename() {
	$config = MediaWikiServices::getInstance()->getMainConfig();
	$name = $config->get( MainConfigNames::Sitename );

	return Xml::element( 'sitename', [], $name );
}
185 | |
/**
 * Wraps the configured database name (MainConfigNames::DBname) in a "<dbname>" element.
 *
 * @return string
 */
private function dbname() {
	$config = MediaWikiServices::getInstance()->getMainConfig();

	return Xml::element( 'dbname', [], $config->get( MainConfigNames::DBname ) );
}
193 | |
/**
 * Builds the "<generator>" element naming the running MediaWiki version (MW_VERSION).
 *
 * @return string
 */
private function generator() {
	$label = 'MediaWiki ' . MW_VERSION;

	return Xml::element( 'generator', [], $label );
}
200 | |
/**
 * Builds the "<base>" element holding the canonical URL of the main page.
 *
 * @return string
 */
private function homelink() {
	$mainPage = Title::newMainPage();

	return Xml::element( 'base', [], $mainPage->getCanonicalURL() );
}
207 | |
/**
 * Builds the "<case>" element describing the site-wide title case rule,
 * derived from MainConfigNames::CapitalLinks.
 *
 * @return string
 */
private function caseSetting() {
	$config = MediaWikiServices::getInstance()->getMainConfig();

	// "case-insensitive" option is reserved for future
	if ( $config->get( MainConfigNames::CapitalLinks ) ) {
		$rule = 'first-letter';
	} else {
		$rule = 'case-sensitive';
	}

	return Xml::element( 'case', [], $rule );
}
218 | |
/**
 * Builds the "<namespaces>" element, with one "<namespace>" child per entry
 * returned by the content language's getFormattedNamespaces().
 *
 * Each child carries the namespace key and its case rule as attributes and the
 * formatted namespace name as content.
 *
 * @return string
 */
private function namespaces() {
	$xml = "<namespaces>\n";
	$nsInfo = MediaWikiServices::getInstance()->getNamespaceInfo();
	$formatted = MediaWikiServices::getInstance()->getContentLanguage()
		->getFormattedNamespaces();

	foreach ( $formatted as $nsId => $nsName ) {
		if ( $nsInfo->isCapitalized( $nsId ) ) {
			$caseRule = 'first-letter';
		} else {
			$caseRule = 'case-sensitive';
		}

		$attribs = [
			'key' => $nsId,
			'case' => $caseRule,
		];
		$xml .= ' ' . Xml::element( 'namespace', $attribs, $nsName ) . "\n";
	}

	return $xml . " </namespaces>";
}
240 | |
/**
 * Returns the end tag of the root "<mediawiki>" element, followed by a newline.
 * Call when finished dumping things.
 *
 * @return string
 */
public function closeStream() {
	$endTag = '</mediawiki>';

	return $endTag . "\n";
}
250 | |
/**
 * Opens a "<page>" section on the output stream, with data
 * from the given database row.
 *
 * Emits the title, namespace and page id. For rows flagged as redirects, a
 * "<redirect>" element is added when a valid redirect target can be resolved.
 * The title built from the row is remembered as the current title, and the
 * XmlDumpWriterOpenPage hook is run on the output before it is returned.
 *
 * @param stdClass $row Must provide page_namespace, page_id and page_is_redirect,
 *   plus whatever Title::newFromRow() needs.
 * @return string
 */
public function openPage( $row ) {
	$title = Title::newFromRow( $row );
	$this->currentTitle = $title;

	$out = " <page>\n";
	$out .= ' ' . Xml::elementClean( 'title', [], self::canonicalTitle( $title ) ) . "\n";
	$out .= ' ' . Xml::element( 'ns', [], strval( $row->page_namespace ) ) . "\n";
	$out .= ' ' . Xml::element( 'id', [], strval( $row->page_id ) ) . "\n";

	if ( $row->page_is_redirect ) {
		$services = MediaWikiServices::getInstance();
		$page = $services->getWikiPageFactory()->newFromTitle( $title );
		$redirectStore = $services->getRedirectStore();

		$lookupTarget = static function () use ( $page, $redirectStore ) {
			return $redirectStore->getRedirectTarget( $page );
		};
		// Failures handled by invokeLenient() yield null, in which case no element is written.
		$target = $this->invokeLenient(
			$lookupTarget,
			'Failed to get redirect target of page ' . $page->getId()
		);

		if ( $target instanceof Title && $target->isValidRedirectTarget() ) {
			$redirectAttribs = [ 'title' => self::canonicalTitle( $target ) ];
			$out .= ' ' . Xml::element( 'redirect', $redirectAttribs ) . "\n";
		}
	}

	$this->hookRunner->onXmlDumpWriterOpenPage( $this, $out, $row, $this->currentTitle );

	return $out;
}
285 | |
/**
 * Closes a "<page>" section on the output stream.
 *
 * If a current title is set, its entry is also cleared from the link cache.
 *
 * @internal
 * @return string
 */
public function closePage() {
	$title = $this->currentTitle;

	if ( $title !== null ) {
		// In rare cases, link cache has the same key for some pages which
		// might be read as part of the same batch. T220424 and T220316
		MediaWikiServices::getInstance()->getLinkCache()->clearLink( $title );
	}

	return " </page>\n";
}
301 | |
/**
 * Fetches the revision store from the global service container.
 *
 * @return RevisionStore
 */
private function getRevisionStore() {
	$services = MediaWikiServices::getInstance();

	return $services->getRevisionStore();
}
308 | |
/**
 * Fetches the blob store from the global service container.
 *
 * The declared return type is narrower than the service getter's, hence the
 * static-analysis suppression on the return statement.
 *
 * @return SqlBlobStore
 */
private function getBlobStore() {
	$services = MediaWikiServices::getInstance();

	// @phan-suppress-next-line PhanTypeMismatchReturnSuperType
	return $services->getBlobStore();
}
316 | |
/**
 * Runs the given callback, turning a fixed set of exception types into a null result.
 *
 * SuppressedDataException is swallowed silently. MWException, RuntimeException,
 * InvalidArgumentException and ErrorException are reported through
 * MWDebug::warning() and then swallowed. Any other exception propagates.
 *
 * @param callable $callback
 * @param string $warning The warning to output in case of a storage related exception.
 *
 * @return mixed Returns the callback's return value, or null in case of a handled exception.
 * @throws Exception
 */
private function invokeLenient( $callback, $warning ) {
	$result = null;

	try {
		$result = $callback();
	} catch ( SuppressedDataException $suppressed ) {
		// Suppressed data is expected: stay quiet and fall through with null.
	} catch ( MWException | RuntimeException | InvalidArgumentException | ErrorException $failure ) {
		MWDebug::warning( $warning . ': ' . $failure->getMessage() );
	}

	return $result;
}
336 | |
/**
 * Dumps a "<revision>" section on the output stream, with
 * data filled in from the given database row.
 *
 * The section contains, in order: id, optional parentid, timestamp, contributor,
 * optional minor flag, comment, the main slot followed by the remaining slots,
 * and the sha1. Hidden user, comment and text are written as placeholder elements.
 * The XmlDumpWriterWriteRevision hook runs before the closing tag is appended.
 *
 * @param stdClass $row
 * @param null|stdClass[] $slotRows
 *
 * @return string
 * @throws RevisionAccessException
 */
public function writeRevision( $row, $slotRows = null ) {
	$rev = $this->getRevisionStore()->newRevisionFromRowAndSlots(
		$row,
		$slotRows,
		0,
		$this->currentTitle
	);

	$out = " <revision>\n";

	$revId = $rev->getId();
	$out .= " " . Xml::element( 'id', null, strval( $revId ) ) . "\n";

	$parentId = $rev->getParentId();
	if ( $parentId ) {
		$out .= " " . Xml::element( 'parentid', null, strval( $parentId ) ) . "\n";
	}

	$out .= $this->writeTimestamp( $rev->getTimestamp() );

	if ( $rev->isDeleted( RevisionRecord::DELETED_USER ) ) {
		$out .= " " . Xml::element( 'contributor', [ 'deleted' => 'deleted' ] ) . "\n";
	} else {
		// empty values get written out as uid 0, see T224221
		$userId = 0;
		$userName = '';
		$user = $rev->getUser();
		if ( $user ) {
			$userId = $user->getId();
			$userName = $user->getName();
		}
		$out .= $this->writeContributor( $userId, $userName );
	}

	if ( $rev->isMinor() ) {
		$out .= " <minor/>\n";
	}

	if ( $rev->isDeleted( RevisionRecord::DELETED_COMMENT ) ) {
		$out .= " " . Xml::element( 'comment', [ 'deleted' => 'deleted' ] ) . "\n";
	} else {
		$commentText = $rev->getComment()->text;
		// Loose comparison on purpose: empty comments are omitted entirely.
		if ( $commentText != '' ) {
			$out .= " "
				. Xml::elementClean( 'comment', [], strval( $commentText ) )
				. "\n";
		}
	}

	// Hidden text overrides the configured mode so that only a "deleted" stub is written.
	$textHidden = $rev->isDeleted( RevisionRecord::DELETED_TEXT );
	if ( $textHidden ) {
		$contentMode = self::WRITE_STUB_DELETED;
	} else {
		$contentMode = $this->contentMode;
	}

	$slots = $rev->getSlots()->getSlots();

	// use predictable order, put main slot first
	ksort( $slots );
	$out .= $this->writeSlot( $slots[SlotRecord::MAIN], $contentMode );

	foreach ( $slots as $role => $slot ) {
		if ( $role !== SlotRecord::MAIN ) {
			$out .= $this->writeSlot( $slot, $contentMode );
		}
	}

	if ( $textHidden ) {
		$out .= " <sha1/>\n";
	} else {
		$readSha1 = static function () use ( $rev ) {
			return $rev->getSha1();
		};
		$sha1 = $this->invokeLenient(
			$readSha1,
			'failed to determine sha1 for revision ' . $revId
		);
		$out .= " " . Xml::element( 'sha1', null, strval( $sha1 ) ) . "\n";
	}

	// The serialized main slot text is not written here; it is only handed to the hook.
	$text = '';
	if ( $contentMode === self::WRITE_CONTENT ) {
		$readMainContent = static function () use ( $rev ) {
			return $rev->getContent( SlotRecord::MAIN, RevisionRecord::RAW );
		};
		/** @var Content $content */
		$content = $this->invokeLenient(
			$readMainContent,
			'Failed to load main slot content of revision ' . $revId
		);

		if ( $content ) {
			$text = $content->serialize();
		}
	}
	$this->hookRunner->onXmlDumpWriterWriteRevision( $this, $out, $row, $text, $rev );

	$out .= " </revision>\n";

	return $out;
}
434 | |
/**
 * Writes the elements describing one slot of a revision.
 *
 * For the main slot the elements are written directly; any other slot is wrapped
 * in a "<content>" element that also names its role. With a schema version below
 * XML_DUMP_SCHEMA_VERSION_11, non-main slots produce no output, and the origin,
 * sha1 and location fields are left out.
 *
 * @param SlotRecord $slot
 * @param int $contentMode see the WRITE_XXX constants
 *
 * @return string
 */
private function writeSlot( SlotRecord $slot, $contentMode ) {
	$role = $slot->getRole();
	$isMain = $role === SlotRecord::MAIN;
	$isV11 = $this->schemaVersion >= XML_DUMP_SCHEMA_VERSION_11;

	if ( !$isMain && !$isV11 ) {
		// ignore extra slots
		return '';
	}

	$indent = ' ';
	$out = '';

	if ( !$isMain ) {
		// non-main slots are wrapped into an additional element.
		$out .= ' ' . Xml::openElement( 'content' ) . "\n";
		$indent .= ' ';
		$out .= $indent . Xml::element( 'role', null, strval( $role ) ) . "\n";
	}

	if ( $isV11 ) {
		$out .= $indent . Xml::element( 'origin', null, strval( $slot->getOrigin() ) ) . "\n";
	}

	$contentModel = $slot->getModel();
	$contentFormat = MediaWikiServices::getInstance()
		->getContentHandlerFactory()
		->getContentHandler( $contentModel )
		->getDefaultFormat();

	// XXX: The content format is only relevant when actually outputting serialized content.
	// It should probably be an attribute on the text tag.
	$out .= $indent . Xml::element( 'model', null, strval( $contentModel ) ) . "\n";
	$out .= $indent . Xml::element( 'format', null, strval( $contentFormat ) ) . "\n";

	// Shared tail of every warning message emitted below.
	$slotLabel = 'slot ' . $role . ' of revision ' . $slot->getRevision();

	$size = $this->invokeLenient(
		static function () use ( $slot ) {
			return $slot->getSize();
		},
		'failed to determine size for ' . $slotLabel
	);
	// A missing or zero size is written as the string '0'.
	$textAttributes = [ 'bytes' => $size ?: '0' ];

	if ( $isV11 ) {
		$slotSha1 = $this->invokeLenient(
			static function () use ( $slot ) {
				return $slot->getSha1();
			},
			'failed to determine sha1 for ' . $slotLabel
		);
		$textAttributes['sha1'] = $slotSha1 ?: '';
	}

	if ( $contentMode === self::WRITE_STUB_DELETED ) {
		// write <text> placeholder tag
		$textAttributes['deleted'] = 'deleted';
		$out .= $indent . Xml::element( 'text', $textAttributes ) . "\n";
	} elseif ( $contentMode === self::WRITE_CONTENT ) {
		$content = $this->invokeLenient(
			static function () use ( $slot ) {
				return $slot->getContent();
			},
			'failed to load content for ' . $slotLabel
		);

		if ( $content !== null ) {
			$out .= $this->writeText( $content, $textAttributes, $indent );
		} else {
			// Content could not be loaded: fall back to an empty <text> element.
			$out .= $indent . Xml::element( 'text', $textAttributes ) . "\n";
		}
	} else {
		// write <text> stub tag
		if ( $isV11 ) {
			$textAttributes['location'] = $slot->getAddress();
		}

		if ( $isMain ) {
			// Output the numerical text ID if possible, for backwards compatibility.
			// Note that this is currently the ONLY reason we have a BlobStore here at all.
			// When removing this line, check whether the BlobStore has become unused.
			try {
				// NOTE: this will only work for addresses of the form "tt:12345".
				// If we want to support other kinds of addresses in the future,
				// we will have to silently ignore failures here.
				// For now, this fails for "tt:0", which is present in the WMF production
				// database as of July 2019, due to data corruption.
				$textId = $this->getBlobStore()->getTextIdFromAddress( $slot->getAddress() );
			} catch ( InvalidArgumentException $badAddress ) {
				MWDebug::warning( 'Bad content address for ' . $slotLabel
					. ': ' . $badAddress->getMessage() );
				$textId = 0;
			}

			if ( is_int( $textId ) ) {
				$textAttributes['id'] = $textId;
			}
		}

		$out .= $indent . Xml::element( 'text', $textAttributes ) . "\n";
	}

	if ( !$isMain ) {
		$out .= ' ' . Xml::closeElement( 'content' ) . "\n";
	}

	return $out;
}
550 | |
/**
 * Writes a "<text>" element containing the exported form of the given content.
 *
 * The 'bytes' attribute is overwritten with the byte length of the data actually
 * written, and xml:space="preserve" is added.
 *
 * @param Content $content
 * @param string[] $textAttributes Attributes prepared by the caller.
 * @param string $indent Prefix placed in front of the element.
 *
 * @return string
 */
private function writeText( Content $content, $textAttributes, $indent ) {
	$handler = $content->getContentHandler();
	$format = $handler->getDefaultFormat();

	// HACK: For text based models, bypass the serialization step. This allows extensions (like Flow)
	// that use incompatible combinations of serialization format and content model.
	$raw = $content instanceof TextContent
		? $content->getText()
		: $content->serialize( $format );

	$exported = $handler->exportTransform( $raw, $format );

	// make sure to use the actual size
	$textAttributes['bytes'] = strlen( $exported );
	$textAttributes['xml:space'] = 'preserve';

	$element = Xml::elementClean( 'text', $textAttributes, strval( $exported ) );

	return $indent . $element . "\n";
}
576 | |
/**
 * Dumps a "<logitem>" section on the output stream, with
 * data filled in from the given database row.
 *
 * The bits of log_deleted decide whether the contributor, the comment and the
 * title/params are written out or replaced by placeholder elements.
 *
 * @param stdClass $row Must provide the log_* fields read below plus
 *   actor_user and actor_name.
 * @return string
 */
public function writeLogItem( $row ) {
	$hiddenMarker = [ 'deleted' => 'deleted' ];

	$out = " <logitem>\n";
	$out .= " " . Xml::element( 'id', null, strval( $row->log_id ) ) . "\n";

	$out .= $this->writeTimestamp( $row->log_timestamp, " " );

	if ( $row->log_deleted & LogPage::DELETED_USER ) {
		$out .= " " . Xml::element( 'contributor', $hiddenMarker ) . "\n";
	} else {
		$out .= $this->writeContributor( $row->actor_user, $row->actor_name, " " );
	}

	if ( $row->log_deleted & LogPage::DELETED_COMMENT ) {
		$out .= " " . Xml::element( 'comment', $hiddenMarker ) . "\n";
	} else {
		$commentText = $this->commentStore->getComment( 'log_comment', $row )->text;
		// Loose comparison on purpose: empty comments are omitted entirely.
		if ( $commentText != '' ) {
			$out .= " " . Xml::elementClean( 'comment', null, strval( $commentText ) ) . "\n";
		}
	}

	$out .= " " . Xml::element( 'type', null, strval( $row->log_type ) ) . "\n";
	$out .= " " . Xml::element( 'action', null, strval( $row->log_action ) ) . "\n";

	if ( $row->log_deleted & LogPage::DELETED_ACTION ) {
		$out .= " " . Xml::element( 'text', $hiddenMarker ) . "\n";
	} else {
		$target = Title::makeTitle( $row->log_namespace, $row->log_title );
		$targetName = self::canonicalTitle( $target );
		$out .= " " . Xml::elementClean( 'logtitle', null, $targetName ) . "\n";

		$paramsAttribs = [ 'xml:space' => 'preserve' ];
		$out .= " " . Xml::elementClean( 'params', $paramsAttribs, strval( $row->log_params ) ) . "\n";
	}

	$out .= " </logitem>\n";

	return $out;
}
622 | |
/**
 * Writes a "<timestamp>" element holding the given timestamp in ISO 8601 form.
 *
 * The value is converted with wfTimestamp( TS_ISO_8601, ... ) before being written.
 *
 * @param string $timestamp Timestamp in any format wfTimestamp() accepts.
 * @param string $indent Prefix placed in front of the element. Defaults to six spaces.
 * @return string The indented element followed by a newline.
 */
public function writeTimestamp( $timestamp, $indent = "      " ) {
	$iso = wfTimestamp( TS_ISO_8601, $timestamp );

	return $indent . Xml::element( 'timestamp', null, $iso ) . "\n";
}
632 | |
/**
 * Writes a "<contributor>" element for the given user.
 *
 * When $id is truthy, or $text is not a valid IP address according to
 * IPUtils::isValid(), the contributor is written as "<username>" plus "<id>".
 * Otherwise $text is written as an "<ip>" element.
 *
 * @param int $id User id; 0 for users without one.
 * @param string $text User name or IP address.
 * @param string $indent Prefix placed in front of the element. Defaults to six spaces.
 * @return string
 */
public function writeContributor( $id, $text, $indent = "      " ) {
	$out = $indent . "<contributor>\n";

	$isNamedUser = $id || !IPUtils::isValid( $text );
	if ( $isNamedUser ) {
		$out .= $indent . " " . Xml::elementClean( 'username', null, strval( $text ) ) . "\n";
		$out .= $indent . " " . Xml::element( 'id', null, strval( $id ) ) . "\n";
	} else {
		$out .= $indent . " " . Xml::elementClean( 'ip', null, strval( $text ) ) . "\n";
	}

	$out .= $indent . "</contributor>\n";

	return $out;
}
650 | |
/**
 * Writes "<upload>" sections for every version of the file that belongs to the
 * given page row: the history entries in reverse order of getHistory(), then the
 * current version.
 *
 * Returns an empty string when the row is not in NS_FILE or the local repo has
 * no existing file for the page title.
 *
 * Warning! This data is potentially inconsistent. :(
 *
 * @param stdClass $row Must provide page_namespace and page_title.
 * @param bool $dumpContents Passed through to writeUpload().
 * @return string
 */
public function writeUploads( $row, $dumpContents = false ) {
	// Loose comparison kept deliberately, as in the rest of the row handling.
	if ( $row->page_namespace != NS_FILE ) {
		return '';
	}

	$current = MediaWikiServices::getInstance()->getRepoGroup()->getLocalRepo()
		->newFile( $row->page_title );
	if ( !$current || !$current->exists() ) {
		return '';
	}

	$out = '';
	foreach ( array_reverse( $current->getHistory() ) as $oldVersion ) {
		$out .= $this->writeUpload( $oldVersion, $dumpContents );
	}

	return $out . $this->writeUpload( $current, $dumpContents );
}
672 | |
/**
 * Writes a single "<upload>" section for one version of a file.
 *
 * Old versions additionally get an "<archivename>" element. When $dumpContents
 * is set, the file contents are embedded base64-encoded. A missing public
 * uploader or description is written as a placeholder element.
 *
 * @param File $file
 * @param bool $dumpContents Whether to embed the file contents.
 * @return string
 */
private function writeUpload( $file, $dumpContents = false ) {
	$archiveName = '';
	if ( $file->isOld() ) {
		/** @var OldLocalFile $file */
		'@phan-var OldLocalFile $file';
		$archiveName = " " .
			Xml::element( 'archivename', null, $file->getArchiveName() ) . "\n";
	}

	$contents = '';
	if ( $dumpContents ) {
		$backend = $file->getRepo()->getBackend();
		# Dump file as base64
		# Uses only XML-safe characters, so does not need escaping
		# @todo Too bad this loads the contents into memory (script might swap)
		$rawBytes = $backend->getFileContents( [ 'src' => $file->getPath() ] );
		$contents = ' <contents encoding="base64">' .
			chunk_split( base64_encode( $rawBytes ) ) .
			" </contents>\n";
	}

	$uploaderIdentity = $file->getUploader( File::FOR_PUBLIC );
	if ( $uploaderIdentity ) {
		$uploaderXml = $this->writeContributor(
			$uploaderIdentity->getId(),
			$uploaderIdentity->getName()
		);
	} else {
		$uploaderXml = Xml::element( 'contributor', [ 'deleted' => 'deleted' ] ) . "\n";
	}

	$description = $file->getDescription( File::FOR_PUBLIC );
	if ( ( $description ?? '' ) !== '' ) {
		$commentXml = Xml::elementClean( 'comment', null, $description );
	} else {
		$commentXml = Xml::element( 'comment', [ 'deleted' => 'deleted' ] );
	}

	// Pieces are listed in output order; array elements are evaluated top to bottom.
	$pieces = [
		" <upload>\n",
		$this->writeTimestamp( $file->getTimestamp() ),
		$uploaderXml,
		" " . $commentXml . "\n",
		" " . Xml::element( 'filename', null, $file->getName() ) . "\n",
		$archiveName,
		" " . Xml::element( 'src', null, $file->getCanonicalUrl() ) . "\n",
		" " . Xml::element( 'size', null, (string)( $file->getSize() ?: 0 ) ) . "\n",
		" " . Xml::element( 'sha1base36', null, $file->getSha1() ) . "\n",
		" " . Xml::element( 'rel', null, $file->getRel() ) . "\n",
		$contents,
		" </upload>\n",
	];

	return implode( '', $pieces );
}
724 | |
/**
 * Return prefixed text form of title, but using the content language's
 * canonical namespace. This skips any special-casing such as gendered
 * user namespaces -- which while useful, are not yet listed in the
 * XML "<siteinfo>" data so are unsafe in export.
 *
 * External (interwiki) titles are returned via getPrefixedText() unchanged.
 *
 * @param Title $title
 * @return string
 * @since 1.18
 */
public static function canonicalTitle( Title $title ) {
	if ( $title->isExternal() ) {
		return $title->getPrefixedText();
	}

	$contentLanguage = MediaWikiServices::getInstance()->getContentLanguage();
	$nsText = $contentLanguage->getFormattedNsText( $title->getNamespace() );

	// @todo Emit some kind of warning to the user if $title->getNamespace() !==
	// NS_MAIN and $nsText === '' (viz. pages in an unregistered namespace)

	// Only a non-empty namespace name gets the ":" separator.
	$separator = $nsText === '' ? '' : ':';

	return $nsText . $separator . $title->getText();
}
752 | } |