Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
61.46% |
193 / 314 |
|
54.17% |
13 / 24 |
CRAP | |
0.00% |
0 / 1 |
XmlDumpWriter | |
61.46% |
193 / 314 |
|
54.17% |
13 / 24 |
456.44 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
17 / 17 |
|
100.00% |
1 / 1 |
1 | |||
openStream | |
100.00% |
11 / 11 |
|
100.00% |
1 / 1 |
1 | |||
siteInfo | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
1 | |||
sitename | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
dbname | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
generator | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
homelink | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
caseSetting | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
2 | |||
namespaces | |
100.00% |
13 / 13 |
|
100.00% |
1 / 1 |
3 | |||
closeStream | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
openPage | |
39.13% |
9 / 23 |
|
0.00% |
0 / 1 |
7.61 | |||
closePage | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
2 | |||
getRevisionStore | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getBlobStore | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
invokeLenient | |
16.67% |
1 / 6 |
|
0.00% |
0 / 1 |
8.21 | |||
writeRevision | |
89.29% |
50 / 56 |
|
0.00% |
0 / 1 |
14.24 | |||
writeSlot | |
61.11% |
44 / 72 |
|
0.00% |
0 / 1 |
37.06 | |||
writeText | |
88.89% |
8 / 9 |
|
0.00% |
0 / 1 |
2.01 | |||
writeLogItem | |
0.00% |
0 / 22 |
|
0.00% |
0 / 1 |
30 | |||
writeTimestamp | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
writeContributor | |
85.71% |
6 / 7 |
|
0.00% |
0 / 1 |
3.03 | |||
writeUploads | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
30 | |||
writeUpload | |
0.00% |
0 / 31 |
|
0.00% |
0 / 1 |
42 | |||
canonicalTitle | |
71.43% |
5 / 7 |
|
0.00% |
0 / 1 |
3.21 |
1 | <?php |
2 | /** |
3 | * XmlDumpWriter |
4 | * |
5 | * Copyright © 2003, 2005, 2006 Brooke Vibber <bvibber@wikimedia.org> |
6 | * https://www.mediawiki.org/ |
7 | * |
8 | * This program is free software; you can redistribute it and/or modify |
9 | * it under the terms of the GNU General Public License as published by |
10 | * the Free Software Foundation; either version 2 of the License, or |
11 | * (at your option) any later version. |
12 | * |
13 | * This program is distributed in the hope that it will be useful, |
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
16 | * GNU General Public License for more details. |
17 | * |
18 | * You should have received a copy of the GNU General Public License along |
19 | * with this program; if not, write to the Free Software Foundation, Inc., |
20 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
21 | * http://www.gnu.org/copyleft/gpl.html |
22 | * |
23 | * @file |
24 | */ |
25 | |
26 | use MediaWiki\CommentStore\CommentStore; |
27 | use MediaWiki\Content\Content; |
28 | use MediaWiki\Content\TextContent; |
29 | use MediaWiki\Debug\MWDebug; |
30 | use MediaWiki\Exception\MWException; |
31 | use MediaWiki\Exception\MWUnknownContentModelException; |
32 | use MediaWiki\FileRepo\File\File; |
33 | use MediaWiki\FileRepo\File\OldLocalFile; |
34 | use MediaWiki\HookContainer\HookContainer; |
35 | use MediaWiki\HookContainer\HookRunner; |
36 | use MediaWiki\Logging\LogPage; |
37 | use MediaWiki\MainConfigNames; |
38 | use MediaWiki\MediaWikiServices; |
39 | use MediaWiki\Revision\RevisionAccessException; |
40 | use MediaWiki\Revision\RevisionRecord; |
41 | use MediaWiki\Revision\RevisionStore; |
42 | use MediaWiki\Revision\SlotRecord; |
43 | use MediaWiki\Revision\SuppressedDataException; |
44 | use MediaWiki\Storage\SqlBlobStore; |
45 | use MediaWiki\Title\Title; |
46 | use MediaWiki\Xml\Xml; |
47 | use Wikimedia\Assert\Assert; |
48 | use Wikimedia\IPUtils; |
49 | |
50 | /** |
51 | * @ingroup Dump |
52 | */ |
53 | class XmlDumpWriter { |
54 | |
55 | /** Output serialized revision content. */ |
56 | public const WRITE_CONTENT = 0; |
57 | |
58 | /** Only output stubs for revision content. */ |
59 | public const WRITE_STUB = 1; |
60 | |
61 | /** |
62 | * Only output stubs for revision content, indicating that the content has been |
63 | * deleted/suppressed. |
64 | */ |
65 | private const WRITE_STUB_DELETED = 2; |
66 | |
67 | /** |
68 | * @var string[] the schema versions supported for output |
69 | * @final |
70 | */ |
71 | public static $supportedSchemas = [ |
72 | XML_DUMP_SCHEMA_VERSION_10, |
73 | XML_DUMP_SCHEMA_VERSION_11 |
74 | ]; |
75 | |
76 | /** |
77 | * @var string which schema version the generated XML should comply with. |
78 | * One of the values from self::$supportedSchemas, using the XML_DUMP_SCHEMA_VERSION_XX |
79 | * constants. |
80 | */ |
81 | private $schemaVersion; |
82 | |
83 | /** |
84 | * Title of the currently processed page |
85 | * |
86 | * @var Title|null |
87 | */ |
88 | private $currentTitle = null; |
89 | |
90 | /** |
91 | * @var int Whether to output revision content or just stubs. WRITE_CONTENT or WRITE_STUB. |
92 | */ |
93 | private $contentMode; |
94 | |
95 | /** @var HookRunner */ |
96 | private $hookRunner; |
97 | |
98 | /** @var CommentStore */ |
99 | private $commentStore; |
100 | |
/**
 * Validates the requested output mode and schema version, then wires up
 * the hook runner and comment store used while writing the dump.
 *
 * @param int $contentMode WRITE_CONTENT or WRITE_STUB.
 * @param string $schemaVersion Schema version the generated XML should comply with.
 *   Must be one of the values in self::$supportedSchemas (the
 *   XML_DUMP_SCHEMA_VERSION_XX constants).
 * @param HookContainer|null $hookContainer Taken from MediaWikiServices when null.
 * @param CommentStore|null $commentStore Taken from MediaWikiServices when null.
 */
public function __construct(
	$contentMode = self::WRITE_CONTENT,
	$schemaVersion = XML_DUMP_SCHEMA_VERSION_11,
	?HookContainer $hookContainer = null,
	?CommentStore $commentStore = null
) {
	// WRITE_STUB_DELETED is internal and deliberately not accepted here.
	$publicModes = [ self::WRITE_CONTENT, self::WRITE_STUB ];
	Assert::parameter(
		in_array( $contentMode, $publicModes, true ),
		'$contentMode',
		'must be one of the following constants: WRITE_CONTENT or WRITE_STUB.'
	);

	$schemaList = implode( ',', self::$supportedSchemas );
	Assert::parameter(
		in_array( $schemaVersion, self::$supportedSchemas, true ),
		'$schemaVersion',
		'must be one of the following schema versions: ' . $schemaList
	);

	$this->contentMode = $contentMode;
	$this->schemaVersion = $schemaVersion;

	// Only touch the global service container for dependencies that were not injected.
	if ( $hookContainer === null ) {
		$hookContainer = MediaWikiServices::getInstance()->getHookContainer();
	}
	$this->hookRunner = new HookRunner( $hookContainer );

	if ( $commentStore === null ) {
		$commentStore = MediaWikiServices::getInstance()->getCommentStore();
	}
	$this->commentStore = $commentStore;
}
135 | |
/**
 * Returns the opening of the root "<mediawiki>" element followed by the
 * "<siteinfo>" section.
 *
 * No xml directive is emitted, so the result can be embedded as a
 * subelement of a larger XML stream. Namespace and XML Schema references
 * are included as attributes. Output will be encoded in UTF-8.
 *
 * @return string
 */
public function openStream() {
	$ver = $this->schemaVersion;
	$langCode = MediaWikiServices::getInstance()->getContentLanguage()->getHtmlCode();

	/*
	 * When a new version of the schema is created, it needs staging on mediawiki.org.
	 * This requires a change in the operations/mediawiki-config git repo.
	 *
	 * Create a changeset like https://gerrit.wikimedia.org/r/#/c/149643/ in which
	 * you copy in the new xsd file.
	 *
	 * After it is reviewed, merged and deployed (sync-docroot), the index.html needs purging.
	 * echo "https://www.mediawiki.org/xml/index.html" | mwscript purgeList.php --wiki=aawiki
	 */
	$schemaLocation = "http://www.mediawiki.org/xml/export-$ver/ " .
		"http://www.mediawiki.org/xml/export-$ver.xsd";

	$rootAttribs = [
		'xmlns' => "http://www.mediawiki.org/xml/export-$ver/",
		'xmlns:xsi' => "http://www.w3.org/2001/XMLSchema-instance",
		'xsi:schemaLocation' => $schemaLocation,
		'version' => $ver,
		'xml:lang' => $langCode,
	];

	// Contents are explicitly null: the element is left open here and is
	// terminated later by closeStream().
	$rootTag = Xml::element( 'mediawiki', $rootAttribs, null );

	return $rootTag . "\n" . $this->siteInfo();
}
169 | |
/**
 * Builds the "<siteinfo>" section from the individual site-level elements.
 *
 * @return string
 */
private function siteInfo() {
	// The order of the entries is the order in which they appear in the output.
	$entries = [
		$this->sitename(),
		$this->dbname(),
		$this->homelink(),
		$this->generator(),
		$this->caseSetting(),
		$this->namespaces(),
	];
	$body = implode( "\n ", $entries );

	return " <siteinfo>\n " . $body . "\n </siteinfo>\n";
}
185 | |
/**
 * Builds the "<sitename>" element from the Sitename configuration setting.
 *
 * @return string
 */
private function sitename() {
	$config = MediaWikiServices::getInstance()->getMainConfig();

	return Xml::element( 'sitename', [], $config->get( MainConfigNames::Sitename ) );
}
194 | |
/**
 * Builds the "<dbname>" element from the DBname configuration setting.
 *
 * @return string
 */
private function dbname() {
	$config = MediaWikiServices::getInstance()->getMainConfig();
	$value = $config->get( MainConfigNames::DBname );

	return Xml::element( 'dbname', [], $value );
}
202 | |
/**
 * Builds the "<generator>" element naming the MediaWiki version in use.
 *
 * @return string
 */
private function generator() {
	$label = 'MediaWiki ' . MW_VERSION;

	return Xml::element( 'generator', [], $label );
}
209 | |
/**
 * Builds the "<base>" element holding the canonical URL of the main page.
 *
 * @return string
 */
private function homelink() {
	$mainPage = Title::newMainPage();

	return Xml::element( 'base', [], $mainPage->getCanonicalURL() );
}
216 | |
/**
 * Builds the "<case>" element describing the wiki-wide title case handling,
 * derived from the CapitalLinks configuration setting.
 *
 * @return string
 */
private function caseSetting() {
	$config = MediaWikiServices::getInstance()->getMainConfig();

	// "case-insensitive" option is reserved for future
	if ( $config->get( MainConfigNames::CapitalLinks ) ) {
		$sensitivity = 'first-letter';
	} else {
		$sensitivity = 'case-sensitive';
	}

	return Xml::element( 'case', [], $sensitivity );
}
227 | |
/**
 * Builds the "<namespaces>" element, with one "<namespace>" child per
 * formatted namespace of the content language. Each child carries the
 * namespace key and its case-sensitivity setting.
 *
 * @return string
 */
private function namespaces() {
	$nsInfo = MediaWikiServices::getInstance()->getNamespaceInfo();
	$formatted = MediaWikiServices::getInstance()->getContentLanguage()->getFormattedNamespaces();

	$entries = '';
	foreach ( $formatted as $ns => $title ) {
		if ( $nsInfo->isCapitalized( $ns ) ) {
			$case = 'first-letter';
		} else {
			$case = 'case-sensitive';
		}
		$attribs = [
			'key' => $ns,
			'case' => $case,
		];
		$entries .= ' ' . Xml::element( 'namespace', $attribs, $title ) . "\n";
	}

	return "<namespaces>\n" . $entries . " </namespaces>";
}
249 | |
/**
 * Returns the closing tag of the root element.
 * Call this once everything else has been dumped.
 *
 * @return string
 */
public function closeStream() {
	$closingTag = '</mediawiki>';

	return $closingTag . "\n";
}
259 | |
/**
 * Opens a "<page>" section, filled in from the given database row.
 *
 * The title built from the row is remembered as the current title, so that
 * later calls (writeRevision(), closePage()) can refer to it. For redirect
 * pages a "<redirect>" element is added when a valid target can be resolved.
 * The XmlDumpWriterOpenPage hook is run on the output before it is returned.
 *
 * @param stdClass $row
 * @return string
 */
public function openPage( $row ) {
	$this->currentTitle = Title::newFromRow( $row );

	$out = " <page>\n"
		. ' ' . Xml::elementClean( 'title', [], self::canonicalTitle( $this->currentTitle ) ) . "\n"
		. ' ' . Xml::element( 'ns', [], strval( $row->page_namespace ) ) . "\n"
		. ' ' . Xml::element( 'id', [], strval( $row->page_id ) ) . "\n";

	if ( $row->page_is_redirect ) {
		$services = MediaWikiServices::getInstance();
		$page = $services->getWikiPageFactory()->newFromTitle( $this->currentTitle );
		$redirectStore = $services->getRedirectStore();

		// A failing lookup yields null here, which simply omits the <redirect> element.
		$target = Title::castFromLinkTarget(
			$this->invokeLenient(
				static fn () => $redirectStore->getRedirectTarget( $page ),
				'Failed to get redirect target of page ' . $page->getId()
			)
		);

		if ( $target instanceof Title && $target->isValidRedirectTarget() ) {
			$redirectAttribs = [ 'title' => self::canonicalTitle( $target ) ];
			$out .= ' ' . Xml::element( 'redirect', $redirectAttribs ) . "\n";
		}
	}

	$this->hookRunner->onXmlDumpWriterOpenPage( $this, $out, $row, $this->currentTitle );

	return $out;
}
295 | |
/**
 * Closes a "<page>" section and drops the current title from the link cache.
 *
 * @internal
 * @return string
 */
public function closePage() {
	$title = $this->currentTitle;

	if ( $title !== null ) {
		// In rare cases, link cache has the same key for some pages which
		// might be read as part of the same batch. T220424 and T220316
		MediaWikiServices::getInstance()->getLinkCache()->clearLink( $title );
	}

	return " </page>\n";
}
311 | |
/**
 * Fetches the revision store from the global service container.
 *
 * @return RevisionStore
 */
private function getRevisionStore() {
	$services = MediaWikiServices::getInstance();

	return $services->getRevisionStore();
}
318 | |
/**
 * Fetches the blob store from the global service container.
 *
 * @return SqlBlobStore
 */
private function getBlobStore() {
	$services = MediaWikiServices::getInstance();

	// @phan-suppress-next-line PhanTypeMismatchReturnSuperType
	return $services->getBlobStore();
}
326 | |
/**
 * Runs the given callback, turning a known set of failures into a null result.
 *
 * A SuppressedDataException is swallowed silently. MWException,
 * RuntimeException, InvalidArgumentException and ErrorException are reported
 * through MWDebug::warning(), prefixed with $warning. Any other exception
 * propagates to the caller.
 *
 * @param callable $callback
 * @param string $warning The warning to output in case of a storage related exception.
 *
 * @return mixed The callback's return value, or null if one of the exceptions above was caught.
 * @throws Exception
 */
private function invokeLenient( $callback, $warning ) {
	$result = null;

	try {
		$result = $callback();
	} catch ( SuppressedDataException ) {
		// Suppressed data is expected; the result stays null and nothing is logged.
	} catch ( MWException | RuntimeException | InvalidArgumentException | ErrorException $ex ) {
		MWDebug::warning( $warning . ': ' . $ex->getMessage() );
	}

	return $result;
}
346 | |
/**
 * Dumps a "<revision>" section, filled in from the given database row.
 *
 * The section holds the revision id, optional parent id, timestamp,
 * contributor, minor flag, comment, one entry per slot (main slot first)
 * and the revision sha1. User, comment and text are replaced by "deleted"
 * placeholders when the matching deletion flag is set on the revision.
 * The XmlDumpWriterWriteRevision hook is run before the element is closed.
 *
 * @param stdClass $row
 * @param null|stdClass[] $slotRows
 *
 * @return string
 * @throws RevisionAccessException
 */
public function writeRevision( $row, $slotRows = null ) {
	$rev = $this->getRevisionStore()->newRevisionFromRowAndSlots(
		$row,
		$slotRows,
		0,
		$this->currentTitle
	);
	$revId = $rev->getId();

	$out = " <revision>\n";
	$out .= " " . Xml::element( 'id', null, strval( $revId ) ) . "\n";

	$parentId = $rev->getParentId();
	if ( $parentId ) {
		$out .= " " . Xml::element( 'parentid', null, strval( $parentId ) ) . "\n";
	}

	$out .= $this->writeTimestamp( $rev->getTimestamp() );

	if ( !$rev->isDeleted( RevisionRecord::DELETED_USER ) ) {
		// empty values get written out as uid 0, see T224221
		$user = $rev->getUser();
		$userId = 0;
		$userName = '';
		if ( $user ) {
			$userId = $user->getId();
			$userName = $user->getName();
		}
		$out .= $this->writeContributor( $userId, $userName );
	} else {
		$out .= " " . Xml::element( 'contributor', [ 'deleted' => 'deleted' ] ) . "\n";
	}

	if ( $rev->isMinor() ) {
		$out .= " <minor/>\n";
	}

	if ( $rev->isDeleted( RevisionRecord::DELETED_COMMENT ) ) {
		$out .= " " . Xml::element( 'comment', [ 'deleted' => 'deleted' ] ) . "\n";
	} elseif ( $rev->getComment()->text != '' ) {
		// Empty comments are left out entirely.
		$commentText = strval( $rev->getComment()->text );
		$out .= " " . Xml::elementClean( 'comment', [], $commentText ) . "\n";
	}

	// Deleted text overrides the configured mode for every slot of this revision.
	$textDeleted = $rev->isDeleted( RevisionRecord::DELETED_TEXT );
	$contentMode = $textDeleted ? self::WRITE_STUB_DELETED : $this->contentMode;

	$slots = $rev->getSlots()->getSlots();

	// use predictable order, put main slot first
	ksort( $slots );
	$out .= $this->writeSlot( $slots[SlotRecord::MAIN], $contentMode );

	foreach ( $slots as $role => $slot ) {
		if ( $role !== SlotRecord::MAIN ) {
			$out .= $this->writeSlot( $slot, $contentMode );
		}
	}

	if ( $textDeleted ) {
		$out .= " <sha1/>\n";
	} else {
		$sha1 = $this->invokeLenient(
			static fn () => $rev->getSha1(),
			'failed to determine sha1 for revision ' . $revId
		);
		$out .= " " . Xml::element( 'sha1', null, strval( $sha1 ) ) . "\n";
	}

	// The serialized main slot text is only handed to the hook; it is not
	// appended to the output here.
	$text = '';
	if ( $contentMode === self::WRITE_CONTENT ) {
		/** @var Content|null $content */
		$content = $this->invokeLenient(
			static fn () => $rev->getMainContentRaw(),
			'Failed to load main slot content of revision ' . $revId
		);

		if ( $content ) {
			$text = $content->serialize();
		}
	}
	$this->hookRunner->onXmlDumpWriterWriteRevision( $this, $out, $row, $text, $rev );

	$out .= " </revision>\n";

	return $out;
}
444 | |
/**
 * Writes the XML for a single slot of a revision: origin (schema 0.11 and
 * later), content model, format and a "<text>" element.
 *
 * Non-main slots are wrapped in a "<content>" element together with their
 * role, and are skipped entirely for schema versions before 0.11. The shape
 * of the "<text>" element depends on $contentMode: full content, a
 * "deleted" placeholder, or a stub that only references the stored blob.
 *
 * @param SlotRecord $slot
 * @param int $contentMode see the WRITE_XXX constants
 *
 * @return string
 */
private function writeSlot( SlotRecord $slot, $contentMode ) {
	$isMain = $slot->getRole() === SlotRecord::MAIN;
	$isV11 = $this->schemaVersion >= XML_DUMP_SCHEMA_VERSION_11;

	if ( !$isV11 && !$isMain ) {
		// ignore extra slots
		return '';
	}

	$out = '';
	$indent = ' ';

	if ( !$isMain ) {
		// non-main slots are wrapped into an additional element.
		$out .= ' ' . Xml::openElement( 'content' ) . "\n";
		$indent .= ' ';
		$out .= $indent . Xml::element( 'role', null, strval( $slot->getRole() ) ) . "\n";
	}

	if ( $isV11 ) {
		$out .= $indent . Xml::element( 'origin', null, strval( $slot->getOrigin() ) ) . "\n";
	}

	$contentModel = $slot->getModel();
	$contentHandlerFactory = MediaWikiServices::getInstance()->getContentHandlerFactory();
	$contentHandler = null;

	try {
		$contentHandler = $contentHandlerFactory->getContentHandler( $contentModel );
	} catch ( MWUnknownContentModelException ) {
		// A content model should not be removed, as this would cause old revisions
		// to fail to render. If this does happen, let dumps keep going but warn.
		// To stop these warnings, register a fallback content model like so:
		// $wgContentHandlers['Your.Removed.Handler'] = 'FallbackContentHandler'
		MWDebug::warning( 'Revision ' . $slot->getRevision() . ' is using an unknown'
			. ' content model, falling back to FallbackContentHandler.' );
		$contentModel = CONTENT_MODEL_UNKNOWN;
		$contentHandler = $contentHandlerFactory->getContentHandler( $contentModel );
	}
	$contentFormat = $contentHandler->getDefaultFormat();

	// XXX: The content format is only relevant when actually outputting serialized content.
	// It should probably be an attribute on the text tag.
	$out .= $indent . Xml::element( 'model', null, strval( $contentModel ) ) . "\n";
	$out .= $indent . Xml::element( 'format', null, strval( $contentFormat ) ) . "\n";

	// Size and hash lookups are lenient: a failure degrades to '0' / '' instead
	// of aborting the dump.
	$textAttributes = [
		'bytes' => $this->invokeLenient(
			static function () use ( $slot ) {
				return $slot->getSize();
			},
			'failed to determine size for slot ' . $slot->getRole() . ' of revision '
			. $slot->getRevision()
		) ?: '0'
	];

	if ( $isV11 ) {
		$textAttributes['sha1'] = $this->invokeLenient(
			static function () use ( $slot ) {
				return $slot->getSha1();
			},
			'failed to determine sha1 for slot ' . $slot->getRole() . ' of revision '
			. $slot->getRevision()
		) ?: '';
	}

	if ( $contentMode === self::WRITE_CONTENT ) {
		$content = $this->invokeLenient(
			static function () use ( $slot ) {
				return $slot->getContent();
			},
			'failed to load content for slot ' . $slot->getRole() . ' of revision '
			. $slot->getRevision()
		);

		if ( $content === null ) {
			// Content could not be loaded: emit an empty <text> tag with the known attributes.
			$out .= $indent . Xml::element( 'text', $textAttributes ) . "\n";
		} else {
			$out .= $this->writeText( $content, $textAttributes, $indent );
		}
	} elseif ( $contentMode === self::WRITE_STUB_DELETED ) {
		// write <text> placeholder tag
		$textAttributes['deleted'] = 'deleted';
		$out .= $indent . Xml::element( 'text', $textAttributes ) . "\n";
	} else {
		// write <text> stub tag
		if ( $isV11 ) {
			$textAttributes['location'] = $slot->getAddress();
		}
		// Both stay at these defaults when the address cannot be split, in
		// which case no 'id' attribute is written.
		$schema = null;
		$textId = 0;

		if ( $isMain ) {
			// Output the numerical text ID if possible, for backwards compatibility.
			// Note that this is currently the ONLY reason we have a BlobStore here at all.
			// When removing this line, check whether the BlobStore has become unused.
			try {
				// NOTE: this will only work for addresses of the form "tt:12345" or "es:DB://cluster1/1234".
				// If we want to support other kinds of addresses in the future,
				// we will have to silently ignore failures here.
				// For now, this fails for "tt:0", which is present in the WMF production
				// database as of July 2019, due to data corruption.
				[ $schema, $textId ] = $this->getBlobStore()->splitBlobAddress( $slot->getAddress() );
			} catch ( InvalidArgumentException $ex ) {
				MWDebug::warning( 'Bad content address for slot ' . $slot->getRole()
					. ' of revision ' . $slot->getRevision() . ': ' . $ex->getMessage() );
				$textId = 0;
			}

			if ( $schema === 'tt' ) {
				$textAttributes['id'] = $textId;
			} elseif ( $schema === 'es' ) {
				$textAttributes['id'] = bin2hex( $textId );
			}
		}

		$out .= $indent . Xml::element( 'text', $textAttributes ) . "\n";
	}

	if ( !$isMain ) {
		$out .= ' ' . Xml::closeElement( 'content' ) . "\n";
	}

	return $out;
}
576 | |
/**
 * Writes a "<text>" element containing the given content.
 *
 * The 'bytes' attribute passed in is overwritten with the length of the
 * data actually written, and 'xml:space' is set to 'preserve'.
 *
 * @param Content $content
 * @param string[] $textAttributes
 * @param string $indent
 *
 * @return string
 */
private function writeText( Content $content, $textAttributes, $indent ) {
	$handler = $content->getContentHandler();
	$format = $handler->getDefaultFormat();

	// HACK: For text based models, bypass the serialization step. This allows extensions (like Flow)
	// that use incompatible combinations of serialization format and content model.
	$raw = $content instanceof TextContent
		? $content->getText()
		: $content->serialize( $format );

	$data = $handler->exportTransform( $raw, $format );

	// make sure to use the actual size
	$textAttributes['bytes'] = strlen( $data );
	$textAttributes['xml:space'] = 'preserve';

	$element = Xml::elementClean( 'text', $textAttributes, strval( $data ) );

	return $indent . $element . "\n";
}
602 | |
/**
 * Dumps a "<logitem>" section, filled in from the given database row.
 *
 * Contributor, comment and action details are each replaced by a "deleted"
 * placeholder when the matching bit is set in the row's log_deleted field.
 *
 * @param stdClass $row
 * @return string
 */
public function writeLogItem( $row ) {
	$deletedMarker = [ 'deleted' => 'deleted' ];

	$out = " <logitem>\n";
	$out .= " " . Xml::element( 'id', null, strval( $row->log_id ) ) . "\n";
	$out .= $this->writeTimestamp( $row->log_timestamp, " " );

	$out .= ( $row->log_deleted & LogPage::DELETED_USER )
		? " " . Xml::element( 'contributor', $deletedMarker ) . "\n"
		: $this->writeContributor( $row->actor_user, $row->actor_name, " " );

	if ( $row->log_deleted & LogPage::DELETED_COMMENT ) {
		$out .= " " . Xml::element( 'comment', $deletedMarker ) . "\n";
	} else {
		$comment = $this->commentStore->getComment( 'log_comment', $row )->text;
		// Empty comments are left out entirely.
		if ( $comment != '' ) {
			$out .= " " . Xml::elementClean( 'comment', null, strval( $comment ) ) . "\n";
		}
	}

	$out .= " " . Xml::element( 'type', null, strval( $row->log_type ) ) . "\n";
	$out .= " " . Xml::element( 'action', null, strval( $row->log_action ) ) . "\n";

	if ( $row->log_deleted & LogPage::DELETED_ACTION ) {
		$out .= " " . Xml::element( 'text', $deletedMarker ) . "\n";
	} else {
		$target = Title::makeTitle( $row->log_namespace, $row->log_title );
		$out .= " " . Xml::elementClean( 'logtitle', null, self::canonicalTitle( $target ) ) . "\n";

		$paramsAttribs = [ 'xml:space' => 'preserve' ];
		$out .= " " . Xml::elementClean( 'params', $paramsAttribs, strval( $row->log_params ) ) . "\n";
	}

	return $out . " </logitem>\n";
}
648 | |
/**
 * Writes a "<timestamp>" element holding the given timestamp converted to
 * ISO 8601 format.
 *
 * @param string $timestamp
 * @param string $indent Indentation placed in front of the element
 * @return string
 */
public function writeTimestamp( $timestamp, $indent = " " ) {
	$element = Xml::element( 'timestamp', null, wfTimestamp( TS_ISO_8601, $timestamp ) );

	return $indent . $element . "\n";
}
658 | |
/**
 * Writes a "<contributor>" element.
 *
 * When $id is empty and $text is a valid IP address, only an "<ip>" child
 * is written. Otherwise "<username>" and "<id>" children are written.
 *
 * @param int $id
 * @param string $text
 * @param string $indent Indentation placed in front of each line
 * @return string
 */
public function writeContributor( $id, $text, $indent = " " ) {
	$childIndent = $indent . " ";

	if ( !$id && IPUtils::isValid( $text ) ) {
		$children = $childIndent . Xml::elementClean( 'ip', null, strval( $text ) ) . "\n";
	} else {
		$children = $childIndent . Xml::elementClean( 'username', null, strval( $text ) ) . "\n"
			. $childIndent . Xml::element( 'id', null, strval( $id ) ) . "\n";
	}

	return $indent . "<contributor>\n" . $children . $indent . "</contributor>\n";
}
676 | |
/**
 * Writes "<upload>" sections for every version of the file belonging to the
 * given page row: the entries of the file history in reverse order of
 * getHistory(), followed by the current version.
 *
 * Returns an empty string for pages outside the file namespace and for
 * files that do not exist in the local repo.
 *
 * Warning! This data is potentially inconsistent. :(
 *
 * @param stdClass $row
 * @param bool $dumpContents
 * @return string
 */
public function writeUploads( $row, $dumpContents = false ) {
	if ( $row->page_namespace != NS_FILE ) {
		return '';
	}

	$localRepo = MediaWikiServices::getInstance()->getRepoGroup()->getLocalRepo();
	$img = $localRepo->newFile( $row->page_title );
	if ( !$img || !$img->exists() ) {
		return '';
	}

	$out = '';
	foreach ( array_reverse( $img->getHistory() ) as $ver ) {
		$out .= $this->writeUpload( $ver, $dumpContents );
	}

	return $out . $this->writeUpload( $img, $dumpContents );
}
698 | |
/**
 * Writes a single "<upload>" section for the given file version.
 *
 * The section holds timestamp, uploader, comment, file name, archive name
 * (old versions only), source URL, size, sha1, relative path and, when
 * requested, the base64 encoded file contents. Uploader and comment are
 * replaced by "deleted" placeholders when they are not publicly available.
 *
 * @param File $file
 * @param bool $dumpContents Whether to embed the file contents in the output
 * @return string
 */
private function writeUpload( $file, $dumpContents = false ) {
	if ( $file->isOld() ) {
		/** @var OldLocalFile $file */
		'@phan-var OldLocalFile $file';
		$archiveName = " " .
			Xml::element( 'archivename', null, $file->getArchiveName() ) . "\n";
	} else {
		$archiveName = '';
	}
	if ( $dumpContents ) {
		$be = $file->getRepo()->getBackend();
		$rawContents = $be->getFileContents( [ 'src' => $file->getPath() ] );
		if ( $rawContents === false ) {
			// A failed read used to be encoded as if the file were empty, without
			// any trace. Keep the dump going with an empty element, but say so.
			MWDebug::warning( 'Failed to read contents of file ' . $file->getName()
				. ' while writing upload' );
			$rawContents = '';
		}
		# Dump file as base64
		# Uses only XML-safe characters, so does not need escaping
		# @todo Too bad this loads the contents into memory (script might swap)
		$contents = ' <contents encoding="base64">' .
			chunk_split( base64_encode( $rawContents ) ) .
			" </contents>\n";
	} else {
		$contents = '';
	}
	$uploader = $file->getUploader( File::FOR_PUBLIC );
	if ( $uploader ) {
		$uploader = $this->writeContributor( $uploader->getId(), $uploader->getName() );
	} else {
		// No publicly visible uploader: write the "deleted" placeholder instead.
		$uploader = Xml::element( 'contributor', [ 'deleted' => 'deleted' ] ) . "\n";
	}
	$comment = $file->getDescription( File::FOR_PUBLIC );
	if ( ( $comment ?? '' ) !== '' ) {
		$comment = Xml::elementClean( 'comment', null, $comment );
	} else {
		// A null or empty description is written as the "deleted" placeholder.
		$comment = Xml::element( 'comment', [ 'deleted' => 'deleted' ] );
	}
	return " <upload>\n" .
		$this->writeTimestamp( $file->getTimestamp() ) .
		$uploader .
		" " . $comment . "\n" .
		" " . Xml::element( 'filename', null, $file->getName() ) . "\n" .
		$archiveName .
		" " . Xml::element( 'src', null, $file->getCanonicalUrl() ) . "\n" .
		" " . Xml::element( 'size', null, (string)( $file->getSize() ?: 0 ) ) . "\n" .
		" " . Xml::element( 'sha1base36', null, $file->getSha1() ) . "\n" .
		" " . Xml::element( 'rel', null, $file->getRel() ) . "\n" .
		$contents .
		" </upload>\n";
}
750 | |
/**
 * Return prefixed text form of title, but using the content language's
 * canonical namespace. This skips any special-casing such as gendered
 * user namespaces -- which while useful, are not yet listed in the
 * XML "<siteinfo>" data so are unsafe in export.
 *
 * External titles are returned in their regular prefixed text form.
 *
 * @param Title $title
 * @return string
 * @since 1.18
 */
public static function canonicalTitle( Title $title ) {
	if ( $title->isExternal() ) {
		return $title->getPrefixedText();
	}

	$contLang = MediaWikiServices::getInstance()->getContentLanguage();
	$nsText = $contLang->getFormattedNsText( $title->getNamespace() );

	// @todo Emit some kind of warning to the user if $title->getNamespace() !==
	// NS_MAIN and $nsText === '' (viz. pages in an unregistered namespace)

	$pageText = $title->getText();

	// An empty namespace text means there is no prefix and no separator.
	return $nsText === '' ? $pageText : $nsText . ':' . $pageText;
}
778 | } |