15use InvalidArgumentException;
38use Wikimedia\Assert\Assert;
40use Wikimedia\Timestamp\TimestampFormat as TS;
57 private const WRITE_STUB_DELETED = 2;
73 private $schemaVersion;
80 private $currentTitle =
null;
91 private $commentStore;
102 $contentMode = self::WRITE_CONTENT,
108 in_array( $contentMode, [ self::WRITE_CONTENT, self::WRITE_STUB ],
true ),
110 'must be one of the following constants: WRITE_CONTENT or WRITE_STUB.'
114 in_array( $schemaVersion, self::$supportedSchemas,
true ),
116 'must be one of the following schema versions: '
117 . implode(
',', self::$supportedSchemas )
120 $this->contentMode = $contentMode;
121 $this->schemaVersion = $schemaVersion;
139 $ver = $this->schemaVersion;
141 'xmlns' =>
"http://www.mediawiki.org/xml/export-$ver/",
142 'xmlns:xsi' =>
"http://www.w3.org/2001/XMLSchema-instance",
153 'xsi:schemaLocation' =>
"http://www.mediawiki.org/xml/export-$ver/ " .
154 "http://www.mediawiki.org/xml/export-$ver.xsd",
165 private function siteInfo() {
171 $this->caseSetting(),
172 $this->namespaces() ];
173 return " <siteinfo>\n " .
174 implode(
"\n ", $info ) .
181 private function sitename() {
190 private function dbname() {
198 private function generator() {
205 private function homelink() {
206 return Xml::element(
'base', [], Title::newMainPage()->getCanonicalURL() );
212 private function caseSetting() {
216 $sensitivity = $capitalLinks ?
'first-letter' :
'case-sensitive';
223 private function namespaces() {
224 $spaces =
"<namespaces>\n";
234 'case' => $nsInfo->isCapitalized( $ns )
235 ?
'first-letter' :
'case-sensitive',
238 $spaces .=
" </namespaces>";
249 return "</mediawiki>\n";
261 $this->currentTitle = Title::newFromRow( $row );
263 $out .=
' ' . Xml::elementClean(
'title', [], $canonicalTitle ) .
"\n";
264 $out .=
' ' .
Xml::element(
'ns', [], strval( $row->page_namespace ) ) .
"\n";
265 $out .=
' ' .
Xml::element(
'id', [], strval( $row->page_id ) ) .
"\n";
266 if ( $row->page_is_redirect ) {
268 $page = $services->getWikiPageFactory()->newFromTitle( $this->currentTitle );
269 $redirectStore = $services->getRedirectStore();
270 $redirect = $this->invokeLenient(
271 static function () use ( $page, $redirectStore ) {
272 return $redirectStore->getRedirectTarget( $page );
274 'Failed to get redirect target of page ' . $page->getId()
276 $redirect = Title::castFromLinkTarget( $redirect );
279 $out .=
Xml::element(
'redirect', [
'title' => self::canonicalTitle( $redirect ) ] );
283 $this->hookRunner->onXmlDumpWriterOpenPage( $this, $out, $row, $this->currentTitle );
295 if ( $this->currentTitle !==
null ) {
299 $linkCache->clearLink( $this->currentTitle );
307 private function getRevisionStore() {
314 private function getBlobStore() {
328 private function invokeLenient( $callback, $warning ) {
331 }
catch ( SuppressedDataException ) {
333 }
catch ( MWException | RuntimeException | InvalidArgumentException | ErrorException $ex ) {
334 MWDebug::warning( $warning .
': ' . $ex->getMessage() );
350 $rev = $this->getRevisionStore()->newRevisionFromRowAndSlots(
357 $out =
" <revision>\n";
358 $out .=
" " .
Xml::element(
'id',
null, strval( $rev->getId() ) ) .
"\n";
360 if ( $rev->getParentId() ) {
361 $out .=
" " .
Xml::element(
'parentid',
null, strval( $rev->getParentId() ) ) .
"\n";
366 if ( $rev->isDeleted( RevisionRecord::DELETED_USER ) ) {
367 $out .=
" " .
Xml::element(
'contributor', [
'deleted' =>
'deleted' ] ) .
"\n";
370 $user = $rev->getUser();
372 $user ? $user->getId() : 0,
373 $user ? $user->getName() :
''
377 if ( $rev->isMinor() ) {
378 $out .=
" <minor/>\n";
380 if ( $rev->isDeleted( RevisionRecord::DELETED_COMMENT ) ) {
381 $out .=
" " .
Xml::element(
'comment', [
'deleted' =>
'deleted' ] ) .
"\n";
383 if ( $rev->getComment()->text !=
'' ) {
385 . Xml::elementClean(
'comment', [], strval( $rev->getComment()->text ) )
390 $contentMode = $rev->isDeleted( RevisionRecord::DELETED_TEXT ) ? self::WRITE_STUB_DELETED
391 : $this->contentMode;
393 $slots = $rev->getSlots()->getSlots();
397 $out .= $this->writeSlot( $slots[SlotRecord::MAIN], $contentMode );
399 foreach ( $slots as $role => $slot ) {
400 if ( $role === SlotRecord::MAIN ) {
403 $out .= $this->writeSlot( $slot, $contentMode );
406 if ( $rev->isDeleted( RevisionRecord::DELETED_TEXT ) ) {
407 $out .=
" <sha1/>\n";
409 $sha1 = $this->invokeLenient(
410 static function () use ( $rev ) {
411 return $rev->getSha1();
413 'failed to determine sha1 for revision ' . $rev->getId()
415 $out .=
" " .
Xml::element(
'sha1',
null, strval( $sha1 ) ) .
"\n";
419 if ( $contentMode === self::WRITE_CONTENT ) {
421 $content = $this->invokeLenient(
422 static function () use ( $rev ) {
423 return $rev->getMainContentRaw();
425 'Failed to load main slot content of revision ' . $rev->getId()
428 $text = $content ? $content->serialize() :
'';
430 $this->hookRunner->onXmlDumpWriterWriteRevision( $this, $out, $row, $text, $rev );
432 $out .=
" </revision>\n";
443 private function writeSlot(
SlotRecord $slot, $contentMode ) {
444 $isMain = $slot->
getRole() === SlotRecord::MAIN;
447 if ( !$isV11 && !$isMain ) {
468 $contentHandler =
null;
471 $contentHandler = $contentHandlerFactory->getContentHandler( $contentModel );
473 }
catch ( MWUnknownContentModelException ) {
478 MWDebug::warning(
'Revision ' . $slot->
getRevision() .
' is using an unknown '
479 .
' content model, falling back to FallbackContentHandler.' );
481 $contentHandler = $contentHandlerFactory->getContentHandler( $contentModel );
483 $contentFormat = $contentHandler->getDefaultFormat();
487 $out .= $indent .
Xml::element(
'model',
null, strval( $contentModel ) ) .
"\n";
488 $out .= $indent .
Xml::element(
'format',
null, strval( $contentFormat ) ) .
"\n";
491 'bytes' => $this->invokeLenient(
492 static function () use ( $slot ) {
495 'failed to determine size for slot ' . $slot->
getRole() .
' of revision '
501 $textAttributes[
'sha1'] = $this->invokeLenient(
502 static function () use ( $slot ) {
505 'failed to determine sha1 for slot ' . $slot->
getRole() .
' of revision '
510 if ( $contentMode === self::WRITE_CONTENT ) {
511 $content = $this->invokeLenient(
512 static function () use ( $slot ) {
515 'failed to load content for slot ' . $slot->
getRole() .
' of revision '
519 if ( $content ===
null ) {
520 $out .= $indent .
Xml::element(
'text', $textAttributes ) .
"\n";
522 $out .= $this->writeText( $content, $textAttributes, $indent );
524 } elseif ( $contentMode === self::WRITE_STUB_DELETED ) {
526 $textAttributes[
'deleted'] =
'deleted';
527 $out .= $indent .
Xml::element(
'text', $textAttributes ) .
"\n";
531 $textAttributes[
'location'] = $slot->
getAddress();
545 [ $schema, $textId ] = $this->getBlobStore()->splitBlobAddress( $slot->
getAddress() );
546 }
catch ( InvalidArgumentException $ex ) {
547 MWDebug::warning(
'Bad content address for slot ' . $slot->
getRole()
548 .
' of revision ' . $slot->
getRevision() .
': ' . $ex->getMessage() );
552 if ( $schema ===
'tt' ) {
553 $textAttributes[
'id'] = $textId;
554 } elseif ( $schema ===
'es' ) {
555 $textAttributes[
'id'] = bin2hex( $textId );
559 $out .= $indent .
Xml::element(
'text', $textAttributes ) .
"\n";
576 private function writeText( Content $content, $textAttributes, $indent ) {
577 $contentHandler = $content->getContentHandler();
578 $contentFormat = $contentHandler->getDefaultFormat();
580 if ( $content instanceof TextContent ) {
583 $data = $content->getText();
585 $data = $content->serialize( $contentFormat );
588 $data = $contentHandler->exportTransform( $data, $contentFormat );
590 $textAttributes[
'bytes'] = strlen( $data );
591 $textAttributes[
'xml:space'] =
'preserve';
592 return $indent . Xml::elementClean(
'text', $textAttributes, strval( $data ) ) .
"\n";
603 $out =
" <logitem>\n";
604 $out .=
" " .
Xml::element(
'id',
null, strval( $row->log_id ) ) .
"\n";
608 if ( $row->log_deleted & LogPage::DELETED_USER ) {
609 $out .=
" " .
Xml::element(
'contributor', [
'deleted' =>
'deleted' ] ) .
"\n";
614 if ( $row->log_deleted & LogPage::DELETED_COMMENT ) {
615 $out .=
" " .
Xml::element(
'comment', [
'deleted' =>
'deleted' ] ) .
"\n";
617 $comment = $this->commentStore->getComment(
'log_comment', $row )->text;
618 if ( $comment !=
'' ) {
619 $out .=
" " . Xml::elementClean(
'comment',
null, strval( $comment ) ) .
"\n";
623 $out .=
" " .
Xml::element(
'type',
null, strval( $row->log_type ) ) .
"\n";
624 $out .=
" " .
Xml::element(
'action',
null, strval( $row->log_action ) ) .
"\n";
626 if ( $row->log_deleted & LogPage::DELETED_ACTION ) {
627 $out .=
" " .
Xml::element(
'text', [
'deleted' =>
'deleted' ] ) .
"\n";
630 $out .=
" " . Xml::elementClean(
'logtitle',
null, self::canonicalTitle( $title ) ) .
"\n";
631 $out .=
" " . Xml::elementClean(
'params',
632 [
'xml:space' =>
'preserve' ],
633 strval( $row->log_params ) ) .
"\n";
636 $out .=
" </logitem>\n";
648 return $indent .
Xml::element(
'timestamp',
null, $ts ) .
"\n";
658 $out = $indent .
"<contributor>\n";
659 if ( $id || !IPUtils::isValid( $text ) ) {
660 $out .= $indent .
" " . Xml::elementClean(
'username',
null, strval( $text ) ) .
"\n";
661 $out .= $indent .
" " .
Xml::element(
'id',
null, strval( $id ) ) .
"\n";
663 $out .= $indent .
" " . Xml::elementClean(
'ip',
null, strval( $text ) ) .
"\n";
665 $out .= $indent .
"</contributor>\n";
676 if ( $row->page_namespace ==
NS_FILE ) {
678 ->newFile( $row->page_title );
679 if ( $img && $img->exists() ) {
681 foreach ( array_reverse( $img->getHistory() ) as $ver ) {
682 $out .= $this->writeUpload( $ver, $dumpContents );
684 $out .= $this->writeUpload( $img, $dumpContents );
696 private function writeUpload( $file, $dumpContents =
false ) {
697 if ( $file->isOld() ) {
699 '@phan-var OldLocalFile $file';
701 Xml::element(
'archivename',
null, $file->getArchiveName() ) .
"\n";
705 if ( $dumpContents ) {
706 $be = $file->getRepo()->getBackend();
707 # Dump file as base64
708 # Uses only XML-safe characters, so does not need escaping
709 # @todo Too bad this loads the contents into memory (script might swap)
710 $contents =
' <contents encoding="base64">' .
711 chunk_split( base64_encode(
712 $be->getFileContents( [
'src' => $file->getPath() ] ) ) ) .
717 $uploader = $file->getUploader( File::FOR_PUBLIC );
719 $uploader = $this->
writeContributor( $uploader->getId(), $uploader->getName() );
721 $uploader =
Xml::element(
'contributor', [
'deleted' =>
'deleted' ] ) .
"\n";
723 $comment = $file->getDescription( File::FOR_PUBLIC );
724 if ( ( $comment ??
'' ) !==
'' ) {
725 $comment = Xml::elementClean(
'comment',
null, $comment );
727 $comment =
Xml::element(
'comment', [
'deleted' =>
'deleted' ] );
729 return " <upload>\n" .
732 " " . $comment .
"\n" .
733 " " .
Xml::element(
'filename',
null, $file->getName() ) .
"\n" .
735 " " .
Xml::element(
'src',
null, $file->getCanonicalUrl() ) .
"\n" .
736 " " .
Xml::element(
'size',
null, (
string)( $file->getSize() ?: 0 ) ) .
"\n" .
737 " " . Xml::
element(
'sha1base36', null, $file->getSha1() ) .
"\n" .
738 " " . Xml::
element(
'rel', null, $file->getRel() ) .
"\n" .
764 if ( $prefix !==
'' ) {
768 return $prefix . $title->
getText();
773class_alias( XmlDumpWriter::class,
'XmlDumpWriter' );
const MW_VERSION
The running version of MediaWiki.
const XML_DUMP_SCHEMA_VERSION_11
const XML_DUMP_SCHEMA_VERSION_10
const CONTENT_MODEL_UNKNOWN
wfTimestamp( $outputtype=TS::UNIX, $ts=0)
Get a timestamp string in one of various formats.
Content object implementation for representing flat text.
Exception thrown when an unregistered content model is requested.
Class to simplify the use of log pages.
A class containing constants representing the names of configuration variables.
const CapitalLinks
Name constant for the CapitalLinks setting, for use with Config::get()
const DBname
Name constant for the DBname setting, for use with Config::get()
const Sitename
Name constant for the Sitename setting, for use with Config::get()
Content objects represent page content, e.g.