15use InvalidArgumentException;
39use Wikimedia\Assert\Assert;
41use Wikimedia\Timestamp\TimestampFormat as TS;
58 private const WRITE_STUB_DELETED = 2;
74 private $schemaVersion;
81 private $currentTitle =
null;
92 private $commentStore;
103 $contentMode = self::WRITE_CONTENT,
109 in_array( $contentMode, [ self::WRITE_CONTENT, self::WRITE_STUB ],
true ),
111 'must be one of the following constants: WRITE_CONTENT or WRITE_STUB.'
115 in_array( $schemaVersion, self::$supportedSchemas,
true ),
117 'must be one of the following schema versions: '
118 . implode(
',', self::$supportedSchemas )
121 $this->contentMode = $contentMode;
122 $this->schemaVersion = $schemaVersion;
140 $ver = $this->schemaVersion;
142 'xmlns' =>
"http://www.mediawiki.org/xml/export-$ver/",
143 'xmlns:xsi' =>
"http://www.w3.org/2001/XMLSchema-instance",
154 'xsi:schemaLocation' =>
"http://www.mediawiki.org/xml/export-$ver/ " .
155 "http://www.mediawiki.org/xml/export-$ver.xsd",
166 private function siteInfo() {
172 $this->caseSetting(),
173 $this->namespaces() ];
174 return " <siteinfo>\n " .
175 implode(
"\n ", $info ) .
182 private function sitename() {
191 private function dbname() {
199 private function generator() {
206 private function homelink() {
207 return Xml::element(
'base', [], Title::newMainPage()->getCanonicalURL() );
213 private function caseSetting() {
217 $sensitivity = $capitalLinks ?
'first-letter' :
'case-sensitive';
224 private function namespaces() {
225 $spaces =
"<namespaces>\n";
235 'case' => $nsInfo->isCapitalized( $ns )
236 ?
'first-letter' :
'case-sensitive',
239 $spaces .=
" </namespaces>";
250 return "</mediawiki>\n";
262 $this->currentTitle = Title::newFromRow( $row );
264 $out .=
' ' . Xml::elementClean(
'title', [], $canonicalTitle ) .
"\n";
265 $out .=
' ' .
Xml::element(
'ns', [], strval( $row->page_namespace ) ) .
"\n";
266 $out .=
' ' .
Xml::element(
'id', [], strval( $row->page_id ) ) .
"\n";
267 if ( $row->page_is_redirect ) {
269 $page = $services->getWikiPageFactory()->newFromTitle( $this->currentTitle );
270 $redirectStore = $services->getRedirectStore();
271 $redirect = $this->invokeLenient(
272 static function () use ( $page, $redirectStore ) {
273 return $redirectStore->getRedirectTarget( $page );
275 'Failed to get redirect target of page ' . $page->getId()
277 $redirect = Title::castFromLinkTarget( $redirect );
280 $out .=
Xml::element(
'redirect', [
'title' => self::canonicalTitle( $redirect ) ] );
284 $this->hookRunner->onXmlDumpWriterOpenPage( $this, $out, $row, $this->currentTitle );
296 if ( $this->currentTitle !==
null ) {
300 $linkCache->clearLink( $this->currentTitle );
308 private function getRevisionStore() {
315 private function getBlobStore() {
329 private function invokeLenient( $callback, $warning ) {
332 }
catch ( SuppressedDataException ) {
334 }
catch ( MWException | RuntimeException | InvalidArgumentException | ErrorException $ex ) {
335 MWDebug::warning( $warning .
': ' . $ex->getMessage() );
351 $rev = $this->getRevisionStore()->newRevisionFromRowAndSlots(
358 $out =
" <revision>\n";
359 $out .=
" " .
Xml::element(
'id',
null, strval( $rev->getId() ) ) .
"\n";
361 if ( $rev->getParentId() ) {
362 $out .=
" " .
Xml::element(
'parentid',
null, strval( $rev->getParentId() ) ) .
"\n";
367 if ( $rev->isDeleted( RevisionRecord::DELETED_USER ) ) {
368 $out .=
" " .
Xml::element(
'contributor', [
'deleted' =>
'deleted' ] ) .
"\n";
371 $user = $rev->getUser();
373 $user ? $user->getId() : 0,
374 $user ? $user->getName() :
''
378 if ( $rev->isMinor() ) {
379 $out .=
" <minor/>\n";
381 if ( $rev->isDeleted( RevisionRecord::DELETED_COMMENT ) ) {
382 $out .=
" " .
Xml::element(
'comment', [
'deleted' =>
'deleted' ] ) .
"\n";
384 if ( $rev->getComment()->text !=
'' ) {
386 . Xml::elementClean(
'comment', [], strval( $rev->getComment()->text ) )
391 $contentMode = $rev->isDeleted( RevisionRecord::DELETED_TEXT ) ? self::WRITE_STUB_DELETED
392 : $this->contentMode;
394 $slots = $rev->getSlots()->getSlots();
398 $out .= $this->writeSlot( $slots[SlotRecord::MAIN], $contentMode );
400 foreach ( $slots as $role => $slot ) {
401 if ( $role === SlotRecord::MAIN ) {
404 $out .= $this->writeSlot( $slot, $contentMode );
407 if ( $rev->isDeleted( RevisionRecord::DELETED_TEXT ) ) {
408 $out .=
" <sha1/>\n";
410 $sha1 = $this->invokeLenient(
411 static function () use ( $rev ) {
412 return $rev->getSha1();
414 'failed to determine sha1 for revision ' . $rev->getId()
416 $out .=
" " .
Xml::element(
'sha1',
null, strval( $sha1 ) ) .
"\n";
420 if ( $contentMode === self::WRITE_CONTENT ) {
422 $content = $this->invokeLenient(
423 static function () use ( $rev ) {
424 return $rev->getMainContentRaw();
426 'Failed to load main slot content of revision ' . $rev->getId()
429 $text = $content ? $content->serialize() :
'';
431 $this->hookRunner->onXmlDumpWriterWriteRevision( $this, $out, $row, $text, $rev );
433 $out .=
" </revision>\n";
444 private function writeSlot(
SlotRecord $slot, $contentMode ) {
445 $isMain = $slot->
getRole() === SlotRecord::MAIN;
448 if ( !$isV11 && !$isMain ) {
469 $contentHandler =
null;
472 $contentHandler = $contentHandlerFactory->getContentHandler( $contentModel );
474 }
catch ( UnknownContentModelException ) {
479 MWDebug::warning(
'Revision ' . $slot->
getRevision() .
' is using an unknown '
480 .
' content model, falling back to FallbackContentHandler.' );
482 $contentHandler = $contentHandlerFactory->getContentHandler( $contentModel );
484 $contentFormat = $contentHandler->getDefaultFormat();
488 $out .= $indent .
Xml::element(
'model',
null, strval( $contentModel ) ) .
"\n";
489 $out .= $indent .
Xml::element(
'format',
null, strval( $contentFormat ) ) .
"\n";
492 'bytes' => $this->invokeLenient(
493 static function () use ( $slot ) {
496 'failed to determine size for slot ' . $slot->
getRole() .
' of revision '
502 $textAttributes[
'sha1'] = $this->invokeLenient(
503 static function () use ( $slot ) {
506 'failed to determine sha1 for slot ' . $slot->
getRole() .
' of revision '
511 if ( $contentMode === self::WRITE_CONTENT ) {
512 $content = $this->invokeLenient(
513 static function () use ( $slot ) {
516 'failed to load content for slot ' . $slot->
getRole() .
' of revision '
520 if ( $content ===
null ) {
521 $out .= $indent .
Xml::element(
'text', $textAttributes ) .
"\n";
523 $out .= $this->writeText( $content, $textAttributes, $indent, $contentHandler, $contentFormat );
525 } elseif ( $contentMode === self::WRITE_STUB_DELETED ) {
527 $textAttributes[
'deleted'] =
'deleted';
528 $out .= $indent .
Xml::element(
'text', $textAttributes ) .
"\n";
532 $textAttributes[
'location'] = $slot->
getAddress();
546 [ $schema, $textId ] = $this->getBlobStore()->splitBlobAddress( $slot->
getAddress() );
547 }
catch ( InvalidArgumentException $ex ) {
548 MWDebug::warning(
'Bad content address for slot ' . $slot->
getRole()
549 .
' of revision ' . $slot->
getRevision() .
': ' . $ex->getMessage() );
553 if ( $schema ===
'tt' ) {
554 $textAttributes[
'id'] = $textId;
555 } elseif ( $schema ===
'es' ) {
556 $textAttributes[
'id'] = bin2hex( $textId );
560 $out .= $indent .
Xml::element(
'text', $textAttributes ) .
"\n";
582 private function writeText(
586 ContentHandler $contentHandler,
587 string $contentFormat
589 if ( $content instanceof TextContent ) {
592 $data = $content->getText();
594 $data = $content->serialize( $contentFormat );
597 $data = $contentHandler->exportTransform( $data, $contentFormat );
599 $textAttributes[
'bytes'] = strlen( $data );
600 $textAttributes[
'xml:space'] =
'preserve';
601 return $indent . Xml::elementClean(
'text', $textAttributes, strval( $data ) ) .
"\n";
612 $out =
" <logitem>\n";
613 $out .=
" " .
Xml::element(
'id',
null, strval( $row->log_id ) ) .
"\n";
617 if ( $row->log_deleted & LogPage::DELETED_USER ) {
618 $out .=
" " .
Xml::element(
'contributor', [
'deleted' =>
'deleted' ] ) .
"\n";
623 if ( $row->log_deleted & LogPage::DELETED_COMMENT ) {
624 $out .=
" " .
Xml::element(
'comment', [
'deleted' =>
'deleted' ] ) .
"\n";
626 $comment = $this->commentStore->getComment(
'log_comment', $row )->text;
627 if ( $comment !=
'' ) {
628 $out .=
" " . Xml::elementClean(
'comment',
null, strval( $comment ) ) .
"\n";
632 $out .=
" " .
Xml::element(
'type',
null, strval( $row->log_type ) ) .
"\n";
633 $out .=
" " .
Xml::element(
'action',
null, strval( $row->log_action ) ) .
"\n";
635 if ( $row->log_deleted & LogPage::DELETED_ACTION ) {
636 $out .=
" " .
Xml::element(
'text', [
'deleted' =>
'deleted' ] ) .
"\n";
639 $out .=
" " . Xml::elementClean(
'logtitle',
null, self::canonicalTitle( $title ) ) .
"\n";
640 $out .=
" " . Xml::elementClean(
'params',
641 [
'xml:space' =>
'preserve' ],
642 strval( $row->log_params ) ) .
"\n";
645 $out .=
" </logitem>\n";
657 return $indent .
Xml::element(
'timestamp',
null, $ts ) .
"\n";
667 $out = $indent .
"<contributor>\n";
668 if ( $id || !IPUtils::isValid( $text ) ) {
669 $out .= $indent .
" " . Xml::elementClean(
'username',
null, strval( $text ) ) .
"\n";
670 $out .= $indent .
" " .
Xml::element(
'id',
null, strval( $id ) ) .
"\n";
672 $out .= $indent .
" " . Xml::elementClean(
'ip',
null, strval( $text ) ) .
"\n";
674 $out .= $indent .
"</contributor>\n";
685 if ( $row->page_namespace ==
NS_FILE ) {
687 ->newFile( $row->page_title );
688 if ( $img && $img->exists() ) {
690 foreach ( array_reverse( $img->getHistory() ) as $ver ) {
691 $out .= $this->writeUpload( $ver, $dumpContents );
693 $out .= $this->writeUpload( $img, $dumpContents );
705 private function writeUpload( $file, $dumpContents =
false ) {
706 if ( $file->isOld() ) {
708 '@phan-var OldLocalFile $file';
710 Xml::element(
'archivename',
null, $file->getArchiveName() ) .
"\n";
714 if ( $dumpContents ) {
715 $be = $file->getRepo()->getBackend();
716 # Dump file as base64
717 # Uses only XML-safe characters, so does not need escaping
718 # @todo Too bad this loads the contents into memory (script might swap)
719 $contents =
' <contents encoding="base64">' .
720 chunk_split( base64_encode(
721 $be->getFileContents( [
'src' => $file->getPath() ] ) ) ) .
726 $uploader = $file->getUploader( File::FOR_PUBLIC );
728 $uploader = $this->
writeContributor( $uploader->getId(), $uploader->getName() );
730 $uploader =
Xml::element(
'contributor', [
'deleted' =>
'deleted' ] ) .
"\n";
732 $comment = $file->getDescription( File::FOR_PUBLIC );
733 if ( ( $comment ??
'' ) !==
'' ) {
734 $comment = Xml::elementClean(
'comment',
null, $comment );
736 $comment =
Xml::element(
'comment', [
'deleted' =>
'deleted' ] );
738 return " <upload>\n" .
741 " " . $comment .
"\n" .
742 " " .
Xml::element(
'filename',
null, $file->getName() ) .
"\n" .
744 " " .
Xml::element(
'src',
null, $file->getCanonicalUrl() ) .
"\n" .
745 " " .
Xml::element(
'size',
null, (
string)( $file->getSize() ?: 0 ) ) .
"\n" .
746 " " . Xml::
element(
'sha1base36', null, $file->getSha1() ) .
"\n" .
747 " " . Xml::
element(
'rel', null, $file->getRel() ) .
"\n" .
773 if ( $prefix !==
'' ) {
777 return $prefix . $title->
getText();
782class_alias( XmlDumpWriter::class,
'XmlDumpWriter' );
const MW_VERSION
The running version of MediaWiki.
const XML_DUMP_SCHEMA_VERSION_11
const XML_DUMP_SCHEMA_VERSION_10
const CONTENT_MODEL_UNKNOWN
wfTimestamp( $outputtype=TS::UNIX, $ts=0)
Get a timestamp string in one of various formats.
Base class for content handling.
Content object implementation for representing flat text.
Exception thrown when an unregistered content model is requested.
Class to simplify the use of log pages.
A class containing constants representing the names of configuration variables.
const CapitalLinks
Name constant for the CapitalLinks setting, for use with Config::get()
const DBname
Name constant for the DBname setting, for use with Config::get()
const Sitename
Name constant for the Sitename setting, for use with Config::get()
Content objects represent page content, e.g.