// Content mode selected for revisions whose text is deleted; slots written in
// this mode get a <text> element carrying deleted="deleted".
64 private const WRITE_STUB_DELETED = 2;
// Schema versions accepted for output (entries elided in this excerpt). A
// requested schema version is checked against this list with strict in_array().
70 public static $supportedSchemas = [
// Schema version of the generated XML; interpolated into the export namespace
// and schema-location URLs of the root element.
80 private $schemaVersion;
// Title of the page currently being written, or null when none is set.
87 private $currentTitle =
null;
// Comment store used to read the log comment from a database row.
98 private $commentStore;
109 $contentMode = self::WRITE_CONTENT,
115 in_array( $contentMode, [ self::WRITE_CONTENT, self::WRITE_STUB ],
true ),
117 'must be one of the following constants: WRITE_CONTENT or WRITE_STUB.'
121 in_array( $schemaVersion, self::$supportedSchemas,
true ),
123 'must be one of the following schema versions: '
124 . implode(
',', self::$supportedSchemas )
127 $this->contentMode = $contentMode;
128 $this->schemaVersion = $schemaVersion;
130 $hookContainer ?? MediaWikiServices::getInstance()->getHookContainer()
132 $this->commentStore = $commentStore ?? MediaWikiServices::getInstance()->getCommentStore();
146 $ver = $this->schemaVersion;
147 return Xml::element(
'mediawiki', [
148 'xmlns' =>
"http://www.mediawiki.org/xml/export-$ver/",
149 'xmlns:xsi' =>
"http://www.w3.org/2001/XMLSchema-instance",
160 'xsi:schemaLocation' =>
"http://www.mediawiki.org/xml/export-$ver/ " .
161 "http://www.mediawiki.org/xml/export-$ver.xsd",
163 'xml:lang' => MediaWikiServices::getInstance()->getContentLanguage()->getHtmlCode() ],
/**
 * Assemble the <siteinfo> section of the dump.
 *
 * The visible code gathers child element strings in $info (the list ends with
 * the case setting and the namespace block) and joins them with a
 * newline-plus-indent separator after the opening <siteinfo> tag.
 *
 * NOTE(review): several lines of this method are elided in this excerpt, so
 * the start of the $info list and the closing tag are not shown here.
 */
172 private function siteInfo() {
178 $this->caseSetting(),
179 $this->namespaces() ];
180 return " <siteinfo>\n " .
181 implode(
"\n ", $info ) .
/**
 * Build the <sitename> element from the wiki's configured site name.
 *
 * @return string Element markup as produced by Xml::element()
 */
private function sitename() {
	$mainConfig = MediaWikiServices::getInstance()->getMainConfig();

	return Xml::element( 'sitename', [], $mainConfig->get( MainConfigNames::Sitename ) );
}
/**
 * Build the <dbname> element from the configured database name.
 *
 * @return string Element markup as produced by Xml::element()
 */
private function dbname() {
	$mainConfig = MediaWikiServices::getInstance()->getMainConfig();
	$name = $mainConfig->get( MainConfigNames::DBname );

	return Xml::element( 'dbname', [], $name );
}
/**
 * Build the <generator> element, naming the software and its version.
 *
 * @return string Element markup as produced by Xml::element()
 */
private function generator() {
	// The element text is the literal product name followed by MW_VERSION.
	$label = 'MediaWiki ' . MW_VERSION;

	return Xml::element( 'generator', [], $label );
}
/**
 * Build the <base> element holding the canonical URL of the main page.
 *
 * @return string Element markup as produced by Xml::element()
 */
private function homelink() {
	$mainPage = Title::newMainPage();
	$canonicalUrl = $mainPage->getCanonicalURL();

	return Xml::element( 'base', [], $canonicalUrl );
}
/**
 * Build the <case> element describing title capitalisation.
 *
 * The element text is 'first-letter' when the CapitalLinks configuration
 * value is truthy and 'case-sensitive' otherwise.
 */
219 private function caseSetting() {
220 $capitalLinks = MediaWikiServices::getInstance()->getMainConfig()->get(
221 MainConfigNames::CapitalLinks );
223 $sensitivity = $capitalLinks ?
'first-letter' :
'case-sensitive';
224 return Xml::element(
'case', [], $sensitivity );
/**
 * Build the <namespaces> block of the site info.
 *
 * The visible code reads the content language's formatted namespaces and
 * emits a <namespace> element whose 'case' attribute is 'first-letter' when
 * NamespaceInfo::isCapitalized() is truthy for $ns, 'case-sensitive' otherwise.
 *
 * NOTE(review): the loop header, the other element attributes and the return
 * statement are elided in this excerpt.
 */
230 private function namespaces() {
231 $spaces =
"<namespaces>\n";
232 $nsInfo = MediaWikiServices::getInstance()->getNamespaceInfo();
234 MediaWikiServices::getInstance()->getContentLanguage()->getFormattedNamespaces()
238 Xml::element(
'namespace',
241 'case' => $nsInfo->isCapitalized( $ns )
242 ?
'first-letter' :
'case-sensitive',
245 $spaces .=
" </namespaces>";
256 return "</mediawiki>\n";
268 $this->currentTitle = Title::newFromRow( $row );
269 $canonicalTitle = self::canonicalTitle( $this->currentTitle );
270 $out .=
' ' . Xml::elementClean(
'title', [], $canonicalTitle ) .
"\n";
271 $out .=
' ' . Xml::element(
'ns', [], strval( $row->page_namespace ) ) .
"\n";
272 $out .=
' ' . Xml::element(
'id', [], strval( $row->page_id ) ) .
"\n";
273 if ( $row->page_is_redirect ) {
274 $services = MediaWikiServices::getInstance();
275 $page = $services->getWikiPageFactory()->newFromTitle( $this->currentTitle );
276 $redirectStore = $services->getRedirectStore();
277 $redirect = $this->invokeLenient(
278 static function () use ( $page, $redirectStore ) {
279 return $redirectStore->getRedirectTarget( $page );
281 'Failed to get redirect target of page ' . $page->getId()
283 $redirect = Title::castFromLinkTarget( $redirect );
286 $out .= Xml::element(
'redirect', [
'title' => self::canonicalTitle( $redirect ) ] );
290 $this->hookRunner->onXmlDumpWriterOpenPage( $this, $out, $row, $this->currentTitle );
302 if ( $this->currentTitle !==
null ) {
303 $linkCache = MediaWikiServices::getInstance()->getLinkCache();
306 $linkCache->clearLink( $this->currentTitle );
/**
 * Fetch the revision store from the global service container.
 *
 * @return mixed Whatever MediaWikiServices::getRevisionStore() provides
 */
private function getRevisionStore() {
	$services = MediaWikiServices::getInstance();

	return $services->getRevisionStore();
}
/**
 * Fetch the blob store from the global service container.
 *
 * Within this excerpt it is used to split a slot's content address into a
 * schema and an id.
 */
321 private function getBlobStore() {
323 return MediaWikiServices::getInstance()->getBlobStore();
/**
 * Run an operation leniently: failures of the listed exception types are
 * reported through MWDebug::warning() instead of being propagated.
 *
 * @param callable $callback Operation to attempt; callers in this file pass
 *  closures. NOTE(review): the invocation itself is on an elided line.
 * @param string $warning Prefix of the warning; the exception message is
 *  appended after ': '.
 *
 * NOTE(review): the return statements are elided in this excerpt. Callers
 * test the result for null/falsiness, so a failure presumably yields null -
 * confirm against the full method.
 */
335 private function invokeLenient( $callback, $warning ) {
340 }
catch (
MWException | RuntimeException | InvalidArgumentException | ErrorException $ex ) {
341 MWDebug::warning( $warning .
': ' . $ex->getMessage() );
357 $rev = $this->getRevisionStore()->newRevisionFromRowAndSlots(
364 $out =
" <revision>\n";
365 $out .=
" " . Xml::element(
'id',
null, strval( $rev->getId() ) ) .
"\n";
367 if ( $rev->getParentId() ) {
368 $out .=
" " . Xml::element(
'parentid',
null, strval( $rev->getParentId() ) ) .
"\n";
373 if ( $rev->isDeleted( RevisionRecord::DELETED_USER ) ) {
374 $out .=
" " . Xml::element(
'contributor', [
'deleted' =>
'deleted' ] ) .
"\n";
377 $user = $rev->getUser();
379 $user ? $user->getId() : 0,
380 $user ? $user->getName() :
''
384 if ( $rev->isMinor() ) {
385 $out .=
" <minor/>\n";
387 if ( $rev->isDeleted( RevisionRecord::DELETED_COMMENT ) ) {
388 $out .=
" " . Xml::element(
'comment', [
'deleted' =>
'deleted' ] ) .
"\n";
390 if ( $rev->getComment()->text !=
'' ) {
392 . Xml::elementClean(
'comment', [], strval( $rev->getComment()->text ) )
397 $contentMode = $rev->isDeleted( RevisionRecord::DELETED_TEXT ) ? self::WRITE_STUB_DELETED
398 : $this->contentMode;
400 $slots = $rev->getSlots()->getSlots();
404 $out .= $this->writeSlot( $slots[SlotRecord::MAIN], $contentMode );
406 foreach ( $slots as $role => $slot ) {
407 if ( $role === SlotRecord::MAIN ) {
410 $out .= $this->writeSlot( $slot, $contentMode );
413 if ( $rev->isDeleted( RevisionRecord::DELETED_TEXT ) ) {
414 $out .=
" <sha1/>\n";
416 $sha1 = $this->invokeLenient(
417 static function () use ( $rev ) {
418 return $rev->getSha1();
420 'failed to determine sha1 for revision ' . $rev->getId()
422 $out .=
" " . Xml::element(
'sha1',
null, strval( $sha1 ) ) .
"\n";
426 if ( $contentMode === self::WRITE_CONTENT ) {
428 $content = $this->invokeLenient(
429 static function () use ( $rev ) {
430 return $rev->getMainContentRaw();
432 'Failed to load main slot content of revision ' . $rev->getId()
435 $text = $content ? $content->
serialize() :
'';
437 $this->hookRunner->onXmlDumpWriterWriteRevision( $this, $out, $row, $text, $rev );
439 $out .=
" </revision>\n";
/**
 * Serialize one revision slot to XML.
 *
 * Depending on $contentMode the <text> element carries the full content
 * (WRITE_CONTENT), is flagged deleted="deleted" (WRITE_STUB_DELETED), or
 * holds only reference attributes such as the slot's content address.
 * The visible code also emits role, origin, model and format elements and
 * brackets output with <content> open/close tags.
 *
 * @param SlotRecord $slot Slot to serialize
 * @param int $contentMode One of the WRITE_* constants of this class
 *
 * NOTE(review): many lines are elided in this excerpt (initialisation of
 * $out, $indent, $isV11, $contentModel and $textAttributes, several branch
 * lines and the return statement); confirm details against the full method.
 */
450 private function writeSlot(
SlotRecord $slot, $contentMode ) {
451 $isMain = $slot->
getRole() === SlotRecord::MAIN;
// Guard for a non-main slot when $isV11 is false (body elided in this excerpt).
454 if ( !$isV11 && !$isMain ) {
464 $out .=
' ' . Xml::openElement(
'content' ) .
"\n";
466 $out .= $indent . Xml::element(
'role',
null, strval( $slot->
getRole() ) ) .
"\n";
470 $out .= $indent . Xml::element(
'origin',
null, strval( $slot->
getOrigin() ) ) .
"\n";
// The serialization format reported is the content handler's default format.
474 $contentHandler = MediaWikiServices::getInstance()
475 ->getContentHandlerFactory()
476 ->getContentHandler( $contentModel );
477 $contentFormat = $contentHandler->getDefaultFormat();
481 $out .= $indent . Xml::element(
'model',
null, strval( $contentModel ) ) .
"\n";
482 $out .= $indent . Xml::element(
'format',
null, strval( $contentFormat ) ) .
"\n";
485 'bytes' => $this->invokeLenient(
486 static function () use ( $slot ) {
489 'failed to determine size for slot ' . $slot->
getRole() .
' of revision '
495 $textAttributes[
'sha1'] = $this->invokeLenient(
496 static function () use ( $slot ) {
499 'failed to determine sha1 for slot ' . $slot->
getRole() .
' of revision '
// Full content requested: load it leniently. A null result still produces a
// <text> element that carries only the attributes.
504 if ( $contentMode === self::WRITE_CONTENT ) {
505 $content = $this->invokeLenient(
506 static function () use ( $slot ) {
509 'failed to load content for slot ' . $slot->
getRole() .
' of revision '
513 if ( $content ===
null ) {
514 $out .= $indent . Xml::element(
'text', $textAttributes ) .
"\n";
516 $out .= $this->writeText( $content, $textAttributes, $indent );
// Deleted content: emit an empty <text> flagged deleted="deleted".
518 } elseif ( $contentMode === self::WRITE_STUB_DELETED ) {
520 $textAttributes[
'deleted'] =
'deleted';
521 $out .= $indent . Xml::element(
'text', $textAttributes ) .
"\n";
// Remaining mode (presumably WRITE_STUB): reference the content by its address.
525 $textAttributes[
'location'] = $slot->
getAddress();
539 [ $schema, $textId ] = $this->getBlobStore()->splitBlobAddress( $slot->
getAddress() );
540 }
catch ( InvalidArgumentException $ex ) {
541 MWDebug::warning(
'Bad content address for slot ' . $slot->
getRole()
542 .
' of revision ' . $slot->
getRevision() .
': ' . $ex->getMessage() );
// Only the 'tt' and 'es' address schemas contribute an id attribute; ids of
// the 'es' schema are hex-encoded.
546 if ( $schema ===
'tt' ) {
547 $textAttributes[
'id'] = $textId;
548 } elseif ( $schema ===
'es' ) {
549 $textAttributes[
'id'] = bin2hex( $textId );
553 $out .= $indent . Xml::element(
'text', $textAttributes ) .
"\n";
557 $out .=
' ' . Xml::closeElement(
'content' ) .
"\n";
/**
 * Serialize content into a <text> element.
 *
 * The data is passed through the content handler's exportTransform(), the
 * 'bytes' attribute is set to strlen() of the transformed data, and
 * xml:space="preserve" is added before the element is built with
 * Xml::elementClean().
 *
 * @param Content $content Content to write
 * @param array $textAttributes Attributes for the <text> element; 'bytes'
 *  and 'xml:space' are overwritten here
 * @param string $indent Prefix prepended to the returned line
 * @return string The element followed by a newline
 *
 * NOTE(review): the lookup of $contentHandler and the condition choosing
 * between getText() and serialize() are elided in this excerpt.
 */
570 private function writeText(
Content $content, $textAttributes, $indent ) {
572 $contentFormat = $contentHandler->getDefaultFormat();
577 $data = $content->getText();
579 $data = $content->
serialize( $contentFormat );
582 $data = $contentHandler->exportTransform( $data, $contentFormat );
// Byte length is measured after the export transform, not before.
584 $textAttributes[
'bytes'] = strlen( $data );
585 $textAttributes[
'xml:space'] =
'preserve';
586 return $indent . Xml::elementClean(
'text', $textAttributes, strval( $data ) ) .
"\n";
597 $out =
" <logitem>\n";
598 $out .=
" " . Xml::element(
'id',
null, strval( $row->log_id ) ) .
"\n";
602 if ( $row->log_deleted & LogPage::DELETED_USER ) {
603 $out .=
" " . Xml::element(
'contributor', [
'deleted' =>
'deleted' ] ) .
"\n";
608 if ( $row->log_deleted & LogPage::DELETED_COMMENT ) {
609 $out .=
" " . Xml::element(
'comment', [
'deleted' =>
'deleted' ] ) .
"\n";
611 $comment = $this->commentStore->getComment(
'log_comment', $row )->text;
612 if ( $comment !=
'' ) {
613 $out .=
" " . Xml::elementClean(
'comment',
null, strval( $comment ) ) .
"\n";
617 $out .=
" " . Xml::element(
'type',
null, strval( $row->log_type ) ) .
"\n";
618 $out .=
" " . Xml::element(
'action',
null, strval( $row->log_action ) ) .
"\n";
620 if ( $row->log_deleted & LogPage::DELETED_ACTION ) {
621 $out .=
" " . Xml::element(
'text', [
'deleted' =>
'deleted' ] ) .
"\n";
623 $title = Title::makeTitle( $row->log_namespace, $row->log_title );
624 $out .=
" " . Xml::elementClean(
'logtitle',
null, self::canonicalTitle( $title ) ) .
"\n";
625 $out .=
" " . Xml::elementClean(
'params',
626 [
'xml:space' =>
'preserve' ],
627 strval( $row->log_params ) ) .
"\n";
630 $out .=
" </logitem>\n";
642 return $indent . Xml::element(
'timestamp',
null, $ts ) .
"\n";
652 $out = $indent .
"<contributor>\n";
653 if ( $id || !IPUtils::isValid( $text ) ) {
654 $out .= $indent .
" " . Xml::elementClean(
'username',
null, strval( $text ) ) .
"\n";
655 $out .= $indent .
" " . Xml::element(
'id',
null, strval( $id ) ) .
"\n";
657 $out .= $indent .
" " . Xml::elementClean(
'ip',
null, strval( $text ) ) .
"\n";
659 $out .= $indent .
"</contributor>\n";
670 if ( $row->page_namespace ==
NS_FILE ) {
671 $img = MediaWikiServices::getInstance()->getRepoGroup()->getLocalRepo()
672 ->newFile( $row->page_title );
673 if ( $img && $img->exists() ) {
675 foreach ( array_reverse( $img->getHistory() ) as $ver ) {
676 $out .= $this->writeUpload( $ver, $dumpContents );
678 $out .= $this->writeUpload( $img, $dumpContents );
/**
 * Serialize one file version as an <upload> element.
 *
 * The visible parts of the returned markup are the comment, filename, src,
 * size, sha1base36 and rel elements. Old file versions additionally get an
 * <archivename> element, and file contents are embedded base64-encoded when
 * $dumpContents is truthy.
 *
 * @param mixed $file File object; old versions are detected with isOld()
 * @param bool $dumpContents Whether to embed the file contents
 *
 * NOTE(review): several branch lines and part of the returned string are
 * elided in this excerpt; confirm details against the full method.
 */
690 private function writeUpload( $file, $dumpContents =
false ) {
691 if ( $file->isOld() ) {
693 '@phan-var OldLocalFile $file';
695 Xml::element(
'archivename',
null, $file->getArchiveName() ) .
"\n";
699 if ( $dumpContents ) {
700 $be = $file->getRepo()->getBackend();
701 # Dump file as base64
702 # Uses only XML-safe characters, so does not need escaping
703 # @todo Too bad this loads the contents into memory (script might swap)
704 $contents =
' <contents encoding="base64">' .
705 chunk_split( base64_encode(
706 $be->getFileContents( [
'src' => $file->getPath() ] ) ) ) .
// The uploader is requested for the public audience; the two assignments below
// are presumably the "available" and "hidden" branches of an elided condition.
711 $uploader = $file->getUploader( File::FOR_PUBLIC );
713 $uploader = $this->
writeContributor( $uploader->getId(), $uploader->getName() );
715 $uploader = Xml::element(
'contributor', [
'deleted' =>
'deleted' ] ) .
"\n";
// A null or empty public description is not written as comment text.
717 $comment = $file->getDescription( File::FOR_PUBLIC );
718 if ( ( $comment ??
'' ) !==
'' ) {
719 $comment = Xml::elementClean(
'comment',
null, $comment );
721 $comment = Xml::element(
'comment', [
'deleted' =>
'deleted' ] );
723 return " <upload>\n" .
726 " " . $comment .
"\n" .
727 " " . Xml::element(
'filename',
null, $file->getName() ) .
"\n" .
729 " " . Xml::element(
'src',
null, $file->getCanonicalUrl() ) .
"\n" .
// A falsy size is written as 0.
730 " " . Xml::element(
'size',
null, (
string)( $file->getSize() ?: 0 ) ) .
"\n" .
731 " " .
Xml::
element(
'sha1base36', null, $file->getSha1() ) .
"\n" .
732 " " .
Xml::
element(
'rel', null, $file->getRel() ) .
"\n" .
752 $prefix = MediaWikiServices::getInstance()->getContentLanguage()->
758 if ( $prefix !==
'' ) {
762 return $prefix . $title->
getText();