38use Wikimedia\Assert\Assert;
56 private const WRITE_STUB_DELETED = 2;
62 public static $supportedSchemas = [
72 private $schemaVersion;
79 private $currentTitle =
null;
90 private $commentStore;
101 $contentMode = self::WRITE_CONTENT,
107 in_array( $contentMode, [ self::WRITE_CONTENT, self::WRITE_STUB ],
true ),
109 'must be one of the following constants: WRITE_CONTENT or WRITE_STUB.'
113 in_array( $schemaVersion, self::$supportedSchemas,
true ),
115 'must be one of the following schema versions: '
116 . implode(
',', self::$supportedSchemas )
119 $this->contentMode = $contentMode;
120 $this->schemaVersion = $schemaVersion;
122 $hookContainer ?? MediaWikiServices::getInstance()->getHookContainer()
124 $this->commentStore = $commentStore ?? MediaWikiServices::getInstance()->getCommentStore();
138 $ver = $this->schemaVersion;
139 return Xml::element(
'mediawiki', [
140 'xmlns' =>
"http://www.mediawiki.org/xml/export-$ver/",
141 'xmlns:xsi' =>
"http://www.w3.org/2001/XMLSchema-instance",
152 'xsi:schemaLocation' =>
"http://www.mediawiki.org/xml/export-$ver/ " .
153 "http://www.mediawiki.org/xml/export-$ver.xsd",
155 'xml:lang' => MediaWikiServices::getInstance()->getContentLanguage()->getHtmlCode() ],
164 private function siteInfo() {
170 $this->caseSetting(),
171 $this->namespaces() ];
172 return " <siteinfo>\n " .
173 implode(
"\n ", $info ) .
180 private function sitename() {
181 $sitename = MediaWikiServices::getInstance()->getMainConfig()->get(
182 MainConfigNames::Sitename );
189 private function dbname() {
190 $dbname = MediaWikiServices::getInstance()->getMainConfig()->get( MainConfigNames::DBname );
197 private function generator() {
204 private function homelink() {
205 return Xml::element(
'base', [], Title::newMainPage()->getCanonicalURL() );
211 private function caseSetting() {
212 $capitalLinks = MediaWikiServices::getInstance()->getMainConfig()->get(
213 MainConfigNames::CapitalLinks );
215 $sensitivity = $capitalLinks ?
'first-letter' :
'case-sensitive';
222 private function namespaces() {
223 $spaces =
"<namespaces>\n";
224 $nsInfo = MediaWikiServices::getInstance()->getNamespaceInfo();
226 MediaWikiServices::getInstance()->getContentLanguage()->getFormattedNamespaces()
233 'case' => $nsInfo->isCapitalized( $ns )
234 ?
'first-letter' :
'case-sensitive',
237 $spaces .=
" </namespaces>";
248 return "</mediawiki>\n";
260 $this->currentTitle = Title::newFromRow( $row );
261 $canonicalTitle = self::canonicalTitle( $this->currentTitle );
262 $out .=
' ' . Xml::elementClean(
'title', [], $canonicalTitle ) .
"\n";
263 $out .=
' ' . Xml::element(
'ns', [], strval( $row->page_namespace ) ) .
"\n";
264 $out .=
' ' . Xml::element(
'id', [], strval( $row->page_id ) ) .
"\n";
265 if ( $row->page_is_redirect ) {
266 $services = MediaWikiServices::getInstance();
267 $page = $services->getWikiPageFactory()->newFromTitle( $this->currentTitle );
268 $redirectStore = $services->getRedirectStore();
269 $redirect = $this->invokeLenient(
270 static function () use ( $page, $redirectStore ) {
271 return $redirectStore->getRedirectTarget( $page );
273 'Failed to get redirect target of page ' . $page->getId()
277 $out .= Xml::element(
'redirect', [
'title' => self::canonicalTitle( $redirect ) ] );
281 $this->hookRunner->onXmlDumpWriterOpenPage( $this, $out, $row, $this->currentTitle );
293 if ( $this->currentTitle !==
null ) {
294 $linkCache = MediaWikiServices::getInstance()->getLinkCache();
297 $linkCache->clearLink( $this->currentTitle );
305 private function getRevisionStore() {
306 return MediaWikiServices::getInstance()->getRevisionStore();
312 private function getBlobStore() {
314 return MediaWikiServices::getInstance()->getBlobStore();
326 private function invokeLenient( $callback, $warning ) {
331 }
catch (
MWException | RuntimeException | InvalidArgumentException | ErrorException $ex ) {
348 $rev = $this->getRevisionStore()->newRevisionFromRowAndSlots(
355 $out =
" <revision>\n";
356 $out .=
" " . Xml::element(
'id',
null, strval( $rev->getId() ) ) .
"\n";
358 if ( $rev->getParentId() ) {
359 $out .=
" " . Xml::element(
'parentid',
null, strval( $rev->getParentId() ) ) .
"\n";
364 if ( $rev->isDeleted( RevisionRecord::DELETED_USER ) ) {
365 $out .=
" " . Xml::element(
'contributor', [
'deleted' =>
'deleted' ] ) .
"\n";
368 $user = $rev->getUser();
370 $user ? $user->getId() : 0,
371 $user ? $user->getName() :
''
375 if ( $rev->isMinor() ) {
376 $out .=
" <minor/>\n";
378 if ( $rev->isDeleted( RevisionRecord::DELETED_COMMENT ) ) {
379 $out .=
" " . Xml::element(
'comment', [
'deleted' =>
'deleted' ] ) .
"\n";
381 if ( $rev->getComment()->text !=
'' ) {
383 . Xml::elementClean(
'comment', [], strval( $rev->getComment()->text ) )
388 $contentMode = $rev->isDeleted( RevisionRecord::DELETED_TEXT ) ? self::WRITE_STUB_DELETED
389 : $this->contentMode;
391 $slots = $rev->getSlots()->getSlots();
395 $out .= $this->writeSlot( $slots[SlotRecord::MAIN], $contentMode );
397 foreach ( $slots as $role => $slot ) {
398 if ( $role === SlotRecord::MAIN ) {
401 $out .= $this->writeSlot( $slot, $contentMode );
404 if ( $rev->isDeleted( RevisionRecord::DELETED_TEXT ) ) {
405 $out .=
" <sha1/>\n";
407 $sha1 = $this->invokeLenient(
408 static function () use ( $rev ) {
409 return $rev->getSha1();
411 'failed to determine sha1 for revision ' . $rev->getId()
413 $out .=
" " . Xml::element(
'sha1',
null, strval( $sha1 ) ) .
"\n";
417 if ( $contentMode === self::WRITE_CONTENT ) {
419 $content = $this->invokeLenient(
420 static function () use ( $rev ) {
421 return $rev->getContent( SlotRecord::MAIN, RevisionRecord::RAW );
423 'Failed to load main slot content of revision ' . $rev->getId()
426 $text = $content ? $content->
serialize() :
'';
428 $this->hookRunner->onXmlDumpWriterWriteRevision( $this, $out, $row, $text, $rev );
430 $out .=
" </revision>\n";
441 private function writeSlot(
SlotRecord $slot, $contentMode ) {
442 $isMain = $slot->
getRole() === SlotRecord::MAIN;
445 if ( !$isV11 && !$isMain ) {
465 $contentHandler = MediaWikiServices::getInstance()
466 ->getContentHandlerFactory()
467 ->getContentHandler( $contentModel );
468 $contentFormat = $contentHandler->getDefaultFormat();
472 $out .= $indent .
Xml::element(
'model',
null, strval( $contentModel ) ) .
"\n";
473 $out .= $indent .
Xml::element(
'format',
null, strval( $contentFormat ) ) .
"\n";
476 'bytes' => $this->invokeLenient(
477 static function () use ( $slot ) {
480 'failed to determine size for slot ' . $slot->
getRole() .
' of revision '
486 $textAttributes[
'sha1'] = $this->invokeLenient(
487 static function () use ( $slot ) {
490 'failed to determine sha1 for slot ' . $slot->
getRole() .
' of revision '
495 if ( $contentMode === self::WRITE_CONTENT ) {
496 $content = $this->invokeLenient(
497 static function () use ( $slot ) {
500 'failed to load content for slot ' . $slot->
getRole() .
' of revision '
504 if ( $content ===
null ) {
505 $out .= $indent .
Xml::element(
'text', $textAttributes ) .
"\n";
507 $out .= $this->writeText( $content, $textAttributes, $indent );
509 } elseif ( $contentMode === self::WRITE_STUB_DELETED ) {
511 $textAttributes[
'deleted'] =
'deleted';
512 $out .= $indent .
Xml::element(
'text', $textAttributes ) .
"\n";
516 $textAttributes[
'location'] = $slot->
getAddress();
529 $textId = $this->getBlobStore()->getTextIdFromAddress( $slot->
getAddress() );
530 }
catch ( InvalidArgumentException $ex ) {
532 .
' of revision ' . $slot->
getRevision() .
': ' . $ex->getMessage() );
536 if ( is_int( $textId ) ) {
537 $textAttributes[
'id'] = $textId;
541 $out .= $indent .
Xml::element(
'text', $textAttributes ) .
"\n";
558 private function writeText(
Content $content, $textAttributes, $indent ) {
560 $contentFormat = $contentHandler->getDefaultFormat();
565 $data = $content->getText();
567 $data = $content->
serialize( $contentFormat );
570 $data = $contentHandler->exportTransform( $data, $contentFormat );
572 $textAttributes[
'bytes'] = strlen( $data );
573 $textAttributes[
'xml:space'] =
'preserve';
574 return $indent .
Xml::elementClean(
'text', $textAttributes, strval( $data ) ) .
"\n";
585 $out =
" <logitem>\n";
586 $out .=
" " . Xml::element(
'id',
null, strval( $row->log_id ) ) .
"\n";
590 if ( $row->log_deleted & LogPage::DELETED_USER ) {
591 $out .=
" " . Xml::element(
'contributor', [
'deleted' =>
'deleted' ] ) .
"\n";
596 if ( $row->log_deleted & LogPage::DELETED_COMMENT ) {
597 $out .=
" " . Xml::element(
'comment', [
'deleted' =>
'deleted' ] ) .
"\n";
599 $comment = $this->commentStore->getComment(
'log_comment', $row )->text;
600 if ( $comment !=
'' ) {
601 $out .=
" " . Xml::elementClean(
'comment',
null, strval( $comment ) ) .
"\n";
605 $out .=
" " . Xml::element(
'type',
null, strval( $row->log_type ) ) .
"\n";
606 $out .=
" " . Xml::element(
'action',
null, strval( $row->log_action ) ) .
"\n";
608 if ( $row->log_deleted & LogPage::DELETED_ACTION ) {
609 $out .=
" " . Xml::element(
'text', [
'deleted' =>
'deleted' ] ) .
"\n";
611 $title = Title::makeTitle( $row->log_namespace, $row->log_title );
612 $out .=
" " . Xml::elementClean(
'logtitle',
null, self::canonicalTitle( $title ) ) .
"\n";
613 $out .=
" " . Xml::elementClean(
'params',
614 [
'xml:space' =>
'preserve' ],
615 strval( $row->log_params ) ) .
"\n";
618 $out .=
" </logitem>\n";
630 return $indent . Xml::element(
'timestamp',
null, $ts ) .
"\n";
640 $out = $indent .
"<contributor>\n";
641 if ( $id || !IPUtils::isValid( $text ) ) {
642 $out .= $indent .
" " . Xml::elementClean(
'username',
null, strval( $text ) ) .
"\n";
643 $out .= $indent .
" " . Xml::element(
'id',
null, strval( $id ) ) .
"\n";
645 $out .= $indent .
" " . Xml::elementClean(
'ip',
null, strval( $text ) ) .
"\n";
647 $out .= $indent .
"</contributor>\n";
658 if ( $row->page_namespace ==
NS_FILE ) {
659 $img = MediaWikiServices::getInstance()->getRepoGroup()->getLocalRepo()
660 ->newFile( $row->page_title );
661 if ( $img && $img->exists() ) {
663 foreach ( array_reverse( $img->getHistory() ) as $ver ) {
664 $out .= $this->writeUpload( $ver, $dumpContents );
666 $out .= $this->writeUpload( $img, $dumpContents );
678 private function writeUpload( $file, $dumpContents =
false ) {
679 if ( $file->isOld() ) {
681 '@phan-var OldLocalFile $file';
683 Xml::element(
'archivename',
null, $file->getArchiveName() ) .
"\n";
687 if ( $dumpContents ) {
688 $be = $file->getRepo()->getBackend();
689 # Dump file as base64
690 # Uses only XML-safe characters, so does not need escaping
691 # @todo Too bad this loads the contents into memory (script might swap)
692 $contents =
' <contents encoding="base64">' .
693 chunk_split( base64_encode(
694 $be->getFileContents( [
'src' => $file->getPath() ] ) ) ) .
699 $uploader = $file->getUploader( File::FOR_PUBLIC );
701 $uploader = $this->
writeContributor( $uploader->getId(), $uploader->getName() );
703 $uploader =
Xml::element(
'contributor', [
'deleted' =>
'deleted' ] ) .
"\n";
705 $comment = $file->getDescription( File::FOR_PUBLIC );
706 if ( ( $comment ??
'' ) !==
'' ) {
709 $comment =
Xml::element(
'comment', [
'deleted' =>
'deleted' ] );
711 return " <upload>\n" .
714 " " . $comment .
"\n" .
715 " " .
Xml::element(
'filename',
null, $file->getName() ) .
"\n" .
717 " " .
Xml::element(
'src',
null, $file->getCanonicalUrl() ) .
"\n" .
718 " " .
Xml::element(
'size',
null, (
string)( $file->getSize() ?: 0 ) ) .
"\n" .
719 " " .
Xml::
element(
'sha1base36', null, $file->getSha1() ) .
"\n" .
720 " " .
Xml::
element(
'rel', null, $file->getRel() ) .
"\n" .
740 $prefix = MediaWikiServices::getInstance()->getContentLanguage()->
746 if ( $prefix !==
'' ) {
750 return $prefix . $title->
getText();
const MW_VERSION
The running version of MediaWiki.
const XML_DUMP_SCHEMA_VERSION_11
const XML_DUMP_SCHEMA_VERSION_10
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
static warning( $msg, $callerOffset=1, $level=E_USER_NOTICE, $log='auto')
Adds a warning entry to the log.
A class containing constants representing the names of configuration variables.
Content object implementation for representing flat text.
closeStream()
Closes the output stream with the closing root element.
static string[] $supportedSchemas
the schema versions supported for output @final
static canonicalTitle(Title $title)
Return prefixed text form of title, but using the content language's canonical namespace.
const WRITE_STUB
Only output stubs for revision content.
writeLogItem( $row)
Dumps a "<logitem>" section on the output stream, with data filled in from the given database row.
writeTimestamp( $timestamp, $indent=" ")
const WRITE_CONTENT
Output serialized revision content.
writeUploads( $row, $dumpContents=false)
Warning! This data is potentially inconsistent.
closePage()
Closes a "<page>" section on the output stream.
openStream()
Opens the XML output stream's root "<mediawiki>" element.
writeRevision( $row, $slotRows=null)
Dumps a "<revision>" section on the output stream, with data filled in from the given database row.
openPage( $row)
Opens a "<page>" section on the output stream, with data from the given database row.
__construct( $contentMode=self::WRITE_CONTENT, $schemaVersion=XML_DUMP_SCHEMA_VERSION_11, ?HookContainer $hookContainer=null, ?CommentStore $commentStore=null)
writeContributor( $id, $text, $indent=" ")
Module of static functions for generating XML.
static closeElement( $element)
Shortcut to close an XML element.
static openElement( $element, $attribs=null)
This opens an XML element.
static element( $element, $attribs=null, $contents='', $allowShortTag=true)
Format an XML element with given attributes and, optionally, text content.
static elementClean( $element, $attribs=[], $contents='')
Format an XML element as with self::element(), but run text through the content language's normalize() validator first to ensure that no invalid UTF-8 is passed.
Base interface for representing page content.
getContentHandler()
Convenience method that returns the ContentHandler singleton for handling the content model that this Content object uses.
serialize( $format=null)
Convenience method for serializing this Content object.