33use Wikimedia\Assert\Assert;
91 $contentMode = self::WRITE_CONTENT,
95 in_array(
$contentMode, [ self::WRITE_CONTENT, self::WRITE_STUB ],
true ),
97 'must be one of the following constants: WRITE_CONTENT or WRITE_STUB.'
103 'must be one of the following schema versions: '
104 . implode(
',', self::$supportedSchemas )
109 $this->hookRunner =
new HookRunner( MediaWikiServices::getInstance()->getHookContainer() );
123 $ver = $this->schemaVersion;
124 return Xml::element(
'mediawiki', [
125 'xmlns' =>
"http://www.mediawiki.org/xml/export-$ver/",
126 'xmlns:xsi' =>
"http://www.w3.org/2001/XMLSchema-instance",
137 'xsi:schemaLocation' =>
"http://www.mediawiki.org/xml/export-$ver/ " .
138 "http://www.mediawiki.org/xml/export-$ver.xsd",
140 'xml:lang' => MediaWikiServices::getInstance()->getContentLanguage()->getHtmlCode() ],
157 return " <siteinfo>\n " .
158 implode(
"\n ", $info ) .
167 return Xml::element(
'sitename', [],
$wgSitename );
175 return Xml::element(
'dbname', [],
$wgDBname );
182 return Xml::element(
'generator', [],
'MediaWiki ' .
MW_VERSION );
189 return Xml::element(
'base', [], Title::newMainPage()->getCanonicalURL() );
199 return Xml::element(
'case', [], $sensitivity );
206 $spaces =
"<namespaces>\n";
207 $nsInfo = MediaWikiServices::getInstance()->getNamespaceInfo();
209 MediaWikiServices::getInstance()->getContentLanguage()->getFormattedNamespaces()
213 Xml::element(
'namespace',
216 'case' => $nsInfo->isCapitalized( $ns )
217 ?
'first-letter' :
'case-sensitive',
220 $spaces .=
" </namespaces>";
231 return "</mediawiki>\n";
243 $this->currentTitle = Title::newFromRow( $row );
244 $canonicalTitle = self::canonicalTitle( $this->currentTitle );
245 $out .=
' ' . Xml::elementClean(
'title', [], $canonicalTitle ) .
"\n";
246 $out .=
' ' . Xml::element(
'ns', [], strval( $row->page_namespace ) ) .
"\n";
247 $out .=
' ' . Xml::element(
'id', [], strval( $row->page_id ) ) .
"\n";
248 if ( $row->page_is_redirect ) {
249 $page = MediaWikiServices::getInstance()->getWikiPageFactory()->newFromTitle( $this->currentTitle );
251 static function () use ( $page ) {
252 return $page->getRedirectTarget();
254 'Failed to get redirect target of page ' . $page->getId()
258 $out .= Xml::element(
'redirect', [
'title' => self::canonicalTitle( $redirect ) ] );
263 if ( $row->page_restrictions !=
'' ) {
264 $out .=
' ' . Xml::element(
'restrictions', [],
265 strval( $row->page_restrictions ) ) .
"\n";
268 $this->hookRunner->onXmlDumpWriterOpenPage( $this, $out, $row, $this->currentTitle );
280 if ( $this->currentTitle !==
null ) {
281 $linkCache = MediaWikiServices::getInstance()->getLinkCache();
284 $linkCache->clearLink( $this->currentTitle );
293 return MediaWikiServices::getInstance()->getRevisionStore();
301 return MediaWikiServices::getInstance()->getBlobStore();
320 }
catch ( Exception $ex ) {
321 if ( $ex instanceof
MWException || $ex instanceof RuntimeException ||
322 $ex instanceof InvalidArgumentException ) {
323 MWDebug::warning( $warning .
': ' . $ex->getMessage() );
350 $out =
" <revision>\n";
351 $out .=
" " . Xml::element(
'id',
null, strval( $rev->getId() ) ) .
"\n";
353 if ( $rev->getParentId() ) {
354 $out .=
" " . Xml::element(
'parentid',
null, strval( $rev->getParentId() ) ) .
"\n";
359 if ( $rev->isDeleted( RevisionRecord::DELETED_USER ) ) {
360 $out .=
" " . Xml::element(
'contributor', [
'deleted' =>
'deleted' ] ) .
"\n";
363 $user = $rev->getUser();
365 $user ? $user->getId() : 0,
366 $user ? $user->getName() :
''
370 if ( $rev->isMinor() ) {
371 $out .=
" <minor/>\n";
373 if ( $rev->isDeleted( RevisionRecord::DELETED_COMMENT ) ) {
374 $out .=
" " . Xml::element(
'comment', [
'deleted' =>
'deleted' ] ) .
"\n";
376 if ( $rev->getComment()->text !=
'' ) {
379 . Xml::elementClean(
'comment', [], strval( $rev->getComment()->text ) )
384 $contentMode = $rev->isDeleted( RevisionRecord::DELETED_TEXT ) ? self::WRITE_STUB_DELETED
385 : $this->contentMode;
387 $slots = $rev->getSlots()->getSlots();
393 foreach ( $slots as $role => $slot ) {
394 if ( $role === SlotRecord::MAIN ) {
400 if ( $rev->isDeleted( RevisionRecord::DELETED_TEXT ) ) {
401 $out .=
" <sha1/>\n";
404 static function () use ( $rev ) {
405 return $rev->getSha1();
407 'failed to determine sha1 for revision ' . $rev->getId()
409 $out .=
" " . Xml::element(
'sha1',
null, strval( $sha1 ) ) .
"\n";
418 static function () use ( $rev ) {
419 return $rev->getContent( SlotRecord::MAIN, RevisionRecord::RAW );
421 'Failed to load main slot content of revision ' . $rev->getId()
426 $this->hookRunner->onXmlDumpWriterWriteRevision( $writer, $out, $row, $text, $rev );
428 $out .=
" </revision>\n";
440 $isMain = $slot->
getRole() === SlotRecord::MAIN;
443 if ( !$isV11 && !$isMain ) {
453 $out .=
' ' . Xml::openElement(
'content' ) .
"\n";
455 $out .= $indent . Xml::element(
'role',
null, strval( $slot->
getRole() ) ) .
"\n";
459 $out .= $indent . Xml::element(
'origin',
null, strval( $slot->
getOrigin() ) ) .
"\n";
463 $contentHandler = MediaWikiServices::getInstance()
464 ->getContentHandlerFactory()
465 ->getContentHandler( $contentModel );
466 $contentFormat = $contentHandler->getDefaultFormat();
470 $out .= $indent . Xml::element(
'model',
null, strval( $contentModel ) ) .
"\n";
471 $out .= $indent . Xml::element(
'format',
null, strval( $contentFormat ) ) .
"\n";
475 static function () use ( $slot ) {
478 'failed to determine size for slot ' . $slot->
getRole() .
' of revision '
485 static function () use ( $slot ) {
488 'failed to determine sha1 for slot ' . $slot->
getRole() .
' of revision '
495 static function () use ( $slot ) {
498 'failed to load content for slot ' . $slot->
getRole() .
' of revision '
503 $out .= $indent . Xml::element(
'text', $textAttributes ) .
"\n";
507 } elseif (
$contentMode === self::WRITE_STUB_DELETED ) {
509 $textAttributes[
'deleted'] =
'deleted';
510 $out .= $indent . Xml::element(
'text', $textAttributes ) .
"\n";
514 $textAttributes[
'location'] = $slot->
getAddress();
528 }
catch ( InvalidArgumentException $ex ) {
529 MWDebug::warning(
'Bad content address for slot ' . $slot->
getRole()
530 .
' of revision ' . $slot->
getRevision() .
': ' . $ex->getMessage() );
534 if ( is_int( $textId ) ) {
535 $textAttributes[
'id'] = $textId;
539 $out .= $indent . Xml::element(
'text', $textAttributes ) .
"\n";
543 $out .=
' ' . Xml::closeElement(
'content' ) .
"\n";
559 $contentHandler =
$content->getContentHandler();
560 $contentFormat = $contentHandler->getDefaultFormat();
567 $data =
$content->serialize( $contentFormat );
570 $data = $contentHandler->exportTransform( $data, $contentFormat );
571 $textAttributes[
'bytes'] = $size = strlen( $data );
572 $textAttributes[
'xml:space'] =
'preserve';
573 $out .= $indent . Xml::elementClean(
'text', $textAttributes, strval( $data ) ) .
"\n";
586 $out =
" <logitem>\n";
587 $out .=
" " . Xml::element(
'id',
null, strval( $row->log_id ) ) .
"\n";
592 $out .=
" " . Xml::element(
'contributor', [
'deleted' =>
'deleted' ] ) .
"\n";
598 $out .=
" " . Xml::element(
'comment', [
'deleted' =>
'deleted' ] ) .
"\n";
600 $comment = CommentStore::getStore()->getComment(
'log_comment', $row )->text;
601 if ( $comment !=
'' ) {
603 $out .=
" " . Xml::elementClean(
'comment',
null, strval( $comment ) ) .
"\n";
607 $out .=
" " . Xml::element(
'type',
null, strval( $row->log_type ) ) .
"\n";
608 $out .=
" " . Xml::element(
'action',
null, strval( $row->log_action ) ) .
"\n";
611 $out .=
" " . Xml::element(
'text', [
'deleted' =>
'deleted' ] ) .
"\n";
613 $title = Title::makeTitle( $row->log_namespace, $row->log_title );
614 $out .=
" " . Xml::elementClean(
'logtitle',
null, self::canonicalTitle(
$title ) ) .
"\n";
615 $out .=
" " . Xml::elementClean(
'params',
616 [
'xml:space' =>
'preserve' ],
617 strval( $row->log_params ) ) .
"\n";
620 $out .=
" </logitem>\n";
632 return $indent . Xml::element(
'timestamp',
null, $ts ) .
"\n";
642 $out = $indent .
"<contributor>\n";
643 if ( $id || !IPUtils::isValid( $text ) ) {
644 $out .= $indent .
" " . Xml::elementClean(
'username',
null, strval( $text ) ) .
"\n";
645 $out .= $indent .
" " . Xml::element(
'id',
null, strval( $id ) ) .
"\n";
647 $out .= $indent .
" " . Xml::elementClean(
'ip',
null, strval( $text ) ) .
"\n";
649 $out .= $indent .
"</contributor>\n";
660 if ( $row->page_namespace ==
NS_FILE ) {
661 $img = MediaWikiServices::getInstance()->getRepoGroup()->getLocalRepo()
662 ->newFile( $row->page_title );
663 if ( $img && $img->exists() ) {
665 foreach ( array_reverse( $img->getHistory() ) as $ver ) {
681 if (
$file->isOld() ) {
683 '@phan-var OldLocalFile $file';
685 Xml::element(
'archivename',
null,
$file->getArchiveName() ) .
"\n";
689 if ( $dumpContents ) {
690 $be =
$file->getRepo()->getBackend();
691 # Dump file as base64
692 # Uses only XML-safe characters, so does not need escaping
693 # @todo Too bad this loads the contents into memory (script might swap)
694 $contents =
' <contents encoding="base64">' .
695 chunk_split( base64_encode(
696 $be->getFileContents( [
'src' =>
$file->getPath() ] ) ) ) .
701 $uploader =
$file->getUploader( File::FOR_PUBLIC );
703 $uploader = $this->
writeContributor( $uploader->getId(), $uploader->getName() );
705 $uploader = Xml::element(
'contributor', [
'deleted' =>
'deleted' ] ) .
"\n";
707 $comment =
$file->getDescription( File::FOR_PUBLIC );
709 $comment = Xml::elementClean(
'comment',
null, $comment );
711 $comment = Xml::element(
'comment', [
'deleted' =>
'deleted' ] );
713 return " <upload>\n" .
716 " " . $comment .
"\n" .
717 " " . Xml::element(
'filename',
null,
$file->getName() ) .
"\n" .
719 " " . Xml::element(
'src',
null,
$file->getCanonicalUrl() ) .
"\n" .
720 " " . Xml::element(
'size',
null,
$file->getSize() ) .
"\n" .
721 " " . Xml::element(
'sha1base36',
null,
$file->getSha1() ) .
"\n" .
722 " " . Xml::element(
'rel',
null,
$file->getRel() ) .
"\n" .
738 if (
$title->isExternal() ) {
739 return $title->getPrefixedText();
742 $prefix = MediaWikiServices::getInstance()->getContentLanguage()->
743 getFormattedNsText(
$title->getNamespace() );
748 if ( $prefix !==
'' ) {
752 return $prefix .
$title->getText();
$wgCapitalLinks
Set this to false to avoid forcing the first letter of links to capitals.
$wgDBname
Current wiki database name.
$wgSitename
Name of the site.
const MW_VERSION
The running version of MediaWiki.
const XML_DUMP_SCHEMA_VERSION_11
const XML_DUMP_SCHEMA_VERSION_10
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
Content object implementation for representing flat text.
Represents a title within MediaWiki.
isValidRedirectTarget()
Check if this Title is a valid redirect target.
closeStream()
Closes the output stream with the closing root element.
__construct( $contentMode=self::WRITE_CONTENT, $schemaVersion=XML_DUMP_SCHEMA_VERSION_11)
static string[] $supportedSchemas
the schema versions supported for output @final
const WRITE_STUB_DELETED
Only output stubs for revision content, indicating that the content has been deleted/suppressed.
static canonicalTitle(Title $title)
Return prefixed text form of title, but using the content language's canonical namespace.
int $contentMode
Whether to output revision content or just stubs.
const WRITE_STUB
Only output stubs for revision content.
string $schemaVersion
Which schema version the generated XML should comply with.
writeUpload( $file, $dumpContents=false)
invokeLenient( $callback, $warning)
Invokes the given callback, catching and logging any storage related exceptions.
writeLogItem( $row)
Dumps a "<logitem>" section on the output stream, with data filled in from the given database row.
writeTimestamp( $timestamp, $indent=" ")
const WRITE_CONTENT
Output serialized revision content.
writeUploads( $row, $dumpContents=false)
Warning! This data is potentially inconsistent.
closePage()
Closes a "<page>" section on the output stream.
openStream()
Opens the XML output stream's root "<mediawiki>" element.
writeRevision( $row, $slotRows=null)
Dumps a "<revision>" section on the output stream, with data filled in from the given database row.
openPage( $row)
Opens a "<page>" section on the output stream, with data from the given database row.
writeText(Content $content, $textAttributes, $indent)
Title null $currentTitle
Title of the currently processed page.
writeSlot(SlotRecord $slot, $contentMode)
writeContributor( $id, $text, $indent=" ")
Base interface for content objects.
if(PHP_SAPI !='cli-server') if(!isset( $_SERVER['SCRIPT_FILENAME'])) $file
Item class for a filearchive table row.