// Foreign namespace data. Starts as null; processTitle() checks for null, and
// handleSiteInfo() hands the value to the site-info callback under '_namespaces'.
51 private $foreignNamespaces =
null;
// Callable invoked by logItemCallback() when set.
54 private $mLogItemCallback;
// Callable invoked by processUpload() with the upload revision.
57 private $mUploadCallback;
// Callable invoked by revisionCallback() when set.
60 private $mRevisionCallback;
// Callable invoked with the page title when set.
63 private $mPageCallback;
// Callable invoked by siteInfoCallback() when set.
66 private $mSiteInfoCallback;
// Callable invoked by pageOutCallback() with that method's own arguments.
69 private $mPageOutCallback;
// Callable invoked by notice() with the message and its parameter array.
72 private $mNoticeCallback;
// When truthy, handleUpload() passes parsed uploads on to processUpload().
78 private $mImportUploads;
// Base directory joined with an upload's 'rel' value to locate a local file.
81 private $mImageBasePath;
// Forwarded to each imported revision/log item via setNoUpdates().
84 private $mNoUpdates =
false;
// When non-zero, the main read loop compares its page counter against this
// value and advances with next() while the counter is below it.
87 private $pageOffset = 0;
// Factory used by processTitle() to map a foreign title to a local title.
93 private $importTitleFactory;
// Map of 'title_' . <page key> => isCountable() result recorded before import,
// compared afterwards to adjust the article count.
99 private $countableCache = [];
// NOTE(review): no use of this flag is visible in this excerpt.
102 private $disableStatisticsUpdate =
false;
// ExternalUserNames instance used to prefix imported user names.
105 private $externalUserNames;
// The following properties are assigned in the constructor from the
// same-named constructor variables.
108 private $contentLanguage;
111 private $namespaceInfo;
114 private $titleFactory;
117 private $wikiPageFactory;
120 private $uploadRevisionImporter;
123 private $permissionManager;
126 private $contentHandlerFactory;
129 private $slotRoleRegistry;
160 $this->config = $config;
161 $this->hookRunner =
new HookRunner( $hookContainer );
162 $this->contentLanguage = $contentLanguage;
163 $this->namespaceInfo = $namespaceInfo;
164 $this->titleFactory = $titleFactory;
165 $this->wikiPageFactory = $wikiPageFactory;
166 $this->uploadRevisionImporter = $uploadRevisionImporter;
167 $this->permissionManager = $permissionManager;
168 $this->contentHandlerFactory = $contentHandlerFactory;
169 $this->slotRoleRegistry = $slotRoleRegistry;
171 if ( !in_array(
'uploadsource', stream_get_wrappers() ) ) {
172 stream_wrapper_register(
'uploadsource', UploadSourceAdapter::class );
179 $oldDisable = @libxml_disable_entity_loader(
false );
180 if ( PHP_VERSION_ID >= 80000 ) {
182 $reader = XMLReader::open(
183 "uploadsource://$id",
null, LIBXML_PARSEHUGE );
184 if ( $reader instanceof XMLReader ) {
185 $this->reader = $reader;
192 $this->reader =
new XMLReader;
193 $status = $this->reader->open(
194 "uploadsource://$id",
null, LIBXML_PARSEHUGE );
197 $error = libxml_get_last_error();
199 @libxml_disable_entity_loader( $oldDisable );
200 throw new MWException(
'Encountered an internal error while initializing WikiImporter object: ' .
204 @libxml_disable_entity_loader( $oldDisable );
214 $this->contentLanguage,
215 $this->namespaceInfo,
225 return $this->reader;
232 $this->
debug(
"FAILURE: $err" );
233 wfDebug(
"WikiImporter XML error: $err" );
240 if ( $this->mDebug ) {
248 public function warn( $data ) {
/**
 * Forward a notice to the registered notice callback, if one is callable.
 *
 * The variadic $params are passed to the callback as a single array
 * argument (they are not spread into separate arguments).
 *
 * @param mixed $msg Message identifier; callers in this file pass keys
 *   such as 'import-error-invalid'.
 * @param mixed ...$params Extra values accompanying the message.
 */
256 public function notice( $msg, ...$params ) {
// Nothing happens on the visible lines when no callable is registered.
257 if ( is_callable( $this->mNoticeCallback ) ) {
258 call_user_func( $this->mNoticeCallback, $msg, $params );
279 $this->mNoUpdates = $noupdates;
289 $this->pageOffset = $nthPage;
299 return wfSetVar( $this->mNoticeCallback, $callback );
308 $previous = $this->mPageCallback;
309 $this->mPageCallback = $callback;
323 $previous = $this->mPageOutCallback;
324 $this->mPageOutCallback = $callback;
334 $previous = $this->mRevisionCallback;
335 $this->mRevisionCallback = $callback;
345 $previous = $this->mUploadCallback;
346 $this->mUploadCallback = $callback;
356 $previous = $this->mLogItemCallback;
357 $this->mLogItemCallback = $callback;
367 $previous = $this->mSiteInfoCallback;
368 $this->mSiteInfoCallback = $callback;
378 $this->importTitleFactory = $factory;
387 if ( $namespace ===
null ) {
391 $this->contentLanguage,
392 $this->namespaceInfo,
399 $this->namespaceInfo->exists( intval( $namespace ) )
401 $namespace = intval( $namespace );
404 $this->namespaceInfo,
421 $status = Status::newGood();
422 $nsInfo = $this->namespaceInfo;
423 if ( $rootpage ===
null ) {
427 $this->contentLanguage,
432 } elseif ( $rootpage !==
'' ) {
433 $rootpage = rtrim( $rootpage,
'/' );
434 $title = Title::newFromText( $rootpage );
437 $status->fatal(
'import-rootpage-invalid' );
438 } elseif ( !$nsInfo->hasSubpages(
$title->getNamespace() ) ) {
441 : $this->contentLanguage->getNsText(
$title->getNamespace() );
442 $status->fatal(
'import-rootpage-nosubpage', $displayNSText );
462 $this->mImageBasePath = $dir;
469 $this->mImportUploads = $import;
478 $this->externalUserNames =
new ExternalUserNames( $usernamePrefix, $assignKnownUsers );
496 $title = $titleAndForeignTitle[0];
497 $page = $this->wikiPageFactory->newFromTitle(
$title );
498 $this->countableCache[
'title_' .
$title->getPrefixedText()] = $page->isCountable();
508 if ( !$revision->getContentHandler()->canBeUsedOn( $revision->getTitle() ) ) {
509 $this->
notice(
'import-error-bad-location',
510 $revision->getTitle()->getPrefixedText(),
512 $revision->getModel(),
513 $revision->getFormat()
520 return $revision->importOldRevision();
522 $this->
notice(
'import-error-unserialize',
523 $revision->getTitle()->getPrefixedText(),
525 $revision->getModel(),
526 $revision->getFormat()
539 return $revision->importLogItem();
548 $status = $this->uploadRevisionImporter->import( $revision );
549 return $status->isGood();
562 $sRevCount, $pageInfo
571 $page = $this->wikiPageFactory->newFromTitle( $pageIdentity );
573 $page->loadPageData( WikiPage::READ_LATEST );
574 $rev = $page->getRevisionRecord();
575 if ( $rev ===
null ) {
577 wfDebug( __METHOD__ .
': Skipping article count adjustment for ' . $pageIdentity .
578 ' because WikiPage::getRevisionRecord() returned null' );
580 $user = RequestContext::getMain()->getUser();
581 $update = $page->newPageUpdater( $user )->prepareUpdate();
582 $countKey =
'title_' . CacheKeyHelper::getKeyForPage( $pageIdentity );
583 $countable = $update->isCountable();
584 if ( array_key_exists( $countKey, $this->countableCache ) &&
585 $countable != $this->countableCache[$countKey] ) {
586 DeferredUpdates::addUpdate( SiteStatsUpdate::factory( [
587 'articles' => ( (
int)$countable - (
int)$this->countableCache[$countKey] )
593 $title = Title::castFromPageIdentity( $pageIdentity );
595 return $this->hookRunner->onAfterImportPage(
$title, $foreignTitle,
596 $revCount, $sRevCount, $pageInfo );
/**
 * Invoke the site-info callback, if one has been set, and return its result.
 *
 * NOTE(review): the argument list given to call_user_func_array() and the
 * behaviour when no callback is set are on lines not shown in this excerpt.
 *
 * @param array $siteInfo Site information collected by handleSiteInfo().
 */
604 private function siteInfoCallback( $siteInfo ) {
605 if ( isset( $this->mSiteInfoCallback ) ) {
606 return call_user_func_array(
607 $this->mSiteInfoCallback,
620 if ( isset( $this->mPageCallback ) ) {
621 call_user_func( $this->mPageCallback,
$title );
/**
 * Invoke the page-out callback, if one has been set.
 *
 * The callback receives exactly the arguments this method was called with
 * (via func_get_args()), in the same order.
 *
 * @param PageIdentity $pageIdentity
 * @param mixed $foreignTitle
 * @param mixed $revCount
 * @param mixed $sucCount
 * @param mixed $pageInfo
 */
633 private function pageOutCallback(
PageIdentity $pageIdentity, $foreignTitle, $revCount,
634 $sucCount, $pageInfo ) {
635 if ( isset( $this->mPageOutCallback ) ) {
636 call_user_func_array( $this->mPageOutCallback, func_get_args() );
/**
 * Invoke the revision callback, if one has been set, and return its result.
 *
 * NOTE(review): the argument list given to call_user_func_array() and the
 * behaviour when no callback is set are on lines not shown in this excerpt.
 *
 * @param mixed $revision Revision object built by processRevision().
 */
645 private function revisionCallback( $revision ) {
646 if ( isset( $this->mRevisionCallback ) ) {
647 return call_user_func_array(
648 $this->mRevisionCallback,
/**
 * Invoke the log-item callback, if one has been set, and return its result.
 *
 * NOTE(review): the argument list given to call_user_func_array() and the
 * behaviour when no callback is set are on lines not shown in this excerpt.
 *
 * @param mixed $revision Log item object built by processLogItem().
 */
661 private function logItemCallback( $revision ) {
662 if ( isset( $this->mLogItemCallback ) ) {
663 return call_user_func_array(
664 $this->mLogItemCallback,
679 return $this->reader->getAttribute( $attr ) ??
'';
690 if ( $this->reader->isEmptyElement ) {
694 while ( $this->reader->read() ) {
695 switch ( $this->reader->nodeType ) {
696 case XMLReader::TEXT:
697 case XMLReader::CDATA:
698 case XMLReader::SIGNIFICANT_WHITESPACE:
699 $buffer .= $this->reader->value;
701 case XMLReader::END_ELEMENT:
706 $this->reader->close();
721 $oldDisable = @libxml_disable_entity_loader(
true );
723 $this->reader->read();
725 if ( $this->reader->localName !=
'mediawiki' ) {
727 @libxml_disable_entity_loader( $oldDisable );
728 $error = libxml_get_last_error();
730 throw new NormalizedException(
"XML error at line {line}: {message}", [
731 'line' => $error->line,
732 'message' => $error->message,
736 "Expected '<mediawiki>' tag, got '<{$this->reader->localName}>' tag."
740 $this->
debug(
"<mediawiki> tag is correct." );
742 $this->
debug(
"Starting primary dump processing loop." );
744 $keepReading = $this->reader->read();
747 while ( $keepReading ) {
748 $tag = $this->reader->localName;
749 if ( $this->pageOffset ) {
750 if ( $tag ===
'page' ) {
753 if ( $pageCount < $this->pageOffset ) {
754 $keepReading = $this->reader->next();
758 $type = $this->reader->nodeType;
760 if ( !$this->hookRunner->onImportHandleToplevelXMLTag( $this ) ) {
762 } elseif ( $tag ==
'mediawiki' &&
$type == XMLReader::END_ELEMENT ) {
764 } elseif ( $tag ==
'siteinfo' ) {
765 $this->handleSiteInfo();
766 } elseif ( $tag ==
'page' ) {
768 } elseif ( $tag ==
'logitem' ) {
769 $this->handleLogItem();
770 } elseif ( $tag !=
'#text' ) {
771 $this->
warn(
"Unhandled top-level XML tag $tag" );
777 $keepReading = $this->reader->next();
779 $this->
debug(
"Skip" );
781 $keepReading = $this->reader->read();
786 @libxml_disable_entity_loader( $oldDisable );
787 $this->reader->close();
/**
 * Read the children of the <siteinfo> element and pass the collected data,
 * together with the foreign namespaces, to siteInfoCallback().
 *
 * NOTE(review): the branch bodies (what is stored for each tag) are on lines
 * not shown in this excerpt.
 */
793 private function handleSiteInfo() {
794 $this->debug(
"Enter site info handler." );
// Tag names selected by the in_array() branch below.
798 $normalFields = [
'sitename',
'base',
'generator',
'case' ];
// Walk the reader node by node.
800 while ( $this->reader->read() ) {
// Detects the closing </siteinfo> tag; presumably ends the loop (action not shown).
801 if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
802 $this->reader->localName ==
'siteinfo' ) {
806 $tag = $this->reader->localName;
// 'namespace' is tested separately from the normal fields.
808 if ( $tag ==
'namespace' ) {
811 } elseif ( in_array( $tag, $normalFields ) ) {
// Attach the namespace data before handing everything to the callback.
816 $siteInfo[
'_namespaces'] = $this->foreignNamespaces;
817 $this->siteInfoCallback( $siteInfo );
/**
 * Read the children of a <logitem> element into $logInfo and pass the result
 * to processLogItem().
 *
 * Tags are offered to the ImportHandleLogItemXMLTag hook first. Tags that
 * match no branch and are not '#text' produce a warning.
 *
 * NOTE(review): some branch bodies are on lines not shown in this excerpt.
 */
820 private function handleLogItem() {
821 $this->
debug(
"Enter log item handler." );
// Tag names selected by the in_array() branch below.
825 $normalFields = [
'id',
'comment',
'type',
'action',
'timestamp',
826 'logtitle',
'params' ];
828 while ( $this->reader->read() ) {
// Detects the closing </logitem> tag; presumably ends the loop (action not shown).
829 if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
830 $this->reader->localName ==
'logitem' ) {
834 $tag = $this->reader->localName;
// A false return from the hook selects this first branch (body not shown).
836 if ( !$this->hookRunner->onImportHandleLogItemXMLTag( $this, $logInfo ) ) {
838 } elseif ( in_array( $tag, $normalFields ) ) {
840 } elseif ( $tag ==
'contributor' ) {
// Contributor data is parsed by its own handler.
841 $logInfo[
'contributor'] = $this->handleContributor();
842 } elseif ( $tag !=
'#text' ) {
843 $this->
warn(
"Unhandled log-item XML tag $tag" );
847 $this->processLogItem( $logInfo );
/**
 * Copy parsed log-item fields onto a revision object and pass it to
 * logItemCallback(), returning that callback's result.
 *
 * NOTE(review): the line that creates $revision is not shown in this excerpt.
 *
 * @param array $logInfo Fields collected by handleLogItem().
 */
854 private function processLogItem( $logInfo ) {
857 if ( isset( $logInfo[
'id'] ) ) {
858 $revision->setID( $logInfo[
'id'] );
// NOTE(review): 'type' and 'action' are read without an isset() guard on the
// visible lines, unlike the other fields — confirm they are always present.
860 $revision->setType( $logInfo[
'type'] );
861 $revision->setAction( $logInfo[
'action'] );
862 if ( isset( $logInfo[
'timestamp'] ) ) {
863 $revision->setTimestamp( $logInfo[
'timestamp'] );
865 if ( isset( $logInfo[
'params'] ) ) {
866 $revision->setParams( $logInfo[
'params'] );
868 if ( isset( $logInfo[
'logtitle'] ) ) {
// The title is parsed directly from the log title text.
871 $revision->setTitle( Title::newFromText( $logInfo[
'logtitle'] ) );
874 $revision->setNoUpdates( $this->mNoUpdates );
876 if ( isset( $logInfo[
'comment'] ) ) {
877 $revision->setComment( $logInfo[
'comment'] );
// Attribution: use the contributor's username if present, else the IP,
// else a prefixed 'Unknown user' placeholder.
880 if ( isset( $logInfo[
'contributor'][
'username'] ) ) {
881 $revision->setUsername(
882 $this->externalUserNames->applyPrefix( $logInfo[
'contributor'][
'username'] )
884 } elseif ( isset( $logInfo[
'contributor'][
'ip'] ) ) {
885 $revision->setUserIP( $logInfo[
'contributor'][
'ip'] );
887 $revision->setUsername( $this->externalUserNames->addPrefix(
'Unknown user' ) );
890 return $this->logItemCallback( $revision );
/**
 * Read the children of a <page> element, dispatch <revision> and <upload>
 * children to their handlers, and report the page through pageOutCallback()
 * when a local title was resolved.
 *
 * NOTE(review): several lines (including where $skip is set and the bodies of
 * some branches) are not shown in this excerpt.
 */
893 private function handlePage() {
895 $this->
debug(
"Enter page handler." );
// Counters incremented by handleRevision() through the by-reference $pageInfo.
896 $pageInfo = [
'revisionCount' => 0,
'successfulRevisionCount' => 0 ];
// Tag names selected by the in_array() branch below.
899 $normalFields = [
'title',
'ns',
'id',
'redirect',
'restrictions' ];
// When $skip is truthy the reader advances with next() instead of read().
904 while ( $skip ? $this->reader->next() : $this->reader->read() ) {
// Detects the closing </page> tag; presumably ends the loop (action not shown).
905 if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
906 $this->reader->localName ==
'page' ) {
912 $tag = $this->reader->localName;
// A false return from the hook selects this branch (body not shown).
917 } elseif ( !$this->hookRunner->onImportHandlePageXMLTag( $this, $pageInfo ) ) {
919 } elseif ( in_array( $tag, $normalFields ) ) {
927 if ( $tag ==
'redirect' ) {
932 } elseif ( $tag ==
'revision' || $tag ==
'upload' ) {
// Resolve the local title from the page's 'title' and optional 'ns' fields.
934 $title = $this->processTitle( $pageInfo[
'title'],
935 $pageInfo[
'ns'] ??
null );
// processTitle() returns a [ title, foreign title ] pair on its success path.
938 if ( is_array(
$title ) ) {
940 list( $pageInfo[
'_title'], $foreignTitle ) =
$title;
948 if ( $tag ==
'revision' ) {
949 $this->handleRevision( $pageInfo );
951 $this->handleUpload( $pageInfo );
954 } elseif ( $tag !=
'#text' ) {
955 $this->
warn(
"Unhandled page XML tag $tag" );
// Only pages for which a local title was stored are reported.
965 if ( array_key_exists(
'_title', $pageInfo ) ) {
967 $title = $pageInfo[
'_title'];
968 $this->pageOutCallback(
972 $pageInfo[
'revisionCount'],
973 $pageInfo[
'successfulRevisionCount'],
/**
 * Read the children of a <revision> element into $revisionInfo, then process
 * the revision and update the page's revision counters.
 *
 * NOTE(review): where $skip is set and the bodies of some branches are on
 * lines not shown in this excerpt.
 *
 * @param array &$pageInfo Page data; 'revisionCount' and
 *   'successfulRevisionCount' are incremented here.
 */
982 private function handleRevision( &$pageInfo ) {
983 $this->
debug(
"Enter revision handler" );
// Tag names selected by the in_array() branch below.
986 $normalFields = [
'id',
'parentid',
'timestamp',
'comment',
'minor',
'origin',
987 'model',
'format',
'text',
'sha1' ];
// When $skip is truthy the reader advances with next() instead of read().
991 while ( $skip ? $this->reader->next() : $this->reader->read() ) {
// Detects the closing </revision> tag; presumably ends the loop (action not shown).
992 if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
993 $this->reader->localName ==
'revision' ) {
997 $tag = $this->reader->localName;
// A false return from the hook selects this first branch (body not shown).
999 if ( !$this->hookRunner->onImportHandleRevisionXMLTag(
1000 $this, $pageInfo, $revisionInfo )
1003 } elseif ( in_array( $tag, $normalFields ) ) {
1005 } elseif ( $tag ==
'content' ) {
// Each <content> element is appended, so a revision may carry several.
1007 $revisionInfo[$tag][] = $this->handleContent();
1008 } elseif ( $tag ==
'contributor' ) {
1009 $revisionInfo[
'contributor'] = $this->handleContributor();
1010 } elseif ( $tag !=
'#text' ) {
1011 $this->
warn(
"Unhandled revision XML tag $tag" );
// Every revision is counted; only those processRevision() reports as
// truthy are counted as successful.
1016 $pageInfo[
'revisionCount']++;
1017 if ( $this->processRevision( $pageInfo, $revisionInfo ) ) {
1018 $pageInfo[
'successfulRevisionCount']++;
/**
 * Read the children of a <content> element and return the collected data.
 *
 * NOTE(review): where $skip and $contentInfo are initialised and the bodies
 * of some branches are on lines not shown in this excerpt.
 *
 * @return mixed The $contentInfo value built while reading.
 */
1022 private function handleContent() {
1023 $this->
debug(
"Enter content handler" );
// Tag names selected by the in_array() branch below.
1026 $normalFields = [
'role',
'origin',
'model',
'format',
'text' ];
// When $skip is truthy the reader advances with next() instead of read().
1030 while ( $skip ? $this->reader->next() : $this->reader->read() ) {
// Detects the closing </content> tag; presumably ends the loop (action not shown).
1031 if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
1032 $this->reader->localName ==
'content' ) {
1036 $tag = $this->reader->localName;
// A false return from the hook selects this first branch (body not shown).
1038 if ( !$this->hookRunner->onImportHandleContentXMLTag(
1039 $this, $contentInfo )
1042 } elseif ( in_array( $tag, $normalFields ) ) {
1044 } elseif ( $tag !=
'#text' ) {
1045 $this->
warn(
"Unhandled content XML tag $tag" );
1050 return $contentInfo;
/**
 * Build a content object from parsed slot/revision data.
 *
 * Throws MWException when the 'text' field is missing. The role defaults to
 * SlotRecord::MAIN and the model defaults to the role's default model for the
 * title. The text goes through the handler's importTransform() before being
 * unserialized.
 *
 * NOTE(review): the full size-limit condition and the statement it guards
 * are only partly visible in this excerpt.
 *
 * @param Title $title
 * @param mixed $revisionId Interpolated into the size-limit message.
 * @param array $contentInfo Keys read here: 'text', 'model', 'role'.
 */
1061 private function makeContent(
Title $title, $revisionId, $contentInfo ) {
1062 $maxArticleSize = MediaWikiServices::getInstance()->getMainConfig()->get(
1063 MainConfigNames::MaxArticleSize );
1065 if ( !isset( $contentInfo[
'text'] ) ) {
1066 throw new MWException(
'Missing text field in import.' );
// The size check is conditional on the content model (list of models not shown).
1073 if ( ( !isset( $contentInfo[
'model'] ) ||
1074 in_array( $contentInfo[
'model'], [
// The setting is multiplied by 1024 and compared against the byte length of the text.
1082 strlen( $contentInfo[
'text'] ) > $maxArticleSize * 1024
1086 "the revision with ID $revisionId" :
1088 ) .
" exceeds the maximum allowable size ({$maxArticleSize} KiB)" );
1091 $role = $contentInfo[
'role'] ?? SlotRecord::MAIN;
1092 $model = $contentInfo[
'model'] ?? $this->getDefaultContentModel(
$title, $role );
1093 $handler = $this->getContentHandler( $model );
1095 $text = $handler->importTransform( $contentInfo[
'text'] );
1097 return $handler->unserializeContent( $text );
/**
 * Copy parsed revision fields onto a revision object and pass it to
 * revisionCallback(), returning that callback's result.
 *
 * NOTE(review): the lines that create $revision and $content are not shown in
 * this excerpt.
 *
 * @param array $pageInfo Page data; '_title' is read here.
 * @param array $revisionInfo Fields collected by handleRevision().
 */
1106 private function processRevision( $pageInfo, $revisionInfo ) {
1109 $revId = $revisionInfo[
'id'] ?? 0;
// NOTE(review): 'id' is read directly here although it may be unset (see the
// default above); presumably guarded by a condition on a line not shown.
1111 $revision->setID( $revisionInfo[
'id'] );
1114 $title = $pageInfo[
'_title'];
1115 $revision->setTitle(
$title );
1118 $revision->setContent( SlotRecord::MAIN,
$content );
// Additional slots: every entry must name its role.
1120 foreach ( $revisionInfo[
'content'] ?? [] as $slotInfo ) {
1121 if ( !isset( $slotInfo[
'role'] ) ) {
1122 throw new MWException(
"Missing role for imported slot." );
1126 $revision->setContent( $slotInfo[
'role'],
$content );
// Falls back to the current time when the dump carries no timestamp.
1128 $revision->setTimestamp( $revisionInfo[
'timestamp'] ??
wfTimestampNow() );
1130 if ( isset( $revisionInfo[
'comment'] ) ) {
1131 $revision->setComment( $revisionInfo[
'comment'] );
// Presence of the 'minor' key alone marks the revision as minor.
1134 if ( isset( $revisionInfo[
'minor'] ) ) {
1135 $revision->setMinor(
true );
// Attribution: use the contributor's username if present, else the IP,
// else a prefixed 'Unknown user' placeholder.
1137 if ( isset( $revisionInfo[
'contributor'][
'username'] ) ) {
1138 $revision->setUsername(
1139 $this->externalUserNames->applyPrefix( $revisionInfo[
'contributor'][
'username'] )
1141 } elseif ( isset( $revisionInfo[
'contributor'][
'ip'] ) ) {
1142 $revision->setUserIP( $revisionInfo[
'contributor'][
'ip'] );
1144 $revision->setUsername( $this->externalUserNames->addPrefix(
'Unknown user' ) );
1146 if ( isset( $revisionInfo[
'sha1'] ) ) {
1147 $revision->setSha1Base36( $revisionInfo[
'sha1'] );
1149 $revision->setNoUpdates( $this->mNoUpdates );
1151 return $this->revisionCallback( $revision );
/**
 * Read the children of an <upload> element into $uploadInfo, locate the file
 * data, and pass the upload to processUpload() when upload import is enabled.
 *
 * NOTE(review): where $skip and $contents are set and the bodies of some
 * branches are on lines not shown in this excerpt.
 *
 * @param array &$pageInfo Page data, also passed to the upload hook.
 */
1158 private function handleUpload( &$pageInfo ) {
1159 $this->
debug(
"Enter upload handler" );
// Tag names selected by the in_array() branch below.
1162 $normalFields = [
'timestamp',
'comment',
'filename',
'text',
1163 'src',
'size',
'sha1base36',
'archivename',
'rel' ];
// When $skip is truthy the reader advances with next() instead of read().
1167 while ( $skip ? $this->reader->next() : $this->reader->read() ) {
// Detects the closing </upload> tag; presumably ends the loop (action not shown).
1168 if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
1169 $this->reader->localName ==
'upload' ) {
1173 $tag = $this->reader->localName;
// A false return from the hook selects this first branch (body not shown).
1175 if ( !$this->hookRunner->onImportHandleUploadXMLTag( $this, $pageInfo ) ) {
1177 } elseif ( in_array( $tag, $normalFields ) ) {
1179 } elseif ( $tag ==
'contributor' ) {
1180 $uploadInfo[
'contributor'] = $this->handleContributor();
1181 } elseif ( $tag ==
'contents' ) {
// Inline file data: only base64-encoded contents are decoded, written to a
// temporary file, and flagged as a temporary source.
1183 $encoding = $this->reader->getAttribute(
'encoding' );
1184 if ( $encoding ===
'base64' ) {
1185 $uploadInfo[
'fileSrc'] = $this->dumpTemp( base64_decode( $contents ) );
1186 $uploadInfo[
'isTempSrc'] =
true;
1188 } elseif ( $tag !=
'#text' ) {
1189 $this->
warn(
"Unhandled upload XML tag $tag" );
// A file under the configured image base path, when it exists, becomes the
// (non-temporary) source, overwriting any source set above.
1194 if ( $this->mImageBasePath && isset( $uploadInfo[
'rel'] ) ) {
1195 $path =
"{$this->mImageBasePath}/{$uploadInfo['rel']}";
1196 if ( file_exists(
$path ) ) {
1197 $uploadInfo[
'fileSrc'] =
$path;
1198 $uploadInfo[
'isTempSrc'] =
false;
1202 if ( $this->mImportUploads ) {
1203 return $this->processUpload( $pageInfo, $uploadInfo );
/**
 * Write the given data to a newly created temporary file.
 *
 * NOTE(review): the return statement is not shown in this excerpt; the caller
 * stores the result as a file source, so presumably the file name is returned.
 * The results of tempnam() and file_put_contents() are not checked on the
 * visible lines.
 *
 * @param mixed $contents Data to write.
 */
1211 private function dumpTemp( $contents ) {
// Create a file with the 'importupload' prefix in the wiki's temp directory.
1212 $filename = tempnam(
wfTempDir(),
'importupload' );
1213 file_put_contents( $filename, $contents );
/**
 * Copy parsed upload fields onto a revision object and pass it to the upload
 * callback, returning that callback's result.
 *
 * NOTE(review): the lines that create $revision and $content are not shown in
 * this excerpt.
 *
 * @param array $pageInfo Page data; 'id' and '_title' are read here.
 * @param array $uploadInfo Fields collected by handleUpload().
 */
1222 private function processUpload( $pageInfo, $uploadInfo ) {
1224 $revId = $pageInfo[
'id'];
1225 $title = $pageInfo[
'_title'];
// Default the description text to an empty string.
1227 $uploadInfo[
'text'] = $uploadInfo[
'text'] ??
'';
1230 $revision->setTitle(
$title );
1231 $revision->setID( $revId );
// NOTE(review): 'timestamp', 'filename', 'src', 'size' and 'comment' are read
// without an isset() guard on the visible lines — confirm they are always present.
1232 $revision->setTimestamp( $uploadInfo[
'timestamp'] );
1233 $revision->setContent( SlotRecord::MAIN,
$content );
1234 $revision->setFilename( $uploadInfo[
'filename'] );
1235 if ( isset( $uploadInfo[
'archivename'] ) ) {
1236 $revision->setArchiveName( $uploadInfo[
'archivename'] );
1238 $revision->setSrc( $uploadInfo[
'src'] );
// The second argument tells the revision whether the source file is temporary.
1239 if ( isset( $uploadInfo[
'fileSrc'] ) ) {
1240 $revision->setFileSrc( $uploadInfo[
'fileSrc'],
1241 !empty( $uploadInfo[
'isTempSrc'] )
1244 if ( isset( $uploadInfo[
'sha1base36'] ) ) {
1245 $revision->setSha1Base36( $uploadInfo[
'sha1base36'] );
1247 $revision->setSize( intval( $uploadInfo[
'size'] ) );
1248 $revision->setComment( $uploadInfo[
'comment'] );
// Attribution: use the contributor's username if present, else the IP.
1250 if ( isset( $uploadInfo[
'contributor'][
'username'] ) ) {
1251 $revision->setUsername(
1252 $this->externalUserNames->applyPrefix( $uploadInfo[
'contributor'][
'username'] )
1254 } elseif ( isset( $uploadInfo[
'contributor'][
'ip'] ) ) {
1255 $revision->setUserIP( $uploadInfo[
'contributor'][
'ip'] );
1257 $revision->setNoUpdates( $this->mNoUpdates );
// NOTE(review): unlike the other callbacks, no isset() check on
// mUploadCallback is visible before this call — confirm it is always set.
1259 return call_user_func( $this->mUploadCallback, $revision );
/**
 * Read the children of a <contributor> element.
 *
 * Callers store the result under a 'contributor' key and read its 'username'
 * and 'ip' entries.
 *
 * NOTE(review): the handling of an empty element, the branch body and the
 * return statement are on lines not shown in this excerpt.
 */
1265 private function handleContributor() {
1266 $this->
debug(
"Enter contributor handler." );
// An empty element (<contributor/>) is tested for separately before reading.
1268 if ( $this->reader->isEmptyElement ) {
// Tag names selected by the in_array() branch below.
1272 $fields = [
'id',
'ip',
'username' ];
1275 while ( $this->reader->read() ) {
// Detects the closing </contributor> tag; presumably ends the loop (action not shown).
1276 if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
1277 $this->reader->localName ==
'contributor' ) {
1281 $tag = $this->reader->localName;
1283 if ( in_array( $tag, $fields ) ) {
/**
 * Resolve a title from the dump into a local title, issuing a notice when the
 * page cannot be imported.
 *
 * On the success path the method returns a two-element array
 * [ local title, foreign title ].
 *
 * NOTE(review): the return values on the rejection paths and several
 * intermediate lines (including where $user is set) are not shown in this
 * excerpt.
 *
 * @param mixed $text Title text from the dump.
 * @param mixed $ns Namespace value from the dump, or null when absent.
 */
1296 private function processTitle( $text, $ns =
null ) {
// The foreign title factory is chosen by whether namespace data is available.
1297 if ( $this->foreignNamespaces ===
null ) {
1299 $this->contentLanguage
1303 $this->foreignNamespaces );
1306 $foreignTitle = $foreignTitleFactory->createForeignTitle( $text,
1309 $title = $this->importTitleFactory->createTitleFromForeignTitle(
1312 $commandLineMode = $this->config->get(
'CommandLineMode' );
1314 # Invalid page title? Ignore the page
1315 $this->
notice(
'import-error-invalid', $foreignTitle->getFullText() );
// Interwiki titles are rejected with a notice.
1317 } elseif (
$title->isExternal() ) {
1318 $this->
notice(
'import-error-interwiki',
$title->getPrefixedText() );
// Titles that cannot exist as pages are rejected with a notice.
1320 } elseif ( !
$title->canExist() ) {
1321 $this->
notice(
'import-error-special',
$title->getPrefixedText() );
// The edit-permission check only runs outside command-line mode.
1323 } elseif ( !$commandLineMode ) {
1326 if ( !$this->permissionManager->userCan(
'edit', $user,
$title ) ) {
1327 # Do not import if the importing wiki user cannot edit this page
1328 $this->
notice(
'import-error-edit',
$title->getPrefixedText() );
1334 return [
$title, $foreignTitle ];
/**
 * Return the content handler for a content model by delegating to the
 * injected content handler factory.
 *
 * @param mixed $model Content model identifier passed to the factory unchanged.
 */
1341 private function getContentHandler( $model ) {
1342 return $this->contentHandlerFactory->getContentHandler( $model );
/**
 * Return the default content model for a title in a given slot role, as
 * reported by that role's handler in the slot role registry.
 *
 * @param mixed $title Title passed to the role handler's getDefaultModel().
 * @param mixed $role Slot role used to look up the role handler.
 */
1351 private function getDefaultContentModel(
$title, $role ) {
1352 return $this->slotRoleRegistry
1353 ->getRoleHandler( $role )
1354 ->getDefaultModel(
$title );