63 if ( !class_exists(
'XMLReader' ) ) {
64 throw new Exception(
'Import requires PHP to have been compiled with libxml support' );
67 $this->reader =
new XMLReader();
70 if ( !in_array(
'uploadsource', stream_get_wrappers() ) ) {
71 stream_wrapper_register(
'uploadsource', UploadSourceAdapter::class );
77 $oldDisable = libxml_disable_entity_loader(
false );
78 if ( defined(
'LIBXML_PARSEHUGE' ) ) {
79 $status = $this->reader->open(
"uploadsource://$id",
null, LIBXML_PARSEHUGE );
81 $status = $this->reader->open(
"uploadsource://$id" );
84 $error = libxml_get_last_error();
85 libxml_disable_entity_loader( $oldDisable );
86 throw new MWException(
'Encountered an internal error while initializing WikiImporter object: ' .
89 libxml_disable_entity_loader( $oldDisable );
110 $this->
debug(
"FAILURE: $err" );
111 wfDebug(
"WikiImporter XML error: $err\n" );
115 if ( $this->mDebug ) {
120 public function warn( $data ) {
124 public function notice( $msg, ...$params ) {
125 if ( is_callable( $this->mNoticeCallback ) ) {
126 call_user_func( $this->mNoticeCallback, $msg, $params );
147 $this->mNoUpdates = $noupdates;
157 $this->pageOffset = $nthPage;
167 return wfSetVar( $this->mNoticeCallback, $callback );
177 $this->mPageCallback = $callback;
192 $this->mPageOutCallback = $callback;
203 $this->mRevisionCallback = $callback;
214 $this->mUploadCallback = $callback;
225 $this->mLogItemCallback = $callback;
236 $this->mSiteInfoCallback = $callback;
246 $this->importTitleFactory = $factory;
255 if ( is_null( $namespace ) ) {
261 MediaWikiServices::getInstance()->getNamespaceInfo()->exists( intval( $namespace ) )
263 $namespace = intval( $namespace );
277 $status = Status::newGood();
278 if ( is_null( $rootpage ) ) {
281 } elseif ( $rootpage !==
'' ) {
282 $rootpage = rtrim( $rootpage,
'/' );
283 $title = Title::newFromText( $rootpage );
286 $status->fatal(
'import-rootpage-invalid' );
288 !MediaWikiServices::getInstance()->getNamespaceInfo()->
289 hasSubpages(
$title->getNamespace() )
293 : MediaWikiServices::getInstance()->getContentLanguage()->
294 getNsText(
$title->getNamespace() );
295 $status->fatal(
'import-rootpage-nosubpage', $displayNSText );
309 $this->mImageBasePath = $dir;
316 $this->mImportUploads = $import;
325 $this->externalUserNames =
new ExternalUserNames( $usernamePrefix, $assignKnownUsers );
343 $title = $titleAndForeignTitle[0];
344 $page = WikiPage::factory(
$title );
345 $this->countableCache[
'title_' .
$title->getPrefixedText()] = $page->isCountable();
355 if ( !$revision->getContentHandler()->canBeUsedOn( $revision->getTitle() ) ) {
356 $this->
notice(
'import-error-bad-location',
357 $revision->getTitle()->getPrefixedText(),
359 $revision->getModel(),
360 $revision->getFormat() );
366 return $revision->importOldRevision();
368 $this->
notice(
'import-error-unserialize',
369 $revision->getTitle()->getPrefixedText(),
371 $revision->getModel(),
372 $revision->getFormat() );
384 return $revision->importLogItem();
393 return $revision->importUpload();
406 $sRevCount, $pageInfo
415 $page = WikiPage::factory(
$title );
416 $page->loadPageData(
'fromdbmaster' );
419 wfDebug( __METHOD__ .
': Skipping article count adjustment for ' .
$title .
420 ' because WikiPage::getContent() returned null' );
422 $editInfo = $page->prepareContentForEdit(
$content );
423 $countKey =
'title_' .
$title->getPrefixedText();
424 $countable = $page->isCountable( $editInfo );
425 if ( array_key_exists( $countKey, $this->countableCache ) &&
426 $countable != $this->countableCache[$countKey] ) {
427 DeferredUpdates::addUpdate( SiteStatsUpdate::factory( [
428 'articles' => ( (
int)$countable - (
int)$this->countableCache[$countKey] )
434 return Hooks::run(
'AfterImportPage', func_get_args() );
442 $this->
debug(
"Got revision:" );
443 if ( is_object( $revision->title ) ) {
444 $this->
debug(
"-- Title: " . $revision->title->getPrefixedText() );
446 $this->
debug(
"-- Title: <invalid>" );
448 $this->
debug(
"-- User: " . $revision->user_text );
449 $this->
debug(
"-- Timestamp: " . $revision->timestamp );
450 $this->
debug(
"-- Comment: " . $revision->comment );
451 $this->
debug(
"-- Text: " . $revision->text );
460 if ( isset( $this->mSiteInfoCallback ) ) {
461 return call_user_func_array( $this->mSiteInfoCallback,
462 [ $siteInfo, $this ] );
473 if ( isset( $this->mPageCallback ) ) {
474 call_user_func( $this->mPageCallback,
$title );
487 $sucCount, $pageInfo ) {
488 if ( isset( $this->mPageOutCallback ) ) {
489 call_user_func_array( $this->mPageOutCallback, func_get_args() );
499 if ( isset( $this->mRevisionCallback ) ) {
500 return call_user_func_array( $this->mRevisionCallback,
501 [ $revision, $this ] );
513 if ( isset( $this->mLogItemCallback ) ) {
514 return call_user_func_array( $this->mLogItemCallback,
515 [ $revision, $this ] );
528 return $this->reader->getAttribute( $attr );
539 if ( $this->reader->isEmptyElement ) {
543 while ( $this->reader->read() ) {
544 switch ( $this->reader->nodeType ) {
545 case XMLReader::TEXT:
546 case XMLReader::CDATA:
547 case XMLReader::SIGNIFICANT_WHITESPACE:
548 $buffer .= $this->reader->value;
550 case XMLReader::END_ELEMENT:
555 $this->reader->close();
569 $oldDisable = libxml_disable_entity_loader(
true );
570 $this->reader->read();
572 if ( $this->reader->localName !=
'mediawiki' ) {
573 libxml_disable_entity_loader( $oldDisable );
574 throw new MWException(
"Expected <mediawiki> tag, got " .
575 $this->reader->localName );
577 $this->
debug(
"<mediawiki> tag is correct." );
579 $this->
debug(
"Starting primary dump processing loop." );
581 $keepReading = $this->reader->read();
586 while ( $keepReading ) {
587 $tag = $this->reader->localName;
588 if ( $this->pageOffset ) {
589 if ( $tag ===
'page' ) {
592 if ( $pageCount < $this->pageOffset ) {
593 $keepReading = $this->reader->next();
597 $type = $this->reader->nodeType;
599 if ( !Hooks::run(
'ImportHandleToplevelXMLTag', [ $this ] ) ) {
601 } elseif ( $tag ==
'mediawiki' &&
$type == XMLReader::END_ELEMENT ) {
603 } elseif ( $tag ==
'siteinfo' ) {
605 } elseif ( $tag ==
'page' ) {
607 } elseif ( $tag ==
'logitem' ) {
609 } elseif ( $tag !=
'#text' ) {
610 $this->
warn(
"Unhandled top-level XML tag $tag" );
616 $keepReading = $this->reader->next();
618 $this->
debug(
"Skip" );
620 $keepReading = $this->reader->read();
623 }
catch ( Exception $ex ) {
628 libxml_disable_entity_loader( $oldDisable );
629 $this->reader->close();
639 $this->
debug(
"Enter site info handler." );
643 $normalFields = [
'sitename',
'base',
'generator',
'case' ];
645 while ( $this->reader->read() ) {
646 if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
647 $this->reader->localName ==
'siteinfo' ) {
651 $tag = $this->reader->localName;
653 if ( $tag ==
'namespace' ) {
656 } elseif ( in_array( $tag, $normalFields ) ) {
666 $this->
debug(
"Enter log item handler." );
670 $normalFields = [
'id',
'comment',
'type',
'action',
'timestamp',
671 'logtitle',
'params' ];
673 while ( $this->reader->read() ) {
674 if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
675 $this->reader->localName ==
'logitem' ) {
679 $tag = $this->reader->localName;
681 if ( !Hooks::run(
'ImportHandleLogItemXMLTag', [
685 } elseif ( in_array( $tag, $normalFields ) ) {
687 } elseif ( $tag ==
'contributor' ) {
689 } elseif ( $tag !=
'#text' ) {
690 $this->
warn(
"Unhandled log-item XML tag $tag" );
704 if ( isset( $logInfo[
'id'] ) ) {
705 $revision->setID( $logInfo[
'id'] );
707 $revision->setType( $logInfo[
'type'] );
708 $revision->setAction( $logInfo[
'action'] );
709 if ( isset( $logInfo[
'timestamp'] ) ) {
710 $revision->setTimestamp( $logInfo[
'timestamp'] );
712 if ( isset( $logInfo[
'params'] ) ) {
713 $revision->setParams( $logInfo[
'params'] );
715 if ( isset( $logInfo[
'logtitle'] ) ) {
718 $revision->setTitle( Title::newFromText( $logInfo[
'logtitle'] ) );
721 $revision->setNoUpdates( $this->mNoUpdates );
723 if ( isset( $logInfo[
'comment'] ) ) {
724 $revision->setComment( $logInfo[
'comment'] );
727 if ( isset( $logInfo[
'contributor'][
'ip'] ) ) {
728 $revision->setUserIP( $logInfo[
'contributor'][
'ip'] );
731 if ( !isset( $logInfo[
'contributor'][
'username'] ) ) {
732 $revision->setUsername( $this->externalUserNames->addPrefix(
'Unknown user' ) );
734 $revision->setUsername(
735 $this->externalUserNames->applyPrefix( $logInfo[
'contributor'][
'username'] )
747 $this->
debug(
"Enter page handler." );
748 $pageInfo = [
'revisionCount' => 0,
'successfulRevisionCount' => 0 ];
751 $normalFields = [
'title',
'ns',
'id',
'redirect',
'restrictions' ];
756 while ( $skip ? $this->reader->next() : $this->reader->read() ) {
757 if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
758 $this->reader->localName ==
'page' ) {
764 $tag = $this->reader->localName;
769 } elseif ( !Hooks::run(
'ImportHandlePageXMLTag', [ $this,
772 } elseif ( in_array( $tag, $normalFields ) ) {
780 if ( $tag ==
'redirect' ) {
785 } elseif ( $tag ==
'revision' || $tag ==
'upload' ) {
788 $pageInfo[
'ns'] ??
null );
791 if ( is_array(
$title ) ) {
793 list( $pageInfo[
'_title'], $foreignTitle ) =
$title;
801 if ( $tag ==
'revision' ) {
807 } elseif ( $tag !=
'#text' ) {
808 $this->
warn(
"Unhandled page XML tag $tag" );
818 if ( array_key_exists(
'_title', $pageInfo ) ) {
820 $pageInfo[
'revisionCount'],
821 $pageInfo[
'successfulRevisionCount'],
830 $this->
debug(
"Enter revision handler" );
833 $normalFields = [
'id',
'timestamp',
'comment',
'minor',
'model',
'format',
'text',
'sha1' ];
837 while ( $skip ? $this->reader->next() : $this->reader->read() ) {
838 if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
839 $this->reader->localName ==
'revision' ) {
843 $tag = $this->reader->localName;
845 if ( !Hooks::run(
'ImportHandleRevisionXMLTag', [
846 $this, $pageInfo, $revisionInfo
849 } elseif ( in_array( $tag, $normalFields ) ) {
851 } elseif ( $tag ==
'contributor' ) {
853 } elseif ( $tag !=
'#text' ) {
854 $this->
warn(
"Unhandled revision XML tag $tag" );
859 $pageInfo[
'revisionCount']++;
861 $pageInfo[
'successfulRevisionCount']++;
878 if ( ( !isset( $revisionInfo[
'model'] ) ||
879 in_array( $revisionInfo[
'model'], [
890 ( isset( $revisionInfo[
'id'] ) ?
891 "the revision with ID $revisionInfo[id]" :
893 ) .
" exceeds the maximum allowable size ($wgMaxArticleSize KB)" );
899 if ( isset( $revisionInfo[
'id'] ) ) {
900 $revision->setID( $revisionInfo[
'id'] );
902 if ( isset( $revisionInfo[
'model'] ) ) {
903 $revision->setModel( $revisionInfo[
'model'] );
905 if ( isset( $revisionInfo[
'format'] ) ) {
906 $revision->setFormat( $revisionInfo[
'format'] );
908 $revision->setTitle( $pageInfo[
'_title'] );
910 if ( isset( $revisionInfo[
'text'] ) ) {
911 $handler = $revision->getContentHandler();
912 $text = $handler->importTransform(
913 $revisionInfo[
'text'],
914 $revision->getFormat() );
916 $revision->setText( $text );
918 $revision->setTimestamp( $revisionInfo[
'timestamp'] ??
wfTimestampNow() );
920 if ( isset( $revisionInfo[
'comment'] ) ) {
921 $revision->setComment( $revisionInfo[
'comment'] );
924 if ( isset( $revisionInfo[
'minor'] ) ) {
925 $revision->setMinor(
true );
927 if ( isset( $revisionInfo[
'contributor'][
'ip'] ) ) {
928 $revision->setUserIP( $revisionInfo[
'contributor'][
'ip'] );
929 } elseif ( isset( $revisionInfo[
'contributor'][
'username'] ) ) {
930 $revision->setUsername(
931 $this->externalUserNames->applyPrefix( $revisionInfo[
'contributor'][
'username'] )
934 $revision->setUsername( $this->externalUserNames->addPrefix(
'Unknown user' ) );
936 if ( isset( $revisionInfo[
'sha1'] ) ) {
937 $revision->setSha1Base36( $revisionInfo[
'sha1'] );
939 $revision->setNoUpdates( $this->mNoUpdates );
949 $this->
debug(
"Enter upload handler" );
952 $normalFields = [
'timestamp',
'comment',
'filename',
'text',
953 'src',
'size',
'sha1base36',
'archivename',
'rel' ];
957 while ( $skip ? $this->reader->next() : $this->reader->read() ) {
958 if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
959 $this->reader->localName ==
'upload' ) {
963 $tag = $this->reader->localName;
965 if ( !Hooks::run(
'ImportHandleUploadXMLTag', [
969 } elseif ( in_array( $tag, $normalFields ) ) {
971 } elseif ( $tag ==
'contributor' ) {
973 } elseif ( $tag ==
'contents' ) {
975 $encoding = $this->reader->getAttribute(
'encoding' );
976 if ( $encoding ===
'base64' ) {
977 $uploadInfo[
'fileSrc'] = $this->
dumpTemp( base64_decode( $contents ) );
978 $uploadInfo[
'isTempSrc'] =
true;
980 } elseif ( $tag !=
'#text' ) {
981 $this->
warn(
"Unhandled upload XML tag $tag" );
986 if ( $this->mImageBasePath && isset( $uploadInfo[
'rel'] ) ) {
987 $path =
"{$this->mImageBasePath}/{$uploadInfo['rel']}";
988 if ( file_exists(
$path ) ) {
989 $uploadInfo[
'fileSrc'] =
$path;
990 $uploadInfo[
'isTempSrc'] =
false;
994 if ( $this->mImportUploads ) {
1004 $filename = tempnam(
wfTempDir(),
'importupload' );
1005 file_put_contents( $filename, $contents );
1016 $text = $uploadInfo[
'text'] ??
'';
1018 $revision->setTitle( $pageInfo[
'_title'] );
1019 $revision->setID( $pageInfo[
'id'] );
1020 $revision->setTimestamp( $uploadInfo[
'timestamp'] );
1021 $revision->setText( $text );
1022 $revision->setFilename( $uploadInfo[
'filename'] );
1023 if ( isset( $uploadInfo[
'archivename'] ) ) {
1024 $revision->setArchiveName( $uploadInfo[
'archivename'] );
1026 $revision->setSrc( $uploadInfo[
'src'] );
1027 if ( isset( $uploadInfo[
'fileSrc'] ) ) {
1028 $revision->setFileSrc( $uploadInfo[
'fileSrc'],
1029 !empty( $uploadInfo[
'isTempSrc'] ) );
1031 if ( isset( $uploadInfo[
'sha1base36'] ) ) {
1032 $revision->setSha1Base36( $uploadInfo[
'sha1base36'] );
1034 $revision->setSize( intval( $uploadInfo[
'size'] ) );
1035 $revision->setComment( $uploadInfo[
'comment'] );
1037 if ( isset( $uploadInfo[
'contributor'][
'ip'] ) ) {
1038 $revision->setUserIP( $uploadInfo[
'contributor'][
'ip'] );
1040 if ( isset( $uploadInfo[
'contributor'][
'username'] ) ) {
1041 $revision->setUsername(
1042 $this->externalUserNames->applyPrefix( $uploadInfo[
'contributor'][
'username'] )
1045 $revision->setNoUpdates( $this->mNoUpdates );
1047 return call_user_func( $this->mUploadCallback, $revision );
1054 $fields = [
'id',
'ip',
'username' ];
1057 if ( $this->reader->isEmptyElement ) {
1060 while ( $this->reader->read() ) {
1061 if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
1062 $this->reader->localName ==
'contributor' ) {
1066 $tag = $this->reader->localName;
1068 if ( in_array( $tag, $fields ) ) {
1082 if ( is_null( $this->foreignNamespaces ) ) {
1086 $this->foreignNamespaces );
1089 $foreignTitle = $foreignTitleFactory->createForeignTitle( $text,
1092 $title = $this->importTitleFactory->createTitleFromForeignTitle(
1095 $commandLineMode = $this->config->get(
'CommandLineMode' );
1096 if ( is_null(
$title ) ) {
1097 # Invalid page title? Ignore the page
1098 $this->
notice(
'import-error-invalid', $foreignTitle->getFullText() );
1100 } elseif (
$title->isExternal() ) {
1101 $this->
notice(
'import-error-interwiki',
$title->getPrefixedText() );
1103 } elseif ( !
$title->canExist() ) {
1104 $this->
notice(
'import-error-special',
$title->getPrefixedText() );
1106 } elseif ( !$commandLineMode ) {
1107 $permissionManager = MediaWikiServices::getInstance()->getPermissionManager();
1108 $user = RequestContext::getMain()->getUser();
1110 if ( !$permissionManager->userCan(
'edit', $user,
$title ) ) {
1111 # Do not import if the importing wiki user cannot edit this page
1112 $this->
notice(
'import-error-edit',
$title->getPrefixedText() );
1117 if ( !
$title->exists() && !$permissionManager->userCan(
'create', $user,
$title ) ) {
1118 # Do not import if the importing wiki user cannot create this page
1119 $this->
notice(
'import-error-create',
$title->getPrefixedText() );
1125 return [
$title, $foreignTitle ];
$wgMaxArticleSize
Maximum article size in kilobytes.
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
wfTempDir()
Tries to get the system directory for temporary files.
wfTimestampNow()
Convenience function; returns MediaWiki timestamp for the present time.
wfSetVar(&$dest, $source, $force=false)
Sets dest to source and returns the original value of dest. If source is NULL, it just returns the val...
wfMessage( $key,... $params)
This is the function for getting translated interface messages.
Class to parse and build external user names.
Exception representing a failure to serialize or unserialize a content object.
A parser that translates page titles on a foreign wiki into ForeignTitle objects, with no knowledge o...
A class to convert page titles on a foreign wiki (ForeignTitle objects) into page titles on the local...
A parser that translates page titles on a foreign wiki into ForeignTitle objects, using information a...
A class to convert page titles on a foreign wiki (ForeignTitle objects) into page titles on the local...
A class to convert page titles on a foreign wiki (ForeignTitle objects) into page titles on the local...
static registerSource(ImportSource $source)
XML file reader for the page data importer.
finishImportPage( $title, $foreignTitle, $revCount, $sRevCount, $pageInfo)
Mostly for hook use.
handlePage()
PhanTypeInvalidDimOffset: Phan does not read the reference inside the hook.
setImportUploads( $import)
doImport()
Primary entry point.
setPageCallback( $callback)
Sets the action to perform as each new page in the stream is reached.
setUsernamePrefix( $usernamePrefix, $assignKnownUsers)
ExternalUserNames $externalUserNames
setNoUpdates( $noupdates)
Set 'no updates' mode.
pageOutCallback( $title, $foreignTitle, $revCount, $sucCount, $pageInfo)
Notify the callback function when a "</page>" is closed.
setLogItemCallback( $callback)
Sets the action to perform as each log item reached.
importUpload( $revision)
Dummy for now...
setImportTitleFactory( $factory)
Sets the factory object to use to convert ForeignTitle objects into local Title objects.
setSiteInfoCallback( $callback)
Sets the action to perform when site info is encountered.
nodeAttribute( $attr)
Retrieves the contents of the named attribute of the current element.
pageCallback( $title)
Notify the callback function when a new "<page>" is reached.
processLogItem( $logInfo)
setTargetNamespace( $namespace)
Set a target namespace to override the defaults.
setPageOffset( $nthPage)
Sets 'pageOffset' value.
debugRevisionHandler(&$revision)
Alternate per-revision callback, for debugging.
nodeContents()
Shouldn't something like this be built-in to XMLReader? Fetches text contents of the current element,...
importLogItem( $revision)
Default per-revision callback, performs the import.
handleRevision(&$pageInfo)
revisionCallback( $revision)
Notify the callback function of a revision.
logItemCallback( $revision)
Notify the callback function of a new log item.
setDebug( $debug)
Set debug mode...
processRevision( $pageInfo, $revisionInfo)
processUpload( $pageInfo, $uploadInfo)
bool $disableStatisticsUpdate
processTitle( $text, $ns=null)
notice( $msg,... $params)
importRevision( $revision)
Default per-revision callback, performs the import.
ImportTitleFactory $importTitleFactory
__construct(ImportSource $source, Config $config)
Creates an ImportXMLReader drawing from the source provided.
setPageOutCallback( $callback)
Sets the action to perform as each page in the stream is completed.
setTargetRootPage( $rootpage)
Set a target root page under which all pages are imported.
setNoticeCallback( $callback)
Set a callback that displays notice messages.
beforeImportPage( $titleAndForeignTitle)
Default per-page callback.
disableStatisticsUpdate()
Statistics updates can take a lot of time.
siteInfoCallback( $siteInfo)
Notify the callback function of site info.
setRevisionCallback( $callback)
Sets the action to perform as each page revision is reached.
setUploadCallback( $callback)
Sets the action to perform as each file upload version is reached.
Represents a revision, log entry or upload during the import process.
Interface for configuration instances.
Source interface for XML import.
Represents an object that can convert page titles on a foreign wiki (ForeignTitle objects) into page ...