62 if ( !class_exists(
'XMLReader' ) ) {
63 throw new Exception(
'Import requires PHP to have been compiled with libxml support' );
66 $this->reader =
new XMLReader();
69 if ( !in_array(
'uploadsource', stream_get_wrappers() ) ) {
76 $oldDisable = libxml_disable_entity_loader(
false );
77 if ( defined(
'LIBXML_PARSEHUGE' ) ) {
78 $status = $this->reader->open(
"uploadsource://$id",
null, LIBXML_PARSEHUGE );
80 $status = $this->reader->open(
"uploadsource://$id" );
83 $error = libxml_get_last_error();
84 libxml_disable_entity_loader( $oldDisable );
85 throw new MWException(
'Encountered an internal error while initializing WikiImporter object: ' .
88 libxml_disable_entity_loader( $oldDisable );
109 $this->
debug(
"FAILURE: $err" );
110 wfDebug(
"WikiImporter XML error: $err\n" );
114 if ( $this->mDebug ) {
127 if ( is_callable( $this->mNoticeCallback ) ) {
128 call_user_func( $this->mNoticeCallback, $msg,
$params );
149 $this->mNoUpdates = $noupdates;
159 $this->pageOffset = $nthPage;
169 return wfSetVar( $this->mNoticeCallback, $callback );
179 $this->mPageCallback = $callback;
194 $this->mPageOutCallback = $callback;
205 $this->mRevisionCallback = $callback;
216 $this->mUploadCallback = $callback;
227 $this->mLogItemCallback = $callback;
238 $this->mSiteInfoCallback = $callback;
248 $this->importTitleFactory = $factory;
257 if ( is_null( $namespace ) ) {
265 $namespace = intval( $namespace );
280 if ( is_null( $rootpage ) ) {
283 } elseif ( $rootpage !==
'' ) {
284 $rootpage = rtrim( $rootpage,
'/' );
288 $status->fatal(
'import-rootpage-invalid' );
292 : MediaWikiServices::getInstance()->getContentLanguage()->
293 getNsText(
$title->getNamespace() );
294 $status->fatal(
'import-rootpage-nosubpage', $displayNSText );
308 $this->mImageBasePath = $dir;
315 $this->mImportUploads = $import;
324 $this->externalUserNames =
new ExternalUserNames( $usernamePrefix, $assignKnownUsers );
342 $title = $titleAndForeignTitle[0];
344 $this->countableCache[
'title_' .
$title->getPrefixedText()] = $page->isCountable();
354 if ( !$revision->getContentHandler()->canBeUsedOn( $revision->getTitle() ) ) {
355 $this->
notice(
'import-error-bad-location',
356 $revision->getTitle()->getPrefixedText(),
358 $revision->getModel(),
359 $revision->getFormat() );
365 return $revision->importOldRevision();
367 $this->
notice(
'import-error-unserialize',
368 $revision->getTitle()->getPrefixedText(),
370 $revision->getModel(),
371 $revision->getFormat() );
383 return $revision->importLogItem();
392 return $revision->importUpload();
405 $sRevCount, $pageInfo
415 $page->loadPageData(
'fromdbmaster' );
418 wfDebug( __METHOD__ .
': Skipping article count adjustment for ' .
$title .
419 ' because WikiPage::getContent() returned null' );
421 $editInfo = $page->prepareContentForEdit(
$content );
422 $countKey =
'title_' .
$title->getPrefixedText();
423 $countable = $page->isCountable( $editInfo );
424 if ( array_key_exists( $countKey, $this->countableCache ) &&
425 $countable != $this->countableCache[$countKey] ) {
427 'articles' => ( (
int)$countable - (
int)$this->countableCache[$countKey] )
433 $args = func_get_args();
442 $this->
debug(
"Got revision:" );
443 if ( is_object( $revision->title ) ) {
444 $this->
debug(
"-- Title: " . $revision->title->getPrefixedText() );
446 $this->
debug(
"-- Title: <invalid>" );
448 $this->
debug(
"-- User: " . $revision->user_text );
449 $this->
debug(
"-- Timestamp: " . $revision->timestamp );
450 $this->
debug(
"-- Comment: " . $revision->comment );
451 $this->
debug(
"-- Text: " . $revision->text );
460 if ( isset( $this->mSiteInfoCallback ) ) {
461 return call_user_func_array( $this->mSiteInfoCallback,
462 [ $siteInfo, $this ] );
473 if ( isset( $this->mPageCallback ) ) {
474 call_user_func( $this->mPageCallback,
$title );
487 $sucCount, $pageInfo ) {
488 if ( isset( $this->mPageOutCallback ) ) {
489 $args = func_get_args();
490 call_user_func_array( $this->mPageOutCallback,
$args );
500 if ( isset( $this->mRevisionCallback ) ) {
501 return call_user_func_array( $this->mRevisionCallback,
502 [ $revision, $this ] );
514 if ( isset( $this->mLogItemCallback ) ) {
515 return call_user_func_array( $this->mLogItemCallback,
516 [ $revision, $this ] );
529 return $this->reader->getAttribute( $attr );
540 if ( $this->reader->isEmptyElement ) {
544 while ( $this->reader->read() ) {
545 switch ( $this->reader->nodeType ) {
546 case XMLReader::TEXT:
547 case XMLReader::CDATA:
548 case XMLReader::SIGNIFICANT_WHITESPACE:
549 $buffer .= $this->reader->value;
551 case XMLReader::END_ELEMENT:
556 $this->reader->close();
570 $oldDisable = libxml_disable_entity_loader(
true );
571 $this->reader->read();
573 if ( $this->reader->localName !=
'mediawiki' ) {
574 libxml_disable_entity_loader( $oldDisable );
575 throw new MWException(
"Expected <mediawiki> tag, got " .
576 $this->reader->localName );
578 $this->
debug(
"<mediawiki> tag is correct." );
580 $this->
debug(
"Starting primary dump processing loop." );
582 $keepReading = $this->reader->read();
587 while ( $keepReading ) {
588 $tag = $this->reader->localName;
589 if ( $this->pageOffset ) {
590 if ( $tag ===
'page' ) {
593 if ( $pageCount < $this->pageOffset ) {
594 $keepReading = $this->reader->next();
598 $type = $this->reader->nodeType;
600 if ( !
Hooks::run(
'ImportHandleToplevelXMLTag', [ $this ] ) ) {
602 } elseif ( $tag ==
'mediawiki' &&
$type == XMLReader::END_ELEMENT ) {
604 } elseif ( $tag ==
'siteinfo' ) {
606 } elseif ( $tag ==
'page' ) {
608 } elseif ( $tag ==
'logitem' ) {
610 } elseif ( $tag !=
'#text' ) {
611 $this->
warn(
"Unhandled top-level XML tag $tag" );
617 $keepReading = $this->reader->next();
619 $this->
debug(
"Skip" );
621 $keepReading = $this->reader->read();
624 }
catch ( Exception $ex ) {
629 libxml_disable_entity_loader( $oldDisable );
630 $this->reader->close();
640 $this->
debug(
"Enter site info handler." );
644 $normalFields = [
'sitename',
'base',
'generator',
'case' ];
646 while ( $this->reader->read() ) {
647 if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
648 $this->reader->localName ==
'siteinfo' ) {
652 $tag = $this->reader->localName;
654 if ( $tag ==
'namespace' ) {
657 } elseif ( in_array( $tag, $normalFields ) ) {
667 $this->
debug(
"Enter log item handler." );
671 $normalFields = [
'id',
'comment',
'type',
'action',
'timestamp',
672 'logtitle',
'params' ];
674 while ( $this->reader->read() ) {
675 if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
676 $this->reader->localName ==
'logitem' ) {
680 $tag = $this->reader->localName;
682 if ( !
Hooks::run(
'ImportHandleLogItemXMLTag', [
686 } elseif ( in_array( $tag, $normalFields ) ) {
688 } elseif ( $tag ==
'contributor' ) {
690 } elseif ( $tag !=
'#text' ) {
691 $this->
warn(
"Unhandled log-item XML tag $tag" );
705 if ( isset( $logInfo[
'id'] ) ) {
706 $revision->setID( $logInfo[
'id'] );
708 $revision->setType( $logInfo[
'type'] );
709 $revision->setAction( $logInfo[
'action'] );
710 if ( isset( $logInfo[
'timestamp'] ) ) {
711 $revision->setTimestamp( $logInfo[
'timestamp'] );
713 if ( isset( $logInfo[
'params'] ) ) {
714 $revision->setParams( $logInfo[
'params'] );
716 if ( isset( $logInfo[
'logtitle'] ) ) {
722 $revision->setNoUpdates( $this->mNoUpdates );
724 if ( isset( $logInfo[
'comment'] ) ) {
725 $revision->setComment( $logInfo[
'comment'] );
728 if ( isset( $logInfo[
'contributor'][
'ip'] ) ) {
729 $revision->setUserIP( $logInfo[
'contributor'][
'ip'] );
732 if ( !isset( $logInfo[
'contributor'][
'username'] ) ) {
733 $revision->setUsername( $this->externalUserNames->addPrefix(
'Unknown user' ) );
735 $revision->setUsername(
736 $this->externalUserNames->applyPrefix( $logInfo[
'contributor'][
'username'] )
745 $this->
debug(
"Enter page handler." );
746 $pageInfo = [
'revisionCount' => 0,
'successfulRevisionCount' => 0 ];
749 $normalFields = [
'title',
'ns',
'id',
'redirect',
'restrictions' ];
754 while ( $skip ? $this->reader->next() : $this->reader->read() ) {
755 if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
756 $this->reader->localName ==
'page' ) {
762 $tag = $this->reader->localName;
767 } elseif ( !
Hooks::run(
'ImportHandlePageXMLTag', [ $this,
770 } elseif ( in_array( $tag, $normalFields ) ) {
778 if ( $tag ==
'redirect' ) {
783 } elseif ( $tag ==
'revision' || $tag ==
'upload' ) {
786 $pageInfo[
'ns'] ??
null );
789 if ( is_array(
$title ) ) {
791 list( $pageInfo[
'_title'], $foreignTitle ) =
$title;
799 if ( $tag ==
'revision' ) {
805 } elseif ( $tag !=
'#text' ) {
806 $this->
warn(
"Unhandled page XML tag $tag" );
816 if ( array_key_exists(
'_title', $pageInfo ) ) {
818 $pageInfo[
'revisionCount'],
819 $pageInfo[
'successfulRevisionCount'],
828 $this->
debug(
"Enter revision handler" );
831 $normalFields = [
'id',
'timestamp',
'comment',
'minor',
'model',
'format',
'text',
'sha1' ];
835 while ( $skip ? $this->reader->next() : $this->reader->read() ) {
836 if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
837 $this->reader->localName ==
'revision' ) {
841 $tag = $this->reader->localName;
843 if ( !
Hooks::run(
'ImportHandleRevisionXMLTag', [
844 $this, $pageInfo, $revisionInfo
847 } elseif ( in_array( $tag, $normalFields ) ) {
849 } elseif ( $tag ==
'contributor' ) {
851 } elseif ( $tag !=
'#text' ) {
852 $this->
warn(
"Unhandled revision XML tag $tag" );
857 $pageInfo[
'revisionCount']++;
859 $pageInfo[
'successfulRevisionCount']++;
876 if ( ( !isset( $revisionInfo[
'model'] ) ||
877 in_array( $revisionInfo[
'model'], [
888 ( isset( $revisionInfo[
'id'] ) ?
889 "the revision with ID $revisionInfo[id]" :
891 ) .
" exceeds the maximum allowable size ($wgMaxArticleSize KB)" );
897 if ( isset( $revisionInfo[
'id'] ) ) {
898 $revision->setID( $revisionInfo[
'id'] );
900 if ( isset( $revisionInfo[
'model'] ) ) {
901 $revision->setModel( $revisionInfo[
'model'] );
903 if ( isset( $revisionInfo[
'format'] ) ) {
904 $revision->setFormat( $revisionInfo[
'format'] );
906 $revision->setTitle( $pageInfo[
'_title'] );
908 if ( isset( $revisionInfo[
'text'] ) ) {
909 $handler = $revision->getContentHandler();
911 $revisionInfo[
'text'],
912 $revision->getFormat() );
914 $revision->setText( $text );
916 $revision->setTimestamp( $revisionInfo[
'timestamp'] ??
wfTimestampNow() );
918 if ( isset( $revisionInfo[
'comment'] ) ) {
919 $revision->setComment( $revisionInfo[
'comment'] );
922 if ( isset( $revisionInfo[
'minor'] ) ) {
923 $revision->setMinor(
true );
925 if ( isset( $revisionInfo[
'contributor'][
'ip'] ) ) {
926 $revision->setUserIP( $revisionInfo[
'contributor'][
'ip'] );
927 } elseif ( isset( $revisionInfo[
'contributor'][
'username'] ) ) {
928 $revision->setUsername(
929 $this->externalUserNames->applyPrefix( $revisionInfo[
'contributor'][
'username'] )
932 $revision->setUsername( $this->externalUserNames->addPrefix(
'Unknown user' ) );
934 if ( isset( $revisionInfo[
'sha1'] ) ) {
935 $revision->setSha1Base36( $revisionInfo[
'sha1'] );
937 $revision->setNoUpdates( $this->mNoUpdates );
947 $this->
debug(
"Enter upload handler" );
950 $normalFields = [
'timestamp',
'comment',
'filename',
'text',
951 'src',
'size',
'sha1base36',
'archivename',
'rel' ];
955 while ( $skip ? $this->reader->next() : $this->reader->read() ) {
956 if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
957 $this->reader->localName ==
'upload' ) {
961 $tag = $this->reader->localName;
963 if ( !
Hooks::run(
'ImportHandleUploadXMLTag', [
967 } elseif ( in_array( $tag, $normalFields ) ) {
969 } elseif ( $tag ==
'contributor' ) {
971 } elseif ( $tag ==
'contents' ) {
973 $encoding = $this->reader->getAttribute(
'encoding' );
974 if ( $encoding ===
'base64' ) {
975 $uploadInfo[
'fileSrc'] = $this->
dumpTemp( base64_decode( $contents ) );
976 $uploadInfo[
'isTempSrc'] =
true;
978 } elseif ( $tag !=
'#text' ) {
979 $this->
warn(
"Unhandled upload XML tag $tag" );
984 if ( $this->mImageBasePath && isset( $uploadInfo[
'rel'] ) ) {
985 $path =
"{$this->mImageBasePath}/{$uploadInfo['rel']}";
986 if ( file_exists(
$path ) ) {
987 $uploadInfo[
'fileSrc'] =
$path;
988 $uploadInfo[
'isTempSrc'] =
false;
992 if ( $this->mImportUploads ) {
1002 $filename = tempnam(
wfTempDir(),
'importupload' );
1003 file_put_contents( $filename, $contents );
1014 $text = $uploadInfo[
'text'] ??
'';
1016 $revision->setTitle( $pageInfo[
'_title'] );
1017 $revision->setID( $pageInfo[
'id'] );
1018 $revision->setTimestamp( $uploadInfo[
'timestamp'] );
1019 $revision->setText( $text );
1020 $revision->setFilename( $uploadInfo[
'filename'] );
1021 if ( isset( $uploadInfo[
'archivename'] ) ) {
1022 $revision->setArchiveName( $uploadInfo[
'archivename'] );
1024 $revision->setSrc( $uploadInfo[
'src'] );
1025 if ( isset( $uploadInfo[
'fileSrc'] ) ) {
1026 $revision->setFileSrc( $uploadInfo[
'fileSrc'],
1027 !empty( $uploadInfo[
'isTempSrc'] ) );
1029 if ( isset( $uploadInfo[
'sha1base36'] ) ) {
1030 $revision->setSha1Base36( $uploadInfo[
'sha1base36'] );
1032 $revision->setSize( intval( $uploadInfo[
'size'] ) );
1033 $revision->setComment( $uploadInfo[
'comment'] );
1035 if ( isset( $uploadInfo[
'contributor'][
'ip'] ) ) {
1036 $revision->setUserIP( $uploadInfo[
'contributor'][
'ip'] );
1038 if ( isset( $uploadInfo[
'contributor'][
'username'] ) ) {
1039 $revision->setUsername(
1040 $this->externalUserNames->applyPrefix( $uploadInfo[
'contributor'][
'username'] )
1043 $revision->setNoUpdates( $this->mNoUpdates );
1045 return call_user_func( $this->mUploadCallback, $revision );
1052 $fields = [
'id',
'ip',
'username' ];
1055 if ( $this->reader->isEmptyElement ) {
1058 while ( $this->reader->read() ) {
1059 if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
1060 $this->reader->localName ==
'contributor' ) {
1064 $tag = $this->reader->localName;
1066 if ( in_array( $tag, $fields ) ) {
1080 if ( is_null( $this->foreignNamespaces ) ) {
1084 $this->foreignNamespaces );
1087 $foreignTitle = $foreignTitleFactory->createForeignTitle( $text,
1090 $title = $this->importTitleFactory->createTitleFromForeignTitle(
1093 $commandLineMode = $this->config->get(
'CommandLineMode' );
1094 if ( is_null(
$title ) ) {
1095 # Invalid page title? Ignore the page
1096 $this->
notice(
'import-error-invalid', $foreignTitle->getFullText() );
1098 } elseif (
$title->isExternal() ) {
1099 $this->
notice(
'import-error-interwiki',
$title->getPrefixedText() );
1101 } elseif ( !
$title->canExist() ) {
1102 $this->
notice(
'import-error-special',
$title->getPrefixedText() );
1104 } elseif ( !
$title->userCan(
'edit' ) && !$commandLineMode ) {
1105 # Do not import if the importing wiki user cannot edit this page
1106 $this->
notice(
'import-error-edit',
$title->getPrefixedText() );
1108 } elseif ( !
$title->exists() && !
$title->userCan(
'create' ) && !$commandLineMode ) {
1109 # Do not import if the importing wiki user cannot create this page
1110 $this->
notice(
'import-error-create',
$title->getPrefixedText() );
1114 return [
$title, $foreignTitle ];