// Importer initialisation fragment. NOTE(review): this listing omits lines between
// the numbered statements, so enclosing braces and some conditions are not visible.
// Fail early when the XMLReader class is not available.
58 if ( !class_exists(
'XMLReader' ) ) {
59 throw new Exception(
'Import requires PHP to have been compiled with libxml support' );
62 $this->reader =
new XMLReader();
// Deprecation notice (since 1.25) followed by a fallback to the main config.
// NOTE(review): the guarding condition is not visible here — presumably "no Config given"; confirm.
64 wfDeprecated( __METHOD__ .
' without a Config instance',
'1.25' );
65 $config = MediaWikiServices::getInstance()->getMainConfig();
// Register the 'uploadsource' stream wrapper only when it is not registered yet.
69 if ( !in_array(
'uploadsource', stream_get_wrappers() ) ) {
70 stream_wrapper_register(
'uploadsource',
'UploadSourceAdapter' );
// Call libxml_disable_entity_loader( false ) around open(), keeping the previous
// setting in $oldDisable so it can be restored afterwards.
76 $oldDisable = libxml_disable_entity_loader(
false );
// Pass LIBXML_PARSEHUGE to open() when that constant is defined.
77 if ( defined(
'LIBXML_PARSEHUGE' ) ) {
78 $status = $this->reader->open(
"uploadsource://$id",
null, LIBXML_PARSEHUGE );
// Alternative open() call without options (the separating else line is not shown).
80 $status = $this->reader->open(
"uploadsource://$id" );
// Failure path: fetch the last libxml error, restore the entity-loader setting, throw.
// NOTE(review): the condition on $status guarding this path is not visible here.
83 $error = libxml_get_last_error();
84 libxml_disable_entity_loader( $oldDisable );
85 throw new MWException(
'Encountered an internal error while initializing WikiImporter object: ' .
// Restore the previous entity-loader setting.
88 libxml_disable_entity_loader( $oldDisable );
// Report a failure twice: through this object's debug() and through wfDebug().
108 $this->
debug(
"FAILURE: $err" );
109 wfDebug(
"WikiImporter XML error: $err\n" );
// Branch taken only when the mDebug flag is truthy (body not visible here).
113 if ( $this->mDebug ) {
// warn(): only the signature is visible in this excerpt.
118 public function warn( $data ) {
// Invoke the notice callback with $msg and $params, but only if it is callable.
126 if ( is_callable( $this->mNoticeCallback ) ) {
127 call_user_func( $this->mNoticeCallback, $msg,
$params );
// Store the "no updates" flag; elsewhere in this file it is handed to revisions
// through setNoUpdates().
146 $this->mNoUpdates = $noupdates;
// Assign the notice callback through wfSetVar() and return that call's result.
// NOTE(review): presumably the previous callback is returned — confirm against wfSetVar().
156 return wfSetVar( $this->mNoticeCallback, $callback );
// The assignments below store caller-supplied callbacks that other parts of this
// file invoke with call_user_func()/call_user_func_array().
166 $this->mPageCallback = $callback;
181 $this->mPageOutCallback = $callback;
192 $this->mRevisionCallback = $callback;
203 $this->mUploadCallback = $callback;
214 $this->mLogItemCallback = $callback;
225 $this->mSiteInfoCallback = $callback;
// Store the factory whose createTitleFromForeignTitle() is used elsewhere in this file.
235 $this->importTitleFactory = $factory;
// A null namespace gets its own branch (body not visible here).
244 if ( is_null( $namespace ) ) {
// Coerce the supplied namespace to an integer.
252 $namespace = intval( $namespace );
// A null root page gets its own branch; any non-empty string has trailing
// slashes stripped before further checks.
267 if ( is_null( $rootpage ) ) {
270 } elseif ( $rootpage !==
'' ) {
271 $rootpage = rtrim( $rootpage,
'/' );
// Record a fatal status with message key 'import-rootpage-invalid'.
// NOTE(review): the triggering condition is not visible in this excerpt.
275 $status->fatal(
'import-rootpage-invalid' );
// Record a fatal status 'import-rootpage-nosubpage', with $displayNSText as parameter.
283 $status->fatal(
'import-rootpage-nosubpage', $displayNSText );
// Base directory; the upload handling code combines it with an upload's 'rel' value.
298 $this->mImageBasePath =
$dir;
// Flag tested by the upload handling code ("if ( $this->mImportUploads )").
305 $this->mImportUploads = $import;
// Take the first element of the title pair.
323 $title = $titleAndForeignTitle[0];
// Cache the page's countable state under 'title_' . <prefixed title text>; the
// same key is read back later when the article count is adjusted.
325 $this->countableCache[
'title_' .
$title->getPrefixedText()] =
$page->isCountable();
// When the revision's content handler cannot be used on the revision's title,
// emit an 'import-error-bad-location' notice with title, model and format.
335 if ( !$revision->getContentHandler()->canBeUsedOn( $revision->getTitle() ) ) {
336 $this->
notice(
'import-error-bad-location',
337 $revision->getTitle()->getPrefixedText(),
339 $revision->getModel(),
340 $revision->getFormat() );
// Delegate the import to the revision object and return its result.
346 return $revision->importOldRevision();
// Emit an 'import-error-unserialize' notice with title, model and format.
// NOTE(review): presumably inside a catch block — the enclosing construct is not visible.
348 $this->
notice(
'import-error-unserialize',
349 $revision->getTitle()->getPrefixedText(),
351 $revision->getModel(),
352 $revision->getFormat() );
// Delegate log item import to the revision object.
364 return $revision->importLogItem();
// Delegate upload import to the revision object.
373 return $revision->importUpload();
// Tail of a parameter list; the start of this signature is not visible here.
386 $sRevCount, $pageInfo ) {
// Load the page data with the 'fromdbmaster' option.
396 $page->loadPageData(
'fromdbmaster' );
// Debug message for the case named in its text: WikiPage::getContent() returned
// null, so the article count adjustment is skipped.
399 wfDebug( __METHOD__ .
': Skipping article count adjustment for ' .
$title .
400 ' because WikiPage::getContent() returned null' );
// Compare the current countable state with the value cached under the same key.
403 $countKey =
'title_' .
$title->getPrefixedText();
404 $countable =
$page->isCountable( $editInfo );
// Only act when a cached value exists and differs from the current one.
405 if ( array_key_exists( $countKey, $this->countableCache ) &&
406 $countable != $this->countableCache[$countKey] ) {
// 'articles' delta: current state minus cached state, both cast to int.
408 'articles' => ( (
int)$countable - (
int)$this->countableCache[$countKey] )
// Capture every argument passed to the enclosing function.
414 $args = func_get_args();
// Dump a revision's fields through debug(), one line per field.
423 $this->
debug(
"Got revision:" );
// Print the prefixed title when $revision->title is an object, else "<invalid>".
424 if ( is_object( $revision->title ) ) {
425 $this->
debug(
"-- Title: " . $revision->title->getPrefixedText() );
427 $this->
debug(
"-- Title: <invalid>" );
// The remaining fields are read directly from public properties of $revision.
429 $this->
debug(
"-- User: " . $revision->user_text );
430 $this->
debug(
"-- Timestamp: " . $revision->timestamp );
431 $this->
debug(
"-- Comment: " . $revision->comment );
432 $this->
debug(
"-- Text: " . $revision->text );
// Each dispatcher below runs its callback only when the property is set (isset()).
// Site info: call with [ $siteInfo, $this ] and return the callback's result.
441 if ( isset( $this->mSiteInfoCallback ) ) {
442 return call_user_func_array( $this->mSiteInfoCallback,
443 [ $siteInfo, $this ] );
// Page start: call with the title only; the result is not returned.
454 if ( isset( $this->mPageCallback ) ) {
455 call_user_func( $this->mPageCallback,
$title );
// Page end: forward all arguments of the enclosing function unchanged.
// (The line below is the tail of that function's parameter list.)
468 $sucCount, $pageInfo ) {
469 if ( isset( $this->mPageOutCallback ) ) {
470 $args = func_get_args();
471 call_user_func_array( $this->mPageOutCallback,
$args );
// Revision: call with [ $revision, $this ] and return the callback's result.
481 if ( isset( $this->mRevisionCallback ) ) {
482 return call_user_func_array( $this->mRevisionCallback,
483 [ $revision, $this ] );
// Log item: call with [ $revision, $this ] and return the callback's result.
495 if ( isset( $this->mLogItemCallback ) ) {
496 return call_user_func_array( $this->mLogItemCallback,
497 [ $revision, $this ] );
// Return attribute $attr of the reader's current node.
510 return $this->reader->getAttribute( $attr );
// Empty elements are handled before the read loop (branch body not visible here).
521 if ( $this->reader->isEmptyElement ) {
// Read forward, appending the value of text, CDATA and significant-whitespace
// nodes to $buffer.
525 while ( $this->reader->read() ) {
526 switch ( $this->reader->nodeType ) {
527 case XMLReader::TEXT:
528 case XMLReader::CDATA:
529 case XMLReader::SIGNIFICANT_WHITESPACE:
530 $buffer .= $this->reader->value;
// End-of-element case (body not visible here).
532 case XMLReader::END_ELEMENT:
// Close the reader. NOTE(review): the path leading here is not visible — presumably
// reached when read() runs out of input before an END_ELEMENT; confirm.
537 $this->reader->close();
// Disable the libxml entity loader while the dump is read; the previous setting is
// kept in $oldDisable and restored on the exits visible below.
550 $oldDisable = libxml_disable_entity_loader(
true );
// Advance to the first node, which must be <mediawiki>.
551 $this->reader->read();
553 if ( $this->reader->localName !=
'mediawiki' ) {
// Restore the entity-loader setting before throwing.
554 libxml_disable_entity_loader( $oldDisable );
555 throw new MWException(
"Expected <mediawiki> tag, got " .
556 $this->reader->localName );
558 $this->
debug(
"<mediawiki> tag is correct." );
560 $this->
debug(
"Starting primary dump processing loop." );
// Prime the loop; $keepReading is refreshed by next()/read() further down.
562 $keepReading = $this->reader->read();
566 while ( $keepReading ) {
567 $tag = $this->reader->localName;
568 $type = $this->reader->nodeType;
// The 'ImportHandleToplevelXMLTag' hook is consulted first; a falsy return takes
// this branch (body not visible here).
570 if ( !
Hooks::run(
'ImportHandleToplevelXMLTag', [ $this ] ) ) {
// Closing </mediawiki> element.
572 } elseif (
$tag ==
'mediawiki' &&
$type == XMLReader::END_ELEMENT ) {
// Known top-level elements; their branch bodies are not visible here.
574 } elseif (
$tag ==
'siteinfo' ) {
576 } elseif (
$tag ==
'page' ) {
578 } elseif (
$tag ==
'logitem' ) {
// Anything else except '#text' nodes is reported through warn().
580 } elseif (
$tag !=
'#text' ) {
581 $this->
warn(
"Unhandled top-level XML tag $tag" );
// Two ways to advance: next() here, or read() below after logging "Skip".
// NOTE(review): the condition choosing between them is not visible in this excerpt.
587 $keepReading = $this->reader->next();
589 $this->
debug(
"Skip" );
591 $keepReading = $this->reader->read();
// Exception handler for the processing above (handler body not visible here).
594 }
catch ( Exception $ex ) {
// Restore the entity-loader setting and close the reader.
599 libxml_disable_entity_loader( $oldDisable );
600 $this->reader->close();
// Site info handler fragment.
610 $this->
debug(
"Enter site info handler." );
// Child tags matched with in_array() further down (no strict flag, so loose comparison).
614 $normalFields = [
'sitename',
'base',
'generator',
'case' ];
// Read nodes until the closing </siteinfo> element is reached.
616 while ( $this->reader->read() ) {
617 if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
618 $this->reader->localName ==
'siteinfo' ) {
622 $tag = $this->reader->localName;
// 'namespace' has its own branch; the tags in $normalFields share one
// (branch bodies not visible here).
624 if (
$tag ==
'namespace' ) {
627 } elseif ( in_array(
$tag, $normalFields ) ) {
// Log item handler fragment.
637 $this->
debug(
"Enter log item handler." );
// Child tags matched with in_array() further down.
641 $normalFields = [
'id',
'comment',
'type',
'action',
'timestamp',
642 'logtitle',
'params' ];
// Read nodes until the closing </logitem> element is reached.
644 while ( $this->reader->read() ) {
645 if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
646 $this->reader->localName ==
'logitem' ) {
650 $tag = $this->reader->localName;
// The 'ImportHandleLogItemXMLTag' hook is consulted first; a falsy return takes
// this branch (hook arguments and branch body not visible here).
652 if ( !
Hooks::run(
'ImportHandleLogItemXMLTag', [
656 } elseif ( in_array(
$tag, $normalFields ) ) {
// <contributor> has its own branch (body not visible here).
658 } elseif (
$tag ==
'contributor' ) {
// Anything else except '#text' nodes is reported through warn().
660 } elseif (
$tag !=
'#text' ) {
661 $this->
warn(
"Unhandled log-item XML tag $tag" );
// Copy the collected $logInfo fields onto $revision.
676 if ( isset( $logInfo[
'id'] ) ) {
677 $revision->setID( $logInfo[
'id'] );
// NOTE(review): 'type' and 'action' are read without an isset() guard in the visible lines.
679 $revision->setType( $logInfo[
'type'] );
680 $revision->setAction( $logInfo[
'action'] );
681 if ( isset( $logInfo[
'timestamp'] ) ) {
682 $revision->setTimestamp( $logInfo[
'timestamp'] );
684 if ( isset( $logInfo[
'params'] ) ) {
685 $revision->setParams( $logInfo[
'params'] );
// 'logtitle' branch: body not visible here.
687 if ( isset( $logInfo[
'logtitle'] ) ) {
// Pass the importer's "no updates" flag on to the revision.
693 $revision->setNoUpdates( $this->mNoUpdates );
695 if ( isset( $logInfo[
'comment'] ) ) {
696 $revision->setComment( $logInfo[
'comment'] );
// Contributor: an IP address, when present, is set as the user IP.
699 if ( isset( $logInfo[
'contributor'][
'ip'] ) ) {
700 $revision->setUserIP( $logInfo[
'contributor'][
'ip'] );
// A missing username is replaced by the literal 'Unknown user'; the second
// setUsername() call uses the supplied name (the else line is not shown).
703 if ( !isset( $logInfo[
'contributor'][
'username'] ) ) {
704 $revision->setUsername(
'Unknown user' );
706 $revision->setUsername( $logInfo[
'contributor'][
'username'] );
// Page handler fragment.
714 $this->
debug(
"Enter page handler." );
// Both counters start at zero.
715 $pageInfo = [
'revisionCount' => 0,
'successfulRevisionCount' => 0 ];
// Child tags matched with in_array() further down.
718 $normalFields = [
'title',
'ns',
'id',
'redirect',
'restrictions' ];
// Advance with next() when $skip is truthy, otherwise with read(); the loop
// stops at the closing </page> element.
723 while ( $skip ? $this->reader->next() : $this->reader->read() ) {
724 if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
725 $this->reader->localName ==
'page' ) {
731 $tag = $this->reader->localName;
// The 'ImportHandlePageXMLTag' hook is consulted; a falsy return takes this
// branch (the preceding if and the hook arguments are only partly visible).
736 } elseif ( !
Hooks::run(
'ImportHandlePageXMLTag', [ $this,
739 } elseif ( in_array(
$tag, $normalFields ) ) {
// 'redirect' gets special treatment among the normal fields (body not visible here).
747 if (
$tag ==
'redirect' ) {
// <revision> and <upload> share one branch.
752 } elseif (
$tag ==
'revision' ||
$tag ==
'upload' ) {
// Use $pageInfo['ns'] when set, else null; this is the tail of a call whose
// start is not visible here.
755 isset( $pageInfo[
'ns'] ) ? $pageInfo[
'ns'] :
null );
// An array result is unpacked into $pageInfo['_title'] and $foreignTitle.
758 if ( is_array(
$title ) ) {
760 list( $pageInfo[
'_title'], $foreignTitle ) =
$title;
768 if (
$tag ==
'revision' ) {
// Anything else except '#text' nodes is reported through warn().
774 } elseif (
$tag !=
'#text' ) {
775 $this->
warn(
"Unhandled page XML tag $tag" );
// Runs only when a '_title' key was stored; the two counters are passed along
// to a call whose start is not visible here.
785 if ( array_key_exists(
'_title', $pageInfo ) ) {
787 $pageInfo[
'revisionCount'],
788 $pageInfo[
'successfulRevisionCount'],
// Revision handler fragment.
797 $this->
debug(
"Enter revision handler" );
// Child tags matched with in_array() further down.
800 $normalFields = [
'id',
'timestamp',
'comment',
'minor',
'model',
'format',
'text' ];
// Advance with next() when $skip is truthy, otherwise with read(); the loop
// stops at the closing </revision> element.
804 while ( $skip ? $this->reader->next() : $this->reader->read() ) {
805 if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
806 $this->reader->localName ==
'revision' ) {
810 $tag = $this->reader->localName;
// The 'ImportHandleRevisionXMLTag' hook is consulted first, with the importer,
// $pageInfo and $revisionInfo; a falsy return takes this branch.
812 if ( !
Hooks::run(
'ImportHandleRevisionXMLTag', [
813 $this, $pageInfo, $revisionInfo
816 } elseif ( in_array(
$tag, $normalFields ) ) {
// <contributor> has its own branch (body not visible here).
818 } elseif (
$tag ==
'contributor' ) {
// Anything else except '#text' nodes is reported through warn().
820 } elseif (
$tag !=
'#text' ) {
821 $this->
warn(
"Unhandled revision XML tag $tag" );
// Count the revision. NOTE(review): the condition guarding the "successful"
// increment is not visible in this excerpt.
826 $pageInfo[
'revisionCount']++;
828 $pageInfo[
'successfulRevisionCount']++;
// Check that applies when no model is set or the model is in a list (the list and
// the rest of the condition are not visible here). The message assembled below
// reports that the revision exceeds $wgMaxArticleSize KB.
844 if ( ( !isset( $revisionInfo[
'model'] ) ||
845 in_array( $revisionInfo[
'model'], [
// Name the revision by ID in the message when an ID is known.
856 ( isset( $revisionInfo[
'id'] ) ?
857 "the revision with ID $revisionInfo[id]" :
859 ) .
" exceeds the maximum allowable size ($wgMaxArticleSize KB)" );
// Copy the optional fields onto $revision when present.
864 if ( isset( $revisionInfo[
'id'] ) ) {
865 $revision->setID( $revisionInfo[
'id'] );
867 if ( isset( $revisionInfo[
'model'] ) ) {
868 $revision->setModel( $revisionInfo[
'model'] );
870 if ( isset( $revisionInfo[
'format'] ) ) {
871 $revision->setFormat( $revisionInfo[
'format'] );
// NOTE(review): $pageInfo['_title'] is read without an isset() guard here.
873 $revision->setTitle( $pageInfo[
'_title'] );
// Text: obtain the content handler; the raw text and the revision's format are
// arguments to a call whose start is not visible, and its result is set as text.
875 if ( isset( $revisionInfo[
'text'] ) ) {
876 $handler = $revision->getContentHandler();
878 $revisionInfo[
'text'],
879 $revision->getFormat() );
881 $revision->setText( $text );
883 if ( isset( $revisionInfo[
'timestamp'] ) ) {
884 $revision->setTimestamp( $revisionInfo[
'timestamp'] );
889 if ( isset( $revisionInfo[
'comment'] ) ) {
890 $revision->setComment( $revisionInfo[
'comment'] );
// The mere presence of a 'minor' key marks the revision as minor.
893 if ( isset( $revisionInfo[
'minor'] ) ) {
894 $revision->setMinor(
true );
// Contributor: prefer the IP address, then the username; the last setUsername()
// call uses the literal 'Unknown user' (the else line is not shown).
896 if ( isset( $revisionInfo[
'contributor'][
'ip'] ) ) {
897 $revision->setUserIP( $revisionInfo[
'contributor'][
'ip'] );
898 } elseif ( isset( $revisionInfo[
'contributor'][
'username'] ) ) {
899 $revision->setUsername( $revisionInfo[
'contributor'][
'username'] );
901 $revision->setUsername(
'Unknown user' );
// Pass the importer's "no updates" flag on to the revision.
903 $revision->setNoUpdates( $this->mNoUpdates );
// Upload handler fragment.
913 $this->
debug(
"Enter upload handler" );
// Child tags matched with in_array() further down.
916 $normalFields = [
'timestamp',
'comment',
'filename',
'text',
917 'src',
'size',
'sha1base36',
'archivename',
'rel' ];
// Advance with next() when $skip is truthy, otherwise with read(); the loop
// stops at the closing </upload> element.
921 while ( $skip ? $this->reader->next() : $this->reader->read() ) {
922 if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
923 $this->reader->localName ==
'upload' ) {
927 $tag = $this->reader->localName;
// The 'ImportHandleUploadXMLTag' hook is consulted first; a falsy return takes
// this branch (hook arguments and branch body not visible here).
929 if ( !
Hooks::run(
'ImportHandleUploadXMLTag', [
933 } elseif ( in_array(
$tag, $normalFields ) ) {
935 } elseif (
$tag ==
'contributor' ) {
// <contents>: only the 'base64' encoding is handled in the visible lines. The
// decoded data is written out through dumpTemp() and the resulting path is
// recorded as a temporary file source.
937 } elseif (
$tag ==
'contents' ) {
939 $encoding = $this->reader->getAttribute(
'encoding' );
940 if ( $encoding ===
'base64' ) {
941 $uploadInfo[
'fileSrc'] = $this->
dumpTemp( base64_decode( $contents ) );
942 $uploadInfo[
'isTempSrc'] =
true;
// Anything else except '#text' nodes is reported through warn().
944 } elseif (
$tag !=
'#text' ) {
945 $this->
warn(
"Unhandled upload XML tag $tag" );
// With an image base path and a 'rel' value, build "<base>/<rel>"; if that file
// exists it becomes the (non-temporary) file source.
// NOTE(review): 'rel' comes from the dump and is concatenated unchecked in the
// visible lines — confirm whether path traversal ("../") is prevented elsewhere.
950 if ( $this->mImageBasePath && isset( $uploadInfo[
'rel'] ) ) {
951 $path =
"{$this->mImageBasePath}/{$uploadInfo['rel']}";
952 if ( file_exists(
$path ) ) {
953 $uploadInfo[
'fileSrc'] =
$path;
954 $uploadInfo[
'isTempSrc'] =
false;
// Branch taken only when upload importing is enabled (body not visible here).
958 if ( $this->mImportUploads ) {
// Create a temp file in wfTempDir() with the 'importupload' prefix and write
// $contents into it.
// NOTE(review): no return-value checks for tempnam()/file_put_contents() are
// visible in this excerpt.
968 $filename = tempnam(
wfTempDir(),
'importupload' );
969 file_put_contents( $filename, $contents );
// Page text defaults to the empty string when the upload carries no 'text'.
980 $text = isset( $uploadInfo[
'text'] ) ? $uploadInfo[
'text'] :
'';
// Copy the collected fields onto $revision.
// NOTE(review): $pageInfo['_title'], $pageInfo['id'] and the 'timestamp',
// 'filename', 'src', 'size' and 'comment' entries of $uploadInfo are read
// without isset() guards in the visible lines.
982 $revision->setTitle( $pageInfo[
'_title'] );
983 $revision->setID( $pageInfo[
'id'] );
984 $revision->setTimestamp( $uploadInfo[
'timestamp'] );
985 $revision->setText( $text );
986 $revision->setFilename( $uploadInfo[
'filename'] );
987 if ( isset( $uploadInfo[
'archivename'] ) ) {
988 $revision->setArchiveName( $uploadInfo[
'archivename'] );
990 $revision->setSrc( $uploadInfo[
'src'] );
// The second argument is true only when 'isTempSrc' is set and truthy.
991 if ( isset( $uploadInfo[
'fileSrc'] ) ) {
992 $revision->setFileSrc( $uploadInfo[
'fileSrc'],
993 !empty( $uploadInfo[
'isTempSrc'] ) );
995 if ( isset( $uploadInfo[
'sha1base36'] ) ) {
996 $revision->setSha1Base36( $uploadInfo[
'sha1base36'] );
// Size is coerced to an integer.
998 $revision->setSize( intval( $uploadInfo[
'size'] ) );
999 $revision->setComment( $uploadInfo[
'comment'] );
// Contributor: IP and username are each applied when present.
1001 if ( isset( $uploadInfo[
'contributor'][
'ip'] ) ) {
1002 $revision->setUserIP( $uploadInfo[
'contributor'][
'ip'] );
1004 if ( isset( $uploadInfo[
'contributor'][
'username'] ) ) {
1005 $revision->setUsername( $uploadInfo[
'contributor'][
'username'] );
// Pass the importer's "no updates" flag on to the revision.
1007 $revision->setNoUpdates( $this->mNoUpdates );
// Hand the revision to the upload callback and return its result.
// NOTE(review): no isset() check on mUploadCallback is visible before this call.
1009 return call_user_func( $this->mUploadCallback, $revision );
// Contributor handler fragment: child tags of <contributor> that are recognised.
1016 $fields = [
'id',
'ip',
'username' ];
// An empty <contributor/> element is handled up front (branch body not visible here).
1019 if ( $this->reader->isEmptyElement ) {
// Read nodes until the closing </contributor> element is reached.
1022 while ( $this->reader->read() ) {
1023 if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
1024 $this->reader->localName ==
'contributor' ) {
1028 $tag = $this->reader->localName;
// Only tags listed in $fields are processed (branch body not visible here).
1030 if ( in_array(
$tag, $fields ) ) {
// Title processing fragment: build a foreign title from the dump text, map it to
// a local title, then reject titles that must not be imported.
// A null foreignNamespaces value gets its own branch (body not visible here).
1044 if ( is_null( $this->foreignNamespaces ) ) {
1048 $this->foreignNamespaces );
// Create the foreign title, then derive the local title through the import
// title factory (the remaining arguments of both calls are not visible here).
1051 $foreignTitle = $foreignTitleFactory->createForeignTitle( $text,
1054 $title = $this->importTitleFactory->createTitleFromForeignTitle(
// In command-line mode the two permission checks below are bypassed.
1057 $commandLineMode = $this->config->get(
'CommandLineMode' );
1058 if ( is_null(
$title ) ) {
1059 # Invalid page title? Ignore the page
1060 $this->
notice(
'import-error-invalid', $foreignTitle->getFullText() );
// Titles for which isExternal() is true are rejected ('import-error-interwiki').
1062 } elseif (
$title->isExternal() ) {
1063 $this->
notice(
'import-error-interwiki',
$title->getPrefixedText() );
// Titles for which canExist() is false are rejected ('import-error-special').
1065 } elseif ( !
$title->canExist() ) {
1066 $this->
notice(
'import-error-special',
$title->getPrefixedText() );
1068 } elseif ( !
$title->userCan(
'edit' ) && !$commandLineMode ) {
1069 # Do not import if the importing wiki user cannot edit this page
1070 $this->
notice(
'import-error-edit',
$title->getPrefixedText() );
// The 'create' right is only checked for titles that do not exist yet.
1072 } elseif ( !
$title->exists() && !
$title->userCan(
'create' ) && !$commandLineMode ) {
1073 # Do not import if the importing wiki user cannot create this page
1074 $this->
notice(
'import-error-create',
$title->getPrefixedText() );
// Return the local title together with the foreign title as a two-element array.
// NOTE(review): what the rejecting branches above return is not visible in this excerpt.
1078 return [
$title, $foreignTitle ];