62 if ( !class_exists(
'XMLReader' ) ) {
63 throw new Exception(
'Import requires PHP to have been compiled with libxml support' );
66 $this->reader =
new XMLReader();
69 if ( !in_array(
'uploadsource', stream_get_wrappers() ) ) {
76 $oldDisable = libxml_disable_entity_loader(
false );
77 if ( defined(
'LIBXML_PARSEHUGE' ) ) {
78 $status = $this->reader->open(
"uploadsource://$id",
null, LIBXML_PARSEHUGE );
80 $status = $this->reader->open(
"uploadsource://$id" );
83 $error = libxml_get_last_error();
84 libxml_disable_entity_loader( $oldDisable );
85 throw new MWException(
'Encountered an internal error while initializing WikiImporter object: ' .
88 libxml_disable_entity_loader( $oldDisable );
109 $this->
debug(
"FAILURE: $err" );
110 wfDebug(
"WikiImporter XML error: $err\n" );
114 if ( $this->mDebug ) {
119 public function warn( $data ) {
127 if ( is_callable( $this->mNoticeCallback ) ) {
128 call_user_func( $this->mNoticeCallback, $msg,
$params );
149 $this->mNoUpdates = $noupdates;
159 $this->pageOffset = $nthPage;
169 return wfSetVar( $this->mNoticeCallback, $callback );
179 $this->mPageCallback = $callback;
194 $this->mPageOutCallback = $callback;
205 $this->mRevisionCallback = $callback;
216 $this->mUploadCallback = $callback;
227 $this->mLogItemCallback = $callback;
238 $this->mSiteInfoCallback = $callback;
248 $this->importTitleFactory = $factory;
257 if ( is_null( $namespace ) ) {
265 $namespace = intval( $namespace );
280 if ( is_null( $rootpage ) ) {
283 } elseif ( $rootpage !==
'' ) {
284 $rootpage = rtrim( $rootpage,
'/' );
288 $status->fatal(
'import-rootpage-invalid' );
293 : MediaWikiServices::getInstance()->getContentLanguage()->
294 getNsText(
$title->getNamespace() );
295 $status->fatal(
'import-rootpage-nosubpage', $displayNSText );
310 $this->mImageBasePath = $dir;
317 $this->mImportUploads = $import;
326 $this->externalUserNames =
new ExternalUserNames( $usernamePrefix, $assignKnownUsers );
344 $title = $titleAndForeignTitle[0];
346 $this->countableCache[
'title_' .
$title->getPrefixedText()] = $page->isCountable();
356 if ( !$revision->getContentHandler()->canBeUsedOn( $revision->getTitle() ) ) {
357 $this->
notice(
'import-error-bad-location',
358 $revision->getTitle()->getPrefixedText(),
360 $revision->getModel(),
361 $revision->getFormat() );
367 return $revision->importOldRevision();
369 $this->
notice(
'import-error-unserialize',
370 $revision->getTitle()->getPrefixedText(),
372 $revision->getModel(),
373 $revision->getFormat() );
385 return $revision->importLogItem();
394 return $revision->importUpload();
407 $sRevCount, $pageInfo
417 $page->loadPageData(
'fromdbmaster' );
420 wfDebug( __METHOD__ .
': Skipping article count adjustment for ' .
$title .
421 ' because WikiPage::getContent() returned null' );
423 $editInfo = $page->prepareContentForEdit(
$content );
424 $countKey =
'title_' .
$title->getPrefixedText();
425 $countable = $page->isCountable( $editInfo );
426 if ( array_key_exists( $countKey, $this->countableCache ) &&
427 $countable != $this->countableCache[$countKey] ) {
429 'articles' => ( (
int)$countable - (
int)$this->countableCache[$countKey] )
435 $args = func_get_args();
444 $this->
debug(
"Got revision:" );
445 if ( is_object( $revision->title ) ) {
446 $this->
debug(
"-- Title: " . $revision->title->getPrefixedText() );
448 $this->
debug(
"-- Title: <invalid>" );
450 $this->
debug(
"-- User: " . $revision->user_text );
451 $this->
debug(
"-- Timestamp: " . $revision->timestamp );
452 $this->
debug(
"-- Comment: " . $revision->comment );
453 $this->
debug(
"-- Text: " . $revision->text );
462 if ( isset( $this->mSiteInfoCallback ) ) {
463 return call_user_func_array( $this->mSiteInfoCallback,
464 [ $siteInfo, $this ] );
475 if ( isset( $this->mPageCallback ) ) {
476 call_user_func( $this->mPageCallback,
$title );
489 $sucCount, $pageInfo ) {
490 if ( isset( $this->mPageOutCallback ) ) {
491 $args = func_get_args();
492 call_user_func_array( $this->mPageOutCallback,
$args );
502 if ( isset( $this->mRevisionCallback ) ) {
503 return call_user_func_array( $this->mRevisionCallback,
504 [ $revision, $this ] );
516 if ( isset( $this->mLogItemCallback ) ) {
517 return call_user_func_array( $this->mLogItemCallback,
518 [ $revision, $this ] );
531 return $this->reader->getAttribute( $attr );
542 if ( $this->reader->isEmptyElement ) {
546 while ( $this->reader->read() ) {
547 switch ( $this->reader->nodeType ) {
548 case XMLReader::TEXT:
549 case XMLReader::CDATA:
550 case XMLReader::SIGNIFICANT_WHITESPACE:
551 $buffer .= $this->reader->value;
553 case XMLReader::END_ELEMENT:
558 $this->reader->close();
572 $oldDisable = libxml_disable_entity_loader(
true );
573 $this->reader->read();
575 if ( $this->reader->localName !=
'mediawiki' ) {
576 libxml_disable_entity_loader( $oldDisable );
577 throw new MWException(
"Expected <mediawiki> tag, got " .
578 $this->reader->localName );
580 $this->
debug(
"<mediawiki> tag is correct." );
582 $this->
debug(
"Starting primary dump processing loop." );
584 $keepReading = $this->reader->read();
589 while ( $keepReading ) {
590 $tag = $this->reader->localName;
591 if ( $this->pageOffset ) {
592 if ( $tag ===
'page' ) {
595 if ( $pageCount < $this->pageOffset ) {
596 $keepReading = $this->reader->next();
600 $type = $this->reader->nodeType;
602 if ( !
Hooks::run(
'ImportHandleToplevelXMLTag', [ $this ] ) ) {
604 } elseif ( $tag ==
'mediawiki' &&
$type == XMLReader::END_ELEMENT ) {
606 } elseif ( $tag ==
'siteinfo' ) {
608 } elseif ( $tag ==
'page' ) {
610 } elseif ( $tag ==
'logitem' ) {
612 } elseif ( $tag !=
'#text' ) {
613 $this->
warn(
"Unhandled top-level XML tag $tag" );
619 $keepReading = $this->reader->next();
621 $this->
debug(
"Skip" );
623 $keepReading = $this->reader->read();
626 }
catch ( Exception $ex ) {
631 libxml_disable_entity_loader( $oldDisable );
632 $this->reader->close();
642 $this->
debug(
"Enter site info handler." );
646 $normalFields = [
'sitename',
'base',
'generator',
'case' ];
648 while ( $this->reader->read() ) {
649 if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
650 $this->reader->localName ==
'siteinfo' ) {
654 $tag = $this->reader->localName;
656 if ( $tag ==
'namespace' ) {
659 } elseif ( in_array( $tag, $normalFields ) ) {
669 $this->
debug(
"Enter log item handler." );
673 $normalFields = [
'id',
'comment',
'type',
'action',
'timestamp',
674 'logtitle',
'params' ];
676 while ( $this->reader->read() ) {
677 if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
678 $this->reader->localName ==
'logitem' ) {
682 $tag = $this->reader->localName;
684 if ( !
Hooks::run(
'ImportHandleLogItemXMLTag', [
688 } elseif ( in_array( $tag, $normalFields ) ) {
690 } elseif ( $tag ==
'contributor' ) {
692 } elseif ( $tag !=
'#text' ) {
693 $this->
warn(
"Unhandled log-item XML tag $tag" );
707 if ( isset( $logInfo[
'id'] ) ) {
708 $revision->setID( $logInfo[
'id'] );
710 $revision->setType( $logInfo[
'type'] );
711 $revision->setAction( $logInfo[
'action'] );
712 if ( isset( $logInfo[
'timestamp'] ) ) {
713 $revision->setTimestamp( $logInfo[
'timestamp'] );
715 if ( isset( $logInfo[
'params'] ) ) {
716 $revision->setParams( $logInfo[
'params'] );
718 if ( isset( $logInfo[
'logtitle'] ) ) {
724 $revision->setNoUpdates( $this->mNoUpdates );
726 if ( isset( $logInfo[
'comment'] ) ) {
727 $revision->setComment( $logInfo[
'comment'] );
730 if ( isset( $logInfo[
'contributor'][
'ip'] ) ) {
731 $revision->setUserIP( $logInfo[
'contributor'][
'ip'] );
734 if ( !isset( $logInfo[
'contributor'][
'username'] ) ) {
735 $revision->setUsername( $this->externalUserNames->addPrefix(
'Unknown user' ) );
737 $revision->setUsername(
738 $this->externalUserNames->applyPrefix( $logInfo[
'contributor'][
'username'] )
747 $this->
debug(
"Enter page handler." );
748 $pageInfo = [
'revisionCount' => 0,
'successfulRevisionCount' => 0 ];
751 $normalFields = [
'title',
'ns',
'id',
'redirect',
'restrictions' ];
756 while ( $skip ? $this->reader->next() : $this->reader->read() ) {
757 if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
758 $this->reader->localName ==
'page' ) {
764 $tag = $this->reader->localName;
769 } elseif ( !
Hooks::run(
'ImportHandlePageXMLTag', [ $this,
772 } elseif ( in_array( $tag, $normalFields ) ) {
780 if ( $tag ==
'redirect' ) {
785 } elseif ( $tag ==
'revision' || $tag ==
'upload' ) {
788 $pageInfo[
'ns'] ??
null );
791 if ( is_array(
$title ) ) {
793 list( $pageInfo[
'_title'], $foreignTitle ) =
$title;
801 if ( $tag ==
'revision' ) {
807 } elseif ( $tag !=
'#text' ) {
808 $this->
warn(
"Unhandled page XML tag $tag" );
818 if ( array_key_exists(
'_title', $pageInfo ) ) {
820 $pageInfo[
'revisionCount'],
821 $pageInfo[
'successfulRevisionCount'],
830 $this->
debug(
"Enter revision handler" );
833 $normalFields = [
'id',
'timestamp',
'comment',
'minor',
'model',
'format',
'text',
'sha1' ];
837 while ( $skip ? $this->reader->next() : $this->reader->read() ) {
838 if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
839 $this->reader->localName ==
'revision' ) {
843 $tag = $this->reader->localName;
845 if ( !
Hooks::run(
'ImportHandleRevisionXMLTag', [
846 $this, $pageInfo, $revisionInfo
849 } elseif ( in_array( $tag, $normalFields ) ) {
851 } elseif ( $tag ==
'contributor' ) {
853 } elseif ( $tag !=
'#text' ) {
854 $this->
warn(
"Unhandled revision XML tag $tag" );
859 $pageInfo[
'revisionCount']++;
861 $pageInfo[
'successfulRevisionCount']++;
878 if ( ( !isset( $revisionInfo[
'model'] ) ||
879 in_array( $revisionInfo[
'model'], [
890 ( isset( $revisionInfo[
'id'] ) ?
891 "the revision with ID $revisionInfo[id]" :
893 ) .
" exceeds the maximum allowable size ($wgMaxArticleSize KB)" );
898 if ( isset( $revisionInfo[
'id'] ) ) {
899 $revision->setID( $revisionInfo[
'id'] );
901 if ( isset( $revisionInfo[
'model'] ) ) {
902 $revision->setModel( $revisionInfo[
'model'] );
904 if ( isset( $revisionInfo[
'format'] ) ) {
905 $revision->setFormat( $revisionInfo[
'format'] );
907 $revision->setTitle( $pageInfo[
'_title'] );
909 if ( isset( $revisionInfo[
'text'] ) ) {
910 $handler = $revision->getContentHandler();
912 $revisionInfo[
'text'],
913 $revision->getFormat() );
915 $revision->setText( $text );
917 if ( isset( $revisionInfo[
'timestamp'] ) ) {
918 $revision->setTimestamp( $revisionInfo[
'timestamp'] );
923 if ( isset( $revisionInfo[
'comment'] ) ) {
924 $revision->setComment( $revisionInfo[
'comment'] );
927 if ( isset( $revisionInfo[
'minor'] ) ) {
928 $revision->setMinor(
true );
930 if ( isset( $revisionInfo[
'contributor'][
'ip'] ) ) {
931 $revision->setUserIP( $revisionInfo[
'contributor'][
'ip'] );
932 } elseif ( isset( $revisionInfo[
'contributor'][
'username'] ) ) {
933 $revision->setUsername(
934 $this->externalUserNames->applyPrefix( $revisionInfo[
'contributor'][
'username'] )
937 $revision->setUsername( $this->externalUserNames->addPrefix(
'Unknown user' ) );
939 if ( isset( $revisionInfo[
'sha1'] ) ) {
940 $revision->setSha1Base36( $revisionInfo[
'sha1'] );
942 $revision->setNoUpdates( $this->mNoUpdates );
952 $this->
debug(
"Enter upload handler" );
955 $normalFields = [
'timestamp',
'comment',
'filename',
'text',
956 'src',
'size',
'sha1base36',
'archivename',
'rel' ];
960 while ( $skip ? $this->reader->next() : $this->reader->read() ) {
961 if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
962 $this->reader->localName ==
'upload' ) {
966 $tag = $this->reader->localName;
968 if ( !
Hooks::run(
'ImportHandleUploadXMLTag', [
972 } elseif ( in_array( $tag, $normalFields ) ) {
974 } elseif ( $tag ==
'contributor' ) {
976 } elseif ( $tag ==
'contents' ) {
978 $encoding = $this->reader->getAttribute(
'encoding' );
979 if ( $encoding ===
'base64' ) {
980 $uploadInfo[
'fileSrc'] = $this->
dumpTemp( base64_decode( $contents ) );
981 $uploadInfo[
'isTempSrc'] =
true;
983 } elseif ( $tag !=
'#text' ) {
984 $this->
warn(
"Unhandled upload XML tag $tag" );
989 if ( $this->mImageBasePath && isset( $uploadInfo[
'rel'] ) ) {
990 $path =
"{$this->mImageBasePath}/{$uploadInfo['rel']}";
991 if ( file_exists(
$path ) ) {
992 $uploadInfo[
'fileSrc'] =
$path;
993 $uploadInfo[
'isTempSrc'] =
false;
997 if ( $this->mImportUploads ) {
1007 $filename = tempnam(
wfTempDir(),
'importupload' );
1008 file_put_contents( $filename, $contents );
1019 $text = $uploadInfo[
'text'] ??
'';
1021 $revision->setTitle( $pageInfo[
'_title'] );
1022 $revision->setID( $pageInfo[
'id'] );
1023 $revision->setTimestamp( $uploadInfo[
'timestamp'] );
1024 $revision->setText( $text );
1025 $revision->setFilename( $uploadInfo[
'filename'] );
1026 if ( isset( $uploadInfo[
'archivename'] ) ) {
1027 $revision->setArchiveName( $uploadInfo[
'archivename'] );
1029 $revision->setSrc( $uploadInfo[
'src'] );
1030 if ( isset( $uploadInfo[
'fileSrc'] ) ) {
1031 $revision->setFileSrc( $uploadInfo[
'fileSrc'],
1032 !empty( $uploadInfo[
'isTempSrc'] ) );
1034 if ( isset( $uploadInfo[
'sha1base36'] ) ) {
1035 $revision->setSha1Base36( $uploadInfo[
'sha1base36'] );
1037 $revision->setSize( intval( $uploadInfo[
'size'] ) );
1038 $revision->setComment( $uploadInfo[
'comment'] );
1040 if ( isset( $uploadInfo[
'contributor'][
'ip'] ) ) {
1041 $revision->setUserIP( $uploadInfo[
'contributor'][
'ip'] );
1043 if ( isset( $uploadInfo[
'contributor'][
'username'] ) ) {
1044 $revision->setUsername(
1045 $this->externalUserNames->applyPrefix( $uploadInfo[
'contributor'][
'username'] )
1048 $revision->setNoUpdates( $this->mNoUpdates );
1050 return call_user_func( $this->mUploadCallback, $revision );
1057 $fields = [
'id',
'ip',
'username' ];
1060 if ( $this->reader->isEmptyElement ) {
1063 while ( $this->reader->read() ) {
1064 if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
1065 $this->reader->localName ==
'contributor' ) {
1069 $tag = $this->reader->localName;
1071 if ( in_array( $tag, $fields ) ) {
1085 if ( is_null( $this->foreignNamespaces ) ) {
1089 $this->foreignNamespaces );
1092 $foreignTitle = $foreignTitleFactory->createForeignTitle( $text,
1095 $title = $this->importTitleFactory->createTitleFromForeignTitle(
1098 $commandLineMode = $this->config->get(
'CommandLineMode' );
1099 if ( is_null(
$title ) ) {
1100 # Invalid page title? Ignore the page
1101 $this->
notice(
'import-error-invalid', $foreignTitle->getFullText() );
1103 } elseif (
$title->isExternal() ) {
1104 $this->
notice(
'import-error-interwiki',
$title->getPrefixedText() );
1106 } elseif ( !
$title->canExist() ) {
1107 $this->
notice(
'import-error-special',
$title->getPrefixedText() );
1109 } elseif ( !
$title->userCan(
'edit' ) && !$commandLineMode ) {
1110 # Do not import if the importing wiki user cannot edit this page
1111 $this->
notice(
'import-error-edit',
$title->getPrefixedText() );
1113 } elseif ( !
$title->exists() && !
$title->userCan(
'create' ) && !$commandLineMode ) {
1114 # Do not import if the importing wiki user cannot create this page
1115 $this->
notice(
'import-error-create',
$title->getPrefixedText() );
1119 return [
$title, $foreignTitle ];