55 if ( !class_exists(
'XMLReader' ) ) {
56 throw new Exception(
'Import requires PHP to have been compiled with libxml support' );
59 $this->reader =
new XMLReader();
61 wfDeprecated( __METHOD__ .
' without a Config instance',
'1.25' );
66 if ( !in_array(
'uploadsource', stream_get_wrappers() ) ) {
67 stream_wrapper_register(
'uploadsource',
'UploadSourceAdapter' );
73 $oldDisable = libxml_disable_entity_loader(
false );
74 if ( defined(
'LIBXML_PARSEHUGE' ) ) {
75 $status = $this->reader->open(
"uploadsource://$id",
null, LIBXML_PARSEHUGE );
77 $status = $this->reader->open(
"uploadsource://$id" );
80 $error = libxml_get_last_error();
81 libxml_disable_entity_loader( $oldDisable );
82 throw new MWException(
'Encountered an internal error while initializing WikiImporter object: ' .
85 libxml_disable_entity_loader( $oldDisable );
105 $this->
debug(
"FAILURE: $err" );
106 wfDebug(
"WikiImporter XML error: $err\n" );
110 if ( $this->mDebug ) {
115 public function warn( $data ) {
123 if ( is_callable( $this->mNoticeCallback ) ) {
124 call_user_func( $this->mNoticeCallback, $msg,
$params );
143 $this->mNoUpdates = $noupdates;
153 return wfSetVar( $this->mNoticeCallback, $callback );
163 $this->mPageCallback = $callback;
178 $this->mPageOutCallback = $callback;
189 $this->mRevisionCallback = $callback;
200 $this->mUploadCallback = $callback;
211 $this->mLogItemCallback = $callback;
222 $this->mSiteInfoCallback = $callback;
232 $this->importTitleFactory = $factory;
241 if ( is_null( $namespace ) ) {
249 $namespace = intval( $namespace );
264 if ( is_null( $rootpage ) ) {
267 } elseif ( $rootpage !==
'' ) {
268 $rootpage = rtrim( $rootpage,
'/' );
272 $status->fatal(
'import-rootpage-invalid' );
280 $status->fatal(
'import-rootpage-nosubpage', $displayNSText );
295 $this->mImageBasePath =
$dir;
302 $this->mImportUploads = $import;
312 $title = $titleAndForeignTitle[0];
314 $this->countableCache[
'title_' .
$title->getPrefixedText()] =
$page->isCountable();
324 if ( !$revision->getContentHandler()->canBeUsedOn( $revision->getTitle() ) ) {
325 $this->
notice(
'import-error-bad-location',
326 $revision->getTitle()->getPrefixedText(),
328 $revision->getModel(),
329 $revision->getFormat() );
335 return $revision->importOldRevision();
337 $this->
notice(
'import-error-unserialize',
338 $revision->getTitle()->getPrefixedText(),
340 $revision->getModel(),
341 $revision->getFormat() );
353 return $revision->importLogItem();
362 return $revision->importUpload();
375 $sRevCount, $pageInfo ) {
384 $page->loadPageData(
'fromdbmaster' );
387 wfDebug( __METHOD__ .
': Skipping article count adjustment for ' .
$title .
388 ' because WikiPage::getContent() returned null' );
391 $countKey =
'title_' .
$title->getPrefixedText();
392 $countable =
$page->isCountable( $editInfo );
393 if ( array_key_exists( $countKey, $this->countableCache ) &&
394 $countable != $this->countableCache[$countKey] ) {
396 'articles' => ( (
int)$countable - (
int)$this->countableCache[$countKey] )
401 $args = func_get_args();
410 $this->
debug(
"Got revision:" );
411 if ( is_object( $revision->title ) ) {
412 $this->
debug(
"-- Title: " . $revision->title->getPrefixedText() );
414 $this->
debug(
"-- Title: <invalid>" );
416 $this->
debug(
"-- User: " . $revision->user_text );
417 $this->
debug(
"-- Timestamp: " . $revision->timestamp );
418 $this->
debug(
"-- Comment: " . $revision->comment );
419 $this->
debug(
"-- Text: " . $revision->text );
428 if ( isset( $this->mSiteInfoCallback ) ) {
429 return call_user_func_array( $this->mSiteInfoCallback,
430 [ $siteInfo, $this ] );
441 if ( isset( $this->mPageCallback ) ) {
442 call_user_func( $this->mPageCallback,
$title );
455 $sucCount, $pageInfo ) {
456 if ( isset( $this->mPageOutCallback ) ) {
457 $args = func_get_args();
458 call_user_func_array( $this->mPageOutCallback,
$args );
468 if ( isset( $this->mRevisionCallback ) ) {
469 return call_user_func_array( $this->mRevisionCallback,
470 [ $revision, $this ] );
482 if ( isset( $this->mLogItemCallback ) ) {
483 return call_user_func_array( $this->mLogItemCallback,
484 [ $revision, $this ] );
497 return $this->reader->getAttribute( $attr );
508 if ( $this->reader->isEmptyElement ) {
512 while ( $this->reader->read() ) {
513 switch ( $this->reader->nodeType ) {
514 case XMLReader::TEXT:
515 case XMLReader::CDATA:
516 case XMLReader::SIGNIFICANT_WHITESPACE:
517 $buffer .= $this->reader->value;
519 case XMLReader::END_ELEMENT:
524 $this->reader->close();
537 $oldDisable = libxml_disable_entity_loader(
true );
538 $this->reader->read();
540 if ( $this->reader->localName !=
'mediawiki' ) {
541 libxml_disable_entity_loader( $oldDisable );
542 throw new MWException(
"Expected <mediawiki> tag, got " .
543 $this->reader->localName );
545 $this->
debug(
"<mediawiki> tag is correct." );
547 $this->
debug(
"Starting primary dump processing loop." );
549 $keepReading = $this->reader->read();
553 while ( $keepReading ) {
554 $tag = $this->reader->localName;
555 $type = $this->reader->nodeType;
557 if ( !
Hooks::run(
'ImportHandleToplevelXMLTag', [ $this ] ) ) {
559 } elseif (
$tag ==
'mediawiki' &&
$type == XMLReader::END_ELEMENT ) {
561 } elseif (
$tag ==
'siteinfo' ) {
563 } elseif (
$tag ==
'page' ) {
565 } elseif (
$tag ==
'logitem' ) {
567 } elseif (
$tag !=
'#text' ) {
568 $this->
warn(
"Unhandled top-level XML tag $tag" );
574 $keepReading = $this->reader->next();
576 $this->
debug(
"Skip" );
578 $keepReading = $this->reader->read();
581 }
catch ( Exception $ex ) {
586 libxml_disable_entity_loader( $oldDisable );
587 $this->reader->close();
597 $this->
debug(
"Enter site info handler." );
601 $normalFields = [
'sitename',
'base',
'generator',
'case' ];
603 while ( $this->reader->read() ) {
604 if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
605 $this->reader->localName ==
'siteinfo' ) {
609 $tag = $this->reader->localName;
611 if (
$tag ==
'namespace' ) {
614 } elseif ( in_array(
$tag, $normalFields ) ) {
624 $this->
debug(
"Enter log item handler." );
628 $normalFields = [
'id',
'comment',
'type',
'action',
'timestamp',
629 'logtitle',
'params' ];
631 while ( $this->reader->read() ) {
632 if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
633 $this->reader->localName ==
'logitem' ) {
637 $tag = $this->reader->localName;
639 if ( !
Hooks::run(
'ImportHandleLogItemXMLTag', [
643 } elseif ( in_array(
$tag, $normalFields ) ) {
645 } elseif (
$tag ==
'contributor' ) {
647 } elseif (
$tag !=
'#text' ) {
648 $this->
warn(
"Unhandled log-item XML tag $tag" );
663 if ( isset( $logInfo[
'id'] ) ) {
664 $revision->setID( $logInfo[
'id'] );
666 $revision->setType( $logInfo[
'type'] );
667 $revision->setAction( $logInfo[
'action'] );
668 if ( isset( $logInfo[
'timestamp'] ) ) {
669 $revision->setTimestamp( $logInfo[
'timestamp'] );
671 if ( isset( $logInfo[
'params'] ) ) {
672 $revision->setParams( $logInfo[
'params'] );
674 if ( isset( $logInfo[
'logtitle'] ) ) {
680 $revision->setNoUpdates( $this->mNoUpdates );
682 if ( isset( $logInfo[
'comment'] ) ) {
683 $revision->setComment( $logInfo[
'comment'] );
686 if ( isset( $logInfo[
'contributor'][
'ip'] ) ) {
687 $revision->setUserIP( $logInfo[
'contributor'][
'ip'] );
690 if ( !isset( $logInfo[
'contributor'][
'username'] ) ) {
691 $revision->setUsername(
'Unknown user' );
693 $revision->setUsername( $logInfo[
'contributor'][
'username'] );
701 $this->
debug(
"Enter page handler." );
702 $pageInfo = [
'revisionCount' => 0,
'successfulRevisionCount' => 0 ];
705 $normalFields = [
'title',
'ns',
'id',
'redirect',
'restrictions' ];
710 while ( $skip ? $this->reader->next() : $this->reader->read() ) {
711 if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
712 $this->reader->localName ==
'page' ) {
718 $tag = $this->reader->localName;
723 } elseif ( !
Hooks::run(
'ImportHandlePageXMLTag', [ $this,
726 } elseif ( in_array(
$tag, $normalFields ) ) {
734 if (
$tag ==
'redirect' ) {
739 } elseif (
$tag ==
'revision' ||
$tag ==
'upload' ) {
742 isset( $pageInfo[
'ns'] ) ? $pageInfo[
'ns'] :
null );
745 if ( is_array(
$title ) ) {
747 list( $pageInfo[
'_title'], $foreignTitle ) =
$title;
755 if (
$tag ==
'revision' ) {
761 } elseif (
$tag !=
'#text' ) {
762 $this->
warn(
"Unhandled page XML tag $tag" );
772 if ( array_key_exists(
'_title', $pageInfo ) ) {
774 $pageInfo[
'revisionCount'],
775 $pageInfo[
'successfulRevisionCount'],
784 $this->
debug(
"Enter revision handler" );
787 $normalFields = [
'id',
'timestamp',
'comment',
'minor',
'model',
'format',
'text' ];
791 while ( $skip ? $this->reader->next() : $this->reader->read() ) {
792 if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
793 $this->reader->localName ==
'revision' ) {
797 $tag = $this->reader->localName;
799 if ( !
Hooks::run(
'ImportHandleRevisionXMLTag', [
800 $this, $pageInfo, $revisionInfo
803 } elseif ( in_array(
$tag, $normalFields ) ) {
805 } elseif (
$tag ==
'contributor' ) {
807 } elseif (
$tag !=
'#text' ) {
808 $this->
warn(
"Unhandled revision XML tag $tag" );
813 $pageInfo[
'revisionCount']++;
815 $pageInfo[
'successfulRevisionCount']++;
831 if ( ( !isset( $revisionInfo[
'model'] ) ||
832 in_array( $revisionInfo[
'model'], [
843 ( isset( $revisionInfo[
'id'] ) ?
844 "the revision with ID $revisionInfo[id]" :
846 ) .
" exceeds the maximum allowable size ($wgMaxArticleSize KB)" );
851 if ( isset( $revisionInfo[
'id'] ) ) {
852 $revision->setID( $revisionInfo[
'id'] );
854 if ( isset( $revisionInfo[
'model'] ) ) {
855 $revision->setModel( $revisionInfo[
'model'] );
857 if ( isset( $revisionInfo[
'format'] ) ) {
858 $revision->setFormat( $revisionInfo[
'format'] );
860 $revision->setTitle( $pageInfo[
'_title'] );
862 if ( isset( $revisionInfo[
'text'] ) ) {
863 $handler = $revision->getContentHandler();
865 $revisionInfo[
'text'],
866 $revision->getFormat() );
868 $revision->setText( $text );
870 if ( isset( $revisionInfo[
'timestamp'] ) ) {
871 $revision->setTimestamp( $revisionInfo[
'timestamp'] );
876 if ( isset( $revisionInfo[
'comment'] ) ) {
877 $revision->setComment( $revisionInfo[
'comment'] );
880 if ( isset( $revisionInfo[
'minor'] ) ) {
881 $revision->setMinor(
true );
883 if ( isset( $revisionInfo[
'contributor'][
'ip'] ) ) {
884 $revision->setUserIP( $revisionInfo[
'contributor'][
'ip'] );
885 } elseif ( isset( $revisionInfo[
'contributor'][
'username'] ) ) {
886 $revision->setUsername( $revisionInfo[
'contributor'][
'username'] );
888 $revision->setUsername(
'Unknown user' );
890 $revision->setNoUpdates( $this->mNoUpdates );
900 $this->
debug(
"Enter upload handler" );
903 $normalFields = [
'timestamp',
'comment',
'filename',
'text',
904 'src',
'size',
'sha1base36',
'archivename',
'rel' ];
908 while ( $skip ? $this->reader->next() : $this->reader->read() ) {
909 if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
910 $this->reader->localName ==
'upload' ) {
914 $tag = $this->reader->localName;
916 if ( !
Hooks::run(
'ImportHandleUploadXMLTag', [
920 } elseif ( in_array(
$tag, $normalFields ) ) {
922 } elseif (
$tag ==
'contributor' ) {
924 } elseif (
$tag ==
'contents' ) {
926 $encoding = $this->reader->getAttribute(
'encoding' );
927 if ( $encoding ===
'base64' ) {
928 $uploadInfo[
'fileSrc'] = $this->
dumpTemp( base64_decode( $contents ) );
929 $uploadInfo[
'isTempSrc'] =
true;
931 } elseif (
$tag !=
'#text' ) {
932 $this->
warn(
"Unhandled upload XML tag $tag" );
937 if ( $this->mImageBasePath && isset( $uploadInfo[
'rel'] ) ) {
938 $path =
"{$this->mImageBasePath}/{$uploadInfo['rel']}";
939 if ( file_exists(
$path ) ) {
940 $uploadInfo[
'fileSrc'] =
$path;
941 $uploadInfo[
'isTempSrc'] =
false;
945 if ( $this->mImportUploads ) {
955 $filename = tempnam(
wfTempDir(),
'importupload' );
956 file_put_contents( $filename, $contents );
967 $text = isset( $uploadInfo[
'text'] ) ? $uploadInfo[
'text'] :
'';
969 $revision->setTitle( $pageInfo[
'_title'] );
970 $revision->setID( $pageInfo[
'id'] );
971 $revision->setTimestamp( $uploadInfo[
'timestamp'] );
972 $revision->setText( $text );
973 $revision->setFilename( $uploadInfo[
'filename'] );
974 if ( isset( $uploadInfo[
'archivename'] ) ) {
975 $revision->setArchiveName( $uploadInfo[
'archivename'] );
977 $revision->setSrc( $uploadInfo[
'src'] );
978 if ( isset( $uploadInfo[
'fileSrc'] ) ) {
979 $revision->setFileSrc( $uploadInfo[
'fileSrc'],
980 !empty( $uploadInfo[
'isTempSrc'] ) );
982 if ( isset( $uploadInfo[
'sha1base36'] ) ) {
983 $revision->setSha1Base36( $uploadInfo[
'sha1base36'] );
985 $revision->setSize( intval( $uploadInfo[
'size'] ) );
986 $revision->setComment( $uploadInfo[
'comment'] );
988 if ( isset( $uploadInfo[
'contributor'][
'ip'] ) ) {
989 $revision->setUserIP( $uploadInfo[
'contributor'][
'ip'] );
991 if ( isset( $uploadInfo[
'contributor'][
'username'] ) ) {
992 $revision->setUsername( $uploadInfo[
'contributor'][
'username'] );
994 $revision->setNoUpdates( $this->mNoUpdates );
996 return call_user_func( $this->mUploadCallback, $revision );
1003 $fields = [
'id',
'ip',
'username' ];
1006 if ( $this->reader->isEmptyElement ) {
1009 while ( $this->reader->read() ) {
1010 if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
1011 $this->reader->localName ==
'contributor' ) {
1015 $tag = $this->reader->localName;
1017 if ( in_array(
$tag, $fields ) ) {
1031 if ( is_null( $this->foreignNamespaces ) ) {
1035 $this->foreignNamespaces );
1038 $foreignTitle = $foreignTitleFactory->createForeignTitle( $text,
1041 $title = $this->importTitleFactory->createTitleFromForeignTitle(
1044 $commandLineMode = $this->config->get(
'CommandLineMode' );
1045 if ( is_null(
$title ) ) {
1046 # Invalid page title? Ignore the page
1047 $this->
notice(
'import-error-invalid', $foreignTitle->getFullText() );
1049 } elseif (
$title->isExternal() ) {
1050 $this->
notice(
'import-error-interwiki',
$title->getPrefixedText() );
1052 } elseif ( !
$title->canExist() ) {
1053 $this->
notice(
'import-error-special',
$title->getPrefixedText() );
1055 } elseif ( !
$title->userCan(
'edit' ) && !$commandLineMode ) {
1056 # Do not import if the importing wiki user cannot edit this page
1057 $this->
notice(
'import-error-edit',
$title->getPrefixedText() );
1059 } elseif ( !
$title->exists() && !
$title->userCan(
'create' ) && !$commandLineMode ) {
1060 # Do not import if the importing wiki user cannot create this page
1061 $this->
notice(
'import-error-create',
$title->getPrefixedText() );
1065 return [
$title, $foreignTitle ];