60 if ( !class_exists(
'XMLReader' ) ) {
61 throw new Exception(
'Import requires PHP to have been compiled with libxml support' );
64 $this->reader =
new XMLReader();
67 if ( !in_array(
'uploadsource', stream_get_wrappers() ) ) {
74 $oldDisable = libxml_disable_entity_loader(
false );
75 if ( defined(
'LIBXML_PARSEHUGE' ) ) {
76 $status = $this->reader->open(
"uploadsource://$id",
null, LIBXML_PARSEHUGE );
78 $status = $this->reader->open(
"uploadsource://$id" );
81 $error = libxml_get_last_error();
82 libxml_disable_entity_loader( $oldDisable );
83 throw new MWException(
'Encountered an internal error while initializing WikiImporter object: ' .
86 libxml_disable_entity_loader( $oldDisable );
107 $this->
debug(
"FAILURE: $err" );
108 wfDebug(
"WikiImporter XML error: $err\n" );
112 if ( $this->mDebug ) {
117 public function warn( $data ) {
125 if ( is_callable( $this->mNoticeCallback ) ) {
126 call_user_func( $this->mNoticeCallback, $msg,
$params );
147 $this->mNoUpdates = $noupdates;
157 $this->pageOffset = $nthPage;
167 return wfSetVar( $this->mNoticeCallback, $callback );
177 $this->mPageCallback = $callback;
192 $this->mPageOutCallback = $callback;
203 $this->mRevisionCallback = $callback;
214 $this->mUploadCallback = $callback;
225 $this->mLogItemCallback = $callback;
236 $this->mSiteInfoCallback = $callback;
246 $this->importTitleFactory = $factory;
255 if ( is_null( $namespace ) ) {
263 $namespace = intval( $namespace );
278 if ( is_null( $rootpage ) ) {
281 } elseif ( $rootpage !==
'' ) {
282 $rootpage = rtrim( $rootpage,
'/' );
286 $status->fatal(
'import-rootpage-invalid' );
294 $status->fatal(
'import-rootpage-nosubpage', $displayNSText );
309 $this->mImageBasePath = $dir;
316 $this->mImportUploads = $import;
325 $this->externalUserNames =
new ExternalUserNames( $usernamePrefix, $assignKnownUsers );
343 $title = $titleAndForeignTitle[0];
345 $this->countableCache[
'title_' .
$title->getPrefixedText()] = $page->isCountable();
355 if ( !$revision->getContentHandler()->canBeUsedOn( $revision->getTitle() ) ) {
356 $this->
notice(
'import-error-bad-location',
357 $revision->getTitle()->getPrefixedText(),
359 $revision->getModel(),
360 $revision->getFormat() );
366 return $revision->importOldRevision();
368 $this->
notice(
'import-error-unserialize',
369 $revision->getTitle()->getPrefixedText(),
371 $revision->getModel(),
372 $revision->getFormat() );
384 return $revision->importLogItem();
393 return $revision->importUpload();
406 $sRevCount, $pageInfo
416 $page->loadPageData(
'fromdbmaster' );
417 $content = $page->getContent();
418 if ( $content ===
null ) {
419 wfDebug( __METHOD__ .
': Skipping article count adjustment for ' .
$title .
420 ' because WikiPage::getContent() returned null' );
422 $editInfo = $page->prepareContentForEdit( $content );
423 $countKey =
'title_' .
$title->getPrefixedText();
424 $countable = $page->isCountable( $editInfo );
425 if ( array_key_exists( $countKey, $this->countableCache ) &&
426 $countable != $this->countableCache[$countKey] ) {
428 'articles' => ( (
int)$countable - (
int)$this->countableCache[$countKey] )
434 $args = func_get_args();
443 $this->
debug(
"Got revision:" );
444 if ( is_object( $revision->title ) ) {
445 $this->
debug(
"-- Title: " . $revision->title->getPrefixedText() );
447 $this->
debug(
"-- Title: <invalid>" );
449 $this->
debug(
"-- User: " . $revision->user_text );
450 $this->
debug(
"-- Timestamp: " . $revision->timestamp );
451 $this->
debug(
"-- Comment: " . $revision->comment );
452 $this->
debug(
"-- Text: " . $revision->text );
461 if ( isset( $this->mSiteInfoCallback ) ) {
462 return call_user_func_array( $this->mSiteInfoCallback,
463 [ $siteInfo, $this ] );
474 if ( isset( $this->mPageCallback ) ) {
475 call_user_func( $this->mPageCallback,
$title );
488 $sucCount, $pageInfo ) {
489 if ( isset( $this->mPageOutCallback ) ) {
490 $args = func_get_args();
491 call_user_func_array( $this->mPageOutCallback,
$args );
501 if ( isset( $this->mRevisionCallback ) ) {
502 return call_user_func_array( $this->mRevisionCallback,
503 [ $revision, $this ] );
515 if ( isset( $this->mLogItemCallback ) ) {
516 return call_user_func_array( $this->mLogItemCallback,
517 [ $revision, $this ] );
530 return $this->reader->getAttribute( $attr );
541 if ( $this->reader->isEmptyElement ) {
545 while ( $this->reader->read() ) {
546 switch ( $this->reader->nodeType ) {
547 case XMLReader::TEXT:
548 case XMLReader::CDATA:
549 case XMLReader::SIGNIFICANT_WHITESPACE:
550 $buffer .= $this->reader->value;
552 case XMLReader::END_ELEMENT:
557 $this->reader->close();
571 $oldDisable = libxml_disable_entity_loader(
true );
572 $this->reader->read();
574 if ( $this->reader->localName !=
'mediawiki' ) {
575 libxml_disable_entity_loader( $oldDisable );
576 throw new MWException(
"Expected <mediawiki> tag, got " .
577 $this->reader->localName );
579 $this->
debug(
"<mediawiki> tag is correct." );
581 $this->
debug(
"Starting primary dump processing loop." );
583 $keepReading = $this->reader->read();
588 while ( $keepReading ) {
589 $tag = $this->reader->localName;
590 if ( $this->pageOffset ) {
591 if ( $tag ===
'page' ) {
594 if ( $pageCount < $this->pageOffset ) {
595 $keepReading = $this->reader->next();
599 $type = $this->reader->nodeType;
601 if ( !
Hooks::run(
'ImportHandleToplevelXMLTag', [ $this ] ) ) {
603 } elseif ( $tag ==
'mediawiki' &&
$type == XMLReader::END_ELEMENT ) {
605 } elseif ( $tag ==
'siteinfo' ) {
607 } elseif ( $tag ==
'page' ) {
609 } elseif ( $tag ==
'logitem' ) {
611 } elseif ( $tag !=
'#text' ) {
612 $this->
warn(
"Unhandled top-level XML tag $tag" );
618 $keepReading = $this->reader->next();
620 $this->
debug(
"Skip" );
622 $keepReading = $this->reader->read();
625 }
catch ( Exception $ex ) {
630 libxml_disable_entity_loader( $oldDisable );
631 $this->reader->close();
641 $this->
debug(
"Enter site info handler." );
645 $normalFields = [
'sitename',
'base',
'generator',
'case' ];
647 while ( $this->reader->read() ) {
648 if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
649 $this->reader->localName ==
'siteinfo' ) {
653 $tag = $this->reader->localName;
655 if ( $tag ==
'namespace' ) {
658 } elseif ( in_array( $tag, $normalFields ) ) {
668 $this->
debug(
"Enter log item handler." );
672 $normalFields = [
'id',
'comment',
'type',
'action',
'timestamp',
673 'logtitle',
'params' ];
675 while ( $this->reader->read() ) {
676 if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
677 $this->reader->localName ==
'logitem' ) {
681 $tag = $this->reader->localName;
683 if ( !
Hooks::run(
'ImportHandleLogItemXMLTag', [
687 } elseif ( in_array( $tag, $normalFields ) ) {
689 } elseif ( $tag ==
'contributor' ) {
691 } elseif ( $tag !=
'#text' ) {
692 $this->
warn(
"Unhandled log-item XML tag $tag" );
706 if ( isset( $logInfo[
'id'] ) ) {
707 $revision->setID( $logInfo[
'id'] );
709 $revision->setType( $logInfo[
'type'] );
710 $revision->setAction( $logInfo[
'action'] );
711 if ( isset( $logInfo[
'timestamp'] ) ) {
712 $revision->setTimestamp( $logInfo[
'timestamp'] );
714 if ( isset( $logInfo[
'params'] ) ) {
715 $revision->setParams( $logInfo[
'params'] );
717 if ( isset( $logInfo[
'logtitle'] ) ) {
723 $revision->setNoUpdates( $this->mNoUpdates );
725 if ( isset( $logInfo[
'comment'] ) ) {
726 $revision->setComment( $logInfo[
'comment'] );
729 if ( isset( $logInfo[
'contributor'][
'ip'] ) ) {
730 $revision->setUserIP( $logInfo[
'contributor'][
'ip'] );
733 if ( !isset( $logInfo[
'contributor'][
'username'] ) ) {
734 $revision->setUsername( $this->externalUserNames->addPrefix(
'Unknown user' ) );
736 $revision->setUsername(
737 $this->externalUserNames->applyPrefix( $logInfo[
'contributor'][
'username'] )
746 $this->
debug(
"Enter page handler." );
747 $pageInfo = [
'revisionCount' => 0,
'successfulRevisionCount' => 0 ];
750 $normalFields = [
'title',
'ns',
'id',
'redirect',
'restrictions' ];
755 while ( $skip ? $this->reader->next() : $this->reader->read() ) {
756 if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
757 $this->reader->localName ==
'page' ) {
763 $tag = $this->reader->localName;
768 } elseif ( !
Hooks::run(
'ImportHandlePageXMLTag', [ $this,
771 } elseif ( in_array( $tag, $normalFields ) ) {
779 if ( $tag ==
'redirect' ) {
784 } elseif ( $tag ==
'revision' || $tag ==
'upload' ) {
787 isset( $pageInfo[
'ns'] ) ? $pageInfo[
'ns'] :
null );
790 if ( is_array(
$title ) ) {
792 list( $pageInfo[
'_title'], $foreignTitle ) =
$title;
800 if ( $tag ==
'revision' ) {
806 } elseif ( $tag !=
'#text' ) {
807 $this->
warn(
"Unhandled page XML tag $tag" );
817 if ( array_key_exists(
'_title', $pageInfo ) ) {
819 $pageInfo[
'revisionCount'],
820 $pageInfo[
'successfulRevisionCount'],
829 $this->
debug(
"Enter revision handler" );
832 $normalFields = [
'id',
'timestamp',
'comment',
'minor',
'model',
'format',
'text',
'sha1' ];
836 while ( $skip ? $this->reader->next() : $this->reader->read() ) {
837 if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
838 $this->reader->localName ==
'revision' ) {
842 $tag = $this->reader->localName;
844 if ( !
Hooks::run(
'ImportHandleRevisionXMLTag', [
845 $this, $pageInfo, $revisionInfo
848 } elseif ( in_array( $tag, $normalFields ) ) {
850 } elseif ( $tag ==
'contributor' ) {
852 } elseif ( $tag !=
'#text' ) {
853 $this->
warn(
"Unhandled revision XML tag $tag" );
858 $pageInfo[
'revisionCount']++;
860 $pageInfo[
'successfulRevisionCount']++;
877 if ( ( !isset( $revisionInfo[
'model'] ) ||
878 in_array( $revisionInfo[
'model'], [
889 ( isset( $revisionInfo[
'id'] ) ?
890 "the revision with ID $revisionInfo[id]" :
892 ) .
" exceeds the maximum allowable size ($wgMaxArticleSize KB)" );
897 if ( isset( $revisionInfo[
'id'] ) ) {
898 $revision->setID( $revisionInfo[
'id'] );
900 if ( isset( $revisionInfo[
'model'] ) ) {
901 $revision->setModel( $revisionInfo[
'model'] );
903 if ( isset( $revisionInfo[
'format'] ) ) {
904 $revision->setFormat( $revisionInfo[
'format'] );
906 $revision->setTitle( $pageInfo[
'_title'] );
908 if ( isset( $revisionInfo[
'text'] ) ) {
909 $handler = $revision->getContentHandler();
911 $revisionInfo[
'text'],
912 $revision->getFormat() );
914 $revision->setText( $text );
916 if ( isset( $revisionInfo[
'timestamp'] ) ) {
917 $revision->setTimestamp( $revisionInfo[
'timestamp'] );
922 if ( isset( $revisionInfo[
'comment'] ) ) {
923 $revision->setComment( $revisionInfo[
'comment'] );
926 if ( isset( $revisionInfo[
'minor'] ) ) {
927 $revision->setMinor(
true );
929 if ( isset( $revisionInfo[
'contributor'][
'ip'] ) ) {
930 $revision->setUserIP( $revisionInfo[
'contributor'][
'ip'] );
931 } elseif ( isset( $revisionInfo[
'contributor'][
'username'] ) ) {
932 $revision->setUsername(
933 $this->externalUserNames->applyPrefix( $revisionInfo[
'contributor'][
'username'] )
936 $revision->setUsername( $this->externalUserNames->addPrefix(
'Unknown user' ) );
938 if ( isset( $revisionInfo[
'sha1'] ) ) {
939 $revision->setSha1Base36( $revisionInfo[
'sha1'] );
941 $revision->setNoUpdates( $this->mNoUpdates );
951 $this->
debug(
"Enter upload handler" );
954 $normalFields = [
'timestamp',
'comment',
'filename',
'text',
955 'src',
'size',
'sha1base36',
'archivename',
'rel' ];
959 while ( $skip ? $this->reader->next() : $this->reader->read() ) {
960 if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
961 $this->reader->localName ==
'upload' ) {
965 $tag = $this->reader->localName;
967 if ( !
Hooks::run(
'ImportHandleUploadXMLTag', [
971 } elseif ( in_array( $tag, $normalFields ) ) {
973 } elseif ( $tag ==
'contributor' ) {
975 } elseif ( $tag ==
'contents' ) {
977 $encoding = $this->reader->getAttribute(
'encoding' );
978 if ( $encoding ===
'base64' ) {
979 $uploadInfo[
'fileSrc'] = $this->
dumpTemp( base64_decode( $contents ) );
980 $uploadInfo[
'isTempSrc'] =
true;
982 } elseif ( $tag !=
'#text' ) {
983 $this->
warn(
"Unhandled upload XML tag $tag" );
988 if ( $this->mImageBasePath && isset( $uploadInfo[
'rel'] ) ) {
989 $path =
"{$this->mImageBasePath}/{$uploadInfo['rel']}";
990 if ( file_exists(
$path ) ) {
991 $uploadInfo[
'fileSrc'] =
$path;
992 $uploadInfo[
'isTempSrc'] =
false;
996 if ( $this->mImportUploads ) {
1006 $filename = tempnam(
wfTempDir(),
'importupload' );
1007 file_put_contents( $filename, $contents );
1018 $text = isset( $uploadInfo[
'text'] ) ? $uploadInfo[
'text'] :
'';
1020 $revision->setTitle( $pageInfo[
'_title'] );
1021 $revision->setID( $pageInfo[
'id'] );
1022 $revision->setTimestamp( $uploadInfo[
'timestamp'] );
1023 $revision->setText( $text );
1024 $revision->setFilename( $uploadInfo[
'filename'] );
1025 if ( isset( $uploadInfo[
'archivename'] ) ) {
1026 $revision->setArchiveName( $uploadInfo[
'archivename'] );
1028 $revision->setSrc( $uploadInfo[
'src'] );
1029 if ( isset( $uploadInfo[
'fileSrc'] ) ) {
1030 $revision->setFileSrc( $uploadInfo[
'fileSrc'],
1031 !empty( $uploadInfo[
'isTempSrc'] ) );
1033 if ( isset( $uploadInfo[
'sha1base36'] ) ) {
1034 $revision->setSha1Base36( $uploadInfo[
'sha1base36'] );
1036 $revision->setSize( intval( $uploadInfo[
'size'] ) );
1037 $revision->setComment( $uploadInfo[
'comment'] );
1039 if ( isset( $uploadInfo[
'contributor'][
'ip'] ) ) {
1040 $revision->setUserIP( $uploadInfo[
'contributor'][
'ip'] );
1042 if ( isset( $uploadInfo[
'contributor'][
'username'] ) ) {
1043 $revision->setUsername(
1044 $this->externalUserNames->applyPrefix( $uploadInfo[
'contributor'][
'username'] )
1047 $revision->setNoUpdates( $this->mNoUpdates );
1049 return call_user_func( $this->mUploadCallback, $revision );
1056 $fields = [
'id',
'ip',
'username' ];
1059 if ( $this->reader->isEmptyElement ) {
1062 while ( $this->reader->read() ) {
1063 if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
1064 $this->reader->localName ==
'contributor' ) {
1068 $tag = $this->reader->localName;
1070 if ( in_array( $tag, $fields ) ) {
1084 if ( is_null( $this->foreignNamespaces ) ) {
1088 $this->foreignNamespaces );
1091 $foreignTitle = $foreignTitleFactory->createForeignTitle( $text,
1094 $title = $this->importTitleFactory->createTitleFromForeignTitle(
1097 $commandLineMode = $this->config->get(
'CommandLineMode' );
1098 if ( is_null(
$title ) ) {
1099 # Invalid page title? Ignore the page
1100 $this->
notice(
'import-error-invalid', $foreignTitle->getFullText() );
1102 } elseif (
$title->isExternal() ) {
1103 $this->
notice(
'import-error-interwiki',
$title->getPrefixedText() );
1105 } elseif ( !
$title->canExist() ) {
1106 $this->
notice(
'import-error-special',
$title->getPrefixedText() );
1108 } elseif ( !
$title->userCan(
'edit' ) && !$commandLineMode ) {
1109 # Do not import if the importing wiki user cannot edit this page
1110 $this->
notice(
'import-error-edit',
$title->getPrefixedText() );
1112 } elseif ( !
$title->exists() && !
$title->userCan(
'create' ) && !$commandLineMode ) {
1113 # Do not import if the importing wiki user cannot create this page
1114 $this->
notice(
'import-error-create',
$title->getPrefixedText() );
1118 return [
$title, $foreignTitle ];