59 if ( !class_exists(
'XMLReader' ) ) {
60 throw new Exception(
'Import requires PHP to have been compiled with libxml support' );
63 $this->reader =
new XMLReader();
66 if ( !in_array(
'uploadsource', stream_get_wrappers() ) ) {
67 stream_wrapper_register(
'uploadsource',
'UploadSourceAdapter' );
73 $oldDisable = libxml_disable_entity_loader(
false );
74 if ( defined(
'LIBXML_PARSEHUGE' ) ) {
75 $status = $this->reader->open(
"uploadsource://$id",
null, LIBXML_PARSEHUGE );
77 $status = $this->reader->open(
"uploadsource://$id" );
80 $error = libxml_get_last_error();
81 libxml_disable_entity_loader( $oldDisable );
82 throw new MWException(
'Encountered an internal error while initializing WikiImporter object: ' .
85 libxml_disable_entity_loader( $oldDisable );
105 $this->
debug(
"FAILURE: $err" );
106 wfDebug(
"WikiImporter XML error: $err\n" );
110 if ( $this->mDebug ) {
115 public function warn( $data ) {
123 if ( is_callable( $this->mNoticeCallback ) ) {
124 call_user_func( $this->mNoticeCallback, $msg,
$params );
143 $this->mNoUpdates = $noupdates;
153 $this->pageOffset = $nthPage;
163 return wfSetVar( $this->mNoticeCallback, $callback );
173 $this->mPageCallback = $callback;
188 $this->mPageOutCallback = $callback;
199 $this->mRevisionCallback = $callback;
210 $this->mUploadCallback = $callback;
221 $this->mLogItemCallback = $callback;
232 $this->mSiteInfoCallback = $callback;
242 $this->importTitleFactory = $factory;
251 if ( is_null( $namespace ) ) {
259 $namespace = intval( $namespace );
274 if ( is_null( $rootpage ) ) {
277 } elseif ( $rootpage !==
'' ) {
278 $rootpage = rtrim( $rootpage,
'/' );
282 $status->fatal(
'import-rootpage-invalid' );
290 $status->fatal(
'import-rootpage-nosubpage', $displayNSText );
305 $this->mImageBasePath =
$dir;
312 $this->mImportUploads = $import;
330 $title = $titleAndForeignTitle[0];
332 $this->countableCache[
'title_' .
$title->getPrefixedText()] = $page->isCountable();
342 if ( !$revision->getContentHandler()->canBeUsedOn( $revision->getTitle() ) ) {
343 $this->
notice(
'import-error-bad-location',
344 $revision->getTitle()->getPrefixedText(),
346 $revision->getModel(),
347 $revision->getFormat() );
353 return $revision->importOldRevision();
355 $this->
notice(
'import-error-unserialize',
356 $revision->getTitle()->getPrefixedText(),
358 $revision->getModel(),
359 $revision->getFormat() );
371 return $revision->importLogItem();
380 return $revision->importUpload();
393 $sRevCount, $pageInfo
403 $page->loadPageData(
'fromdbmaster' );
404 $content = $page->getContent();
405 if ( $content ===
null ) {
406 wfDebug( __METHOD__ .
': Skipping article count adjustment for ' .
$title .
407 ' because WikiPage::getContent() returned null' );
409 $editInfo = $page->prepareContentForEdit( $content );
410 $countKey =
'title_' .
$title->getPrefixedText();
411 $countable = $page->isCountable( $editInfo );
412 if ( array_key_exists( $countKey, $this->countableCache ) &&
413 $countable != $this->countableCache[$countKey] ) {
415 'articles' => ( (
int)$countable - (
int)$this->countableCache[$countKey] )
421 $args = func_get_args();
430 $this->
debug(
"Got revision:" );
431 if ( is_object( $revision->title ) ) {
432 $this->
debug(
"-- Title: " . $revision->title->getPrefixedText() );
434 $this->
debug(
"-- Title: <invalid>" );
436 $this->
debug(
"-- User: " . $revision->user_text );
437 $this->
debug(
"-- Timestamp: " . $revision->timestamp );
438 $this->
debug(
"-- Comment: " . $revision->comment );
439 $this->
debug(
"-- Text: " . $revision->text );
448 if ( isset( $this->mSiteInfoCallback ) ) {
449 return call_user_func_array( $this->mSiteInfoCallback,
450 [ $siteInfo, $this ] );
461 if ( isset( $this->mPageCallback ) ) {
462 call_user_func( $this->mPageCallback,
$title );
475 $sucCount, $pageInfo ) {
476 if ( isset( $this->mPageOutCallback ) ) {
477 $args = func_get_args();
478 call_user_func_array( $this->mPageOutCallback,
$args );
488 if ( isset( $this->mRevisionCallback ) ) {
489 return call_user_func_array( $this->mRevisionCallback,
490 [ $revision, $this ] );
502 if ( isset( $this->mLogItemCallback ) ) {
503 return call_user_func_array( $this->mLogItemCallback,
504 [ $revision, $this ] );
517 return $this->reader->getAttribute( $attr );
528 if ( $this->reader->isEmptyElement ) {
532 while ( $this->reader->read() ) {
533 switch ( $this->reader->nodeType ) {
534 case XMLReader::TEXT:
535 case XMLReader::CDATA:
536 case XMLReader::SIGNIFICANT_WHITESPACE:
537 $buffer .= $this->reader->value;
539 case XMLReader::END_ELEMENT:
544 $this->reader->close();
557 $oldDisable = libxml_disable_entity_loader(
true );
558 $this->reader->read();
560 if ( $this->reader->localName !=
'mediawiki' ) {
561 libxml_disable_entity_loader( $oldDisable );
562 throw new MWException(
"Expected <mediawiki> tag, got " .
563 $this->reader->localName );
565 $this->
debug(
"<mediawiki> tag is correct." );
567 $this->
debug(
"Starting primary dump processing loop." );
569 $keepReading = $this->reader->read();
574 while ( $keepReading ) {
575 $tag = $this->reader->localName;
576 if ( $this->pageOffset ) {
577 if ( $tag ===
'page' ) {
580 if ( $pageCount < $this->pageOffset ) {
581 $keepReading = $this->reader->next();
585 $type = $this->reader->nodeType;
587 if ( !
Hooks::run(
'ImportHandleToplevelXMLTag', [ $this ] ) ) {
589 } elseif ( $tag ==
'mediawiki' &&
$type == XMLReader::END_ELEMENT ) {
591 } elseif ( $tag ==
'siteinfo' ) {
593 } elseif ( $tag ==
'page' ) {
595 } elseif ( $tag ==
'logitem' ) {
597 } elseif ( $tag !=
'#text' ) {
598 $this->
warn(
"Unhandled top-level XML tag $tag" );
604 $keepReading = $this->reader->next();
606 $this->
debug(
"Skip" );
608 $keepReading = $this->reader->read();
611 }
catch ( Exception $ex ) {
616 libxml_disable_entity_loader( $oldDisable );
617 $this->reader->close();
627 $this->
debug(
"Enter site info handler." );
631 $normalFields = [
'sitename',
'base',
'generator',
'case' ];
633 while ( $this->reader->read() ) {
634 if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
635 $this->reader->localName ==
'siteinfo' ) {
639 $tag = $this->reader->localName;
641 if ( $tag ==
'namespace' ) {
644 } elseif ( in_array( $tag, $normalFields ) ) {
654 $this->
debug(
"Enter log item handler." );
658 $normalFields = [
'id',
'comment',
'type',
'action',
'timestamp',
659 'logtitle',
'params' ];
661 while ( $this->reader->read() ) {
662 if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
663 $this->reader->localName ==
'logitem' ) {
667 $tag = $this->reader->localName;
669 if ( !
Hooks::run(
'ImportHandleLogItemXMLTag', [
673 } elseif ( in_array( $tag, $normalFields ) ) {
675 } elseif ( $tag ==
'contributor' ) {
677 } elseif ( $tag !=
'#text' ) {
678 $this->
warn(
"Unhandled log-item XML tag $tag" );
692 if ( isset( $logInfo[
'id'] ) ) {
693 $revision->setID( $logInfo[
'id'] );
695 $revision->setType( $logInfo[
'type'] );
696 $revision->setAction( $logInfo[
'action'] );
697 if ( isset( $logInfo[
'timestamp'] ) ) {
698 $revision->setTimestamp( $logInfo[
'timestamp'] );
700 if ( isset( $logInfo[
'params'] ) ) {
701 $revision->setParams( $logInfo[
'params'] );
703 if ( isset( $logInfo[
'logtitle'] ) ) {
709 $revision->setNoUpdates( $this->mNoUpdates );
711 if ( isset( $logInfo[
'comment'] ) ) {
712 $revision->setComment( $logInfo[
'comment'] );
715 if ( isset( $logInfo[
'contributor'][
'ip'] ) ) {
716 $revision->setUserIP( $logInfo[
'contributor'][
'ip'] );
719 if ( !isset( $logInfo[
'contributor'][
'username'] ) ) {
720 $revision->setUsername(
'Unknown user' );
722 $revision->setUsername( $logInfo[
'contributor'][
'username'] );
730 $this->
debug(
"Enter page handler." );
731 $pageInfo = [
'revisionCount' => 0,
'successfulRevisionCount' => 0 ];
734 $normalFields = [
'title',
'ns',
'id',
'redirect',
'restrictions' ];
739 while ( $skip ? $this->reader->next() : $this->reader->read() ) {
740 if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
741 $this->reader->localName ==
'page' ) {
747 $tag = $this->reader->localName;
752 } elseif ( !
Hooks::run(
'ImportHandlePageXMLTag', [ $this,
755 } elseif ( in_array( $tag, $normalFields ) ) {
763 if ( $tag ==
'redirect' ) {
768 } elseif ( $tag ==
'revision' || $tag ==
'upload' ) {
771 isset( $pageInfo[
'ns'] ) ? $pageInfo[
'ns'] :
null );
774 if ( is_array(
$title ) ) {
776 list( $pageInfo[
'_title'], $foreignTitle ) =
$title;
784 if ( $tag ==
'revision' ) {
790 } elseif ( $tag !=
'#text' ) {
791 $this->
warn(
"Unhandled page XML tag $tag" );
801 if ( array_key_exists(
'_title', $pageInfo ) ) {
803 $pageInfo[
'revisionCount'],
804 $pageInfo[
'successfulRevisionCount'],
813 $this->
debug(
"Enter revision handler" );
816 $normalFields = [
'id',
'timestamp',
'comment',
'minor',
'model',
'format',
'text',
'sha1' ];
820 while ( $skip ? $this->reader->next() : $this->reader->read() ) {
821 if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
822 $this->reader->localName ==
'revision' ) {
826 $tag = $this->reader->localName;
828 if ( !
Hooks::run(
'ImportHandleRevisionXMLTag', [
829 $this, $pageInfo, $revisionInfo
832 } elseif ( in_array( $tag, $normalFields ) ) {
834 } elseif ( $tag ==
'contributor' ) {
836 } elseif ( $tag !=
'#text' ) {
837 $this->
warn(
"Unhandled revision XML tag $tag" );
842 $pageInfo[
'revisionCount']++;
844 $pageInfo[
'successfulRevisionCount']++;
860 if ( ( !isset( $revisionInfo[
'model'] ) ||
861 in_array( $revisionInfo[
'model'], [
872 ( isset( $revisionInfo[
'id'] ) ?
873 "the revision with ID $revisionInfo[id]" :
875 ) .
" exceeds the maximum allowable size ($wgMaxArticleSize KB)" );
880 if ( isset( $revisionInfo[
'id'] ) ) {
881 $revision->setID( $revisionInfo[
'id'] );
883 if ( isset( $revisionInfo[
'model'] ) ) {
884 $revision->setModel( $revisionInfo[
'model'] );
886 if ( isset( $revisionInfo[
'format'] ) ) {
887 $revision->setFormat( $revisionInfo[
'format'] );
889 $revision->setTitle( $pageInfo[
'_title'] );
891 if ( isset( $revisionInfo[
'text'] ) ) {
892 $handler = $revision->getContentHandler();
894 $revisionInfo[
'text'],
895 $revision->getFormat() );
897 $revision->setText( $text );
899 if ( isset( $revisionInfo[
'timestamp'] ) ) {
900 $revision->setTimestamp( $revisionInfo[
'timestamp'] );
905 if ( isset( $revisionInfo[
'comment'] ) ) {
906 $revision->setComment( $revisionInfo[
'comment'] );
909 if ( isset( $revisionInfo[
'minor'] ) ) {
910 $revision->setMinor(
true );
912 if ( isset( $revisionInfo[
'contributor'][
'ip'] ) ) {
913 $revision->setUserIP( $revisionInfo[
'contributor'][
'ip'] );
914 } elseif ( isset( $revisionInfo[
'contributor'][
'username'] ) ) {
915 $revision->setUsername( $revisionInfo[
'contributor'][
'username'] );
917 $revision->setUsername(
'Unknown user' );
919 if ( isset( $revisionInfo[
'sha1'] ) ) {
920 $revision->setSha1Base36( $revisionInfo[
'sha1'] );
922 $revision->setNoUpdates( $this->mNoUpdates );
932 $this->
debug(
"Enter upload handler" );
935 $normalFields = [
'timestamp',
'comment',
'filename',
'text',
936 'src',
'size',
'sha1base36',
'archivename',
'rel' ];
940 while ( $skip ? $this->reader->next() : $this->reader->read() ) {
941 if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
942 $this->reader->localName ==
'upload' ) {
946 $tag = $this->reader->localName;
948 if ( !
Hooks::run(
'ImportHandleUploadXMLTag', [
952 } elseif ( in_array( $tag, $normalFields ) ) {
954 } elseif ( $tag ==
'contributor' ) {
956 } elseif ( $tag ==
'contents' ) {
958 $encoding = $this->reader->getAttribute(
'encoding' );
959 if ( $encoding ===
'base64' ) {
960 $uploadInfo[
'fileSrc'] = $this->
dumpTemp( base64_decode( $contents ) );
961 $uploadInfo[
'isTempSrc'] =
true;
963 } elseif ( $tag !=
'#text' ) {
964 $this->
warn(
"Unhandled upload XML tag $tag" );
969 if ( $this->mImageBasePath && isset( $uploadInfo[
'rel'] ) ) {
970 $path =
"{$this->mImageBasePath}/{$uploadInfo['rel']}";
971 if ( file_exists(
$path ) ) {
972 $uploadInfo[
'fileSrc'] =
$path;
973 $uploadInfo[
'isTempSrc'] =
false;
977 if ( $this->mImportUploads ) {
987 $filename = tempnam(
wfTempDir(),
'importupload' );
988 file_put_contents( $filename, $contents );
999 $text = isset( $uploadInfo[
'text'] ) ? $uploadInfo[
'text'] :
'';
1001 $revision->setTitle( $pageInfo[
'_title'] );
1002 $revision->setID( $pageInfo[
'id'] );
1003 $revision->setTimestamp( $uploadInfo[
'timestamp'] );
1004 $revision->setText( $text );
1005 $revision->setFilename( $uploadInfo[
'filename'] );
1006 if ( isset( $uploadInfo[
'archivename'] ) ) {
1007 $revision->setArchiveName( $uploadInfo[
'archivename'] );
1009 $revision->setSrc( $uploadInfo[
'src'] );
1010 if ( isset( $uploadInfo[
'fileSrc'] ) ) {
1011 $revision->setFileSrc( $uploadInfo[
'fileSrc'],
1012 !empty( $uploadInfo[
'isTempSrc'] ) );
1014 if ( isset( $uploadInfo[
'sha1base36'] ) ) {
1015 $revision->setSha1Base36( $uploadInfo[
'sha1base36'] );
1017 $revision->setSize( intval( $uploadInfo[
'size'] ) );
1018 $revision->setComment( $uploadInfo[
'comment'] );
1020 if ( isset( $uploadInfo[
'contributor'][
'ip'] ) ) {
1021 $revision->setUserIP( $uploadInfo[
'contributor'][
'ip'] );
1023 if ( isset( $uploadInfo[
'contributor'][
'username'] ) ) {
1024 $revision->setUsername( $uploadInfo[
'contributor'][
'username'] );
1026 $revision->setNoUpdates( $this->mNoUpdates );
1028 return call_user_func( $this->mUploadCallback, $revision );
1035 $fields = [
'id',
'ip',
'username' ];
1038 if ( $this->reader->isEmptyElement ) {
1041 while ( $this->reader->read() ) {
1042 if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
1043 $this->reader->localName ==
'contributor' ) {
1047 $tag = $this->reader->localName;
1049 if ( in_array( $tag, $fields ) ) {
1063 if ( is_null( $this->foreignNamespaces ) ) {
1067 $this->foreignNamespaces );
1070 $foreignTitle = $foreignTitleFactory->createForeignTitle( $text,
1073 $title = $this->importTitleFactory->createTitleFromForeignTitle(
1076 $commandLineMode = $this->config->get(
'CommandLineMode' );
1077 if ( is_null(
$title ) ) {
1078 # Invalid page title? Ignore the page
1079 $this->
notice(
'import-error-invalid', $foreignTitle->getFullText() );
1081 } elseif (
$title->isExternal() ) {
1082 $this->
notice(
'import-error-interwiki',
$title->getPrefixedText() );
1084 } elseif ( !
$title->canExist() ) {
1085 $this->
notice(
'import-error-special',
$title->getPrefixedText() );
1087 } elseif ( !
$title->userCan(
'edit' ) && !$commandLineMode ) {
1088 # Do not import if the importing wiki user cannot edit this page
1089 $this->
notice(
'import-error-edit',
$title->getPrefixedText() );
1091 } elseif ( !
$title->exists() && !
$title->userCan(
'create' ) && !$commandLineMode ) {
1092 # Do not import if the importing wiki user cannot create this page
1093 $this->
notice(
'import-error-create',
$title->getPrefixedText() );
1097 return [
$title, $foreignTitle ];