MediaWiki REL1_31
WikiImporter.php
Go to the documentation of this file.
1<?php
34 private $reader = null;
35 private $foreignNamespaces = null;
40 private $mNoUpdates = false;
41 private $pageOffset = 0;
43 private $config;
47 private $countableCache = [];
49 private $disableStatisticsUpdate = false;
52
60 if ( !class_exists( 'XMLReader' ) ) {
61 throw new Exception( 'Import requires PHP to have been compiled with libxml support' );
62 }
63
64 $this->reader = new XMLReader();
65 $this->config = $config;
66
67 if ( !in_array( 'uploadsource', stream_get_wrappers() ) ) {
68 stream_wrapper_register( 'uploadsource', UploadSourceAdapter::class );
69 }
71
72 // Enable the entity loader, as it is needed for loading external URLs via
73 // XMLReader::open (T86036)
74 $oldDisable = libxml_disable_entity_loader( false );
75 if ( defined( 'LIBXML_PARSEHUGE' ) ) {
76 $status = $this->reader->open( "uploadsource://$id", null, LIBXML_PARSEHUGE );
77 } else {
78 $status = $this->reader->open( "uploadsource://$id" );
79 }
80 if ( !$status ) {
81 $error = libxml_get_last_error();
82 libxml_disable_entity_loader( $oldDisable );
83 throw new MWException( 'Encountered an internal error while initializing WikiImporter object: ' .
84 $error->message );
85 }
86 libxml_disable_entity_loader( $oldDisable );
87
88 // Default callbacks
89 $this->setPageCallback( [ $this, 'beforeImportPage' ] );
90 $this->setRevisionCallback( [ $this, "importRevision" ] );
91 $this->setUploadCallback( [ $this, 'importUpload' ] );
92 $this->setLogItemCallback( [ $this, 'importLogItem' ] );
93 $this->setPageOutCallback( [ $this, 'finishImportPage' ] );
94
95 $this->importTitleFactory = new NaiveImportTitleFactory();
96 $this->externalUserNames = new ExternalUserNames( 'imported', false );
97 }
98
/**
 * Give callers access to the XMLReader this importer pulls the dump from.
 *
 * @return XMLReader
 */
public function getReader() {
	$xmlReader = $this->reader;

	return $xmlReader;
}
105
/**
 * Record an XML error in the debug log.
 *
 * Despite the name, nothing is thrown: the message is passed to debug()
 * (which only logs in debug mode) and unconditionally to wfDebug().
 *
 * @param string $err Error description
 */
public function throwXmlError( $err ) {
	$this->debug( 'FAILURE: ' . $err );
	wfDebug( 'WikiImporter XML error: ' . $err . "\n" );
}
110
/**
 * Write a line to the debug log, but only while debug mode is enabled
 * (see setDebug()).
 *
 * @param string $data Text to log
 */
public function debug( $data ) {
	if ( !$this->mDebug ) {
		return;
	}

	wfDebug( "IMPORT: $data\n" );
}
116
/**
 * Write a warning to the debug log. Unlike debug(), this is not gated on
 * debug mode.
 *
 * @param string $data Text to log
 */
public function warn( $data ) {
	$line = "IMPORT: $data\n";
	wfDebug( $line );
}
120
/**
 * Report a notice about the import.
 *
 * Every argument after $msg is collected into a list of message parameters.
 * When a callable notice callback has been registered it receives
 * ( $msg, $params ); otherwise the rendered message goes to the debug log.
 *
 * @param string $msg Message key
 */
public function notice( $msg /*, $param, ...*/ ) {
	// Everything after the message key is a message parameter.
	$params = array_slice( func_get_args(), 1 );

	if ( !is_callable( $this->mNoticeCallback ) ) {
		# No ImportReporter -> CLI
		// T177997: the command line importers should call setNoticeCallback()
		// for their own custom callback to echo the notice
		wfDebug( wfMessage( $msg, $params )->text() . "\n" );
		return;
	}

	call_user_func( $this->mNoticeCallback, $msg, $params );
}
133
/**
 * Switch debug mode on or off; debug() only logs while it is on.
 *
 * @param bool $debug
 */
public function setDebug( $debug ) {
	$this->mDebug = $debug;
}
141
/**
 * Set 'no updates' mode. The flag is forwarded to every WikiRevision this
 * importer creates via WikiRevision::setNoUpdates().
 *
 * @param bool $noupdates
 */
public function setNoUpdates( $noupdates ) {
	$this->mNoUpdates = $noupdates;
}
149
/**
 * Set the page offset. While it is non-zero, doImport() counts <page> tags
 * and skips top-level nodes until that many pages have been seen.
 *
 * @param int $nthPage
 */
public function setPageOffset( $nthPage ) {
	$this->pageOffset = $nthPage;
}
159
/**
 * Register the callback that displays notice messages. It is invoked by
 * notice() as callback( $msg, $params ).
 *
 * The assignment goes through wfSetVar(), so passing null leaves the
 * current callback in place and merely returns it.
 *
 * @param callable|null $callback
 * @return callable|null The callback that was registered before
 */
public function setNoticeCallback( $callback ) {
	$formerCallback = wfSetVar( $this->mNoticeCallback, $callback );

	return $formerCallback;
}
169
/**
 * Set the action to perform as each new page in the stream is reached.
 * The callback receives a single argument: the array of local title and
 * foreign title produced for the page.
 *
 * @param callable $callback
 * @return callable|null The callback that was registered before
 */
public function setPageCallback( $callback ) {
	$formerCallback = $this->mPageCallback;
	$this->mPageCallback = $callback;

	return $formerCallback;
}
180
/**
 * Set the action to perform as each page in the stream is completed.
 * The callback receives the local title, the foreign title, the revision
 * count, the successful revision count and the page info array.
 *
 * @param callable $callback
 * @return callable|null The callback that was registered before
 */
public function setPageOutCallback( $callback ) {
	$formerCallback = $this->mPageOutCallback;
	$this->mPageOutCallback = $callback;

	return $formerCallback;
}
195
/**
 * Set the action to perform as each page revision is reached.
 * The callback is invoked as callback( $revision, $importer ).
 *
 * @param callable $callback
 * @return callable|null The callback that was registered before
 */
public function setRevisionCallback( $callback ) {
	$formerCallback = $this->mRevisionCallback;
	$this->mRevisionCallback = $callback;

	return $formerCallback;
}
206
/**
 * Set the action to perform as each file upload version is reached.
 * The callback is invoked with the upload's WikiRevision as its only
 * argument.
 *
 * @param callable $callback
 * @return callable|null The callback that was registered before
 */
public function setUploadCallback( $callback ) {
	$formerCallback = $this->mUploadCallback;
	$this->mUploadCallback = $callback;

	return $formerCallback;
}
217
/**
 * Set the action to perform as each log item is reached.
 * The callback is invoked as callback( $revision, $importer ).
 *
 * @param callable $callback
 * @return callable|null The callback that was registered before
 */
public function setLogItemCallback( $callback ) {
	$formerCallback = $this->mLogItemCallback;
	$this->mLogItemCallback = $callback;

	return $formerCallback;
}
228
/**
 * Set the action to perform when site info is encountered.
 * The callback is invoked as callback( $siteInfo, $importer ).
 *
 * @param callable $callback
 * @return callable|null The callback that was registered before
 */
public function setSiteInfoCallback( $callback ) {
	$formerCallback = $this->mSiteInfoCallback;
	$this->mSiteInfoCallback = $callback;

	return $formerCallback;
}
239
/**
 * Set the factory object used to convert ForeignTitle objects into local
 * Title objects (processTitle() calls its createTitleFromForeignTitle()).
 *
 * @param ImportTitleFactory $factory
 */
public function setImportTitleFactory( $factory ) {
	$this->importTitleFactory = $factory;
}
248
/**
 * Set a target namespace to override the defaults.
 *
 * @param null|int $namespace Namespace ID, or null to not override
 * @return bool False when $namespace is negative or is not an existing
 *   namespace, true otherwise
 */
public function setTargetNamespace( $namespace ) {
	if ( is_null( $namespace ) ) {
		// Don't override namespaces
		// NOTE(review): no title factory is installed on this path in the
		// code as shown; confirm whether a default should be restored here.
		return true;
	}

	if ( !( $namespace >= 0 ) || !MWNamespace::exists( intval( $namespace ) ) ) {
		return false;
	}

	$namespaceId = intval( $namespace );
	$this->setImportTitleFactory( new NamespaceImportTitleFactory( $namespaceId ) );

	return true;
}
270
/**
 * Set a target root page under which all pages are imported.
 *
 * A null or empty root page is accepted without further checks. Otherwise
 * trailing slashes are removed and the text must parse to a local
 * (non-interwiki) title in a namespace that supports subpages.
 *
 * @param null|string $rootpage
 * @return Status Good on success; fatal with 'import-rootpage-invalid' or
 *   'import-rootpage-nosubpage' when the root page cannot be used
 */
public function setTargetRootPage( $rootpage ) {
	// Without this declaration $wgContLang would be an undefined local and
	// the getNsText() call below would fail on null.
	global $wgContLang;

	$status = Status::newGood();

	if ( is_null( $rootpage ) || $rootpage === '' ) {
		// No rootpage
		// NOTE(review): nothing else happens on this path in the code as
		// shown; confirm whether the title factory should be reset here.
		return $status;
	}

	$rootpage = rtrim( $rootpage, '/' ); // avoid double slashes
	$title = Title::newFromText( $rootpage );

	if ( !$title || $title->isExternal() ) {
		$status->fatal( 'import-rootpage-invalid' );
	} elseif ( !MWNamespace::hasSubpages( $title->getNamespace() ) ) {
		// The main namespace has no name of its own, so use the
		// 'blanknamespace' message for display instead.
		$displayNSText = $title->getNamespace() == NS_MAIN
			? wfMessage( 'blanknamespace' )->text()
			: $wgContLang->getNsText( $title->getNamespace() );
		$status->fatal( 'import-rootpage-nosubpage', $displayNSText );
	} else {
		// set namespace to 'all', so the namespace check in processTitle() can pass
		$this->setTargetNamespace( null );
		// NOTE(review): the validated $title is not used beyond this point in
		// the code as shown; confirm whether a root-page-aware title factory
		// should be installed here.
	}

	return $status;
}
304
/**
 * Set the base directory for upload sources. handleUpload() looks for
 * "<dir>/<rel>" on disk and uses that file when it exists.
 *
 * @param string $dir
 */
public function setImageBasePath( $dir ) {
	$this->mImageBasePath = $dir;
}
311
/**
 * Choose whether <upload> elements are actually imported. handleUpload()
 * still parses them when this is off, but does not process them.
 *
 * @param bool $import
 */
public function setImportUploads( $import ) {
	$this->mImportUploads = $import;
}
318
/**
 * Replace the ExternalUserNames helper used to prefix imported user names.
 * Both arguments are passed straight to the ExternalUserNames constructor.
 *
 * @param string $usernamePrefix
 * @param bool $assignKnownUsers
 */
public function setUsernamePrefix( $usernamePrefix, $assignKnownUsers ) {
	$userNames = new ExternalUserNames( $usernamePrefix, $assignKnownUsers );
	$this->externalUserNames = $userNames;
}
327
/**
 * Turn off the article count adjustment that finishImportPage() performs
 * after each page.
 */
public function disableStatisticsUpdate() {
	$this->disableStatisticsUpdate = true;
}
335
/**
 * Default per-page callback. Remembers whether the page is countable
 * before the import touches it, so finishImportPage() can compare.
 *
 * @param array $titleAndForeignTitle Local title at index 0
 * @return bool Always true
 */
public function beforeImportPage( $titleAndForeignTitle ) {
	$localTitle = $titleAndForeignTitle[0];
	$wikiPage = WikiPage::factory( $localTitle );

	$cacheKey = 'title_' . $localTitle->getPrefixedText();
	$this->countableCache[$cacheKey] = $wikiPage->isCountable();

	return true;
}
348
/**
 * Default per-revision callback, performs the import.
 *
 * A notice is emitted and false returned when the revision's content
 * handler cannot be used on the target title, or when importing raises a
 * MWContentSerializationException.
 *
 * @param WikiRevision $revision
 * @return bool Result of WikiRevision::importOldRevision(), or false on
 *   either of the errors above
 */
public function importRevision( $revision ) {
	if ( $revision->getContentHandler()->canBeUsedOn( $revision->getTitle() ) ) {
		try {
			return $revision->importOldRevision();
		} catch ( MWContentSerializationException $ex ) {
			$errorKey = 'import-error-unserialize';
		}
	} else {
		$errorKey = 'import-error-bad-location';
	}

	// Both failure modes report the same four details about the revision.
	$this->notice( $errorKey,
		$revision->getTitle()->getPrefixedText(),
		$revision->getID(),
		$revision->getModel(),
		$revision->getFormat() );

	return false;
}
377
/**
 * Default per-log-item callback; delegates to WikiRevision::importLogItem().
 *
 * @param WikiRevision $revision
 * @return mixed Whatever WikiRevision::importLogItem() returns
 */
public function importLogItem( $revision ) {
	$outcome = $revision->importLogItem();

	return $outcome;
}
386
/**
 * Default per-upload callback; delegates to WikiRevision::importUpload().
 *
 * @param WikiRevision $revision
 * @return mixed Whatever WikiRevision::importUpload() returns
 */
public function importUpload( $revision ) {
	$outcome = $revision->importUpload();

	return $outcome;
}
395
/**
 * Default page-out callback. Mostly for hook use.
 *
 * Unless statistics updates are disabled, compares the page's countable
 * state with the value cached by beforeImportPage() and queues a deferred
 * site-stats update for the difference. Always ends by running the
 * 'AfterImportPage' hook with the original arguments.
 *
 * @param Title $title
 * @param ForeignTitle $foreignTitle
 * @param int $revCount
 * @param int $sRevCount
 * @param array $pageInfo
 * @return bool Result of the 'AfterImportPage' hook
 */
public function finishImportPage( $title, $foreignTitle, $revCount,
	$sRevCount, $pageInfo
) {
	// Captured up front; none of the parameters are modified below.
	$hookArgs = func_get_args();

	// Update article count statistics (T42009)
	// The normal counting logic in WikiPage->doEditUpdates() is designed for
	// one-revision-at-a-time editing, not bulk imports. In this situation it
	// suffers from issues of replica DB lag. We let WikiPage handle the total page
	// and revision count, and we implement our own custom logic for the
	// article (content page) count.
	if ( !$this->disableStatisticsUpdate ) {
		$wikiPage = WikiPage::factory( $title );
		$wikiPage->loadPageData( 'fromdbmaster' );
		$content = $wikiPage->getContent();

		if ( $content !== null ) {
			$editInfo = $wikiPage->prepareContentForEdit( $content );
			$cacheKey = 'title_' . $title->getPrefixedText();
			$isCountableNow = $wikiPage->isCountable( $editInfo );

			if ( array_key_exists( $cacheKey, $this->countableCache ) &&
				$isCountableNow != $this->countableCache[$cacheKey]
			) {
				// +1 when the page became countable, -1 when it stopped being so.
				$articleDelta = (int)$isCountableNow - (int)$this->countableCache[$cacheKey];
				DeferredUpdates::addUpdate( SiteStatsUpdate::factory( [
					'articles' => $articleDelta
				] ) );
			}
		} else {
			wfDebug( __METHOD__ . ': Skipping article count adjustment for ' . $title .
				' because WikiPage::getContent() returned null' );
		}
	}

	return Hooks::run( 'AfterImportPage', $hookArgs );
}
437
/**
 * Alternate per-revision callback, for debugging. Dumps the revision's
 * title, user, timestamp, comment and text through debug().
 *
 * @param WikiRevision &$revision
 */
public function debugRevisionHandler( &$revision ) {
	$this->debug( "Got revision:" );

	$titleText = is_object( $revision->title )
		? $revision->title->getPrefixedText()
		: "<invalid>";
	$this->debug( "-- Title: " . $titleText );

	$this->debug( "-- User: " . $revision->user_text );
	$this->debug( "-- Timestamp: " . $revision->timestamp );
	$this->debug( "-- Comment: " . $revision->comment );
	$this->debug( "-- Text: " . $revision->text );
}
454
/**
 * Notify the callback function of site info.
 *
 * @param array $siteInfo
 * @return mixed The callback's return value, or false when no callback
 *   is registered
 */
private function siteInfoCallback( $siteInfo ) {
	if ( !isset( $this->mSiteInfoCallback ) ) {
		return false;
	}

	return call_user_func( $this->mSiteInfoCallback, $siteInfo, $this );
}
468
/**
 * Notify the callback function when a new "<page>" is reached.
 * Does nothing when no page callback is registered.
 *
 * @param array $title Local title and foreign title, as passed by handlePage()
 */
public function pageCallback( $title ) {
	if ( !isset( $this->mPageCallback ) ) {
		return;
	}

	call_user_func( $this->mPageCallback, $title );
}
478
/**
 * Notify the callback function when a "</page>" is closed. All five
 * arguments are forwarded unchanged; nothing happens when no page-out
 * callback is registered.
 *
 * @param Title $title
 * @param ForeignTitle $foreignTitle
 * @param int $revCount
 * @param int $sucCount Number of revisions for which callback returned true
 * @param array $pageInfo Associative array of page information
 */
private function pageOutCallback( $title, $foreignTitle, $revCount,
	$sucCount, $pageInfo
) {
	if ( !isset( $this->mPageOutCallback ) ) {
		return;
	}

	$forwarded = func_get_args();
	call_user_func_array( $this->mPageOutCallback, $forwarded );
}
494
/**
 * Notify the callback function of a revision.
 *
 * @param WikiRevision $revision
 * @return mixed The callback's return value, or false when no callback
 *   is registered
 */
private function revisionCallback( $revision ) {
	if ( !isset( $this->mRevisionCallback ) ) {
		return false;
	}

	return call_user_func( $this->mRevisionCallback, $revision, $this );
}
508
/**
 * Notify the callback function of a new log item.
 *
 * @param WikiRevision $revision
 * @return mixed The callback's return value, or false when no callback
 *   is registered
 */
private function logItemCallback( $revision ) {
	if ( !isset( $this->mLogItemCallback ) ) {
		return false;
	}

	return call_user_func( $this->mLogItemCallback, $revision, $this );
}
522
/**
 * Retrieve the contents of the named attribute of the current element.
 *
 * @param string $attr The name of the attribute
 * @return string|null Whatever XMLReader::getAttribute() returns
 */
public function nodeAttribute( $attr ) {
	$attributeValue = $this->reader->getAttribute( $attr );

	return $attributeValue;
}
532
/**
 * Fetch the text contents of the current element.
 *
 * Reads forward, concatenating text, CDATA and significant-whitespace
 * nodes, and stops at the first end-element node. An empty element yields
 * "" without advancing the reader. If the input ends before an end element
 * is seen, the reader is closed and '' is returned.
 *
 * @return string
 */
public function nodeContents() {
	if ( $this->reader->isEmptyElement ) {
		return "";
	}

	$textualTypes = [
		XMLReader::TEXT,
		XMLReader::CDATA,
		XMLReader::SIGNIFICANT_WHITESPACE,
	];

	$collected = "";
	while ( $this->reader->read() ) {
		$nodeType = $this->reader->nodeType;

		if ( $nodeType === XMLReader::END_ELEMENT ) {
			return $collected;
		}
		if ( in_array( $nodeType, $textualTypes, true ) ) {
			$collected .= $this->reader->value;
		}
	}

	// Ran out of input without finding the closing tag.
	$this->reader->close();
	return '';
}
560
/**
 * Primary entry point. Walks the top-level nodes of the dump and dispatches
 * <siteinfo>, <page> and <logitem> elements to their handlers.
 *
 * The libxml entity loader is disabled for the duration of the import and
 * its previous state is restored afterwards. The reader is closed however
 * the import ends, including when it fails.
 *
 * @throws MWException When the document root is not <mediawiki>
 * @return bool Always true; failures are reported by exception
 */
public function doImport() {
	// Calls to reader->read need to be wrapped in calls to
	// libxml_disable_entity_loader() to avoid local file
	// inclusion attacks (T48932).
	$oldDisable = libxml_disable_entity_loader( true );

	try {
		$this->reader->read();

		if ( $this->reader->localName != 'mediawiki' ) {
			throw new MWException( "Expected <mediawiki> tag, got " .
				$this->reader->localName );
		}
		$this->debug( "<mediawiki> tag is correct." );

		$this->debug( "Starting primary dump processing loop." );

		$keepReading = $this->reader->read();
		$skip = false;
		$pageCount = 0;

		while ( $keepReading ) {
			$tag = $this->reader->localName;

			if ( $this->pageOffset ) {
				if ( $tag === 'page' ) {
					$pageCount++;
				}
				// Jump over whole top-level subtrees until the requested
				// number of pages has been seen.
				if ( $pageCount < $this->pageOffset ) {
					$keepReading = $this->reader->next();
					continue;
				}
			}
			$type = $this->reader->nodeType;

			if ( !Hooks::run( 'ImportHandleToplevelXMLTag', [ $this ] ) ) {
				// Do nothing
			} elseif ( $tag == 'mediawiki' && $type == XMLReader::END_ELEMENT ) {
				break;
			} elseif ( $tag == 'siteinfo' ) {
				$this->handleSiteInfo();
			} elseif ( $tag == 'page' ) {
				$this->handlePage();
			} elseif ( $tag == 'logitem' ) {
				$this->handleLogItem();
			} elseif ( $tag != '#text' ) {
				$this->warn( "Unhandled top-level XML tag $tag" );

				$skip = true;
			}

			if ( $skip ) {
				// next() moves past the unhandled element's whole subtree.
				$keepReading = $this->reader->next();
				$skip = false;
				$this->debug( "Skip" );
			} else {
				$keepReading = $this->reader->read();
			}
		}
	} finally {
		// Runs for normal completion, Exceptions and PHP 7 Errors alike, so
		// the process-wide entity loader setting is never left modified.
		libxml_disable_entity_loader( $oldDisable );
		$this->reader->close();
	}

	return true;
}
639
/**
 * Consume a <siteinfo> element.
 *
 * <namespace> children are recorded in $this->foreignNamespaces, keyed by
 * their 'key' attribute. The simple text fields are gathered into an array
 * which, together with the namespaces under '_namespaces', is handed to
 * the site info callback.
 */
private function handleSiteInfo() {
	$this->debug( "Enter site info handler." );

	// Fields that can just be stuffed in the siteInfo object
	$plainFields = [ 'sitename', 'base', 'generator', 'case' ];
	$siteInfo = [];

	while ( $this->reader->read() ) {
		$tag = $this->reader->localName;

		if ( $this->reader->nodeType == XMLReader::END_ELEMENT && $tag == 'siteinfo' ) {
			break;
		}

		if ( $tag == 'namespace' ) {
			// The attribute has to be read before nodeContents() advances
			// the reader past the element's start tag.
			$namespaceKey = $this->nodeAttribute( 'key' );
			$this->foreignNamespaces[$namespaceKey] = $this->nodeContents();
		} elseif ( in_array( $tag, $plainFields ) ) {
			$siteInfo[$tag] = $this->nodeContents();
		}
	}

	$siteInfo['_namespaces'] = $this->foreignNamespaces;
	$this->siteInfoCallback( $siteInfo );
}
666
/**
 * Consume a <logitem> element, collecting its fields, and pass the result
 * to processLogItem().
 *
 * The 'ImportHandleLogItemXMLTag' hook runs for every child node; when it
 * returns false the node is left to the hook.
 */
private function handleLogItem() {
	$this->debug( "Enter log item handler." );

	// Fields that can just be stuffed in the pageInfo object
	$plainFields = [ 'id', 'comment', 'type', 'action', 'timestamp',
		'logtitle', 'params' ];
	$logInfo = [];

	while ( $this->reader->read() ) {
		$tag = $this->reader->localName;

		if ( $this->reader->nodeType == XMLReader::END_ELEMENT && $tag == 'logitem' ) {
			break;
		}

		$hookAllows = Hooks::run( 'ImportHandleLogItemXMLTag', [
			$this, $logInfo
		] );
		if ( !$hookAllows ) {
			// Do nothing
			continue;
		}

		if ( in_array( $tag, $plainFields ) ) {
			$logInfo[$tag] = $this->nodeContents();
		} elseif ( $tag == 'contributor' ) {
			$logInfo['contributor'] = $this->handleContributor();
		} elseif ( $tag != '#text' ) {
			$this->warn( "Unhandled log-item XML tag $tag" );
		}
	}

	$this->processLogItem( $logInfo );
}
698
/**
 * Build a WikiRevision from parsed log item data and hand it to the log
 * item callback.
 *
 * 'type' and 'action' are read unconditionally; every other field is
 * applied only when present. A missing contributor username is replaced by
 * a prefixed 'Unknown user'.
 *
 * @param array $logInfo Fields collected by handleLogItem()
 * @return mixed Return value of the log item callback, or false when none
 *   is registered
 */
private function processLogItem( $logInfo ) {
	$logEntry = new WikiRevision( $this->config );

	if ( isset( $logInfo['id'] ) ) {
		$logEntry->setID( $logInfo['id'] );
	}
	$logEntry->setType( $logInfo['type'] );
	$logEntry->setAction( $logInfo['action'] );
	if ( isset( $logInfo['timestamp'] ) ) {
		$logEntry->setTimestamp( $logInfo['timestamp'] );
	}
	if ( isset( $logInfo['params'] ) ) {
		$logEntry->setParams( $logInfo['params'] );
	}
	if ( isset( $logInfo['logtitle'] ) ) {
		// @todo Using Title for non-local titles is a recipe for disaster.
		// We should use ForeignTitle here instead.
		$logEntry->setTitle( Title::newFromText( $logInfo['logtitle'] ) );
	}

	$logEntry->setNoUpdates( $this->mNoUpdates );

	if ( isset( $logInfo['comment'] ) ) {
		$logEntry->setComment( $logInfo['comment'] );
	}

	if ( isset( $logInfo['contributor']['ip'] ) ) {
		$logEntry->setUserIP( $logInfo['contributor']['ip'] );
	}

	$userName = isset( $logInfo['contributor']['username'] )
		? $this->externalUserNames->applyPrefix( $logInfo['contributor']['username'] )
		: $this->externalUserNames->addPrefix( 'Unknown user' );
	$logEntry->setUsername( $userName );

	return $this->logItemCallback( $logEntry );
}
743
/**
 * Consume a <page> element.
 *
 * Simple fields are stored in the page info array. The local title is
 * resolved when the first <revision> or <upload> child is met; when that
 * fails, the rest of the page is skipped. Once the closing tag is reached,
 * the page-out callback is notified, but only for pages whose title was
 * resolved.
 */
private function handlePage() {
	// Handle page data.
	$this->debug( "Enter page handler." );
	$pageInfo = [ 'revisionCount' => 0, 'successfulRevisionCount' => 0 ];

	// Fields that can just be stuffed in the pageInfo object
	$plainFields = [ 'title', 'ns', 'id', 'redirect', 'restrictions' ];

	$skip = false;
	$badTitle = false;

	// next() jumps over the current node's subtree, read() descends into it.
	while ( $skip ? $this->reader->next() : $this->reader->read() ) {
		if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
			$this->reader->localName == 'page'
		) {
			break;
		}

		$skip = false;
		$tag = $this->reader->localName;

		if ( $badTitle ) {
			// The title is invalid, bail out of this page
			$skip = true;
			continue;
		}

		if ( !Hooks::run( 'ImportHandlePageXMLTag', [ $this, &$pageInfo ] ) ) {
			// Do nothing
			continue;
		}

		if ( in_array( $tag, $plainFields ) ) {
			// An XML snippet:
			// <page>
			//     <id>123</id>
			//     <title>Page</title>
			//     <redirect title="NewTitle"/>
			//     ...
			// Because the redirect tag is built differently, we need special handling for that case.
			$pageInfo[$tag] = ( $tag == 'redirect' )
				? $this->nodeAttribute( 'title' )
				: $this->nodeContents();
			continue;
		}

		if ( $tag == 'revision' || $tag == 'upload' ) {
			if ( !isset( $title ) ) {
				$title = $this->processTitle( $pageInfo['title'],
					isset( $pageInfo['ns'] ) ? $pageInfo['ns'] : null );

				// $title is either an array of two titles or false.
				if ( is_array( $title ) ) {
					$this->pageCallback( $title );
					list( $pageInfo['_title'], $foreignTitle ) = $title;
				} else {
					$badTitle = true;
					$skip = true;
				}
			}

			if ( $title ) {
				if ( $tag == 'revision' ) {
					$this->handleRevision( $pageInfo );
				} else {
					$this->handleUpload( $pageInfo );
				}
			}
			continue;
		}

		if ( $tag != '#text' ) {
			$this->warn( "Unhandled page XML tag $tag" );
			$skip = true;
		}
	}

	// @note $pageInfo['_title'] is only set if a valid $title was processed
	// above with no error. In that case pageCallback has been called and
	// $foreignTitle is set too, since both come from $title; otherwise
	// neither is set and the page-out callback is not notified.
	if ( array_key_exists( '_title', $pageInfo ) ) {
		$this->pageOutCallback( $pageInfo['_title'], $foreignTitle,
			$pageInfo['revisionCount'],
			$pageInfo['successfulRevisionCount'],
			$pageInfo );
	}
}
824
/**
 * Consume a <revision> element and process it.
 *
 * Increments $pageInfo['revisionCount'] for every revision seen, and
 * $pageInfo['successfulRevisionCount'] when processRevision() returns a
 * truthy value.
 *
 * @param array &$pageInfo Page info array, updated in place
 */
private function handleRevision( &$pageInfo ) {
	$this->debug( "Enter revision handler" );

	$plainFields = [ 'id', 'timestamp', 'comment', 'minor', 'model', 'format', 'text', 'sha1' ];
	$revisionInfo = [];

	// Once set, $skip stays set for the remainder of this element.
	$skip = false;

	while ( $skip ? $this->reader->next() : $this->reader->read() ) {
		$tag = $this->reader->localName;

		if ( $this->reader->nodeType == XMLReader::END_ELEMENT && $tag == 'revision' ) {
			break;
		}

		$hookAllows = Hooks::run( 'ImportHandleRevisionXMLTag', [
			$this, $pageInfo, $revisionInfo
		] );
		if ( !$hookAllows ) {
			// Do nothing
			continue;
		}

		if ( in_array( $tag, $plainFields ) ) {
			$revisionInfo[$tag] = $this->nodeContents();
		} elseif ( $tag == 'contributor' ) {
			$revisionInfo['contributor'] = $this->handleContributor();
		} elseif ( $tag != '#text' ) {
			$this->warn( "Unhandled revision XML tag $tag" );
			$skip = true;
		}
	}

	$pageInfo['revisionCount']++;
	if ( $this->processRevision( $pageInfo, $revisionInfo ) ) {
		$pageInfo['successfulRevisionCount']++;
	}
}
863
/**
 * Build a WikiRevision from parsed revision data and hand it to the
 * revision callback.
 *
 * @param array $pageInfo Page info; '_title' must hold the local title
 * @param array $revisionInfo Fields collected by handleRevision()
 * @throws MWException When the text of a size-checked content model
 *   exceeds $wgMaxArticleSize
 * @return mixed Return value of the revision callback, or false when none
 *   is registered
 */
private function processRevision( $pageInfo, $revisionInfo ) {
	// Maximum article size in kilobytes. Without this declaration the
	// variable would be an undefined local and the limit would be 0.
	global $wgMaxArticleSize;

	// Make sure revisions won't violate $wgMaxArticleSize, which could lead to
	// database errors and instability. Testing for revisions with only listed
	// content models, as other content models might use serialization formats
	// which aren't checked against $wgMaxArticleSize.
	$sizeCheckedModels = [
		'wikitext',
		'css',
		'json',
		'javascript',
		'text',
		''
	];
	$modelIsSizeChecked = !isset( $revisionInfo['model'] ) ||
		in_array( $revisionInfo['model'], $sizeCheckedModels );

	// A revision may carry no text at all (the same case is guarded again
	// further down), so only measure it when it is present.
	if ( $modelIsSizeChecked &&
		isset( $revisionInfo['text'] ) &&
		strlen( $revisionInfo['text'] ) > $wgMaxArticleSize * 1024
	) {
		throw new MWException( 'The text of ' .
			( isset( $revisionInfo['id'] ) ?
				"the revision with ID $revisionInfo[id]" :
				'a revision'
			) . " exceeds the maximum allowable size ($wgMaxArticleSize KB)" );
	}

	$revision = new WikiRevision( $this->config );

	if ( isset( $revisionInfo['id'] ) ) {
		$revision->setID( $revisionInfo['id'] );
	}
	if ( isset( $revisionInfo['model'] ) ) {
		$revision->setModel( $revisionInfo['model'] );
	}
	if ( isset( $revisionInfo['format'] ) ) {
		$revision->setFormat( $revisionInfo['format'] );
	}
	$revision->setTitle( $pageInfo['_title'] );

	if ( isset( $revisionInfo['text'] ) ) {
		// Let the content handler transform the raw text before storing it.
		$handler = $revision->getContentHandler();
		$text = $handler->importTransform(
			$revisionInfo['text'],
			$revision->getFormat() );

		$revision->setText( $text );
	}
	if ( isset( $revisionInfo['timestamp'] ) ) {
		$revision->setTimestamp( $revisionInfo['timestamp'] );
	} else {
		$revision->setTimestamp( wfTimestampNow() );
	}

	if ( isset( $revisionInfo['comment'] ) ) {
		$revision->setComment( $revisionInfo['comment'] );
	}

	// The mere presence of the field marks the edit as minor.
	if ( isset( $revisionInfo['minor'] ) ) {
		$revision->setMinor( true );
	}
	if ( isset( $revisionInfo['contributor']['ip'] ) ) {
		$revision->setUserIP( $revisionInfo['contributor']['ip'] );
	} elseif ( isset( $revisionInfo['contributor']['username'] ) ) {
		$revision->setUsername(
			$this->externalUserNames->applyPrefix( $revisionInfo['contributor']['username'] )
		);
	} else {
		$revision->setUsername( $this->externalUserNames->addPrefix( 'Unknown user' ) );
	}
	if ( isset( $revisionInfo['sha1'] ) ) {
		$revision->setSha1Base36( $revisionInfo['sha1'] );
	}
	$revision->setNoUpdates( $this->mNoUpdates );

	return $this->revisionCallback( $revision );
}
945
/**
 * Consume an <upload> element.
 *
 * Base64-encoded <contents> are written to a temporary file. When an image
 * base path is configured and "<base path>/<rel>" exists on disk, that file
 * is used as the source instead. The upload is only processed when
 * importing uploads is enabled.
 *
 * @param array &$pageInfo
 * @return mixed Result of processUpload(), or null when uploads are not
 *   being imported
 */
private function handleUpload( &$pageInfo ) {
	$this->debug( "Enter upload handler" );

	$plainFields = [ 'timestamp', 'comment', 'filename', 'text',
		'src', 'size', 'sha1base36', 'archivename', 'rel' ];
	$uploadInfo = [];

	// Once set, $skip stays set for the remainder of this element.
	$skip = false;

	while ( $skip ? $this->reader->next() : $this->reader->read() ) {
		$tag = $this->reader->localName;

		if ( $this->reader->nodeType == XMLReader::END_ELEMENT && $tag == 'upload' ) {
			break;
		}

		$hookAllows = Hooks::run( 'ImportHandleUploadXMLTag', [
			$this, $pageInfo
		] );
		if ( !$hookAllows ) {
			// Do nothing
			continue;
		}

		if ( in_array( $tag, $plainFields ) ) {
			$uploadInfo[$tag] = $this->nodeContents();
		} elseif ( $tag == 'contributor' ) {
			$uploadInfo['contributor'] = $this->handleContributor();
		} elseif ( $tag == 'contents' ) {
			$contents = $this->nodeContents();
			$encoding = $this->reader->getAttribute( 'encoding' );
			if ( $encoding === 'base64' ) {
				$uploadInfo['fileSrc'] = $this->dumpTemp( base64_decode( $contents ) );
				$uploadInfo['isTempSrc'] = true;
			}
		} elseif ( $tag != '#text' ) {
			$this->warn( "Unhandled upload XML tag $tag" );
			$skip = true;
		}
	}

	if ( $this->mImageBasePath && isset( $uploadInfo['rel'] ) ) {
		$localPath = "{$this->mImageBasePath}/{$uploadInfo['rel']}";
		if ( file_exists( $localPath ) ) {
			$uploadInfo['fileSrc'] = $localPath;
			$uploadInfo['isTempSrc'] = false;
		}
	}

	if ( !$this->mImportUploads ) {
		return;
	}

	return $this->processUpload( $pageInfo, $uploadInfo );
}
1000
/**
 * Write data to a new temporary file in wfTempDir().
 *
 * @param string $contents Data to write
 * @return string Name of the temporary file as returned by tempnam()
 */
private function dumpTemp( $contents ) {
	$tempFile = tempnam( wfTempDir(), 'importupload' );
	file_put_contents( $tempFile, $contents );

	return $tempFile;
}
1010
/**
 * Build a WikiRevision describing an uploaded file version and pass it to
 * the upload callback.
 *
 * 'timestamp', 'filename', 'src', 'size' and 'comment' of $uploadInfo and
 * 'id' of $pageInfo are read unconditionally; the remaining fields are
 * applied only when present.
 *
 * @param array $pageInfo Page info; '_title' must hold the local title
 * @param array $uploadInfo Fields collected by handleUpload()
 * @return mixed Return value of the upload callback
 */
private function processUpload( $pageInfo, $uploadInfo ) {
	$upload = new WikiRevision( $this->config );
	$descriptionText = isset( $uploadInfo['text'] ) ? $uploadInfo['text'] : '';

	$upload->setTitle( $pageInfo['_title'] );
	$upload->setID( $pageInfo['id'] );
	$upload->setTimestamp( $uploadInfo['timestamp'] );
	$upload->setText( $descriptionText );
	$upload->setFilename( $uploadInfo['filename'] );
	if ( isset( $uploadInfo['archivename'] ) ) {
		$upload->setArchiveName( $uploadInfo['archivename'] );
	}
	$upload->setSrc( $uploadInfo['src'] );
	if ( isset( $uploadInfo['fileSrc'] ) ) {
		$isTemporary = !empty( $uploadInfo['isTempSrc'] );
		$upload->setFileSrc( $uploadInfo['fileSrc'], $isTemporary );
	}
	if ( isset( $uploadInfo['sha1base36'] ) ) {
		$upload->setSha1Base36( $uploadInfo['sha1base36'] );
	}
	$upload->setSize( intval( $uploadInfo['size'] ) );
	$upload->setComment( $uploadInfo['comment'] );

	// Unlike revisions, an upload may carry both an IP and a user name.
	if ( isset( $uploadInfo['contributor']['ip'] ) ) {
		$upload->setUserIP( $uploadInfo['contributor']['ip'] );
	}
	if ( isset( $uploadInfo['contributor']['username'] ) ) {
		$prefixedName = $this->externalUserNames->applyPrefix(
			$uploadInfo['contributor']['username'] );
		$upload->setUsername( $prefixedName );
	}
	$upload->setNoUpdates( $this->mNoUpdates );

	return call_user_func( $this->mUploadCallback, $upload );
}
1051
/**
 * Consume a <contributor> element.
 *
 * @return array Any of the keys 'id', 'ip' and 'username' that were
 *   present; empty for an empty element
 */
private function handleContributor() {
	$contributor = [];

	if ( $this->reader->isEmptyElement ) {
		return $contributor;
	}

	$knownFields = [ 'id', 'ip', 'username' ];

	while ( $this->reader->read() ) {
		$tag = $this->reader->localName;

		if ( $this->reader->nodeType == XMLReader::END_ELEMENT && $tag == 'contributor' ) {
			break;
		}

		if ( in_array( $tag, $knownFields ) ) {
			$contributor[$tag] = $this->nodeContents();
		}
	}

	return $contributor;
}
1077
/**
 * Turn a title from the dump into a local title and check that it may be
 * imported.
 *
 * A notice is emitted and false returned when the title is invalid, is an
 * interwiki title, cannot exist, or (outside command line mode) the user
 * may not edit or create it.
 *
 * @param string $text Title text from the dump
 * @param string|null $ns Namespace ID from the dump, if any
 * @return array|bool [ local title, foreign title ] on success, else false
 */
private function processTitle( $text, $ns = null ) {
	$foreignTitleFactory = is_null( $this->foreignNamespaces )
		? new NaiveForeignTitleFactory()
		: new NamespaceAwareForeignTitleFactory( $this->foreignNamespaces );

	$foreignTitle = $foreignTitleFactory->createForeignTitle( $text, intval( $ns ) );
	$title = $this->importTitleFactory->createTitleFromForeignTitle( $foreignTitle );

	$commandLineMode = $this->config->get( 'CommandLineMode' );

	if ( is_null( $title ) ) {
		# Invalid page title? Ignore the page
		$this->notice( 'import-error-invalid', $foreignTitle->getFullText() );
		return false;
	}

	if ( $title->isExternal() ) {
		$errorKey = 'import-error-interwiki';
	} elseif ( !$title->canExist() ) {
		$errorKey = 'import-error-special';
	} elseif ( !$title->userCan( 'edit' ) && !$commandLineMode ) {
		# Do not import if the importing wiki user cannot edit this page
		$errorKey = 'import-error-edit';
	} elseif ( !$title->exists() && !$title->userCan( 'create' ) && !$commandLineMode ) {
		# Do not import if the importing wiki user cannot create this page
		$errorKey = 'import-error-create';
	} else {
		return [ $title, $foreignTitle ];
	}

	$this->notice( $errorKey, $title->getPrefixedText() );
	return false;
}
1120}
$wgMaxArticleSize
Maximum article size in kilobytes.
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
wfTempDir()
Tries to get the system directory for temporary files.
wfTimestampNow()
Convenience function; returns MediaWiki timestamp for the present time.
wfSetVar(&$dest, $source, $force=false)
Sets dest to source and returns the original value of dest. If source is NULL, it just returns the val...
if( $line===false) $args
Definition cdb.php:64
Class to parse and build external user names.
Reporting callback.
Exception representing a failure to serialize or unserialize a content object.
MediaWiki exception.
A parser that translates page titles on a foreign wiki into ForeignTitle objects, with no knowledge o...
A class to convert page titles on a foreign wiki (ForeignTitle objects) into page titles on the local...
A parser that translates page titles on a foreign wiki into ForeignTitle objects, using information a...
A class to convert page titles on a foreign wiki (ForeignTitle objects) into page titles on the local...
static factory(array $deltas)
A class to convert page titles on a foreign wiki (ForeignTitle objects) into page titles on the local...
static registerSource(ImportSource $source)
XML file reader for the page data importer.
finishImportPage( $title, $foreignTitle, $revCount, $sRevCount, $pageInfo)
Mostly for hook use.
setImportUploads( $import)
doImport()
Primary entry point.
setPageCallback( $callback)
Sets the action to perform as each new page in the stream is reached.
setUsernamePrefix( $usernamePrefix, $assignKnownUsers)
ExternalUserNames $externalUserNames
setNoUpdates( $noupdates)
Set 'no updates' mode.
pageOutCallback( $title, $foreignTitle, $revCount, $sucCount, $pageInfo)
Notify the callback function when a "</page>" is closed.
setLogItemCallback( $callback)
Sets the action to perform as each log item reached.
importUpload( $revision)
Dummy for now...
setImportTitleFactory( $factory)
Sets the factory object to use to convert ForeignTitle objects into local Title objects.
dumpTemp( $contents)
setSiteInfoCallback( $callback)
Sets the action to perform when site info is encountered.
nodeAttribute( $attr)
Retrieves the contents of the named attribute of the current element.
pageCallback( $title)
Notify the callback function when a new "<page>" is reached.
processLogItem( $logInfo)
setTargetNamespace( $namespace)
Set a target namespace to override the defaults.
setPageOffset( $nthPage)
Sets 'pageOffset' value.
debugRevisionHandler(&$revision)
Alternate per-revision callback, for debugging.
nodeContents()
Shouldn't something like this be built-in to XMLReader? Fetches text contents of the current element,...
array $countableCache
importLogItem( $revision)
Default per-revision callback, performs the import.
setImageBasePath( $dir)
handleUpload(&$pageInfo)
handleRevision(&$pageInfo)
revisionCallback( $revision)
Notify the callback function of a revision.
logItemCallback( $revision)
Notify the callback function of a new log item.
throwXmlError( $err)
setDebug( $debug)
Set debug mode...
processRevision( $pageInfo, $revisionInfo)
processUpload( $pageInfo, $uploadInfo)
bool $disableStatisticsUpdate
processTitle( $text, $ns=null)
importRevision( $revision)
Default per-revision callback, performs the import.
ImportTitleFactory $importTitleFactory
__construct(ImportSource $source, Config $config)
Creates an ImportXMLReader drawing from the source provided.
setPageOutCallback( $callback)
Sets the action to perform as each page in the stream is completed.
setTargetRootPage( $rootpage)
Set a target root page under which all pages are imported.
setNoticeCallback( $callback)
Set a callback that displays notice messages.
beforeImportPage( $titleAndForeignTitle)
Default per-page callback.
disableStatisticsUpdate()
Statistics update can cause a lot of time.
siteInfoCallback( $siteInfo)
Notify the callback function of site info.
setRevisionCallback( $callback)
Sets the action to perform as each page revision is reached.
setUploadCallback( $callback)
Sets the action to perform as each file upload version is reached.
Represents a revision, log entry or upload during the import process.
deferred.txt: A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user: for example, updating the view counts, updating the linked-to tables after a save, etc. PHP does not yet have any way to tell the server to actually return and disconnect while still running these updates, but it might have such a feature in the future. We handle these by creating a deferred-update object and putting those objects on a global list.
Definition deferred.txt:11
this class mediates it Skin Encapsulates a look and feel for the wiki All of the functions that render HTML and make choices about how to render it are here and are called from various other places when and is meant to be subclassed with other skins that may override some of its functions The User object contains a reference to a and so rather than having a global skin object we just rely on the global User and get the skin with $wgUser and also has some character encoding functions and other locale stuff The current user interface language is instantiated as and the local content language as $wgContLang
Definition design.txt:57
When a variable name is used in a function, it is silently declared as a new local, masking the global.
Definition design.txt:95
design txt This is a brief overview of the new design More thorough and up to date information is available on the documentation wiki at etc Handles the details of getting and saving to the user table of the and dealing with sessions and cookies OutputPage Encapsulates the entire HTML page that will be sent in response to any server request It is used by calling its functions to add text
Definition design.txt:18
const NS_MAIN
Definition Defines.php:74
the array() calling protocol came about after MediaWiki 1.4rc1.
namespace and then decline to actually register it file or subcat img or subcat $title
Definition hooks.txt:964
either a unescaped string or a HtmlArmor object after in associative array form externallinks including delete and has completed for all link tables whether this was an auto creation default is conds Array Extra conditions for the No matching items in log is displayed if loglist is empty msgKey Array If you want a nice box with a set this to the key of the message First element is the message additional optional elements are parameters for the key that are processed with wfMessage() -> params() ->parseAsBlock() - offset Set to overwrite offset parameter in $wgRequest set to '' to unset offset - wrap String Wrap the message in html(usually something like "&lt;div ...>$1&lt;/div>"). - flags Integer display flags(NO_ACTION_LINK, NO_EXTRA_USER_LINKS) 'LogException':Called before an exception(or PHP error) is logged. This is meant for integration with external error aggregation services
Status::newGood()` to allow deletion, and then `return false` from the hook function. Ensure you consume the 'ChangeTagAfterDelete' hook to carry out custom deletion actions. $tag:name of the tag $user:user initiating the action & $status:Status object. See above. 'ChangeTagsListActive':Allows you to nominate which of the tags your extension uses are in active use. & $tags:list of all active tags. Append to this array. 'ChangeTagsAfterUpdateTags':Called after tags have been updated with the ChangeTags::updateTags function. Params:$addedTags:tags effectively added in the update $removedTags:tags effectively removed in the update $prevTags:tags that were present prior to the update $rc_id:recentchanges table id $rev_id:revision table id $log_id:logging table id $params:tag params $rc:RecentChange being tagged when the tagging accompanies the action or null $user:User who performed the tagging when the tagging is subsequent to the action or null 'ChangeTagsAllowedAdd':Called when checking if a user can add tags to a change. & $allowedTags:List of all the tags the user is allowed to add. Any tags the user wants to add( $addTags) that are not in this array will cause it to fail. You may add or remove tags to this array as required. $addTags:List of tags user intends to add. $user:User who is adding the tags. 'ChangeUserGroups':Called before user groups are changed. $performer:The User who will perform the change $user:The User whose groups will be changed & $add:The groups that will be added & $remove:The groups that will be removed 'Collation::factory':Called if $wgCategoryCollation is an unknown collation. $collationName:Name of the collation in question & $collationObject:Null. Replace with a subclass of the Collation class that implements the collation given in $collationName. 'ConfirmEmailComplete':Called after a user 's email has been confirmed successfully. 
$user:user(object) whose email is being confirmed 'ContentAlterParserOutput':Modify parser output for a given content object. Called by Content::getParserOutput after parsing has finished. Can be used for changes that depend on the result of the parsing but have to be done before LinksUpdate is called(such as adding tracking categories based on the rendered HTML). $content:The Content to render $title:Title of the page, as context $parserOutput:ParserOutput to manipulate 'ContentGetParserOutput':Customize parser output for a given content object, called by AbstractContent::getParserOutput. May be used to override the normal model-specific rendering of page content. $content:The Content to render $title:Title of the page, as context $revId:The revision ID, as context $options:ParserOptions for rendering. To avoid confusing the parser cache, the output can only depend on parameters provided to this hook function, not on global state. $generateHtml:boolean, indicating whether full HTML should be generated. If false, generation of HTML may be skipped, but other information should still be present in the ParserOutput object. & $output:ParserOutput, to manipulate or replace 'ContentHandlerDefaultModelFor':Called when the default content model is determined for a given title. May be used to assign a different model for that title. $title:the Title in question & $model:the model name. Use with CONTENT_MODEL_XXX constants. 'ContentHandlerForModelID':Called when a ContentHandler is requested for a given content model name, but no entry for that model exists in $wgContentHandlers. Note:if your extension implements additional models via this hook, please use GetContentModels hook to make them known to core. $modeName:the requested content model name & $handler:set this to a ContentHandler object, if desired. 'ContentModelCanBeUsedOn':Called to determine whether that content model can be used on a given page. 
This is especially useful to prevent some content models to be used in some special location. $contentModel:ID of the content model in question $title:the Title in question. & $ok:Output parameter, whether it is OK to use $contentModel on $title. Handler functions that modify $ok should generally return false to prevent further hooks from further modifying $ok. 'ContribsPager::getQueryInfo':Before the contributions query is about to run & $pager:Pager object for contributions & $queryInfo:The query for the contribs Pager 'ContribsPager::reallyDoQuery':Called before really executing the query for My Contributions & $data:an array of results of all contribs queries $pager:The ContribsPager object hooked into $offset:Index offset, inclusive $limit:Exact query limit $descending:Query direction, false for ascending, true for descending 'ContributionsLineEnding':Called before a contributions HTML line is finished $page:SpecialPage object for contributions & $ret:the HTML line $row:the DB row for this line & $classes:the classes to add to the surrounding< li > & $attribs:associative array of other HTML attributes for the< li > element. Currently only data attributes reserved to MediaWiki are allowed(see Sanitizer::isReservedDataAttribute). 'ContributionsToolLinks':Change tool links above Special:Contributions $id:User identifier $title:User page title & $tools:Array of tool links $specialPage:SpecialPage instance for context and services. Can be either SpecialContributions or DeletedContributionsPage. Extensions should type hint against a generic SpecialPage though. 'ConvertContent':Called by AbstractContent::convert when a conversion to another content model is requested. Handler functions that modify $result should generally return false to disable further attempts at conversion. $content:The Content object to be converted. $toModel:The ID of the content model to convert to. $lossy:boolean indicating whether lossy conversion is allowed. 
& $result:Output parameter, in case the handler function wants to provide a converted Content object. Note that $result->getContentModel() must return $toModel. 'CustomEditor':When invoking the page editor Return true to allow the normal editor to be used, or false if implementing a custom editor, e.g. for a special namespace, etc. $article:Article being edited $user:User performing the edit 'DatabaseOraclePostInit':Called after initialising an Oracle database $db:the DatabaseOracle object 'DeletedContribsPager::reallyDoQuery':Called before really executing the query for Special:DeletedContributions Similar to ContribsPager::reallyDoQuery & $data:an array of results of all contribs queries $pager:The DeletedContribsPager object hooked into $offset:Index offset, inclusive $limit:Exact query limit $descending:Query direction, false for ascending, true for descending 'DeletedContributionsLineEnding':Called before a DeletedContributions HTML line is finished. Similar to ContributionsLineEnding $page:SpecialPage object for DeletedContributions & $ret:the HTML line $row:the DB row for this line & $classes:the classes to add to the surrounding< li > & $attribs:associative array of other HTML attributes for the< li > element. Currently only data attributes reserved to MediaWiki are allowed(see Sanitizer::isReservedDataAttribute). 'DeleteUnknownPreferences':Called by the cleanupPreferences.php maintenance script to build a WHERE clause with which to delete preferences that are not known about. This hook is used by extensions that have dynamically-named preferences that should not be deleted in the usual cleanup process. For example, the Gadgets extension creates preferences prefixed with 'gadget-', and so anything with that prefix is excluded from the deletion. &where:An array that will be passed as the $cond parameter to IDatabase::select() to determine what will be deleted from the user_properties table. $db:The IDatabase object, useful for accessing $db->buildLike() etc. 
'DifferenceEngineAfterLoadNewText':called in DifferenceEngine::loadNewText() after the new revision 's content has been loaded into the class member variable $differenceEngine->mNewContent but before returning true from this function. $differenceEngine:DifferenceEngine object 'DifferenceEngineLoadTextAfterNewContentIsLoaded':called in DifferenceEngine::loadText() after the new revision 's content has been loaded into the class member variable $differenceEngine->mNewContent but before checking if the variable 's value is null. This hook can be used to inject content into said class member variable. $differenceEngine:DifferenceEngine object 'DifferenceEngineMarkPatrolledLink':Allows extensions to change the "mark as patrolled" link which is shown both on the diff header as well as on the bottom of a page, usually wrapped in a span element which has class="patrollink". $differenceEngine:DifferenceEngine object & $markAsPatrolledLink:The "mark as patrolled" link HTML(string) $rcid:Recent change ID(rc_id) for this change(int) 'DifferenceEngineMarkPatrolledRCID':Allows extensions to possibly change the rcid parameter. For example the rcid might be set to zero due to the user being the same as the performer of the change but an extension might still want to show it under certain conditions. & $rcid:rc_id(int) of the change or 0 $differenceEngine:DifferenceEngine object $change:RecentChange object $user:User object representing the current user 'DifferenceEngineNewHeader':Allows extensions to change the $newHeader variable, which contains information about the new revision, such as the revision 's author, whether the revision was marked as a minor edit or not, etc. 
$differenceEngine:DifferenceEngine object & $newHeader:The string containing the various #mw-diff-otitle[1-5] divs, which include things like revision author info, revision comment, RevisionDelete link and more $formattedRevisionTools:Array containing revision tools, some of which may have been injected with the DiffRevisionTools hook $nextlink:String containing the link to the next revision(if any) $status
Definition hooks.txt:1255
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that probably a stub it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output modifiable modifiable after all normalizations have been except for the $wgMaxImageArea check set to true or false to override the $wgMaxImageArea check result gives extension the possibility to transform it themselves $handler
Definition hooks.txt:903
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition injection.txt:37
Interface for configuration instances.
Definition Config.php:28
Source interface for XML import.
Represents an object that can convert page titles on a foreign wiki (ForeignTitle objects) into page titles on the local wiki (Title objects).
$debug
Definition mcc.php:31
$source
$buffer
$params