MediaWiki REL1_33
WikiImporter.php
Go to the documentation of this file.
1<?php
28
36 private $reader = null;
37 private $foreignNamespaces = null;
42 private $mNoUpdates = false;
43 private $pageOffset = 0;
45 private $config;
49 private $countableCache = [];
51 private $disableStatisticsUpdate = false;
54
62 if ( !class_exists( 'XMLReader' ) ) {
63 throw new Exception( 'Import requires PHP to have been compiled with libxml support' );
64 }
65
66 $this->reader = new XMLReader();
67 $this->config = $config;
68
69 if ( !in_array( 'uploadsource', stream_get_wrappers() ) ) {
70 stream_wrapper_register( 'uploadsource', UploadSourceAdapter::class );
71 }
73
74 // Enable the entity loader, as it is needed for loading external URLs via
75 // XMLReader::open (T86036)
76 $oldDisable = libxml_disable_entity_loader( false );
77 if ( defined( 'LIBXML_PARSEHUGE' ) ) {
78 $status = $this->reader->open( "uploadsource://$id", null, LIBXML_PARSEHUGE );
79 } else {
80 $status = $this->reader->open( "uploadsource://$id" );
81 }
82 if ( !$status ) {
83 $error = libxml_get_last_error();
84 libxml_disable_entity_loader( $oldDisable );
85 throw new MWException( 'Encountered an internal error while initializing WikiImporter object: ' .
86 $error->message );
87 }
88 libxml_disable_entity_loader( $oldDisable );
89
90 // Default callbacks
91 $this->setPageCallback( [ $this, 'beforeImportPage' ] );
92 $this->setRevisionCallback( [ $this, "importRevision" ] );
93 $this->setUploadCallback( [ $this, 'importUpload' ] );
94 $this->setLogItemCallback( [ $this, 'importLogItem' ] );
95 $this->setPageOutCallback( [ $this, 'finishImportPage' ] );
96
97 $this->importTitleFactory = new NaiveImportTitleFactory();
98 $this->externalUserNames = new ExternalUserNames( 'imported', false );
99 }
100
/**
 * Expose the XMLReader instance this importer pulls nodes from.
 *
 * @return XMLReader|null
 */
public function getReader() {
	$xmlReader = $this->reader;

	return $xmlReader;
}
107
/**
 * Record an XML error in the debug log.
 *
 * Despite the name, this method does not throw; it only logs the message,
 * once through debug() and once directly through wfDebug().
 *
 * @param string $err Error description
 */
public function throwXmlError( $err ) {
	$this->debug( 'FAILURE: ' . $err );
	wfDebug( 'WikiImporter XML error: ' . $err . "\n" );
}
112
/**
 * Write a message to the debug log, but only while debug mode is enabled.
 *
 * @param string $data Message text
 */
public function debug( $data ) {
	if ( !$this->mDebug ) {
		return;
	}

	wfDebug( 'IMPORT: ' . $data . "\n" );
}
118
/**
 * Write a warning to the debug log, regardless of the debug-mode flag.
 *
 * @param string $data Message text
 */
public function warn( $data ) {
	$line = 'IMPORT: ' . $data . "\n";
	wfDebug( $line );
}
122
/**
 * Emit a notice identified by a message key.
 *
 * Any arguments after $msg are collected as message parameters. They are
 * handed to the notice callback as a single array when one is set, otherwise
 * the rendered message is written to the debug log.
 *
 * @param string $msg Message key
 * @param mixed ...$param Message parameters (read via func_get_args())
 */
public function notice( $msg /*, $param, ...*/ ) {
	// Everything after the message key is a message parameter.
	$params = func_get_args();
	array_shift( $params );

	if ( is_callable( $this->mNoticeCallback ) ) {
		call_user_func( $this->mNoticeCallback, $msg, $params );
	} else { # No ImportReporter -> CLI
		// T177997: the command line importers should call setNoticeCallback()
		// for their own custom callback to echo the notice
		wfDebug( wfMessage( $msg, $params )->text() . "\n" );
	}
}
135
/**
 * Switch debug mode on or off; debug() only logs while it is on.
 *
 * @param bool $debug
 */
public function setDebug( $debug ) {
	$this->mDebug = $debug;
}
143
/**
 * Set the 'no updates' flag, which is forwarded to every WikiRevision
 * this importer creates.
 *
 * @param bool $noupdates
 */
public function setNoUpdates( $noupdates ) {
	$this->mNoUpdates = $noupdates;
}
151
/**
 * Set the page offset: doImport() skips top-level nodes until it has
 * counted this many <page> elements.
 *
 * @param int $nthPage
 */
public function setPageOffset( $nthPage ) {
	$this->pageOffset = $nthPage;
}
161
/**
 * Set the callback used by notice() to display notice messages.
 *
 * The assignment is delegated to wfSetVar(), whose result (the value held
 * before the call) is returned.
 *
 * @param callable $callback
 * @return callable|null Previously stored callback
 */
public function setNoticeCallback( $callback ) {
	$previous = wfSetVar( $this->mNoticeCallback, $callback );

	return $previous;
}
171
/**
 * Set the action to perform as each new page in the stream is reached.
 *
 * @param callable $callback
 * @return callable|null The callback that was installed before this call
 */
public function setPageCallback( $callback ) {
	$replaced = $this->mPageCallback;
	$this->mPageCallback = $callback;

	return $replaced;
}
182
/**
 * Set the action to perform as each page in the stream is completed.
 *
 * @param callable $callback
 * @return callable|null The callback that was installed before this call
 */
public function setPageOutCallback( $callback ) {
	$replaced = $this->mPageOutCallback;
	$this->mPageOutCallback = $callback;

	return $replaced;
}
197
/**
 * Set the action to perform as each page revision is reached.
 *
 * @param callable $callback
 * @return callable|null The callback that was installed before this call
 */
public function setRevisionCallback( $callback ) {
	$replaced = $this->mRevisionCallback;
	$this->mRevisionCallback = $callback;

	return $replaced;
}
208
/**
 * Set the action to perform as each file upload version is reached.
 *
 * @param callable $callback
 * @return callable|null The callback that was installed before this call
 */
public function setUploadCallback( $callback ) {
	$replaced = $this->mUploadCallback;
	$this->mUploadCallback = $callback;

	return $replaced;
}
219
/**
 * Set the action to perform as each log item is reached.
 *
 * @param callable $callback
 * @return callable|null The callback that was installed before this call
 */
public function setLogItemCallback( $callback ) {
	$replaced = $this->mLogItemCallback;
	$this->mLogItemCallback = $callback;

	return $replaced;
}
230
/**
 * Set the action to perform when site info is encountered.
 *
 * @param callable $callback
 * @return callable|null The callback that was installed before this call
 */
public function setSiteInfoCallback( $callback ) {
	$replaced = $this->mSiteInfoCallback;
	$this->mSiteInfoCallback = $callback;

	return $replaced;
}
241
/**
 * Set the factory object used to convert ForeignTitle objects into local
 * Title objects (consumed by processTitle()).
 *
 * @param ImportTitleFactory $factory
 */
public function setImportTitleFactory( $factory ) {
	$titleFactory = $factory;
	$this->importTitleFactory = $titleFactory;
}
250
/**
 * Set a target namespace to override the defaults.
 *
 * Passing null removes any override by reinstalling the same
 * NaiveImportTitleFactory the constructor uses as its default.
 *
 * @param null|int $namespace
 * @return bool False if the namespace is negative or does not exist
 */
public function setTargetNamespace( $namespace ) {
	if ( is_null( $namespace ) ) {
		// Don't override namespaces
		$this->setImportTitleFactory( new NaiveImportTitleFactory() );
		return true;
	}

	if ( $namespace >= 0 && MWNamespace::exists( intval( $namespace ) ) ) {
		$namespace = intval( $namespace );
		$this->setImportTitleFactory( new NamespaceImportTitleFactory( $namespace ) );
		return true;
	}

	return false;
}
272
/**
 * Set a target root page under which all pages are imported.
 *
 * - null: no root page; the default NaiveImportTitleFactory is reinstalled.
 * - '' (empty string): nothing is changed.
 * - anything else: trailing slashes are stripped and the text is validated
 *   as a local title in a namespace that supports subpages.
 *
 * @param null|string $rootpage
 * @return Status Good on success; fatal with 'import-rootpage-invalid' or
 *   'import-rootpage-nosubpage' when validation fails
 */
public function setTargetRootPage( $rootpage ) {
	$status = Status::newGood();
	if ( is_null( $rootpage ) ) {
		// No rootpage
		$this->setImportTitleFactory( new NaiveImportTitleFactory() );
	} elseif ( $rootpage !== '' ) {
		$rootpage = rtrim( $rootpage, '/' ); // avoid double slashes
		$title = Title::newFromText( $rootpage );

		if ( !$title || $title->isExternal() ) {
			$status->fatal( 'import-rootpage-invalid' );
		} elseif ( !MWNamespace::hasSubpages( $title->getNamespace() ) ) {
			// The main namespace has no name of its own, so use the
			// 'blanknamespace' message for display instead.
			$displayNSText = $title->getNamespace() == NS_MAIN
				? wfMessage( 'blanknamespace' )->text()
				: MediaWikiServices::getInstance()->getContentLanguage()->
					getNsText( $title->getNamespace() );
			$status->fatal( 'import-rootpage-nosubpage', $displayNSText );
		} else {
			// set namespace to 'all', so the namespace check in processTitle() can pass
			$this->setTargetNamespace( null );
			$this->setImportTitleFactory( new SubpageImportTitleFactory( $title ) );
		}
	}
	return $status;
}
303
/**
 * Set the base directory that handleUpload() prefixes to an upload's
 * 'rel' path when looking for the file on disk.
 *
 * @param string $dir
 */
public function setImageBasePath( $dir ) {
	$basePath = $dir;
	$this->mImageBasePath = $basePath;
}
310
/**
 * Choose whether parsed <upload> elements are actually processed;
 * handleUpload() only calls processUpload() when this flag is truthy.
 *
 * @param bool $import
 */
public function setImportUploads( $import ) {
	$enabled = $import;
	$this->mImportUploads = $enabled;
}
317
/**
 * Replace the ExternalUserNames helper with one built from the given
 * prefix and known-user flag.
 *
 * @param string $usernamePrefix
 * @param bool $assignKnownUsers
 */
public function setUsernamePrefix( $usernamePrefix, $assignKnownUsers ) {
	$userNames = new ExternalUserNames( $usernamePrefix, $assignKnownUsers );
	$this->externalUserNames = $userNames;
}
326
/**
 * Turn off the article-count adjustment that finishImportPage() would
 * otherwise perform for each page.
 */
public function disableStatisticsUpdate() {
	$disabled = true;
	$this->disableStatisticsUpdate = $disabled;
}
334
/**
 * Default per-page callback.
 *
 * Remembers whether the page is countable before the import touches it, so
 * finishImportPage() can compare against the state afterwards.
 *
 * @param array $titleAndForeignTitle Element 0 is the local title
 * @return bool Always true
 */
public function beforeImportPage( $titleAndForeignTitle ) {
	$localTitle = $titleAndForeignTitle[0];
	$wikiPage = WikiPage::factory( $localTitle );
	$cacheKey = 'title_' . $localTitle->getPrefixedText();
	$this->countableCache[$cacheKey] = $wikiPage->isCountable();

	return true;
}
347
/**
 * Default per-revision callback; performs the import.
 *
 * A notice is emitted and false returned when the revision's content handler
 * cannot be used on the target title, or when importing raises an
 * MWContentSerializationException.
 *
 * @param WikiRevision $revision
 * @return bool Result of WikiRevision::importOldRevision(), or false on error
 */
public function importRevision( $revision ) {
	if ( !$revision->getContentHandler()->canBeUsedOn( $revision->getTitle() ) ) {
		$errorKey = 'import-error-bad-location';
	} else {
		try {
			return $revision->importOldRevision();
		} catch ( MWContentSerializationException $ex ) {
			$errorKey = 'import-error-unserialize';
		}
	}

	// Both failure modes report the same set of details.
	$this->notice( $errorKey,
		$revision->getTitle()->getPrefixedText(),
		$revision->getID(),
		$revision->getModel(),
		$revision->getFormat() );

	return false;
}
376
/**
 * Default log-item callback; delegates to WikiRevision::importLogItem().
 *
 * @param WikiRevision $revision
 * @return mixed Whatever WikiRevision::importLogItem() returns
 */
public function importLogItem( $revision ) {
	$outcome = $revision->importLogItem();

	return $outcome;
}
385
/**
 * Default upload callback; delegates to WikiRevision::importUpload().
 *
 * @param WikiRevision $revision
 * @return mixed Whatever WikiRevision::importUpload() returns
 */
public function importUpload( $revision ) {
	$outcome = $revision->importUpload();

	return $outcome;
}
394
/**
 * Default page-out callback: adjusts the article count, then runs the
 * 'AfterImportPage' hook with the arguments this method was called with.
 *
 * @param Title $title Local title of the imported page
 * @param ForeignTitle $foreignTitle Title of the page on the source wiki
 * @param int $revCount Number of revisions seen for the page
 * @param int $sRevCount Number of revisions imported successfully
 * @param array $pageInfo Page information collected while parsing
 * @return bool Result of Hooks::run( 'AfterImportPage', ... )
 */
public function finishImportPage( $title, $foreignTitle, $revCount,
	$sRevCount, $pageInfo
) {
	// Update article count statistics (T42009)
	// The normal counting logic in WikiPage->doEditUpdates() is designed for
	// one-revision-at-a-time editing, not bulk imports. In this situation it
	// suffers from issues of replica DB lag. We let WikiPage handle the total page
	// and revision count, and we implement our own custom logic for the
	// article (content page) count.
	if ( !$this->disableStatisticsUpdate ) {
		$page = WikiPage::factory( $title );
		$page->loadPageData( 'fromdbmaster' );
		$content = $page->getContent();
		if ( $content === null ) {
			wfDebug( __METHOD__ . ': Skipping article count adjustment for ' . $title .
				' because WikiPage::getContent() returned null' );
		} else {
			$editInfo = $page->prepareContentForEdit( $content );
			$countKey = 'title_' . $title->getPrefixedText();
			$countable = $page->isCountable( $editInfo );
			// Only adjust when beforeImportPage() recorded a prior state
			// and that state differs from the current one.
			if ( array_key_exists( $countKey, $this->countableCache ) &&
				$countable != $this->countableCache[$countKey] ) {
				DeferredUpdates::addUpdate( SiteStatsUpdate::factory( [
					'articles' => ( (int)$countable - (int)$this->countableCache[$countKey] )
				] ) );
			}
		}
	}

	// Hook handlers receive exactly the arguments passed to this method.
	$args = func_get_args();
	return Hooks::run( 'AfterImportPage', $args );
}
436
/**
 * Alternate per-revision callback, for debugging: dumps the revision's
 * title, user, timestamp, comment and text through debug().
 *
 * @param WikiRevision &$revision
 */
public function debugRevisionHandler( &$revision ) {
	$this->debug( "Got revision:" );

	$titleLine = is_object( $revision->title )
		? "-- Title: " . $revision->title->getPrefixedText()
		: "-- Title: <invalid>";
	$this->debug( $titleLine );

	$this->debug( "-- User: " . $revision->user_text );
	$this->debug( "-- Timestamp: " . $revision->timestamp );
	$this->debug( "-- Comment: " . $revision->comment );
	$this->debug( "-- Text: " . $revision->text );
}
453
/**
 * Notify the callback function of site info.
 *
 * @param array $siteInfo
 * @return mixed Callback result, or false when no callback is set
 */
private function siteInfoCallback( $siteInfo ) {
	if ( !isset( $this->mSiteInfoCallback ) ) {
		return false;
	}

	return call_user_func( $this->mSiteInfoCallback, $siteInfo, $this );
}
467
/**
 * Notify the callback function when a new "<page>" is reached.
 * Does nothing when no page callback is set.
 *
 * @param array $title The value handed over by handlePage(): the array
 *   returned by processTitle()
 */
public function pageCallback( $title ) {
	if ( isset( $this->mPageCallback ) ) {
		call_user_func( $this->mPageCallback, $title );
	}
}
477
/**
 * Notify the callback function when a "</page>" is closed.
 * Does nothing when no page-out callback is set.
 *
 * @param Title $title
 * @param ForeignTitle $foreignTitle
 * @param int $revCount
 * @param int $sucCount Number of revisions for which the callback returned true
 * @param array $pageInfo Associative array of page information
 */
private function pageOutCallback( $title, $foreignTitle, $revCount,
	$sucCount, $pageInfo ) {
	if ( isset( $this->mPageOutCallback ) ) {
		// Forward exactly the arguments this method received.
		$args = func_get_args();
		call_user_func_array( $this->mPageOutCallback, $args );
	}
}
493
/**
 * Notify the callback function of a revision.
 *
 * @param WikiRevision $revision
 * @return mixed Callback result, or false when no callback is set
 */
private function revisionCallback( $revision ) {
	if ( !isset( $this->mRevisionCallback ) ) {
		return false;
	}

	return call_user_func( $this->mRevisionCallback, $revision, $this );
}
507
/**
 * Notify the callback function of a new log item.
 *
 * @param WikiRevision $revision
 * @return mixed Callback result, or false when no callback is set
 */
private function logItemCallback( $revision ) {
	if ( !isset( $this->mLogItemCallback ) ) {
		return false;
	}

	return call_user_func( $this->mLogItemCallback, $revision, $this );
}
521
/**
 * Retrieve the contents of the named attribute of the current element.
 *
 * @param string $attr Attribute name
 * @return string|null Whatever XMLReader::getAttribute() returns
 */
public function nodeAttribute( $attr ) {
	$value = $this->reader->getAttribute( $attr );

	return $value;
}
531
/**
 * Fetch the text contents of the current element.
 *
 * Text, CDATA and significant-whitespace nodes are concatenated until the
 * first end-element node is read. If the stream runs out before that, the
 * reader is closed and an empty string is returned.
 *
 * @return string
 */
public function nodeContents() {
	$xml = $this->reader;
	if ( $xml->isEmptyElement ) {
		return "";
	}

	$textTypes = [
		XMLReader::TEXT,
		XMLReader::CDATA,
		XMLReader::SIGNIFICANT_WHITESPACE,
	];
	$collected = "";

	while ( $xml->read() ) {
		$nodeType = $xml->nodeType;
		if ( $nodeType === XMLReader::END_ELEMENT ) {
			return $collected;
		}
		if ( in_array( $nodeType, $textTypes, true ) ) {
			$collected .= $xml->value;
		}
	}

	// Ran out of input without seeing the closing tag.
	$xml->close();
	return '';
}
559
/**
 * Primary entry point: walk the dump and dispatch each top-level element.
 *
 * The libxml entity-loader state is restored and the reader closed when the
 * main loop ends, whether it finishes normally or is left by any throwable.
 *
 * @throws MWException If the first node read is not <mediawiki>
 * @return bool Always true when no exception is thrown
 */
public function doImport() {
	// Calls to reader->read need to be wrapped in calls to
	// libxml_disable_entity_loader() to avoid local file
	// inclusion attacks (T48932).
	$oldDisable = libxml_disable_entity_loader( true );
	$this->reader->read();

	if ( $this->reader->localName != 'mediawiki' ) {
		libxml_disable_entity_loader( $oldDisable );
		throw new MWException( "Expected <mediawiki> tag, got " .
			$this->reader->localName );
	}
	$this->debug( "<mediawiki> tag is correct." );

	$this->debug( "Starting primary dump processing loop." );

	$pageCount = 0;
	try {
		$keepReading = $this->reader->read();
		while ( $keepReading ) {
			$tag = $this->reader->localName;
			if ( $this->pageOffset ) {
				if ( $tag === 'page' ) {
					$pageCount++;
				}
				// Skip whole subtrees until the requested page is reached.
				if ( $pageCount < $this->pageOffset ) {
					$keepReading = $this->reader->next();
					continue;
				}
			}
			$type = $this->reader->nodeType;
			$skip = false;

			if ( !Hooks::run( 'ImportHandleToplevelXMLTag', [ $this ] ) ) {
				// Do nothing
			} elseif ( $tag == 'mediawiki' && $type == XMLReader::END_ELEMENT ) {
				break;
			} elseif ( $tag == 'siteinfo' ) {
				$this->handleSiteInfo();
			} elseif ( $tag == 'page' ) {
				$this->handlePage();
			} elseif ( $tag == 'logitem' ) {
				$this->handleLogItem();
			} elseif ( $tag != '#text' ) {
				$this->warn( "Unhandled top-level XML tag $tag" );

				$skip = true;
			}

			if ( $skip ) {
				// next() jumps over the unhandled element's subtree.
				$keepReading = $this->reader->next();
				$this->debug( "Skip" );
			} else {
				$keepReading = $this->reader->read();
			}
		}
	} finally {
		// Runs for Exception and Error alike, so the process-wide entity
		// loader setting never stays changed after a failed import.
		libxml_disable_entity_loader( $oldDisable );
		$this->reader->close();
	}

	return true;
}
638
/**
 * Parse a <siteinfo> element and pass the collected data to the site-info
 * callback.
 *
 * <namespace> children are recorded in $this->foreignNamespaces keyed by
 * their 'key' attribute; the simple text fields are copied into the array
 * given to the callback, together with the namespace map under '_namespaces'.
 */
private function handleSiteInfo() {
	$this->debug( "Enter site info handler." );

	// Fields that can just be stuffed in the siteInfo object
	$plainFields = [ 'sitename', 'base', 'generator', 'case' ];
	$collected = [];

	while ( $this->reader->read() ) {
		$isClosingTag = $this->reader->nodeType == XMLReader::END_ELEMENT &&
			$this->reader->localName == 'siteinfo';
		if ( $isClosingTag ) {
			break;
		}

		$tag = $this->reader->localName;

		if ( $tag == 'namespace' ) {
			$nsKey = $this->nodeAttribute( 'key' );
			$this->foreignNamespaces[$nsKey] = $this->nodeContents();
			continue;
		}

		if ( in_array( $tag, $plainFields ) ) {
			$collected[$tag] = $this->nodeContents();
		}
	}

	$collected['_namespaces'] = $this->foreignNamespaces;
	$this->siteInfoCallback( $collected );
}
665
/**
 * Parse a <logitem> element and hand the collected fields to
 * processLogItem().
 *
 * For each node the 'ImportHandleLogItemXMLTag' hook runs first; a false
 * result means the node gets no further handling here.
 */
private function handleLogItem() {
	$this->debug( "Enter log item handler." );

	// Fields that can just be stuffed in the pageInfo object
	$plainFields = [ 'id', 'comment', 'type', 'action', 'timestamp',
		'logtitle', 'params' ];
	$logInfo = [];

	while ( $this->reader->read() ) {
		$isClosingTag = $this->reader->nodeType == XMLReader::END_ELEMENT &&
			$this->reader->localName == 'logitem';
		if ( $isClosingTag ) {
			break;
		}

		$tag = $this->reader->localName;

		$proceed = Hooks::run( 'ImportHandleLogItemXMLTag', [
			$this, $logInfo
		] );
		if ( !$proceed ) {
			// Do nothing
			continue;
		}

		if ( in_array( $tag, $plainFields ) ) {
			$logInfo[$tag] = $this->nodeContents();
		} elseif ( $tag == 'contributor' ) {
			$logInfo['contributor'] = $this->handleContributor();
		} elseif ( $tag != '#text' ) {
			$this->warn( "Unhandled log-item XML tag $tag" );
		}
	}

	$this->processLogItem( $logInfo );
}
697
/**
 * Build a WikiRevision from parsed log-item data and pass it to the
 * log-item callback.
 *
 * 'type' and 'action' are read unconditionally; every other field is
 * applied only when present. A missing contributor username is replaced by
 * a prefixed 'Unknown user'.
 *
 * @param array $logInfo Fields collected by handleLogItem()
 * @return mixed Result of logItemCallback()
 */
private function processLogItem( $logInfo ) {
	$logEntry = new WikiRevision( $this->config );

	if ( isset( $logInfo['id'] ) ) {
		$logEntry->setID( $logInfo['id'] );
	}
	$logEntry->setType( $logInfo['type'] );
	$logEntry->setAction( $logInfo['action'] );
	if ( isset( $logInfo['timestamp'] ) ) {
		$logEntry->setTimestamp( $logInfo['timestamp'] );
	}
	if ( isset( $logInfo['params'] ) ) {
		$logEntry->setParams( $logInfo['params'] );
	}
	if ( isset( $logInfo['logtitle'] ) ) {
		// @todo Using Title for non-local titles is a recipe for disaster.
		// We should use ForeignTitle here instead.
		$logTitle = Title::newFromText( $logInfo['logtitle'] );
		$logEntry->setTitle( $logTitle );
	}

	$logEntry->setNoUpdates( $this->mNoUpdates );

	if ( isset( $logInfo['comment'] ) ) {
		$logEntry->setComment( $logInfo['comment'] );
	}

	if ( isset( $logInfo['contributor']['ip'] ) ) {
		$logEntry->setUserIP( $logInfo['contributor']['ip'] );
	}

	$userName = isset( $logInfo['contributor']['username'] )
		? $this->externalUserNames->applyPrefix( $logInfo['contributor']['username'] )
		: $this->externalUserNames->addPrefix( 'Unknown user' );
	$logEntry->setUsername( $userName );

	return $this->logItemCallback( $logEntry );
}
742
/**
 * Parse a <page> element: collect its simple fields, resolve the title when
 * the first <revision> or <upload> is met, dispatch those children, and
 * finally fire the page-out callback if a valid title was resolved.
 */
private function handlePage() {
	// Handle page data.
	$this->debug( "Enter page handler." );
	$pageInfo = [ 'revisionCount' => 0, 'successfulRevisionCount' => 0 ];

	// Fields that can just be stuffed in the pageInfo object
	$plainFields = [ 'title', 'ns', 'id', 'redirect', 'restrictions' ];

	$skip = false;
	$badTitle = false;

	while ( $skip ? $this->reader->next() : $this->reader->read() ) {
		$isClosingTag = $this->reader->nodeType == XMLReader::END_ELEMENT &&
			$this->reader->localName == 'page';
		if ( $isClosingTag ) {
			break;
		}

		$skip = false;
		$tag = $this->reader->localName;

		if ( $badTitle ) {
			// The title is invalid, bail out of this page
			$skip = true;
			continue;
		}

		if ( !Hooks::run( 'ImportHandlePageXMLTag', [ $this, &$pageInfo ] ) ) {
			// Do nothing
			continue;
		}

		if ( in_array( $tag, $plainFields ) ) {
			// An XML snippet:
			// <page>
			//     <id>123</id>
			//     <title>Page</title>
			//     <redirect title="NewTitle"/>
			//     ...
			// Because the redirect tag is built differently, we need special handling for that case.
			$pageInfo[$tag] = ( $tag == 'redirect' )
				? $this->nodeAttribute( 'title' )
				: $this->nodeContents();
			continue;
		}

		if ( $tag == 'revision' || $tag == 'upload' ) {
			if ( !isset( $title ) ) {
				$title = $this->processTitle( $pageInfo['title'],
					$pageInfo['ns'] ?? null );

				// $title is either an array of two titles or false.
				if ( is_array( $title ) ) {
					$this->pageCallback( $title );
					list( $pageInfo['_title'], $foreignTitle ) = $title;
				} else {
					$badTitle = true;
					$skip = true;
				}
			}

			if ( $title ) {
				if ( $tag == 'revision' ) {
					$this->handleRevision( $pageInfo );
				} else {
					$this->handleUpload( $pageInfo );
				}
			}
			continue;
		}

		if ( $tag != '#text' ) {
			$this->warn( "Unhandled page XML tag $tag" );
			$skip = true;
		}
	}

	// @note $pageInfo['_title'] is only set if a valid $title was processed
	// above with no error. In that case pageCallback was called above and
	// $foreignTitle was assigned alongside it, so both are available for
	// pageOutCallback here. If '_title' is not set, $foreignTitle is not
	// set either, since they both come from $title.
	if ( array_key_exists( '_title', $pageInfo ) ) {
		$this->pageOutCallback( $pageInfo['_title'], $foreignTitle,
			$pageInfo['revisionCount'],
			$pageInfo['successfulRevisionCount'],
			$pageInfo );
	}
}
823
/**
 * Parse a <revision> element and pass the collected data to
 * processRevision(), updating the page's revision counters.
 *
 * @param array &$pageInfo Page info; 'revisionCount' is always incremented,
 *   'successfulRevisionCount' only when processRevision() returns a truthy value
 */
private function handleRevision( &$pageInfo ) {
	$this->debug( "Enter revision handler" );
	$revisionInfo = [];

	$normalFields = [ 'id', 'timestamp', 'comment', 'minor', 'model', 'format', 'text', 'sha1' ];

	$skip = false;

	while ( $skip ? $this->reader->next() : $this->reader->read() ) {
		if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
			$this->reader->localName == 'revision' ) {
			break;
		}

		// Skipping applies to the single unhandled element only; reset it on
		// every iteration, as handlePage() does, so one unknown tag does not
		// change how all later nodes are traversed.
		$skip = false;

		$tag = $this->reader->localName;

		if ( !Hooks::run( 'ImportHandleRevisionXMLTag', [
			$this, $pageInfo, $revisionInfo
		] ) ) {
			// Do nothing
		} elseif ( in_array( $tag, $normalFields ) ) {
			$revisionInfo[$tag] = $this->nodeContents();
		} elseif ( $tag == 'contributor' ) {
			$revisionInfo['contributor'] = $this->handleContributor();
		} elseif ( $tag != '#text' ) {
			$this->warn( "Unhandled revision XML tag $tag" );
			$skip = true;
		}
	}

	$pageInfo['revisionCount']++;
	if ( $this->processRevision( $pageInfo, $revisionInfo ) ) {
		$pageInfo['successfulRevisionCount']++;
	}
}
862
/**
 * Build a WikiRevision from parsed revision data and pass it to the
 * revision callback.
 *
 * @param array $pageInfo Page info; '_title' must be set
 * @param array $revisionInfo Fields collected by handleRevision()
 * @throws MWException If the text of a revision with one of the listed
 *   content models (or no model) exceeds $wgMaxArticleSize kilobytes
 * @return mixed Result of revisionCallback()
 */
private function processRevision( $pageInfo, $revisionInfo ) {
	global $wgMaxArticleSize;

	// Make sure revisions won't violate $wgMaxArticleSize, which could lead to
	// database errors and instability. Testing for revisions with only listed
	// content models, as other content models might use serialization formats
	// which aren't checked against $wgMaxArticleSize.
	// A revision without a <text> element counts as zero bytes.
	if ( ( !isset( $revisionInfo['model'] ) ||
		in_array( $revisionInfo['model'], [
			'wikitext',
			'css',
			'json',
			'javascript',
			'text',
			''
		] ) ) &&
		strlen( $revisionInfo['text'] ?? '' ) > $wgMaxArticleSize * 1024
	) {
		throw new MWException( 'The text of ' .
			( isset( $revisionInfo['id'] ) ?
				"the revision with ID $revisionInfo[id]" :
				'a revision'
			) . " exceeds the maximum allowable size ($wgMaxArticleSize KB)" );
	}

	// FIXME: process schema version 11!
	$revision = new WikiRevision( $this->config );

	if ( isset( $revisionInfo['id'] ) ) {
		$revision->setID( $revisionInfo['id'] );
	}
	if ( isset( $revisionInfo['model'] ) ) {
		$revision->setModel( $revisionInfo['model'] );
	}
	if ( isset( $revisionInfo['format'] ) ) {
		$revision->setFormat( $revisionInfo['format'] );
	}
	$revision->setTitle( $pageInfo['_title'] );

	if ( isset( $revisionInfo['text'] ) ) {
		$handler = $revision->getContentHandler();
		$text = $handler->importTransform(
			$revisionInfo['text'],
			$revision->getFormat() );

		$revision->setText( $text );
	}
	$revision->setTimestamp( $revisionInfo['timestamp'] ?? wfTimestampNow() );

	if ( isset( $revisionInfo['comment'] ) ) {
		$revision->setComment( $revisionInfo['comment'] );
	}

	// The mere presence of the field marks the edit as minor.
	if ( isset( $revisionInfo['minor'] ) ) {
		$revision->setMinor( true );
	}
	if ( isset( $revisionInfo['contributor']['ip'] ) ) {
		$revision->setUserIP( $revisionInfo['contributor']['ip'] );
	} elseif ( isset( $revisionInfo['contributor']['username'] ) ) {
		$revision->setUsername(
			$this->externalUserNames->applyPrefix( $revisionInfo['contributor']['username'] )
		);
	} else {
		$revision->setUsername( $this->externalUserNames->addPrefix( 'Unknown user' ) );
	}
	if ( isset( $revisionInfo['sha1'] ) ) {
		$revision->setSha1Base36( $revisionInfo['sha1'] );
	}
	$revision->setNoUpdates( $this->mNoUpdates );

	return $this->revisionCallback( $revision );
}
941
/**
 * Parse an <upload> element and, when upload importing is enabled, pass the
 * collected data to processUpload().
 *
 * Inline base64 <contents> are written to a temporary file. A file found at
 * "<image base path>/<rel>" takes precedence over the inline contents.
 *
 * @param array &$pageInfo Page info collected by handlePage()
 * @return mixed Result of processUpload(), or null when uploads are not imported
 */
private function handleUpload( &$pageInfo ) {
	$this->debug( "Enter upload handler" );
	$uploadInfo = [];

	$normalFields = [ 'timestamp', 'comment', 'filename', 'text',
		'src', 'size', 'sha1base36', 'archivename', 'rel' ];

	$skip = false;

	while ( $skip ? $this->reader->next() : $this->reader->read() ) {
		if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
			$this->reader->localName == 'upload' ) {
			break;
		}

		// Skipping applies to the single unhandled element only; reset it on
		// every iteration, as handlePage() does.
		$skip = false;

		$tag = $this->reader->localName;

		if ( !Hooks::run( 'ImportHandleUploadXMLTag', [
			$this, $pageInfo
		] ) ) {
			// Do nothing
		} elseif ( in_array( $tag, $normalFields ) ) {
			$uploadInfo[$tag] = $this->nodeContents();
		} elseif ( $tag == 'contents' ) {
			// Read the attribute while the reader is still on the opening
			// tag; nodeContents() advances it to the end of the element.
			$encoding = $this->reader->getAttribute( 'encoding' );
			$contents = $this->nodeContents();
			if ( $encoding === 'base64' ) {
				$uploadInfo['fileSrc'] = $this->dumpTemp( base64_decode( $contents ) );
				$uploadInfo['isTempSrc'] = true;
			}
		} elseif ( $tag == 'contributor' ) {
			$uploadInfo['contributor'] = $this->handleContributor();
		} elseif ( $tag != '#text' ) {
			$this->warn( "Unhandled upload XML tag $tag" );
			$skip = true;
		}
	}

	if ( $this->mImageBasePath && isset( $uploadInfo['rel'] ) ) {
		$path = "{$this->mImageBasePath}/{$uploadInfo['rel']}";
		if ( file_exists( $path ) ) {
			$uploadInfo['fileSrc'] = $path;
			$uploadInfo['isTempSrc'] = false;
		}
	}

	if ( $this->mImportUploads ) {
		return $this->processUpload( $pageInfo, $uploadInfo );
	}
}
996
/**
 * Write the given data to a new temporary file under wfTempDir().
 *
 * @param string $contents Raw file data
 * @return string Name of the temporary file as returned by tempnam()
 */
private function dumpTemp( $contents ) {
	$tempPath = tempnam( wfTempDir(), 'importupload' );
	file_put_contents( $tempPath, $contents );

	return $tempPath;
}
1006
/**
 * Build a WikiRevision from parsed upload data and pass it to the upload
 * callback.
 *
 * 'timestamp', 'filename', 'src', 'size' and 'comment' are read from
 * $uploadInfo unconditionally, as is 'id' from $pageInfo.
 *
 * @param array $pageInfo Page info; '_title' and 'id' are read
 * @param array $uploadInfo Fields collected by handleUpload()
 * @return mixed Result of the upload callback
 */
private function processUpload( $pageInfo, $uploadInfo ) {
	$upload = new WikiRevision( $this->config );
	$description = $uploadInfo['text'] ?? '';

	$upload->setTitle( $pageInfo['_title'] );
	$upload->setID( $pageInfo['id'] );
	$upload->setTimestamp( $uploadInfo['timestamp'] );
	$upload->setText( $description );
	$upload->setFilename( $uploadInfo['filename'] );
	if ( isset( $uploadInfo['archivename'] ) ) {
		$upload->setArchiveName( $uploadInfo['archivename'] );
	}
	$upload->setSrc( $uploadInfo['src'] );
	if ( isset( $uploadInfo['fileSrc'] ) ) {
		$isTemporary = !empty( $uploadInfo['isTempSrc'] );
		$upload->setFileSrc( $uploadInfo['fileSrc'], $isTemporary );
	}
	if ( isset( $uploadInfo['sha1base36'] ) ) {
		$upload->setSha1Base36( $uploadInfo['sha1base36'] );
	}
	$upload->setSize( intval( $uploadInfo['size'] ) );
	$upload->setComment( $uploadInfo['comment'] );

	$contributor = $uploadInfo['contributor'] ?? [];
	if ( isset( $contributor['ip'] ) ) {
		$upload->setUserIP( $contributor['ip'] );
	}
	if ( isset( $contributor['username'] ) ) {
		$upload->setUsername(
			$this->externalUserNames->applyPrefix( $contributor['username'] )
		);
	}
	$upload->setNoUpdates( $this->mNoUpdates );

	return call_user_func( $this->mUploadCallback, $upload );
}
1047
/**
 * Parse a <contributor> element.
 *
 * @return array Any of the keys 'id', 'ip' and 'username' that were present;
 *   empty for an empty element
 */
private function handleContributor() {
	$wanted = [ 'id', 'ip', 'username' ];
	$contributor = [];

	if ( $this->reader->isEmptyElement ) {
		return $contributor;
	}

	while ( $this->reader->read() ) {
		$isClosingTag = $this->reader->nodeType == XMLReader::END_ELEMENT &&
			$this->reader->localName == 'contributor';
		if ( $isClosingTag ) {
			break;
		}

		$tag = $this->reader->localName;
		if ( !in_array( $tag, $wanted ) ) {
			continue;
		}

		$contributor[$tag] = $this->nodeContents();
	}

	return $contributor;
}
1073
/**
 * Turn a title from the dump into a local title and check that it may be
 * imported.
 *
 * The edit/create permission checks are bypassed when the 'CommandLineMode'
 * config setting is truthy. Every rejection emits a notice.
 *
 * @param string $text Page title text from the dump
 * @param string|null $ns Namespace ID from the dump, if any
 * @return array|bool [ local title, ForeignTitle ] on success, false if the
 *   page must not be imported
 */
private function processTitle( $text, $ns = null ) {
	// Without site info there is no namespace map to interpret the title with.
	$foreignTitleFactory = is_null( $this->foreignNamespaces )
		? new NaiveForeignTitleFactory()
		: new NamespaceAwareForeignTitleFactory( $this->foreignNamespaces );

	$foreignTitle = $foreignTitleFactory->createForeignTitle( $text,
		intval( $ns ) );

	$title = $this->importTitleFactory->createTitleFromForeignTitle(
		$foreignTitle );

	$commandLineMode = $this->config->get( 'CommandLineMode' );

	if ( is_null( $title ) ) {
		# Invalid page title? Ignore the page
		$this->notice( 'import-error-invalid', $foreignTitle->getFullText() );
		return false;
	}

	if ( $title->isExternal() ) {
		$this->notice( 'import-error-interwiki', $title->getPrefixedText() );
		return false;
	}

	if ( !$title->canExist() ) {
		$this->notice( 'import-error-special', $title->getPrefixedText() );
		return false;
	}

	if ( !$title->userCan( 'edit' ) && !$commandLineMode ) {
		# Do not import if the importing wiki user cannot edit this page
		$this->notice( 'import-error-edit', $title->getPrefixedText() );
		return false;
	}

	if ( !$title->exists() && !$title->userCan( 'create' ) && !$commandLineMode ) {
		# Do not import if the importing wiki user cannot create this page
		$this->notice( 'import-error-create', $title->getPrefixedText() );
		return false;
	}

	return [ $title, $foreignTitle ];
}
1116}
This list may contain false positives That usually means there is additional text with links below the first Each row contains links to the first and second as well as the first line of the second redirect text
and that you know you can do these things To protect your we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights These restrictions translate to certain responsibilities for you if you distribute copies of the or if you modify it For if you distribute copies of such a whether gratis or for a you must give the recipients all the rights that you have You must make sure that receive or can get the source code And you must show them these terms so they know their rights We protect your rights with two and(2) offer you this license which gives you legal permission to copy
$wgMaxArticleSize
Maximum article size in kilobytes.
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
wfTempDir()
Tries to get the system directory for temporary files.
wfTimestampNow()
Convenience function; returns MediaWiki timestamp for the present time.
wfSetVar(&$dest, $source, $force=false)
Sets dest to source and returns the original value of dest If source is NULL, it just returns the val...
if( $line===false) $args
Definition cdb.php:64
Class to parse and build external user names.
Reporting callback.
Exception representing a failure to serialize or unserialize a content object.
MediaWiki exception.
MediaWikiServices is the service locator for the application scope of MediaWiki.
A parser that translates page titles on a foreign wiki into ForeignTitle objects, with no knowledge o...
A class to convert page titles on a foreign wiki (ForeignTitle objects) into page titles on the local...
A parser that translates page titles on a foreign wiki into ForeignTitle objects, using information a...
A class to convert page titles on a foreign wiki (ForeignTitle objects) into page titles on the local...
A class to convert page titles on a foreign wiki (ForeignTitle objects) into page titles on the local...
static registerSource(ImportSource $source)
XML file reader for the page data importer.
finishImportPage( $title, $foreignTitle, $revCount, $sRevCount, $pageInfo)
Mostly for hook use.
setImportUploads( $import)
doImport()
Primary entry point.
setPageCallback( $callback)
Sets the action to perform as each new page in the stream is reached.
setUsernamePrefix( $usernamePrefix, $assignKnownUsers)
ExternalUserNames $externalUserNames
setNoUpdates( $noupdates)
Set 'no updates' mode.
pageOutCallback( $title, $foreignTitle, $revCount, $sucCount, $pageInfo)
Notify the callback function when a "</page>" is closed.
setLogItemCallback( $callback)
Sets the action to perform as each log item is reached.
importUpload( $revision)
Dummy for now...
setImportTitleFactory( $factory)
Sets the factory object to use to convert ForeignTitle objects into local Title objects.
dumpTemp( $contents)
setSiteInfoCallback( $callback)
Sets the action to perform when site info is encountered.
nodeAttribute( $attr)
Retrieves the contents of the named attribute of the current element.
pageCallback( $title)
Notify the callback function when a new "<page>" is reached.
processLogItem( $logInfo)
setTargetNamespace( $namespace)
Set a target namespace to override the defaults.
setPageOffset( $nthPage)
Sets 'pageOffset' value.
debugRevisionHandler(&$revision)
Alternate per-revision callback, for debugging.
nodeContents()
Shouldn't something like this be built-in to XMLReader? Fetches text contents of the current element,...
array $countableCache
importLogItem( $revision)
Default per-revision callback, performs the import.
setImageBasePath( $dir)
handleUpload(&$pageInfo)
handleRevision(&$pageInfo)
revisionCallback( $revision)
Notify the callback function of a revision.
logItemCallback( $revision)
Notify the callback function of a new log item.
throwXmlError( $err)
setDebug( $debug)
Set debug mode...
processRevision( $pageInfo, $revisionInfo)
processUpload( $pageInfo, $uploadInfo)
bool $disableStatisticsUpdate
processTitle( $text, $ns=null)
importRevision( $revision)
Default per-revision callback, performs the import.
ImportTitleFactory $importTitleFactory
__construct(ImportSource $source, Config $config)
Creates an ImportXMLReader drawing from the source provided.
setPageOutCallback( $callback)
Sets the action to perform as each page in the stream is completed.
setTargetRootPage( $rootpage)
Set a target root page under which all pages are imported.
setNoticeCallback( $callback)
Set a callback that displays notice messages.
beforeImportPage( $titleAndForeignTitle)
Default per-page callback.
disableStatisticsUpdate()
Statistics updates can take a lot of time.
siteInfoCallback( $siteInfo)
Notify the callback function of site info.
setRevisionCallback( $callback)
Sets the action to perform as each page revision is reached.
setUploadCallback( $callback)
Sets the action to perform as each file upload version is reached.
Represents a revision, log entry or upload during the import process.
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global list
Definition deferred.txt:11
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that When $user is not it can be in the form of< username >< more info > e g for bot passwords intended to be added to log contexts Fields it might only if the login was with a bot password it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output modifiable modifiable after all normalizations have been except for the $wgMaxImageArea check set to true or false to override the $wgMaxImageArea check result gives extension the possibility to transform it themselves $handler
Definition hooks.txt:894
namespace and then decline to actually register it file or subcat img or subcat $title
Definition hooks.txt:955
either a unescaped string or a HtmlArmor object after in associative array form externallinks including delete and has completed for all link tables whether this was an auto creation use $formDescriptor instead default is conds Array Extra conditions for the No matching items in log is displayed if loglist is empty msgKey Array If you want a nice box with a set this to the key of the message First element is the message additional optional elements are parameters for the key that are processed with wfMessage() -> params() ->parseAsBlock() - offset Set to overwrite offset parameter in $wgRequest set to '' to unset offset - wrap String Wrap the message in html(usually something like "&lt;div ...>$1&lt;/div>"). - flags Integer display flags(NO_ACTION_LINK, NO_EXTRA_USER_LINKS) 'LogException':Called before an exception(or PHP error) is logged. This is meant for integration with external error aggregation services
Status::newGood()` to allow deletion, and then `return false` from the hook function. Ensure you consume the 'ChangeTagAfterDelete' hook to carry out custom deletion actions. $tag:name of the tag $user:user initiating the action & $status:Status object. See above. 'ChangeTagsListActive':Allows you to nominate which of the tags your extension uses are in active use. & $tags:list of all active tags. Append to this array. 'ChangeTagsAfterUpdateTags':Called after tags have been updated with the ChangeTags::updateTags function. Params:$addedTags:tags effectively added in the update $removedTags:tags effectively removed in the update $prevTags:tags that were present prior to the update $rc_id:recentchanges table id $rev_id:revision table id $log_id:logging table id $params:tag params $rc:RecentChange being tagged when the tagging accompanies the action, or null $user:User who performed the tagging when the tagging is subsequent to the action, or null 'ChangeTagsAllowedAdd':Called when checking if a user can add tags to a change. & $allowedTags:List of all the tags the user is allowed to add. Any tags the user wants to add( $addTags) that are not in this array will cause it to fail. You may add or remove tags to this array as required. $addTags:List of tags user intends to add. $user:User who is adding the tags. 'ChangeUserGroups':Called before user groups are changed. $performer:The User who will perform the change $user:The User whose groups will be changed & $add:The groups that will be added & $remove:The groups that will be removed 'Collation::factory':Called if $wgCategoryCollation is an unknown collation. $collationName:Name of the collation in question & $collationObject:Null. Replace with a subclass of the Collation class that implements the collation given in $collationName. 'ConfirmEmailComplete':Called after a user 's email has been confirmed successfully. 
$user:user(object) whose email is being confirmed 'ContentAlterParserOutput':Modify parser output for a given content object. Called by Content::getParserOutput after parsing has finished. Can be used for changes that depend on the result of the parsing but have to be done before LinksUpdate is called(such as adding tracking categories based on the rendered HTML). $content:The Content to render $title:Title of the page, as context $parserOutput:ParserOutput to manipulate 'ContentGetParserOutput':Customize parser output for a given content object, called by AbstractContent::getParserOutput. May be used to override the normal model-specific rendering of page content. $content:The Content to render $title:Title of the page, as context $revId:The revision ID, as context $options:ParserOptions for rendering. To avoid confusing the parser cache, the output can only depend on parameters provided to this hook function, not on global state. $generateHtml:boolean, indicating whether full HTML should be generated. If false, generation of HTML may be skipped, but other information should still be present in the ParserOutput object. & $output:ParserOutput, to manipulate or replace 'ContentHandlerDefaultModelFor':Called when the default content model is determined for a given title. May be used to assign a different model for that title. $title:the Title in question & $model:the model name. Use with CONTENT_MODEL_XXX constants. 'ContentHandlerForModelID':Called when a ContentHandler is requested for a given content model name, but no entry for that model exists in $wgContentHandlers. Note:if your extension implements additional models via this hook, please use GetContentModels hook to make them known to core. $modeName:the requested content model name & $handler:set this to a ContentHandler object, if desired. 'ContentModelCanBeUsedOn':Called to determine whether that content model can be used on a given page. 
This is especially useful to prevent some content models to be used in some special location. $contentModel:ID of the content model in question $title:the Title in question. & $ok:Output parameter, whether it is OK to use $contentModel on $title. Handler functions that modify $ok should generally return false to prevent further hooks from further modifying $ok. 'ContribsPager::getQueryInfo':Before the contributions query is about to run & $pager:Pager object for contributions & $queryInfo:The query for the contribs Pager 'ContribsPager::reallyDoQuery':Called before really executing the query for My Contributions & $data:an array of results of all contribs queries $pager:The ContribsPager object hooked into $offset:Index offset, inclusive $limit:Exact query limit $descending:Query direction, false for ascending, true for descending 'ContributionsLineEnding':Called before a contributions HTML line is finished $page:SpecialPage object for contributions & $ret:the HTML line $row:the DB row for this line & $classes:the classes to add to the surrounding< li > & $attribs:associative array of other HTML attributes for the< li > element. Currently only data attributes reserved to MediaWiki are allowed(see Sanitizer::isReservedDataAttribute). 'ContributionsToolLinks':Change tool links above Special:Contributions $id:User identifier $title:User page title & $tools:Array of tool links $specialPage:SpecialPage instance for context and services. Can be either SpecialContributions or DeletedContributionsPage. Extensions should type hint against a generic SpecialPage though. 'ConvertContent':Called by AbstractContent::convert when a conversion to another content model is requested. Handler functions that modify $result should generally return false to disable further attempts at conversion. $content:The Content object to be converted. $toModel:The ID of the content model to convert to. $lossy: boolean indicating whether lossy conversion is allowed. 
& $result:Output parameter, in case the handler function wants to provide a converted Content object. Note that $result->getContentModel() must return $toModel. 'ContentSecurityPolicyDefaultSource':Modify the allowed CSP load sources. This affects all directives except for the script directive. If you want to add a script source, see ContentSecurityPolicyScriptSource hook. & $defaultSrc:Array of Content-Security-Policy allowed sources $policyConfig:Current configuration for the Content-Security-Policy header $mode:ContentSecurityPolicy::REPORT_ONLY_MODE or ContentSecurityPolicy::FULL_MODE depending on type of header 'ContentSecurityPolicyDirectives':Modify the content security policy directives. Use this only if ContentSecurityPolicyDefaultSource and ContentSecurityPolicyScriptSource do not meet your needs. & $directives:Array of CSP directives $policyConfig:Current configuration for the CSP header $mode:ContentSecurityPolicy::REPORT_ONLY_MODE or ContentSecurityPolicy::FULL_MODE depending on type of header 'ContentSecurityPolicyScriptSource':Modify the allowed CSP script sources. Note that you also have to use ContentSecurityPolicyDefaultSource if you want non-script sources to be loaded from whatever you add. & $scriptSrc:Array of CSP directives $policyConfig:Current configuration for the CSP header $mode:ContentSecurityPolicy::REPORT_ONLY_MODE or ContentSecurityPolicy::FULL_MODE depending on type of header 'CustomEditor':When invoking the page editor Return true to allow the normal editor to be used, or false if implementing a custom editor, e.g. for a special namespace, etc. 
$article:Article being edited $user:User performing the edit 'DatabaseOraclePostInit':Called after initialising an Oracle database $db:the DatabaseOracle object 'DeletedContribsPager::reallyDoQuery':Called before really executing the query for Special:DeletedContributions Similar to ContribsPager::reallyDoQuery & $data:an array of results of all contribs queries $pager:The DeletedContribsPager object hooked into $offset:Index offset, inclusive $limit:Exact query limit $descending:Query direction, false for ascending, true for descending 'DeletedContributionsLineEnding':Called before a DeletedContributions HTML line is finished. Similar to ContributionsLineEnding $page:SpecialPage object for DeletedContributions & $ret:the HTML line $row:the DB row for this line & $classes:the classes to add to the surrounding< li > & $attribs:associative array of other HTML attributes for the< li > element. Currently only data attributes reserved to MediaWiki are allowed(see Sanitizer::isReservedDataAttribute). 'DeleteUnknownPreferences':Called by the cleanupPreferences.php maintenance script to build a WHERE clause with which to delete preferences that are not known about. This hook is used by extensions that have dynamically-named preferences that should not be deleted in the usual cleanup process. For example, the Gadgets extension creates preferences prefixed with 'gadget-', and so anything with that prefix is excluded from the deletion. &where:An array that will be passed as the $cond parameter to IDatabase::select() to determine what will be deleted from the user_properties table. $db:The IDatabase object, useful for accessing $db->buildLike() etc. 'DifferenceEngineAfterLoadNewText':called in DifferenceEngine::loadNewText() after the new revision 's content has been loaded into the class member variable $differenceEngine->mNewContent but before returning true from this function. 
$differenceEngine:DifferenceEngine object 'DifferenceEngineLoadTextAfterNewContentIsLoaded':called in DifferenceEngine::loadText() after the new revision 's content has been loaded into the class member variable $differenceEngine->mNewContent but before checking if the variable 's value is null. This hook can be used to inject content into said class member variable. $differenceEngine:DifferenceEngine object 'DifferenceEngineMarkPatrolledLink':Allows extensions to change the "mark as patrolled" link which is shown both on the diff header as well as on the bottom of a page, usually wrapped in a span element which has class="patrollink". $differenceEngine:DifferenceEngine object & $markAsPatrolledLink:The "mark as patrolled" link HTML(string) $rcid:Recent change ID(rc_id) for this change(int) 'DifferenceEngineMarkPatrolledRCID':Allows extensions to possibly change the rcid parameter. For example the rcid might be set to zero due to the user being the same as the performer of the change but an extension might still want to show it under certain conditions. & $rcid:rc_id(int) of the change or 0 $differenceEngine:DifferenceEngine object $change:RecentChange object $user:User object representing the current user 'DifferenceEngineNewHeader':Allows extensions to change the $newHeader variable, which contains information about the new revision, such as the revision 's author, whether the revision was marked as a minor edit or not, etc. $differenceEngine:DifferenceEngine object & $newHeader:The string containing the various #mw-diff-otitle[1-5] divs, which include things like revision author info, revision comment, RevisionDelete link and more $formattedRevisionTools:Array containing revision tools, some of which may have been injected with the DiffRevisionTools hook $nextlink:String containing the link to the next revision(if any) $status
Definition hooks.txt:1032
$data
Utility to generate mapping file used in mw.Title (phpCharToUpper.json)
const NS_MAIN
Definition Defines.php:73
Interface for configuration instances.
Definition Config.php:28
Source interface for XML import.
Represents an object that can convert page titles on a foreign wiki (ForeignTitle objects) into page ...
$debug
Definition mcc.php:31
The wiki should then use memcached to cache various data. To use multiple servers, just add more items to the array. To increase the weight of a server, make its entry a subarray, e.g. array("192.168.0.1:11211", 2).
$source
$buffer
$content
$params