MediaWiki REL1_30
WikiImporter.php
Go to the documentation of this file.
1<?php
27
35 private $reader = null;
36 private $foreignNamespaces = null;
41 private $mNoUpdates = false;
42 private $pageOffset = 0;
44 private $config;
48 private $countableCache = [];
50 private $disableStatisticsUpdate = false;
51
/**
 * Creates a WikiImporter drawing from the source provided.
 *
 * Opens an XMLReader on the source through the "uploadsource" stream
 * wrapper and installs the default per-page, per-revision, per-upload and
 * per-log-item callbacks.
 *
 * @param ImportSource $source
 * @param Config $config
 * @throws Exception If the XMLReader class is not available
 * @throws MWException If the XML stream cannot be opened
 */
public function __construct( ImportSource $source, Config $config ) {
	if ( !class_exists( 'XMLReader' ) ) {
		throw new Exception( 'Import requires PHP to have been compiled with libxml support' );
	}

	$this->reader = new XMLReader();
	$this->config = $config;

	if ( !in_array( 'uploadsource', stream_get_wrappers() ) ) {
		stream_wrapper_register( 'uploadsource', 'UploadSourceAdapter' );
	}
	// Register the source with the stream wrapper; the returned ID is the
	// "host" part of the uploadsource:// URL opened below.
	$id = UploadSourceAdapter::registerSource( $source );

	// Enable the entity loader, as it is needed for loading external URLs via
	// XMLReader::open (T86036)
	$oldDisable = libxml_disable_entity_loader( false );
	if ( defined( 'LIBXML_PARSEHUGE' ) ) {
		$status = $this->reader->open( "uploadsource://$id", null, LIBXML_PARSEHUGE );
	} else {
		$status = $this->reader->open( "uploadsource://$id" );
	}
	if ( !$status ) {
		$error = libxml_get_last_error();
		libxml_disable_entity_loader( $oldDisable );
		// libxml_get_last_error() returns false when its error buffer is
		// empty, so only dereference the result when it is an object.
		$detail = $error ? $error->message : 'unknown libxml error';
		throw new MWException( 'Encountered an internal error while initializing WikiImporter object: ' .
			$detail );
	}
	libxml_disable_entity_loader( $oldDisable );

	// Default callbacks
	$this->setPageCallback( [ $this, 'beforeImportPage' ] );
	$this->setRevisionCallback( [ $this, "importRevision" ] );
	$this->setUploadCallback( [ $this, 'importUpload' ] );
	$this->setLogItemCallback( [ $this, 'importLogItem' ] );
	$this->setPageOutCallback( [ $this, 'finishImportPage' ] );

	$this->importTitleFactory = new NaiveImportTitleFactory();
}
96
/**
 * Accessor for the underlying XML reader.
 *
 * @return null|XMLReader The reader created by the constructor
 */
public function getReader() {
	return $this->reader;
}
103
/**
 * Report an XML error to the debug log.
 *
 * Despite its name this method does not throw; it only logs.
 *
 * @param string $err Error description
 */
public function throwXmlError( $err ) {
	// Goes through the importer's own (optional) debug channel first ...
	$this->debug( "FAILURE: $err" );
	// ... and is then always written to the general debug log.
	wfDebug( "WikiImporter XML error: $err\n" );
}
108
/**
 * Write a line to the debug log, but only while debug mode is enabled.
 *
 * @param string $data Text to log
 */
public function debug( $data ) {
	if ( !$this->mDebug ) {
		return;
	}
	wfDebug( "IMPORT: $data\n" );
}
114
/**
 * Write a warning to the debug log, regardless of the debug mode setting.
 *
 * @param string $data Text to log
 */
public function warn( $data ) {
	wfDebug( "IMPORT: $data\n" );
}
118
/**
 * Emit a notice message to the user.
 *
 * All arguments after $msg are collected and handed on as the message
 * parameters.
 *
 * @param string $msg Message key
 */
public function notice( $msg /*, $param, ...*/ ) {
	// Everything after the message key is a message parameter
	$params = array_slice( func_get_args(), 1 );

	if ( !is_callable( $this->mNoticeCallback ) ) {
		# No ImportReporter -> CLI
		echo wfMessage( $msg, $params )->text() . "\n";
		return;
	}

	call_user_func( $this->mNoticeCallback, $msg, $params );
}
129
/**
 * Switch debug mode on or off; debug() only logs while it is on.
 *
 * @param bool $debug
 */
function setDebug( $debug ) {
	$this->mDebug = $debug;
}
137
/**
 * Set the 'no updates' flag, which is passed on to every WikiRevision
 * this importer creates.
 *
 * @param bool $noupdates
 */
function setNoUpdates( $noupdates ) {
	$this->mNoUpdates = $noupdates;
}
145
/**
 * Set the page offset: doImport() skips top-level nodes until the
 * given number of <page> elements has been counted.
 *
 * @param int $nthPage
 */
function setPageOffset( $nthPage ) {
	$this->pageOffset = $nthPage;
}
155
/**
 * Set the callback used by notice() to display messages.
 *
 * @param callable $callback
 * @return callable The value returned by wfSetVar() (the previous callback)
 */
public function setNoticeCallback( $callback ) {
	return wfSetVar( $this->mNoticeCallback, $callback );
}
165
/**
 * Set the action to perform as each new page in the stream is reached.
 *
 * @param callable $callback
 * @return callable The callback that was installed before
 */
public function setPageCallback( $callback ) {
	$old = $this->mPageCallback;
	$this->mPageCallback = $callback;

	return $old;
}
176
/**
 * Set the action to perform as each page in the stream is completed.
 *
 * @param callable $callback
 * @return callable The callback that was installed before
 */
public function setPageOutCallback( $callback ) {
	$old = $this->mPageOutCallback;
	$this->mPageOutCallback = $callback;

	return $old;
}
191
/**
 * Set the action to perform as each page revision is reached.
 *
 * @param callable $callback
 * @return callable The callback that was installed before
 */
public function setRevisionCallback( $callback ) {
	$old = $this->mRevisionCallback;
	$this->mRevisionCallback = $callback;

	return $old;
}
202
/**
 * Set the action to perform as each file upload version is reached.
 *
 * @param callable $callback
 * @return callable The callback that was installed before
 */
public function setUploadCallback( $callback ) {
	$old = $this->mUploadCallback;
	$this->mUploadCallback = $callback;

	return $old;
}
213
/**
 * Set the action to perform as each log item is reached.
 *
 * @param callable $callback
 * @return callable The callback that was installed before
 */
public function setLogItemCallback( $callback ) {
	$old = $this->mLogItemCallback;
	$this->mLogItemCallback = $callback;

	return $old;
}
224
/**
 * Set the action to perform when site info is encountered.
 *
 * @param callable $callback
 * @return callable The callback that was installed before
 */
public function setSiteInfoCallback( $callback ) {
	$old = $this->mSiteInfoCallback;
	$this->mSiteInfoCallback = $callback;

	return $old;
}
235
/**
 * Set the factory object used by processTitle() to convert ForeignTitle
 * objects into local Title objects.
 *
 * @param ImportTitleFactory $factory
 */
public function setImportTitleFactory( $factory ) {
	$this->importTitleFactory = $factory;
}
244
/**
 * Set a target namespace to override the defaults.
 *
 * @param null|int $namespace Namespace ID, or null for "no override"
 * @return bool False if the namespace is negative or does not exist
 */
public function setTargetNamespace( $namespace ) {
	if ( is_null( $namespace ) ) {
		// Don't override namespaces: fall back to the same title factory the
		// constructor installs, so that an earlier override is undone.
		$this->setImportTitleFactory( new NaiveImportTitleFactory() );
		return true;
	}

	if ( $namespace >= 0 && MWNamespace::exists( intval( $namespace ) ) ) {
		$namespace = intval( $namespace );
		$this->setImportTitleFactory( new NamespaceImportTitleFactory( $namespace ) );
		return true;
	}

	return false;
}
266
/**
 * Set a target root page under which all pages are imported.
 *
 * @param null|string $rootpage Root page title, or null for "no root page"
 * @return Status Fatal if the title is invalid, external, or lies in a
 *   namespace without subpages
 */
public function setTargetRootPage( $rootpage ) {
	// Needed for the namespace name in the 'import-rootpage-nosubpage' error
	global $wgContLang;

	$status = Status::newGood();

	if ( is_null( $rootpage ) ) {
		// No rootpage: go back to the default title factory
		$this->setImportTitleFactory( new NaiveImportTitleFactory() );
		return $status;
	}
	if ( $rootpage === '' ) {
		// An empty string leaves the current configuration untouched
		return $status;
	}

	$rootpage = rtrim( $rootpage, '/' ); // avoid double slashes
	$title = Title::newFromText( $rootpage );

	if ( !$title || $title->isExternal() ) {
		$status->fatal( 'import-rootpage-invalid' );
	} elseif ( !MWNamespace::hasSubpages( $title->getNamespace() ) ) {
		$displayNSText = $title->getNamespace() == NS_MAIN
			? wfMessage( 'blanknamespace' )->text()
			: $wgContLang->getNsText( $title->getNamespace() );
		$status->fatal( 'import-rootpage-nosubpage', $displayNSText );
	} else {
		// set namespace to 'all', so the namespace check in processTitle() can pass
		$this->setTargetNamespace( null );
		// Actually apply the root page: imported titles become its subpages
		$this->setImportTitleFactory( new SubpageImportTitleFactory( $title ) );
	}

	return $status;
}
300
/**
 * Set the directory that handleUpload() prefixes to an upload's 'rel'
 * path when looking for the file on disk.
 *
 * @param string $dir
 */
public function setImageBasePath( $dir ) {
	$this->mImageBasePath = $dir;
}
307
/**
 * Set whether <upload> elements are processed; handleUpload() only calls
 * processUpload() while this is truthy.
 *
 * @param bool $import
 */
public function setImportUploads( $import ) {
	$this->mImportUploads = $import;
}
314
/**
 * Turn off the article count adjustment that finishImportPage()
 * otherwise performs for each page.
 */
public function disableStatisticsUpdate() {
	$this->disableStatisticsUpdate = true;
}
322
/**
 * Default per-page callback. Remembers whether the page was countable
 * before the import, for use by finishImportPage().
 *
 * @param array $titleAndForeignTitle Two-element array whose first
 *   element is the local Title
 * @return bool Always true
 */
public function beforeImportPage( $titleAndForeignTitle ) {
	$title = $titleAndForeignTitle[0];
	$cacheKey = 'title_' . $title->getPrefixedText();

	$this->countableCache[$cacheKey] = WikiPage::factory( $title )->isCountable();

	return true;
}
335
/**
 * Default per-revision callback, performs the import.
 *
 * @param WikiRevision $revision
 * @return bool Result of the import, or false when the content model may
 *   not be used on the title or the content cannot be unserialized
 */
public function importRevision( $revision ) {
	$handler = $revision->getContentHandler();
	if ( !$handler->canBeUsedOn( $revision->getTitle() ) ) {
		$this->notice( 'import-error-bad-location',
			$revision->getTitle()->getPrefixedText(),
			$revision->getID(),
			$revision->getModel(),
			$revision->getFormat() );

		return false;
	}

	$imported = false;
	try {
		$imported = $revision->importOldRevision();
	} catch ( MWContentSerializationException $ex ) {
		// Report the broken revision and fall through to the false result
		$this->notice( 'import-error-unserialize',
			$revision->getTitle()->getPrefixedText(),
			$revision->getID(),
			$revision->getModel(),
			$revision->getFormat() );
	}

	return $imported;
}
364
/**
 * Default per-log-item callback; delegates to the revision object.
 *
 * @param WikiRevision $revision
 * @return mixed Whatever WikiRevision::importLogItem() returns
 */
public function importLogItem( $revision ) {
	return $revision->importLogItem();
}
373
/**
 * Default per-upload callback; delegates to the revision object.
 *
 * @param WikiRevision $revision
 * @return mixed Whatever WikiRevision::importUpload() returns
 */
public function importUpload( $revision ) {
	return $revision->importUpload();
}
382
/**
 * Default page-out callback. Adjusts the article count statistics and
 * runs the 'AfterImportPage' hook with the arguments it was given.
 *
 * @param Title $title
 * @param ForeignTitle $foreignTitle
 * @param int $revCount
 * @param int $sRevCount
 * @param array $pageInfo
 * @return bool Result of the 'AfterImportPage' hook
 */
public function finishImportPage( $title, $foreignTitle, $revCount,
	$sRevCount, $pageInfo
) {
	// Update article count statistics (T42009)
	// The normal counting logic in WikiPage->doEditUpdates() is designed for
	// one-revision-at-a-time editing, not bulk imports. In this situation it
	// suffers from issues of replica DB lag. We let WikiPage handle the total page
	// and revision count, and we implement our own custom logic for the
	// article (content page) count.
	if ( !$this->disableStatisticsUpdate ) {
		$page = WikiPage::factory( $title );
		$page->loadPageData( 'fromdbmaster' );
		$content = $page->getContent();

		if ( $content !== null ) {
			$editInfo = $page->prepareContentForEdit( $content );
			$countKey = 'title_' . $title->getPrefixedText();
			$countable = $page->isCountable( $editInfo );

			// Only adjust when beforeImportPage() recorded a prior state
			// and that state differs from the current one.
			$hasPrior = array_key_exists( $countKey, $this->countableCache );
			if ( $hasPrior && $countable != $this->countableCache[$countKey] ) {
				$delta = (int)$countable - (int)$this->countableCache[$countKey];
				DeferredUpdates::addUpdate( SiteStatsUpdate::factory( [
					'articles' => $delta
				] ) );
			}
		} else {
			wfDebug( __METHOD__ . ': Skipping article count adjustment for ' . $title .
				' because WikiPage::getContent() returned null' );
		}
	}

	$args = func_get_args();
	return Hooks::run( 'AfterImportPage', $args );
}
424
/**
 * Alternate per-revision callback, for debugging: dumps the revision's
 * fields through debug().
 *
 * @param WikiRevision &$revision
 */
public function debugRevisionHandler( &$revision ) {
	$this->debug( "Got revision:" );

	$titleText = is_object( $revision->title )
		? $revision->title->getPrefixedText()
		: '<invalid>';
	$this->debug( "-- Title: " . $titleText );

	$this->debug( "-- User: {$revision->user_text}" );
	$this->debug( "-- Timestamp: {$revision->timestamp}" );
	$this->debug( "-- Comment: {$revision->comment}" );
	$this->debug( "-- Text: {$revision->text}" );
}
441
/**
 * Notify the callback function of site info.
 *
 * @param array $siteInfo
 * @return bool|mixed False when no callback is set, otherwise the
 *   callback's return value
 */
private function siteInfoCallback( $siteInfo ) {
	if ( !isset( $this->mSiteInfoCallback ) ) {
		return false;
	}

	return call_user_func_array( $this->mSiteInfoCallback, [ $siteInfo, $this ] );
}
455
/**
 * Notify the callback function when a new "<page>" is reached.
 *
 * @param array $title Value handed to the page callback unchanged
 */
function pageCallback( $title ) {
	if ( !isset( $this->mPageCallback ) ) {
		return;
	}

	call_user_func( $this->mPageCallback, $title );
}
465
/**
 * Notify the callback function when a "</page>" is closed. All arguments
 * are forwarded to the callback as received.
 *
 * @param Title $title
 * @param ForeignTitle $foreignTitle
 * @param int $revCount
 * @param int $sucCount Number of revisions for which callback returned true
 * @param array $pageInfo Associative array of page information
 */
private function pageOutCallback( $title, $foreignTitle, $revCount,
		$sucCount, $pageInfo ) {
	if ( !isset( $this->mPageOutCallback ) ) {
		return;
	}

	$forwarded = func_get_args();
	call_user_func_array( $this->mPageOutCallback, $forwarded );
}
481
/**
 * Notify the callback function of a revision.
 *
 * @param WikiRevision $revision
 * @return bool|mixed False when no callback is set, otherwise the
 *   callback's return value
 */
private function revisionCallback( $revision ) {
	if ( !isset( $this->mRevisionCallback ) ) {
		return false;
	}

	return call_user_func_array( $this->mRevisionCallback, [ $revision, $this ] );
}
495
/**
 * Notify the callback function of a new log item.
 *
 * @param WikiRevision $revision
 * @return bool|mixed False when no callback is set, otherwise the
 *   callback's return value
 */
private function logItemCallback( $revision ) {
	if ( !isset( $this->mLogItemCallback ) ) {
		return false;
	}

	return call_user_func_array( $this->mLogItemCallback, [ $revision, $this ] );
}
509
/**
 * Retrieve the contents of the named attribute of the current element.
 *
 * @param string $attr Attribute name
 * @return string|null Whatever XMLReader::getAttribute() returns
 */
public function nodeAttribute( $attr ) {
	return $this->reader->getAttribute( $attr );
}
519
/**
 * Fetch the text contents of the current element by reading forward
 * until the next end-element node.
 *
 * @return string Concatenated text, CDATA and significant whitespace;
 *   empty for an empty element or when the stream ends first
 */
public function nodeContents() {
	if ( $this->reader->isEmptyElement ) {
		return "";
	}

	$textTypes = [
		XMLReader::TEXT,
		XMLReader::CDATA,
		XMLReader::SIGNIFICANT_WHITESPACE,
	];

	$collected = "";
	while ( $this->reader->read() ) {
		$nodeType = $this->reader->nodeType;
		if ( $nodeType == XMLReader::END_ELEMENT ) {
			return $collected;
		}
		if ( in_array( $nodeType, $textTypes ) ) {
			$collected .= $this->reader->value;
		}
	}

	// Ran out of input before the element was closed
	$this->reader->close();
	return '';
}
547
/**
 * Primary entry point: walks the top-level nodes of the dump and
 * dispatches <siteinfo>, <page> and <logitem> to their handlers.
 *
 * @throws MWException If the root element is not <mediawiki>
 * @throws Exception Any exception raised while processing is re-thrown
 *   after the reader has been closed
 * @return bool Always true when no exception is thrown
 */
public function doImport() {
	// Calls to reader->read need to be wrapped in calls to
	// libxml_disable_entity_loader() to avoid local file
	// inclusion attacks (T48932).
	$oldDisable = libxml_disable_entity_loader( true );
	$this->reader->read();

	if ( $this->reader->localName != 'mediawiki' ) {
		libxml_disable_entity_loader( $oldDisable );
		throw new MWException( "Expected <mediawiki> tag, got " .
			$this->reader->localName );
	}
	$this->debug( "<mediawiki> tag is correct." );

	$this->debug( "Starting primary dump processing loop." );

	$more = $this->reader->read();
	$failure = null;
	$pagesSeen = 0;
	try {
		while ( $more ) {
			$tag = $this->reader->localName;
			if ( $this->pageOffset ) {
				if ( $tag === 'page' ) {
					$pagesSeen++;
				}
				if ( $pagesSeen < $this->pageOffset ) {
					// Still before the requested offset: jump over this node
					$more = $this->reader->next();
					continue;
				}
			}
			$type = $this->reader->nodeType;
			$skipSubtree = false;

			if ( !Hooks::run( 'ImportHandleToplevelXMLTag', [ $this ] ) ) {
				// Do nothing
			} elseif ( $tag == 'mediawiki' && $type == XMLReader::END_ELEMENT ) {
				break;
			} elseif ( $tag == 'siteinfo' ) {
				$this->handleSiteInfo();
			} elseif ( $tag == 'page' ) {
				$this->handlePage();
			} elseif ( $tag == 'logitem' ) {
				$this->handleLogItem();
			} elseif ( $tag != '#text' ) {
				$this->warn( "Unhandled top-level XML tag $tag" );

				$skipSubtree = true;
			}

			if ( $skipSubtree ) {
				$more = $this->reader->next();
				$this->debug( "Skip" );
			} else {
				$more = $this->reader->read();
			}
		}
	} catch ( Exception $ex ) {
		// Hold on to it so the cleanup below always runs
		$failure = $ex;
	}

	// finally
	libxml_disable_entity_loader( $oldDisable );
	$this->reader->close();

	if ( $failure ) {
		throw $failure;
	}

	return true;
}
625
/**
 * Read a <siteinfo> element: collects the simple fields and the foreign
 * namespace names, then passes them to the site info callback.
 */
private function handleSiteInfo() {
	$this->debug( "Enter site info handler." );
	$siteInfo = [];

	// Fields that can just be stuffed in the siteInfo object
	$simpleTags = [ 'sitename', 'base', 'generator', 'case' ];

	while ( $this->reader->read() ) {
		$tag = $this->reader->localName;

		if ( $this->reader->nodeType == XMLReader::END_ELEMENT && $tag == 'siteinfo' ) {
			break;
		}

		if ( $tag == 'namespace' ) {
			// Namespace names are keyed by the element's "key" attribute
			$this->foreignNamespaces[$this->nodeAttribute( 'key' )] =
				$this->nodeContents();
		} elseif ( in_array( $tag, $simpleTags ) ) {
			$siteInfo[$tag] = $this->nodeContents();
		}
	}

	$siteInfo['_namespaces'] = $this->foreignNamespaces;
	$this->siteInfoCallback( $siteInfo );
}
652
/**
 * Read a <logitem> element into an array and hand it to processLogItem().
 */
private function handleLogItem() {
	$this->debug( "Enter log item handler." );
	$logInfo = [];

	// Fields that can just be stuffed in the logInfo array
	$simpleTags = [ 'id', 'comment', 'type', 'action', 'timestamp',
		'logtitle', 'params' ];

	while ( $this->reader->read() ) {
		$tag = $this->reader->localName;

		if ( $this->reader->nodeType == XMLReader::END_ELEMENT && $tag == 'logitem' ) {
			break;
		}

		if ( !Hooks::run( 'ImportHandleLogItemXMLTag', [ $this, $logInfo ] ) ) {
			// Do nothing
		} elseif ( in_array( $tag, $simpleTags ) ) {
			$logInfo[$tag] = $this->nodeContents();
		} elseif ( $tag == 'contributor' ) {
			$logInfo['contributor'] = $this->handleContributor();
		} elseif ( $tag != '#text' ) {
			$this->warn( "Unhandled log-item XML tag $tag" );
		}
	}

	$this->processLogItem( $logInfo );
}
684
/**
 * Build a WikiRevision from the collected log item fields and pass it to
 * the log item callback.
 *
 * @param array $logInfo Fields gathered by handleLogItem()
 * @return bool|mixed Result of the log item callback
 */
private function processLogItem( $logInfo ) {
	$logEntry = new WikiRevision( $this->config );

	if ( isset( $logInfo['id'] ) ) {
		$logEntry->setID( $logInfo['id'] );
	}
	$logEntry->setType( $logInfo['type'] );
	$logEntry->setAction( $logInfo['action'] );
	if ( isset( $logInfo['timestamp'] ) ) {
		$logEntry->setTimestamp( $logInfo['timestamp'] );
	}
	if ( isset( $logInfo['params'] ) ) {
		$logEntry->setParams( $logInfo['params'] );
	}
	if ( isset( $logInfo['logtitle'] ) ) {
		// @todo Using Title for non-local titles is a recipe for disaster.
		// We should use ForeignTitle here instead.
		$logEntry->setTitle( Title::newFromText( $logInfo['logtitle'] ) );
	}

	$logEntry->setNoUpdates( $this->mNoUpdates );

	if ( isset( $logInfo['comment'] ) ) {
		$logEntry->setComment( $logInfo['comment'] );
	}

	if ( isset( $logInfo['contributor']['ip'] ) ) {
		$logEntry->setUserIP( $logInfo['contributor']['ip'] );
	}

	// A missing username is recorded under a placeholder name
	$username = isset( $logInfo['contributor']['username'] )
		? $logInfo['contributor']['username']
		: 'Unknown user';
	$logEntry->setUsername( $username );

	return $this->logItemCallback( $logEntry );
}
727
/**
 * Read a <page> element: gathers the page fields, resolves the title when
 * the first <revision> or <upload> is met, dispatches each of those to its
 * handler, and finally fires the page-out callback.
 */
private function handlePage() {
	// Handle page data.
	$this->debug( "Enter page handler." );
	$pageInfo = [ 'revisionCount' => 0, 'successfulRevisionCount' => 0 ];

	// Fields that can just be stuffed in the pageInfo object
	$simpleTags = [ 'title', 'ns', 'id', 'redirect', 'restrictions' ];

	$skipSubtree = false;
	$titleRejected = false;

	while ( $skipSubtree ? $this->reader->next() : $this->reader->read() ) {
		if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
			$this->reader->localName == 'page'
		) {
			break;
		}

		$skipSubtree = false;

		$tag = $this->reader->localName;

		if ( $titleRejected ) {
			// The title is invalid, bail out of this page
			$skipSubtree = true;
		} elseif ( !Hooks::run( 'ImportHandlePageXMLTag', [ $this, &$pageInfo ] ) ) {
			// Do nothing
		} elseif ( in_array( $tag, $simpleTags ) ) {
			// An XML snippet:
			// <page>
			//     <id>123</id>
			//     <title>Page</title>
			//     <redirect title="NewTitle"/>
			//     ...
			// The redirect target lives in an attribute rather than in the
			// element text, so it is fetched differently.
			$pageInfo[$tag] = $tag == 'redirect'
				? $this->nodeAttribute( 'title' )
				: $this->nodeContents();
		} elseif ( $tag == 'revision' || $tag == 'upload' ) {
			if ( !isset( $title ) ) {
				$nsText = isset( $pageInfo['ns'] ) ? $pageInfo['ns'] : null;
				$title = $this->processTitle( $pageInfo['title'], $nsText );

				// $title is either an array of two titles or false.
				if ( is_array( $title ) ) {
					$this->pageCallback( $title );
					$pageInfo['_title'] = $title[0];
					$foreignTitle = $title[1];
				} else {
					$titleRejected = true;
					$skipSubtree = true;
				}
			}

			if ( $title ) {
				if ( $tag == 'revision' ) {
					$this->handleRevision( $pageInfo );
				} else {
					$this->handleUpload( $pageInfo );
				}
			}
		} elseif ( $tag != '#text' ) {
			$this->warn( "Unhandled page XML tag $tag" );
			$skipSubtree = true;
		}
	}

	// @note $pageInfo['_title'] is only set if a valid $title was processed
	// above, in which case pageCallback was called and $foreignTitle was
	// assigned together with it. Without '_title' there is no
	// $foreignTitle either, so the page-out callback is skipped.
	if ( array_key_exists( '_title', $pageInfo ) ) {
		$this->pageOutCallback( $pageInfo['_title'], $foreignTitle,
			$pageInfo['revisionCount'],
			$pageInfo['successfulRevisionCount'],
			$pageInfo );
	}
}
808
/**
 * Read a <revision> element into an array, pass it to processRevision()
 * and update the page's revision counters.
 *
 * @param array &$pageInfo Page info; 'revisionCount' and
 *   'successfulRevisionCount' are incremented here
 */
private function handleRevision( &$pageInfo ) {
	$this->debug( "Enter revision handler" );
	$revisionInfo = [];

	$normalFields = [ 'id', 'timestamp', 'comment', 'minor', 'model', 'format', 'text', 'sha1' ];

	$skip = false;

	while ( $skip ? $this->reader->next() : $this->reader->read() ) {
		if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
			$this->reader->localName == 'revision'
		) {
			break;
		}

		// The skip request only applies to the node that raised it (same
		// as in handlePage()); without this reset one unknown tag would
		// switch every later iteration to next().
		$skip = false;

		$tag = $this->reader->localName;

		if ( !Hooks::run( 'ImportHandleRevisionXMLTag', [
			$this, $pageInfo, $revisionInfo
		] ) ) {
			// Do nothing
		} elseif ( in_array( $tag, $normalFields ) ) {
			$revisionInfo[$tag] = $this->nodeContents();
		} elseif ( $tag == 'contributor' ) {
			$revisionInfo['contributor'] = $this->handleContributor();
		} elseif ( $tag != '#text' ) {
			$this->warn( "Unhandled revision XML tag $tag" );
			$skip = true;
		}
	}

	$pageInfo['revisionCount']++;
	if ( $this->processRevision( $pageInfo, $revisionInfo ) ) {
		$pageInfo['successfulRevisionCount']++;
	}
}
847
/**
 * Build a WikiRevision from the collected revision fields and pass it to
 * the revision callback.
 *
 * @param array $pageInfo Page info; '_title' must hold the local Title
 * @param array $revisionInfo Fields gathered by handleRevision()
 * @throws MWException If the revision text exceeds $wgMaxArticleSize
 * @return bool|mixed Result of the revision callback
 */
private function processRevision( $pageInfo, $revisionInfo ) {
	// Without this declaration the size check below would read an
	// undefined local variable instead of the configured limit.
	global $wgMaxArticleSize;

	// Make sure revisions won't violate $wgMaxArticleSize, which could lead to
	// database errors and instability. Testing for revisions with only listed
	// content models, as other content models might use serialization formats
	// which aren't checked against $wgMaxArticleSize.
	$sizeCheckedModels = [
		'wikitext',
		'css',
		'json',
		'javascript',
		'text',
		''
	];
	if ( ( !isset( $revisionInfo['model'] ) ||
			in_array( $revisionInfo['model'], $sizeCheckedModels ) ) &&
		// A revision without <text> has nothing to measure
		isset( $revisionInfo['text'] ) &&
		strlen( $revisionInfo['text'] ) > $wgMaxArticleSize * 1024
	) {
		throw new MWException( 'The text of ' .
			( isset( $revisionInfo['id'] ) ?
				"the revision with ID $revisionInfo[id]" :
				'a revision'
			) . " exceeds the maximum allowable size ($wgMaxArticleSize KB)" );
	}

	$revision = new WikiRevision( $this->config );

	if ( isset( $revisionInfo['id'] ) ) {
		$revision->setID( $revisionInfo['id'] );
	}
	if ( isset( $revisionInfo['model'] ) ) {
		$revision->setModel( $revisionInfo['model'] );
	}
	if ( isset( $revisionInfo['format'] ) ) {
		$revision->setFormat( $revisionInfo['format'] );
	}
	$revision->setTitle( $pageInfo['_title'] );

	if ( isset( $revisionInfo['text'] ) ) {
		// Let the content handler transform the serialized text on import
		$handler = $revision->getContentHandler();
		$text = $handler->importTransform(
			$revisionInfo['text'],
			$revision->getFormat() );

		$revision->setText( $text );
	}
	if ( isset( $revisionInfo['timestamp'] ) ) {
		$revision->setTimestamp( $revisionInfo['timestamp'] );
	} else {
		$revision->setTimestamp( wfTimestampNow() );
	}

	if ( isset( $revisionInfo['comment'] ) ) {
		$revision->setComment( $revisionInfo['comment'] );
	}

	// The mere presence of <minor> marks the revision as minor
	if ( isset( $revisionInfo['minor'] ) ) {
		$revision->setMinor( true );
	}
	if ( isset( $revisionInfo['contributor']['ip'] ) ) {
		$revision->setUserIP( $revisionInfo['contributor']['ip'] );
	} elseif ( isset( $revisionInfo['contributor']['username'] ) ) {
		$revision->setUsername( $revisionInfo['contributor']['username'] );
	} else {
		$revision->setUsername( 'Unknown user' );
	}
	if ( isset( $revisionInfo['sha1'] ) ) {
		$revision->setSha1Base36( $revisionInfo['sha1'] );
	}
	$revision->setNoUpdates( $this->mNoUpdates );

	return $this->revisionCallback( $revision );
}
926
/**
 * Read an <upload> element into an array and, when upload importing is
 * enabled, pass it to processUpload().
 *
 * @param array &$pageInfo Page info handed on to processUpload()
 * @return mixed Result of processUpload(), or null when uploads are not
 *   being imported
 */
private function handleUpload( &$pageInfo ) {
	$this->debug( "Enter upload handler" );
	$uploadInfo = [];

	$normalFields = [ 'timestamp', 'comment', 'filename', 'text',
		'src', 'size', 'sha1base36', 'archivename', 'rel' ];

	$skip = false;

	while ( $skip ? $this->reader->next() : $this->reader->read() ) {
		if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
			$this->reader->localName == 'upload'
		) {
			break;
		}

		// The skip request only applies to the node that raised it (same
		// as in handlePage()); without this reset one unknown tag would
		// switch every later iteration to next().
		$skip = false;

		$tag = $this->reader->localName;

		if ( !Hooks::run( 'ImportHandleUploadXMLTag', [
			$this, $pageInfo
		] ) ) {
			// Do nothing
		} elseif ( in_array( $tag, $normalFields ) ) {
			$uploadInfo[$tag] = $this->nodeContents();
		} elseif ( $tag == 'contributor' ) {
			$uploadInfo['contributor'] = $this->handleContributor();
		} elseif ( $tag == 'contents' ) {
			// Read the attribute while the reader is still positioned on
			// the start tag; nodeContents() moves it forward.
			$encoding = $this->reader->getAttribute( 'encoding' );
			$contents = $this->nodeContents();
			if ( $encoding === 'base64' ) {
				$uploadInfo['fileSrc'] = $this->dumpTemp( base64_decode( $contents ) );
				$uploadInfo['isTempSrc'] = true;
			}
		} elseif ( $tag != '#text' ) {
			$this->warn( "Unhandled upload XML tag $tag" );
			$skip = true;
		}
	}

	// A file found under the image base path takes precedence over
	// inline contents
	if ( $this->mImageBasePath && isset( $uploadInfo['rel'] ) ) {
		$path = "{$this->mImageBasePath}/{$uploadInfo['rel']}";
		if ( file_exists( $path ) ) {
			$uploadInfo['fileSrc'] = $path;
			$uploadInfo['isTempSrc'] = false;
		}
	}

	if ( $this->mImportUploads ) {
		return $this->processUpload( $pageInfo, $uploadInfo );
	}
}
981
/**
 * Write the given data to a new temporary file.
 *
 * @param string $contents Data to write
 * @return string Name of the temporary file, as returned by tempnam()
 */
private function dumpTemp( $contents ) {
	$tempFile = tempnam( wfTempDir(), 'importupload' );
	file_put_contents( $tempFile, $contents );

	return $tempFile;
}
991
/**
 * Build a WikiRevision from the collected upload fields and pass it to
 * the upload callback.
 *
 * @param array $pageInfo Page info; '_title' and 'id' are read
 * @param array $uploadInfo Fields gathered by handleUpload()
 * @return mixed Result of the upload callback
 */
private function processUpload( $pageInfo, $uploadInfo ) {
	$upload = new WikiRevision( $this->config );

	$text = '';
	if ( isset( $uploadInfo['text'] ) ) {
		$text = $uploadInfo['text'];
	}

	$upload->setTitle( $pageInfo['_title'] );
	$upload->setID( $pageInfo['id'] );
	$upload->setTimestamp( $uploadInfo['timestamp'] );
	$upload->setText( $text );
	$upload->setFilename( $uploadInfo['filename'] );
	if ( isset( $uploadInfo['archivename'] ) ) {
		$upload->setArchiveName( $uploadInfo['archivename'] );
	}
	$upload->setSrc( $uploadInfo['src'] );
	if ( isset( $uploadInfo['fileSrc'] ) ) {
		// The second argument tells the revision whether the file is a
		// temporary one created by this importer
		$isTemp = !empty( $uploadInfo['isTempSrc'] );
		$upload->setFileSrc( $uploadInfo['fileSrc'], $isTemp );
	}
	if ( isset( $uploadInfo['sha1base36'] ) ) {
		$upload->setSha1Base36( $uploadInfo['sha1base36'] );
	}
	$upload->setSize( intval( $uploadInfo['size'] ) );
	$upload->setComment( $uploadInfo['comment'] );

	if ( isset( $uploadInfo['contributor']['ip'] ) ) {
		$upload->setUserIP( $uploadInfo['contributor']['ip'] );
	}
	if ( isset( $uploadInfo['contributor']['username'] ) ) {
		$upload->setUsername( $uploadInfo['contributor']['username'] );
	}
	$upload->setNoUpdates( $this->mNoUpdates );

	return call_user_func( $this->mUploadCallback, $upload );
}
1030
/**
 * Read a <contributor> element.
 *
 * @return array Map with any of the keys 'id', 'ip' and 'username' that
 *   were present; empty for an empty element
 */
private function handleContributor() {
	$knownTags = [ 'id', 'ip', 'username' ];
	$contributor = [];

	if ( $this->reader->isEmptyElement ) {
		return $contributor;
	}

	while ( $this->reader->read() ) {
		$tag = $this->reader->localName;

		if ( $this->reader->nodeType == XMLReader::END_ELEMENT && $tag == 'contributor' ) {
			break;
		}

		if ( in_array( $tag, $knownTags ) ) {
			$contributor[$tag] = $this->nodeContents();
		}
	}

	return $contributor;
}
1056
/**
 * Turn a title string from the dump into a local Title and check that it
 * may be imported.
 *
 * @param string $text Page title as found in the dump
 * @param string|null $ns Namespace ID as found in the dump, if any
 * @return array|bool [ Title, ForeignTitle ] on success, false when the
 *   page has to be skipped (a notice is emitted in that case)
 */
private function processTitle( $text, $ns = null ) {
	// Use the namespace-aware factory only once <siteinfo> has supplied
	// the foreign namespace names
	$foreignTitleFactory = is_null( $this->foreignNamespaces )
		? new NaiveForeignTitleFactory()
		: new NamespaceAwareForeignTitleFactory( $this->foreignNamespaces );

	$foreignTitle = $foreignTitleFactory->createForeignTitle( $text, intval( $ns ) );

	$title = $this->importTitleFactory->createTitleFromForeignTitle( $foreignTitle );

	$commandLineMode = $this->config->get( 'CommandLineMode' );

	if ( is_null( $title ) ) {
		# Invalid page title? Ignore the page
		$this->notice( 'import-error-invalid', $foreignTitle->getFullText() );
		return false;
	}
	if ( $title->isExternal() ) {
		$this->notice( 'import-error-interwiki', $title->getPrefixedText() );
		return false;
	}
	if ( !$title->canExist() ) {
		$this->notice( 'import-error-special', $title->getPrefixedText() );
		return false;
	}
	if ( !$title->userCan( 'edit' ) && !$commandLineMode ) {
		# Do not import if the importing wiki user cannot edit this page
		$this->notice( 'import-error-edit', $title->getPrefixedText() );
		return false;
	}
	if ( !$title->exists() && !$title->userCan( 'create' ) && !$commandLineMode ) {
		# Do not import if the importing wiki user cannot create this page
		$this->notice( 'import-error-create', $title->getPrefixedText() );
		return false;
	}

	return [ $title, $foreignTitle ];
}
1099}
Apache License January AND DISTRIBUTION Definitions License shall mean the terms and conditions for use
$wgMaxArticleSize
Maximum article size in kilobytes.
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
wfTempDir()
Tries to get the system directory for temporary files.
wfTimestampNow()
Convenience function; returns MediaWiki timestamp for the present time.
wfSetVar(&$dest, $source, $force=false)
Sets dest to source and returns the original value of dest. If source is NULL, it just returns the val...
if( $line===false) $args
Definition cdb.php:63
Reporting callback.
Exception representing a failure to serialize or unserialize a content object.
MediaWiki exception.
MediaWikiServices is the service locator for the application scope of MediaWiki.
A parser that translates page titles on a foreign wiki into ForeignTitle objects, with no knowledge o...
A class to convert page titles on a foreign wiki (ForeignTitle objects) into page titles on the local...
A parser that translates page titles on a foreign wiki into ForeignTitle objects, using information a...
A class to convert page titles on a foreign wiki (ForeignTitle objects) into page titles on the local...
static factory(array $deltas)
A class to convert page titles on a foreign wiki (ForeignTitle objects) into page titles on the local...
static registerSource(ImportSource $source)
XML file reader for the page data importer.
finishImportPage( $title, $foreignTitle, $revCount, $sRevCount, $pageInfo)
Mostly for hook use.
setImportUploads( $import)
doImport()
Primary entry point.
setPageCallback( $callback)
Sets the action to perform as each new page in the stream is reached.
setNoUpdates( $noupdates)
Set 'no updates' mode.
pageOutCallback( $title, $foreignTitle, $revCount, $sucCount, $pageInfo)
Notify the callback function when a "</page>" is closed.
setLogItemCallback( $callback)
Sets the action to perform as each log item reached.
importUpload( $revision)
Dummy for now...
setImportTitleFactory( $factory)
Sets the factory object to use to convert ForeignTitle objects into local Title objects.
dumpTemp( $contents)
setSiteInfoCallback( $callback)
Sets the action to perform when site info is encountered.
nodeAttribute( $attr)
Retrieves the contents of the named attribute of the current element.
pageCallback( $title)
Notify the callback function when a new "<page>" is reached.
processLogItem( $logInfo)
setTargetNamespace( $namespace)
Set a target namespace to override the defaults.
setPageOffset( $nthPage)
Sets 'pageOffset' value.
debugRevisionHandler(&$revision)
Alternate per-revision callback, for debugging.
nodeContents()
Shouldn't something like this be built-in to XMLReader? Fetches text contents of the current element,...
array $countableCache
importLogItem( $revision)
Default per-revision callback, performs the import.
setImageBasePath( $dir)
handleUpload(&$pageInfo)
handleRevision(&$pageInfo)
revisionCallback( $revision)
Notify the callback function of a revision.
logItemCallback( $revision)
Notify the callback function of a new log item.
throwXmlError( $err)
setDebug( $debug)
Set debug mode...
processRevision( $pageInfo, $revisionInfo)
processUpload( $pageInfo, $uploadInfo)
bool $disableStatisticsUpdate
processTitle( $text, $ns=null)
importRevision( $revision)
Default per-revision callback, performs the import.
ImportTitleFactory $importTitleFactory
__construct(ImportSource $source, Config $config)
Creates an ImportXMLReader drawing from the source provided.
setPageOutCallback( $callback)
Sets the action to perform as each page in the stream is completed.
setTargetRootPage( $rootpage)
Set a target root page under which all pages are imported.
setNoticeCallback( $callback)
Set a callback that displays notice messages.
beforeImportPage( $titleAndForeignTitle)
Default per-page callback.
disableStatisticsUpdate()
Statistics updates can take a lot of time.
siteInfoCallback( $siteInfo)
Notify the callback function of site info.
setRevisionCallback( $callback)
Sets the action to perform as each page revision is reached.
setUploadCallback( $callback)
Sets the action to perform as each file upload version is reached.
static factory(Title $title)
Create a WikiPage object of the appropriate class for the given title.
Definition WikiPage.php:121
Represents a revision, log entry or upload during the import process.
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global list
Definition deferred.txt:11
this class mediates it Skin Encapsulates a look and feel for the wiki All of the functions that render HTML and make choices about how to render it are here and are called from various other places when and is meant to be subclassed with other skins that may override some of its functions The User object contains a reference to a and so rather than having a global skin object we just rely on the global User and get the skin with $wgUser and also has some character encoding functions and other locale stuff The current user interface language is instantiated as and the local content language as $wgContLang
Definition design.txt:57
when a variable name is used in a it is silently declared as a new local masking the global
Definition design.txt:95
const NS_MAIN
Definition Defines.php:65
Status::newGood()` to allow deletion, and then `return false` from the hook function. Ensure you consume the 'ChangeTagAfterDelete' hook to carry out custom deletion actions. $tag:name of the tag $user:user initiating the action & $status:Status object. See above. 'ChangeTagsListActive':Allows you to nominate which of the tags your extension uses are in active use. & $tags:list of all active tags. Append to this array. 'ChangeTagsAfterUpdateTags':Called after tags have been updated with the ChangeTags::updateTags function. Params:$addedTags:tags effectively added in the update $removedTags:tags effectively removed in the update $prevTags:tags that were present prior to the update $rc_id:recentchanges table id $rev_id:revision table id $log_id:logging table id $params:tag params $rc:RecentChange being tagged when the tagging accompanies the action or null $user:User who performed the tagging when the tagging is subsequent to the action or null 'ChangeTagsAllowedAdd':Called when checking if a user can add tags to a change. & $allowedTags:List of all the tags the user is allowed to add. Any tags the user wants to add( $addTags) that are not in this array will cause it to fail. You may add or remove tags to this array as required. $addTags:List of tags user intends to add. $user:User who is adding the tags. 'ChangeUserGroups':Called before user groups are changed. $performer:The User who will perform the change $user:The User whose groups will be changed & $add:The groups that will be added & $remove:The groups that will be removed 'Collation::factory':Called if $wgCategoryCollation is an unknown collation. $collationName:Name of the collation in question & $collationObject:Null. Replace with a subclass of the Collation class that implements the collation given in $collationName. 'ConfirmEmailComplete':Called after a user 's email has been confirmed successfully. 
$user:user(object) whose email is being confirmed 'ContentAlterParserOutput':Modify parser output for a given content object. Called by Content::getParserOutput after parsing has finished. Can be used for changes that depend on the result of the parsing but have to be done before LinksUpdate is called(such as adding tracking categories based on the rendered HTML). $content:The Content to render $title:Title of the page, as context $parserOutput:ParserOutput to manipulate 'ContentGetParserOutput':Customize parser output for a given content object, called by AbstractContent::getParserOutput. May be used to override the normal model-specific rendering of page content. $content:The Content to render $title:Title of the page, as context $revId:The revision ID, as context $options:ParserOptions for rendering. To avoid confusing the parser cache, the output can only depend on parameters provided to this hook function, not on global state. $generateHtml:boolean, indicating whether full HTML should be generated. If false, generation of HTML may be skipped, but other information should still be present in the ParserOutput object. & $output:ParserOutput, to manipulate or replace 'ContentHandlerDefaultModelFor':Called when the default content model is determined for a given title. May be used to assign a different model for that title. $title:the Title in question & $model:the model name. Use with CONTENT_MODEL_XXX constants. 'ContentHandlerForModelID':Called when a ContentHandler is requested for a given content model name, but no entry for that model exists in $wgContentHandlers. Note:if your extension implements additional models via this hook, please use GetContentModels hook to make them known to core. $modeName:the requested content model name & $handler:set this to a ContentHandler object, if desired. 'ContentModelCanBeUsedOn':Called to determine whether that content model can be used on a given page. 
This is especially useful to prevent some content models to be used in some special location. $contentModel:ID of the content model in question $title:the Title in question. & $ok:Output parameter, whether it is OK to use $contentModel on $title. Handler functions that modify $ok should generally return false to prevent further hooks from further modifying $ok. 'ContribsPager::getQueryInfo':Before the contributions query is about to run & $pager:Pager object for contributions & $queryInfo:The query for the contribs Pager 'ContribsPager::reallyDoQuery':Called before really executing the query for My Contributions & $data:an array of results of all contribs queries $pager:The ContribsPager object hooked into $offset:Index offset, inclusive $limit:Exact query limit $descending:Query direction, false for ascending, true for descending 'ContributionsLineEnding':Called before a contributions HTML line is finished $page:SpecialPage object for contributions & $ret:the HTML line $row:the DB row for this line & $classes:the classes to add to the surrounding< li > & $attribs:associative array of other HTML attributes for the< li > element. Currently only data attributes reserved to MediaWiki are allowed(see Sanitizer::isReservedDataAttribute). 'ContributionsToolLinks':Change tool links above Special:Contributions $id:User identifier $title:User page title & $tools:Array of tool links $specialPage:SpecialPage instance for context and services. Can be either SpecialContributions or DeletedContributionsPage. Extensions should type hint against a generic SpecialPage though. 'ConvertContent':Called by AbstractContent::convert when a conversion to another content model is requested. Handler functions that modify $result should generally return false to disable further attempts at conversion. $content:The Content object to be converted. $toModel:The ID of the content model to convert to. $lossy:boolean indicating whether lossy conversion is allowed. 
& $result:Output parameter, in case the handler function wants to provide a converted Content object. Note that $result->getContentModel() must return $toModel. 'CustomEditor':When invoking the page editor Return true to allow the normal editor to be used, or false if implementing a custom editor, e.g. for a special namespace, etc. $article:Article being edited $user:User performing the edit 'DatabaseOraclePostInit':Called after initialising an Oracle database $db:the DatabaseOracle object 'DeletedContribsPager::reallyDoQuery':Called before really executing the query for Special:DeletedContributions Similar to ContribsPager::reallyDoQuery & $data:an array of results of all contribs queries $pager:The DeletedContribsPager object hooked into $offset:Index offset, inclusive $limit:Exact query limit $descending:Query direction, false for ascending, true for descending 'DeletedContributionsLineEnding':Called before a DeletedContributions HTML line is finished. Similar to ContributionsLineEnding $page:SpecialPage object for DeletedContributions & $ret:the HTML line $row:the DB row for this line & $classes:the classes to add to the surrounding< li > & $attribs:associative array of other HTML attributes for the< li > element. Currently only data attributes reserved to MediaWiki are allowed(see Sanitizer::isReservedDataAttribute). 'DifferenceEngineAfterLoadNewText':called in DifferenceEngine::loadNewText() after the new revision 's content has been loaded into the class member variable $differenceEngine->mNewContent but before returning true from this function. $differenceEngine:DifferenceEngine object 'DifferenceEngineLoadTextAfterNewContentIsLoaded':called in DifferenceEngine::loadText() after the new revision 's content has been loaded into the class member variable $differenceEngine->mNewContent but before checking if the variable 's value is null. This hook can be used to inject content into said class member variable. 
$differenceEngine:DifferenceEngine object 'DifferenceEngineMarkPatrolledLink':Allows extensions to change the "mark as patrolled" link which is shown both on the diff header as well as on the bottom of a page, usually wrapped in a span element which has class="patrollink". $differenceEngine:DifferenceEngine object & $markAsPatrolledLink:The "mark as patrolled" link HTML(string) $rcid:Recent change ID(rc_id) for this change(int) 'DifferenceEngineMarkPatrolledRCID':Allows extensions to possibly change the rcid parameter. For example the rcid might be set to zero due to the user being the same as the performer of the change but an extension might still want to show it under certain conditions. & $rcid:rc_id(int) of the change or 0 $differenceEngine:DifferenceEngine object $change:RecentChange object $user:User object representing the current user 'DifferenceEngineNewHeader':Allows extensions to change the $newHeader variable, which contains information about the new revision, such as the revision 's author, whether the revision was marked as a minor edit or not, etc. $differenceEngine:DifferenceEngine object & $newHeader:The string containing the various #mw-diff-otitle[1-5] divs, which include things like revision author info, revision comment, RevisionDelete link and more $formattedRevisionTools:Array containing revision tools, some of which may have been injected with the DiffRevisionTools hook $nextlink:String containing the link to the next revision(if any) $status
Definition hooks.txt:1245
the array() calling protocol came about after MediaWiki 1.4rc1.
namespace and then decline to actually register it file or subcat img or subcat $title
Definition hooks.txt:962
either a unescaped string or a HtmlArmor object after in associative array form externallinks including delete and has completed for all link tables whether this was an auto creation default is conds Array Extra conditions for the No matching items in log is displayed if loglist is empty msgKey Array If you want a nice box with a set this to the key of the message First element is the message additional optional elements are parameters for the key that are processed with wfMessage() -> params() ->parseAsBlock() - offset Set to overwrite offset parameter in $wgRequest set to '' to unset offset - wrap String Wrap the message in html(usually something like "&lt;div ...>$1&lt;/div>"). - flags Integer display flags(NO_ACTION_LINK, NO_EXTRA_USER_LINKS) 'LogException':Called before an exception(or PHP error) is logged. This is meant for integration with external error aggregation services
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that probably a stub it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output modifiable modifiable after all normalizations have been except for the $wgMaxImageArea check set to true or false to override the $wgMaxImageArea check result gives extension the possibility to transform it themselves $handler
Definition hooks.txt:901
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition injection.txt:37
Source interface for XML import.
Represents an object that can convert page titles on a foreign wiki (ForeignTitle objects) into page ...
$debug
Definition mcc.php:31
$source
$buffer
$params