MediaWiki REL1_28
WikiImporter.php
Go to the documentation of this file.
1<?php
34 private $reader = null;
35 private $foreignNamespaces = null;
40 private $mNoUpdates = false;
42 private $config;
46 private $countableCache = [];
47
55 if ( !class_exists( 'XMLReader' ) ) {
56 throw new Exception( 'Import requires PHP to have been compiled with libxml support' );
57 }
58
59 $this->reader = new XMLReader();
60 if ( !$config ) {
61 wfDeprecated( __METHOD__ . ' without a Config instance', '1.25' );
62 $config = ConfigFactory::getDefaultInstance()->makeConfig( 'main' );
63 }
64 $this->config = $config;
65
66 if ( !in_array( 'uploadsource', stream_get_wrappers() ) ) {
67 stream_wrapper_register( 'uploadsource', 'UploadSourceAdapter' );
68 }
70
71 // Enable the entity loader, as it is needed for loading external URLs via
72 // XMLReader::open (T86036)
73 $oldDisable = libxml_disable_entity_loader( false );
74 if ( defined( 'LIBXML_PARSEHUGE' ) ) {
75 $status = $this->reader->open( "uploadsource://$id", null, LIBXML_PARSEHUGE );
76 } else {
77 $status = $this->reader->open( "uploadsource://$id" );
78 }
79 if ( !$status ) {
80 $error = libxml_get_last_error();
81 libxml_disable_entity_loader( $oldDisable );
82 throw new MWException( 'Encountered an internal error while initializing WikiImporter object: ' .
83 $error->message );
84 }
85 libxml_disable_entity_loader( $oldDisable );
86
87 // Default callbacks
88 $this->setPageCallback( [ $this, 'beforeImportPage' ] );
89 $this->setRevisionCallback( [ $this, "importRevision" ] );
90 $this->setUploadCallback( [ $this, 'importUpload' ] );
91 $this->setLogItemCallback( [ $this, 'importLogItem' ] );
92 $this->setPageOutCallback( [ $this, 'finishImportPage' ] );
93
94 $this->importTitleFactory = new NaiveImportTitleFactory();
95 }
96
/**
 * Expose the XML reader this importer pulls its input from.
 *
 * @return XMLReader|null The value of the private $reader property
 */
public function getReader() {
	$xmlReader = $this->reader;
	return $xmlReader;
}
103
/**
 * Report an XML error to the import debug channel and to the debug log.
 *
 * Despite its name, this method only logs; it does not throw.
 *
 * @param string $err Error description
 */
public function throwXmlError( $err ) {
	$this->debug( "FAILURE: {$err}" );
	wfDebug( "WikiImporter XML error: {$err}\n" );
}
108
/**
 * Write a line to the debug log, but only while debug mode is on.
 *
 * @param string $data Text to log
 */
public function debug( $data ) {
	if ( !$this->mDebug ) {
		// Debug mode is off: stay silent.
		return;
	}
	wfDebug( "IMPORT: {$data}\n" );
}
114
/**
 * Write a warning line to the debug log, regardless of debug mode.
 *
 * @param string $data Text to log
 */
public function warn( $data ) {
	wfDebug( "IMPORT: {$data}\n" );
}
118
/**
 * Emit a notice message, either through the registered notice callback
 * or, when none is callable, by echoing the message text.
 *
 * @param string $msg Message key; any further arguments are message parameters
 */
public function notice( $msg /*, $param, ...*/ ) {
	// Everything after the message key is a message parameter.
	$params = array_slice( func_get_args(), 1 );

	if ( !is_callable( $this->mNoticeCallback ) ) {
		# No ImportReporter -> CLI
		echo wfMessage( $msg, $params )->text() . "\n";
		return;
	}

	call_user_func( $this->mNoticeCallback, $msg, $params );
}
129
/**
 * Set debug mode; debug() only logs while this flag is truthy.
 *
 * @param bool $debug
 */
function setDebug( $debug ) {
	$this->mDebug = $debug;
}
137
/**
 * Set 'no updates' mode. The flag is forwarded to every WikiRevision
 * this importer builds.
 *
 * @param bool $noupdates
 */
function setNoUpdates( $noupdates ) {
	$this->mNoUpdates = $noupdates;
}
145
/**
 * Set a callback that displays notice messages.
 *
 * @param callable $callback
 * @return mixed Whatever wfSetVar() returns (the previously stored value)
 */
public function setNoticeCallback( $callback ) {
	$previous = wfSetVar( $this->mNoticeCallback, $callback );
	return $previous;
}
155
/**
 * Sets the action to perform as each new page in the stream is reached.
 *
 * @param callable $callback
 * @return callable|null The callback that was installed before this call
 */
public function setPageCallback( $callback ) {
	$replaced = $this->mPageCallback;
	$this->mPageCallback = $callback;

	return $replaced;
}
166
/**
 * Sets the action to perform as each page in the stream is completed.
 *
 * @param callable $callback
 * @return callable|null The callback that was installed before this call
 */
public function setPageOutCallback( $callback ) {
	$replaced = $this->mPageOutCallback;
	$this->mPageOutCallback = $callback;

	return $replaced;
}
181
/**
 * Sets the action to perform as each page revision is reached.
 *
 * @param callable $callback
 * @return callable|null The callback that was installed before this call
 */
public function setRevisionCallback( $callback ) {
	$replaced = $this->mRevisionCallback;
	$this->mRevisionCallback = $callback;

	return $replaced;
}
192
/**
 * Sets the action to perform as each file upload version is reached.
 *
 * @param callable $callback
 * @return callable|null The callback that was installed before this call
 */
public function setUploadCallback( $callback ) {
	$replaced = $this->mUploadCallback;
	$this->mUploadCallback = $callback;

	return $replaced;
}
203
/**
 * Sets the action to perform as each log item is reached.
 *
 * @param callable $callback
 * @return callable|null The callback that was installed before this call
 */
public function setLogItemCallback( $callback ) {
	$replaced = $this->mLogItemCallback;
	$this->mLogItemCallback = $callback;

	return $replaced;
}
214
/**
 * Sets the action to perform when site info is encountered.
 *
 * @param callable $callback
 * @return callable|null The callback that was installed before this call
 */
public function setSiteInfoCallback( $callback ) {
	$replaced = $this->mSiteInfoCallback;
	$this->mSiteInfoCallback = $callback;

	return $replaced;
}
225
/**
 * Sets the factory object to use to convert ForeignTitle objects into
 * local Title objects.
 *
 * @param ImportTitleFactory $factory
 */
public function setImportTitleFactory( $factory ) {
	$this->importTitleFactory = $factory;
}
234
/**
 * Set a target namespace to override the defaults.
 *
 * @param null|int|string $namespace Namespace index, or null for "no override"
 * @return bool True when the value was accepted; false when it is negative
 *  or MWNamespace::exists() does not know it
 */
public function setTargetNamespace( $namespace ) {
	if ( is_null( $namespace ) ) {
		// Don't override namespaces: reinstall the same default factory the
		// constructor sets up, so an override installed by an earlier call
		// does not silently stay in effect.
		$this->setImportTitleFactory( new NaiveImportTitleFactory() );
		return true;
	}

	if ( $namespace >= 0 && MWNamespace::exists( intval( $namespace ) ) ) {
		$namespace = intval( $namespace );
		$this->setImportTitleFactory( new NamespaceImportTitleFactory( $namespace ) );
		return true;
	}

	return false;
}
256
/**
 * Set a target root page under which all pages are imported.
 *
 * @param null|string $rootpage Root page title text, or null for none
 * @return Status Good unless the root page is not a usable local title
 *  ('import-rootpage-invalid') or its namespace has no subpages
 *  ('import-rootpage-nosubpage')
 */
public function setTargetRootPage( $rootpage ) {
	// Needed to render the namespace name in the "no subpages" error below;
	// without this declaration $wgContLang would be an undefined local.
	global $wgContLang;

	$status = Status::newGood();
	if ( is_null( $rootpage ) ) {
		// No rootpage
		// NOTE(review): the listing's own numbering skips a line in this
		// branch, so a statement may be missing here -- confirm against the
		// original file.
	} elseif ( $rootpage !== '' ) {
		$rootpage = rtrim( $rootpage, '/' ); // avoid double slashes
		$title = Title::newFromText( $rootpage );

		if ( !$title || $title->isExternal() ) {
			$status->fatal( 'import-rootpage-invalid' );
		} else {
			if ( !MWNamespace::hasSubpages( $title->getNamespace() ) ) {
				$displayNSText = $title->getNamespace() == NS_MAIN
					? wfMessage( 'blanknamespace' )->text()
					: $wgContLang->getNsText( $title->getNamespace() );
				$status->fatal( 'import-rootpage-nosubpage', $displayNSText );
			} else {
				// set namespace to 'all', so the namespace check in processTitle() can pass
				$this->setTargetNamespace( null );
				// NOTE(review): the listing's numbering also skips a line after
				// this call; as shown, nothing records $title as the root page.
				// Confirm against the original file.
			}
		}
	}
	return $status;
}
290
/**
 * Set the directory that handleUpload() prefixes to an upload's 'rel'
 * path when looking for the file on disk.
 *
 * @param string $dir
 */
public function setImageBasePath( $dir ) {
	$this->mImageBasePath = $dir;
}
297
/**
 * Choose whether parsed uploads are passed on to processUpload().
 *
 * @param bool $import
 */
public function setImportUploads( $import ) {
	$this->mImportUploads = $import;
}
304
/**
 * Default per-page callback. Remembers whether the page is countable
 * before the import touches it, so finishImportPage() can compare.
 *
 * @param array $titleAndForeignTitle Two-element array; element 0 is the
 *  local Title
 * @return bool Always true
 */
public function beforeImportPage( $titleAndForeignTitle ) {
	$title = $titleAndForeignTitle[0];
	// $page must be built here; it is not otherwise defined in this method.
	$page = WikiPage::factory( $title );
	$this->countableCache['title_' . $title->getPrefixedText()] = $page->isCountable();
	return true;
}
317
/**
 * Default per-revision callback, performs the import.
 *
 * A notice is emitted and false returned when the revision's content
 * handler cannot be used on the target title, or when importing raises
 * MWContentSerializationException.
 *
 * @param WikiRevision $revision
 * @return bool Result of importOldRevision(), or false on either failure
 */
public function importRevision( $revision ) {
	if ( !$revision->getContentHandler()->canBeUsedOn( $revision->getTitle() ) ) {
		$errorKey = 'import-error-bad-location';
	} else {
		try {
			return $revision->importOldRevision();
		} catch ( MWContentSerializationException $ex ) {
			$errorKey = 'import-error-unserialize';
		}
	}

	// Both failure paths report the same four details about the revision.
	$this->notice( $errorKey,
		$revision->getTitle()->getPrefixedText(),
		$revision->getID(),
		$revision->getModel(),
		$revision->getFormat() );

	return false;
}
346
/**
 * Default log-item callback, performs the import by delegating to the
 * revision object.
 *
 * @param WikiRevision $revision
 * @return mixed Whatever WikiRevision::importLogItem() returns
 */
public function importLogItem( $revision ) {
	$result = $revision->importLogItem();
	return $result;
}
355
/**
 * Default upload callback; delegates to the revision object.
 *
 * @param WikiRevision $revision
 * @return mixed Whatever WikiRevision::importUpload() returns
 */
public function importUpload( $revision ) {
	$result = $revision->importUpload();
	return $result;
}
364
/**
 * Default page-out callback: adjusts the article count and then runs the
 * 'AfterImportPage' hook. Mostly for hook use.
 *
 * @param Title $title Local title of the imported page
 * @param ForeignTitle $foreignTitle Title of the page on the source wiki
 * @param int $revCount Number of revisions seen for the page
 * @param int $sRevCount Number of revisions imported successfully
 * @param array $pageInfo Page information collected by the parser
 * @return bool Result of Hooks::run( 'AfterImportPage' )
 */
public function finishImportPage( $title, $foreignTitle, $revCount,
	$sRevCount, $pageInfo ) {

	// Update article count statistics (T42009)
	// The normal counting logic in WikiPage->doEditUpdates() is designed for
	// one-revision-at-a-time editing, not bulk imports. In this situation it
	// suffers from issues of replica DB lag. We let WikiPage handle the total page
	// and revision count, and we implement our own custom logic for the
	// article (content page) count.
	// $page must be built here; it is not otherwise defined in this method.
	$page = WikiPage::factory( $title );
	$page->loadPageData( 'fromdbmaster' );
	$content = $page->getContent();
	if ( $content === null ) {
		wfDebug( __METHOD__ . ': Skipping article count adjustment for ' . $title .
			' because WikiPage::getContent() returned null' );
	} else {
		$editInfo = $page->prepareContentForEdit( $content );
		$countKey = 'title_' . $title->getPrefixedText();
		$countable = $page->isCountable( $editInfo );
		// Only adjust when beforeImportPage() recorded a state and it changed.
		if ( array_key_exists( $countKey, $this->countableCache ) &&
			$countable != $this->countableCache[$countKey] ) {
			DeferredUpdates::addUpdate( SiteStatsUpdate::factory( [
				'articles' => ( (int)$countable - (int)$this->countableCache[$countKey] )
			] ) );
		}
	}

	$args = func_get_args();
	return Hooks::run( 'AfterImportPage', $args );
}
404
/**
 * Alternate per-revision callback, for debugging. Dumps the revision's
 * title, user, timestamp, comment and text through debug().
 *
 * @param WikiRevision &$revision
 */
public function debugRevisionHandler( &$revision ) {
	$this->debug( "Got revision:" );

	$titleText = is_object( $revision->title )
		? $revision->title->getPrefixedText()
		: '<invalid>';
	$this->debug( "-- Title: " . $titleText );

	// Remaining fields are plain properties, printed in a fixed order.
	$labelledProps = [
		'User' => 'user_text',
		'Timestamp' => 'timestamp',
		'Comment' => 'comment',
		'Text' => 'text',
	];
	foreach ( $labelledProps as $label => $prop ) {
		$this->debug( "-- {$label}: " . $revision->$prop );
	}
}
421
/**
 * Notify the callback function of site info.
 *
 * @param array $siteInfo
 * @return mixed The callback's return value, or false when none is set
 */
private function siteInfoCallback( $siteInfo ) {
	if ( !isset( $this->mSiteInfoCallback ) ) {
		return false;
	}

	$callbackArgs = [ $siteInfo, $this ];
	return call_user_func_array( $this->mSiteInfoCallback, $callbackArgs );
}
435
/**
 * Notify the callback function when a new "<page>" is reached.
 *
 * @param array $title Value handed straight to the page callback
 */
function pageCallback( $title ) {
	if ( !isset( $this->mPageCallback ) ) {
		return;
	}
	call_user_func( $this->mPageCallback, $title );
}
445
/**
 * Notify the callback function when a "</page>" is closed.
 *
 * @param Title $title
 * @param ForeignTitle $foreignTitle
 * @param int $revCount
 * @param int $sucCount Number of revisions for which callback returned true
 * @param array $pageInfo Associative array of page information
 */
private function pageOutCallback( $title, $foreignTitle, $revCount,
	$sucCount, $pageInfo ) {
	if ( !isset( $this->mPageOutCallback ) ) {
		return;
	}

	// Forward the arguments exactly as they were received.
	$forwarded = func_get_args();
	call_user_func_array( $this->mPageOutCallback, $forwarded );
}
461
/**
 * Notify the callback function of a revision.
 *
 * @param WikiRevision $revision
 * @return mixed The callback's return value, or false when none is set
 */
private function revisionCallback( $revision ) {
	if ( !isset( $this->mRevisionCallback ) ) {
		return false;
	}

	$callbackArgs = [ $revision, $this ];
	return call_user_func_array( $this->mRevisionCallback, $callbackArgs );
}
475
/**
 * Notify the callback function of a new log item.
 *
 * @param WikiRevision $revision
 * @return mixed The callback's return value, or false when none is set
 */
private function logItemCallback( $revision ) {
	if ( !isset( $this->mLogItemCallback ) ) {
		return false;
	}

	$callbackArgs = [ $revision, $this ];
	return call_user_func_array( $this->mLogItemCallback, $callbackArgs );
}
489
/**
 * Retrieves the contents of the named attribute of the current element.
 *
 * @param string $attr The name of the attribute
 * @return mixed Whatever XMLReader::getAttribute() returns for that name
 */
public function nodeAttribute( $attr ) {
	$value = $this->reader->getAttribute( $attr );
	return $value;
}
499
/**
 * Fetches the text contents of the current element, advancing the reader
 * up to the first end-element node it meets.
 *
 * Text, CDATA and significant-whitespace nodes are concatenated; other
 * node types are ignored. If the input runs out before an end element is
 * seen, the reader is closed and an empty string returned.
 *
 * @return string
 */
public function nodeContents() {
	$reader = $this->reader;
	if ( $reader->isEmptyElement ) {
		return "";
	}

	$textNodeTypes = [
		XMLReader::TEXT,
		XMLReader::CDATA,
		XMLReader::SIGNIFICANT_WHITESPACE,
	];

	$collected = "";
	while ( $reader->read() ) {
		$nodeType = $reader->nodeType;
		if ( $nodeType === XMLReader::END_ELEMENT ) {
			return $collected;
		}
		if ( in_array( $nodeType, $textNodeTypes, true ) ) {
			$collected .= $reader->value;
		}
	}

	// Ran out of input without seeing an end element.
	$reader->close();
	return '';
}
527
/**
 * Primary entry point: reads the whole dump and dispatches each top-level
 * element to its handler.
 *
 * @throws MWException When the root element is not <mediawiki>
 * @return bool True once the dump has been processed
 */
public function doImport() {
	// Calls to reader->read need to be wrapped in calls to
	// libxml_disable_entity_loader() to avoid local file
	// inclusion attacks (bug 46932).
	$oldDisable = libxml_disable_entity_loader( true );
	$this->reader->read();

	if ( $this->reader->localName != 'mediawiki' ) {
		libxml_disable_entity_loader( $oldDisable );
		throw new MWException( "Expected <mediawiki> tag, got " .
			$this->reader->localName );
	}
	$this->debug( "<mediawiki> tag is correct." );

	$this->debug( "Starting primary dump processing loop." );

	// A real finally block restores the entity loader and closes the reader
	// for anything thrown by the handlers, not only for Exception subclasses.
	try {
		$keepReading = $this->reader->read();
		$skip = false;
		while ( $keepReading ) {
			$tag = $this->reader->localName;
			$type = $this->reader->nodeType;

			if ( !Hooks::run( 'ImportHandleToplevelXMLTag', [ $this ] ) ) {
				// Do nothing
			} elseif ( $tag == 'mediawiki' && $type == XMLReader::END_ELEMENT ) {
				break;
			} elseif ( $tag == 'siteinfo' ) {
				$this->handleSiteInfo();
			} elseif ( $tag == 'page' ) {
				$this->handlePage();
			} elseif ( $tag == 'logitem' ) {
				$this->handleLogItem();
			} elseif ( $tag != '#text' ) {
				$this->warn( "Unhandled top-level XML tag $tag" );

				$skip = true;
			}

			if ( $skip ) {
				// Jump over the unhandled element's whole subtree.
				$keepReading = $this->reader->next();
				$skip = false;
				$this->debug( "Skip" );
			} else {
				$keepReading = $this->reader->read();
			}
		}
	} finally {
		libxml_disable_entity_loader( $oldDisable );
		$this->reader->close();
	}

	return true;
}
595
/**
 * Parse a <siteinfo> element: collect the simple fields and the foreign
 * namespace names, then pass the result to the site info callback.
 */
private function handleSiteInfo() {
	$this->debug( "Enter site info handler." );
	$siteInfo = [];

	// Fields that can just be stuffed in the siteInfo object
	$normalFields = [ 'sitename', 'base', 'generator', 'case' ];

	while ( $this->reader->read() ) {
		$atClosingTag = $this->reader->nodeType == XMLReader::END_ELEMENT &&
			$this->reader->localName == 'siteinfo';
		if ( $atClosingTag ) {
			break;
		}

		$tag = $this->reader->localName;

		if ( $tag == 'namespace' ) {
			// Namespace names are keyed by the element's 'key' attribute.
			$nsKey = $this->nodeAttribute( 'key' );
			$this->foreignNamespaces[$nsKey] = $this->nodeContents();
			continue;
		}

		if ( in_array( $tag, $normalFields ) ) {
			$siteInfo[$tag] = $this->nodeContents();
		}
	}

	$siteInfo['_namespaces'] = $this->foreignNamespaces;
	$this->siteInfoCallback( $siteInfo );
}
622
/**
 * Parse a <logitem> element into an info array and hand it to
 * processLogItem().
 *
 * Each node is first offered to the 'ImportHandleLogItemXMLTag' hook; when
 * the hook returns false the node is not processed further here.
 */
private function handleLogItem() {
	$this->debug( "Enter log item handler." );
	$logInfo = [];

	// Fields that can just be stuffed in the pageInfo object
	$normalFields = [ 'id', 'comment', 'type', 'action', 'timestamp',
		'logtitle', 'params' ];

	while ( $this->reader->read() ) {
		$atClosingTag = $this->reader->nodeType == XMLReader::END_ELEMENT &&
			$this->reader->localName == 'logitem';
		if ( $atClosingTag ) {
			break;
		}

		$tag = $this->reader->localName;

		$hookArgs = [ $this, $logInfo ];
		if ( !Hooks::run( 'ImportHandleLogItemXMLTag', $hookArgs ) ) {
			// Do nothing
			continue;
		}

		if ( $tag == 'contributor' ) {
			$logInfo['contributor'] = $this->handleContributor();
		} elseif ( in_array( $tag, $normalFields ) ) {
			$logInfo[$tag] = $this->nodeContents();
		} elseif ( $tag != '#text' ) {
			$this->warn( "Unhandled log-item XML tag $tag" );
		}
	}

	$this->processLogItem( $logInfo );
}
654
/**
 * Build a WikiRevision from parsed log item data and pass it to the log
 * item callback.
 *
 * 'type' and 'action' are read unconditionally; every other field is
 * applied only when present.
 *
 * @param array $logInfo
 * @return mixed Result of logItemCallback()
 */
private function processLogItem( $logInfo ) {
	$entry = new WikiRevision( $this->config );

	if ( isset( $logInfo['id'] ) ) {
		$entry->setID( $logInfo['id'] );
	}

	$entry->setType( $logInfo['type'] );
	$entry->setAction( $logInfo['action'] );

	if ( isset( $logInfo['timestamp'] ) ) {
		$entry->setTimestamp( $logInfo['timestamp'] );
	}
	if ( isset( $logInfo['params'] ) ) {
		$entry->setParams( $logInfo['params'] );
	}
	if ( isset( $logInfo['logtitle'] ) ) {
		// @todo Using Title for non-local titles is a recipe for disaster.
		// We should use ForeignTitle here instead.
		$logTitle = Title::newFromText( $logInfo['logtitle'] );
		$entry->setTitle( $logTitle );
	}

	$entry->setNoUpdates( $this->mNoUpdates );

	if ( isset( $logInfo['comment'] ) ) {
		$entry->setComment( $logInfo['comment'] );
	}

	if ( isset( $logInfo['contributor']['ip'] ) ) {
		$entry->setUserIP( $logInfo['contributor']['ip'] );
	}

	// A username is always set; fall back to a placeholder when absent.
	$username = isset( $logInfo['contributor']['username'] )
		? $logInfo['contributor']['username']
		: 'Unknown user';
	$entry->setUsername( $username );

	return $this->logItemCallback( $entry );
}
698
/**
 * Parse a <page> element: collect its simple fields, resolve the title on
 * the first <revision> or <upload>, dispatch each revision/upload to its
 * handler, and finally fire the page-out callback if a title was resolved.
 */
private function handlePage() {
	// Handle page data.
	$this->debug( "Enter page handler." );
	$pageInfo = [ 'revisionCount' => 0, 'successfulRevisionCount' => 0 ];

	// Fields that can just be stuffed in the pageInfo object
	$normalFields = [ 'title', 'ns', 'id', 'redirect', 'restrictions' ];

	$skipSubtree = false;
	$titleIsBad = false;

	while ( true ) {
		// next() jumps over the current subtree, read() steps into it.
		$advanced = $skipSubtree ? $this->reader->next() : $this->reader->read();
		if ( !$advanced ) {
			break;
		}

		if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
			$this->reader->localName == 'page' ) {
			break;
		}

		$skipSubtree = false;

		$tag = $this->reader->localName;

		if ( $titleIsBad ) {
			// The title is invalid, bail out of this page
			$skipSubtree = true;
			continue;
		}

		if ( !Hooks::run( 'ImportHandlePageXMLTag', [ $this,
			&$pageInfo ] ) ) {
			// Do nothing
			continue;
		}

		if ( in_array( $tag, $normalFields ) ) {
			// An XML snippet:
			// <page>
			//     <id>123</id>
			//     <title>Page</title>
			//     <redirect title="NewTitle"/>
			//     ...
			// Because the redirect tag is built differently, we need special handling for that case.
			$pageInfo[$tag] = ( $tag == 'redirect' )
				? $this->nodeAttribute( 'title' )
				: $this->nodeContents();
			continue;
		}

		if ( $tag == 'revision' || $tag == 'upload' ) {
			if ( !isset( $title ) ) {
				$title = $this->processTitle( $pageInfo['title'],
					isset( $pageInfo['ns'] ) ? $pageInfo['ns'] : null );

				// $title is either an array of two titles or false.
				if ( is_array( $title ) ) {
					$this->pageCallback( $title );
					$pageInfo['_title'] = $title[0];
					$foreignTitle = $title[1];
				} else {
					$titleIsBad = true;
					$skipSubtree = true;
				}
			}

			if ( $title ) {
				if ( $tag == 'revision' ) {
					$this->handleRevision( $pageInfo );
				} else {
					$this->handleUpload( $pageInfo );
				}
			}
			continue;
		}

		if ( $tag != '#text' ) {
			$this->warn( "Unhandled page XML tag $tag" );
			$skipSubtree = true;
		}
	}

	// @note $pageInfo['_title'] is only set if a valid $title was processed
	// above with no error. In that case pageCallback was called above and
	// $foreignTitle is set too, since both come from the same $title array,
	// so the page-out callback can be fired here.
	if ( array_key_exists( '_title', $pageInfo ) ) {
		$this->pageOutCallback( $pageInfo['_title'], $foreignTitle,
			$pageInfo['revisionCount'],
			$pageInfo['successfulRevisionCount'],
			$pageInfo );
	}
}
779
/**
 * Parse a <revision> element, then process it and update the page's
 * revision counters.
 *
 * Each node is first offered to the 'ImportHandleRevisionXMLTag' hook.
 *
 * @param array &$pageInfo Page info; 'revisionCount' is always incremented,
 *  'successfulRevisionCount' only when processRevision() returns a truthy value
 */
private function handleRevision( &$pageInfo ) {
	$this->debug( "Enter revision handler" );
	$revisionInfo = [];

	$normalFields = [ 'id', 'timestamp', 'comment', 'minor', 'model', 'format', 'text' ];

	$skip = false;

	while ( $skip ? $this->reader->next() : $this->reader->read() ) {
		if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
			$this->reader->localName == 'revision' ) {
			break;
		}

		// The skip request only applies to the node that raised it. Reset it
		// per node, as handlePage() does, so one unknown tag does not change
		// how every later node is traversed.
		$skip = false;

		$tag = $this->reader->localName;

		if ( !Hooks::run( 'ImportHandleRevisionXMLTag', [
			$this, $pageInfo, $revisionInfo
		] ) ) {
			// Do nothing
		} elseif ( in_array( $tag, $normalFields ) ) {
			$revisionInfo[$tag] = $this->nodeContents();
		} elseif ( $tag == 'contributor' ) {
			$revisionInfo['contributor'] = $this->handleContributor();
		} elseif ( $tag != '#text' ) {
			$this->warn( "Unhandled revision XML tag $tag" );
			$skip = true;
		}
	}

	$pageInfo['revisionCount']++;
	if ( $this->processRevision( $pageInfo, $revisionInfo ) ) {
		$pageInfo['successfulRevisionCount']++;
	}
}
818
/**
 * Build a WikiRevision from parsed revision data and pass it to the
 * revision callback.
 *
 * @param array $pageInfo Page info; '_title' must be set
 * @param array $revisionInfo Parsed fields of the <revision> element
 * @throws MWException When the text of a size-checked content model
 *  exceeds $wgMaxArticleSize
 * @return mixed Result of revisionCallback()
 */
private function processRevision( $pageInfo, $revisionInfo ) {
	// Without this declaration $wgMaxArticleSize would be an undefined local.
	global $wgMaxArticleSize;

	// Make sure revisions won't violate $wgMaxArticleSize, which could lead to
	// database errors and instability. Testing for revisions with only listed
	// content models, as other content models might use serialization formats
	// which aren't checked against $wgMaxArticleSize.
	$modelIsSizeChecked = !isset( $revisionInfo['model'] ) ||
		in_array( $revisionInfo['model'], [
			'wikitext',
			'css',
			'json',
			'javascript',
			'text',
			''
		] );
	// A revision without a <text> element has nothing to measure; the
	// isset() guard avoids reading an undefined index.
	if ( $modelIsSizeChecked &&
		isset( $revisionInfo['text'] ) &&
		strlen( $revisionInfo['text'] ) > $wgMaxArticleSize * 1024
	) {
		throw new MWException( 'The text of ' .
			( isset( $revisionInfo['id'] ) ?
				"the revision with ID $revisionInfo[id]" :
				'a revision'
			) . " exceeds the maximum allowable size ($wgMaxArticleSize KB)" );
	}

	$revision = new WikiRevision( $this->config );

	if ( isset( $revisionInfo['id'] ) ) {
		$revision->setID( $revisionInfo['id'] );
	}
	if ( isset( $revisionInfo['model'] ) ) {
		$revision->setModel( $revisionInfo['model'] );
	}
	if ( isset( $revisionInfo['format'] ) ) {
		$revision->setFormat( $revisionInfo['format'] );
	}
	$revision->setTitle( $pageInfo['_title'] );

	if ( isset( $revisionInfo['text'] ) ) {
		$handler = $revision->getContentHandler();
		$text = $handler->importTransform(
			$revisionInfo['text'],
			$revision->getFormat() );

		$revision->setText( $text );
	}
	if ( isset( $revisionInfo['timestamp'] ) ) {
		$revision->setTimestamp( $revisionInfo['timestamp'] );
	} else {
		$revision->setTimestamp( wfTimestampNow() );
	}

	if ( isset( $revisionInfo['comment'] ) ) {
		$revision->setComment( $revisionInfo['comment'] );
	}

	// Presence of the <minor> element is what marks the edit as minor.
	if ( isset( $revisionInfo['minor'] ) ) {
		$revision->setMinor( true );
	}
	if ( isset( $revisionInfo['contributor']['ip'] ) ) {
		$revision->setUserIP( $revisionInfo['contributor']['ip'] );
	} elseif ( isset( $revisionInfo['contributor']['username'] ) ) {
		$revision->setUsername( $revisionInfo['contributor']['username'] );
	} else {
		$revision->setUsername( 'Unknown user' );
	}
	$revision->setNoUpdates( $this->mNoUpdates );

	return $this->revisionCallback( $revision );
}
894
/**
 * Parse an <upload> element and, when upload importing is enabled, pass
 * the result to processUpload().
 *
 * Each node is first offered to the 'ImportHandleUploadXMLTag' hook.
 *
 * @param array &$pageInfo
 * @return mixed Result of processUpload(), or null when uploads are not
 *  being imported
 */
private function handleUpload( &$pageInfo ) {
	$this->debug( "Enter upload handler" );
	$uploadInfo = [];

	$normalFields = [ 'timestamp', 'comment', 'filename', 'text',
		'src', 'size', 'sha1base36', 'archivename', 'rel' ];

	$skip = false;

	while ( $skip ? $this->reader->next() : $this->reader->read() ) {
		if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
			$this->reader->localName == 'upload' ) {
			break;
		}

		// The skip request only applies to the node that raised it. Reset it
		// per node, as handlePage() does, so one unknown tag does not change
		// how every later node is traversed.
		$skip = false;

		$tag = $this->reader->localName;

		if ( !Hooks::run( 'ImportHandleUploadXMLTag', [
			$this, $pageInfo
		] ) ) {
			// Do nothing
		} elseif ( in_array( $tag, $normalFields ) ) {
			$uploadInfo[$tag] = $this->nodeContents();
		} elseif ( $tag == 'contributor' ) {
			$uploadInfo['contributor'] = $this->handleContributor();
		} elseif ( $tag == 'contents' ) {
			// Read the attribute while the reader is still on the opening
			// tag; nodeContents() moves the reader forward.
			$encoding = $this->reader->getAttribute( 'encoding' );
			$contents = $this->nodeContents();
			if ( $encoding === 'base64' ) {
				$uploadInfo['fileSrc'] = $this->dumpTemp( base64_decode( $contents ) );
				$uploadInfo['isTempSrc'] = true;
			}
		} elseif ( $tag != '#text' ) {
			$this->warn( "Unhandled upload XML tag $tag" );
			$skip = true;
		}
	}

	// A file found under the image base path takes precedence over any
	// embedded contents written to a temp file above.
	if ( $this->mImageBasePath && isset( $uploadInfo['rel'] ) ) {
		$path = "{$this->mImageBasePath}/{$uploadInfo['rel']}";
		if ( file_exists( $path ) ) {
			$uploadInfo['fileSrc'] = $path;
			$uploadInfo['isTempSrc'] = false;
		}
	}

	if ( $this->mImportUploads ) {
		return $this->processUpload( $pageInfo, $uploadInfo );
	}
}
949
/**
 * Write the given data to a fresh temporary file.
 *
 * @param string $contents Data to write
 * @return string Name of the temporary file, as returned by tempnam()
 */
private function dumpTemp( $contents ) {
	$tempPath = tempnam( wfTempDir(), 'importupload' );
	file_put_contents( $tempPath, $contents );

	return $tempPath;
}
959
/**
 * Build a WikiRevision from parsed upload data and pass it to the upload
 * callback.
 *
 * 'timestamp', 'filename', 'src', 'size' and 'comment' are read from
 * $uploadInfo unconditionally, as is 'id' from $pageInfo.
 *
 * @param array $pageInfo
 * @param array $uploadInfo
 * @return mixed The upload callback's return value
 */
private function processUpload( $pageInfo, $uploadInfo ) {
	$upload = new WikiRevision( $this->config );

	if ( isset( $uploadInfo['text'] ) ) {
		$text = $uploadInfo['text'];
	} else {
		$text = '';
	}

	$upload->setTitle( $pageInfo['_title'] );
	$upload->setID( $pageInfo['id'] );
	$upload->setTimestamp( $uploadInfo['timestamp'] );
	$upload->setText( $text );
	$upload->setFilename( $uploadInfo['filename'] );
	if ( isset( $uploadInfo['archivename'] ) ) {
		$upload->setArchiveName( $uploadInfo['archivename'] );
	}
	$upload->setSrc( $uploadInfo['src'] );
	if ( isset( $uploadInfo['fileSrc'] ) ) {
		$isTemporary = !empty( $uploadInfo['isTempSrc'] );
		$upload->setFileSrc( $uploadInfo['fileSrc'], $isTemporary );
	}
	if ( isset( $uploadInfo['sha1base36'] ) ) {
		$upload->setSha1Base36( $uploadInfo['sha1base36'] );
	}
	$upload->setSize( intval( $uploadInfo['size'] ) );
	$upload->setComment( $uploadInfo['comment'] );

	// Unlike revisions, both IP and username are applied when both exist.
	if ( isset( $uploadInfo['contributor']['ip'] ) ) {
		$upload->setUserIP( $uploadInfo['contributor']['ip'] );
	}
	if ( isset( $uploadInfo['contributor']['username'] ) ) {
		$upload->setUsername( $uploadInfo['contributor']['username'] );
	}
	$upload->setNoUpdates( $this->mNoUpdates );

	return call_user_func( $this->mUploadCallback, $upload );
}
998
/**
 * Parse a <contributor> element into an array holding whichever of
 * 'id', 'ip' and 'username' are present.
 *
 * @return array Empty for an empty element
 */
private function handleContributor() {
	$wantedFields = [ 'id', 'ip', 'username' ];
	$contributor = [];

	if ( !$this->reader->isEmptyElement ) {
		while ( $this->reader->read() ) {
			$atClosingTag = $this->reader->nodeType == XMLReader::END_ELEMENT &&
				$this->reader->localName == 'contributor';
			if ( $atClosingTag ) {
				break;
			}

			$tag = $this->reader->localName;
			if ( !in_array( $tag, $wantedFields ) ) {
				continue;
			}

			$contributor[$tag] = $this->nodeContents();
		}
	}

	return $contributor;
}
1024
/**
 * Turn a page title from the dump into a local Title plus its
 * ForeignTitle, emitting a notice and returning false when the page must
 * not be imported.
 *
 * NOTE(review): $ns goes through intval(), so a null $ns reaches the
 * foreign title factory as 0 -- confirm that is intended.
 *
 * @param string $text Title text from the dump
 * @param string|null $ns Namespace index from the dump, if any
 * @return array|bool [ Title, ForeignTitle ] on success, false otherwise
 */
private function processTitle( $text, $ns = null ) {
	// Namespace-aware parsing is only possible once <siteinfo> was seen.
	$foreignTitleFactory = is_null( $this->foreignNamespaces )
		? new NaiveForeignTitleFactory()
		: new NamespaceAwareForeignTitleFactory( $this->foreignNamespaces );

	$foreignTitle = $foreignTitleFactory->createForeignTitle( $text,
		intval( $ns ) );

	$title = $this->importTitleFactory->createTitleFromForeignTitle(
		$foreignTitle );

	$commandLineMode = $this->config->get( 'CommandLineMode' );

	if ( is_null( $title ) ) {
		# Invalid page title? Ignore the page
		$this->notice( 'import-error-invalid', $foreignTitle->getFullText() );
		return false;
	}

	if ( $title->isExternal() ) {
		$this->notice( 'import-error-interwiki', $title->getPrefixedText() );
		return false;
	}

	if ( !$title->canExist() ) {
		$this->notice( 'import-error-special', $title->getPrefixedText() );
		return false;
	}

	if ( !$title->userCan( 'edit' ) && !$commandLineMode ) {
		# Do not import if the importing wiki user cannot edit this page
		$this->notice( 'import-error-edit', $title->getPrefixedText() );
		return false;
	}

	if ( !$title->exists() && !$title->userCan( 'create' ) && !$commandLineMode ) {
		# Do not import if the importing wiki user cannot create this page
		$this->notice( 'import-error-create', $title->getPrefixedText() );
		return false;
	}

	return [ $title, $foreignTitle ];
}
1067}
$wgMaxArticleSize
Maximum article size in kilobytes.
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
wfTempDir()
Tries to get the system directory for temporary files.
wfTimestampNow()
Convenience function; returns MediaWiki timestamp for the present time.
wfSetVar(&$dest, $source, $force=false)
Sets dest to source and returns the original value of dest. If source is NULL, it just returns the val...
wfDeprecated( $function, $version=false, $component=false, $callerOffset=2)
Throws a warning that $function is deprecated.
if( $line===false) $args
Definition cdb.php:64
Reporting callback.
Exception representing a failure to serialize or unserialize a content object.
MediaWiki exception.
A parser that translates page titles on a foreign wiki into ForeignTitle objects, with no knowledge o...
A class to convert page titles on a foreign wiki (ForeignTitle objects) into page titles on the local...
A parser that translates page titles on a foreign wiki into ForeignTitle objects, using information a...
A class to convert page titles on a foreign wiki (ForeignTitle objects) into page titles on the local...
static factory(array $deltas)
A class to convert page titles on a foreign wiki (ForeignTitle objects) into page titles on the local...
static registerSource(ImportSource $source)
XML file reader for the page data importer.
finishImportPage( $title, $foreignTitle, $revCount, $sRevCount, $pageInfo)
Mostly for hook use.
setImportUploads( $import)
doImport()
Primary entry point.
setPageCallback( $callback)
Sets the action to perform as each new page in the stream is reached.
setNoUpdates( $noupdates)
Set 'no updates' mode.
pageOutCallback( $title, $foreignTitle, $revCount, $sucCount, $pageInfo)
Notify the callback function when a "</page>" is closed.
setLogItemCallback( $callback)
Sets the action to perform as each log item reached.
importUpload( $revision)
Dummy for now...
setImportTitleFactory( $factory)
Sets the factory object to use to convert ForeignTitle objects into local Title objects.
dumpTemp( $contents)
setSiteInfoCallback( $callback)
Sets the action to perform when site info is encountered.
nodeAttribute( $attr)
Retrieves the contents of the named attribute of the current element.
pageCallback( $title)
Notify the callback function when a new "<page>" is reached.
processLogItem( $logInfo)
setTargetNamespace( $namespace)
Set a target namespace to override the defaults.
debugRevisionHandler(&$revision)
Alternate per-revision callback, for debugging.
nodeContents()
Shouldn't something like this be built-in to XMLReader? Fetches text contents of the current element,...
array $countableCache
importLogItem( $revision)
Default per-revision callback, performs the import.
setImageBasePath( $dir)
handleUpload(&$pageInfo)
handleRevision(&$pageInfo)
revisionCallback( $revision)
Notify the callback function of a revision.
logItemCallback( $revision)
Notify the callback function of a new log item.
throwXmlError( $err)
setDebug( $debug)
Set debug mode...
processRevision( $pageInfo, $revisionInfo)
processUpload( $pageInfo, $uploadInfo)
processTitle( $text, $ns=null)
importRevision( $revision)
Default per-revision callback, performs the import.
ImportTitleFactory $importTitleFactory
setPageOutCallback( $callback)
Sets the action to perform as each page in the stream is completed.
setTargetRootPage( $rootpage)
Set a target root page under which all pages are imported.
setNoticeCallback( $callback)
Set a callback that displays notice messages.
beforeImportPage( $titleAndForeignTitle)
Default per-page callback.
siteInfoCallback( $siteInfo)
Notify the callback function of site info.
__construct(ImportSource $source, Config $config=null)
Creates an ImportXMLReader drawing from the source provided.
setRevisionCallback( $callback)
Sets the action to perform as each page revision is reached.
setUploadCallback( $callback)
Sets the action to perform as each file upload version is reached.
static factory(Title $title)
Create a WikiPage object of the appropriate class for the given title.
Definition WikiPage.php:115
Represents a revision, log entry or upload during the import process.
deferred.txt: A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user. For example, updating the view counts, updating the linked-to tables after a save, etc. PHP does not yet have any way to tell the server to actually return and disconnect while still running these updates, but it might have such a feature in the future. We handle these by creating a deferred-update object and putting those objects on a global list.
Definition deferred.txt:11
this class mediates it Skin Encapsulates a look and feel for the wiki All of the functions that render HTML and make choices about how to render it are here and are called from various other places when and is meant to be subclassed with other skins that may override some of its functions The User object contains a reference to a and so rather than having a global skin object we just rely on the global User and get the skin with $wgUser and also has some character encoding functions and other locale stuff The current user interface language is instantiated as and the local content language as $wgContLang
Definition design.txt:57
When a variable name is used in a function, it is silently declared as a new local, masking the global.
Definition design.txt:95
const NS_MAIN
Definition Defines.php:56
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set $status
Definition hooks.txt:1049
the array() calling protocol came about after MediaWiki 1.4rc1.
namespace are movable Hooks may change this value to override the return value of MWNamespace::isMovable(). 'NewDifferenceEngine' do that in ParserLimitReportFormat instead use this to modify the parameters of the image and a DIV can begin in one section and end in another Make sure your code can handle that case gracefully See the EditSectionClearerLink extension for an example zero but section is usually empty its values are the globals values before the output is cached one of or reset my talk my contributions etc etc otherwise the built in rate limiting checks are if enabled allows for interception of redirect as a string mapping parameter names to values & $type
Definition hooks.txt:2568
namespace and then decline to actually register it file or subcat img or subcat $title
Definition hooks.txt:986
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set and then return false from the hook function Ensure you consume the ChangeTagAfterDelete hook to carry out custom deletion actions as context called by AbstractContent::getParserOutput May be used to override the normal model specific rendering of page content $content
Definition hooks.txt:1094
either a unescaped string or a HtmlArmor object after in associative array form externallinks including delete and has completed for all link tables whether this was an auto creation default is conds Array Extra conditions for the No matching items in log is displayed if loglist is empty msgKey Array If you want a nice box with a set this to the key of the message First element is the message additional optional elements are parameters for the key that are processed with wfMessage() -> params() ->parseAsBlock() - offset Set to overwrite offset parameter in $wgRequest set to '' to unset offset - wrap String Wrap the message in html(usually something like "&lt;div ...>$1&lt;/div>"). - flags Integer display flags(NO_ACTION_LINK, NO_EXTRA_USER_LINKS) 'LogException':Called before an exception(or PHP error) is logged. This is meant for integration with external error aggregation services
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books $tag
Definition hooks.txt:1033
namespace are movable Hooks may change this value to override the return value of MWNamespace::isMovable(). 'NewDifferenceEngine' do that in ParserLimitReportFormat instead use this to modify the parameters of the image and a DIV can begin in one section and end in another Make sure your code can handle that case gracefully See the EditSectionClearerLink extension for an example zero but section is usually empty its values are the globals values before the output is cached $page
Definition hooks.txt:2534
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that probably a stub it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output modifiable modifiable after all normalizations have been except for the $wgMaxImageArea check set to true or false to override the $wgMaxImageArea check result gives extension the possibility to transform it themselves $handler
Definition hooks.txt:925
if(count( $args)==0) $dir
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition injection.txt:37
Interface for configuration instances.
Definition Config.php:28
Source interface for XML import.
Represents an object that can convert page titles on a foreign wiki (ForeignTitle objects) into page ...
$debug
Definition mcc.php:31
$source
$buffer
$params