MediaWiki  1.34.0
WikiImporter.php
Go to the documentation of this file.
1 <?php
28 
35 class WikiImporter {
37  private $reader;
38  private $foreignNamespaces = null;
43  private $mNoUpdates = false;
44  private $pageOffset = 0;
46  private $config;
50  private $countableCache = [];
52  private $disableStatisticsUpdate = false;
55 
/**
 * Creates an ImportXMLReader drawing from the source provided.
 *
 * Registers the 'uploadsource' stream wrapper if needed, opens the XMLReader on
 * the registered source and installs the default import callbacks.
 *
 * @param ImportSource $source Source the dump is read from
 * @param Config $config Configuration; handed to every WikiRevision built during
 *  the import and consulted for 'CommandLineMode'
 * @throws Exception If the XMLReader class is not available
 * @throws MWException If the reader cannot be opened on the source
 */
public function __construct( ImportSource $source, Config $config ) {
	if ( !class_exists( 'XMLReader' ) ) {
		throw new Exception( 'Import requires PHP to have been compiled with libxml support' );
	}

	$this->reader = new XMLReader();
	$this->config = $config;

	if ( !in_array( 'uploadsource', stream_get_wrappers() ) ) {
		stream_wrapper_register( 'uploadsource', UploadSourceAdapter::class );
	}
	// The identifier returned here is what the uploadsource:// URL below refers to.
	$id = UploadSourceAdapter::registerSource( $source );

	// Enable the entity loader, as it is needed for loading external URLs via
	// XMLReader::open (T86036)
	$oldDisable = libxml_disable_entity_loader( false );
	if ( defined( 'LIBXML_PARSEHUGE' ) ) {
		$status = $this->reader->open( "uploadsource://$id", null, LIBXML_PARSEHUGE );
	} else {
		$status = $this->reader->open( "uploadsource://$id" );
	}
	if ( !$status ) {
		$error = libxml_get_last_error();
		libxml_disable_entity_loader( $oldDisable );
		// libxml_get_last_error() returns false when no error was recorded.
		throw new MWException( 'Encountered an internal error while initializing WikiImporter object: ' .
			( $error ? $error->message : '' ) );
	}
	libxml_disable_entity_loader( $oldDisable );

	// Default callbacks
	$this->setPageCallback( [ $this, 'beforeImportPage' ] );
	$this->setRevisionCallback( [ $this, "importRevision" ] );
	$this->setUploadCallback( [ $this, 'importUpload' ] );
	$this->setLogItemCallback( [ $this, 'importLogItem' ] );
	$this->setPageOutCallback( [ $this, 'finishImportPage' ] );

	$this->importTitleFactory = new NaiveImportTitleFactory();
	$this->externalUserNames = new ExternalUserNames( 'imported', false );
}
101 
/**
 * Hand out the reader this importer pulls the dump from.
 *
 * @return XMLReader
 */
public function getReader() {
	$xmlReader = $this->reader;

	return $xmlReader;
}
108 
/**
 * Report an XML error to the debug log.
 *
 * Despite its name this method does not throw; it only logs, once through
 * debug() and once unconditionally through wfDebug().
 *
 * @param string $err Error description
 */
public function throwXmlError( $err ) {
	$this->debug( 'FAILURE: ' . $err );
	wfDebug( 'WikiImporter XML error: ' . $err . "\n" );
}
113 
/**
 * Write a line to the debug log, but only while debug mode is switched on.
 *
 * @param string $data Text to log
 */
public function debug( $data ) {
	if ( !$this->mDebug ) {
		return;
	}

	wfDebug( 'IMPORT: ' . $data . "\n" );
}
119 
/**
 * Write a warning to the debug log regardless of the debug mode setting.
 *
 * @param string $data Text to log
 */
public function warn( $data ) {
	$line = 'IMPORT: ' . $data . "\n";
	wfDebug( $line );
}
123 
/**
 * Emit a notice, either through the registered notice callback or, when none
 * is callable, through the debug log.
 *
 * @param string $msg Message key
 * @param mixed ...$params Message parameters; forwarded to the callback as one array
 */
public function notice( $msg, ...$params ) {
	if ( !is_callable( $this->mNoticeCallback ) ) {
		# No ImportReporter -> CLI
		// T177997: the command line importers should call setNoticeCallback()
		// for their own custom callback to echo the notice
		wfDebug( wfMessage( $msg, $params )->text() . "\n" );
		return;
	}

	call_user_func( $this->mNoticeCallback, $msg, $params );
}
133 
/**
 * Switch debug mode on or off; debug() only logs while it is on.
 *
 * @param bool $debug
 */
public function setDebug( $debug ) {
	$this->mDebug = $debug;
}
141 
/**
 * Set 'no updates' mode. The flag is copied onto every WikiRevision this
 * importer builds.
 *
 * @param bool $noupdates
 */
public function setNoUpdates( $noupdates ) {
	$this->mNoUpdates = $noupdates;
}
149 
/**
 * Set the page offset: doImport() skips top-level nodes until it has counted
 * this many <page> elements.
 *
 * @param int $nthPage Ordinal of the first page to import
 */
public function setPageOffset( $nthPage ) {
	$this->pageOffset = $nthPage;
}
159 
/**
 * Set a callback that displays notice messages.
 *
 * @param callable $callback
 * @return callable|null Whatever wfSetVar() reports as the previous value
 */
public function setNoticeCallback( $callback ) {
	$previous = wfSetVar( $this->mNoticeCallback, $callback );

	return $previous;
}
169 
/**
 * Sets the action to perform as each new page in the stream is reached.
 *
 * @param callable $callback Invoked with a [ title, foreign title ] pair
 * @return callable|null The callback that was installed before
 */
public function setPageCallback( $callback ) {
	$oldCallback = $this->mPageCallback;

	$this->mPageCallback = $callback;

	return $oldCallback;
}
180 
/**
 * Sets the action to perform when a "</page>" is closed.
 *
 * The callback receives the title, the foreign title, the revision count,
 * the successful revision count and the page info array.
 *
 * @param callable $callback
 * @return callable|null The callback that was installed before
 */
public function setPageOutCallback( $callback ) {
	$oldCallback = $this->mPageOutCallback;

	$this->mPageOutCallback = $callback;

	return $oldCallback;
}
195 
/**
 * Sets the action to perform as each page revision is reached.
 *
 * @param callable $callback Invoked with the revision and this importer
 * @return callable|null The callback that was installed before
 */
public function setRevisionCallback( $callback ) {
	$oldCallback = $this->mRevisionCallback;

	$this->mRevisionCallback = $callback;

	return $oldCallback;
}
206 
/**
 * Sets the action to perform as each file upload version is reached.
 *
 * @param callable $callback Invoked with the upload revision
 * @return callable|null The callback that was installed before
 */
public function setUploadCallback( $callback ) {
	$oldCallback = $this->mUploadCallback;

	$this->mUploadCallback = $callback;

	return $oldCallback;
}
217 
/**
 * Sets the action to perform as each log item is reached.
 *
 * @param callable $callback Invoked with the log item revision and this importer
 * @return callable|null The callback that was installed before
 */
public function setLogItemCallback( $callback ) {
	$oldCallback = $this->mLogItemCallback;

	$this->mLogItemCallback = $callback;

	return $oldCallback;
}
228 
/**
 * Sets the action to perform when site info is encountered.
 *
 * @param callable $callback Invoked with the site info array and this importer
 * @return callable|null The callback that was installed before
 */
public function setSiteInfoCallback( $callback ) {
	$oldCallback = $this->mSiteInfoCallback;

	$this->mSiteInfoCallback = $callback;

	return $oldCallback;
}
239 
/**
 * Sets the factory object used to turn a ForeignTitle into a local title
 * (see processTitle()).
 *
 * @param ImportTitleFactory $factory
 */
public function setImportTitleFactory( $factory ) {
	$titleFactory = $factory;
	$this->importTitleFactory = $titleFactory;
}
248 
/**
 * Set a target namespace to override the namespaces found in the import.
 *
 * @param int|null $namespace Namespace ID, or null to keep the namespaces
 *  found in the dump
 * @return bool False when the namespace is negative or does not exist
 */
public function setTargetNamespace( $namespace ) {
	if ( is_null( $namespace ) ) {
		// Don't override namespaces: fall back to the default factory so that a
		// previously installed namespace override is actually undone.
		$this->setImportTitleFactory( new NaiveImportTitleFactory() );
		return true;
	}

	$namespaceInfo = MediaWikiServices::getInstance()->getNamespaceInfo();
	if ( $namespace >= 0 && $namespaceInfo->exists( intval( $namespace ) ) ) {
		$namespace = intval( $namespace );
		$this->setImportTitleFactory( new NamespaceImportTitleFactory( $namespace ) );
		return true;
	}

	return false;
}
270 
/**
 * Set a target root page under which all pages are imported.
 *
 * @param string|null $rootpage Root page title; null means no root page and
 *  an empty string leaves the current title factory untouched
 * @return Status Fatal when the title is invalid, is external, or lies in a
 *  namespace without subpages
 */
public function setTargetRootPage( $rootpage ) {
	$status = Status::newGood();
	if ( is_null( $rootpage ) ) {
		// No rootpage
		$this->setImportTitleFactory( new NaiveImportTitleFactory() );
	} elseif ( $rootpage !== '' ) {
		$rootpage = rtrim( $rootpage, '/' ); // avoid double slashes
		$title = Title::newFromText( $rootpage );

		if ( !$title || $title->isExternal() ) {
			$status->fatal( 'import-rootpage-invalid' );
		} elseif (
			!MediaWikiServices::getInstance()->getNamespaceInfo()->
				hasSubpages( $title->getNamespace() )
		) {
			$displayNSText = $title->getNamespace() == NS_MAIN
				? wfMessage( 'blanknamespace' )->text()
				: MediaWikiServices::getInstance()->getContentLanguage()->
					getNsText( $title->getNamespace() );
			$status->fatal( 'import-rootpage-nosubpage', $displayNSText );
		} else {
			// set namespace to 'all', so the namespace check in processTitle() can pass
			$this->setTargetNamespace( null );
			// Must come after the reset above, which installs the default factory.
			$this->setImportTitleFactory( new SubpageImportTitleFactory( $title ) );
		}
	}
	return $status;
}
304 
/**
 * Set the directory that handleUpload() combines with an upload's 'rel'
 * value to locate the file on disk.
 *
 * @param string $dir
 */
public function setImageBasePath( $dir ) {
	$basePath = $dir;
	$this->mImageBasePath = $basePath;
}
311 
/**
 * Choose whether <upload> elements are actually imported; handleUpload()
 * only processes them when this is truthy.
 *
 * @param bool $import
 */
public function setImportUploads( $import ) {
	$shouldImport = $import;
	$this->mImportUploads = $shouldImport;
}
318 
/**
 * Replace the ExternalUserNames helper used to prefix imported user names.
 *
 * @param string $usernamePrefix Passed through as the first constructor argument
 * @param bool $assignKnownUsers Passed through as the second constructor argument
 */
public function setUsernamePrefix( $usernamePrefix, $assignKnownUsers ) {
	$names = new ExternalUserNames( $usernamePrefix, $assignKnownUsers );
	$this->externalUserNames = $names;
}
327 
/**
 * Statistics update can cause a lot of time; calling this makes
 * finishImportPage() skip the article count adjustment.
 */
public function disableStatisticsUpdate() {
	$disabled = true;
	$this->disableStatisticsUpdate = $disabled;
}
335 
/**
 * Default per-page startup callback: remembers whether the page was countable
 * before the import so finishImportPage() can compute the difference.
 *
 * @param array $titleAndForeignTitle Pair whose first element is the local title
 * @return bool Always true
 */
public function beforeImportPage( $titleAndForeignTitle ) {
	$localTitle = $titleAndForeignTitle[0];
	$wikiPage = WikiPage::factory( $localTitle );
	$cacheKey = 'title_' . $localTitle->getPrefixedText();

	$this->countableCache[$cacheKey] = $wikiPage->isCountable();

	return true;
}
348 
/**
 * Default per-revision callback, performs the import.
 *
 * @param WikiRevision $revision
 * @return bool Result of importOldRevision(); false when the content model
 *  cannot be used on the title or the content cannot be unserialized
 */
public function importRevision( $revision ) {
	$usable = $revision->getContentHandler()->canBeUsedOn( $revision->getTitle() );
	if ( !$usable ) {
		$this->notice(
			'import-error-bad-location',
			$revision->getTitle()->getPrefixedText(),
			$revision->getID(),
			$revision->getModel(),
			$revision->getFormat()
		);

		return false;
	}

	try {
		return $revision->importOldRevision();
	} catch ( MWContentSerializationException $ex ) {
		// Report the failure and treat the revision as not imported.
		$this->notice(
			'import-error-unserialize',
			$revision->getTitle()->getPrefixedText(),
			$revision->getID(),
			$revision->getModel(),
			$revision->getFormat()
		);

		return false;
	}
}
377 
/**
 * Default per-log-item callback, performs the import.
 *
 * @param WikiRevision $revision
 * @return mixed Whatever WikiRevision::importLogItem() returns
 */
public function importLogItem( $revision ) {
	$result = $revision->importLogItem();

	return $result;
}
386 
/**
 * Default upload callback, performs the import.
 *
 * @param WikiRevision $revision
 * @return mixed Whatever WikiRevision::importUpload() returns
 */
public function importUpload( $revision ) {
	$result = $revision->importUpload();

	return $result;
}
395 
/**
 * Mostly for hook use. Default page-out callback: adjusts the article count
 * statistic and runs the 'AfterImportPage' hook.
 *
 * @param Title $title Local title of the imported page
 * @param ForeignTitle $foreignTitle Title of the page in the dump
 * @param int $revCount Number of revisions seen for the page
 * @param int $sRevCount Number of revisions imported successfully
 * @param array $pageInfo Page data collected while parsing
 * @return bool Result of the 'AfterImportPage' hook
 */
public function finishImportPage( $title, $foreignTitle, $revCount,
	$sRevCount, $pageInfo
) {
	// Update article count statistics (T42009)
	// The normal counting logic in WikiPage->doEditUpdates() is designed for
	// one-revision-at-a-time editing, not bulk imports. In this situation it
	// suffers from issues of replica DB lag. We let WikiPage handle the total page
	// and revision count, and we implement our own custom logic for the
	// article (content page) count.
	if ( !$this->disableStatisticsUpdate ) {
		$page = WikiPage::factory( $title );
		$page->loadPageData( 'fromdbmaster' );
		$content = $page->getContent();
		if ( $content === null ) {
			wfDebug( __METHOD__ . ': Skipping article count adjustment for ' . $title .
				' because WikiPage::getContent() returned null' );
		} else {
			$editInfo = $page->prepareContentForEdit( $content );
			$countKey = 'title_' . $title->getPrefixedText();
			$countable = $page->isCountable( $editInfo );
			// Only pages whose pre-import state was recorded by beforeImportPage()
			// and whose countability actually changed need an adjustment.
			if ( array_key_exists( $countKey, $this->countableCache ) &&
				$countable != $this->countableCache[$countKey] ) {
				DeferredUpdates::addUpdate( SiteStatsUpdate::factory( [
					'articles' => ( (int)$countable - (int)$this->countableCache[$countKey] )
				] ) );
			}
		}
	}

	return Hooks::run( 'AfterImportPage', func_get_args() );
}
436 
/**
 * Alternate per-revision callback, for debugging: dumps the revision's
 * title, user, timestamp, comment and text through debug().
 *
 * @param WikiRevision &$revision
 */
public function debugRevisionHandler( &$revision ) {
	$this->debug( "Got revision:" );

	$titleText = is_object( $revision->title )
		? $revision->title->getPrefixedText()
		: '<invalid>';
	$this->debug( "-- Title: " . $titleText );

	$this->debug( "-- User: " . $revision->user_text );
	$this->debug( "-- Timestamp: " . $revision->timestamp );
	$this->debug( "-- Comment: " . $revision->comment );
	$this->debug( "-- Text: " . $revision->text );
}
453 
/**
 * Notify the callback function of site info.
 *
 * @param array $siteInfo
 * @return mixed The callback's result, or false when no callback is set
 */
private function siteInfoCallback( $siteInfo ) {
	if ( !isset( $this->mSiteInfoCallback ) ) {
		return false;
	}

	$arguments = [ $siteInfo, $this ];

	return call_user_func_array( $this->mSiteInfoCallback, $arguments );
}
467 
/**
 * Notify the callback function when a new "<page>" is reached.
 * Does nothing when no page callback is set.
 *
 * @param array $title The [ title, foreign title ] pair produced by processTitle()
 */
public function pageCallback( $title ) {
	if ( !isset( $this->mPageCallback ) ) {
		return;
	}

	call_user_func( $this->mPageCallback, $title );
}
477 
/**
 * Notify the callback function when a "</page>" is closed.
 * All arguments are forwarded to the callback unchanged.
 *
 * @param Title $title
 * @param ForeignTitle $foreignTitle
 * @param int $revCount
 * @param int $sucCount Number of revisions for which callback returned true
 * @param array $pageInfo Associative array of page information
 */
private function pageOutCallback( $title, $foreignTitle, $revCount,
	$sucCount, $pageInfo
) {
	if ( !isset( $this->mPageOutCallback ) ) {
		return;
	}

	$arguments = func_get_args();
	call_user_func_array( $this->mPageOutCallback, $arguments );
}
492 
/**
 * Notify the callback function of a revision.
 *
 * @param WikiRevision $revision
 * @return mixed The callback's result, or false when no callback is set
 */
private function revisionCallback( $revision ) {
	if ( !isset( $this->mRevisionCallback ) ) {
		return false;
	}

	$arguments = [ $revision, $this ];

	return call_user_func_array( $this->mRevisionCallback, $arguments );
}
506 
/**
 * Notify the callback function of a new log item.
 *
 * @param WikiRevision $revision
 * @return mixed The callback's result, or false when no callback is set
 */
private function logItemCallback( $revision ) {
	if ( !isset( $this->mLogItemCallback ) ) {
		return false;
	}

	$arguments = [ $revision, $this ];

	return call_user_func_array( $this->mLogItemCallback, $arguments );
}
520 
/**
 * Retrieves an attribute of the node the reader is currently positioned on.
 *
 * @param string $attr Attribute name
 * @return string|null Whatever XMLReader::getAttribute() returns for $attr
 */
public function nodeAttribute( $attr ) {
	$value = $this->reader->getAttribute( $attr );

	return $value;
}
530 
/**
 * Fetches text contents of the current element by reading forward until the
 * next end-element node, concatenating text, CDATA and significant
 * whitespace along the way.
 *
 * @return string The collected text; "" for an empty element, and '' (after
 *  closing the reader) when the input ends before an end element is seen
 */
public function nodeContents() {
	if ( $this->reader->isEmptyElement ) {
		return "";
	}

	$textNodeTypes = [
		XMLReader::TEXT,
		XMLReader::CDATA,
		XMLReader::SIGNIFICANT_WHITESPACE,
	];

	$collected = "";
	while ( $this->reader->read() ) {
		$nodeType = $this->reader->nodeType;

		if ( $nodeType == XMLReader::END_ELEMENT ) {
			return $collected;
		}
		if ( in_array( $nodeType, $textNodeTypes ) ) {
			$collected .= $this->reader->value;
		}
	}

	// Ran out of input without seeing the closing tag.
	$this->reader->close();
	return '';
}
558 
/**
 * Primary entry point: walks the top-level children of <mediawiki> and
 * dispatches <siteinfo>, <page> and <logitem> to their handlers.
 *
 * The libxml entity loader state is restored and the reader is closed whenever
 * the processing loop ends, including when it is left by any throwable.
 *
 * @throws MWException If the document does not start with a <mediawiki> tag
 * @return bool Always true; failures surface as exceptions
 */
public function doImport() {
	// Calls to reader->read need to be wrapped in calls to
	// libxml_disable_entity_loader() to avoid local file
	// inclusion attacks (T48932).
	$oldDisable = libxml_disable_entity_loader( true );
	$this->reader->read();

	if ( $this->reader->localName != 'mediawiki' ) {
		libxml_disable_entity_loader( $oldDisable );
		throw new MWException( "Expected <mediawiki> tag, got " .
			$this->reader->localName );
	}
	$this->debug( "<mediawiki> tag is correct." );

	$this->debug( "Starting primary dump processing loop." );

	try {
		$keepReading = $this->reader->read();
		$skip = false;
		$pageCount = 0;

		while ( $keepReading ) {
			$tag = $this->reader->localName;
			if ( $this->pageOffset ) {
				if ( $tag === 'page' ) {
					$pageCount++;
				}
				// Jump over whole subtrees until the requested page is reached.
				if ( $pageCount < $this->pageOffset ) {
					$keepReading = $this->reader->next();
					continue;
				}
			}
			$type = $this->reader->nodeType;

			if ( !Hooks::run( 'ImportHandleToplevelXMLTag', [ $this ] ) ) {
				// Do nothing
			} elseif ( $tag == 'mediawiki' && $type == XMLReader::END_ELEMENT ) {
				break;
			} elseif ( $tag == 'siteinfo' ) {
				$this->handleSiteInfo();
			} elseif ( $tag == 'page' ) {
				$this->handlePage();
			} elseif ( $tag == 'logitem' ) {
				$this->handleLogItem();
			} elseif ( $tag != '#text' ) {
				$this->warn( "Unhandled top-level XML tag $tag" );

				$skip = true;
			}

			if ( $skip ) {
				$keepReading = $this->reader->next();
				$skip = false;
				$this->debug( "Skip" );
			} else {
				$keepReading = $this->reader->read();
			}
		}
	} finally {
		// Runs for Exception and Error alike, so the process-wide libxml
		// setting is never left in the state this method put it in.
		libxml_disable_entity_loader( $oldDisable );
		$this->reader->close();
	}

	return true;
}
637 
/**
 * Parse a <siteinfo> element: collects the plain text fields and the
 * namespace map, then hands the result to siteInfoCallback().
 */
private function handleSiteInfo() {
	$this->debug( "Enter site info handler." );

	// Fields that can just be stuffed in the siteInfo object
	$plainFields = [ 'sitename', 'base', 'generator', 'case' ];
	$collected = [];

	while ( $this->reader->read() ) {
		$atClosingTag = $this->reader->nodeType == XMLReader::END_ELEMENT
			&& $this->reader->localName == 'siteinfo';
		if ( $atClosingTag ) {
			break;
		}

		$tag = $this->reader->localName;

		if ( $tag == 'namespace' ) {
			// Read the key first; nodeContents() moves the reader forward.
			$namespaceKey = $this->nodeAttribute( 'key' );
			$this->foreignNamespaces[$namespaceKey] = $this->nodeContents();
		} elseif ( in_array( $tag, $plainFields ) ) {
			$collected[$tag] = $this->nodeContents();
		}
	}

	$collected['_namespaces'] = $this->foreignNamespaces;
	$this->siteInfoCallback( $collected );
}
664 
/**
 * Parse a <logitem> element into an array and pass it to processLogItem().
 * The 'ImportHandleLogItemXMLTag' hook may take over any child tag by
 * returning false.
 */
private function handleLogItem() {
	$this->debug( "Enter log item handler." );

	// Fields that can just be stuffed in the pageInfo object
	$normalFields = [ 'id', 'comment', 'type', 'action', 'timestamp',
		'logtitle', 'params' ];
	$logInfo = [];

	while ( $this->reader->read() ) {
		$atClosingTag = $this->reader->nodeType == XMLReader::END_ELEMENT
			&& $this->reader->localName == 'logitem';
		if ( $atClosingTag ) {
			break;
		}

		$tag = $this->reader->localName;

		if ( !Hooks::run( 'ImportHandleLogItemXMLTag', [ $this, $logInfo ] ) ) {
			// Do nothing
			continue;
		}

		if ( in_array( $tag, $normalFields ) ) {
			$logInfo[$tag] = $this->nodeContents();
			continue;
		}

		if ( $tag == 'contributor' ) {
			$logInfo['contributor'] = $this->handleContributor();
			continue;
		}

		if ( $tag != '#text' ) {
			$this->warn( "Unhandled log-item XML tag $tag" );
		}
	}

	$this->processLogItem( $logInfo );
}
696 
/**
 * Build a WikiRevision from parsed log item data and pass it to
 * logItemCallback().
 *
 * @param array $logInfo Data collected by handleLogItem(); 'type' and
 *  'action' are read unconditionally, all other keys are optional
 * @return mixed Result of logItemCallback()
 */
private function processLogItem( $logInfo ) {
	$logEntry = new WikiRevision( $this->config );

	if ( isset( $logInfo['id'] ) ) {
		$logEntry->setID( $logInfo['id'] );
	}
	$logEntry->setType( $logInfo['type'] );
	$logEntry->setAction( $logInfo['action'] );
	if ( isset( $logInfo['timestamp'] ) ) {
		$logEntry->setTimestamp( $logInfo['timestamp'] );
	}
	if ( isset( $logInfo['params'] ) ) {
		$logEntry->setParams( $logInfo['params'] );
	}
	if ( isset( $logInfo['logtitle'] ) ) {
		// @todo Using Title for non-local titles is a recipe for disaster.
		// We should use ForeignTitle here instead.
		$logTitle = Title::newFromText( $logInfo['logtitle'] );
		$logEntry->setTitle( $logTitle );
	}

	$logEntry->setNoUpdates( $this->mNoUpdates );

	if ( isset( $logInfo['comment'] ) ) {
		$logEntry->setComment( $logInfo['comment'] );
	}

	if ( isset( $logInfo['contributor']['ip'] ) ) {
		$logEntry->setUserIP( $logInfo['contributor']['ip'] );
	}

	// The user name is always set, even when an IP was recorded above.
	$userName = isset( $logInfo['contributor']['username'] )
		? $this->externalUserNames->applyPrefix( $logInfo['contributor']['username'] )
		: $this->externalUserNames->addPrefix( 'Unknown user' );
	$logEntry->setUsername( $userName );

	return $this->logItemCallback( $logEntry );
}
741 
/**
 * Parse a <page> element: collects the page fields, resolves the title when
 * the first <revision> or <upload> is met, delegates those elements to their
 * handlers and finally fires the page-out callback.
 */
private function handlePage() {
	// Handle page data.
	$this->debug( "Enter page handler." );
	$pageInfo = [ 'revisionCount' => 0, 'successfulRevisionCount' => 0 ];

	// Fields that can just be stuffed in the pageInfo object
	$normalFields = [ 'title', 'ns', 'id', 'redirect', 'restrictions' ];

	$skip = false;
	$badTitle = false;
	// null: not resolved yet; false: rejected by processTitle();
	// array: the [ title, foreign title ] pair.
	$title = null;
	// Defined up front because it is read after the loop whenever
	// $pageInfo['_title'] exists, and a hook may set that key on its own.
	$foreignTitle = null;

	while ( $skip ? $this->reader->next() : $this->reader->read() ) {
		if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
			$this->reader->localName == 'page' ) {
			break;
		}

		$skip = false;

		$tag = $this->reader->localName;

		if ( $badTitle ) {
			// The title is invalid, bail out of this page
			$skip = true;
		} elseif ( !Hooks::run( 'ImportHandlePageXMLTag', [ $this,
			&$pageInfo ] ) ) {
			// Do nothing
		} elseif ( in_array( $tag, $normalFields ) ) {
			// An XML snippet:
			// <page>
			//     <id>123</id>
			//     <title>Page</title>
			//     <redirect title="NewTitle"/>
			//     ...
			// Because the redirect tag is built differently, we need special handling for that case.
			if ( $tag == 'redirect' ) {
				$pageInfo[$tag] = $this->nodeAttribute( 'title' );
			} else {
				$pageInfo[$tag] = $this->nodeContents();
			}
		} elseif ( $tag == 'revision' || $tag == 'upload' ) {
			if ( $title === null ) {
				$title = $this->processTitle( $pageInfo['title'],
					$pageInfo['ns'] ?? null );

				// $title is either an array of two titles or false.
				if ( is_array( $title ) ) {
					$this->pageCallback( $title );
					list( $pageInfo['_title'], $foreignTitle ) = $title;
				} else {
					$badTitle = true;
					$skip = true;
				}
			}

			if ( $title ) {
				if ( $tag == 'revision' ) {
					$this->handleRevision( $pageInfo );
				} else {
					$this->handleUpload( $pageInfo );
				}
			}
		} elseif ( $tag != '#text' ) {
			$this->warn( "Unhandled page XML tag $tag" );
			$skip = true;
		}
	}

	// @note $pageInfo['_title'] is only set here if a valid $title was
	// processed above with no error. In that case pageCallback was called
	// above and we do pageOutCallback here.
	if ( array_key_exists( '_title', $pageInfo ) ) {
		$this->pageOutCallback( $pageInfo['_title'], $foreignTitle,
			$pageInfo['revisionCount'],
			$pageInfo['successfulRevisionCount'],
			$pageInfo );
	}
}
825 
/**
 * Parse a <revision> element, then count it and pass the collected data to
 * processRevision().
 *
 * @param array &$pageInfo Page data; 'revisionCount' is always incremented and
 *  'successfulRevisionCount' is incremented when processRevision() returns
 *  a truthy value
 */
private function handleRevision( &$pageInfo ) {
	$this->debug( "Enter revision handler" );
	$revisionInfo = [];

	$normalFields = [ 'id', 'timestamp', 'comment', 'minor', 'model', 'format', 'text', 'sha1' ];

	$skip = false;

	while ( $skip ? $this->reader->next() : $this->reader->read() ) {
		if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
			$this->reader->localName == 'revision' ) {
			break;
		}

		// A skip applies to one unhandled subtree only; without this reset a
		// single unknown tag would switch the rest of the element to next().
		$skip = false;

		$tag = $this->reader->localName;

		if ( !Hooks::run( 'ImportHandleRevisionXMLTag', [
			$this, $pageInfo, $revisionInfo
		] ) ) {
			// Do nothing
		} elseif ( in_array( $tag, $normalFields ) ) {
			$revisionInfo[$tag] = $this->nodeContents();
		} elseif ( $tag == 'contributor' ) {
			$revisionInfo['contributor'] = $this->handleContributor();
		} elseif ( $tag != '#text' ) {
			$this->warn( "Unhandled revision XML tag $tag" );
			$skip = true;
		}
	}

	$pageInfo['revisionCount']++;
	if ( $this->processRevision( $pageInfo, $revisionInfo ) ) {
		$pageInfo['successfulRevisionCount']++;
	}
}
864 
/**
 * Build a WikiRevision from parsed revision data and pass it to
 * revisionCallback().
 *
 * @param array $pageInfo Page data; '_title' is read unconditionally
 * @param array $revisionInfo Data collected by handleRevision(); every key is optional
 * @throws MWException If the text of a size-checked content model exceeds $wgMaxArticleSize
 * @return mixed Result of revisionCallback()
 */
private function processRevision( $pageInfo, $revisionInfo ) {
	global $wgMaxArticleSize;

	// Make sure revisions won't violate $wgMaxArticleSize, which could lead to
	// database errors and instability. Testing for revisions with only listed
	// content models, as other content models might use serialization formats
	// which aren't checked against $wgMaxArticleSize.
	$sizeCheckedModels = [
		'wikitext',
		'css',
		'json',
		'javascript',
		'text',
		''
	];
	$isSizeChecked = !isset( $revisionInfo['model'] ) ||
		in_array( $revisionInfo['model'], $sizeCheckedModels );
	// A revision may carry no <text> at all; treat that as empty text here
	// instead of reading a missing array key.
	$textForSizeCheck = $revisionInfo['text'] ?? '';

	if ( $isSizeChecked && strlen( $textForSizeCheck ) > $wgMaxArticleSize * 1024 ) {
		throw new MWException( 'The text of ' .
			( isset( $revisionInfo['id'] ) ?
				"the revision with ID $revisionInfo[id]" :
				'a revision'
			) . " exceeds the maximum allowable size ($wgMaxArticleSize KB)" );
	}

	// FIXME: process schema version 11!
	$revision = new WikiRevision( $this->config );

	if ( isset( $revisionInfo['id'] ) ) {
		$revision->setID( $revisionInfo['id'] );
	}
	if ( isset( $revisionInfo['model'] ) ) {
		$revision->setModel( $revisionInfo['model'] );
	}
	if ( isset( $revisionInfo['format'] ) ) {
		$revision->setFormat( $revisionInfo['format'] );
	}
	$revision->setTitle( $pageInfo['_title'] );

	if ( isset( $revisionInfo['text'] ) ) {
		$handler = $revision->getContentHandler();
		$text = $handler->importTransform(
			$revisionInfo['text'],
			$revision->getFormat() );

		$revision->setText( $text );
	}
	$revision->setTimestamp( $revisionInfo['timestamp'] ?? wfTimestampNow() );

	if ( isset( $revisionInfo['comment'] ) ) {
		$revision->setComment( $revisionInfo['comment'] );
	}

	// The mere presence of the element marks the revision as minor.
	if ( isset( $revisionInfo['minor'] ) ) {
		$revision->setMinor( true );
	}
	if ( isset( $revisionInfo['contributor']['ip'] ) ) {
		$revision->setUserIP( $revisionInfo['contributor']['ip'] );
	} elseif ( isset( $revisionInfo['contributor']['username'] ) ) {
		$revision->setUsername(
			$this->externalUserNames->applyPrefix( $revisionInfo['contributor']['username'] )
		);
	} else {
		$revision->setUsername( $this->externalUserNames->addPrefix( 'Unknown user' ) );
	}
	if ( isset( $revisionInfo['sha1'] ) ) {
		$revision->setSha1Base36( $revisionInfo['sha1'] );
	}
	$revision->setNoUpdates( $this->mNoUpdates );

	return $this->revisionCallback( $revision );
}
943 
/**
 * Parse an <upload> element and, when upload importing is enabled, pass the
 * collected data to processUpload().
 *
 * Inline base64 <contents> are written to a temporary file; a file found
 * under the image base path via 'rel' takes precedence over that.
 *
 * @param array &$pageInfo Page data collected so far
 * @return mixed Result of processUpload(), or null when uploads are not imported
 */
private function handleUpload( &$pageInfo ) {
	$this->debug( "Enter upload handler" );
	$uploadInfo = [];

	$normalFields = [ 'timestamp', 'comment', 'filename', 'text',
		'src', 'size', 'sha1base36', 'archivename', 'rel' ];

	$skip = false;

	while ( $skip ? $this->reader->next() : $this->reader->read() ) {
		if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
			$this->reader->localName == 'upload' ) {
			break;
		}

		// A skip applies to one unhandled subtree only; without this reset a
		// single unknown tag would switch the rest of the element to next().
		$skip = false;

		$tag = $this->reader->localName;

		if ( !Hooks::run( 'ImportHandleUploadXMLTag', [
			$this, $pageInfo
		] ) ) {
			// Do nothing
		} elseif ( in_array( $tag, $normalFields ) ) {
			$uploadInfo[$tag] = $this->nodeContents();
		} elseif ( $tag == 'contributor' ) {
			$uploadInfo['contributor'] = $this->handleContributor();
		} elseif ( $tag == 'contents' ) {
			// Read the attribute while still on the start tag; nodeContents()
			// advances the reader to the element's end.
			$encoding = $this->reader->getAttribute( 'encoding' );
			$contents = $this->nodeContents();
			if ( $encoding === 'base64' ) {
				$uploadInfo['fileSrc'] = $this->dumpTemp( base64_decode( $contents ) );
				$uploadInfo['isTempSrc'] = true;
			}
		} elseif ( $tag != '#text' ) {
			$this->warn( "Unhandled upload XML tag $tag" );
			$skip = true;
		}
	}

	if ( $this->mImageBasePath && isset( $uploadInfo['rel'] ) ) {
		$path = "{$this->mImageBasePath}/{$uploadInfo['rel']}";
		if ( file_exists( $path ) ) {
			$uploadInfo['fileSrc'] = $path;
			$uploadInfo['isTempSrc'] = false;
		}
	}

	if ( $this->mImportUploads ) {
		return $this->processUpload( $pageInfo, $uploadInfo );
	}
}
998 
/**
 * Write data to a freshly created temporary file in wfTempDir().
 *
 * @param string $contents Data to write
 * @return string Name of the temporary file as returned by tempnam()
 */
private function dumpTemp( $contents ) {
	$tempDir = wfTempDir();
	$tempFile = tempnam( $tempDir, 'importupload' );

	file_put_contents( $tempFile, $contents );

	return $tempFile;
}
1008 
/**
 * Build a WikiRevision from parsed upload data and hand it to the upload
 * callback.
 *
 * @param array $pageInfo Page data; '_title' and 'id' are read unconditionally
 * @param array $uploadInfo Data collected by handleUpload(); 'timestamp',
 *  'filename', 'src', 'size' and 'comment' are read unconditionally
 * @return mixed Result of the upload callback
 */
private function processUpload( $pageInfo, $uploadInfo ) {
	$upload = new WikiRevision( $this->config );
	$description = $uploadInfo['text'] ?? '';

	$upload->setTitle( $pageInfo['_title'] );
	$upload->setID( $pageInfo['id'] );
	$upload->setTimestamp( $uploadInfo['timestamp'] );
	$upload->setText( $description );
	$upload->setFilename( $uploadInfo['filename'] );
	if ( isset( $uploadInfo['archivename'] ) ) {
		$upload->setArchiveName( $uploadInfo['archivename'] );
	}
	$upload->setSrc( $uploadInfo['src'] );
	if ( isset( $uploadInfo['fileSrc'] ) ) {
		$isTemporary = !empty( $uploadInfo['isTempSrc'] );
		$upload->setFileSrc( $uploadInfo['fileSrc'], $isTemporary );
	}
	if ( isset( $uploadInfo['sha1base36'] ) ) {
		$upload->setSha1Base36( $uploadInfo['sha1base36'] );
	}
	$upload->setSize( intval( $uploadInfo['size'] ) );
	$upload->setComment( $uploadInfo['comment'] );

	$contributor = $uploadInfo['contributor'] ?? [];
	if ( isset( $contributor['ip'] ) ) {
		$upload->setUserIP( $contributor['ip'] );
	}
	if ( isset( $contributor['username'] ) ) {
		$upload->setUsername(
			$this->externalUserNames->applyPrefix( $contributor['username'] )
		);
	}
	$upload->setNoUpdates( $this->mNoUpdates );

	return call_user_func( $this->mUploadCallback, $upload );
}
1049 
/**
 * Parse a <contributor> element.
 *
 * @return array Any of the keys 'id', 'ip' and 'username' that were present;
 *  empty for an empty element
 */
private function handleContributor() {
	$contributor = [];

	if ( $this->reader->isEmptyElement ) {
		return $contributor;
	}

	$knownFields = [ 'id', 'ip', 'username' ];

	while ( $this->reader->read() ) {
		$atClosingTag = $this->reader->nodeType == XMLReader::END_ELEMENT
			&& $this->reader->localName == 'contributor';
		if ( $atClosingTag ) {
			break;
		}

		$tag = $this->reader->localName;
		if ( !in_array( $tag, $knownFields ) ) {
			continue;
		}

		$contributor[$tag] = $this->nodeContents();
	}

	return $contributor;
}
1075 
/**
 * Turn a title from the dump into a local title and check that it may be
 * imported. Each rejection is reported through notice().
 *
 * @param string $text Title text from the dump
 * @param string|null $ns Namespace ID from the dump, if any
 * @return array|bool [ title, foreign title ] on success, false when the page
 *  must be skipped
 */
private function processTitle( $text, $ns = null ) {
	$foreignTitleFactory = is_null( $this->foreignNamespaces )
		? new NaiveForeignTitleFactory()
		: new NamespaceAwareForeignTitleFactory( $this->foreignNamespaces );

	$foreignTitle = $foreignTitleFactory->createForeignTitle( $text, intval( $ns ) );
	$title = $this->importTitleFactory->createTitleFromForeignTitle( $foreignTitle );

	$commandLineMode = $this->config->get( 'CommandLineMode' );

	if ( is_null( $title ) ) {
		# Invalid page title? Ignore the page
		$this->notice( 'import-error-invalid', $foreignTitle->getFullText() );
		return false;
	}

	if ( $title->isExternal() ) {
		$this->notice( 'import-error-interwiki', $title->getPrefixedText() );
		return false;
	}

	if ( !$title->canExist() ) {
		$this->notice( 'import-error-special', $title->getPrefixedText() );
		return false;
	}

	// Permission checks are only made outside command line mode.
	if ( $commandLineMode ) {
		return [ $title, $foreignTitle ];
	}

	$permissionManager = MediaWikiServices::getInstance()->getPermissionManager();
	$user = RequestContext::getMain()->getUser();

	if ( !$permissionManager->userCan( 'edit', $user, $title ) ) {
		# Do not import if the importing wiki user cannot edit this page
		$this->notice( 'import-error-edit', $title->getPrefixedText() );

		return false;
	}

	if ( !$title->exists() && !$permissionManager->userCan( 'create', $user, $title ) ) {
		# Do not import if the importing wiki user cannot create this page
		$this->notice( 'import-error-create', $title->getPrefixedText() );

		return false;
	}

	return [ $title, $foreignTitle ];
}
1127 }
NaiveImportTitleFactory
A class to convert page titles on a foreign wiki (ForeignTitle objects) into page titles on the local...
Definition: NaiveImportTitleFactory.php:34
WikiImporter\processRevision
processRevision( $pageInfo, $revisionInfo)
Definition: WikiImporter.php:871
WikiImporter\$mUploadCallback
$mUploadCallback
Definition: WikiImporter.php:39
WikiImporter
XML file reader for the page data importer.
Definition: WikiImporter.php:35
Title\newFromText
static newFromText( $text, $defaultNamespace=NS_MAIN)
Create a new Title from text, such as what one would find in a link.
Definition: Title.php:316
$wgMaxArticleSize
$wgMaxArticleSize
Maximum article size in kilobytes.
Definition: DefaultSettings.php:2295
WikiImporter\setImageBasePath
setImageBasePath( $dir)
Definition: WikiImporter.php:308
MediaWiki\MediaWikiServices
MediaWikiServices is the service locator for the application scope of MediaWiki.
Definition: MediaWikiServices.php:117
wfSetVar
wfSetVar(&$dest, $source, $force=false)
Sets dest to source and returns the original value of dest If source is NULL, it just returns the val...
Definition: GlobalFunctions.php:1607
UploadSourceAdapter\registerSource
static registerSource(ImportSource $source)
Definition: UploadSourceAdapter.php:48
WikiImporter\$mImportUploads
$mImportUploads
Definition: WikiImporter.php:42
NamespaceAwareForeignTitleFactory
A parser that translates page titles on a foreign wiki into ForeignTitle objects, using information a...
Definition: NamespaceAwareForeignTitleFactory.php:25
WikiImporter\$mRevisionCallback
$mRevisionCallback
Definition: WikiImporter.php:39
WikiImporter\revisionCallback
revisionCallback( $revision)
Notify the callback function of a revision.
Definition: WikiImporter.php:498
WikiImporter\setNoticeCallback
setNoticeCallback( $callback)
Set a callback that displays notice messages.
Definition: WikiImporter.php:166
DeferredUpdates\addUpdate
static addUpdate(DeferrableUpdate $update, $stage=self::POSTSEND)
Add an update to the deferred list to be run later by execute()
Definition: DeferredUpdates.php:85
NaiveForeignTitleFactory
A parser that translates page titles on a foreign wiki into ForeignTitle objects, with no knowledge o...
Definition: NaiveForeignTitleFactory.php:27
WikiImporter\$mPageOutCallback
$mPageOutCallback
Definition: WikiImporter.php:40
WikiImporter\setNoUpdates
setNoUpdates( $noupdates)
Set 'no updates' mode.
Definition: WikiImporter.php:146
WikiImporter\getReader
getReader()
Definition: WikiImporter.php:105
ExternalUserNames
Class to parse and build external user names.
Definition: ExternalUserNames.php:29
WikiImporter\processLogItem
processLogItem( $logInfo)
Definition: WikiImporter.php:701
WikiImporter\handleRevision
handleRevision(&$pageInfo)
Definition: WikiImporter.php:829
WikiImporter\setRevisionCallback
setRevisionCallback( $callback)
Sets the action to perform as each page revision is reached.
Definition: WikiImporter.php:201
wfMessage
wfMessage( $key,... $params)
This is the function for getting translated interface messages.
Definition: GlobalFunctions.php:1264
WikiImporter\handleContributor
handleContributor()
Definition: WikiImporter.php:1053
WikiImporter\setUsernamePrefix
setUsernamePrefix( $usernamePrefix, $assignKnownUsers)
Definition: WikiImporter.php:324
ImportReporter
Reporting callback.
Definition: ImportReporter.php:27
WikiImporter\$externalUserNames
ExternalUserNames $externalUserNames
Definition: WikiImporter.php:54
NamespaceImportTitleFactory
A class to convert page titles on a foreign wiki (ForeignTitle objects) into page titles on the local...
Definition: NamespaceImportTitleFactory.php:28
WikiImporter\nodeContents
nodeContents()
Shouldn't something like this be built-in to XMLReader? Fetches text contents of the current element,...
Definition: WikiImporter.php:538
WikiImporter\siteInfoCallback
siteInfoCallback( $siteInfo)
Notify the callback function of site info.
Definition: WikiImporter.php:459
NS_MAIN
const NS_MAIN
Definition: Defines.php:60
Config
Interface for configuration instances.
Definition: Config.php:28
MWException
MediaWiki exception.
Definition: MWException.php:26
ImportTitleFactory
Represents an object that can convert page titles on a foreign wiki (ForeignTitle objects) into page ...
Definition: ImportTitleFactory.php:25
WikiPage\factory
static factory(Title $title)
Create a WikiPage object of the appropriate class for the given title.
Definition: WikiPage.php:142
WikiImporter\dumpTemp
dumpTemp( $contents)
Definition: WikiImporter.php:1003
WikiImporter\$countableCache
array $countableCache
Definition: WikiImporter.php:50
WikiImporter\pageOutCallback
pageOutCallback( $title, $foreignTitle, $revCount, $sucCount, $pageInfo)
Notify the callback function when a "</page>" is closed.
Definition: WikiImporter.php:486
WikiImporter\__construct
__construct(ImportSource $source, Config $config)
Creates an ImportXMLReader drawing from the source provided.
Definition: WikiImporter.php:62
MWContentSerializationException
Exception representing a failure to serialize or unserialize a content object.
Definition: MWContentSerializationException.php:7
WikiImporter\throwXmlError
throwXmlError( $err)
Definition: WikiImporter.php:109
SubpageImportTitleFactory
A class to convert page titles on a foreign wiki (ForeignTitle objects) into page titles on the local...
Definition: SubpageImportTitleFactory.php:28
$title
$title
Definition: testCompression.php:34
SiteStatsUpdate\factory
static factory(array $deltas)
Definition: SiteStatsUpdate.php:71
WikiImporter\finishImportPage
finishImportPage( $title, $foreignTitle, $revCount, $sRevCount, $pageInfo)
Mostly for hook use.
Definition: WikiImporter.php:405
wfTimestampNow
wfTimestampNow()
Convenience function; returns MediaWiki timestamp for the present time.
Definition: GlobalFunctions.php:1898
WikiImporter\$importTitleFactory
ImportTitleFactory $importTitleFactory
Definition: WikiImporter.php:48
WikiImporter\processUpload
processUpload( $pageInfo, $uploadInfo)
Definition: WikiImporter.php:1014
WikiImporter\disableStatisticsUpdate
disableStatisticsUpdate()
Statistics updates can take a lot of time.
Definition: WikiImporter.php:332
WikiImporter\setImportUploads
setImportUploads( $import)
Definition: WikiImporter.php:315
WikiImporter\$mNoUpdates
$mNoUpdates
Definition: WikiImporter.php:43
wfDebug
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
Definition: GlobalFunctions.php:913
WikiImporter\$mPageCallback
$mPageCallback
Definition: WikiImporter.php:39
WikiImporter\$mSiteInfoCallback
$mSiteInfoCallback
Definition: WikiImporter.php:40
$content
$content
Definition: router.php:78
WikiImporter\beforeImportPage
beforeImportPage( $titleAndForeignTitle)
Default per-page callback.
Definition: WikiImporter.php:342
StatusValue\newGood
static newGood( $value=null)
Factory function for good results.
Definition: StatusValue.php:81
WikiImporter\importRevision
importRevision( $revision)
Default per-revision callback, performs the import.
Definition: WikiImporter.php:354
WikiImporter\doImport
doImport()
Primary entry point.
Definition: WikiImporter.php:565
WikiImporter\processTitle
processTitle( $text, $ns=null)
Definition: WikiImporter.php:1081
WikiImporter\$mImageBasePath
$mImageBasePath
Definition: WikiImporter.php:42
WikiImporter\$foreignNamespaces
$foreignNamespaces
Definition: WikiImporter.php:38
WikiImporter\setDebug
setDebug( $debug)
Set debug mode...
Definition: WikiImporter.php:138
WikiImporter\notice
notice( $msg,... $params)
Definition: WikiImporter.php:124
RequestContext\getMain
static getMain()
Get the RequestContext object associated with the main request.
Definition: RequestContext.php:431
WikiImporter\handleUpload
handleUpload(&$pageInfo)
Definition: WikiImporter.php:948
WikiImporter\setUploadCallback
setUploadCallback( $callback)
Sets the action to perform as each file upload version is reached.
Definition: WikiImporter.php:212
WikiImporter\warn
warn( $data)
Definition: WikiImporter.php:120
WikiImporter\handleSiteInfo
handleSiteInfo()
Definition: WikiImporter.php:638
WikiImporter\setTargetRootPage
setTargetRootPage( $rootpage)
Set a target root page under which all pages are imported.
Definition: WikiImporter.php:276
WikiImporter\$mNoticeCallback
$mNoticeCallback
Definition: WikiImporter.php:41
$status
return $status
Definition: SyntaxHighlight.php:347
$debug
$debug
Definition: Setup.php:784
wfTempDir
wfTempDir()
Tries to get the system directory for temporary files.
Definition: GlobalFunctions.php:1947
WikiRevision
Represents a revision, log entry or upload during the import process.
Definition: WikiRevision.php:37
WikiImporter\debug
debug( $data)
Definition: WikiImporter.php:114
WikiImporter\importLogItem
importLogItem( $revision)
Default per-log-item callback, performs the import.
Definition: WikiImporter.php:383
WikiImporter\importUpload
importUpload( $revision)
Dummy for now...
Definition: WikiImporter.php:392
WikiImporter\setPageOffset
setPageOffset( $nthPage)
Sets 'pageOffset' value.
Definition: WikiImporter.php:156
WikiImporter\handleLogItem
handleLogItem()
Definition: WikiImporter.php:665
ImportSource
Source interface for XML import.
Definition: ImportSource.php:32
$path
$path
Definition: NoLocalSettings.php:25
WikiImporter\$pageOffset
$pageOffset
Definition: WikiImporter.php:44
WikiImporter\$config
Config $config
Definition: WikiImporter.php:46
WikiImporter\setSiteInfoCallback
setSiteInfoCallback( $callback)
Sets the action to perform when site info is encountered.
Definition: WikiImporter.php:234
WikiImporter\setPageOutCallback
setPageOutCallback( $callback)
Sets the action to perform as each page in the stream is completed.
Definition: WikiImporter.php:190
WikiImporter\debugRevisionHandler
debugRevisionHandler(&$revision)
Alternate per-revision callback, for debugging.
Definition: WikiImporter.php:441
WikiImporter\$disableStatisticsUpdate
bool $disableStatisticsUpdate
Definition: WikiImporter.php:52
$source
$source
Definition: mwdoc-filter.php:34
WikiImporter\$reader
XMLReader $reader
Definition: WikiImporter.php:37
WikiImporter\handlePage
handlePage()
PhanTypeInvalidDimOffset: Phan is not reading the reference inside the hook.
Definition: WikiImporter.php:745
WikiImporter\nodeAttribute
nodeAttribute( $attr)
Retrieves the contents of the named attribute of the current element.
Definition: WikiImporter.php:527
WikiImporter\setPageCallback
setPageCallback( $callback)
Sets the action to perform as each new page in the stream is reached.
Definition: WikiImporter.php:175
WikiImporter\setImportTitleFactory
setImportTitleFactory( $factory)
Sets the factory object to use to convert ForeignTitle objects into local Title objects.
Definition: WikiImporter.php:245
WikiImporter\setTargetNamespace
setTargetNamespace( $namespace)
Set a target namespace to override the defaults.
Definition: WikiImporter.php:254
WikiImporter\logItemCallback
logItemCallback( $revision)
Notify the callback function of a new log item.
Definition: WikiImporter.php:512
Hooks\run
static run( $event, array $args=[], $deprecatedVersion=null)
Call hook functions defined in Hooks::register and $wgHooks.
Definition: Hooks.php:200
WikiImporter\pageCallback
pageCallback( $title)
Notify the callback function when a new "<page>" is reached.
Definition: WikiImporter.php:472
WikiImporter\$mLogItemCallback
$mLogItemCallback
Definition: WikiImporter.php:39
WikiImporter\$mDebug
$mDebug
Definition: WikiImporter.php:41
WikiImporter\setLogItemCallback
setLogItemCallback( $callback)
Sets the action to perform as each log item is reached.
Definition: WikiImporter.php:223
$type
$type
Definition: testCompression.php:48