MediaWiki  1.27.2
WikiImporter.php
Go to the documentation of this file.
1 <?php
33 class WikiImporter {
34  private $reader = null;
35  private $foreignNamespaces = null;
40  private $mNoUpdates = false;
42  private $config;
46  private $countableCache = [];
47 
/**
 * Creates an ImportXMLReader drawing from the source provided.
 *
 * @param ImportSource $source Source handed to UploadSourceAdapter::registerSource()
 * @param Config|null $config Configuration; omitting it is deprecated since 1.25
 *  and falls back to the 'main' config of the default ConfigFactory
 * @throws Exception If the XMLReader class is not available
 * @throws MWException If the reader cannot be opened on the source
 */
public function __construct( ImportSource $source, Config $config = null ) {
	if ( !class_exists( 'XMLReader' ) ) {
		throw new Exception( 'Import requires PHP to have been compiled with libxml support' );
	}

	$this->reader = new XMLReader();
	if ( !$config ) {
		wfDeprecated( __METHOD__ . ' without a Config instance', '1.25' );
		$config = ConfigFactory::getDefaultInstance()->makeConfig( 'main' );
	}
	$this->config = $config;

	// The source is read through a custom stream wrapper; register it once.
	if ( !in_array( 'uploadsource', stream_get_wrappers() ) ) {
		stream_wrapper_register( 'uploadsource', 'UploadSourceAdapter' );
	}
	$id = UploadSourceAdapter::registerSource( $source );

	// Enable the entity loader, as it is needed for loading external URLs via
	// XMLReader::open (T86036)
	$oldDisable = libxml_disable_entity_loader( false );
	if ( defined( 'LIBXML_PARSEHUGE' ) ) {
		$status = $this->reader->open( "uploadsource://$id", null, LIBXML_PARSEHUGE );
	} else {
		$status = $this->reader->open( "uploadsource://$id" );
	}
	// Put the entity loader back in its previous state on every path.
	libxml_disable_entity_loader( $oldDisable );

	if ( !$status ) {
		// libxml_get_last_error() returns false when no error was recorded,
		// so do not dereference it blindly.
		$error = libxml_get_last_error();
		throw new MWException( 'Encountered an internal error while initializing WikiImporter object: ' .
			( $error ? $error->message : 'unknown libxml error' ) );
	}

	// Default callbacks
	$this->setPageCallback( [ $this, 'beforeImportPage' ] );
	$this->setRevisionCallback( [ $this, "importRevision" ] );
	$this->setUploadCallback( [ $this, 'importUpload' ] );
	$this->setLogItemCallback( [ $this, 'importLogItem' ] );
	$this->setPageOutCallback( [ $this, 'finishImportPage' ] );

	$this->importTitleFactory = new NaiveImportTitleFactory();
}
96 
/**
 * Expose the XML reader this importer works on.
 *
 * @return null|XMLReader
 */
public function getReader() {
	$xmlReader = $this->reader;

	return $xmlReader;
}
103 
/**
 * Report an XML error to the debug channels. Despite the name, nothing is thrown.
 *
 * @param string $err Error text
 */
public function throwXmlError( $err ) {
	$failure = "FAILURE: $err";
	$this->debug( $failure );

	$logLine = "WikiImporter XML error: $err\n";
	wfDebug( $logLine );
}
108 
/**
 * Write a line to the debug log, but only while debug mode is switched on.
 *
 * @param string $data Text to log
 */
public function debug( $data ) {
	if ( !$this->mDebug ) {
		return;
	}

	wfDebug( "IMPORT: $data\n" );
}
114 
/**
 * Write a warning to the debug log regardless of the debug-mode flag.
 *
 * @param string $data Text to log
 */
public function warn( $data ) {
	$line = "IMPORT: $data\n";
	wfDebug( $line );
}
118 
/**
 * Emit a notice: hand it to the notice callback when one is callable,
 * otherwise echo the rendered message text.
 *
 * @param string $msg Message key, followed by any number of message parameters
 */
public function notice( $msg /*, $param, ...*/ ) {
	// Everything after the message key is a message parameter.
	$params = array_slice( func_get_args(), 1 );

	if ( !is_callable( $this->mNoticeCallback ) ) {
		# No ImportReporter -> CLI
		echo wfMessage( $msg, $params )->text() . "\n";
		return;
	}

	call_user_func( $this->mNoticeCallback, $msg, $params );
}
129 
/**
 * Set debug mode; while it is on, debug() forwards its messages to wfDebug().
 *
 * @param bool $debug
 */
public function setDebug( $debug ) {
	$this->mDebug = $debug;
}
137 
/**
 * Set 'no updates' mode. The flag is copied onto every WikiRevision this
 * importer creates.
 *
 * @param bool $noupdates
 */
public function setNoUpdates( $noupdates ) {
	$this->mNoUpdates = $noupdates;
}
145 
/**
 * Set a callback that displays notice messages.
 *
 * @param callable $callback
 * @return mixed Whatever wfSetVar() returns (presumably the previous callback;
 *  verify against wfSetVar)
 */
public function setNoticeCallback( $callback ) {
	$result = wfSetVar( $this->mNoticeCallback, $callback );

	return $result;
}
155 
/**
 * Sets the action to perform as each new page in the stream is reached.
 *
 * @param callable $callback
 * @return callable|null The callback that was installed before this call
 */
public function setPageCallback( $callback ) {
	$replaced = $this->mPageCallback;
	$this->mPageCallback = $callback;

	return $replaced;
}
166 
/**
 * Sets the action to perform as each page in the stream is completed.
 *
 * @param callable $callback
 * @return callable|null The callback that was installed before this call
 */
public function setPageOutCallback( $callback ) {
	$replaced = $this->mPageOutCallback;
	$this->mPageOutCallback = $callback;

	return $replaced;
}
181 
/**
 * Sets the action to perform as each page revision is reached.
 *
 * @param callable $callback
 * @return callable|null The callback that was installed before this call
 */
public function setRevisionCallback( $callback ) {
	$replaced = $this->mRevisionCallback;
	$this->mRevisionCallback = $callback;

	return $replaced;
}
192 
/**
 * Sets the action to perform as each file upload version is reached.
 *
 * @param callable $callback
 * @return callable|null The callback that was installed before this call
 */
public function setUploadCallback( $callback ) {
	$replaced = $this->mUploadCallback;
	$this->mUploadCallback = $callback;

	return $replaced;
}
203 
/**
 * Sets the action to perform as each log item is reached.
 *
 * @param callable $callback
 * @return callable|null The callback that was installed before this call
 */
public function setLogItemCallback( $callback ) {
	$replaced = $this->mLogItemCallback;
	$this->mLogItemCallback = $callback;

	return $replaced;
}
214 
/**
 * Sets the action to perform when site info is encountered.
 *
 * @param callable $callback
 * @return callable|null The callback that was installed before this call
 */
public function setSiteInfoCallback( $callback ) {
	$replaced = $this->mSiteInfoCallback;
	$this->mSiteInfoCallback = $callback;

	return $replaced;
}
225 
/**
 * Sets the factory object to use to convert ForeignTitle objects into local
 * Title objects. processTitle() calls createTitleFromForeignTitle() on it.
 *
 * @param ImportTitleFactory $factory
 */
public function setImportTitleFactory( $factory ) {
	$this->importTitleFactory = $factory;
}
234 
/**
 * Set a target namespace to override the defaults.
 *
 * @param null|int $namespace Null to stop overriding namespaces, otherwise the
 *  index of an existing, non-negative namespace
 * @return bool False when $namespace is neither null nor a usable namespace
 */
public function setTargetNamespace( $namespace ) {
	if ( is_null( $namespace ) ) {
		// Don't override namespaces: go back to the factory the constructor
		// installs, so an earlier override does not linger.
		$this->setImportTitleFactory( new NaiveImportTitleFactory() );
		return true;
	}

	if ( $namespace >= 0 && MWNamespace::exists( intval( $namespace ) ) ) {
		$namespace = intval( $namespace );
		$this->setImportTitleFactory( new NamespaceImportTitleFactory( $namespace ) );
		return true;
	}

	return false;
}
256 
/**
 * Set a target root page under which all pages are imported.
 *
 * @param null|string $rootpage Null for no root page; an empty string leaves
 *  the current title factory untouched
 * @return Status Good unless the root page is invalid, external, or lives in a
 *  namespace without subpages
 */
public function setTargetRootPage( $rootpage ) {
	$status = Status::newGood();

	if ( is_null( $rootpage ) ) {
		// No rootpage
		$this->setImportTitleFactory( new NaiveImportTitleFactory() );
	} elseif ( $rootpage !== '' ) {
		$rootpage = rtrim( $rootpage, '/' ); // avoid double slashes
		$title = Title::newFromText( $rootpage );

		if ( !$title || $title->isExternal() ) {
			$status->fatal( 'import-rootpage-invalid' );
		} elseif ( !MWNamespace::hasSubpages( $title->getNamespace() ) ) {
			global $wgContLang;

			$displayNSText = $title->getNamespace() == NS_MAIN
				? wfMessage( 'blanknamespace' )->text()
				: $wgContLang->getNsText( $title->getNamespace() );
			$status->fatal( 'import-rootpage-nosubpage', $displayNSText );
		} else {
			// set namespace to 'all', so the namespace check in processTitle() can pass
			$this->setTargetNamespace( null );
			// Must come after setTargetNamespace(), which installs its own factory.
			$this->setImportTitleFactory( new SubpageImportTitleFactory( $title ) );
		}
	}

	return $status;
}
290 
/**
 * Set the directory that handleUpload() prefixes to an upload's 'rel' value
 * when looking for the file on disk.
 *
 * @param string $dir
 */
public function setImageBasePath( $dir ) {
	$this->mImageBasePath = $dir;
}
297 
/**
 * Choose whether handleUpload() passes parsed uploads on to processUpload().
 *
 * @param bool $import
 */
public function setImportUploads( $import ) {
	$this->mImportUploads = $import;
}
304 
/**
 * Default per-page callback. Remembers whether the page is countable before
 * the import so that finishImportPage() can compare afterwards.
 *
 * @param array $titleAndForeignTitle Element 0 is the local Title
 * @return bool Always true
 */
public function beforeImportPage( $titleAndForeignTitle ) {
	$title = $titleAndForeignTitle[0];
	$page = WikiPage::factory( $title );
	$this->countableCache['title_' . $title->getPrefixedText()] = $page->isCountable();

	return true;
}
317 
/**
 * Default per-revision callback, performs the import.
 *
 * @param WikiRevision $revision
 * @return bool|mixed False when the content model cannot be used on the title
 *  or the content fails to unserialize; otherwise the deadlockLoop() result
 */
public function importRevision( $revision ) {
	$usableHere = $revision->getContentHandler()->canBeUsedOn( $revision->getTitle() );
	if ( !$usableHere ) {
		$this->notice(
			'import-error-bad-location',
			$revision->getTitle()->getPrefixedText(),
			$revision->getID(),
			$revision->getModel(),
			$revision->getFormat()
		);

		return false;
	}

	try {
		return wfGetDB( DB_MASTER )->deadlockLoop( [ $revision, 'importOldRevision' ] );
	} catch ( MWContentSerializationException $ex ) {
		$this->notice(
			'import-error-unserialize',
			$revision->getTitle()->getPrefixedText(),
			$revision->getID(),
			$revision->getModel(),
			$revision->getFormat()
		);

		return false;
	}
}
347 
/**
 * Default log-item callback: runs the revision's importLogItem() through the
 * master connection's deadlockLoop().
 *
 * @param WikiRevision $revision
 * @return mixed Result of deadlockLoop()
 */
public function importLogItem( $revision ) {
	$importCall = [ $revision, 'importLogItem' ];

	return wfGetDB( DB_MASTER )->deadlockLoop( $importCall );
}
357 
/**
 * Default upload callback: runs the revision's importUpload() through the
 * master connection's deadlockLoop().
 *
 * @param WikiRevision $revision
 * @return mixed Result of deadlockLoop()
 */
public function importUpload( $revision ) {
	$importCall = [ $revision, 'importUpload' ];

	return wfGetDB( DB_MASTER )->deadlockLoop( $importCall );
}
367 
/**
 * Default page-out callback; mostly for hook use. Adjusts the article count
 * when the page's countable state changed during the import, then runs the
 * 'AfterImportPage' hook with the arguments it was called with.
 *
 * @param Title $title
 * @param ForeignTitle $foreignTitle
 * @param int $revCount
 * @param int $sRevCount
 * @param array $pageInfo
 * @return bool Result of the 'AfterImportPage' hook
 */
public function finishImportPage( $title, $foreignTitle, $revCount,
	$sRevCount, $pageInfo ) {

	// Update article count statistics (T42009)
	// The normal counting logic in WikiPage->doEditUpdates() is designed for
	// one-revision-at-a-time editing, not bulk imports. In this situation it
	// suffers from issues of slave lag. We let WikiPage handle the total page
	// and revision count, and we implement our own custom logic for the
	// article (content page) count.
	$page = WikiPage::factory( $title );
	$page->loadPageData( 'fromdbmaster' );
	$content = $page->getContent();
	if ( $content === null ) {
		wfDebug( __METHOD__ . ': Skipping article count adjustment for ' . $title .
			' because WikiPage::getContent() returned null' );
	} else {
		$editInfo = $page->prepareContentForEdit( $content );
		$countKey = 'title_' . $title->getPrefixedText();
		$countable = $page->isCountable( $editInfo );
		// Only pages seen by beforeImportPage() have a cached "before" state.
		if ( array_key_exists( $countKey, $this->countableCache ) &&
			$countable != $this->countableCache[$countKey] ) {
			DeferredUpdates::addUpdate( SiteStatsUpdate::factory( [
				'articles' => ( (int)$countable - (int)$this->countableCache[$countKey] )
			] ) );
		}
	}

	$args = func_get_args();
	return Hooks::run( 'AfterImportPage', $args );
}
407 
/**
 * Alternate per-revision callback, for debugging. Dumps the revision's
 * fields through debug() instead of importing anything.
 *
 * @param object $revision Read for title, user_text, timestamp, comment and text
 */
public function debugRevisionHandler( &$revision ) {
	$this->debug( "Got revision:" );

	$titleText = is_object( $revision->title )
		? $revision->title->getPrefixedText()
		: "<invalid>";
	$this->debug( "-- Title: " . $titleText );

	$labelledFields = [
		'User' => 'user_text',
		'Timestamp' => 'timestamp',
		'Comment' => 'comment',
		'Text' => 'text',
	];
	foreach ( $labelledFields as $label => $field ) {
		$this->debug( "-- $label: " . $revision->$field );
	}
}
424 
/**
 * Notify the callback function of site info.
 *
 * @param array $siteInfo
 * @return bool|mixed False when no callback is set, else the callback's result
 */
private function siteInfoCallback( $siteInfo ) {
	if ( !isset( $this->mSiteInfoCallback ) ) {
		return false;
	}

	$arguments = [ $siteInfo, $this ];

	return call_user_func_array( $this->mSiteInfoCallback, $arguments );
}
438 
/**
 * Notify the page callback, if one is set, that a new page was reached.
 *
 * @param array $title Value produced by processTitle()
 */
function pageCallback( $title ) {
	if ( !isset( $this->mPageCallback ) ) {
		return;
	}

	call_user_func( $this->mPageCallback, $title );
}
448 
/**
 * Notify the page-out callback, if one is set, forwarding all arguments as
 * received.
 *
 * @param Title $title
 * @param ForeignTitle $foreignTitle
 * @param int $revCount
 * @param int $sucCount
 * @param array $pageInfo
 */
private function pageOutCallback( $title, $foreignTitle, $revCount,
	$sucCount, $pageInfo ) {
	if ( !isset( $this->mPageOutCallback ) ) {
		return;
	}

	$forwarded = func_get_args();
	call_user_func_array( $this->mPageOutCallback, $forwarded );
}
464 
/**
 * Notify the callback function of a revision.
 *
 * @param WikiRevision $revision
 * @return bool|mixed False when no callback is set, else the callback's result
 */
private function revisionCallback( $revision ) {
	if ( !isset( $this->mRevisionCallback ) ) {
		return false;
	}

	$arguments = [ $revision, $this ];

	return call_user_func_array( $this->mRevisionCallback, $arguments );
}
478 
/**
 * Notify the callback function of a new log item.
 *
 * @param WikiRevision $revision
 * @return bool|mixed False when no callback is set, else the callback's result
 */
private function logItemCallback( $revision ) {
	if ( !isset( $this->mLogItemCallback ) ) {
		return false;
	}

	$arguments = [ $revision, $this ];

	return call_user_func_array( $this->mLogItemCallback, $arguments );
}
492 
/**
 * Retrieves the contents of the named attribute of the current element.
 *
 * @param string $attr Attribute name
 * @return string|null Whatever XMLReader::getAttribute() returns
 */
public function nodeAttribute( $attr ) {
	$value = $this->reader->getAttribute( $attr );

	return $value;
}
502 
/**
 * Fetches the text contents of the current element by reading forward and
 * concatenating text, CDATA and significant-whitespace nodes until the first
 * end-element node is met.
 *
 * @return string Collected text; '' for an empty element, and '' (after
 *  closing the reader) when the input ends before an end element
 */
public function nodeContents() {
	if ( $this->reader->isEmptyElement ) {
		return "";
	}

	$textNodeTypes = [
		XMLReader::TEXT,
		XMLReader::CDATA,
		XMLReader::SIGNIFICANT_WHITESPACE,
	];

	$collected = "";
	while ( $this->reader->read() ) {
		$nodeType = $this->reader->nodeType;

		if ( $nodeType == XMLReader::END_ELEMENT ) {
			return $collected;
		}
		if ( in_array( $nodeType, $textNodeTypes ) ) {
			$collected .= $this->reader->value;
		}
	}

	// Ran out of input without seeing the closing tag.
	$this->reader->close();
	return '';
}
530 
/**
 * Primary entry point. Walks the top-level nodes below <mediawiki> and
 * dispatches <siteinfo>, <page> and <logitem> to their handlers.
 *
 * @throws MWException When the first node read is not <mediawiki>
 * @throws Exception Anything thrown while processing is rethrown once the
 *  reader is closed and the entity loader state is restored
 * @return bool Always true on normal completion
 */
public function doImport() {
	// Calls to reader->read need to be wrapped in calls to
	// libxml_disable_entity_loader() to avoid local file
	// inclusion attacks (bug 46932).
	$previousLoaderState = libxml_disable_entity_loader( true );
	$this->reader->read();

	if ( $this->reader->localName != 'mediawiki' ) {
		libxml_disable_entity_loader( $previousLoaderState );
		throw new MWException( "Expected <mediawiki> tag, got " .
			$this->reader->localName );
	}
	$this->debug( "<mediawiki> tag is correct." );

	$this->debug( "Starting primary dump processing loop." );

	$hasNode = $this->reader->read();
	$pendingException = null;
	try {
		while ( $hasNode ) {
			// Captured before the hook runs, so later comparisons use these values.
			$tag = $this->reader->localName;
			$type = $this->reader->nodeType;
			$skipSubtree = false;

			if ( !Hooks::run( 'ImportHandleToplevelXMLTag', [ $this ] ) ) {
				// Hook returned false: no built-in dispatch for this node
			} elseif ( $tag == 'mediawiki' && $type == XMLReader::END_ELEMENT ) {
				break;
			} elseif ( $tag == 'siteinfo' ) {
				$this->handleSiteInfo();
			} elseif ( $tag == 'page' ) {
				$this->handlePage();
			} elseif ( $tag == 'logitem' ) {
				$this->handleLogItem();
			} elseif ( $tag != '#text' ) {
				$this->warn( "Unhandled top-level XML tag $tag" );
				$skipSubtree = true;
			}

			if ( !$skipSubtree ) {
				$hasNode = $this->reader->read();
				continue;
			}

			// Jump over the unknown element and everything inside it.
			$hasNode = $this->reader->next();
			$this->debug( "Skip" );
		}
	} catch ( Exception $ex ) {
		$pendingException = $ex;
	}

	// finally
	libxml_disable_entity_loader( $previousLoaderState );
	$this->reader->close();

	if ( $pendingException ) {
		throw $pendingException;
	}

	return true;
}
598 
/**
 * Parse a <siteinfo> element: collect the simple fields, record each
 * <namespace> under its 'key' attribute, then notify the site info callback.
 */
private function handleSiteInfo() {
	$this->debug( "Enter site info handler." );

	// Fields that can just be stuffed in the siteInfo object
	$normalFields = [ 'sitename', 'base', 'generator', 'case' ];
	$siteInfo = [];

	while ( $this->reader->read() ) {
		$atClosingTag = $this->reader->nodeType == XMLReader::END_ELEMENT &&
			$this->reader->localName == 'siteinfo';
		if ( $atClosingTag ) {
			break;
		}

		$tag = $this->reader->localName;

		if ( $tag == 'namespace' ) {
			// Read the attribute before nodeContents() moves the reader on.
			$key = $this->nodeAttribute( 'key' );
			$this->foreignNamespaces[$key] = $this->nodeContents();
		} elseif ( in_array( $tag, $normalFields ) ) {
			$siteInfo[$tag] = $this->nodeContents();
		}
	}

	$siteInfo['_namespaces'] = $this->foreignNamespaces;
	$this->siteInfoCallback( $siteInfo );
}
625 
/**
 * Parse a <logitem> element into an array and pass it to processLogItem().
 */
private function handleLogItem() {
	$this->debug( "Enter log item handler." );

	// Fields that can just be stuffed in the pageInfo object
	$normalFields = [ 'id', 'comment', 'type', 'action', 'timestamp',
		'logtitle', 'params' ];
	$logInfo = [];

	while ( $this->reader->read() ) {
		$atClosingTag = $this->reader->nodeType == XMLReader::END_ELEMENT &&
			$this->reader->localName == 'logitem';
		if ( $atClosingTag ) {
			break;
		}

		$tag = $this->reader->localName;

		$hookArgs = [ $this, $logInfo ];
		if ( !Hooks::run( 'ImportHandleLogItemXMLTag', $hookArgs ) ) {
			// Hook returned false: no built-in handling for this node
		} elseif ( in_array( $tag, $normalFields ) ) {
			$logInfo[$tag] = $this->nodeContents();
		} elseif ( $tag == 'contributor' ) {
			$logInfo['contributor'] = $this->handleContributor();
		} elseif ( $tag != '#text' ) {
			$this->warn( "Unhandled log-item XML tag $tag" );
		}
	}

	$this->processLogItem( $logInfo );
}
657 
/**
 * Build a WikiRevision from parsed log-item data and hand it to the log item
 * callback.
 *
 * @param array $logInfo Parsed fields; 'type' and 'action' are read
 *  unconditionally, all other keys are optional
 * @return bool|mixed Result of logItemCallback()
 */
private function processLogItem( $logInfo ) {
	$logEntry = new WikiRevision( $this->config );

	if ( isset( $logInfo['id'] ) ) {
		$logEntry->setID( $logInfo['id'] );
	}
	$logEntry->setType( $logInfo['type'] );
	$logEntry->setAction( $logInfo['action'] );
	if ( isset( $logInfo['timestamp'] ) ) {
		$logEntry->setTimestamp( $logInfo['timestamp'] );
	}
	if ( isset( $logInfo['params'] ) ) {
		$logEntry->setParams( $logInfo['params'] );
	}
	if ( isset( $logInfo['logtitle'] ) ) {
		// @todo Using Title for non-local titles is a recipe for disaster.
		// We should use ForeignTitle here instead.
		$logTitle = Title::newFromText( $logInfo['logtitle'] );
		$logEntry->setTitle( $logTitle );
	}

	$logEntry->setNoUpdates( $this->mNoUpdates );

	if ( isset( $logInfo['comment'] ) ) {
		$logEntry->setComment( $logInfo['comment'] );
	}

	if ( isset( $logInfo['contributor']['ip'] ) ) {
		$logEntry->setUserIP( $logInfo['contributor']['ip'] );
	}

	// A username is always set, with a placeholder when the dump has none.
	$username = isset( $logInfo['contributor']['username'] )
		? $logInfo['contributor']['username']
		: 'Unknown user';
	$logEntry->setUsername( $username );

	return $this->logItemCallback( $logEntry );
}
701 
/**
 * Parse a <page> element. Simple fields are collected into $pageInfo; the
 * first <revision> or <upload> triggers title resolution, and every one of
 * them is handed to its handler while the title is valid. The page-out
 * callback fires at the end if a title was resolved.
 */
private function handlePage() {
	// Handle page data.
	$this->debug( "Enter page handler." );
	$pageInfo = [ 'revisionCount' => 0, 'successfulRevisionCount' => 0 ];

	// Fields that can just be stuffed in the pageInfo object
	$normalFields = [ 'title', 'ns', 'id', 'redirect', 'restrictions' ];

	$skip = false;
	$badTitle = false;

	while ( true ) {
		// next() steps over the current subtree, read() descends into it.
		$advanced = $skip ? $this->reader->next() : $this->reader->read();
		if ( !$advanced ) {
			break;
		}

		if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
			$this->reader->localName == 'page' ) {
			break;
		}

		$skip = false;
		$tag = $this->reader->localName;

		if ( $badTitle ) {
			// The title is invalid, bail out of this page
			$skip = true;
		} elseif ( !Hooks::run( 'ImportHandlePageXMLTag', [ $this, &$pageInfo ] ) ) {
			// Hook returned false: no built-in handling for this node
		} elseif ( in_array( $tag, $normalFields ) ) {
			// <redirect title="NewTitle"/> carries its value in an attribute,
			// every other simple field carries it as element text.
			$pageInfo[$tag] = ( $tag == 'redirect' )
				? $this->nodeAttribute( 'title' )
				: $this->nodeContents();
		} elseif ( $tag == 'revision' || $tag == 'upload' ) {
			if ( !isset( $title ) ) {
				$ns = isset( $pageInfo['ns'] ) ? $pageInfo['ns'] : null;
				$title = $this->processTitle( $pageInfo['title'], $ns );

				// $title is either an array of two titles or false.
				if ( is_array( $title ) ) {
					$this->pageCallback( $title );
					list( $pageInfo['_title'], $foreignTitle ) = $title;
				} else {
					$badTitle = true;
					$skip = true;
				}
			}

			if ( $title ) {
				if ( $tag == 'revision' ) {
					$this->handleRevision( $pageInfo );
				} else {
					$this->handleUpload( $pageInfo );
				}
			}
		} elseif ( $tag != '#text' ) {
			$this->warn( "Unhandled page XML tag $tag" );
			$skip = true;
		}
	}

	// @note $pageInfo['_title'] is only set if a valid $title was processed
	// above; $foreignTitle is set in the same statement, so it exists
	// whenever '_title' does.
	if ( array_key_exists( '_title', $pageInfo ) ) {
		$this->pageOutCallback(
			$pageInfo['_title'],
			$foreignTitle,
			$pageInfo['revisionCount'],
			$pageInfo['successfulRevisionCount'],
			$pageInfo
		);
	}
}
782 
/**
 * Parse a <revision> element and pass the result to processRevision().
 * Increments 'revisionCount' always and 'successfulRevisionCount' when
 * processRevision() returns a truthy value.
 *
 * @param array &$pageInfo Page data collected by handlePage(); counters are updated in place
 */
private function handleRevision( &$pageInfo ) {
	$this->debug( "Enter revision handler" );
	$revisionInfo = [];

	$normalFields = [ 'id', 'timestamp', 'comment', 'minor', 'model', 'format', 'text' ];

	$skip = false;

	while ( $skip ? $this->reader->next() : $this->reader->read() ) {
		if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
			$this->reader->localName == 'revision' ) {
			break;
		}

		// Skip only the one unknown element that asked for it, as handlePage()
		// does; otherwise every later advance would use next().
		$skip = false;

		$tag = $this->reader->localName;

		if ( !Hooks::run( 'ImportHandleRevisionXMLTag', [
			$this, $pageInfo, $revisionInfo
		] ) ) {
			// Do nothing
		} elseif ( in_array( $tag, $normalFields ) ) {
			$revisionInfo[$tag] = $this->nodeContents();
		} elseif ( $tag == 'contributor' ) {
			$revisionInfo['contributor'] = $this->handleContributor();
		} elseif ( $tag != '#text' ) {
			$this->warn( "Unhandled revision XML tag $tag" );
			$skip = true;
		}
	}

	$pageInfo['revisionCount']++;
	if ( $this->processRevision( $pageInfo, $revisionInfo ) ) {
		$pageInfo['successfulRevisionCount']++;
	}
}
821 
/**
 * Build a WikiRevision from parsed revision data and hand it to the revision
 * callback.
 *
 * @param array $pageInfo Must contain '_title'
 * @param array $revisionInfo Parsed revision fields; all keys are optional
 * @throws MWException If the text of a listed content model exceeds $wgMaxArticleSize
 * @return bool|mixed Result of revisionCallback()
 */
private function processRevision( $pageInfo, $revisionInfo ) {
	global $wgMaxArticleSize;

	// Make sure revisions won't violate $wgMaxArticleSize, which could lead to
	// database errors and instability. Testing for revisions with only listed
	// content models, as other content models might use serialization formats
	// which aren't checked against $wgMaxArticleSize.
	$sizeCheckedModels = [ 'wikitext', 'css', 'json', 'javascript', 'text', '' ];
	$modelIsChecked = !isset( $revisionInfo['model'] ) ||
		in_array( $revisionInfo['model'], $sizeCheckedModels );

	// A revision without a <text> element has nothing to measure.
	if ( $modelIsChecked &&
		isset( $revisionInfo['text'] ) &&
		(int)( strlen( $revisionInfo['text'] ) / 1024 ) > $wgMaxArticleSize
	) {
		throw new MWException( 'The text of ' .
			( isset( $revisionInfo['id'] ) ?
				"the revision with ID $revisionInfo[id]" :
				'a revision'
			) . " exceeds the maximum allowable size ($wgMaxArticleSize KB)" );
	}

	$revision = new WikiRevision( $this->config );

	if ( isset( $revisionInfo['id'] ) ) {
		$revision->setID( $revisionInfo['id'] );
	}
	if ( isset( $revisionInfo['model'] ) ) {
		$revision->setModel( $revisionInfo['model'] );
	}
	if ( isset( $revisionInfo['format'] ) ) {
		$revision->setFormat( $revisionInfo['format'] );
	}
	$revision->setTitle( $pageInfo['_title'] );

	if ( isset( $revisionInfo['text'] ) ) {
		// Let the content handler transform the raw text before storing it.
		$handler = $revision->getContentHandler();
		$text = $handler->importTransform(
			$revisionInfo['text'],
			$revision->getFormat() );

		$revision->setText( $text );
	}
	if ( isset( $revisionInfo['timestamp'] ) ) {
		$revision->setTimestamp( $revisionInfo['timestamp'] );
	} else {
		$revision->setTimestamp( wfTimestampNow() );
	}

	if ( isset( $revisionInfo['comment'] ) ) {
		$revision->setComment( $revisionInfo['comment'] );
	}

	// The mere presence of <minor> marks the edit as minor.
	if ( isset( $revisionInfo['minor'] ) ) {
		$revision->setMinor( true );
	}
	if ( isset( $revisionInfo['contributor']['ip'] ) ) {
		$revision->setUserIP( $revisionInfo['contributor']['ip'] );
	} elseif ( isset( $revisionInfo['contributor']['username'] ) ) {
		$revision->setUsername( $revisionInfo['contributor']['username'] );
	} else {
		$revision->setUsername( 'Unknown user' );
	}
	$revision->setNoUpdates( $this->mNoUpdates );

	return $this->revisionCallback( $revision );
}
897 
/**
 * Parse an <upload> element. Base64 <contents> are written to a temporary
 * file; a file found under the image base path takes precedence as the
 * source. The result goes to processUpload() only when upload import is on.
 *
 * @param array &$pageInfo Page data collected by handlePage()
 * @return mixed|null Result of processUpload(), or null when uploads are not imported
 */
private function handleUpload( &$pageInfo ) {
	$this->debug( "Enter upload handler" );
	$uploadInfo = [];

	$normalFields = [ 'timestamp', 'comment', 'filename', 'text',
		'src', 'size', 'sha1base36', 'archivename', 'rel' ];

	$skip = false;

	while ( $skip ? $this->reader->next() : $this->reader->read() ) {
		if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
			$this->reader->localName == 'upload' ) {
			break;
		}

		// Skip only the one unknown element that asked for it, as handlePage()
		// does; otherwise every later advance would use next().
		$skip = false;

		$tag = $this->reader->localName;

		if ( !Hooks::run( 'ImportHandleUploadXMLTag', [
			$this, $pageInfo
		] ) ) {
			// Do nothing
		} elseif ( in_array( $tag, $normalFields ) ) {
			$uploadInfo[$tag] = $this->nodeContents();
		} elseif ( $tag == 'contributor' ) {
			$uploadInfo['contributor'] = $this->handleContributor();
		} elseif ( $tag == 'contents' ) {
			$contents = $this->nodeContents();
			$encoding = $this->reader->getAttribute( 'encoding' );
			if ( $encoding === 'base64' ) {
				$uploadInfo['fileSrc'] = $this->dumpTemp( base64_decode( $contents ) );
				$uploadInfo['isTempSrc'] = true;
			}
		} elseif ( $tag != '#text' ) {
			$this->warn( "Unhandled upload XML tag $tag" );
			$skip = true;
		}
	}

	// A file on disk under the base path overrides embedded contents.
	if ( $this->mImageBasePath && isset( $uploadInfo['rel'] ) ) {
		$path = "{$this->mImageBasePath}/{$uploadInfo['rel']}";
		if ( file_exists( $path ) ) {
			$uploadInfo['fileSrc'] = $path;
			$uploadInfo['isTempSrc'] = false;
		}
	}

	if ( $this->mImportUploads ) {
		return $this->processUpload( $pageInfo, $uploadInfo );
	}
}
952 
/**
 * Write data to a new temporary file under wfTempDir().
 *
 * @param string $contents Data to write
 * @return string Name returned by tempnam()
 */
private function dumpTemp( $contents ) {
	$tempFile = tempnam( wfTempDir(), 'importupload' );
	file_put_contents( $tempFile, $contents );

	return $tempFile;
}
962 
/**
 * Build a WikiRevision describing one file upload and pass it to the upload
 * callback.
 *
 * @param array $pageInfo Read for '_title' and 'id'
 * @param array $uploadInfo Parsed upload fields; 'timestamp', 'filename',
 *  'src', 'size' and 'comment' are read unconditionally
 * @return mixed Result of the upload callback
 */
private function processUpload( $pageInfo, $uploadInfo ) {
	$upload = new WikiRevision( $this->config );

	if ( isset( $uploadInfo['text'] ) ) {
		$text = $uploadInfo['text'];
	} else {
		$text = '';
	}

	$upload->setTitle( $pageInfo['_title'] );
	$upload->setID( $pageInfo['id'] );
	$upload->setTimestamp( $uploadInfo['timestamp'] );
	$upload->setText( $text );
	$upload->setFilename( $uploadInfo['filename'] );
	if ( isset( $uploadInfo['archivename'] ) ) {
		$upload->setArchiveName( $uploadInfo['archivename'] );
	}
	$upload->setSrc( $uploadInfo['src'] );
	if ( isset( $uploadInfo['fileSrc'] ) ) {
		$isTemporary = !empty( $uploadInfo['isTempSrc'] );
		$upload->setFileSrc( $uploadInfo['fileSrc'], $isTemporary );
	}
	if ( isset( $uploadInfo['sha1base36'] ) ) {
		$upload->setSha1Base36( $uploadInfo['sha1base36'] );
	}
	$upload->setSize( intval( $uploadInfo['size'] ) );
	$upload->setComment( $uploadInfo['comment'] );

	// Unlike revisions, both the IP and the username are applied when present.
	if ( isset( $uploadInfo['contributor']['ip'] ) ) {
		$upload->setUserIP( $uploadInfo['contributor']['ip'] );
	}
	if ( isset( $uploadInfo['contributor']['username'] ) ) {
		$upload->setUsername( $uploadInfo['contributor']['username'] );
	}
	$upload->setNoUpdates( $this->mNoUpdates );

	return call_user_func( $this->mUploadCallback, $upload );
}
1001 
/**
 * Parse a <contributor> element.
 *
 * @return array Any of the keys 'id', 'ip' and 'username' that were present;
 *  empty for an empty element
 */
private function handleContributor() {
	$contributor = [];

	if ( $this->reader->isEmptyElement ) {
		return $contributor;
	}

	$knownFields = [ 'id', 'ip', 'username' ];

	while ( $this->reader->read() ) {
		$atClosingTag = $this->reader->nodeType == XMLReader::END_ELEMENT &&
			$this->reader->localName == 'contributor';
		if ( $atClosingTag ) {
			break;
		}

		$tag = $this->reader->localName;
		if ( !in_array( $tag, $knownFields ) ) {
			continue;
		}

		$contributor[$tag] = $this->nodeContents();
	}

	return $contributor;
}
1027 
/**
 * Turn a title string from the dump into a local Title and check that it may
 * be imported. A notice is emitted for every rejection.
 *
 * @param string $text Title text from the dump
 * @param string|null $ns Namespace value from the dump, passed through intval()
 * @return array|bool [ Title, ForeignTitle ] on success, false when rejected
 */
private function processTitle( $text, $ns = null ) {
	// Without site info there is no foreign namespace map to consult.
	$foreignTitleFactory = is_null( $this->foreignNamespaces )
		? new NaiveForeignTitleFactory()
		: new NamespaceAwareForeignTitleFactory( $this->foreignNamespaces );

	$foreignTitle = $foreignTitleFactory->createForeignTitle( $text, intval( $ns ) );
	$title = $this->importTitleFactory->createTitleFromForeignTitle( $foreignTitle );

	$commandLineMode = $this->config->get( 'CommandLineMode' );

	if ( is_null( $title ) ) {
		# Invalid page title? Ignore the page
		$this->notice( 'import-error-invalid', $foreignTitle->getFullText() );
		return false;
	}

	if ( $title->isExternal() ) {
		$this->notice( 'import-error-interwiki', $title->getPrefixedText() );
		return false;
	}

	if ( !$title->canExist() ) {
		$this->notice( 'import-error-special', $title->getPrefixedText() );
		return false;
	}

	if ( !$title->userCan( 'edit' ) && !$commandLineMode ) {
		# Do not import if the importing wiki user cannot edit this page
		$this->notice( 'import-error-edit', $title->getPrefixedText() );
		return false;
	}

	if ( !$title->exists() && !$title->userCan( 'create' ) && !$commandLineMode ) {
		# Do not import if the importing wiki user cannot create this page
		$this->notice( 'import-error-create', $title->getPrefixedText() );
		return false;
	}

	return [ $title, $foreignTitle ];
}
1070 }
setTargetRootPage($rootpage)
Set a target root page under which all pages are imported.
static factory(Title $title)
Create a WikiPage object of the appropriate class for the given title.
Definition: WikiPage.php:99
A parser that translates page titles on a foreign wiki into ForeignTitle objects, using information a...
deferred.txt: A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user. For example, updating the view counts, updating the linked-to tables after a save, etc. PHP does not yet have any way to tell the server to actually return and disconnect while still running these updates, but it might have such a feature in the future. We handle these by creating a deferred-update object and putting those objects on a global list.
Definition: deferred.txt:11
wfGetDB($db, $groups=[], $wiki=false)
Get a Database object.
$wgMaxArticleSize
Maximum article size in kilobytes.
processRevision($pageInfo, $revisionInfo)
setImageBasePath($dir)
if(count($args)==0) $dir
const NS_MAIN
Definition: Defines.php:69
pageOutCallback($title, $foreignTitle, $revCount, $sucCount, $pageInfo)
Notify the callback function when a "</page>" is closed.
XML file reader for the page data importer.
A class to convert page titles on a foreign wiki (ForeignTitle objects) into page titles on the local...
A class to convert page titles on a foreign wiki (ForeignTitle objects) into page titles on the local...
setSiteInfoCallback($callback)
Sets the action to perform when site info is encountered.
$source
finishImportPage($title, $foreignTitle, $revCount, $sRevCount, $pageInfo)
Mostly for hook use.
static exists($index)
Returns whether the specified namespace exists.
static newFromText($text, $defaultNamespace=NS_MAIN)
Create a new Title from text, such as what one would find in a link.
Definition: Title.php:277
setNoticeCallback($callback)
Set a callback that displays notice messages.
importLogItem($revision)
Default per-revision callback, performs the import.
When a variable name is used in a function, it is silently declared as a new local, masking the global.
Definition: design.txt:93
array $countableCache
setRevisionCallback($callback)
Sets the action to perform as each page revision is reached.
setTargetNamespace($namespace)
Set a target namespace to override the defaults.
setPageOutCallback($callback)
Sets the action to perform as each page in the stream is completed.
wfDebug($text, $dest= 'all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
setImportTitleFactory($factory)
Sets the factory object to use to convert ForeignTitle objects into local Title objects.
if($line===false) $args
Definition: cdb.php:64
$factory
dumpTemp($contents)
handleRevision(&$pageInfo)
wfTempDir()
Tries to get the system directory for temporary files.
nodeContents()
Shouldn't something like this be built-in to XMLReader? Fetches text contents of the current element...
throwXmlError($err)
logItemCallback($revision)
Notify the callback function of a new log item.
doImport()
Primary entry point.
static factory(array $deltas)
wfTimestampNow()
Convenience function; returns MediaWiki timestamp for the present time.
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses just before the function returns a value If you return an< a > element with HTML attributes $attribs and contents $html will be returned If you return $ret will be returned and may include noclasses after processing after in associative array form externallinks including delete and has completed for all link tables whether this was an auto creation default is conds Array Extra conditions for the No matching items in log is displayed if loglist is empty msgKey Array If you want a nice box with a set this to the key of the message First element is the message additional optional elements are parameters for the key that are processed with wfMessage() -> params() ->parseAsBlock()-offset Set to overwrite offset parameter in $wgRequest set to ''to unsetoffset-wrap String Wrap the message in html(usually something like"&lt
siteInfoCallback($siteInfo)
Notify the callback function of site info.
nodeAttribute($attr)
Retrieves the contents of the named attribute of the current element.
$params
Represents a revision, log entry or upload during the import process.
wfDeprecated($function, $version=false, $component=false, $callerOffset=2)
Throws a warning that $function is deprecated.
$buffer
namespace and then decline to actually register it file or subcat img or subcat $title
Definition: hooks.txt:912
static hasSubpages($index)
Does the namespace allow subpages?
static addUpdate(DeferrableUpdate $update, $type=self::POSTSEND)
Add an update to the deferred list.
Config $config
static run($event, array $args=[], $deprecatedVersion=null)
Call hook functions defined in Hooks::register and $wgHooks.
Definition: Hooks.php:131
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books $tag
Definition: hooks.txt:965
debugRevisionHandler(&$revision)
Alternate per-revision callback, for debugging.
revisionCallback($revision)
Notify the callback function of a revision.
setDebug($debug)
Set debug mode...
setImportUploads($import)
setPageCallback($callback)
Sets the action to perform as each new page in the stream is reached.
__construct(ImportSource $source, Config $config=null)
Creates an ImportXMLReader drawing from the source provided.
setNoUpdates($noupdates)
Set 'no updates' mode.
processTitle($text, $ns=null)
static getDefaultInstance()
static registerSource(ImportSource $source)
setUploadCallback($callback)
Sets the action to perform as each file upload version is reached.
setLogItemCallback($callback)
Sets the action to perform as each log item reached.
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
A parser that translates page titles on a foreign wiki into ForeignTitle objects, with no knowledge o...
wfSetVar(&$dest, $source, $force=false)
Sets dest to source and returns the original value of dest. If source is NULL, it just returns the val...
processUpload($pageInfo, $uploadInfo)
Source interface for XML import.
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set and then return false from the hook function Ensure you consume the ChangeTagAfterDelete hook to carry out custom deletion actions as context called by AbstractContent::getParserOutput May be used to override the normal model specific rendering of page content $content
Definition: hooks.txt:1004
ImportTitleFactory $importTitleFactory
this class mediates it Skin Encapsulates a look and feel for the wiki All of the functions that render HTML and make choices about how to render it are here and are called from various other places when and is meant to be subclassed with other skins that may override some of its functions The User object contains a reference to a and so rather than having a global skin object we just rely on the global User and get the skin with $wgUser and also has some character encoding functions and other locale stuff The current user interface language is instantiated as and the local content language as $wgContLang
Definition: design.txt:56
A class to convert page titles on a foreign wiki (ForeignTitle objects) into page titles on the local...
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set $status
Definition: hooks.txt:1004
Reporting callback.
const DB_MASTER
Definition: Defines.php:47
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that probably a stub it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output modifiable modifiable after all normalizations have been except for the $wgMaxImageArea check set to true or false to override the $wgMaxImageArea check result gives extension the possibility to transform it themselves $handler
Definition: hooks.txt:762
processLogItem($logInfo)
$debug
Definition: mcc.php:31
beforeImportPage($titleAndForeignTitle)
Default per-page callback.
pageCallback($title)
Notify the callback function when a new "<page>" is reached.
do that in ParserLimitReportFormat instead use this to modify the parameters of the image and a DIV can begin in one section and end in another Make sure your code can handle that case gracefully See the EditSectionClearerLink extension for an example zero but section is usually empty its values are the globals values before the output is cached one of or reset my talk my contributions etc etc otherwise the built in rate limiting checks are if enabled allows for interception of redirect as a string mapping parameter names to values & $type
Definition: hooks.txt:2338
importUpload($revision)
Dummy for now...
static newGood($value=null)
Factory function for good results.
Definition: Status.php:101
Exception representing a failure to serialize or unserialize a content object.
do that in ParserLimitReportFormat instead use this to modify the parameters of the image and a DIV can begin in one section and end in another Make sure your code can handle that case gracefully See the EditSectionClearerLink extension for an example zero but section is usually empty its values are the globals values before the output is cached $page
Definition: hooks.txt:2338
importRevision($revision)
Default per-revision callback, performs the import.
handleUpload(&$pageInfo)