MediaWiki  REL1_28
WikiImporter.php
Go to the documentation of this file.
1 <?php
33 class WikiImporter {
34  private $reader = null;
35  private $foreignNamespaces = null;
40  private $mNoUpdates = false;
42  private $config;
46  private $countableCache = [];
47 
/**
 * Creates an importer that reads XML from the given source.
 *
 * NOTE(review): the function header and the `$id` assignment were absent from
 * this listing; they are reconstructed from the body (which reads $config and
 * $id) and from UploadSourceAdapter::registerSource( ImportSource $source ).
 * Confirm against the upstream file.
 *
 * @param ImportSource $source
 * @param Config|null $config Omitting this is deprecated since 1.25
 * @throws Exception If the XMLReader class is unavailable
 * @throws MWException If the reader cannot open the source
 */
function __construct( ImportSource $source, Config $config = null ) {
	if ( !class_exists( 'XMLReader' ) ) {
		throw new Exception( 'Import requires PHP to have been compiled with libxml support' );
	}

	$this->reader = new XMLReader();
	if ( !$config ) {
		wfDeprecated( __METHOD__ . ' without a Config instance', '1.25' );
		$config = ConfigFactory::getDefaultInstance()->makeConfig( 'main' );
	}
	$this->config = $config;

	if ( !in_array( 'uploadsource', stream_get_wrappers() ) ) {
		stream_wrapper_register( 'uploadsource', 'UploadSourceAdapter' );
	}
	// The identifier returned here is what the uploadsource:// URL below points at.
	$id = UploadSourceAdapter::registerSource( $source );

	// Enable the entity loader, as it is needed for loading external URLs via
	// XMLReader::open (T86036)
	$oldDisable = libxml_disable_entity_loader( false );
	if ( defined( 'LIBXML_PARSEHUGE' ) ) {
		$status = $this->reader->open( "uploadsource://$id", null, LIBXML_PARSEHUGE );
	} else {
		$status = $this->reader->open( "uploadsource://$id" );
	}
	if ( !$status ) {
		$error = libxml_get_last_error();
		libxml_disable_entity_loader( $oldDisable );
		// libxml_get_last_error() returns false when no error was recorded,
		// so do not dereference it blindly.
		throw new MWException( 'Encountered an internal error while initializing WikiImporter object: ' .
			( $error ? $error->message : 'unknown libxml error' ) );
	}
	libxml_disable_entity_loader( $oldDisable );

	// Default callbacks
	$this->setPageCallback( [ $this, 'beforeImportPage' ] );
	$this->setRevisionCallback( [ $this, "importRevision" ] );
	$this->setUploadCallback( [ $this, 'importUpload' ] );
	$this->setLogItemCallback( [ $this, 'importLogItem' ] );
	$this->setPageOutCallback( [ $this, 'finishImportPage' ] );

	$this->importTitleFactory = new NaiveImportTitleFactory();
}
96 
/**
 * Exposes the underlying XML reader held by this importer.
 *
 * @return XMLReader|null
 */
public function getReader() {
	$xmlReader = $this->reader;

	return $xmlReader;
}
103 
/**
 * Reports an XML error to the import debug channel and to the general debug log.
 * Despite the name, nothing is thrown here.
 *
 * @param string $err
 */
public function throwXmlError( $err ) {
	// Only emitted when import debugging is switched on.
	$this->debug( "FAILURE: $err" );

	// Always emitted.
	wfDebug( "WikiImporter XML error: $err\n" );
}
108 
/**
 * Writes a message to the debug log, but only while debug mode is enabled.
 *
 * @param string $data
 */
public function debug( $data ) {
	if ( !$this->mDebug ) {
		return;
	}

	wfDebug( "IMPORT: $data\n" );
}
114 
/**
 * Writes a warning to the debug log regardless of the debug-mode flag.
 *
 * @param string $data
 */
public function warn( $data ) {
	$line = "IMPORT: $data\n";
	wfDebug( $line );
}
118 
/**
 * Emits a localisable notice, either through the registered notice callback
 * or, when none is callable, by echoing the message text.
 *
 * @param string $msg Message key; any further arguments are message parameters
 */
public function notice( $msg /*, $param, ...*/ ) {
	// Everything after the message key is treated as a parameter.
	$params = array_slice( func_get_args(), 1 );

	if ( !is_callable( $this->mNoticeCallback ) ) {
		# No ImportReporter -> CLI
		echo wfMessage( $msg, $params )->text() . "\n";
		return;
	}

	call_user_func( $this->mNoticeCallback, $msg, $params );
}
129 
/**
 * Switches debug mode on or off; debug() only logs while it is on.
 *
 * @param bool $debug
 */
function setDebug( $debug ) {
	$enabled = $debug;
	$this->mDebug = $enabled;
}
137 
/**
 * Stores the 'no updates' flag, which is later handed to every WikiRevision
 * this importer builds.
 *
 * @param bool $noupdates
 */
function setNoUpdates( $noupdates ) {
	$flag = $noupdates;
	$this->mNoUpdates = $flag;
}
145 
/**
 * Registers the callback used by notice() to display messages.
 *
 * @param callable $callback
 * @return callable|null Whatever wfSetVar() reports as the previous value
 */
public function setNoticeCallback( $callback ) {
	$previous = wfSetVar( $this->mNoticeCallback, $callback );

	return $previous;
}
155 
/**
 * Replaces the callback invoked by pageCallback().
 *
 * @param callable $callback
 * @return callable|null The callback that was registered before
 */
public function setPageCallback( $callback ) {
	$old = $this->mPageCallback;
	$this->mPageCallback = $callback;

	return $old;
}
166 
/**
 * Replaces the callback invoked by pageOutCallback().
 *
 * @param callable $callback
 * @return callable|null The callback that was registered before
 */
public function setPageOutCallback( $callback ) {
	$old = $this->mPageOutCallback;
	$this->mPageOutCallback = $callback;

	return $old;
}
181 
/**
 * Replaces the callback invoked by revisionCallback().
 *
 * @param callable $callback
 * @return callable|null The callback that was registered before
 */
public function setRevisionCallback( $callback ) {
	$old = $this->mRevisionCallback;
	$this->mRevisionCallback = $callback;

	return $old;
}
192 
/**
 * Replaces the callback that processUpload() hands each upload to.
 *
 * @param callable $callback
 * @return callable|null The callback that was registered before
 */
public function setUploadCallback( $callback ) {
	$old = $this->mUploadCallback;
	$this->mUploadCallback = $callback;

	return $old;
}
203 
/**
 * Replaces the callback invoked by logItemCallback().
 *
 * @param callable $callback
 * @return callable|null The callback that was registered before
 */
public function setLogItemCallback( $callback ) {
	$old = $this->mLogItemCallback;
	$this->mLogItemCallback = $callback;

	return $old;
}
214 
/**
 * Replaces the callback invoked by siteInfoCallback().
 *
 * @param callable $callback
 * @return callable|null The callback that was registered before
 */
public function setSiteInfoCallback( $callback ) {
	$old = $this->mSiteInfoCallback;
	$this->mSiteInfoCallback = $callback;

	return $old;
}
225 
/**
 * Installs the factory that processTitle() uses to turn foreign titles into
 * local ones.
 *
 * @param ImportTitleFactory $factory
 */
public function setImportTitleFactory( $factory ) {
	$titleFactory = $factory;
	$this->importTitleFactory = $titleFactory;
}
234 
/**
 * Sets the namespace that every imported page is placed in.
 *
 * @param null|int $namespace Null to keep each page's own namespace
 * @return bool False when $namespace is negative or does not exist
 */
public function setTargetNamespace( $namespace ) {
	if ( is_null( $namespace ) ) {
		// Don't override namespaces: fall back to the same factory the
		// constructor installs, so that an earlier namespace override is undone.
		// NOTE(review): this call was absent from the listing (gap in the source
		// numbering); confirm against the upstream file.
		$this->setImportTitleFactory( new NaiveImportTitleFactory() );
		return true;
	} elseif (
		$namespace >= 0 &&
		MWNamespace::exists( intval( $namespace ) )
	) {
		$namespace = intval( $namespace );
		$this->setImportTitleFactory( new NamespaceImportTitleFactory( $namespace ) );
		return true;
	} else {
		return false;
	}
}
256 
/**
 * Sets a root page under which all imported pages are placed as subpages.
 *
 * NOTE(review): the `$status` initialisation, the `global $wgContLang`
 * declaration and the two setImportTitleFactory() calls were absent from this
 * listing (gaps in the source numbering) and are reconstructed here; confirm
 * against the upstream file.
 *
 * @param null|string $rootpage Null for no root page; '' leaves the factory untouched
 * @return Status Fatal when the root page is invalid or its namespace has no subpages
 */
public function setTargetRootPage( $rootpage ) {
	$status = Status::newGood();
	if ( is_null( $rootpage ) ) {
		// No rootpage
		$this->setImportTitleFactory( new NaiveImportTitleFactory() );
	} elseif ( $rootpage !== '' ) {
		$rootpage = rtrim( $rootpage, '/' ); // avoid double slashes
		$title = Title::newFromText( $rootpage );

		if ( !$title || $title->isExternal() ) {
			$status->fatal( 'import-rootpage-invalid' );
		} else {
			if ( !MWNamespace::hasSubpages( $title->getNamespace() ) ) {
				global $wgContLang;

				$displayNSText = $title->getNamespace() == NS_MAIN
					? wfMessage( 'blanknamespace' )->text()
					: $wgContLang->getNsText( $title->getNamespace() );
				$status->fatal( 'import-rootpage-nosubpage', $displayNSText );
			} else {
				// set namespace to 'all', so the namespace check in processTitle() can pass
				$this->setTargetNamespace( null );
				$this->setImportTitleFactory( new SubpageImportTitleFactory( $title ) );
			}
		}
	}
	return $status;
}
290 
/**
 * Stores the directory that handleUpload() prefixes to an upload's 'rel' path
 * when looking for the file on disk.
 *
 * @param string $dir
 */
public function setImageBasePath( $dir ) {
	$basePath = $dir;
	$this->mImageBasePath = $basePath;
}
297 
/**
 * Stores whether uploads are imported; handleUpload() only calls
 * processUpload() when this is truthy.
 *
 * @param bool $import
 */
public function setImportUploads( $import ) {
	$enabled = $import;
	$this->mImportUploads = $enabled;
}
304 
/**
 * Default per-page callback: records whether the page is countable before any
 * revision is imported, so finishImportPage() can compare afterwards.
 *
 * @param array $titleAndForeignTitle Element 0 is the local Title
 * @return bool Always true
 */
public function beforeImportPage( $titleAndForeignTitle ) {
	$title = $titleAndForeignTitle[0];
	// NOTE(review): this assignment was absent from the listing, leaving $page
	// undefined; reconstructed via WikiPage::factory( Title ). Confirm upstream.
	$page = WikiPage::factory( $title );
	$this->countableCache['title_' . $title->getPrefixedText()] = $page->isCountable();
	return true;
}
317 
/**
 * Default per-revision callback: imports the revision unless its content
 * handler cannot be used on the target title or its content fails to unserialize.
 *
 * @param WikiRevision $revision
 * @return bool Result of importOldRevision(), or false after a reported error
 */
public function importRevision( $revision ) {
	$usableHere = $revision->getContentHandler()->canBeUsedOn( $revision->getTitle() );

	if ( $usableHere ) {
		try {
			return $revision->importOldRevision();
		} catch ( MWContentSerializationException $ex ) {
			$this->notice( 'import-error-unserialize',
				$revision->getTitle()->getPrefixedText(),
				$revision->getID(),
				$revision->getModel(),
				$revision->getFormat() );
		}
	} else {
		$this->notice( 'import-error-bad-location',
			$revision->getTitle()->getPrefixedText(),
			$revision->getID(),
			$revision->getModel(),
			$revision->getFormat() );
	}

	// Reached only after one of the notices above.
	return false;
}
346 
/**
 * Default log-item callback: delegates to the revision object.
 *
 * @param WikiRevision $revision
 * @return mixed Whatever WikiRevision::importLogItem() returns
 */
public function importLogItem( $revision ) {
	$result = $revision->importLogItem();

	return $result;
}
355 
/**
 * Default upload callback: delegates to the revision object.
 *
 * @param WikiRevision $revision
 * @return mixed Whatever WikiRevision::importUpload() returns
 */
public function importUpload( $revision ) {
	$result = $revision->importUpload();

	return $result;
}
364 
/**
 * Default page-out callback: adjusts the article count if the page's
 * countability changed during the import, then runs the AfterImportPage hook.
 *
 * NOTE(review): the `$page` assignment and the opening of the deferred-update
 * call were absent from this listing (gaps in the source numbering) and are
 * reconstructed here; confirm against the upstream file.
 *
 * @param Title $title
 * @param ForeignTitle $foreignTitle
 * @param int $revCount
 * @param int $sRevCount
 * @param array $pageInfo
 * @return bool Result of the AfterImportPage hook
 */
public function finishImportPage( $title, $foreignTitle, $revCount,
	$sRevCount, $pageInfo ) {

	// Update article count statistics (T42009)
	// The normal counting logic in WikiPage->doEditUpdates() is designed for
	// one-revision-at-a-time editing, not bulk imports. In this situation it
	// suffers from issues of replica DB lag. We let WikiPage handle the total page
	// and revision count, and we implement our own custom logic for the
	// article (content page) count.
	$page = WikiPage::factory( $title );
	$page->loadPageData( 'fromdbmaster' );
	$content = $page->getContent();
	if ( $content === null ) {
		wfDebug( __METHOD__ . ': Skipping article count adjustment for ' . $title .
			' because WikiPage::getContent() returned null' );
	} else {
		$editInfo = $page->prepareContentForEdit( $content );
		$countKey = 'title_' . $title->getPrefixedText();
		$countable = $page->isCountable( $editInfo );
		// Only touch the statistics when beforeImportPage() cached a value
		// and that value differs from the current state.
		if ( array_key_exists( $countKey, $this->countableCache ) &&
			$countable != $this->countableCache[$countKey] ) {
			DeferredUpdates::addUpdate( SiteStatsUpdate::factory( [
				'articles' => ( (int)$countable - (int)$this->countableCache[$countKey] )
			] ) );
		}
	}

	$args = func_get_args();
	return Hooks::run( 'AfterImportPage', $args );
}
404 
/**
 * Alternate per-revision callback that only dumps the revision's fields to
 * the debug channel.
 *
 * @param object &$revision Object exposing title, user_text, timestamp, comment and text
 */
public function debugRevisionHandler( &$revision ) {
	$this->debug( "Got revision:" );

	$titleLine = is_object( $revision->title )
		? "-- Title: " . $revision->title->getPrefixedText()
		: "-- Title: <invalid>";
	$this->debug( $titleLine );

	$this->debug( "-- User: " . $revision->user_text );
	$this->debug( "-- Timestamp: " . $revision->timestamp );
	$this->debug( "-- Comment: " . $revision->comment );
	$this->debug( "-- Text: " . $revision->text );
}
421 
/**
 * Passes the collected site info, plus this importer, to the site-info callback.
 *
 * @param array $siteInfo
 * @return mixed The callback's result, or false when no callback is set
 */
private function siteInfoCallback( $siteInfo ) {
	if ( !isset( $this->mSiteInfoCallback ) ) {
		return false;
	}

	$callbackArgs = [ $siteInfo, $this ];
	return call_user_func_array( $this->mSiteInfoCallback, $callbackArgs );
}
435 
/**
 * Invokes the page callback, if one is set, with the given title data.
 * The callback's result is discarded.
 *
 * @param array $title Value produced by processTitle()
 */
function pageCallback( $title ) {
	if ( !isset( $this->mPageCallback ) ) {
		return;
	}

	call_user_func( $this->mPageCallback, $title );
}
445 
/**
 * Invokes the page-out callback, if one is set, forwarding every argument
 * unchanged. The callback's result is discarded.
 *
 * @param Title $title
 * @param ForeignTitle $foreignTitle
 * @param int $revCount
 * @param int $sucCount
 * @param array $pageInfo
 */
private function pageOutCallback( $title, $foreignTitle, $revCount,
	$sucCount, $pageInfo ) {
	if ( !isset( $this->mPageOutCallback ) ) {
		return;
	}

	$forwarded = func_get_args();
	call_user_func_array( $this->mPageOutCallback, $forwarded );
}
461 
/**
 * Passes a revision, plus this importer, to the revision callback.
 *
 * @param WikiRevision $revision
 * @return mixed The callback's result, or false when no callback is set
 */
private function revisionCallback( $revision ) {
	if ( !isset( $this->mRevisionCallback ) ) {
		return false;
	}

	$callbackArgs = [ $revision, $this ];
	return call_user_func_array( $this->mRevisionCallback, $callbackArgs );
}
475 
/**
 * Passes a log item, plus this importer, to the log-item callback.
 *
 * @param WikiRevision $revision
 * @return mixed The callback's result, or false when no callback is set
 */
private function logItemCallback( $revision ) {
	if ( !isset( $this->mLogItemCallback ) ) {
		return false;
	}

	$callbackArgs = [ $revision, $this ];
	return call_user_func_array( $this->mLogItemCallback, $callbackArgs );
}
489 
/**
 * Reads an attribute from the node the reader is currently positioned on.
 *
 * @param string $attr Attribute name
 * @return string|null Whatever XMLReader::getAttribute() returns
 */
public function nodeAttribute( $attr ) {
	$value = $this->reader->getAttribute( $attr );

	return $value;
}
499 
/**
 * Collects the text of the current element by reading forward until the first
 * end-element node. Text, CDATA and significant-whitespace nodes are
 * concatenated; other node types are ignored.
 *
 * @return string The collected text; '' for an empty element, or when the
 *  input ends before an end-element node (the reader is closed in that case)
 */
public function nodeContents() {
	if ( $this->reader->isEmptyElement ) {
		return "";
	}

	$textNodeTypes = [
		XMLReader::TEXT,
		XMLReader::CDATA,
		XMLReader::SIGNIFICANT_WHITESPACE,
	];

	$collected = "";
	while ( $this->reader->read() ) {
		$nodeType = $this->reader->nodeType;

		if ( $nodeType == XMLReader::END_ELEMENT ) {
			return $collected;
		}
		if ( in_array( $nodeType, $textNodeTypes ) ) {
			$collected .= $this->reader->value;
		}
	}

	// Ran out of input without seeing an end-element node.
	$this->reader->close();
	return '';
}
527 
/**
 * Primary entry point: checks that the root element is <mediawiki> and then
 * dispatches every top-level tag to its handler.
 *
 * The previous entity-loader setting is restored and the reader is closed on
 * every exit path, including non-Exception throwables and the wrong-root-tag
 * error.
 *
 * @throws MWException If the first node read is not <mediawiki>
 * @return bool Always true when no exception was thrown
 */
public function doImport() {
	// Calls to reader->read need to be wrapped in calls to
	// libxml_disable_entity_loader() to avoid local file
	// inclusion attacks (bug 46932).
	$oldDisable = libxml_disable_entity_loader( true );
	try {
		$this->reader->read();

		if ( $this->reader->localName != 'mediawiki' ) {
			throw new MWException( "Expected <mediawiki> tag, got " .
				$this->reader->localName );
		}
		$this->debug( "<mediawiki> tag is correct." );

		$this->debug( "Starting primary dump processing loop." );

		$keepReading = $this->reader->read();
		$skip = false;
		while ( $keepReading ) {
			$tag = $this->reader->localName;
			$type = $this->reader->nodeType;

			if ( !Hooks::run( 'ImportHandleToplevelXMLTag', [ $this ] ) ) {
				// Do nothing
			} elseif ( $tag == 'mediawiki' && $type == XMLReader::END_ELEMENT ) {
				break;
			} elseif ( $tag == 'siteinfo' ) {
				$this->handleSiteInfo();
			} elseif ( $tag == 'page' ) {
				$this->handlePage();
			} elseif ( $tag == 'logitem' ) {
				$this->handleLogItem();
			} elseif ( $tag != '#text' ) {
				$this->warn( "Unhandled top-level XML tag $tag" );

				$skip = true;
			}

			if ( $skip ) {
				// next() steps over the unhandled element's subtree.
				$keepReading = $this->reader->next();
				$skip = false;
				$this->debug( "Skip" );
			} else {
				$keepReading = $this->reader->read();
			}
		}
	} finally {
		libxml_disable_entity_loader( $oldDisable );
		$this->reader->close();
	}

	return true;
}
595 
/**
 * Reads the <siteinfo> element: namespace names are stored in
 * $this->foreignNamespaces keyed by their 'key' attribute, a fixed set of
 * simple fields is collected, and the result is passed to siteInfoCallback().
 */
private function handleSiteInfo() {
	$this->debug( "Enter site info handler." );

	// Fields that can just be stuffed in the siteInfo object
	$normalFields = [ 'sitename', 'base', 'generator', 'case' ];
	$siteInfo = [];

	while ( $this->reader->read() ) {
		$tag = $this->reader->localName;

		if ( $this->reader->nodeType == XMLReader::END_ELEMENT && $tag == 'siteinfo' ) {
			break;
		}

		if ( $tag == 'namespace' ) {
			// Fetch the key before nodeContents() advances the reader.
			$nsKey = $this->nodeAttribute( 'key' );
			$this->foreignNamespaces[$nsKey] = $this->nodeContents();
			continue;
		}

		if ( in_array( $tag, $normalFields ) ) {
			$siteInfo[$tag] = $this->nodeContents();
		}
	}

	$siteInfo['_namespaces'] = $this->foreignNamespaces;
	$this->siteInfoCallback( $siteInfo );
}
622 
/**
 * Reads a <logitem> element into an array and passes it to processLogItem().
 * The ImportHandleLogItemXMLTag hook runs for each node; when it returns
 * false the node is not processed here.
 */
private function handleLogItem() {
	$this->debug( "Enter log item handler." );

	// Fields that can just be stuffed in the pageInfo object
	$normalFields = [ 'id', 'comment', 'type', 'action', 'timestamp',
		'logtitle', 'params' ];
	$logInfo = [];

	while ( $this->reader->read() ) {
		$tag = $this->reader->localName;

		if ( $this->reader->nodeType == XMLReader::END_ELEMENT && $tag == 'logitem' ) {
			break;
		}

		$hookArgs = [ $this, $logInfo ];
		if ( !Hooks::run( 'ImportHandleLogItemXMLTag', $hookArgs ) ) {
			// Do nothing
			continue;
		}

		if ( in_array( $tag, $normalFields ) ) {
			$logInfo[$tag] = $this->nodeContents();
		} elseif ( $tag == 'contributor' ) {
			$logInfo['contributor'] = $this->handleContributor();
		} elseif ( $tag != '#text' ) {
			$this->warn( "Unhandled log-item XML tag $tag" );
		}
	}

	$this->processLogItem( $logInfo );
}
654 
/**
 * Builds a WikiRevision from parsed log-item data and passes it to
 * logItemCallback().
 *
 * @param array $logInfo Data collected by handleLogItem(); 'type' and
 *  'action' are read unconditionally, all other keys are optional
 * @return mixed Result of logItemCallback()
 */
private function processLogItem( $logInfo ) {
	$logRevision = new WikiRevision( $this->config );

	if ( isset( $logInfo['id'] ) ) {
		$logRevision->setID( $logInfo['id'] );
	}
	$logRevision->setType( $logInfo['type'] );
	$logRevision->setAction( $logInfo['action'] );
	if ( isset( $logInfo['timestamp'] ) ) {
		$logRevision->setTimestamp( $logInfo['timestamp'] );
	}
	if ( isset( $logInfo['params'] ) ) {
		$logRevision->setParams( $logInfo['params'] );
	}
	if ( isset( $logInfo['logtitle'] ) ) {
		// @todo Using Title for non-local titles is a recipe for disaster.
		// We should use ForeignTitle here instead.
		$logRevision->setTitle( Title::newFromText( $logInfo['logtitle'] ) );
	}

	$logRevision->setNoUpdates( $this->mNoUpdates );

	if ( isset( $logInfo['comment'] ) ) {
		$logRevision->setComment( $logInfo['comment'] );
	}

	if ( isset( $logInfo['contributor']['ip'] ) ) {
		$logRevision->setUserIP( $logInfo['contributor']['ip'] );
	}

	// A placeholder name is used when the dump carries no username.
	$username = isset( $logInfo['contributor']['username'] )
		? $logInfo['contributor']['username']
		: 'Unknown user';
	$logRevision->setUsername( $username );

	return $this->logItemCallback( $logRevision );
}
698 
/**
 * Reads a <page> element: collects simple page fields, resolves the title
 * when the first <revision> or <upload> is met, dispatches each revision or
 * upload to its handler, and finally calls pageOutCallback() if a valid title
 * was resolved.
 */
private function handlePage() {
	// Handle page data.
	$this->debug( "Enter page handler." );
	$pageInfo = [ 'revisionCount' => 0, 'successfulRevisionCount' => 0 ];

	// Fields that can just be stuffed in the pageInfo object
	$normalFields = [ 'title', 'ns', 'id', 'redirect', 'restrictions' ];

	$skip = false;
	$badTitle = false;

	while ( $skip ? $this->reader->next() : $this->reader->read() ) {
		if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
			$this->reader->localName == 'page' ) {
			break;
		}

		$skip = false;
		$tag = $this->reader->localName;

		if ( $badTitle ) {
			// The title is invalid, bail out of this page
			$skip = true;
			continue;
		}

		if ( !Hooks::run( 'ImportHandlePageXMLTag', [ $this, &$pageInfo ] ) ) {
			// Do nothing
			continue;
		}

		if ( in_array( $tag, $normalFields ) ) {
			// An XML snippet:
			// <page>
			// <id>123</id>
			// <title>Page</title>
			// <redirect title="NewTitle"/>
			// ...
			// Because the redirect tag is built differently, we need special handling for that case.
			$pageInfo[$tag] = ( $tag == 'redirect' )
				? $this->nodeAttribute( 'title' )
				: $this->nodeContents();
			continue;
		}

		if ( $tag == 'revision' || $tag == 'upload' ) {
			if ( !isset( $title ) ) {
				$title = $this->processTitle( $pageInfo['title'],
					isset( $pageInfo['ns'] ) ? $pageInfo['ns'] : null );

				// $title is either an array of two titles or false.
				if ( is_array( $title ) ) {
					$this->pageCallback( $title );
					list( $pageInfo['_title'], $foreignTitle ) = $title;
				} else {
					$badTitle = true;
					$skip = true;
				}
			}

			if ( $title ) {
				if ( $tag == 'revision' ) {
					$this->handleRevision( $pageInfo );
				} else {
					$this->handleUpload( $pageInfo );
				}
			}
			continue;
		}

		if ( $tag != '#text' ) {
			$this->warn( "Unhandled page XML tag $tag" );
			$skip = true;
		}
	}

	// @note $pageInfo['_title'] is only set when processTitle() returned a
	// title pair above, in which case pageCallback() was already called and
	// $foreignTitle is set as well; only then is pageOutCallback() invoked.
	if ( array_key_exists( '_title', $pageInfo ) ) {
		$this->pageOutCallback( $pageInfo['_title'], $foreignTitle,
			$pageInfo['revisionCount'],
			$pageInfo['successfulRevisionCount'],
			$pageInfo );
	}
}
779 
/**
 * Reads a <revision> element into an array, passes it to processRevision()
 * and updates the page's revision counters.
 *
 * @param array &$pageInfo Page data; 'revisionCount' is always incremented,
 *  'successfulRevisionCount' only when processRevision() returns a truthy value
 */
private function handleRevision( &$pageInfo ) {
	$this->debug( "Enter revision handler" );
	$revisionInfo = [];

	$normalFields = [ 'id', 'timestamp', 'comment', 'minor', 'model', 'format', 'text' ];

	$skip = false;

	while ( $skip ? $this->reader->next() : $this->reader->read() ) {
		if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
			$this->reader->localName == 'revision' ) {
			break;
		}

		// Skipping applies to one unhandled element only. Without this reset a
		// single unknown tag would switch every later step to next(), unlike
		// the equivalent loop in handlePage().
		$skip = false;

		$tag = $this->reader->localName;

		if ( !Hooks::run( 'ImportHandleRevisionXMLTag', [
			$this, $pageInfo, $revisionInfo
		] ) ) {
			// Do nothing
		} elseif ( in_array( $tag, $normalFields ) ) {
			$revisionInfo[$tag] = $this->nodeContents();
		} elseif ( $tag == 'contributor' ) {
			$revisionInfo['contributor'] = $this->handleContributor();
		} elseif ( $tag != '#text' ) {
			$this->warn( "Unhandled revision XML tag $tag" );
			$skip = true;
		}
	}

	$pageInfo['revisionCount']++;
	if ( $this->processRevision( $pageInfo, $revisionInfo ) ) {
		$pageInfo['successfulRevisionCount']++;
	}
}
818 
/**
 * Builds a WikiRevision from parsed revision data and passes it to
 * revisionCallback().
 *
 * @param array $pageInfo Page data; '_title' is read unconditionally
 * @param array $revisionInfo Data collected by handleRevision()
 * @throws MWException If the text of a size-checked content model exceeds $wgMaxArticleSize
 * @return mixed Result of revisionCallback()
 */
private function processRevision( $pageInfo, $revisionInfo ) {
	// NOTE(review): this declaration was absent from the listing (gap in the
	// source numbering). Without it the limit below evaluates to 0 and every
	// non-empty text is rejected. Confirm against the upstream file.
	global $wgMaxArticleSize;

	// Make sure revisions won't violate $wgMaxArticleSize, which could lead to
	// database errors and instability. Testing for revisions with only listed
	// content models, as other content models might use serialization formats
	// which aren't checked against $wgMaxArticleSize.
	$sizeCheckedModels = [
		'wikitext',
		'css',
		'json',
		'javascript',
		'text',
		''
	];
	// A revision without any text cannot exceed the limit, so skip strlen()
	// on an unset key.
	if ( isset( $revisionInfo['text'] ) &&
		( !isset( $revisionInfo['model'] ) ||
			in_array( $revisionInfo['model'], $sizeCheckedModels ) ) &&
		strlen( $revisionInfo['text'] ) > $wgMaxArticleSize * 1024
	) {
		throw new MWException( 'The text of ' .
			( isset( $revisionInfo['id'] ) ?
				"the revision with ID $revisionInfo[id]" :
				'a revision'
			) . " exceeds the maximum allowable size ($wgMaxArticleSize KB)" );
	}

	$revision = new WikiRevision( $this->config );

	if ( isset( $revisionInfo['id'] ) ) {
		$revision->setID( $revisionInfo['id'] );
	}
	if ( isset( $revisionInfo['model'] ) ) {
		$revision->setModel( $revisionInfo['model'] );
	}
	if ( isset( $revisionInfo['format'] ) ) {
		$revision->setFormat( $revisionInfo['format'] );
	}
	$revision->setTitle( $pageInfo['_title'] );

	if ( isset( $revisionInfo['text'] ) ) {
		// The content handler may transform the text before it is stored.
		$handler = $revision->getContentHandler();
		$text = $handler->importTransform(
			$revisionInfo['text'],
			$revision->getFormat() );

		$revision->setText( $text );
	}
	if ( isset( $revisionInfo['timestamp'] ) ) {
		$revision->setTimestamp( $revisionInfo['timestamp'] );
	} else {
		$revision->setTimestamp( wfTimestampNow() );
	}

	if ( isset( $revisionInfo['comment'] ) ) {
		$revision->setComment( $revisionInfo['comment'] );
	}

	// The mere presence of the key marks the revision as minor.
	if ( isset( $revisionInfo['minor'] ) ) {
		$revision->setMinor( true );
	}
	if ( isset( $revisionInfo['contributor']['ip'] ) ) {
		$revision->setUserIP( $revisionInfo['contributor']['ip'] );
	} elseif ( isset( $revisionInfo['contributor']['username'] ) ) {
		$revision->setUsername( $revisionInfo['contributor']['username'] );
	} else {
		$revision->setUsername( 'Unknown user' );
	}
	$revision->setNoUpdates( $this->mNoUpdates );

	return $this->revisionCallback( $revision );
}
894 
/**
 * Reads an <upload> element into an array and, when upload importing is
 * enabled, passes it to processUpload().
 *
 * @param array &$pageInfo Page data, forwarded to the hook and to processUpload()
 * @return mixed Result of processUpload(), or null when uploads are not imported
 */
private function handleUpload( &$pageInfo ) {
	$this->debug( "Enter upload handler" );
	$uploadInfo = [];

	$normalFields = [ 'timestamp', 'comment', 'filename', 'text',
		'src', 'size', 'sha1base36', 'archivename', 'rel' ];

	$skip = false;

	while ( $skip ? $this->reader->next() : $this->reader->read() ) {
		if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
			$this->reader->localName == 'upload' ) {
			break;
		}

		// Skipping applies to one unhandled element only. Without this reset a
		// single unknown tag would switch every later step to next(), unlike
		// the equivalent loop in handlePage().
		$skip = false;

		$tag = $this->reader->localName;

		if ( !Hooks::run( 'ImportHandleUploadXMLTag', [
			$this, $pageInfo
		] ) ) {
			// Do nothing
		} elseif ( in_array( $tag, $normalFields ) ) {
			$uploadInfo[$tag] = $this->nodeContents();
		} elseif ( $tag == 'contributor' ) {
			$uploadInfo['contributor'] = $this->handleContributor();
		} elseif ( $tag == 'contents' ) {
			// Read the attribute while the reader is still on the opening tag;
			// nodeContents() advances it to the end-element node.
			$encoding = $this->reader->getAttribute( 'encoding' );
			$contents = $this->nodeContents();
			if ( $encoding === 'base64' ) {
				$uploadInfo['fileSrc'] = $this->dumpTemp( base64_decode( $contents ) );
				$uploadInfo['isTempSrc'] = true;
			}
		} elseif ( $tag != '#text' ) {
			$this->warn( "Unhandled upload XML tag $tag" );
			$skip = true;
		}
	}

	// An existing file under the image base path replaces any inline contents.
	if ( $this->mImageBasePath && isset( $uploadInfo['rel'] ) ) {
		$path = "{$this->mImageBasePath}/{$uploadInfo['rel']}";
		if ( file_exists( $path ) ) {
			$uploadInfo['fileSrc'] = $path;
			$uploadInfo['isTempSrc'] = false;
		}
	}

	if ( $this->mImportUploads ) {
		return $this->processUpload( $pageInfo, $uploadInfo );
	}
}
949 
/**
 * Writes the given data to a newly created temporary file.
 *
 * @param string $contents
 * @return string Path returned by tempnam()
 */
private function dumpTemp( $contents ) {
	$tempPath = tempnam( wfTempDir(), 'importupload' );
	file_put_contents( $tempPath, $contents );

	return $tempPath;
}
959 
/**
 * Builds a WikiRevision from parsed upload data and passes it to the upload
 * callback.
 *
 * @param array $pageInfo Page data; '_title' and 'id' are read unconditionally
 * @param array $uploadInfo Data collected by handleUpload(); 'timestamp',
 *  'filename', 'src', 'size' and 'comment' are read unconditionally
 * @return mixed Result of the upload callback
 */
private function processUpload( $pageInfo, $uploadInfo ) {
	$upload = new WikiRevision( $this->config );

	// Description text defaults to an empty string.
	if ( isset( $uploadInfo['text'] ) ) {
		$text = $uploadInfo['text'];
	} else {
		$text = '';
	}

	$upload->setTitle( $pageInfo['_title'] );
	$upload->setID( $pageInfo['id'] );
	$upload->setTimestamp( $uploadInfo['timestamp'] );
	$upload->setText( $text );
	$upload->setFilename( $uploadInfo['filename'] );
	if ( isset( $uploadInfo['archivename'] ) ) {
		$upload->setArchiveName( $uploadInfo['archivename'] );
	}
	$upload->setSrc( $uploadInfo['src'] );
	if ( isset( $uploadInfo['fileSrc'] ) ) {
		$isTemporary = !empty( $uploadInfo['isTempSrc'] );
		$upload->setFileSrc( $uploadInfo['fileSrc'], $isTemporary );
	}
	if ( isset( $uploadInfo['sha1base36'] ) ) {
		$upload->setSha1Base36( $uploadInfo['sha1base36'] );
	}
	$upload->setSize( intval( $uploadInfo['size'] ) );
	$upload->setComment( $uploadInfo['comment'] );

	if ( isset( $uploadInfo['contributor']['ip'] ) ) {
		$upload->setUserIP( $uploadInfo['contributor']['ip'] );
	}
	if ( isset( $uploadInfo['contributor']['username'] ) ) {
		$upload->setUsername( $uploadInfo['contributor']['username'] );
	}
	$upload->setNoUpdates( $this->mNoUpdates );

	return call_user_func( $this->mUploadCallback, $upload );
}
998 
/**
 * Reads a <contributor> element and collects its id, ip and username children.
 *
 * @return array Map of the fields found; empty for an empty element
 */
private function handleContributor() {
	$info = [];

	if ( $this->reader->isEmptyElement ) {
		return $info;
	}

	$fields = [ 'id', 'ip', 'username' ];

	while ( $this->reader->read() ) {
		$tag = $this->reader->localName;

		if ( $this->reader->nodeType == XMLReader::END_ELEMENT && $tag == 'contributor' ) {
			break;
		}

		if ( in_array( $tag, $fields ) ) {
			$info[$tag] = $this->nodeContents();
		}
	}

	return $info;
}
1024 
/**
 * Converts a title string from the dump into a local Title and checks that
 * the page may be imported. Each rejection is reported through notice().
 *
 * @param string $text Title text from the dump
 * @param string|null $ns Namespace id from the dump, if present
 * @return array|bool [ Title, ForeignTitle ] on success, false when rejected
 */
private function processTitle( $text, $ns = null ) {
	$foreignTitleFactory = is_null( $this->foreignNamespaces )
		? new NaiveForeignTitleFactory()
		: new NamespaceAwareForeignTitleFactory( $this->foreignNamespaces );

	$foreignTitle = $foreignTitleFactory->createForeignTitle( $text, intval( $ns ) );
	$title = $this->importTitleFactory->createTitleFromForeignTitle( $foreignTitle );

	$commandLineMode = $this->config->get( 'CommandLineMode' );

	if ( is_null( $title ) ) {
		# Invalid page title? Ignore the page
		$this->notice( 'import-error-invalid', $foreignTitle->getFullText() );
		return false;
	}

	if ( $title->isExternal() ) {
		$this->notice( 'import-error-interwiki', $title->getPrefixedText() );
		return false;
	}

	if ( !$title->canExist() ) {
		$this->notice( 'import-error-special', $title->getPrefixedText() );
		return false;
	}

	if ( !$title->userCan( 'edit' ) && !$commandLineMode ) {
		# Do not import if the importing wiki user cannot edit this page
		$this->notice( 'import-error-edit', $title->getPrefixedText() );
		return false;
	}

	if ( !$title->exists() && !$title->userCan( 'create' ) && !$commandLineMode ) {
		# Do not import if the importing wiki user cannot create this page
		$this->notice( 'import-error-create', $title->getPrefixedText() );
		return false;
	}

	return [ $title, $foreignTitle ];
}
1067 }
NaiveImportTitleFactory
A class to convert page titles on a foreign wiki (ForeignTitle objects) into page titles on the local...
Definition: NaiveImportTitleFactory.php:33
WikiImporter\processRevision
processRevision( $pageInfo, $revisionInfo)
Definition: WikiImporter.php:824
WikiImporter\$mUploadCallback
$mUploadCallback
Definition: WikiImporter.php:36
$handler
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that probably a stub it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output modifiable modifiable after all normalizations have been except for the $wgMaxImageArea check set to true or false to override the $wgMaxImageArea check result gives extension the possibility to transform it themselves $handler
Definition: hooks.txt:805
WikiImporter
XML file reader for the page data importer.
Definition: WikiImporter.php:33
Title\newFromText
static newFromText( $text, $defaultNamespace=NS_MAIN)
Create a new Title from text, such as what one would find in a link.
Definition: Title.php:262
$wgMaxArticleSize
$wgMaxArticleSize
Maximum article size in kilobytes.
Definition: DefaultSettings.php:2170
WikiImporter\setImageBasePath
setImageBasePath( $dir)
Definition: WikiImporter.php:294
wfMessage
either a unescaped string or a HtmlArmor object after in associative array form externallinks including delete and has completed for all link tables whether this was an auto creation default is conds Array Extra conditions for the No matching items in log is displayed if loglist is empty msgKey Array If you want a nice box with a set this to the key of the message First element is the message additional optional elements are parameters for the key that are processed with wfMessage() -> params() ->parseAsBlock() - offset Set to overwrite offset parameter in $wgRequest set to '' to unset offset - wrap String Wrap the message in html(usually something like "&lt
array
the array() calling protocol came about after MediaWiki 1.4rc1.
wfSetVar
wfSetVar(&$dest, $source, $force=false)
Sets dest to source and returns the original value of dest If source is NULL, it just returns the val...
Definition: GlobalFunctions.php:1723
UploadSourceAdapter\registerSource
static registerSource(ImportSource $source)
Definition: UploadSourceAdapter.php:48
WikiImporter\$mImportUploads
$mImportUploads
Definition: WikiImporter.php:39
NamespaceAwareForeignTitleFactory
A parser that translates page titles on a foreign wiki into ForeignTitle objects, using information a...
Definition: NamespaceAwareForeignTitleFactory.php:26
WikiImporter\$mRevisionCallback
$mRevisionCallback
Definition: WikiImporter.php:36
WikiImporter\revisionCallback
revisionCallback( $revision)
Notify the callback function of a revision.
Definition: WikiImporter.php:467
WikiImporter\setNoticeCallback
setNoticeCallback( $callback)
Set a callback that displays notice messages.
Definition: WikiImporter.php:152
DeferredUpdates\addUpdate
static addUpdate(DeferrableUpdate $update, $stage=self::POSTSEND)
Add an update to the deferred list to be run later by execute()
Definition: DeferredUpdates.php:73
NaiveForeignTitleFactory
A parser that translates page titles on a foreign wiki into ForeignTitle objects, with no knowledge o...
Definition: NaiveForeignTitleFactory.php:26
WikiImporter\$mPageOutCallback
$mPageOutCallback
Definition: WikiImporter.php:37
WikiImporter\setNoUpdates
setNoUpdates( $noupdates)
Set 'no updates' mode.
Definition: WikiImporter.php:142
WikiImporter\getReader
getReader()
Definition: WikiImporter.php:100
WikiImporter\processLogItem
processLogItem( $logInfo)
Definition: WikiImporter.php:659
$params
$params
Definition: styleTest.css.php:40
$title
namespace and then decline to actually register it file or subcat img or subcat $title
Definition: hooks.txt:956
WikiImporter\handleRevision
handleRevision(&$pageInfo)
Definition: WikiImporter.php:783
WikiImporter\setRevisionCallback
setRevisionCallback( $callback)
Sets the action to perform as each page revision is reached.
Definition: WikiImporter.php:187
WikiImporter\handleContributor
handleContributor()
Definition: WikiImporter.php:1002
ImportReporter
Reporting callback.
Definition: SpecialImport.php:535
NamespaceImportTitleFactory
A class to convert page titles on a foreign wiki (ForeignTitle objects) into page titles on the local...
Definition: NamespaceImportTitleFactory.php:27
WikiImporter\nodeContents
nodeContents()
Shouldn't something like this be built-in to XMLReader? Fetches text contents of the current element,...
Definition: WikiImporter.php:507
php
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
WikiImporter\siteInfoCallback
siteInfoCallback( $siteInfo)
Notify the callback function of site info.
Definition: WikiImporter.php:427
$debug
$debug
Definition: mcc.php:31
NS_MAIN
const NS_MAIN
Definition: Defines.php:56
Config
Interface for configuration instances.
Definition: Config.php:28
MWException
MediaWiki exception.
Definition: MWException.php:26
ImportTitleFactory
Represents an object that can convert page titles on a foreign wiki (ForeignTitle objects) into page ...
Definition: ImportTitleFactory.php:26
WikiPage\factory
static factory(Title $title)
Create a WikiPage object of the appropriate class for the given title.
Definition: WikiPage.php:115
wfDeprecated
wfDeprecated( $function, $version=false, $component=false, $callerOffset=2)
Throws a warning that $function is deprecated.
Definition: GlobalFunctions.php:1126
WikiImporter\dumpTemp
dumpTemp( $contents)
Definition: WikiImporter.php:954
$tag
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books $tag
Definition: hooks.txt:1010
WikiImporter\$countableCache
array $countableCache
Definition: WikiImporter.php:46
WikiImporter\pageOutCallback
pageOutCallback( $title, $foreignTitle, $revCount, $sucCount, $pageInfo)
Notify the callback function when a "</page>" is closed.
Definition: WikiImporter.php:454
MWContentSerializationException
Exception representing a failure to serialize or unserialize a content object.
Definition: ContentHandler.php:36
ConfigFactory\getDefaultInstance
static getDefaultInstance()
Definition: ConfigFactory.php:51
WikiImporter\throwXmlError
throwXmlError( $err)
Definition: WikiImporter.php:104
MWNamespace\hasSubpages
static hasSubpages( $index)
Does the namespace allow subpages?
Definition: MWNamespace.php:331
SubpageImportTitleFactory
A class to convert page titles on a foreign wiki (ForeignTitle objects) into page titles on the local...
Definition: SubpageImportTitleFactory.php:27
$page
namespace are movable Hooks may change this value to override the return value of MWNamespace::isMovable(). 'NewDifferenceEngine' do that in ParserLimitReportFormat instead use this to modify the parameters of the image and a DIV can begin in one section and end in another Make sure your code can handle that case gracefully See the EditSectionClearerLink extension for an example zero but section is usually empty its values are the globals values before the output is cached $page
Definition: hooks.txt:2259
SiteStatsUpdate\factory
static factory(array $deltas)
Definition: SiteStatsUpdate.php:60
WikiImporter\finishImportPage
finishImportPage( $title, $foreignTitle, $revCount, $sRevCount, $pageInfo)
Mostly for hook use.
Definition: WikiImporter.php:374
global
when a variable name is used in a function, it is silently declared as a new local, masking the global
Definition: design.txt:93
wfTimestampNow
wfTimestampNow()
Convenience function; returns MediaWiki timestamp for the present time.
Definition: GlobalFunctions.php:2036
WikiImporter\$importTitleFactory
ImportTitleFactory $importTitleFactory
Definition: WikiImporter.php:44
WikiImporter\processUpload
processUpload( $pageInfo, $uploadInfo)
Definition: WikiImporter.php:965
WikiImporter\setImportUploads
setImportUploads( $import)
Definition: WikiImporter.php:301
WikiImporter\$mNoUpdates
$mNoUpdates
Definition: WikiImporter.php:40
wfDebug
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
Definition: GlobalFunctions.php:997
list
deferred.txt: A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user. For example, updating the view counts, updating the linked-to tables after a save, etc. PHP does not yet have any way to tell the server to actually return and disconnect while still running these updates, but it might have such a feature in the future. We handle these by creating a deferred-update object and putting those objects on a global list.
Definition: deferred.txt:11
$type
namespace are movable Hooks may change this value to override the return value of MWNamespace::isMovable(). 'NewDifferenceEngine' do that in ParserLimitReportFormat instead use this to modify the parameters of the image and a DIV can begin in one section and end in another Make sure your code can handle that case gracefully See the EditSectionClearerLink extension for an example zero but section is usually empty its values are the globals values before the output is cached one of or reset my talk my contributions etc etc otherwise the built in rate limiting checks are if enabled allows for interception of redirect as a string mapping parameter names to values & $type
Definition: hooks.txt:2259
WikiImporter\$mPageCallback
$mPageCallback
Definition: WikiImporter.php:36
WikiImporter\$mSiteInfoCallback
$mSiteInfoCallback
Definition: WikiImporter.php:37
WikiImporter\beforeImportPage
beforeImportPage( $titleAndForeignTitle)
Default per-page callback.
Definition: WikiImporter.php:311
StatusValue\newGood
static newGood( $value=null)
Factory function for good results.
Definition: StatusValue.php:76
WikiImporter\importRevision
importRevision( $revision)
Default per-revision callback, performs the import.
Definition: WikiImporter.php:323
WikiImporter\doImport
doImport()
Primary entry point.
Definition: WikiImporter.php:533
WikiImporter\processTitle
processTitle( $text, $ns=null)
Definition: WikiImporter.php:1030
WikiImporter\$mImageBasePath
$mImageBasePath
Definition: WikiImporter.php:39
WikiImporter\$foreignNamespaces
$foreignNamespaces
Definition: WikiImporter.php:35
WikiImporter\notice
notice( $msg)
Definition: WikiImporter.php:119
WikiImporter\setDebug
setDebug( $debug)
Set debug mode...
Definition: WikiImporter.php:134
MWNamespace\exists
static exists( $index)
Returns whether the specified namespace exists.
Definition: MWNamespace.php:161
WikiImporter\$reader
$reader
Definition: WikiImporter.php:34
WikiImporter\handleUpload
handleUpload(&$pageInfo)
Definition: WikiImporter.php:899
WikiImporter\setUploadCallback
setUploadCallback( $callback)
Sets the action to perform as each file upload version is reached.
Definition: WikiImporter.php:198
WikiImporter\warn
warn( $data)
Definition: WikiImporter.php:115
WikiImporter\handleSiteInfo
handleSiteInfo()
Definition: WikiImporter.php:596
WikiImporter\setTargetRootPage
setTargetRootPage( $rootpage)
Set a target root page under which all pages are imported.
Definition: WikiImporter.php:262
$args
if( $line===false) $args
Definition: cdb.php:64
WikiImporter\$mNoticeCallback
$mNoticeCallback
Definition: WikiImporter.php:38
wfTempDir
wfTempDir()
Tries to get the system directory for temporary files.
Definition: GlobalFunctions.php:2074
WikiRevision
Represents a revision, log entry or upload during the import process.
Definition: WikiRevision.php:33
WikiImporter\debug
debug( $data)
Definition: WikiImporter.php:109
WikiImporter\importLogItem
importLogItem( $revision)
Default per-log-item callback, performs the import.
Definition: WikiImporter.php:352
$dir
if(count( $args)==0) $dir
Definition: importImages.php:56
WikiImporter\importUpload
importUpload( $revision)
Dummy for now...
Definition: WikiImporter.php:361
WikiImporter\__construct
__construct(ImportSource $source, Config $config=null)
Creates an ImportXMLReader drawing from the source provided.
Definition: WikiImporter.php:54
WikiImporter\handleLogItem
handleLogItem()
Definition: WikiImporter.php:623
ImportSource
Source interface for XML import.
Definition: ImportSource.php:32
$path
$path
Definition: NoLocalSettings.php:26
WikiImporter\$config
Config $config
Definition: WikiImporter.php:42
WikiImporter\setSiteInfoCallback
setSiteInfoCallback( $callback)
Sets the action to perform when site info is encountered.
Definition: WikiImporter.php:220
WikiImporter\setPageOutCallback
setPageOutCallback( $callback)
Sets the action to perform as each page in the stream is completed.
Definition: WikiImporter.php:176
WikiImporter\debugRevisionHandler
debugRevisionHandler(&$revision)
Alternate per-revision callback, for debugging.
Definition: WikiImporter.php:409
$source
$source
Definition: mwdoc-filter.php:45
WikiImporter\handlePage
handlePage()
Definition: WikiImporter.php:699
WikiImporter\nodeAttribute
nodeAttribute( $attr)
Retrieves the contents of the named attribute of the current element.
Definition: WikiImporter.php:496
WikiImporter\setPageCallback
setPageCallback( $callback)
Sets the action to perform as each new page in the stream is reached.
Definition: WikiImporter.php:161
WikiImporter\setImportTitleFactory
setImportTitleFactory( $factory)
Sets the factory object to use to convert ForeignTitle objects into local Title objects.
Definition: WikiImporter.php:231
WikiImporter\setTargetNamespace
setTargetNamespace( $namespace)
Set a target namespace to override the defaults.
Definition: WikiImporter.php:240
$content
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set and then return false from the hook function Ensure you consume the ChangeTagAfterDelete hook to carry out custom deletion actions as context called by AbstractContent::getParserOutput May be used to override the normal model specific rendering of page content $content
Definition: hooks.txt:1049
WikiImporter\logItemCallback
logItemCallback( $revision)
Notify the callback function of a new log item.
Definition: WikiImporter.php:481
Hooks\run
static run( $event, array $args=[], $deprecatedVersion=null)
Call hook functions defined in Hooks::register and $wgHooks.
Definition: Hooks.php:131
WikiImporter\pageCallback
pageCallback( $title)
Notify the callback function when a new "<page>" is reached.
Definition: WikiImporter.php:440
WikiImporter\$mLogItemCallback
$mLogItemCallback
Definition: WikiImporter.php:36
$buffer
$buffer
Definition: mwdoc-filter.php:48
$status
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set $status
Definition: hooks.txt:1049
WikiImporter\$mDebug
$mDebug
Definition: WikiImporter.php:38
WikiImporter\setLogItemCallback
setLogItemCallback( $callback)
Sets the action to perform as each log item is reached.
Definition: WikiImporter.php:209
$wgContLang
this class mediates it Skin Encapsulates a look and feel for the wiki All of the functions that render HTML and make choices about how to render it are here and are called from various other places when and is meant to be subclassed with other skins that may override some of its functions The User object contains a reference to a and so rather than having a global skin object we just rely on the global User and get the skin with $wgUser and also has some character encoding functions and other locale stuff The current user interface language is instantiated as and the content language as $wgContLang
Definition: design.txt:56