MediaWiki  1.29.1
WikiImporter.php
Go to the documentation of this file.
1 <?php
27 
34 class WikiImporter {
/** @var XMLReader|null Streaming reader over the dump; instantiated by the constructor */
private $reader = null;
/** @var array|null Namespace key => name pairs read from <siteinfo>; null until one is seen */
private $foreignNamespaces = null;
// Callbacks and flags assigned through the set*() methods of this class.
private $mLogItemCallback, $mUploadCallback, $mRevisionCallback, $mPageCallback;
private $mSiteInfoCallback, $mPageOutCallback;
private $mNoticeCallback, $mDebug;
private $mImportUploads, $mImageBasePath;
/** @var bool Passed to every WikiRevision via setNoUpdates() */
private $mNoUpdates = false;
/** @var Config */
private $config;
/** @var ImportTitleFactory */
private $importTitleFactory;
/** @var array Map of 'title_<prefixed text>' => countable state recorded before a page is imported */
private $countableCache = [];
/** @var bool When true, finishImportPage() skips the article-count adjustment */
private $disableStatisticsUpdate = false;
50 
/**
 * Creates an importer that reads its XML from the given import source.
 *
 * The source is registered with UploadSourceAdapter and opened through the
 * 'uploadsource' stream wrapper; the default page/revision/upload/log-item
 * callbacks of this class are then installed.
 *
 * @param ImportSource $source
 * @param Config $config Omitting it is deprecated since 1.25; the main config is used instead
 * @throws Exception If the XMLReader class is not available
 * @throws MWException If XMLReader fails to open the source
 */
function __construct( ImportSource $source, Config $config = null ) {
	if ( !class_exists( 'XMLReader' ) ) {
		throw new Exception( 'Import requires PHP to have been compiled with libxml support' );
	}

	$this->reader = new XMLReader();
	if ( !$config ) {
		wfDeprecated( __METHOD__ . ' without a Config instance', '1.25' );
		$config = MediaWikiServices::getInstance()->getMainConfig();
	}
	$this->config = $config;

	if ( !in_array( 'uploadsource', stream_get_wrappers() ) ) {
		stream_wrapper_register( 'uploadsource', 'UploadSourceAdapter' );
	}
	// The adapter hands back the identifier under which the source can be opened.
	$id = UploadSourceAdapter::registerSource( $source );

	// Enable the entity loader, as it is needed for loading external URLs via
	// XMLReader::open (T86036)
	$oldDisable = libxml_disable_entity_loader( false );
	if ( defined( 'LIBXML_PARSEHUGE' ) ) {
		$status = $this->reader->open( "uploadsource://$id", null, LIBXML_PARSEHUGE );
	} else {
		$status = $this->reader->open( "uploadsource://$id" );
	}
	if ( !$status ) {
		$error = libxml_get_last_error();
		libxml_disable_entity_loader( $oldDisable );
		// libxml_get_last_error() yields false when the error buffer is empty.
		$detail = $error ? $error->message : 'unknown libxml error';
		throw new MWException( 'Encountered an internal error while initializing WikiImporter object: ' .
			$detail );
	}
	libxml_disable_entity_loader( $oldDisable );

	// Default callbacks
	$this->setPageCallback( [ $this, 'beforeImportPage' ] );
	$this->setRevisionCallback( [ $this, "importRevision" ] );
	$this->setUploadCallback( [ $this, 'importUpload' ] );
	$this->setLogItemCallback( [ $this, 'importLogItem' ] );
	$this->setPageOutCallback( [ $this, 'finishImportPage' ] );

	$this->importTitleFactory = new NaiveImportTitleFactory();
}
99 
/**
 * Exposes the underlying reader, e.g. so hook handlers can inspect the current node.
 *
 * @return XMLReader|null The reader created by the constructor
 */
public function getReader() {
	$reader = $this->reader;
	return $reader;
}
106 
/**
 * Records an XML error in the debug logs.
 *
 * Despite the name, nothing is thrown: the message goes to debug() and to wfDebug().
 *
 * @param string $err Error description
 */
public function throwXmlError( $err ) {
	$failure = "FAILURE: $err";
	$this->debug( $failure );

	$logLine = "WikiImporter XML error: $err\n";
	wfDebug( $logLine );
}
111 
/**
 * Writes a line to the debug log, but only while debug mode is switched on (see setDebug()).
 *
 * @param string $data Text to log
 */
public function debug( $data ) {
	if ( !$this->mDebug ) {
		return;
	}
	wfDebug( "IMPORT: $data\n" );
}
117 
/**
 * Writes a warning to the debug log. Unlike debug(), this is not gated on debug mode.
 *
 * @param string $data Text to log
 */
public function warn( $data ) {
	$line = "IMPORT: $data\n";
	wfDebug( $line );
}
121 
/**
 * Reports a notice to the user.
 *
 * When a callable notice callback is set it receives the message key and the
 * list of extra arguments; otherwise the rendered message is echoed.
 *
 * @param string $msg Message key; any further arguments are message parameters
 */
public function notice( $msg /*, $param, ...*/ ) {
	// Everything after the message key is a message parameter.
	$params = array_slice( func_get_args(), 1 );

	if ( !is_callable( $this->mNoticeCallback ) ) {
		# No ImportReporter -> CLI
		echo wfMessage( $msg, $params )->text() . "\n";
		return;
	}

	call_user_func( $this->mNoticeCallback, $msg, $params );
}
132 
/**
 * Switches debug mode on or off; debug() only logs while it is on.
 *
 * @param bool $debug
 */
function setDebug( $debug ) {
	$enabled = $debug;
	$this->mDebug = $enabled;
}
140 
/**
 * Sets the 'no updates' flag, which is forwarded to every WikiRevision this importer builds.
 *
 * @param bool $noupdates
 */
function setNoUpdates( $noupdates ) {
	$flag = $noupdates;
	$this->mNoUpdates = $flag;
}
148 
/**
 * Installs the callback that notice() uses to display messages.
 *
 * @param callable $callback
 * @return mixed Whatever wfSetVar() returns for the notice-callback slot
 */
public function setNoticeCallback( $callback ) {
	$result = wfSetVar( $this->mNoticeCallback, $callback );
	return $result;
}
158 
/**
 * Replaces the callback invoked when a page is reached (see pageCallback()).
 *
 * @param callable $callback
 * @return callable|null The callback that was installed before this call
 */
public function setPageCallback( $callback ) {
	$old = $this->mPageCallback;
	$this->mPageCallback = $callback;

	return $old;
}
169 
/**
 * Replaces the callback invoked once a page has been fully read (see pageOutCallback()).
 *
 * @param callable $callback
 * @return callable|null The callback that was installed before this call
 */
public function setPageOutCallback( $callback ) {
	$old = $this->mPageOutCallback;
	$this->mPageOutCallback = $callback;

	return $old;
}
184 
/**
 * Replaces the callback invoked for each page revision (see revisionCallback()).
 *
 * @param callable $callback
 * @return callable|null The callback that was installed before this call
 */
public function setRevisionCallback( $callback ) {
	$old = $this->mRevisionCallback;
	$this->mRevisionCallback = $callback;

	return $old;
}
195 
/**
 * Replaces the callback invoked for each file upload (used by processUpload()).
 *
 * @param callable $callback
 * @return callable|null The callback that was installed before this call
 */
public function setUploadCallback( $callback ) {
	$old = $this->mUploadCallback;
	$this->mUploadCallback = $callback;

	return $old;
}
206 
/**
 * Replaces the callback invoked for each log item (see logItemCallback()).
 *
 * @param callable $callback
 * @return callable|null The callback that was installed before this call
 */
public function setLogItemCallback( $callback ) {
	$old = $this->mLogItemCallback;
	$this->mLogItemCallback = $callback;

	return $old;
}
217 
/**
 * Replaces the callback invoked once <siteinfo> has been read (see siteInfoCallback()).
 *
 * @param callable $callback
 * @return callable|null The callback that was installed before this call
 */
public function setSiteInfoCallback( $callback ) {
	$old = $this->mSiteInfoCallback;
	$this->mSiteInfoCallback = $callback;

	return $old;
}
228 
/**
 * Sets the factory that processTitle() uses to turn foreign titles into local ones.
 *
 * @param ImportTitleFactory $factory
 */
public function setImportTitleFactory( $factory ) {
	$titleFactory = $factory;
	$this->importTitleFactory = $titleFactory;
}
237 
/**
 * Selects the namespace that imported pages are placed in.
 *
 * Passing null keeps each page's own namespace by switching back to the
 * naive title factory; a valid non-negative namespace installs a
 * NamespaceImportTitleFactory for it.
 *
 * @param null|int $namespace
 * @return bool False if the namespace is negative or does not exist, true otherwise
 */
public function setTargetNamespace( $namespace ) {
	if ( is_null( $namespace ) ) {
		// Don't override namespaces
		$this->setImportTitleFactory( new NaiveImportTitleFactory() );
		return true;
	} elseif (
		$namespace >= 0 &&
		MWNamespace::exists( intval( $namespace ) )
	) {
		$namespace = intval( $namespace );
		$this->setImportTitleFactory( new NamespaceImportTitleFactory( $namespace ) );
		return true;
	} else {
		return false;
	}
}
259 
/**
 * Selects a root page under which all imported pages become subpages.
 *
 * Null resets to the naive title factory; an empty string changes nothing.
 * Otherwise the root page must parse to a local title in a namespace that
 * supports subpages, or the returned status is fatal.
 *
 * @param null|string $rootpage
 * @return Status Good on success; fatal with 'import-rootpage-invalid' or
 *   'import-rootpage-nosubpage' when the root page cannot be used
 */
public function setTargetRootPage( $rootpage ) {
	$status = Status::newGood();
	if ( is_null( $rootpage ) ) {
		// No rootpage
		$this->setImportTitleFactory( new NaiveImportTitleFactory() );
	} elseif ( $rootpage !== '' ) {
		$rootpage = rtrim( $rootpage, '/' ); // avoid double slashes
		$title = Title::newFromText( $rootpage );

		if ( !$title || $title->isExternal() ) {
			$status->fatal( 'import-rootpage-invalid' );
		} else {
			if ( !MWNamespace::hasSubpages( $title->getNamespace() ) ) {
				global $wgContLang;

				// The main namespace has no name of its own, so use the 'blanknamespace' message.
				$displayNSText = $title->getNamespace() == NS_MAIN
					? wfMessage( 'blanknamespace' )->text()
					: $wgContLang->getNsText( $title->getNamespace() );
				$status->fatal( 'import-rootpage-nosubpage', $displayNSText );
			} else {
				// set namespace to 'all', so the namespace check in processTitle() can pass
				$this->setTargetNamespace( null );
				$this->setImportTitleFactory( new SubpageImportTitleFactory( $title ) );
			}
		}
	}
	return $status;
}
293 
/**
 * Sets the directory that handleUpload() resolves an upload's 'rel' path against.
 *
 * @param string $dir
 */
public function setImageBasePath( $dir ) {
	$basePath = $dir;
	$this->mImageBasePath = $basePath;
}
300 
/**
 * Controls whether handleUpload() passes uploads on to processUpload().
 *
 * @param bool $import
 */
public function setImportUploads( $import ) {
	$enabled = $import;
	$this->mImportUploads = $enabled;
}
307 
/**
 * Turns off the article-count adjustment that finishImportPage() otherwise performs.
 * There is no counterpart in this class to turn it back on.
 */
public function disableStatisticsUpdate() {
	$disabled = true;
	$this->disableStatisticsUpdate = $disabled;
}
315 
/**
 * Default per-page callback, run before a page's revisions are imported.
 *
 * Records whether the page is currently countable so that
 * finishImportPage() can compare against the state after the import.
 *
 * @param array $titleAndForeignTitle Two-element array; element 0 is the local Title
 * @return bool Always true
 */
public function beforeImportPage( $titleAndForeignTitle ) {
	$title = $titleAndForeignTitle[0];
	$page = WikiPage::factory( $title );
	$this->countableCache['title_' . $title->getPrefixedText()] = $page->isCountable();
	return true;
}
328 
/**
 * Default per-revision callback: imports one revision as an old revision.
 *
 * A notice is emitted, and false returned, when the revision's content
 * handler cannot be used on the target title or when the content fails to
 * unserialize.
 *
 * @param WikiRevision $revision
 * @return bool Result of WikiRevision::importOldRevision(), or false on either failure above
 */
public function importRevision( $revision ) {
	$handler = $revision->getContentHandler();
	if ( !$handler->canBeUsedOn( $revision->getTitle() ) ) {
		$this->notice( 'import-error-bad-location',
			$revision->getTitle()->getPrefixedText(),
			$revision->getID(),
			$revision->getModel(),
			$revision->getFormat() );

		return false;
	}

	try {
		$imported = $revision->importOldRevision();
	} catch ( MWContentSerializationException $ex ) {
		$this->notice( 'import-error-unserialize',
			$revision->getTitle()->getPrefixedText(),
			$revision->getID(),
			$revision->getModel(),
			$revision->getFormat() );
		$imported = false;
	}

	return $imported;
}
357 
/**
 * Default log-item callback: delegates to the revision object's own importLogItem().
 *
 * @param WikiRevision $revision
 * @return mixed Whatever WikiRevision::importLogItem() returns
 */
public function importLogItem( $revision ) {
	$outcome = $revision->importLogItem();
	return $outcome;
}
366 
/**
 * Default upload callback: delegates to the revision object's own importUpload().
 *
 * @param WikiRevision $revision
 * @return mixed Whatever WikiRevision::importUpload() returns
 */
public function importUpload( $revision ) {
	$outcome = $revision->importUpload();
	return $outcome;
}
375 
/**
 * Default page-out callback, run after all revisions of a page were handled.
 *
 * Unless statistics updates are disabled, it compares the page's countable
 * state with the one cached by beforeImportPage() and queues an article
 * count adjustment when they differ. Finally it runs the 'AfterImportPage'
 * hook with the arguments this method received.
 *
 * @param Title $title Local title of the imported page
 * @param ForeignTitle $foreignTitle Title of the page on the source wiki
 * @param int $revCount Number of revisions seen
 * @param int $sRevCount Number of revisions imported successfully
 * @param array $pageInfo Page data collected by handlePage()
 * @return bool Result of Hooks::run( 'AfterImportPage' )
 */
public function finishImportPage( $title, $foreignTitle, $revCount,
	$sRevCount, $pageInfo
) {
	// Update article count statistics (T42009)
	// The normal counting logic in WikiPage->doEditUpdates() is designed for
	// one-revision-at-a-time editing, not bulk imports. In this situation it
	// suffers from issues of replica DB lag. We let WikiPage handle the total page
	// and revision count, and we implement our own custom logic for the
	// article (content page) count.
	if ( !$this->disableStatisticsUpdate ) {
		$page = WikiPage::factory( $title );
		$page->loadPageData( 'fromdbmaster' );
		$content = $page->getContent();
		if ( $content === null ) {
			wfDebug( __METHOD__ . ': Skipping article count adjustment for ' . $title .
				' because WikiPage::getContent() returned null' );
		} else {
			$editInfo = $page->prepareContentForEdit( $content );
			$countKey = 'title_' . $title->getPrefixedText();
			$countable = $page->isCountable( $editInfo );
			if ( array_key_exists( $countKey, $this->countableCache ) &&
				$countable != $this->countableCache[$countKey] ) {
				// +1 when the page became countable, -1 when it stopped being countable.
				DeferredUpdates::addUpdate( SiteStatsUpdate::factory( [
					'articles' => ( (int)$countable - (int)$this->countableCache[$countKey] )
				] ) );
			}
		}
	}

	$args = func_get_args();
	return Hooks::run( 'AfterImportPage', $args );
}
417 
/**
 * Alternative per-revision handler that only dumps the revision's fields via debug().
 *
 * @param object &$revision Object exposing title, user_text, timestamp, comment and text properties
 */
public function debugRevisionHandler( &$revision ) {
	$this->debug( "Got revision:" );

	$titleLine = is_object( $revision->title )
		? "-- Title: " . $revision->title->getPrefixedText()
		: "-- Title: <invalid>";
	$this->debug( $titleLine );

	$this->debug( "-- User: " . $revision->user_text );
	$this->debug( "-- Timestamp: " . $revision->timestamp );
	$this->debug( "-- Comment: " . $revision->comment );
	$this->debug( "-- Text: " . $revision->text );
}
434 
/**
 * Passes the collected site info, plus this importer, to the site-info callback.
 *
 * @param array $siteInfo
 * @return mixed The callback's return value, or false when no callback is set
 */
private function siteInfoCallback( $siteInfo ) {
	if ( !isset( $this->mSiteInfoCallback ) ) {
		return false;
	}

	$callbackArgs = [ $siteInfo, $this ];
	return call_user_func_array( $this->mSiteInfoCallback, $callbackArgs );
}
448 
/**
 * Invokes the page callback, if one is set, with the given title data.
 * The callback's return value is discarded.
 *
 * @param array $title Title pair as produced by processTitle()
 */
function pageCallback( $title ) {
	if ( !isset( $this->mPageCallback ) ) {
		return;
	}
	call_user_func( $this->mPageCallback, $title );
}
458 
/**
 * Invokes the page-out callback, if one is set, forwarding every argument unchanged.
 * The callback's return value is discarded.
 *
 * @param Title $title
 * @param ForeignTitle $foreignTitle
 * @param int $revCount Number of revisions seen
 * @param int $sucCount Number of revisions imported successfully
 * @param array $pageInfo
 */
private function pageOutCallback( $title, $foreignTitle, $revCount,
	$sucCount, $pageInfo
) {
	if ( !isset( $this->mPageOutCallback ) ) {
		return;
	}

	$forwarded = func_get_args();
	call_user_func_array( $this->mPageOutCallback, $forwarded );
}
474 
/**
 * Passes a revision, plus this importer, to the revision callback.
 *
 * @param WikiRevision $revision
 * @return mixed The callback's return value, or false when no callback is set
 */
private function revisionCallback( $revision ) {
	if ( !isset( $this->mRevisionCallback ) ) {
		return false;
	}

	$callbackArgs = [ $revision, $this ];
	return call_user_func_array( $this->mRevisionCallback, $callbackArgs );
}
488 
/**
 * Passes a log item, plus this importer, to the log-item callback.
 *
 * @param WikiRevision $revision
 * @return mixed The callback's return value, or false when no callback is set
 */
private function logItemCallback( $revision ) {
	if ( !isset( $this->mLogItemCallback ) ) {
		return false;
	}

	$callbackArgs = [ $revision, $this ];
	return call_user_func_array( $this->mLogItemCallback, $callbackArgs );
}
502 
/**
 * Looks up an attribute on the node the reader is currently positioned at.
 *
 * @param string $attr Attribute name
 * @return string|null Whatever XMLReader::getAttribute() returns for that name
 */
public function nodeAttribute( $attr ) {
	$value = $this->reader->getAttribute( $attr );
	return $value;
}
512 
/**
 * Collects the text of the current element.
 *
 * Reads forward, concatenating text, CDATA and significant-whitespace
 * nodes, and stops at the first end-element node. An empty element yields
 * an empty string without advancing the reader. If the input runs out
 * before an end element is seen, the reader is closed and '' is returned.
 *
 * @return string
 */
public function nodeContents() {
	if ( $this->reader->isEmptyElement ) {
		return "";
	}

	$textTypes = [
		XMLReader::TEXT,
		XMLReader::CDATA,
		XMLReader::SIGNIFICANT_WHITESPACE,
	];
	$collected = "";

	while ( $this->reader->read() ) {
		$nodeType = $this->reader->nodeType;
		if ( $nodeType == XMLReader::END_ELEMENT ) {
			return $collected;
		}
		if ( in_array( $nodeType, $textTypes ) ) {
			$collected .= $this->reader->value;
		}
	}

	// Ran out of input without meeting a closing tag.
	$this->reader->close();
	return '';
}
540 
/**
 * Main entry point: walks the top level of the dump and dispatches each tag.
 *
 * The first node must be <mediawiki>. Inside it, <siteinfo>, <page> and
 * <logitem> go to their handlers unless the 'ImportHandleToplevelXMLTag'
 * hook returns false; any other non-text tag is logged and its subtree
 * skipped. The entity loader setting is restored and the reader closed
 * before returning or rethrowing.
 *
 * @throws MWException If the root element is not <mediawiki>
 * @throws Exception Any exception raised while processing, after cleanup
 * @return bool Always true when no exception is thrown
 */
public function doImport() {
	// Calls to reader->read need to be wrapped in calls to
	// libxml_disable_entity_loader() to avoid local file
	// inclusion attacks (T48932).
	$oldDisable = libxml_disable_entity_loader( true );
	$this->reader->read();

	if ( $this->reader->localName != 'mediawiki' ) {
		libxml_disable_entity_loader( $oldDisable );
		throw new MWException( "Expected <mediawiki> tag, got " .
			$this->reader->localName );
	}
	$this->debug( "<mediawiki> tag is correct." );

	$this->debug( "Starting primary dump processing loop." );

	$hasNode = $this->reader->read();
	$pending = null;
	try {
		while ( $hasNode ) {
			$tag = $this->reader->localName;
			$type = $this->reader->nodeType;
			$skipSubtree = false;

			if ( !Hooks::run( 'ImportHandleToplevelXMLTag', [ $this ] ) ) {
				// Do nothing
			} elseif ( $tag == 'mediawiki' && $type == XMLReader::END_ELEMENT ) {
				break;
			} elseif ( $tag == 'siteinfo' ) {
				$this->handleSiteInfo();
			} elseif ( $tag == 'page' ) {
				$this->handlePage();
			} elseif ( $tag == 'logitem' ) {
				$this->handleLogItem();
			} elseif ( $tag != '#text' ) {
				$this->warn( "Unhandled top-level XML tag $tag" );

				$skipSubtree = true;
			}

			if ( !$skipSubtree ) {
				$hasNode = $this->reader->read();
				continue;
			}

			// Jump over the unknown element and everything nested in it.
			$hasNode = $this->reader->next();
			$this->debug( "Skip" );
		}
	} catch ( Exception $ex ) {
		// Hold on to the exception until the cleanup below has run.
		$pending = $ex;
	}

	// finally
	libxml_disable_entity_loader( $oldDisable );
	$this->reader->close();

	if ( $pending ) {
		throw $pending;
	}

	return true;
}
608 
/**
 * Reads a <siteinfo> element and hands the result to the site-info callback.
 *
 * Each <namespace> is stored in $this->foreignNamespaces keyed by its 'key'
 * attribute; sitename, base, generator and case are copied into the site
 * info array, which also gets the namespace map under '_namespaces'.
 */
private function handleSiteInfo() {
	$this->debug( "Enter site info handler." );

	// Fields that can just be stuffed in the siteInfo object
	$simpleFields = [ 'sitename', 'base', 'generator', 'case' ];
	$siteInfo = [];

	while ( $this->reader->read() ) {
		$isClosingTag = $this->reader->nodeType == XMLReader::END_ELEMENT;
		if ( $isClosingTag && $this->reader->localName == 'siteinfo' ) {
			break;
		}

		$tag = $this->reader->localName;

		if ( $tag == 'namespace' ) {
			$nsKey = $this->nodeAttribute( 'key' );
			$this->foreignNamespaces[$nsKey] = $this->nodeContents();
			continue;
		}

		if ( in_array( $tag, $simpleFields ) ) {
			$siteInfo[$tag] = $this->nodeContents();
		}
	}

	$siteInfo['_namespaces'] = $this->foreignNamespaces;
	$this->siteInfoCallback( $siteInfo );
}
635 
/**
 * Reads a <logitem> element into an array and passes it to processLogItem().
 *
 * The 'ImportHandleLogItemXMLTag' hook runs for every node first; when it
 * returns false the node is left alone. Unknown non-text tags are only
 * logged as warnings.
 */
private function handleLogItem() {
	$this->debug( "Enter log item handler." );

	// Fields that can just be stuffed in the pageInfo object
	$simpleFields = [ 'id', 'comment', 'type', 'action', 'timestamp',
		'logtitle', 'params' ];
	$logInfo = [];

	while ( $this->reader->read() ) {
		$isClosingTag = $this->reader->nodeType == XMLReader::END_ELEMENT;
		if ( $isClosingTag && $this->reader->localName == 'logitem' ) {
			break;
		}

		$tag = $this->reader->localName;

		$hookArgs = [ $this, $logInfo ];
		if ( !Hooks::run( 'ImportHandleLogItemXMLTag', $hookArgs ) ) {
			// Do nothing
			continue;
		}

		if ( in_array( $tag, $simpleFields ) ) {
			$logInfo[$tag] = $this->nodeContents();
		} elseif ( $tag == 'contributor' ) {
			$logInfo['contributor'] = $this->handleContributor();
		} elseif ( $tag != '#text' ) {
			$this->warn( "Unhandled log-item XML tag $tag" );
		}
	}

	$this->processLogItem( $logInfo );
}
667 
/**
 * Builds a WikiRevision from parsed log-item data and hands it to the log-item callback.
 *
 * 'type' and 'action' are read unconditionally; the other keys are applied
 * only when present. A missing contributor username becomes 'Unknown user'.
 *
 * @param array $logInfo Data collected by handleLogItem()
 * @return mixed Return value of logItemCallback()
 */
private function processLogItem( $logInfo ) {
	$logEntry = new WikiRevision( $this->config );

	if ( isset( $logInfo['id'] ) ) {
		$logEntry->setID( $logInfo['id'] );
	}
	$logEntry->setType( $logInfo['type'] );
	$logEntry->setAction( $logInfo['action'] );
	if ( isset( $logInfo['timestamp'] ) ) {
		$logEntry->setTimestamp( $logInfo['timestamp'] );
	}
	if ( isset( $logInfo['params'] ) ) {
		$logEntry->setParams( $logInfo['params'] );
	}
	if ( isset( $logInfo['logtitle'] ) ) {
		// @todo Using Title for non-local titles is a recipe for disaster.
		// We should use ForeignTitle here instead.
		$logEntry->setTitle( Title::newFromText( $logInfo['logtitle'] ) );
	}

	$logEntry->setNoUpdates( $this->mNoUpdates );

	if ( isset( $logInfo['comment'] ) ) {
		$logEntry->setComment( $logInfo['comment'] );
	}

	if ( isset( $logInfo['contributor']['ip'] ) ) {
		$logEntry->setUserIP( $logInfo['contributor']['ip'] );
	}

	$username = isset( $logInfo['contributor']['username'] )
		? $logInfo['contributor']['username']
		: 'Unknown user';
	$logEntry->setUsername( $username );

	return $this->logItemCallback( $logEntry );
}
711 
/**
 * Reads a <page> element: collects its simple fields, resolves the title
 * on the first <revision>/<upload>, dispatches each of those to its
 * handler, and finally fires the page-out callback if a title was accepted.
 *
 * Once the title is rejected, every remaining child of the page is skipped.
 */
private function handlePage() {
	// Handle page data.
	$this->debug( "Enter page handler." );
	$pageInfo = [ 'revisionCount' => 0, 'successfulRevisionCount' => 0 ];

	// Fields that can just be stuffed in the pageInfo object
	$simpleFields = [ 'title', 'ns', 'id', 'redirect', 'restrictions' ];

	$skip = false;
	$badTitle = false;

	// next() jumps over the current subtree; read() descends into it.
	while ( $skip ? $this->reader->next() : $this->reader->read() ) {
		$isClosingTag = $this->reader->nodeType == XMLReader::END_ELEMENT;
		if ( $isClosingTag && $this->reader->localName == 'page' ) {
			break;
		}

		$skip = false;

		$tag = $this->reader->localName;

		if ( $badTitle ) {
			// The title is invalid, bail out of this page
			$skip = true;
		} elseif ( !Hooks::run( 'ImportHandlePageXMLTag', [ $this,
			&$pageInfo ] ) ) {
			// Do nothing
		} elseif ( in_array( $tag, $simpleFields ) ) {
			// An XML snippet:
			// <page>
			//     <id>123</id>
			//     <title>Page</title>
			//     <redirect title="NewTitle"/>
			//     ...
			// The redirect target lives in an attribute rather than in the element
			// body, so it is fetched differently from the other fields.
			$pageInfo[$tag] = ( $tag == 'redirect' )
				? $this->nodeAttribute( 'title' )
				: $this->nodeContents();
		} elseif ( $tag == 'revision' || $tag == 'upload' ) {
			if ( !isset( $title ) ) {
				$foreignNs = isset( $pageInfo['ns'] ) ? $pageInfo['ns'] : null;
				$title = $this->processTitle( $pageInfo['title'], $foreignNs );

				// $title is either an array of two titles or false.
				if ( is_array( $title ) ) {
					$this->pageCallback( $title );
					list( $pageInfo['_title'], $foreignTitle ) = $title;
				} else {
					$badTitle = true;
					$skip = true;
				}
			}

			if ( $title ) {
				if ( $tag == 'revision' ) {
					$this->handleRevision( $pageInfo );
				} else {
					$this->handleUpload( $pageInfo );
				}
			}
		} elseif ( $tag != '#text' ) {
			$this->warn( "Unhandled page XML tag $tag" );
			$skip = true;
		}
	}

	// @note $pageInfo['_title'] is only set if a valid $title was processed
	// above with no error; in that case pageCallback was called and
	// $foreignTitle was assigned from the same pair. When '_title' is
	// absent, $foreignTitle is not set either, so nothing is reported.
	if ( array_key_exists( '_title', $pageInfo ) ) {
		$this->pageOutCallback( $pageInfo['_title'], $foreignTitle,
			$pageInfo['revisionCount'],
			$pageInfo['successfulRevisionCount'],
			$pageInfo );
	}
}
792 
/**
 * Reads a <revision> element, then processes it and updates the page's counters.
 *
 * 'revisionCount' is always incremented; 'successfulRevisionCount' only
 * when processRevision() returns a truthy value.
 *
 * @param array &$pageInfo Page data from handlePage(); its counters are modified
 */
private function handleRevision( &$pageInfo ) {
	$this->debug( "Enter revision handler" );

	$simpleFields = [ 'id', 'timestamp', 'comment', 'minor', 'model', 'format', 'text' ];
	$revisionInfo = [];

	// NOTE(review): unlike handlePage(), $skip is never reset to false inside
	// the loop, so after the first unknown tag every later step uses next().
	// Confirm whether that is intended.
	$skip = false;

	while ( $skip ? $this->reader->next() : $this->reader->read() ) {
		$isClosingTag = $this->reader->nodeType == XMLReader::END_ELEMENT;
		if ( $isClosingTag && $this->reader->localName == 'revision' ) {
			break;
		}

		$tag = $this->reader->localName;

		$hookArgs = [ $this, $pageInfo, $revisionInfo ];
		if ( !Hooks::run( 'ImportHandleRevisionXMLTag', $hookArgs ) ) {
			// Do nothing
			continue;
		}

		if ( in_array( $tag, $simpleFields ) ) {
			$revisionInfo[$tag] = $this->nodeContents();
		} elseif ( $tag == 'contributor' ) {
			$revisionInfo['contributor'] = $this->handleContributor();
		} elseif ( $tag != '#text' ) {
			$this->warn( "Unhandled revision XML tag $tag" );
			$skip = true;
		}
	}

	$pageInfo['revisionCount']++;
	$imported = $this->processRevision( $pageInfo, $revisionInfo );
	if ( $imported ) {
		$pageInfo['successfulRevisionCount']++;
	}
}
831 
/**
 * Builds a WikiRevision from parsed revision data and hands it to the revision callback.
 *
 * @param array $pageInfo Page data; '_title' must hold the local title
 * @param array $revisionInfo Data collected by handleRevision()
 * @throws MWException If the text of a revision with one of the listed
 *   content models (or no model) exceeds $wgMaxArticleSize kilobytes
 * @return mixed Return value of revisionCallback()
 */
private function processRevision( $pageInfo, $revisionInfo ) {
	global $wgMaxArticleSize;

	// Make sure revisions won't violate $wgMaxArticleSize, which could lead to
	// database errors and instability. Testing for revisions with only listed
	// content models, as other content models might use serialization formats
	// which aren't checked against $wgMaxArticleSize.
	if ( ( !isset( $revisionInfo['model'] ) ||
		in_array( $revisionInfo['model'], [
			'wikitext',
			'css',
			'json',
			'javascript',
			'text',
			''
		] ) ) &&
		// A revision without a <text> element has nothing to measure.
		isset( $revisionInfo['text'] ) &&
		strlen( $revisionInfo['text'] ) > $wgMaxArticleSize * 1024
	) {
		throw new MWException( 'The text of ' .
			( isset( $revisionInfo['id'] ) ?
				"the revision with ID $revisionInfo[id]" :
				'a revision'
			) . " exceeds the maximum allowable size ($wgMaxArticleSize KB)" );
	}

	$revision = new WikiRevision( $this->config );

	if ( isset( $revisionInfo['id'] ) ) {
		$revision->setID( $revisionInfo['id'] );
	}
	if ( isset( $revisionInfo['model'] ) ) {
		$revision->setModel( $revisionInfo['model'] );
	}
	if ( isset( $revisionInfo['format'] ) ) {
		$revision->setFormat( $revisionInfo['format'] );
	}
	$revision->setTitle( $pageInfo['_title'] );

	if ( isset( $revisionInfo['text'] ) ) {
		// Let the content handler transform the raw text before it is stored.
		$handler = $revision->getContentHandler();
		$text = $handler->importTransform(
			$revisionInfo['text'],
			$revision->getFormat() );

		$revision->setText( $text );
	}
	if ( isset( $revisionInfo['timestamp'] ) ) {
		$revision->setTimestamp( $revisionInfo['timestamp'] );
	} else {
		$revision->setTimestamp( wfTimestampNow() );
	}

	if ( isset( $revisionInfo['comment'] ) ) {
		$revision->setComment( $revisionInfo['comment'] );
	}

	// The mere presence of a <minor> element marks the edit as minor.
	if ( isset( $revisionInfo['minor'] ) ) {
		$revision->setMinor( true );
	}
	if ( isset( $revisionInfo['contributor']['ip'] ) ) {
		$revision->setUserIP( $revisionInfo['contributor']['ip'] );
	} elseif ( isset( $revisionInfo['contributor']['username'] ) ) {
		$revision->setUsername( $revisionInfo['contributor']['username'] );
	} else {
		$revision->setUsername( 'Unknown user' );
	}
	$revision->setNoUpdates( $this->mNoUpdates );

	return $this->revisionCallback( $revision );
}
907 
/**
 * Reads an <upload> element and, if upload importing is enabled, processes it.
 *
 * Base64-encoded <contents> are written to a temporary file. When an image
 * base path is set and the 'rel' path exists beneath it, that file is used
 * as the source instead.
 *
 * @param array &$pageInfo Page data from handlePage()
 * @return mixed Return value of processUpload(), or null when uploads are not imported
 */
private function handleUpload( &$pageInfo ) {
	$this->debug( "Enter upload handler" );
	$uploadInfo = [];

	$normalFields = [ 'timestamp', 'comment', 'filename', 'text',
		'src', 'size', 'sha1base36', 'archivename', 'rel' ];

	// NOTE(review): unlike handlePage(), $skip is never reset to false inside
	// the loop, so after the first unknown tag every later step uses next().
	// Confirm whether that is intended.
	$skip = false;

	while ( $skip ? $this->reader->next() : $this->reader->read() ) {
		if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
			$this->reader->localName == 'upload' ) {
			break;
		}

		$tag = $this->reader->localName;

		if ( !Hooks::run( 'ImportHandleUploadXMLTag', [
			$this, $pageInfo
		] ) ) {
			// Do nothing
		} elseif ( in_array( $tag, $normalFields ) ) {
			$uploadInfo[$tag] = $this->nodeContents();
		} elseif ( $tag == 'contributor' ) {
			$uploadInfo['contributor'] = $this->handleContributor();
		} elseif ( $tag == 'contents' ) {
			// Fetch the attribute while the reader is still on the opening tag;
			// nodeContents() advances it to the closing tag.
			$encoding = $this->reader->getAttribute( 'encoding' );
			$contents = $this->nodeContents();
			if ( $encoding === 'base64' ) {
				$uploadInfo['fileSrc'] = $this->dumpTemp( base64_decode( $contents ) );
				$uploadInfo['isTempSrc'] = true;
			}
		} elseif ( $tag != '#text' ) {
			$this->warn( "Unhandled upload XML tag $tag" );
			$skip = true;
		}
	}

	// NOTE(review): 'rel' comes straight from the dump and is concatenated
	// into a filesystem path without normalisation — confirm callers only
	// set an image base path for trusted dumps.
	if ( $this->mImageBasePath && isset( $uploadInfo['rel'] ) ) {
		$path = "{$this->mImageBasePath}/{$uploadInfo['rel']}";
		if ( file_exists( $path ) ) {
			$uploadInfo['fileSrc'] = $path;
			$uploadInfo['isTempSrc'] = false;
		}
	}

	if ( $this->mImportUploads ) {
		return $this->processUpload( $pageInfo, $uploadInfo );
	}
}
962 
/**
 * Writes the given bytes to a fresh temporary file under wfTempDir().
 *
 * @param string $contents
 * @return string Path of the temporary file, as returned by tempnam()
 */
private function dumpTemp( $contents ) {
	$tempPath = tempnam( wfTempDir(), 'importupload' );
	file_put_contents( $tempPath, $contents );

	return $tempPath;
}
972 
/**
 * Builds a WikiRevision describing a file upload and passes it to the upload callback.
 *
 * 'timestamp', 'filename', 'src', 'size' and 'comment' are read from
 * $uploadInfo unconditionally; the other keys only when present.
 *
 * @param array $pageInfo Page data; '_title' and 'id' are used
 * @param array $uploadInfo Data collected by handleUpload()
 * @return mixed Return value of the upload callback
 */
private function processUpload( $pageInfo, $uploadInfo ) {
	$upload = new WikiRevision( $this->config );

	$text = '';
	if ( isset( $uploadInfo['text'] ) ) {
		$text = $uploadInfo['text'];
	}

	$upload->setTitle( $pageInfo['_title'] );
	$upload->setID( $pageInfo['id'] );
	$upload->setTimestamp( $uploadInfo['timestamp'] );
	$upload->setText( $text );
	$upload->setFilename( $uploadInfo['filename'] );
	if ( isset( $uploadInfo['archivename'] ) ) {
		$upload->setArchiveName( $uploadInfo['archivename'] );
	}
	$upload->setSrc( $uploadInfo['src'] );
	if ( isset( $uploadInfo['fileSrc'] ) ) {
		$isTemporary = !empty( $uploadInfo['isTempSrc'] );
		$upload->setFileSrc( $uploadInfo['fileSrc'], $isTemporary );
	}
	if ( isset( $uploadInfo['sha1base36'] ) ) {
		$upload->setSha1Base36( $uploadInfo['sha1base36'] );
	}
	$upload->setSize( intval( $uploadInfo['size'] ) );
	$upload->setComment( $uploadInfo['comment'] );

	// Both may be applied; neither has a fallback here.
	if ( isset( $uploadInfo['contributor']['ip'] ) ) {
		$upload->setUserIP( $uploadInfo['contributor']['ip'] );
	}
	if ( isset( $uploadInfo['contributor']['username'] ) ) {
		$upload->setUsername( $uploadInfo['contributor']['username'] );
	}
	$upload->setNoUpdates( $this->mNoUpdates );

	return call_user_func( $this->mUploadCallback, $upload );
}
1011 
/**
 * Reads a <contributor> element into an array.
 *
 * Only the id, ip and username children are kept. An empty element yields
 * an empty array without advancing the reader.
 *
 * @return array Subset of [ 'id' => ..., 'ip' => ..., 'username' => ... ]
 */
private function handleContributor() {
	$wanted = [ 'id', 'ip', 'username' ];
	$contributor = [];

	if ( $this->reader->isEmptyElement ) {
		return $contributor;
	}

	while ( $this->reader->read() ) {
		$isClosingTag = $this->reader->nodeType == XMLReader::END_ELEMENT;
		if ( $isClosingTag && $this->reader->localName == 'contributor' ) {
			break;
		}

		$tag = $this->reader->localName;
		if ( !in_array( $tag, $wanted ) ) {
			continue;
		}
		$contributor[$tag] = $this->nodeContents();
	}

	return $contributor;
}
1037 
/**
 * Converts a title from the dump into a local Title and checks it can be imported.
 *
 * A notice is emitted and false returned when the title is invalid, is an
 * interwiki title, cannot exist, or (outside command-line mode) the user
 * may not edit or create it.
 *
 * @param string $text Title text from the dump
 * @param string|null $ns Namespace id from the dump, if any
 * @return array|bool [ Title, ForeignTitle ] on success, false otherwise
 */
private function processTitle( $text, $ns = null ) {
	// Use the namespace-aware parser only when <siteinfo> supplied a namespace map.
	$foreignTitleFactory = is_null( $this->foreignNamespaces )
		? new NaiveForeignTitleFactory()
		: new NamespaceAwareForeignTitleFactory( $this->foreignNamespaces );

	$foreignTitle = $foreignTitleFactory->createForeignTitle( $text,
		intval( $ns ) );

	$title = $this->importTitleFactory->createTitleFromForeignTitle(
		$foreignTitle );

	$commandLineMode = $this->config->get( 'CommandLineMode' );

	if ( is_null( $title ) ) {
		# Invalid page title? Ignore the page
		$this->notice( 'import-error-invalid', $foreignTitle->getFullText() );
		return false;
	}
	if ( $title->isExternal() ) {
		$this->notice( 'import-error-interwiki', $title->getPrefixedText() );
		return false;
	}
	if ( !$title->canExist() ) {
		$this->notice( 'import-error-special', $title->getPrefixedText() );
		return false;
	}
	if ( !$title->userCan( 'edit' ) && !$commandLineMode ) {
		# Do not import if the importing wiki user cannot edit this page
		$this->notice( 'import-error-edit', $title->getPrefixedText() );
		return false;
	}
	if ( !$title->exists() && !$title->userCan( 'create' ) && !$commandLineMode ) {
		# Do not import if the importing wiki user cannot create this page
		$this->notice( 'import-error-create', $title->getPrefixedText() );
		return false;
	}

	return [ $title, $foreignTitle ];
}
1080 }
NaiveImportTitleFactory
A class to convert page titles on a foreign wiki (ForeignTitle objects) into page titles on the local...
Definition: NaiveImportTitleFactory.php:33
WikiImporter\processRevision
processRevision( $pageInfo, $revisionInfo)
Definition: WikiImporter.php:837
WikiImporter\$mUploadCallback
$mUploadCallback
Definition: WikiImporter.php:37
WikiImporter
XML file reader for the page data importer.
Definition: WikiImporter.php:34
Title\newFromText
static newFromText( $text, $defaultNamespace=NS_MAIN)
Create a new Title from text, such as what one would find in a link.
Definition: Title.php:265
$wgMaxArticleSize
$wgMaxArticleSize
Maximum article size in kilobytes.
Definition: DefaultSettings.php:2174
WikiImporter\setImageBasePath
setImageBasePath( $dir)
Definition: WikiImporter.php:297
wfSetVar
wfSetVar(&$dest, $source, $force=false)
Sets dest to source and returns the original value of dest If source is NULL, it just returns the val...
Definition: GlobalFunctions.php:1713
UploadSourceAdapter\registerSource
static registerSource(ImportSource $source)
Definition: UploadSourceAdapter.php:48
WikiImporter\$mImportUploads
$mImportUploads
Definition: WikiImporter.php:40
NamespaceAwareForeignTitleFactory
A parser that translates page titles on a foreign wiki into ForeignTitle objects, using information a...
Definition: NamespaceAwareForeignTitleFactory.php:26
$status
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist Do not use this to implement individual filters if they are compatible with the ChangesListFilter and ChangesListFilterGroup structure use sub classes of those in conjunction with the ChangesListSpecialPageStructuredFilters hook This hook can be used to implement filters that do not implement that or custom behavior that is not an individual filter e g Watchlist and Watchlist you will want to construct new ChangesListBooleanFilter or ChangesListStringOptionsFilter objects When constructing you specify which group they belong to You can reuse existing or create your you must register them with $special registerFilterGroup removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set $status
Definition: hooks.txt:1049
WikiImporter\$mRevisionCallback
$mRevisionCallback
Definition: WikiImporter.php:37
use
as see the revision history and available at free of to any person obtaining a copy of this software and associated documentation to deal in the Software without including without limitation the rights to use
Definition: MIT-LICENSE.txt:10
WikiImporter\revisionCallback
revisionCallback( $revision)
Notify the callback function of a revision.
Definition: WikiImporter.php:480
WikiImporter\setNoticeCallback
setNoticeCallback( $callback)
Set a callback that displays notice messages.
Definition: WikiImporter.php:155
DeferredUpdates\addUpdate
static addUpdate(DeferrableUpdate $update, $stage=self::POSTSEND)
Add an update to the deferred list to be run later by execute()
Definition: DeferredUpdates.php:76
NaiveForeignTitleFactory
A parser that translates page titles on a foreign wiki into ForeignTitle objects, with no knowledge o...
Definition: NaiveForeignTitleFactory.php:26
WikiImporter\$mPageOutCallback
$mPageOutCallback
Definition: WikiImporter.php:38
WikiImporter\setNoUpdates
setNoUpdates( $noupdates)
Set 'no updates' mode.
Definition: WikiImporter.php:145
WikiImporter\getReader
getReader()
Definition: WikiImporter.php:103
WikiImporter\processLogItem
processLogItem( $logInfo)
Definition: WikiImporter.php:672
$params
$params
Definition: styleTest.css.php:40
WikiImporter\handleRevision
handleRevision(&$pageInfo)
Definition: WikiImporter.php:796
WikiImporter\setRevisionCallback
setRevisionCallback( $callback)
Sets the action to perform as each page revision is reached.
Definition: WikiImporter.php:190
WikiImporter\handleContributor
handleContributor()
Definition: WikiImporter.php:1015
ImportReporter
Reporting callback.
Definition: SpecialImport.php:537
$type
do that in ParserLimitReportFormat instead use this to modify the parameters of the image and a DIV can begin in one section and end in another Make sure your code can handle that case gracefully See the EditSectionClearerLink extension for an example zero but section is usually empty its values are the globals values before the output is cached my talk my contributions etc etc otherwise the built in rate limiting checks are if enabled allows for interception of redirect as a string mapping parameter names to values & $type
Definition: hooks.txt:2536
NamespaceImportTitleFactory
A class to convert page titles on a foreign wiki (ForeignTitle objects) into page titles on the local...
Definition: NamespaceImportTitleFactory.php:27
WikiImporter\nodeContents
nodeContents()
Shouldn't something like this be built-in to XMLReader? Fetches text contents of the current element,...
Definition: WikiImporter.php:520
php
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
WikiImporter\siteInfoCallback
siteInfoCallback( $siteInfo)
Notify the callback function of site info.
Definition: WikiImporter.php:440
$debug
$debug
Definition: mcc.php:31
NS_MAIN
const NS_MAIN
Definition: Defines.php:62
MWException
MediaWiki exception.
Definition: MWException.php:26
ImportTitleFactory
Represents an object that can convert page titles on a foreign wiki (ForeignTitle objects) into page ...
Definition: ImportTitleFactory.php:26
$title
namespace and then decline to actually register it file or subcat img or subcat $title
Definition: hooks.txt:934
WikiPage\factory
static factory(Title $title)
Create a WikiPage object of the appropriate class for the given title.
Definition: WikiPage.php:120
wfDeprecated
wfDeprecated( $function, $version=false, $component=false, $callerOffset=2)
Throws a warning that $function is deprecated.
Definition: GlobalFunctions.php:1128
WikiImporter\dumpTemp
dumpTemp( $contents)
Definition: WikiImporter.php:967
WikiImporter\$countableCache
array $countableCache
Definition: WikiImporter.php:47
$content
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist Do not use this to implement individual filters if they are compatible with the ChangesListFilter and ChangesListFilterGroup structure use sub classes of those in conjunction with the ChangesListSpecialPageStructuredFilters hook This hook can be used to implement filters that do not implement that or custom behavior that is not an individual filter e g Watchlist and Watchlist you will want to construct new ChangesListBooleanFilter or ChangesListStringOptionsFilter objects When constructing you specify which group they belong to You can reuse existing or create your you must register them with $special registerFilterGroup removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set and then return false from the hook function Ensure you consume the ChangeTagAfterDelete hook to carry out custom deletion actions as context called by AbstractContent::getParserOutput May be used to override the normal model specific rendering of page content $content
Definition: hooks.txt:1049
WikiImporter\pageOutCallback
pageOutCallback( $title, $foreignTitle, $revCount, $sucCount, $pageInfo)
Notify the callback function when a "</page>" is closed.
Definition: WikiImporter.php:467
$page
do that in ParserLimitReportFormat instead use this to modify the parameters of the image and a DIV can begin in one section and end in another Make sure your code can handle that case gracefully See the EditSectionClearerLink extension for an example zero but section is usually empty its values are the globals values before the output is cached $page
Definition: hooks.txt:2536
$tag
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist Do not use this to implement individual filters if they are compatible with the ChangesListFilter and ChangesListFilterGroup structure use sub classes of those in conjunction with the ChangesListSpecialPageStructuredFilters hook This hook can be used to implement filters that do not implement that or custom behavior that is not an individual filter e g Watchlist and Watchlist you will want to construct new ChangesListBooleanFilter or ChangesListStringOptionsFilter objects When constructing you specify which group they belong to You can reuse existing or create your you must register them with $special registerFilterGroup removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books $tag
Definition: hooks.txt:1028
MWContentSerializationException
Exception representing a failure to serialize or unserialize a content object.
Definition: MWContentSerializationException.php:7
WikiImporter\throwXmlError
throwXmlError( $err)
Definition: WikiImporter.php:107
MWNamespace\hasSubpages
static hasSubpages( $index)
Does the namespace allow subpages?
Definition: MWNamespace.php:330
SubpageImportTitleFactory
A class to convert page titles on a foreign wiki (ForeignTitle objects) into page titles on the local...
Definition: SubpageImportTitleFactory.php:27
SiteStatsUpdate\factory
static factory(array $deltas)
Definition: SiteStatsUpdate.php:62
WikiImporter\finishImportPage
finishImportPage( $title, $foreignTitle, $revCount, $sRevCount, $pageInfo)
Mostly for hook use.
Definition: WikiImporter.php:385
global
when a variable name is used in a it is silently declared as a new masking the global
Definition: design.txt:93
wfTimestampNow
wfTimestampNow()
Convenience function; returns MediaWiki timestamp for the present time.
Definition: GlobalFunctions.php:2023
WikiImporter\$importTitleFactory
ImportTitleFactory $importTitleFactory
Definition: WikiImporter.php:45
WikiImporter\processUpload
processUpload( $pageInfo, $uploadInfo)
Definition: WikiImporter.php:978
WikiImporter\disableStatisticsUpdate
disableStatisticsUpdate()
Statistics update can cause a lot of time.
Definition: WikiImporter.php:312
WikiImporter\setImportUploads
setImportUploads( $import)
Definition: WikiImporter.php:304
WikiImporter\$mNoUpdates
$mNoUpdates
Definition: WikiImporter.php:41
wfDebug
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
Definition: GlobalFunctions.php:999
list
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global list
Definition: deferred.txt:11
$dir
$dir
Definition: Autoload.php:8
WikiImporter\$mPageCallback
$mPageCallback
Definition: WikiImporter.php:37
WikiImporter\$mSiteInfoCallback
$mSiteInfoCallback
Definition: WikiImporter.php:38
WikiImporter\beforeImportPage
beforeImportPage( $titleAndForeignTitle)
Default per-page callback.
Definition: WikiImporter.php:322
StatusValue\newGood
static newGood( $value=null)
Factory function for good results.
Definition: StatusValue.php:76
WikiImporter\importRevision
importRevision( $revision)
Default per-revision callback, performs the import.
Definition: WikiImporter.php:334
WikiImporter\doImport
doImport()
Primary entry point.
Definition: WikiImporter.php:546
WikiImporter\processTitle
processTitle( $text, $ns=null)
Definition: WikiImporter.php:1043
WikiImporter\$mImageBasePath
$mImageBasePath
Definition: WikiImporter.php:40
WikiImporter\$foreignNamespaces
$foreignNamespaces
Definition: WikiImporter.php:36
WikiImporter\notice
notice( $msg)
Definition: WikiImporter.php:122
WikiImporter\setDebug
setDebug( $debug)
Set debug mode...
Definition: WikiImporter.php:137
MWNamespace\exists
static exists( $index)
Returns whether the specified namespace exists.
Definition: MWNamespace.php:160
WikiImporter\$reader
$reader
Definition: WikiImporter.php:35
$handler
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that probably a stub it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output modifiable modifiable after all normalizations have been except for the $wgMaxImageArea check set to true or false to override the $wgMaxImageArea check result gives extension the possibility to transform it themselves $handler
Definition: hooks.txt:783
WikiImporter\handleUpload
handleUpload(&$pageInfo)
Definition: WikiImporter.php:912
WikiImporter\setUploadCallback
setUploadCallback( $callback)
Sets the action to perform as each file upload version is reached.
Definition: WikiImporter.php:201
WikiImporter\warn
warn( $data)
Definition: WikiImporter.php:118
WikiImporter\handleSiteInfo
handleSiteInfo()
Definition: WikiImporter.php:609
WikiImporter\setTargetRootPage
setTargetRootPage( $rootpage)
Set a target root page under which all pages are imported.
Definition: WikiImporter.php:265
$args
if( $line===false) $args
Definition: cdb.php:63
WikiImporter\$mNoticeCallback
$mNoticeCallback
Definition: WikiImporter.php:39
wfTempDir
wfTempDir()
Tries to get the system directory for temporary files.
Definition: GlobalFunctions.php:2061
WikiRevision
Represents a revision, log entry or upload during the import process.
Definition: WikiRevision.php:35
WikiImporter\debug
debug( $data)
Definition: WikiImporter.php:112
WikiImporter\importLogItem
importLogItem( $revision)
Default per-revision callback, performs the import.
Definition: WikiImporter.php:363
WikiImporter\importUpload
importUpload( $revision)
Dummy for now...
Definition: WikiImporter.php:372
WikiImporter\__construct
__construct(ImportSource $source, Config $config=null)
Creates an ImportXMLReader drawing from the source provided.
Definition: WikiImporter.php:57
WikiImporter\handleLogItem
handleLogItem()
Definition: WikiImporter.php:636
ImportSource
Source interface for XML import.
Definition: ImportSource.php:32
$path
$path
Definition: NoLocalSettings.php:26
WikiImporter\$config
Config $config
Definition: WikiImporter.php:43
WikiImporter\setSiteInfoCallback
setSiteInfoCallback( $callback)
Sets the action to perform when site info is encountered.
Definition: WikiImporter.php:223
WikiImporter\setPageOutCallback
setPageOutCallback( $callback)
Sets the action to perform as each page in the stream is completed.
Definition: WikiImporter.php:179
WikiImporter\debugRevisionHandler
debugRevisionHandler(&$revision)
Alternate per-revision callback, for debugging.
Definition: WikiImporter.php:422
WikiImporter\$disableStatisticsUpdate
bool $disableStatisticsUpdate
Definition: WikiImporter.php:49
$source
$source
Definition: mwdoc-filter.php:45
WikiImporter\handlePage
handlePage()
Definition: WikiImporter.php:712
wfMessage
either a unescaped string or a HtmlArmor object after in associative array form externallinks including delete and has completed for all link tables whether this was an auto creation default is conds Array Extra conditions for the No matching items in log is displayed if loglist is empty msgKey Array If you want a nice box with a set this to the key of the message First element is the message additional optional elements are parameters for the key that are processed with wfMessage() -> params() ->parseAsBlock() - offset Set to overwrite offset parameter in $wgRequest set to '' to unset offset - wrap String Wrap the message in html(usually something like "&lt
WikiImporter\nodeAttribute
nodeAttribute( $attr)
Retrieves the contents of the named attribute of the current element.
Definition: WikiImporter.php:509
WikiImporter\setPageCallback
setPageCallback( $callback)
Sets the action to perform as each new page in the stream is reached.
Definition: WikiImporter.php:164
WikiImporter\setImportTitleFactory
setImportTitleFactory( $factory)
Sets the factory object to use to convert ForeignTitle objects into local Title objects.
Definition: WikiImporter.php:234
MediaWikiServices
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency MediaWikiServices
Definition: injection.txt:23
WikiImporter\setTargetNamespace
setTargetNamespace( $namespace)
Set a target namespace to override the defaults.
Definition: WikiImporter.php:243
WikiImporter\logItemCallback
logItemCallback( $revision)
Notify the callback function of a new log item.
Definition: WikiImporter.php:494
Hooks\run
static run( $event, array $args=[], $deprecatedVersion=null)
Call hook functions defined in Hooks::register and $wgHooks.
Definition: Hooks.php:131
WikiImporter\pageCallback
pageCallback( $title)
Notify the callback function when a new "<page>" is reached.
Definition: WikiImporter.php:453
WikiImporter\$mLogItemCallback
$mLogItemCallback
Definition: WikiImporter.php:37
$buffer
$buffer
Definition: mwdoc-filter.php:48
WikiImporter\$mDebug
$mDebug
Definition: WikiImporter.php:39
array
the array() calling protocol came about after MediaWiki 1.4rc1.
WikiImporter\setLogItemCallback
setLogItemCallback( $callback)
Sets the action to perform as each log item reached.
Definition: WikiImporter.php:212
$wgContLang
this class mediates it Skin Encapsulates a look and feel for the wiki All of the functions that render HTML and make choices about how to render it are here and are called from various other places when and is meant to be subclassed with other skins that may override some of its functions The User object contains a reference to a and so rather than having a global skin object we just rely on the global User and get the skin with $wgUser and also has some character encoding functions and other locale stuff The current user interface language is instantiated as and the content language as $wgContLang
Definition: design.txt:56