MediaWiki  master
ImportableOldRevisionImporter.php
Go to the documentation of this file.
1 <?php
2 
12 use Psr\Log\LoggerInterface;
15 
20 
/** @var LoggerInterface Receives the debug messages written by import(). */
private $logger;

/** @var bool Truthiness decides whether import() runs derived-data updates. */
private $doUpdates;

/** @var IConnectionProvider Source of the primary database handle used by import(). */
private $dbProvider;

/** @var RevisionStore Used to look up the previous revision and insert the new one. */
private $revisionStore;

/** @var SlotRoleRegistry Consulted to reject slot roles that are not defined. */
private $slotRoleRegistry;

/** @var WikiPageFactory Builds the WikiPage for the imported revision's title. */
private $wikiPageFactory;

/** @var PageUpdaterFactory Creates the derived-data updater run after an import. */
private $pageUpdaterFactory;

/** @var UserFactory Resolves the author of the imported revision. */
private $userFactory;
56 
/**
 * Store the collaborators needed by import().
 *
 * The last three parameters are optional; when one is omitted (or null) the
 * corresponding service is fetched from MediaWikiServices instead.
 *
 * @param bool $doUpdates Stored as-is; import() checks it for truthiness before
 *   running derived-data updates.
 * @param LoggerInterface $logger
 * @param IConnectionProvider $dbProvider
 * @param RevisionStore $revisionStore
 * @param SlotRoleRegistry $slotRoleRegistry
 * @param WikiPageFactory|null $wikiPageFactory
 * @param PageUpdaterFactory|null $pageUpdaterFactory
 * @param UserFactory|null $userFactory
 */
public function __construct(
	$doUpdates,
	LoggerInterface $logger,
	IConnectionProvider $dbProvider,
	RevisionStore $revisionStore,
	SlotRoleRegistry $slotRoleRegistry,
	// Explicit "?Type": implicitly nullable parameters ("Type $x = null") are
	// deprecated as of PHP 8.4. Callers are unaffected.
	?WikiPageFactory $wikiPageFactory = null,
	?PageUpdaterFactory $pageUpdaterFactory = null,
	?UserFactory $userFactory = null
) {
	$this->doUpdates = $doUpdates;
	$this->logger = $logger;
	$this->dbProvider = $dbProvider;
	$this->revisionStore = $revisionStore;
	$this->slotRoleRegistry = $slotRoleRegistry;

	// @todo: temporary - remove when FileImporter extension is updated
	// Only reach for the global service container when a dependency is
	// actually missing, so fully-injected instances stay free of global state.
	if ( $wikiPageFactory === null || $pageUpdaterFactory === null || $userFactory === null ) {
		$services = MediaWikiServices::getInstance();
		$wikiPageFactory = $wikiPageFactory ?? $services->getWikiPageFactory();
		$pageUpdaterFactory = $pageUpdaterFactory ?? $services->getPageUpdaterFactory();
		$userFactory = $userFactory ?? $services->getUserFactory();
	}

	$this->wikiPageFactory = $wikiPageFactory;
	$this->pageUpdaterFactory = $pageUpdaterFactory;
	$this->userFactory = $userFactory;
}
89 
/**
 * Insert a single imported revision into its page, creating the page first
 * when it does not exist yet.
 *
 * @param ImportableOldRevision $importableRevision Revision to import.
 * @param bool $doUpdates Not read by this method; the flag stored by the
 *   constructor is what controls the derived-data updates.
 * @return bool False when the revision is skipped (an identical revision was
 *   found, or no usable page ID could be obtained); true otherwise.
 */
public function import( ImportableOldRevision $importableRevision, $doUpdates = true ) {
	$primaryDb = $this->dbProvider->getPrimaryDatabase();

	# Sneak a single revision into place
	$author = $importableRevision->getUserObj()
		?: $this->userFactory->newFromName( $importableRevision->getUser() );
	if ( !$author ) {
		// No user object could be resolved: keep the raw name from the
		// import and use an anonymous user for the update's cause agent.
		$authorId = 0;
		$authorName = $importableRevision->getUser();
		$author = $this->userFactory->newAnonymous();
	} else {
		$authorId = $author->getId();
		$authorName = $author->getName();
	}

	// avoid memory leak...?
	Title::clearCaches();

	$wikiPage = $this->wikiPageFactory->newFromTitle( $importableRevision->getTitle() );
	$wikiPage->loadPageData( WikiPage::READ_LATEST );
	$isNewPage = !$wikiPage->exists();
	if ( $isNewPage ) {
		$pageId = $wikiPage->insertOn( $primaryDb );
	} else {
		$pageId = $wikiPage->getId();

		// Note: sha1 has been in XML dumps since 2012. If you have an
		// older dump, the duplicate detection here won't work.
		if ( $importableRevision->getSha1Base36() !== false ) {
			$duplicateConds = [
				'rev_page' => $pageId,
				'rev_timestamp' => $primaryDb->timestamp( $importableRevision->getTimestamp() ),
				'rev_sha1' => $importableRevision->getSha1Base36()
			];
			$alreadyImported = (bool)$primaryDb->newSelectQueryBuilder()
				->select( '1' )
				->from( 'revision' )
				->where( $duplicateConds )
				->caller( __METHOD__ )
				->fetchField();
			if ( $alreadyImported ) {
				// @todo FIXME: This could fail slightly for multiple matches :P
				$skipMessage = __METHOD__ . ": skipping existing revision for [[" .
					$importableRevision->getTitle()->getPrefixedText() . "]], timestamp " .
					$importableRevision->getTimestamp() . "\n";
				$this->logger->debug( $skipMessage );
				return false;
			}
		}
	}

	if ( !$pageId ) {
		// This seems to happen if two clients simultaneously try to import the
		// same page
		$invalidIdMessage = __METHOD__ . ': got invalid $pageId when importing revision of [[' .
			$importableRevision->getTitle()->getPrefixedText() . ']], timestamp ' .
			$importableRevision->getTimestamp() . "\n";
		$this->logger->debug( $invalidIdMessage );
		return false;
	}

	// Select previous version to make size diffs correct
	// @todo This assumes that multiple revisions of the same page are imported
	// in order from oldest to newest.
	$notNewerThanImport = $primaryDb->buildComparison(
		'<=',
		[ 'rev_timestamp' => $primaryDb->timestamp( $importableRevision->getTimestamp() ) ]
	);
	$prevRevRow = $this->revisionStore->newSelectQueryBuilder( $primaryDb )
		->joinComment()
		->where( [ 'rev_page' => $pageId ] )
		->andWhere( $notNewerThanImport )
		->orderBy( [ 'rev_timestamp', 'rev_id' ], SelectQueryBuilder::SORT_DESC )
		->caller( __METHOD__ )
		->fetchRow();

	# @todo FIXME: Use original rev_id optionally (better for backups)
	# Insert the row
	$newRevision = new MutableRevisionRecord( $importableRevision->getTitle() );
	$parentId = $prevRevRow ? (int)$prevRevRow->rev_id : 0;
	$newRevision->setParentId( $parentId );
	$comment = CommentStoreComment::newUnsavedComment( $importableRevision->getComment() );
	$newRevision->setComment( $comment );

	try {
		$revUser = $this->userFactory->newFromAnyId( $authorId, $authorName );
	} catch ( InvalidArgumentException $ex ) {
		// Fall back to the user of the main request context.
		$revUser = RequestContext::getMain()->getUser();
	}
	$newRevision->setUser( $revUser );

	$baseRevision = null;
	if ( $prevRevRow ) {
		$baseRevision = $this->revisionStore->newRevisionFromRow(
			$prevRevRow,
			IDBAccessObject::READ_LATEST,
			$importableRevision->getTitle()
		);
	}

	foreach ( $importableRevision->getSlotRoles() as $role ) {
		if ( !$this->slotRoleRegistry->isDefinedRole( $role ) ) {
			throw new RuntimeException( "Undefined slot role $role" );
		}

		$incomingContent = $importableRevision->getContent( $role );
		if ( $baseRevision && $baseRevision->hasSlot( $role ) ) {
			$baseSlot = $baseRevision->getSlot( $role );
			if ( $baseSlot->hasSameContent( $importableRevision->getSlot( $role ) ) ) {
				// Unchanged relative to the previous revision: inherit its slot
				// instead of setting the content again.
				$newRevision->inheritSlot( $baseRevision->getSlot( $role ) );
				continue;
			}
		}
		$newRevision->setContent( $role, $incomingContent );
	}

	$newRevision->setTimestamp( $importableRevision->getTimestamp() );
	$newRevision->setMinorEdit( $importableRevision->getMinor() );
	$newRevision->setPageId( $pageId );

	// Read the page's latest revision ID before the new revision is inserted.
	$latestRevId = $wikiPage->getLatest();

	$inserted = $this->revisionStore->insertRevisionOn( $newRevision, $primaryDb );

	// If not found (false), cast to 0 so that the page is updated
	// Just to be on the safe side, even though it should always be found
	$latestRevTimestamp = $latestRevId
		? (int)$this->revisionStore->getTimestampFromId( $latestRevId, RevisionStore::READ_LATEST )
		: 0;

	// Only repoint the page when the import is at least as new as the
	// revision the page currently points to.
	$changed = $importableRevision->getTimestamp() >= $latestRevTimestamp
		? $wikiPage->updateRevisionOn( $primaryDb, $inserted, $latestRevId )
		: false;

	$tags = $importableRevision->getTags();
	if ( $tags !== [] ) {
		ChangeTags::addTags( $tags, null, $inserted->getId() );
	}

	if ( $changed === false || !$this->doUpdates ) {
		return true;
	}

	$this->logger->debug( __METHOD__ . ": running updates" );
	// countable/oldcountable stuff is handled in WikiImporter::finishImportPage

	$updateOptions = [
		'created' => $isNewPage,
		'oldcountable' => 'no-change',
		'causeAction' => 'import-page',
		'causeAgent' => $author->getName(),
	];

	$derivedDataUpdater = $this->pageUpdaterFactory->newDerivedPageDataUpdater( $wikiPage );
	$derivedDataUpdater->prepareUpdate( $inserted, $updateOptions );
	$derivedDataUpdater->doUpdates();

	return true;
}
247 
248 }
static addTags( $tags, $rc_id=null, $rev_id=null, $log_id=null, $params=null, RecentChange $rc=null)
Add tags to a change given its rc_id, rev_id and/or log_id.
Definition: ChangeTags.php:282
__construct( $doUpdates, LoggerInterface $logger, IConnectionProvider $dbProvider, RevisionStore $revisionStore, SlotRoleRegistry $slotRoleRegistry, WikiPageFactory $wikiPageFactory=null, PageUpdaterFactory $pageUpdaterFactory=null, UserFactory $userFactory=null)
Value object for a comment stored by CommentStore.
Service locator for MediaWiki core services.
Service for creating WikiPage objects.
Service for looking up page revisions.
A registry service for SlotRoleHandlers, used to define which slot roles are available on which page.
A factory for PageUpdater and DerivedPageDataUpdater instances.
Represents a title within MediaWiki.
Definition: Title.php:76
Creates User objects.
Definition: UserFactory.php:41
static getMain()
Get the RequestContext object associated with the main request.
Build SELECT queries with a fluent interface.
getContent( $role=SlotRecord::MAIN)
Provide primary and replica IDatabase connections.
return true
Definition: router.php:90