MediaWiki  master
ImportableOldRevisionImporter.php
Go to the documentation of this file.
1 <?php
2 
12 use Psr\Log\LoggerInterface;
14 
19 
23  private $logger;
24 
28  private $doUpdates;
29 
33  private $loadBalancer;
34 
38  private $revisionStore;
39 
43  private $slotRoleRegistry;
44 
48  private $wikiPageFactory;
49 
51  private $pageUpdaterFactory;
52 
54  private $userFactory;
55 
66  public function __construct(
67  $doUpdates,
68  LoggerInterface $logger,
69  ILoadBalancer $loadBalancer,
70  RevisionStore $revisionStore,
71  SlotRoleRegistry $slotRoleRegistry,
72  WikiPageFactory $wikiPageFactory = null,
73  PageUpdaterFactory $pageUpdaterFactory = null,
74  UserFactory $userFactory = null
75  ) {
76  $this->doUpdates = $doUpdates;
77  $this->logger = $logger;
78  $this->loadBalancer = $loadBalancer;
79  $this->revisionStore = $revisionStore;
80  $this->slotRoleRegistry = $slotRoleRegistry;
81 
82  $services = MediaWikiServices::getInstance();
83  // @todo: temporary - remove when FileImporter extension is updated
84  $this->wikiPageFactory = $wikiPageFactory ?? $services->getWikiPageFactory();
85  $this->pageUpdaterFactory = $pageUpdaterFactory ?? $services->getPageUpdaterFactory();
86  $this->userFactory = $userFactory ?? $services->getUserFactory();
87  }
88 
90  public function import( ImportableOldRevision $importableRevision, $doUpdates = true ) {
91  $dbw = $this->loadBalancer->getConnectionRef( DB_PRIMARY );
92 
93  # Sneak a single revision into place
94  $user = $importableRevision->getUserObj() ?: $this->userFactory->newFromName( $importableRevision->getUser() );
95  if ( $user ) {
96  $userId = $user->getId();
97  $userText = $user->getName();
98  } else {
99  $userId = 0;
100  $userText = $importableRevision->getUser();
101  $user = $this->userFactory->newAnonymous();
102  }
103 
104  // avoid memory leak...?
105  Title::clearCaches();
106 
107  $page = $this->wikiPageFactory->newFromTitle( $importableRevision->getTitle() );
108  $page->loadPageData( WikiPage::READ_LATEST );
109  $mustCreatePage = !$page->exists();
110  if ( $mustCreatePage ) {
111  $pageId = $page->insertOn( $dbw );
112  } else {
113  $pageId = $page->getId();
114 
115  // Note: sha1 has been in XML dumps since 2012. If you have an
116  // older dump, the duplicate detection here won't work.
117  if ( $importableRevision->getSha1Base36() !== false ) {
118  $prior = (bool)$dbw->selectField( 'revision', '1',
119  [ 'rev_page' => $pageId,
120  'rev_timestamp' => $dbw->timestamp( $importableRevision->getTimestamp() ),
121  'rev_sha1' => $importableRevision->getSha1Base36() ],
122  __METHOD__
123  );
124  if ( $prior ) {
125  // @todo FIXME: This could fail slightly for multiple matches :P
126  $this->logger->debug( __METHOD__ . ": skipping existing revision for [[" .
127  $importableRevision->getTitle()->getPrefixedText() . "]], timestamp " .
128  $importableRevision->getTimestamp() . "\n" );
129  return false;
130  }
131  }
132  }
133 
134  if ( !$pageId ) {
135  // This seems to happen if two clients simultaneously try to import the
136  // same page
137  $this->logger->debug( __METHOD__ . ': got invalid $pageId when importing revision of [[' .
138  $importableRevision->getTitle()->getPrefixedText() . ']], timestamp ' .
139  $importableRevision->getTimestamp() . "\n" );
140  return false;
141  }
142 
143  // Select previous version to make size diffs correct
144  // @todo This assumes that multiple revisions of the same page are imported
145  // in order from oldest to newest.
146  $qi = $this->revisionStore->getQueryInfo();
147  $prevRevRow = $dbw->selectRow( $qi['tables'], $qi['fields'],
148  [
149  'rev_page' => $pageId,
150  'rev_timestamp <= ' . $dbw->addQuotes( $dbw->timestamp( $importableRevision->getTimestamp() ) ),
151  ],
152  __METHOD__,
153  [ 'ORDER BY' => [
154  'rev_timestamp DESC',
155  'rev_id DESC', // timestamp is not unique per page
156  ]
157  ],
158  $qi['joins']
159  );
160 
161  # @todo FIXME: Use original rev_id optionally (better for backups)
162  # Insert the row
163  $revisionRecord = new MutableRevisionRecord( $importableRevision->getTitle() );
164  $revisionRecord->setParentId( $prevRevRow ? (int)$prevRevRow->rev_id : 0 );
165  $revisionRecord->setComment(
166  CommentStoreComment::newUnsavedComment( $importableRevision->getComment() )
167  );
168 
169  try {
170  $revUser = $this->userFactory->newFromAnyId( $userId, $userText );
171  } catch ( InvalidArgumentException $ex ) {
172  $revUser = RequestContext::getMain()->getUser();
173  }
174  $revisionRecord->setUser( $revUser );
175 
176  $originalRevision = $prevRevRow
177  ? $this->revisionStore->newRevisionFromRow(
178  $prevRevRow,
179  IDBAccessObject::READ_LATEST,
180  $importableRevision->getTitle()
181  )
182  : null;
183 
184  foreach ( $importableRevision->getSlotRoles() as $role ) {
185  if ( !$this->slotRoleRegistry->isDefinedRole( $role ) ) {
186  throw new MWException( "Undefined slot role $role" );
187  }
188 
189  $newContent = $importableRevision->getContent( $role );
190  if ( !$originalRevision || !$originalRevision->hasSlot( $role ) ) {
191  $revisionRecord->setContent( $role, $newContent );
192  } else {
193  $originalSlot = $originalRevision->getSlot( $role );
194  if ( !$originalSlot->hasSameContent( $importableRevision->getSlot( $role ) ) ) {
195  $revisionRecord->setContent( $role, $newContent );
196  } else {
197  $revisionRecord->inheritSlot( $originalRevision->getSlot( $role ) );
198  }
199  }
200  }
201 
202  $revisionRecord->setTimestamp( $importableRevision->getTimestamp() );
203  $revisionRecord->setMinorEdit( $importableRevision->getMinor() );
204  $revisionRecord->setPageId( $pageId );
205 
206  $latestRevId = $page->getLatest();
207 
208  $inserted = $this->revisionStore->insertRevisionOn( $revisionRecord, $dbw );
209  if ( $latestRevId ) {
210  // If not found (false), cast to 0 so that the page is updated
211  // Just to be on the safe side, even though it should always be found
212  $latestRevTimestamp = (int)$this->revisionStore->getTimestampFromId(
213  $latestRevId,
214  RevisionStore::READ_LATEST
215  );
216  } else {
217  $latestRevTimestamp = 0;
218  }
219  if ( $importableRevision->getTimestamp() >= $latestRevTimestamp ) {
220  $changed = $page->updateRevisionOn( $dbw, $inserted, $latestRevId );
221  } else {
222  $changed = false;
223  }
224 
225  $tags = $importableRevision->getTags();
226  if ( $tags !== [] ) {
227  ChangeTags::addTags( $tags, null, $inserted->getId() );
228  }
229 
230  if ( $changed !== false && $this->doUpdates ) {
231  $this->logger->debug( __METHOD__ . ": running updates" );
232  // countable/oldcountable stuff is handled in WikiImporter::finishImportPage
233 
234  $options = [
235  'created' => $mustCreatePage,
236  'oldcountable' => 'no-change',
237  'causeAction' => 'import-page',
238  'causeAgent' => $user->getName(),
239  ];
240 
241  $updater = $this->pageUpdaterFactory->newDerivedPageDataUpdater( $page );
242  $updater->prepareUpdate( $inserted, $options );
243  $updater->doUpdates();
244  }
245 
246  return true;
247  }
248 
249 }
static addTags( $tags, $rc_id=null, $rev_id=null, $log_id=null, $params=null, RecentChange $rc=null)
Add tags to a change given its rc_id, rev_id and/or log_id.
Definition: ChangeTags.php:332
__construct( $doUpdates, LoggerInterface $logger, ILoadBalancer $loadBalancer, RevisionStore $revisionStore, SlotRoleRegistry $slotRoleRegistry, WikiPageFactory $wikiPageFactory=null, PageUpdaterFactory $pageUpdaterFactory=null, UserFactory $userFactory=null)
MediaWiki exception.
Definition: MWException.php:32
Value object for a comment stored by CommentStore.
Service locator for MediaWiki core services.
Service for creating WikiPage objects.
Service for looking up page revisions.
A registry service for SlotRoleHandlers, used to define which slot roles are available on which page.
A factory for PageUpdater and DerivedPageDataUpdater instances.
Represents a title within MediaWiki.
Definition: Title.php:82
Creates User objects.
Definition: UserFactory.php:38
static getMain()
Get the RequestContext object associated with the main request.
getContent( $role=SlotRecord::MAIN)
This class is a delegate to ILBFactory for a given database cluster.
const DB_PRIMARY
Definition: defines.php:28
return true
Definition: router.php:90