MediaWiki  master
ImportableOldRevisionImporter.php
Go to the documentation of this file.
1 <?php
2 
10 use Psr\Log\LoggerInterface;
12 
17 
/** @var LoggerInterface Receives the debug messages written by import(). */
private $logger;

/** @var bool Whether import() runs derived-data updates once the page row has changed. */
private $doUpdates;

/** @var ILoadBalancer Provides the DB_PRIMARY connection used by import(). */
private $loadBalancer;

/** @var RevisionStore Used to look up and insert revision rows. */
private $revisionStore;

/** @var SlotRoleRegistry Used to check that every imported slot role is defined. */
private $slotRoleRegistry;

/** @var WikiPageFactory Creates the WikiPage for the title being imported. */
private $wikiPageFactory;

/** @var PageUpdaterFactory Creates the derived-page-data updater run after an import. */
private $pageUpdaterFactory;

/** @var UserFactory Resolves the author of the imported revision. */
private $userFactory;
53 
/**
 * Stores the collaborators that import() works with.
 *
 * The three trailing factories are optional: when one of them is omitted
 * (null), the corresponding service is taken from MediaWikiServices.
 *
 * NOTE(review): the type names below are assumed to be imported at the top
 * of the file - confirm the `use` block covers them.
 *
 * @param bool $doUpdates Whether import() should run derived-data updates
 * @param LoggerInterface $logger
 * @param ILoadBalancer $loadBalancer
 * @param RevisionStore $revisionStore
 * @param SlotRoleRegistry $slotRoleRegistry
 * @param WikiPageFactory|null $wikiPageFactory
 * @param PageUpdaterFactory|null $pageUpdaterFactory
 * @param UserFactory|null $userFactory
 */
public function __construct(
	$doUpdates,
	LoggerInterface $logger,
	ILoadBalancer $loadBalancer,
	RevisionStore $revisionStore,
	SlotRoleRegistry $slotRoleRegistry,
	WikiPageFactory $wikiPageFactory = null,
	PageUpdaterFactory $pageUpdaterFactory = null,
	UserFactory $userFactory = null
) {
	$this->doUpdates = $doUpdates;
	$this->logger = $logger;
	$this->loadBalancer = $loadBalancer;
	$this->revisionStore = $revisionStore;
	$this->slotRoleRegistry = $slotRoleRegistry;

	$services = MediaWikiServices::getInstance();
	// @todo: temporary - remove when FileImporter extension is updated
	$this->wikiPageFactory = $wikiPageFactory ?? $services->getWikiPageFactory();
	$this->pageUpdaterFactory = $pageUpdaterFactory ?? $services->getPageUpdaterFactory();
	$this->userFactory = $userFactory ?? $services->getUserFactory();
}
86 
/**
 * Inserts a single old revision of a page into the database.
 *
 * The page row is created when the page does not exist yet. For an existing
 * page, a revision with the same page, timestamp and sha1 is treated as a
 * duplicate and skipped. The page's latest-revision pointer is only moved
 * when the imported timestamp is greater than that of the current latest
 * revision; derived-data updates then run if the constructor flag allows it.
 *
 * @param ImportableOldRevision $importableRevision Revision to import
 * @param bool $doUpdates Not read by this method; the flag passed to the
 *   constructor decides whether updates run.
 * @return bool False if the revision was skipped (duplicate found, or no
 *   valid page id could be obtained), true once the revision was inserted
 * @throws MWException If the revision uses a slot role that is not defined
 */
public function import( ImportableOldRevision $importableRevision, $doUpdates = true ) {
	$dbw = $this->loadBalancer->getConnectionRef( DB_PRIMARY );

	# Sneak a single revision into place
	$user = $importableRevision->getUserObj()
		?: $this->userFactory->newFromName( $importableRevision->getUser() );
	if ( $user ) {
		$userId = $user->getId();
		$userText = $user->getName();
	} else {
		// No usable user object: keep the raw name and attribute updates to an anonymous user
		$userId = 0;
		$userText = $importableRevision->getUser();
		$user = $this->userFactory->newAnonymous();
	}

	// avoid memory leak...?
	Title::clearCaches();

	$page = $this->wikiPageFactory->newFromTitle( $importableRevision->getTitle() );
	$page->loadPageData( WikiPage::READ_LATEST );
	$mustCreatePage = !$page->exists();
	if ( $mustCreatePage ) {
		$pageId = $page->insertOn( $dbw );
	} else {
		$pageId = $page->getId();

		// Note: sha1 has been in XML dumps since 2012. If you have an
		// older dump, the duplicate detection here won't work.
		if ( $importableRevision->getSha1Base36() !== false ) {
			$prior = (bool)$dbw->selectField( 'revision', '1',
				[ 'rev_page' => $pageId,
				'rev_timestamp' => $dbw->timestamp( $importableRevision->getTimestamp() ),
				'rev_sha1' => $importableRevision->getSha1Base36() ],
				__METHOD__
			);
			if ( $prior ) {
				// @todo FIXME: This could fail slightly for multiple matches :P
				$this->logger->debug( __METHOD__ . ": skipping existing revision for [[" .
					$importableRevision->getTitle()->getPrefixedText() . "]], timestamp " .
					$importableRevision->getTimestamp() . "\n" );
				return false;
			}
		}
	}

	if ( !$pageId ) {
		// This seems to happen if two clients simultaneously try to import the
		// same page
		$this->logger->debug( __METHOD__ . ': got invalid $pageId when importing revision of [[' .
			$importableRevision->getTitle()->getPrefixedText() . ']], timestamp ' .
			$importableRevision->getTimestamp() . "\n" );
		return false;
	}

	// Select previous version to make size diffs correct
	// @todo This assumes that multiple revisions of the same page are imported
	// in order from oldest to newest.
	$qi = $this->revisionStore->getQueryInfo();
	$prevRevRow = $dbw->selectRow( $qi['tables'], $qi['fields'],
		[
			'rev_page' => $pageId,
			'rev_timestamp <= ' . $dbw->addQuotes( $dbw->timestamp( $importableRevision->getTimestamp() ) ),
		],
		__METHOD__,
		[ 'ORDER BY' => [
				'rev_timestamp DESC',
				'rev_id DESC', // timestamp is not unique per page
			]
		],
		$qi['joins']
	);

	# @todo FIXME: Use original rev_id optionally (better for backups)
	# Insert the row
	$revisionRecord = new MutableRevisionRecord( $importableRevision->getTitle() );
	$revisionRecord->setParentId( $prevRevRow ? (int)$prevRevRow->rev_id : 0 );
	$revisionRecord->setComment(
		CommentStoreComment::newUnsavedComment( $importableRevision->getComment() )
	);

	try {
		$revUser = $this->userFactory->newFromAnyId( $userId, $userText );
	} catch ( InvalidArgumentException $ex ) {
		// The id/name pair was rejected: fall back to the user of the main request
		$revUser = RequestContext::getMain()->getUser();
	}
	$revisionRecord->setUser( $revUser );

	$originalRevision = $prevRevRow
		? $this->revisionStore->newRevisionFromRow(
			$prevRevRow,
			IDBAccessObject::READ_LATEST,
			$importableRevision->getTitle()
		)
		: null;

	foreach ( $importableRevision->getSlotRoles() as $role ) {
		if ( !$this->slotRoleRegistry->isDefinedRole( $role ) ) {
			throw new MWException( "Undefined slot role $role" );
		}

		$newContent = $importableRevision->getContent( $role );
		if ( !$originalRevision || !$originalRevision->hasSlot( $role ) ) {
			$revisionRecord->setContent( $role, $newContent );
		} else {
			$originalSlot = $originalRevision->getSlot( $role );
			if ( !$originalSlot->hasSameContent( $importableRevision->getSlot( $role ) ) ) {
				$revisionRecord->setContent( $role, $newContent );
			} else {
				// Content is unchanged: inherit the previous revision's slot instead of setting new content
				$revisionRecord->inheritSlot( $originalSlot );
			}
		}
	}

	$revisionRecord->setTimestamp( $importableRevision->getTimestamp() );
	$revisionRecord->setMinorEdit( $importableRevision->getMinor() );
	$revisionRecord->setPageId( $pageId );

	// Read the latest revision id before inserting the new revision
	$latestRevId = $page->getLatest();

	$inserted = $this->revisionStore->insertRevisionOn( $revisionRecord, $dbw );
	if ( $latestRevId ) {
		// If not found (false), cast to 0 so that the page is updated
		// Just to be on the safe side, even though it should always be found
		$latestRevTimestamp = (int)$this->revisionStore->getTimestampFromId(
			$latestRevId,
			RevisionStore::READ_LATEST
		);
	} else {
		$latestRevTimestamp = 0;
	}
	if ( $importableRevision->getTimestamp() > $latestRevTimestamp ) {
		$changed = $page->updateRevisionOn( $dbw, $inserted, $latestRevId );
	} else {
		// An older revision was imported: the page keeps its current latest revision
		$changed = false;
	}

	$tags = $importableRevision->getTags();
	if ( $tags !== [] ) {
		ChangeTags::addTags( $tags, null, $inserted->getId() );
	}

	if ( $changed !== false && $this->doUpdates ) {
		$this->logger->debug( __METHOD__ . ": running updates" );
		// countable/oldcountable stuff is handled in WikiImporter::finishImportPage

		$options = [
			'created' => $mustCreatePage,
			'oldcountable' => 'no-change',
			'causeAction' => 'edit-page',
			'causeAgent' => $user->getName(),
		];

		$updater = $this->pageUpdaterFactory->newDerivedPageDataUpdater( $page );
		$updater->prepareUpdate( $inserted, $options );
		$updater->doUpdates();
	}

	return true;
}
246 
247 }
static addTags( $tags, $rc_id=null, $rev_id=null, $log_id=null, $params=null, RecentChange $rc=null)
Add tags to a change given its rc_id, rev_id and/or log_id.
Definition: ChangeTags.php:321
static newUnsavedComment( $comment, array $data=null)
Create a new, unsaved CommentStoreComment.
__construct( $doUpdates, LoggerInterface $logger, ILoadBalancer $loadBalancer, RevisionStore $revisionStore, SlotRoleRegistry $slotRoleRegistry, WikiPageFactory $wikiPageFactory=null, PageUpdaterFactory $pageUpdaterFactory=null, UserFactory $userFactory=null)
MediaWiki exception.
Definition: MWException.php:29
MediaWikiServices is the service locator for the application scope of MediaWiki.
Service for creating WikiPage objects.
Service for looking up page revisions.
A registry service for SlotRoleHandlers, used to define which slot roles are available on which page.
A factory for PageUpdater instances.
Creates User objects.
Definition: UserFactory.php:38
static getMain()
Get the RequestContext object associated with the main request.
static clearCaches()
Definition: Title.php:2954
getContent( $role=SlotRecord::MAIN)
Database cluster connection, tracking, load balancing, and transaction manager interface.
const DB_PRIMARY
Definition: defines.php:27
return true
Definition: router.php:90