MediaWiki master
ImportableOldRevisionImporter.php
Go to the documentation of this file.
1<?php
2
15use Psr\Log\LoggerInterface;
19
24
/** Whether import() runs page updates after it changes a page's latest revision. */
private bool $doUpdates;

/** Receives the debug messages emitted by import(). */
private LoggerInterface $logger;

/** Supplies the primary database connection used by import(). */
private IConnectionProvider $dbProvider;

/** Used to look up existing revisions and to insert the imported one. */
private RevisionStore $revisionStore;

/** Used to check that every slot role of an imported revision is defined. */
private SlotRoleRegistry $slotRoleRegistry;

/** Creates the WikiPage object for the title being imported into. */
private WikiPageFactory $wikiPageFactory;

/** Creates the derived page data updater used by import(). */
private PageUpdaterFactory $pageUpdaterFactory;

/** Resolves the author of an imported revision to a user object. */
private UserFactory $userFactory;
33
/**
 * @param bool $doUpdates Whether import() runs page updates after it changes
 *  a page's latest revision
 * @param LoggerInterface $logger
 * @param IConnectionProvider $dbProvider
 * @param RevisionStore $revisionStore
 * @param SlotRoleRegistry $slotRoleRegistry
 * @param WikiPageFactory|null $wikiPageFactory Taken from MediaWikiServices when null
 * @param PageUpdaterFactory|null $pageUpdaterFactory Taken from MediaWikiServices when null
 * @param UserFactory|null $userFactory Taken from MediaWikiServices when null
 */
public function __construct(
	bool $doUpdates,
	LoggerInterface $logger,
	IConnectionProvider $dbProvider,
	RevisionStore $revisionStore,
	SlotRoleRegistry $slotRoleRegistry,
	?WikiPageFactory $wikiPageFactory = null,
	?PageUpdaterFactory $pageUpdaterFactory = null,
	?UserFactory $userFactory = null
) {
	// @todo Temporary: remove these fallbacks when the FileImporter extension is updated.
	// Each getter below is only called when the matching argument was not supplied.
	$fallbackServices = MediaWikiServices::getInstance();
	$wikiPageFactory ??= $fallbackServices->getWikiPageFactory();
	$pageUpdaterFactory ??= $fallbackServices->getPageUpdaterFactory();
	$userFactory ??= $fallbackServices->getUserFactory();

	$this->doUpdates = $doUpdates;
	$this->logger = $logger;
	$this->dbProvider = $dbProvider;
	$this->revisionStore = $revisionStore;
	$this->slotRoleRegistry = $slotRoleRegistry;
	$this->wikiPageFactory = $wikiPageFactory;
	$this->pageUpdaterFactory = $pageUpdaterFactory;
	$this->userFactory = $userFactory;
}
56
/**
 * Import a single old revision into its page, creating the page row first
 * when the page does not exist yet.
 *
 * @param ImportableOldRevision $importableRevision Revision to import
 * @param bool $doUpdates Not read by this method; the flag given to the
 *  constructor decides whether updates run
 * @return bool False when the revision was skipped (an existing revision with
 *  the same timestamp and content hash was found, or no valid page ID was
 *  obtained), true otherwise
 * @throws RuntimeException If a revision ID selected for duplicate detection
 *  cannot be loaded, or if the revision uses an undefined slot role
 */
public function import( ImportableOldRevision $importableRevision, $doUpdates = true ) {
	$dbw = $this->dbProvider->getPrimaryDatabase();

	# Sneak a single revision into place
	$performer = $importableRevision->getUserObj()
		?: $this->userFactory->newFromName( $importableRevision->getUser() );
	if ( !$performer ) {
		// No usable user object: keep the raw name and act as an anonymous user.
		$authorId = 0;
		$authorName = $importableRevision->getUser();
		$performer = $this->userFactory->newAnonymous();
	} else {
		$authorId = $performer->getId();
		$authorName = $performer->getName();
	}

	// Clear the Title caches; the original note marks this as a possible
	// memory-leak safeguard without being sure.
	Title::clearCaches();

	$page = $this->wikiPageFactory->newFromTitle( $importableRevision->getTitle() );
	$page->loadPageData( IDBAccessObject::READ_LATEST );
	$mustCreatePage = !$page->exists();
	$pageId = $mustCreatePage ? $page->insertOn( $dbw ) : $page->getId();

	if ( !$mustCreatePage ) {
		// Duplicate detection only makes sense for a page that already has revisions.
		// Note: sha1 has been in XML dumps since 2012. With an older dump there is
		// no hash to compare, so duplicates are not detected.
		$importContentHash = $importableRevision->getSha1Base36();

		// Revision IDs of this page that carry exactly the imported timestamp.
		$revIds = $importContentHash
			? $dbw->newSelectQueryBuilder()
				->select( 'rev_id' )
				->from( 'revision' )
				->where( [
					'rev_page' => $pageId,
					'rev_timestamp' => $dbw->timestamp( $importableRevision->getTimestamp() ),
				] )
				->caller( __METHOD__ )
				->fetchFieldValues()
			: [];

		foreach ( $revIds as $revId ) {
			$revision = $this->revisionStore->getRevisionById( $revId );
			if ( !$revision ) {
				throw new RuntimeException( "Revision $revId not found" );
			}

			if ( $revision->getSha1() !== $importContentHash ) {
				continue;
			}

			// Same timestamp and same content hash: this revision is already present.
			$this->logger->debug( __METHOD__ . ": skipping existing revision for [[" .
				$importableRevision->getTitle()->getPrefixedText() . "]], timestamp " .
				$importableRevision->getTimestamp() . "\n" );
			return false;
		}
	}

	if ( !$pageId ) {
		// This seems to happen if two clients simultaneously try to import the
		// same page
		$this->logger->debug( __METHOD__ . ': got invalid $pageId when importing revision of [[' .
			$importableRevision->getTitle()->getPrefixedText() . ']], timestamp ' .
			$importableRevision->getTimestamp() . "\n" );
		return false;
	}

	// Find the newest revision that is not newer than the imported one, so that
	// size diffs come out right.
	// @todo This assumes that multiple revisions of the same page are imported
	// in order from oldest to newest.
	$prevRevRow = $this->revisionStore->newSelectQueryBuilder( $dbw )
		->joinComment()
		->where( [ 'rev_page' => $pageId ] )
		->andWhere( $dbw->expr(
			'rev_timestamp', '<=', $dbw->timestamp( $importableRevision->getTimestamp() )
		) )
		->orderBy( [ 'rev_timestamp', 'rev_id' ], SelectQueryBuilder::SORT_DESC )
		->caller( __METHOD__ )
		->fetchRow();

	# @todo FIXME: Use original rev_id optionally (better for backups)
	# Build the revision record to insert
	$parentId = $prevRevRow ? (int)$prevRevRow->rev_id : 0;
	$revisionRecord = new MutableRevisionRecord( $importableRevision->getTitle() );
	$revisionRecord->setParentId( $parentId );
	$revisionRecord->setComment(
		CommentStoreComment::newUnsavedComment( $importableRevision->getComment() )
	);

	try {
		$revUser = $this->userFactory->newFromAnyId( $authorId, $authorName );
	} catch ( InvalidArgumentException ) {
		// The ID/name pair was rejected: attribute the revision to the main
		// request context's user instead.
		$revUser = RequestContext::getMain()->getUser();
	}
	$revisionRecord->setUser( $revUser );

	$originalRevision = null;
	if ( $prevRevRow ) {
		$originalRevision = $this->revisionStore->newRevisionFromRow(
			$prevRevRow,
			IDBAccessObject::READ_LATEST,
			$importableRevision->getTitle()
		);
	}

	foreach ( $importableRevision->getSlotRoles() as $role ) {
		if ( !$this->slotRoleRegistry->isDefinedRole( $role ) ) {
			throw new RuntimeException( "Undefined slot role $role" );
		}

		$newContent = $importableRevision->getContent( $role );

		// New content is set unless the previous revision has this slot with the
		// same content, in which case the slot is inherited from it.
		$needsNewContent = !$originalRevision
			|| !$originalRevision->hasSlot( $role )
			|| !$originalRevision->getSlot( $role )->hasSameContent(
				$importableRevision->getSlot( $role )
			);

		if ( $needsNewContent ) {
			$revisionRecord->setContent( $role, $newContent );
		} else {
			$revisionRecord->inheritSlot( $originalRevision->getSlot( $role ) );
		}
	}

	$revisionRecord->setTimestamp( $importableRevision->getTimestamp() );
	$revisionRecord->setMinorEdit( $importableRevision->getMinor() );
	$revisionRecord->setPageId( $pageId );

	// Grab the page's current revision before the new row is inserted.
	$updater = $this->pageUpdaterFactory->newDerivedPageDataUpdater( $page );
	$latestRev = $updater->grabCurrentRevision();
	$latestRevId = $latestRev ? $latestRev->getId() : null;

	$inserted = $this->revisionStore->insertRevisionOn( $revisionRecord, $dbw );

	// Without a current revision, 0 is used as the timestamp to compare against.
	$latestRevTimestamp = $latestRev ? $latestRev->getTimestamp() : 0;

	// Only update the page's latest revision when the imported revision is not
	// older than the current one.
	$changed = false;
	if ( $importableRevision->getTimestamp() >= $latestRevTimestamp ) {
		$changed = $page->updateRevisionOn( $dbw, $inserted, $latestRevId );
	}

	$tags = $importableRevision->getTags();
	if ( $tags !== [] ) {
		MediaWikiServices::getInstance()->getChangeTagsStore()->addTags( $tags, null, $inserted->getId() );
	}

	if ( $changed === false || !$this->doUpdates ) {
		return true;
	}

	$this->logger->debug( __METHOD__ . ": running updates" );
	// countable/oldcountable stuff is handled in WikiImporter::finishImportPage

	$options = [
		PageLatestRevisionChangedEvent::FLAG_SILENT => true,
		PageLatestRevisionChangedEvent::FLAG_IMPLICIT => true,
		'created' => $mustCreatePage,
		'oldcountable' => 'no-change',
	];

	$updater->setCause( PageUpdater::CAUSE_IMPORT );
	$updater->setPerformer( $performer ); // TODO: get the actual performer, not the revision author.
	$updater->prepareUpdate( $inserted, $options );
	$updater->doUpdates();

	return true;
}
223
224}
__construct(bool $doUpdates, LoggerInterface $logger, IConnectionProvider $dbProvider, RevisionStore $revisionStore, SlotRoleRegistry $slotRoleRegistry, ?WikiPageFactory $wikiPageFactory=null, ?PageUpdaterFactory $pageUpdaterFactory=null, ?UserFactory $userFactory=null)
Value object for a comment stored by CommentStore.
Group all the pieces relevant to the context of a request into one instance.
Service locator for MediaWiki core services.
Domain event representing a change to the page's latest revision.
Service for creating WikiPage objects.
Service for looking up page revisions.
A registry service for SlotRoleHandlers, used to define which slot roles are available on which page.
A factory for PageUpdater and DerivedPageDataUpdater instances.
Controller-like object for creating and updating pages by creating new revisions.
Represents a title within MediaWiki.
Definition Title.php:69
Create User objects.
Build SELECT queries with a fluent interface.
getContent( $role=SlotRecord::MAIN)
Provide primary and replica IDatabase connections.
Interface for database access objects.