Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
91.07% |
102 / 112 |
|
50.00% |
1 / 2 |
CRAP | |
0.00% |
0 / 1 |
| ImportableOldRevisionImporter | |
91.89% |
102 / 111 |
|
50.00% |
1 / 2 |
24.31 | |
0.00% |
0 / 1 |
| __construct | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
1 | |||
| import | |
91.18% |
93 / 102 |
|
0.00% |
0 / 1 |
23.36 | |||
| 1 | <?php |
| 2 | |
| 3 | namespace MediaWiki\Import; |
| 4 | |
| 5 | use InvalidArgumentException; |
| 6 | use MediaWiki\CommentStore\CommentStoreComment; |
| 7 | use MediaWiki\Context\RequestContext; |
| 8 | use MediaWiki\MediaWikiServices; |
| 9 | use MediaWiki\Page\Event\PageLatestRevisionChangedEvent; |
| 10 | use MediaWiki\Page\WikiPageFactory; |
| 11 | use MediaWiki\Revision\MutableRevisionRecord; |
| 12 | use MediaWiki\Revision\RevisionStore; |
| 13 | use MediaWiki\Revision\SlotRoleRegistry; |
| 14 | use MediaWiki\Storage\PageUpdater; |
| 15 | use MediaWiki\Storage\PageUpdaterFactory; |
| 16 | use MediaWiki\Title\Title; |
| 17 | use MediaWiki\User\UserFactory; |
| 18 | use Psr\Log\LoggerInterface; |
| 19 | use RuntimeException; |
| 20 | use Wikimedia\Rdbms\IConnectionProvider; |
| 21 | use Wikimedia\Rdbms\IDBAccessObject; |
| 22 | use Wikimedia\Rdbms\SelectQueryBuilder; |
| 23 | |
/**
 * Writes a single historical revision, described by an ImportableOldRevision,
 * into the revision table of its target page, creating the page row first if
 * the page does not exist yet.
 *
 * @since 1.31
 */
class ImportableOldRevisionImporter implements OldRevisionImporter {

	private WikiPageFactory $wikiPageFactory;
	private PageUpdaterFactory $pageUpdaterFactory;
	private UserFactory $userFactory;

	/**
	 * @param bool $doUpdates Whether import() should run the derived page data
	 *   updates after the page's latest revision was changed
	 * @param LoggerInterface $logger Receives debug messages about skipped revisions
	 *   and about updates being run
	 * @param IConnectionProvider $dbProvider Source of the primary database connection
	 * @param RevisionStore $revisionStore
	 * @param SlotRoleRegistry $slotRoleRegistry Used to validate the slot roles of
	 *   the imported revision
	 * @param WikiPageFactory|null $wikiPageFactory Taken from MediaWikiServices when null
	 * @param PageUpdaterFactory|null $pageUpdaterFactory Taken from MediaWikiServices when null
	 * @param UserFactory|null $userFactory Taken from MediaWikiServices when null
	 */
	public function __construct(
		private bool $doUpdates,
		private LoggerInterface $logger,
		private IConnectionProvider $dbProvider,
		private RevisionStore $revisionStore,
		private SlotRoleRegistry $slotRoleRegistry,
		?WikiPageFactory $wikiPageFactory = null,
		?PageUpdaterFactory $pageUpdaterFactory = null,
		?UserFactory $userFactory = null
	) {
		// @todo: temporary - remove when FileImporter extension is updated
		$services = MediaWikiServices::getInstance();
		$this->wikiPageFactory = $wikiPageFactory ?? $services->getWikiPageFactory();
		$this->pageUpdaterFactory = $pageUpdaterFactory ?? $services->getPageUpdaterFactory();
		$this->userFactory = $userFactory ?? $services->getUserFactory();
	}

	/**
	 * Insert one imported revision into its page.
	 *
	 * Returns false without inserting anything when the page already has a
	 * revision with the same timestamp and content hash, or when no valid page
	 * ID could be obtained. Returns true once the revision row has been inserted,
	 * whether or not the page's latest revision was changed by it.
	 *
	 * Note: the $doUpdates argument is not read by this implementation; whether
	 * updates run is decided by the flag given to the constructor.
	 *
	 * @param ImportableOldRevision $importableRevision
	 * @param bool $doUpdates
	 * @return bool
	 * @throws RuntimeException If a candidate duplicate revision cannot be loaded,
	 *   or if the imported revision uses an undefined slot role
	 */
	public function import( ImportableOldRevision $importableRevision, $doUpdates = true ) {
		$dbw = $this->dbProvider->getPrimaryDatabase();

		# Sneak a single revision into place
		$author = $importableRevision->getUserObj()
			?: $this->userFactory->newFromName( $importableRevision->getUser() );
		// Without a usable user object, attribute the edit to ID 0 under the raw imported name.
		$authorId = $author ? $author->getId() : 0;
		$authorName = $author ? $author->getName() : $importableRevision->getUser();

		// avoid memory leak...?
		Title::clearCaches();

		$page = $this->wikiPageFactory->newFromTitle( $importableRevision->getTitle() );
		$page->loadPageData( IDBAccessObject::READ_LATEST );
		$mustCreatePage = !$page->exists();

		if ( $mustCreatePage ) {
			$pageId = $page->insertOn( $dbw );
		} else {
			$pageId = $page->getId();

			// Note: sha1 has been in XML dumps since 2012. If you have an
			// older dump, the duplicate detection here won't work.
			$importedSha1 = $importableRevision->getSha1Base36();
			if (
				$importedSha1 &&
				$this->hasRevisionWithSameContent(
					$dbw, $pageId, $importableRevision, $importedSha1, __METHOD__
				)
			) {
				$this->logger->debug( __METHOD__ . ": skipping existing revision for [[" .
					$importableRevision->getTitle()->getPrefixedText() . "]], timestamp " .
					$importableRevision->getTimestamp() . "\n" );
				return false;
			}
		}

		if ( !$pageId ) {
			// This seems to happen if two clients simultaneously try to import the
			// same page
			$this->logger->debug( __METHOD__ . ': got invalid $pageId when importing revision of [[' .
				$importableRevision->getTitle()->getPrefixedText() . ']], timestamp ' .
				$importableRevision->getTimestamp() . "\n" );
			return false;
		}

		// Select previous version to make size diffs correct
		// @todo This assumes that multiple revisions of the same page are imported
		// in order from oldest to newest.
		$parentRow = $this->revisionStore->newSelectQueryBuilder( $dbw )
			->joinComment()
			->where( [ 'rev_page' => $pageId ] )
			->andWhere( $dbw->expr(
				'rev_timestamp', '<=', $dbw->timestamp( $importableRevision->getTimestamp() )
			) )
			->orderBy( [ 'rev_timestamp', 'rev_id' ], SelectQueryBuilder::SORT_DESC )
			->caller( __METHOD__ )
			->fetchRow();

		# @todo FIXME: Use original rev_id optionally (better for backups)
		# Insert the row
		$newRevision = new MutableRevisionRecord( $importableRevision->getTitle() );
		$newRevision->setParentId( $parentRow ? (int)$parentRow->rev_id : 0 );
		$newRevision->setComment(
			CommentStoreComment::newUnsavedComment( $importableRevision->getComment() )
		);

		try {
			$revisionAuthor = $this->userFactory->newFromAnyId( $authorId, $authorName );
		} catch ( InvalidArgumentException ) {
			// The imported ID/name pair was rejected; fall back to the user of the main request.
			$revisionAuthor = RequestContext::getMain()->getUser();
		}
		$newRevision->setUser( $revisionAuthor );

		$parentRevision = null;
		if ( $parentRow ) {
			$parentRevision = $this->revisionStore->newRevisionFromRow(
				$parentRow,
				IDBAccessObject::READ_LATEST,
				$importableRevision->getTitle()
			);
		}

		foreach ( $importableRevision->getSlotRoles() as $role ) {
			if ( !$this->slotRoleRegistry->isDefinedRole( $role ) ) {
				throw new RuntimeException( "Undefined slot role $role" );
			}

			$importedContent = $importableRevision->getContent( $role );
			if ( !$parentRevision || !$parentRevision->hasSlot( $role ) ) {
				// Nothing to inherit from: store the imported content as it is.
				$newRevision->setContent( $role, $importedContent );
				continue;
			}

			$parentSlot = $parentRevision->getSlot( $role );
			if ( $parentSlot->hasSameContent( $importableRevision->getSlot( $role ) ) ) {
				// Content is unchanged for this role, so inherit the previous revision's slot.
				$newRevision->inheritSlot( $parentSlot );
			} else {
				$newRevision->setContent( $role, $importedContent );
			}
		}

		$newRevision->setTimestamp( $importableRevision->getTimestamp() );
		$newRevision->setMinorEdit( $importableRevision->getMinor() );
		$newRevision->setPageId( $pageId );

		// Grab the page's current revision before the new row is inserted.
		$updater = $this->pageUpdaterFactory->newDerivedPageDataUpdater( $page );
		$currentRevision = $updater->grabCurrentRevision();
		$currentRevisionId = $currentRevision ? $currentRevision->getId() : null;

		$inserted = $this->revisionStore->insertRevisionOn( $newRevision, $dbw );

		// With no current revision, compare against 0 so that the page row is updated.
		$currentTimestamp = $currentRevision ? $currentRevision->getTimestamp() : 0;
		// Only an imported revision at least as new as the current one may become the latest.
		$changed = $importableRevision->getTimestamp() >= $currentTimestamp
			? $page->updateRevisionOn( $dbw, $inserted, $currentRevisionId )
			: false;

		$tags = $importableRevision->getTags();
		if ( $tags !== [] ) {
			MediaWikiServices::getInstance()->getChangeTagsStore()->addTags( $tags, null, $inserted->getId() );
		}

		if ( $changed === false || !$this->doUpdates ) {
			return true;
		}

		$this->logger->debug( __METHOD__ . ": running updates" );
		// countable/oldcountable stuff is handled in WikiImporter::finishImportPage

		$updateOptions = [
			PageLatestRevisionChangedEvent::FLAG_SILENT => true,
			PageLatestRevisionChangedEvent::FLAG_IMPLICIT => true,
			'created' => $mustCreatePage,
			'oldcountable' => 'no-change',
		];

		$updater->setCause( PageUpdater::CAUSE_IMPORT );
		$updater->setPerformer( RequestContext::getMain()->getUser() );
		$updater->prepareUpdate( $inserted, $updateOptions );
		$updater->doUpdates();

		return true;
	}

	/**
	 * Check whether the page already has a revision at the imported revision's
	 * timestamp whose content hash equals the given one.
	 *
	 * @param \Wikimedia\Rdbms\IDatabase $dbw Primary database connection
	 * @param int $pageId ID of the existing page
	 * @param ImportableOldRevision $importableRevision Supplies the timestamp to look up
	 * @param string $importedSha1 Content hash of the imported revision
	 * @param string $fname Caller name to attribute the database query to
	 * @return bool
	 * @throws RuntimeException If a revision ID returned by the query cannot be loaded
	 */
	private function hasRevisionWithSameContent(
		$dbw,
		$pageId,
		ImportableOldRevision $importableRevision,
		$importedSha1,
		$fname
	) {
		// Get revision IDs for the page at the given timestamp
		$candidateIds = $dbw->newSelectQueryBuilder()
			->select( 'rev_id' )
			->from( 'revision' )
			->where( [
				'rev_page' => $pageId,
				'rev_timestamp' => $dbw->timestamp( $importableRevision->getTimestamp() ),
			] )
			->caller( $fname )
			->fetchFieldValues();

		foreach ( $candidateIds as $candidateId ) {
			$existing = $this->revisionStore->getRevisionById( $candidateId );
			if ( !$existing ) {
				throw new RuntimeException( "Revision $candidateId not found" );
			}

			if ( $existing->getSha1() === $importedSha1 ) {
				return true;
			}
		}

		return false;
	}

}
| 228 | |
/**
 * Registers the un-namespaced name 'ImportableOldRevisionImporter' as an alias of
 * the class declared in this file's namespace, so the old global name still resolves.
 *
 * @deprecated class alias since 1.46
 */
class_alias( ImportableOldRevisionImporter::class, 'ImportableOldRevisionImporter' );