Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
55.78% covered (warning)
55.78%
304 / 545
25.49% covered (danger)
25.49%
13 / 51
CRAP
0.00% covered (danger)
0.00%
0 / 1
WikiImporter
55.78% covered (warning)
55.78%
304 / 545
25.49% covered (danger)
25.49%
13 / 51
3413.89
0.00% covered (danger)
0.00%
0 / 1
 __construct
96.00% covered (success)
96.00%
24 / 25
0.00% covered (danger)
0.00%
0 / 1
2
 getReader
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 throwXmlError
0.00% covered (danger)
0.00%
0 / 2
0.00% covered (danger)
0.00%
0 / 1
2
 debug
50.00% covered (danger)
50.00%
1 / 2
0.00% covered (danger)
0.00%
0 / 1
2.50
 warn
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 notice
0.00% covered (danger)
0.00%
0 / 3
0.00% covered (danger)
0.00%
0 / 1
6
 setDebug
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 setNoUpdates
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 setPageOffset
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 setNoticeCallback
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 setPageCallback
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
1
 setPageOutCallback
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
1
 setRevisionCallback
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
1
 setUploadCallback
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
1
 setLogItemCallback
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
1
 setSiteInfoCallback
0.00% covered (danger)
0.00%
0 / 3
0.00% covered (danger)
0.00%
0 / 1
2
 setImportTitleFactory
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 setTargetNamespace
0.00% covered (danger)
0.00%
0 / 21
0.00% covered (danger)
0.00%
0 / 1
20
 setTargetRootPage
0.00% covered (danger)
0.00%
0 / 29
0.00% covered (danger)
0.00%
0 / 1
56
 setImageBasePath
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 setImportUploads
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 setUsernamePrefix
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 disableStatisticsUpdate
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 beforeImportPage
100.00% covered (success)
100.00%
4 / 4
100.00% covered (success)
100.00%
1 / 1
1
 importRevision
17.65% covered (danger)
17.65%
3 / 17
0.00% covered (danger)
0.00%
0 / 1
8.03
 importLogItem
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 importUpload
0.00% covered (danger)
0.00%
0 / 2
0.00% covered (danger)
0.00%
0 / 1
2
 finishImportPage
72.22% covered (warning)
72.22%
13 / 18
0.00% covered (danger)
0.00%
0 / 1
5.54
 siteInfoCallback
66.67% covered (warning)
66.67%
2 / 3
0.00% covered (danger)
0.00%
0 / 1
2.15
 pageCallback
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
2
 pageOutCallback
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
2
 revisionCallback
66.67% covered (warning)
66.67%
2 / 3
0.00% covered (danger)
0.00%
0 / 1
2.15
 logItemCallback
0.00% covered (danger)
0.00%
0 / 3
0.00% covered (danger)
0.00%
0 / 1
6
 nodeAttribute
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 nodeContents
80.00% covered (warning)
80.00%
8 / 10
0.00% covered (danger)
0.00%
0 / 1
7.39
 doImport
79.17% covered (warning)
79.17%
38 / 48
0.00% covered (danger)
0.00%
0 / 1
17.03
 handleSiteInfo
100.00% covered (success)
100.00%
15 / 15
100.00% covered (success)
100.00%
1 / 1
6
 handleLogItem
0.00% covered (danger)
0.00%
0 / 17
0.00% covered (danger)
0.00%
0 / 1
72
 processLogItem
0.00% covered (danger)
0.00%
0 / 22
0.00% covered (danger)
0.00%
0 / 1
72
 handlePage
90.70% covered (success)
90.70%
39 / 43
0.00% covered (danger)
0.00%
0 / 1
17.23
 handleRevision
100.00% covered (success)
100.00%
24 / 24
100.00% covered (success)
100.00%
1 / 1
11
 handleContent
88.24% covered (warning)
88.24%
15 / 17
0.00% covered (danger)
0.00%
0 / 1
8.10
 makeContent
80.00% covered (warning)
80.00%
20 / 25
0.00% covered (danger)
0.00%
0 / 1
6.29
 processRevision
86.21% covered (warning)
86.21%
25 / 29
0.00% covered (danger)
0.00%
0 / 1
9.21
 handleUpload
0.00% covered (danger)
0.00%
0 / 31
0.00% covered (danger)
0.00%
0 / 1
240
 dumpTemp
0.00% covered (danger)
0.00%
0 / 3
0.00% covered (danger)
0.00%
0 / 1
2
 processUpload
0.00% covered (danger)
0.00%
0 / 29
0.00% covered (danger)
0.00%
0 / 1
42
 handleContributor
92.31% covered (success)
92.31%
12 / 13
0.00% covered (danger)
0.00%
0 / 1
6.02
 processTitle
65.22% covered (warning)
65.22%
15 / 23
0.00% covered (danger)
0.00%
0 / 1
7.51
 openReader
57.14% covered (warning)
57.14%
8 / 14
0.00% covered (danger)
0.00%
0 / 1
3.71
 syntaxCheckXML
93.33% covered (success)
93.33%
14 / 15
0.00% covered (danger)
0.00%
0 / 1
4.00
1<?php
2/**
3 * MediaWiki page data importer.
4 *
5 * Copyright © 2003,2005 Brooke Vibber <bvibber@wikimedia.org>
6 * https://www.mediawiki.org/
7 *
8 * @license GPL-2.0-or-later
9 * @file
10 * @ingroup SpecialPage
11 */
12
13use MediaWiki\Cache\CacheKeyHelper;
14use MediaWiki\Config\Config;
15use MediaWiki\Content\Content;
16use MediaWiki\Content\IContentHandlerFactory;
17use MediaWiki\Deferred\DeferredUpdates;
18use MediaWiki\Deferred\SiteStatsUpdate;
19use MediaWiki\Exception\MWContentSerializationException;
20use MediaWiki\HookContainer\HookContainer;
21use MediaWiki\HookContainer\HookRunner;
22use MediaWiki\Language\Language;
23use MediaWiki\MainConfigNames;
24use MediaWiki\Page\PageIdentity;
25use MediaWiki\Page\WikiPageFactory;
26use MediaWiki\Permissions\Authority;
27use MediaWiki\Revision\SlotRecord;
28use MediaWiki\Revision\SlotRoleRegistry;
29use MediaWiki\Status\Status;
30use MediaWiki\Title\ForeignTitle;
31use MediaWiki\Title\ImportTitleFactory;
32use MediaWiki\Title\NaiveForeignTitleFactory;
33use MediaWiki\Title\NaiveImportTitleFactory;
34use MediaWiki\Title\NamespaceAwareForeignTitleFactory;
35use MediaWiki\Title\NamespaceImportTitleFactory;
36use MediaWiki\Title\NamespaceInfo;
37use MediaWiki\Title\SubpageImportTitleFactory;
38use MediaWiki\Title\Title;
39use MediaWiki\Title\TitleFactory;
40use MediaWiki\User\ExternalUserNames;
41use Wikimedia\AtEase\AtEase;
42use Wikimedia\Message\MessageParam;
43use Wikimedia\Message\MessageSpecifier;
44use Wikimedia\NormalizedException\NormalizedException;
45use Wikimedia\Rdbms\IDBAccessObject;
46
47/**
48 * XML file reader for the page data importer.
49 *
50 * implements Special:Import
51 * @ingroup SpecialPage
52 */
53class WikiImporter {
54    /** @var XMLReader|null */
55    private $reader;
56
57    /** @var string */
58    private $sourceAdapterId;
59
60    /** @var array|null */
61    private $foreignNamespaces = null;
62
63    /** @var callable|null */
64    private $mLogItemCallback;
65
66    /** @var callable */
67    private $mUploadCallback;
68
69    /** @var callable|null */
70    private $mRevisionCallback;
71
72    /** @var callable|null */
73    private $mPageCallback;
74
75    /** @var callable|null */
76    private $mSiteInfoCallback;
77
78    /** @var callable|null */
79    private $mPageOutCallback;
80
81    /** @var callable|null */
82    private $mNoticeCallback;
83
84    /** @var bool|null */
85    private $mDebug;
86
87    /** @var bool|null */
88    private $mImportUploads;
89
90    /** @var string|null */
91    private $mImageBasePath;
92
93    /** @var bool */
94    private $mNoUpdates = false;
95
96    /** @var int */
97    private $pageOffset = 0;
98
99    private ImportTitleFactory $importTitleFactory;
100    private ExternalUserNames $externalUserNames;
101
102    /** @var array */
103    private $countableCache = [];
104
105    /** @var bool */
106    private $disableStatisticsUpdate = false;
107
108    /**
109     * Authority used for permission checks only (to ensure that the user performing the import is
110     * allowed to edit the pages they're importing). To skip the checks, use UltimateAuthority.
111     *
112     * If you want to also log the import actions, see ImportReporter.
113     */
114    private Authority $performer;
115
116    private Config $config;
117    private HookRunner $hookRunner;
118    private Language $contentLanguage;
119    private NamespaceInfo $namespaceInfo;
120    private TitleFactory $titleFactory;
121    private WikiPageFactory $wikiPageFactory;
122    private UploadRevisionImporter $uploadRevisionImporter;
123    private IContentHandlerFactory $contentHandlerFactory;
124    private SlotRoleRegistry $slotRoleRegistry;
125
126    /**
127     * Creates an ImportXMLReader drawing from the source provided
128     */
129    public function __construct(
130        ImportSource $source,
131        Authority $performer,
132        Config $config,
133        HookContainer $hookContainer,
134        Language $contentLanguage,
135        NamespaceInfo $namespaceInfo,
136        TitleFactory $titleFactory,
137        WikiPageFactory $wikiPageFactory,
138        UploadRevisionImporter $uploadRevisionImporter,
139        IContentHandlerFactory $contentHandlerFactory,
140        SlotRoleRegistry $slotRoleRegistry
141    ) {
142        $this->performer = $performer;
143        $this->config = $config;
144        $this->hookRunner = new HookRunner( $hookContainer );
145        $this->contentLanguage = $contentLanguage;
146        $this->namespaceInfo = $namespaceInfo;
147        $this->titleFactory = $titleFactory;
148        $this->wikiPageFactory = $wikiPageFactory;
149        $this->uploadRevisionImporter = $uploadRevisionImporter;
150        $this->contentHandlerFactory = $contentHandlerFactory;
151        $this->slotRoleRegistry = $slotRoleRegistry;
152
153        if ( !in_array( 'uploadsource', stream_get_wrappers() ) ) {
154            stream_wrapper_register( 'uploadsource', UploadSourceAdapter::class );
155        }
156        $this->sourceAdapterId = UploadSourceAdapter::registerSource( $source );
157
158        $this->openReader();
159
160        // Default callbacks
161        $this->setPageCallback( $this->beforeImportPage( ... ) );
162        $this->setRevisionCallback( $this->importRevision( ... ) );
163        $this->setUploadCallback( $this->importUpload( ... ) );
164        $this->setLogItemCallback( $this->importLogItem( ... ) );
165        $this->setPageOutCallback( $this->finishImportPage( ... ) );
166
167        $this->importTitleFactory = new NaiveImportTitleFactory(
168            $this->contentLanguage,
169            $this->namespaceInfo,
170            $this->titleFactory
171        );
172        $this->externalUserNames = new ExternalUserNames( 'imported', false );
173    }
174
175    /**
176     * @return null|XMLReader
177     */
178    public function getReader() {
179        return $this->reader;
180    }
181
182    /**
183     * @param string $err
184     */
185    public function throwXmlError( $err ) {
186        $this->debug( "FAILURE: $err" );
187        wfDebug( "WikiImporter XML error: $err" );
188    }
189
190    /**
191     * @param string $data
192     */
193    public function debug( $data ) {
194        if ( $this->mDebug ) {
195            wfDebug( "IMPORT: $data" );
196        }
197    }
198
199    /**
200     * @param string $data
201     */
202    public function warn( $data ) {
203        wfDebug( "IMPORT: $data" );
204    }
205
206    /**
207     * @param string $msg
208     * @phpcs:ignore Generic.Files.LineLength
209     * @param MessageParam|MessageSpecifier|string|int|float|list<MessageParam|MessageSpecifier|string|int|float> ...$params
210     *   See Message::params()
211     */
212    public function notice( $msg, ...$params ) {
213        if ( is_callable( $this->mNoticeCallback ) ) {
214            ( $this->mNoticeCallback )( $msg, $params );
215        } else { # No ImportReporter -> CLI
216            // T177997: the command line importers should call setNoticeCallback()
217            // for their own custom callback to echo the notice
218            wfDebug( wfMessage( $msg, $params )->text() );
219        }
220    }
221
222    /**
223     * Set debug mode...
224     * @param bool $debug
225     */
226    public function setDebug( $debug ) {
227        $this->mDebug = $debug;
228    }
229
230    /**
231     * Set 'no updates' mode. In this mode, the link tables will not be updated by the importer
232     * @param bool $noupdates
233     */
234    public function setNoUpdates( $noupdates ) {
235        $this->mNoUpdates = $noupdates;
236    }
237
238    /**
239     * Sets 'pageOffset' value. So it will skip the first n-1 pages
240     * and start from the nth page. It's 1-based indexing.
241     * @param int $nthPage
242     * @since 1.29
243     */
244    public function setPageOffset( $nthPage ) {
245        $this->pageOffset = $nthPage;
246    }
247
248    /**
249     * Set a callback that displays notice messages
250     *
251     * @param callable $callback
252     * @return callable
253     */
254    public function setNoticeCallback( $callback ) {
255        return wfSetVar( $this->mNoticeCallback, $callback );
256    }
257
258    /**
259     * Sets the action to perform as each new page in the stream is reached.
260     * @param callable|null $callback
261     * @return callable|null
262     */
263    public function setPageCallback( $callback ) {
264        $previous = $this->mPageCallback;
265        $this->mPageCallback = $callback;
266        return $previous;
267    }
268
269    /**
270     * Sets the action to perform as each page in the stream is completed.
271     * Callback accepts the page title (as a Title object), a second object
272     * with the original title form (in case it's been overridden into a
273     * local namespace), and a count of revisions.
274     *
275     * @param callable|null $callback
276     * @return callable|null
277     */
278    public function setPageOutCallback( $callback ) {
279        $previous = $this->mPageOutCallback;
280        $this->mPageOutCallback = $callback;
281        return $previous;
282    }
283
284    /**
285     * Sets the action to perform as each page revision is reached.
286     * @param callable|null $callback
287     * @return callable|null
288     */
289    public function setRevisionCallback( $callback ) {
290        $previous = $this->mRevisionCallback;
291        $this->mRevisionCallback = $callback;
292        return $previous;
293    }
294
295    /**
296     * Sets the action to perform as each file upload version is reached.
297     * @param callable $callback
298     * @return callable
299     */
300    public function setUploadCallback( $callback ) {
301        $previous = $this->mUploadCallback;
302        $this->mUploadCallback = $callback;
303        return $previous;
304    }
305
306    /**
307     * Sets the action to perform as each log item reached.
308     * @param callable $callback
309     * @return callable
310     */
311    public function setLogItemCallback( $callback ) {
312        $previous = $this->mLogItemCallback;
313        $this->mLogItemCallback = $callback;
314        return $previous;
315    }
316
317    /**
318     * Sets the action to perform when site info is encountered
319     * @param callable $callback
320     * @return callable
321     */
322    public function setSiteInfoCallback( $callback ) {
323        $previous = $this->mSiteInfoCallback;
324        $this->mSiteInfoCallback = $callback;
325        return $previous;
326    }
327
328    /**
329     * Sets the factory object to use to convert ForeignTitle objects into local
330     * Title objects
331     * @param ImportTitleFactory $factory
332     */
333    public function setImportTitleFactory( $factory ) {
334        $this->importTitleFactory = $factory;
335    }
336
337    /**
338     * Set a target namespace to override the defaults
339     * @param null|int $namespace
340     * @return bool
341     */
342    public function setTargetNamespace( $namespace ) {
343        if ( $namespace === null ) {
344            // Don't override namespaces
345            $this->setImportTitleFactory(
346                new NaiveImportTitleFactory(
347                    $this->contentLanguage,
348                    $this->namespaceInfo,
349                    $this->titleFactory
350                )
351            );
352            return true;
353        } elseif (
354            $namespace >= 0 &&
355            $this->namespaceInfo->exists( intval( $namespace ) )
356        ) {
357            $namespace = intval( $namespace );
358            $this->setImportTitleFactory(
359                new NamespaceImportTitleFactory(
360                    $this->namespaceInfo,
361                    $this->titleFactory,
362                    $namespace
363                )
364            );
365            return true;
366        } else {
367            return false;
368        }
369    }
370
371    /**
372     * Set a target root page under which all pages are imported
373     * @param null|string $rootpage
374     * @return Status
375     */
376    public function setTargetRootPage( $rootpage ) {
377        $status = Status::newGood();
378        $nsInfo = $this->namespaceInfo;
379        if ( $rootpage === null ) {
380            // No rootpage
381            $this->setImportTitleFactory(
382                new NaiveImportTitleFactory(
383                    $this->contentLanguage,
384                    $nsInfo,
385                    $this->titleFactory
386                )
387            );
388        } elseif ( $rootpage !== '' ) {
389            $rootpage = rtrim( $rootpage, '/' ); // avoid double slashes
390            $title = Title::newFromText( $rootpage );
391
392            if ( !$title || $title->isExternal() ) {
393                $status->fatal( 'import-rootpage-invalid' );
394            } elseif ( !$nsInfo->hasSubpages( $title->getNamespace() ) ) {
395                $displayNSText = $title->getNamespace() === NS_MAIN
396                    ? wfMessage( 'blanknamespace' )->text()
397                    : $this->contentLanguage->getNsText( $title->getNamespace() );
398                $status->fatal( 'import-rootpage-nosubpage', $displayNSText );
399            } else {
400                // set namespace to 'all', so the namespace check in processTitle() can pass
401                $this->setTargetNamespace( null );
402                $this->setImportTitleFactory(
403                    new SubpageImportTitleFactory(
404                        $nsInfo,
405                        $this->titleFactory,
406                        $title
407                    )
408                );
409            }
410        }
411        return $status;
412    }
413
414    /**
415     * @param string $dir
416     */
417    public function setImageBasePath( $dir ) {
418        $this->mImageBasePath = $dir;
419    }
420
421    /**
422     * @param bool $import
423     */
424    public function setImportUploads( $import ) {
425        $this->mImportUploads = $import;
426    }
427
428    /**
429     * @since 1.31
430     * @param string $usernamePrefix Prefix to apply to unknown (and possibly also known) usernames
431     * @param bool $assignKnownUsers Whether to apply the prefix to usernames that exist locally
432     */
433    public function setUsernamePrefix( $usernamePrefix, $assignKnownUsers ) {
434        $this->externalUserNames = new ExternalUserNames( $usernamePrefix, $assignKnownUsers );
435    }
436
437    /**
438     * Statistics update can cause a lot of time
439     * @since 1.29
440     */
441    public function disableStatisticsUpdate() {
442        $this->disableStatisticsUpdate = true;
443    }
444
445    /**
446     * Default per-page callback. Sets up some things related to site statistics
447     * @param array $titleAndForeignTitle Two-element array, with Title object at
448     * index 0 and ForeignTitle object at index 1
449     * @return bool
450     */
451    public function beforeImportPage( $titleAndForeignTitle ) {
452        $title = $titleAndForeignTitle[0];
453        $page = $this->wikiPageFactory->newFromTitle( $title );
454        $this->countableCache['title_' . $title->getPrefixedText()] = $page->isCountable();
455        return true;
456    }
457
458    /**
459     * Default per-revision callback, performs the import.
460     * @param WikiRevision $revision
461     * @return bool
462     */
463    public function importRevision( $revision ) {
464        if ( !$revision->getContentHandler()->canBeUsedOn( $revision->getTitle() ) ) {
465            $this->notice( 'import-error-bad-location',
466                $revision->getTitle()->getPrefixedText(),
467                $revision->getID(),
468                $revision->getModel(),
469                $revision->getFormat()
470            );
471
472            return false;
473        }
474
475        try {
476            return $revision->importOldRevision();
477        } catch ( MWContentSerializationException ) {
478            $this->notice( 'import-error-unserialize',
479                $revision->getTitle()->getPrefixedText(),
480                $revision->getID(),
481                $revision->getModel(),
482                $revision->getFormat()
483            );
484        }
485
486        return false;
487    }
488
489    /**
490     * Default per-revision callback, performs the import.
491     * @param WikiRevision $revision
492     * @return bool
493     */
494    public function importLogItem( $revision ) {
495        return $revision->importLogItem();
496    }
497
498    /**
499     * Dummy for now...
500     * @param WikiRevision $revision
501     * @return bool
502     */
503    public function importUpload( $revision ) {
504        $status = $this->uploadRevisionImporter->import( $revision );
505        return $status->isGood();
506    }
507
508    /**
509     * Mostly for hook use
510     * @param PageIdentity $pageIdentity
511     * @param ForeignTitle $foreignTitle
512     * @param int $revCount
513     * @param int $sRevCount
514     * @param array $pageInfo
515     * @return bool
516     */
517    public function finishImportPage( PageIdentity $pageIdentity, $foreignTitle, $revCount,
518        $sRevCount, $pageInfo
519    ) {
520        // Update article count statistics (T42009)
521        // The normal counting logic in WikiPage->doEditUpdates() is designed for
522        // one-revision-at-a-time editing, not bulk imports. In this situation it
523        // suffers from issues of replica DB lag. We let WikiPage handle the total page
524        // and revision count, and we implement our own custom logic for the
525        // article (content page) count.
526        if ( !$this->disableStatisticsUpdate ) {
527            $page = $this->wikiPageFactory->newFromTitle( $pageIdentity );
528
529            $page->loadPageData( IDBAccessObject::READ_LATEST );
530            $rev = $page->getRevisionRecord();
531            if ( $rev === null ) {
532
533                wfDebug( __METHOD__ . ': Skipping article count adjustment for ' . $pageIdentity .
534                    ' because WikiPage::getRevisionRecord() returned null' );
535            } else {
536                $update = $page->newPageUpdater( $this->performer )->prepareUpdate();
537                $countKey = 'title_' . CacheKeyHelper::getKeyForPage( $pageIdentity );
538                $countable = $update->isCountable();
539                if ( array_key_exists( $countKey, $this->countableCache ) &&
540                    $countable != $this->countableCache[$countKey] ) {
541                    DeferredUpdates::addUpdate( SiteStatsUpdate::factory( [
542                        'articles' => ( (int)$countable - (int)$this->countableCache[$countKey] )
543                    ] ) );
544                }
545            }
546        }
547
548        $title = Title::newFromPageIdentity( $pageIdentity );
549        return $this->hookRunner->onAfterImportPage( $title, $foreignTitle,
550            $revCount, $sRevCount, $pageInfo );
551    }
552
553    /**
554     * Notify the callback function of site info
555     * @param array $siteInfo
556     * @return mixed|false
557     */
558    private function siteInfoCallback( $siteInfo ) {
559        if ( $this->mSiteInfoCallback ) {
560            return ( $this->mSiteInfoCallback )( $siteInfo, $this );
561        } else {
562            return false;
563        }
564    }
565
566    /**
567     * Notify the callback function when a new "<page>" is reached.
568     * @param array $title
569     */
570    public function pageCallback( $title ) {
571        if ( $this->mPageCallback ) {
572            ( $this->mPageCallback )( $title );
573        }
574    }
575
576    /**
577     * Notify the callback function when a "</page>" is closed.
578     * @param PageIdentity $pageIdentity
579     * @param ForeignTitle $foreignTitle
580     * @param int $revCount
581     * @param int $sucCount Number of revisions for which callback returned true
582     * @param array $pageInfo Associative array of page information
583     */
584    private function pageOutCallback( PageIdentity $pageIdentity, $foreignTitle, $revCount,
585            $sucCount, $pageInfo ) {
586        if ( $this->mPageOutCallback ) {
587            ( $this->mPageOutCallback )( $pageIdentity, $foreignTitle, $revCount, $sucCount, $pageInfo );
588        }
589    }
590
591    /**
592     * Notify the callback function of a revision
593     * @param WikiRevision $revision
594     * @return bool|mixed
595     */
596    private function revisionCallback( $revision ) {
597        if ( $this->mRevisionCallback ) {
598            return ( $this->mRevisionCallback )( $revision, $this );
599        } else {
600            return false;
601        }
602    }
603
604    /**
605     * Notify the callback function of a new log item
606     * @param WikiRevision $revision
607     * @return mixed|false
608     */
609    private function logItemCallback( $revision ) {
610        if ( $this->mLogItemCallback ) {
611            return ( $this->mLogItemCallback )( $revision, $this );
612        } else {
613            return false;
614        }
615    }
616
617    /**
618     * Retrieves the contents of the named attribute of the current element.
619     * @param string $attr The name of the attribute
620     * @return string The value of the attribute or an empty string if it is not set in the current
621     * element.
622     */
623    public function nodeAttribute( $attr ) {
624        return $this->reader->getAttribute( $attr ) ?? '';
625    }
626
627    /**
628     * Shouldn't something like this be built-in to XMLReader?
629     * Fetches text contents of the current element, assuming
630     * no sub-elements or such scary things.
631     * @return string
632     * @internal
633     */
634    public function nodeContents() {
635        if ( $this->reader->isEmptyElement ) {
636            return "";
637        }
638        $buffer = "";
639        while ( $this->reader->read() ) {
640            switch ( $this->reader->nodeType ) {
641                case XMLReader::TEXT:
642                case XMLReader::CDATA:
643                case XMLReader::SIGNIFICANT_WHITESPACE:
644                    $buffer .= $this->reader->value;
645                    break;
646                case XMLReader::END_ELEMENT:
647                    return $buffer;
648            }
649        }
650
651        $this->reader->close();
652        return '';
653    }
654
655    /**
656     * Primary entry point
657     * @throws Exception
658     * @return bool
659     */
660    public function doImport() {
661        $this->syntaxCheckXML();
662
663        // Calls to reader->read need to be wrapped in calls to
664        // libxml_disable_entity_loader() to avoid local file
665        // inclusion attacks (T48932).
666        // phpcs:ignore Generic.PHP.NoSilencedErrors -- suppress deprecation per T268847
667        $oldDisable = @libxml_disable_entity_loader( true );
668        try {
669            $this->reader->read();
670
671            if ( $this->reader->localName != 'mediawiki' ) {
672                // phpcs:ignore Generic.PHP.NoSilencedErrors
673                @libxml_disable_entity_loader( $oldDisable );
674                $error = libxml_get_last_error();
675                if ( $error ) {
676                    throw new NormalizedException( "XML error at line {line}: {message}", [
677                        'line' => $error->line,
678                        'message' => $error->message,
679                    ] );
680                } else {
681                    throw new UnexpectedValueException(
682                        "Expected '<mediawiki>' tag, got '<{$this->reader->localName}>' tag."
683                    );
684                }
685            }
686            $this->debug( "<mediawiki> tag is correct." );
687
688            $this->debug( "Starting primary dump processing loop." );
689
690            $keepReading = $this->reader->read();
691            $skip = false;
692            $pageCount = 0;
693            while ( $keepReading ) {
694                $tag = $this->reader->localName;
695                if ( $this->pageOffset ) {
696                    if ( $tag === 'page' ) {
697                        $pageCount++;
698                    }
699                    if ( $pageCount < $this->pageOffset ) {
700                        $keepReading = $this->reader->next();
701                        continue;
702                    }
703                }
704                $type = $this->reader->nodeType;
705
706                if ( !$this->hookRunner->onImportHandleToplevelXMLTag( $this ) ) {
707                    // Do nothing
708                } elseif ( $tag == 'mediawiki' && $type == XMLReader::END_ELEMENT ) {
709                    break;
710                } elseif ( $tag == 'siteinfo' ) {
711                    $this->handleSiteInfo();
712                } elseif ( $tag == 'page' ) {
713                    $this->handlePage();
714                } elseif ( $tag == 'logitem' ) {
715                    $this->handleLogItem();
716                } elseif ( $tag != '#text' ) {
717                    $this->warn( "Unhandled top-level XML tag $tag" );
718
719                    $skip = true;
720                }
721
722                if ( $skip ) {
723                    $keepReading = $this->reader->next();
724                    $skip = false;
725                    $this->debug( "Skip" );
726                } else {
727                    $keepReading = $this->reader->read();
728                }
729            }
730        } finally {
731            // phpcs:ignore Generic.PHP.NoSilencedErrors
732            @libxml_disable_entity_loader( $oldDisable );
733            $this->reader->close();
734        }
735
736        return true;
737    }
738
739    private function handleSiteInfo() {
740        $this->debug( "Enter site info handler." );
741        $siteInfo = [];
742
743        // Fields that can just be stuffed in the siteInfo object
744        $normalFields = [ 'sitename', 'base', 'generator', 'case' ];
745
746        while ( $this->reader->read() ) {
747            if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
748                    $this->reader->localName == 'siteinfo' ) {
749                break;
750            }
751
752            $tag = $this->reader->localName;
753
754            if ( $tag == 'namespace' ) {
755                $this->foreignNamespaces[$this->nodeAttribute( 'key' )] =
756                    $this->nodeContents();
757            } elseif ( in_array( $tag, $normalFields ) ) {
758                $siteInfo[$tag] = $this->nodeContents();
759            }
760        }
761
762        $siteInfo['_namespaces'] = $this->foreignNamespaces;
763        $this->siteInfoCallback( $siteInfo );
764    }
765
766    private function handleLogItem() {
767        $this->debug( "Enter log item handler." );
768        $logInfo = [];
769
770        // Fields that can just be stuffed in the pageInfo object
771        $normalFields = [ 'id', 'comment', 'type', 'action', 'timestamp',
772            'logtitle', 'params' ];
773
774        while ( $this->reader->read() ) {
775            if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
776                    $this->reader->localName == 'logitem' ) {
777                break;
778            }
779
780            $tag = $this->reader->localName;
781
782            if ( !$this->hookRunner->onImportHandleLogItemXMLTag( $this, $logInfo ) ) {
783                // Do nothing
784            } elseif ( in_array( $tag, $normalFields ) ) {
785                $logInfo[$tag] = $this->nodeContents();
786            } elseif ( $tag == 'contributor' ) {
787                $logInfo['contributor'] = $this->handleContributor();
788            } elseif ( $tag != '#text' ) {
789                $this->warn( "Unhandled log-item XML tag $tag" );
790            }
791        }
792
793        $this->processLogItem( $logInfo );
794    }
795
796    /**
797     * @param array $logInfo
798     * @return mixed|false
799     */
800    private function processLogItem( $logInfo ) {
801        $revision = new WikiRevision();
802
803        if ( isset( $logInfo['id'] ) ) {
804            $revision->setID( $logInfo['id'] );
805        }
806        $revision->setType( $logInfo['type'] );
807        $revision->setAction( $logInfo['action'] );
808        if ( isset( $logInfo['timestamp'] ) ) {
809            $revision->setTimestamp( $logInfo['timestamp'] );
810        }
811        if ( isset( $logInfo['params'] ) ) {
812            $revision->setParams( $logInfo['params'] );
813        }
814        if ( isset( $logInfo['logtitle'] ) ) {
815            // @todo Using Title for non-local titles is a recipe for disaster.
816            // We should use ForeignTitle here instead.
817            $revision->setTitle( Title::newFromText( $logInfo['logtitle'] ) );
818        }
819
820        $revision->setNoUpdates( $this->mNoUpdates );
821
822        if ( isset( $logInfo['comment'] ) ) {
823            $revision->setComment( $logInfo['comment'] );
824        }
825
826        if ( isset( $logInfo['contributor']['username'] ) ) {
827            $revision->setUsername(
828                $this->externalUserNames->applyPrefix( $logInfo['contributor']['username'] )
829            );
830        } elseif ( isset( $logInfo['contributor']['ip'] ) ) {
831            $revision->setUserIP( $logInfo['contributor']['ip'] );
832        } else {
833            $revision->setUsername( $this->externalUserNames->addPrefix( 'Unknown user' ) );
834        }
835
836        return $this->logItemCallback( $revision );
837    }
838
839    private function handlePage() {
840        // Handle page data.
841        $this->debug( "Enter page handler." );
842        $pageInfo = [ 'revisionCount' => 0, 'successfulRevisionCount' => 0 ];
843
844        // Fields that can just be stuffed in the pageInfo object
845        $normalFields = [ 'title', 'ns', 'id', 'redirect', 'restrictions' ];
846
847        $skip = false;
848        $badTitle = false;
849
850        while ( $skip ? $this->reader->next() : $this->reader->read() ) {
851            if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
852                    $this->reader->localName == 'page' ) {
853                break;
854            }
855
856            $skip = false;
857
858            $tag = $this->reader->localName;
859
860            if ( $badTitle ) {
861                // The title is invalid, bail out of this page
862                $skip = true;
863            } elseif ( !$this->hookRunner->onImportHandlePageXMLTag( $this, $pageInfo ) ) {
864                // Do nothing
865            } elseif ( in_array( $tag, $normalFields ) ) {
866                // An XML snippet:
867                // <page>
868                //     <id>123</id>
869                //     <title>Page</title>
870                //     <redirect title="NewTitle"/>
871                //     ...
872                // Because the redirect tag is built differently, we need special handling for that case.
873                if ( $tag == 'redirect' ) {
874                    $pageInfo[$tag] = $this->nodeAttribute( 'title' );
875                } else {
876                    $pageInfo[$tag] = $this->nodeContents();
877                }
878            } elseif ( $tag == 'revision' || $tag == 'upload' ) {
879                if ( !isset( $title ) ) {
880                    $title = $this->processTitle( $pageInfo['title'],
881                        $pageInfo['ns'] ?? null );
882
883                    // $title is either an array of two titles or false.
884                    if ( is_array( $title ) ) {
885                        $this->pageCallback( $title );
886                        [ $pageInfo['_title'], $foreignTitle ] = $title;
887                    } else {
888                        $badTitle = true;
889                        $skip = true;
890                    }
891                }
892
893                if ( $title ) {
894                    if ( $tag == 'revision' ) {
895                        $this->handleRevision( $pageInfo );
896                    } else {
897                        $this->handleUpload( $pageInfo );
898                    }
899                }
900            } elseif ( $tag != '#text' ) {
901                $this->warn( "Unhandled page XML tag $tag" );
902                $skip = true;
903            }
904        }
905
906        // @note $pageInfo is only set if a valid $title is processed above with
907        //       no error. If we have a valid $title, then pageCallback is called
908        //       above, $pageInfo['title'] is set and we do pageOutCallback here.
909        //       If $pageInfo['_title'] is not set, then $foreignTitle is also not
910        //       set since they both come from $title above.
911        if ( array_key_exists( '_title', $pageInfo ) ) {
912            /** @var Title $title */
913            $title = $pageInfo['_title'];
914            $this->pageOutCallback(
915                $title,
916                // @phan-suppress-next-line PhanPossiblyUndeclaredVariable Set together with _title key
917                $foreignTitle,
918                $pageInfo['revisionCount'],
919                $pageInfo['successfulRevisionCount'],
920                $pageInfo
921            );
922        }
923    }
924
925    /**
926     * @param array &$pageInfo
927     */
928    private function handleRevision( &$pageInfo ) {
929        $this->debug( "Enter revision handler" );
930        $revisionInfo = [];
931
932        $normalFields = [ 'id', 'parentid', 'timestamp', 'comment', 'minor', 'origin',
933            'model', 'format', 'text', 'sha1' ];
934
935        $skip = false;
936
937        while ( $skip ? $this->reader->next() : $this->reader->read() ) {
938            if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
939                    $this->reader->localName == 'revision' ) {
940                break;
941            }
942
943            $tag = $this->reader->localName;
944
945            if ( !$this->hookRunner->onImportHandleRevisionXMLTag(
946                $this, $pageInfo, $revisionInfo )
947            ) {
948                // Do nothing
949            } elseif ( in_array( $tag, $normalFields ) ) {
950                $revisionInfo[$tag] = $this->nodeContents();
951            } elseif ( $tag == 'content' ) {
952                // We can have multiple content tags, so make this an array.
953                $revisionInfo[$tag][] = $this->handleContent();
954            } elseif ( $tag == 'contributor' ) {
955                $revisionInfo['contributor'] = $this->handleContributor();
956            } elseif ( $tag != '#text' ) {
957                $this->warn( "Unhandled revision XML tag $tag" );
958                $skip = true;
959            }
960        }
961
962        $pageInfo['revisionCount']++;
963        if ( $this->processRevision( $pageInfo, $revisionInfo ) ) {
964            $pageInfo['successfulRevisionCount']++;
965        }
966    }
967
968    private function handleContent(): array {
969        $this->debug( "Enter content handler" );
970        $contentInfo = [];
971
972        $normalFields = [ 'role', 'origin', 'model', 'format', 'text' ];
973
974        $skip = false;
975
976        while ( $skip ? $this->reader->next() : $this->reader->read() ) {
977            if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
978                $this->reader->localName == 'content' ) {
979                break;
980            }
981
982            $tag = $this->reader->localName;
983
984            if ( !$this->hookRunner->onImportHandleContentXMLTag(
985                $this, $contentInfo )
986            ) {
987                // Do nothing
988            } elseif ( in_array( $tag, $normalFields ) ) {
989                $contentInfo[$tag] = $this->nodeContents();
990            } elseif ( $tag != '#text' ) {
991                $this->warn( "Unhandled content XML tag $tag" );
992                $skip = true;
993            }
994        }
995
996        return $contentInfo;
997    }
998
999    /**
1000     * @param PageIdentity $page
1001     * @param int $revisionId
1002     * @param array $contentInfo
1003     *
1004     * @return Content
1005     */
1006    private function makeContent( PageIdentity $page, $revisionId, $contentInfo ) {
1007        $maxArticleSize = $this->config->get( MainConfigNames::MaxArticleSize );
1008
1009        if ( !isset( $contentInfo['text'] ) ) {
1010            throw new InvalidArgumentException( 'Missing text field in import.' );
1011        }
1012
1013        // Make sure revisions won't violate $wgMaxArticleSize, which could lead to
1014        // database errors and instability. Testing for revisions with only listed
1015        // content models, as other content models might use serialization formats
1016        // which aren't checked against $wgMaxArticleSize.
1017        if ( ( !isset( $contentInfo['model'] ) ||
1018                in_array( $contentInfo['model'], [
1019                    'wikitext',
1020                    'css',
1021                    'json',
1022                    'javascript',
1023                    'text',
1024                    ''
1025                ] ) ) &&
1026            strlen( $contentInfo['text'] ) > $maxArticleSize * 1024
1027        ) {
1028            throw new RuntimeException( 'The text of ' .
1029                ( $revisionId ?
1030                    "the revision with ID $revisionId" :
1031                    'a revision'
1032                ) . " exceeds the maximum allowable size ({$maxArticleSize} KiB)" );
1033        }
1034
1035        $role = $contentInfo['role'] ?? SlotRecord::MAIN;
1036        $model = $contentInfo['model'] ?? $this->slotRoleRegistry
1037            ->getRoleHandler( $role )
1038            ->getDefaultModel( $page );
1039        $handler = $this->contentHandlerFactory->getContentHandler( $model );
1040
1041        $text = $handler->importTransform( $contentInfo['text'] );
1042
1043        return $handler->unserializeContent( $text );
1044    }
1045
1046    /**
1047     * @param array $pageInfo
1048     * @param array $revisionInfo
1049     * @return mixed|false
1050     */
1051    private function processRevision( $pageInfo, $revisionInfo ) {
1052        $revision = new WikiRevision();
1053
1054        $revId = $revisionInfo['id'] ?? 0;
1055        if ( $revId ) {
1056            $revision->setID( $revisionInfo['id'] );
1057        }
1058
1059        $title = $pageInfo['_title'];
1060        $revision->setTitle( $title );
1061
1062        $content = $this->makeContent( $title, $revId, $revisionInfo );
1063        $revision->setContent( SlotRecord::MAIN, $content );
1064
1065        foreach ( $revisionInfo['content'] ?? [] as $slotInfo ) {
1066            if ( !isset( $slotInfo['role'] ) ) {
1067                throw new RuntimeException( "Missing role for imported slot." );
1068            }
1069
1070            $content = $this->makeContent( $title, $revId, $slotInfo );
1071            $revision->setContent( $slotInfo['role'], $content );
1072        }
1073        $revision->setTimestamp( $revisionInfo['timestamp'] ?? wfTimestampNow() );
1074
1075        if ( isset( $revisionInfo['comment'] ) ) {
1076            $revision->setComment( $revisionInfo['comment'] );
1077        }
1078
1079        if ( isset( $revisionInfo['minor'] ) ) {
1080            $revision->setMinor( true );
1081        }
1082        if ( isset( $revisionInfo['contributor']['username'] ) ) {
1083            $revision->setUsername(
1084                $this->externalUserNames->applyPrefix( $revisionInfo['contributor']['username'] )
1085            );
1086        } elseif ( isset( $revisionInfo['contributor']['ip'] ) ) {
1087            $revision->setUserIP( $revisionInfo['contributor']['ip'] );
1088        } else {
1089            $revision->setUsername( $this->externalUserNames->addPrefix( 'Unknown user' ) );
1090        }
1091        if ( isset( $revisionInfo['sha1'] ) ) {
1092            $revision->setSha1Base36( $revisionInfo['sha1'] );
1093        }
1094        $revision->setNoUpdates( $this->mNoUpdates );
1095
1096        return $this->revisionCallback( $revision );
1097    }
1098
1099    /**
1100     * @param array &$pageInfo
1101     * @return mixed
1102     */
1103    private function handleUpload( &$pageInfo ) {
1104        $this->debug( "Enter upload handler" );
1105        $uploadInfo = [];
1106
1107        $normalFields = [ 'timestamp', 'comment', 'filename', 'text',
1108            'src', 'size', 'sha1base36', 'archivename', 'rel' ];
1109
1110        $skip = false;
1111
1112        while ( $skip ? $this->reader->next() : $this->reader->read() ) {
1113            if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
1114                    $this->reader->localName == 'upload' ) {
1115                break;
1116            }
1117
1118            $tag = $this->reader->localName;
1119
1120            if ( !$this->hookRunner->onImportHandleUploadXMLTag( $this, $pageInfo ) ) {
1121                // Do nothing
1122            } elseif ( in_array( $tag, $normalFields ) ) {
1123                $uploadInfo[$tag] = $this->nodeContents();
1124            } elseif ( $tag == 'contributor' ) {
1125                $uploadInfo['contributor'] = $this->handleContributor();
1126            } elseif ( $tag == 'contents' ) {
1127                $contents = $this->nodeContents();
1128                $encoding = $this->reader->getAttribute( 'encoding' );
1129                if ( $encoding === 'base64' ) {
1130                    $uploadInfo['fileSrc'] = $this->dumpTemp( base64_decode( $contents ) );
1131                    $uploadInfo['isTempSrc'] = true;
1132                }
1133            } elseif ( $tag != '#text' ) {
1134                $this->warn( "Unhandled upload XML tag $tag" );
1135                $skip = true;
1136            }
1137        }
1138
1139        if ( $this->mImageBasePath && isset( $uploadInfo['rel'] ) ) {
1140            $path = "{$this->mImageBasePath}/{$uploadInfo['rel']}";
1141            if ( file_exists( $path ) ) {
1142                $uploadInfo['fileSrc'] = $path;
1143                $uploadInfo['isTempSrc'] = false;
1144            }
1145        }
1146
1147        if ( $this->mImportUploads ) {
1148            return $this->processUpload( $pageInfo, $uploadInfo );
1149        }
1150    }
1151
1152    /**
1153     * @param string $contents
1154     * @return string
1155     */
1156    private function dumpTemp( $contents ) {
1157        $filename = tempnam( wfTempDir(), 'importupload' );
1158        file_put_contents( $filename, $contents );
1159        return $filename;
1160    }
1161
1162    /**
1163     * @param array $pageInfo
1164     * @param array $uploadInfo
1165     * @return mixed
1166     */
1167    private function processUpload( $pageInfo, $uploadInfo ) {
1168        $revision = new WikiRevision();
1169        $revId = $pageInfo['id'];
1170        $title = $pageInfo['_title'];
1171        // T292348: text key may be absent, force addition if null
1172        $uploadInfo['text'] ??= '';
1173        $content = $this->makeContent( $title, $revId, $uploadInfo );
1174
1175        $revision->setTitle( $title );
1176        $revision->setID( $revId );
1177        $revision->setTimestamp( $uploadInfo['timestamp'] );
1178        $revision->setContent( SlotRecord::MAIN, $content );
1179        $revision->setFilename( $uploadInfo['filename'] );
1180        if ( isset( $uploadInfo['archivename'] ) ) {
1181            $revision->setArchiveName( $uploadInfo['archivename'] );
1182        }
1183        $revision->setSrc( $uploadInfo['src'] );
1184        if ( isset( $uploadInfo['fileSrc'] ) ) {
1185            $revision->setFileSrc( $uploadInfo['fileSrc'],
1186                !empty( $uploadInfo['isTempSrc'] )
1187            );
1188        }
1189        if ( isset( $uploadInfo['sha1base36'] ) ) {
1190            $revision->setSha1Base36( $uploadInfo['sha1base36'] );
1191        }
1192        $revision->setSize( intval( $uploadInfo['size'] ) );
1193        $revision->setComment( $uploadInfo['comment'] );
1194
1195        if ( isset( $uploadInfo['contributor']['username'] ) ) {
1196            $revision->setUsername(
1197                $this->externalUserNames->applyPrefix( $uploadInfo['contributor']['username'] )
1198            );
1199        } elseif ( isset( $uploadInfo['contributor']['ip'] ) ) {
1200            $revision->setUserIP( $uploadInfo['contributor']['ip'] );
1201        }
1202        $revision->setNoUpdates( $this->mNoUpdates );
1203
1204        return ( $this->mUploadCallback )( $revision );
1205    }
1206
1207    /**
1208     * @return array
1209     */
1210    private function handleContributor() {
1211        $this->debug( "Enter contributor handler." );
1212
1213        if ( $this->reader->isEmptyElement ) {
1214            return [];
1215        }
1216
1217        $fields = [ 'id', 'ip', 'username' ];
1218        $info = [];
1219
1220        while ( $this->reader->read() ) {
1221            if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
1222                    $this->reader->localName == 'contributor' ) {
1223                break;
1224            }
1225
1226            $tag = $this->reader->localName;
1227
1228            if ( in_array( $tag, $fields ) ) {
1229                $info[$tag] = $this->nodeContents();
1230            }
1231        }
1232
1233        return $info;
1234    }
1235
1236    /**
1237     * @param string $text
1238     * @param string|null $ns
1239     * @return array|false
1240     */
1241    private function processTitle( $text, $ns = null ) {
1242        if ( $this->foreignNamespaces === null ) {
1243            $foreignTitleFactory = new NaiveForeignTitleFactory(
1244                $this->contentLanguage
1245            );
1246        } else {
1247            $foreignTitleFactory = new NamespaceAwareForeignTitleFactory(
1248                $this->foreignNamespaces );
1249        }
1250
1251        $foreignTitle = $foreignTitleFactory->createForeignTitle( $text,
1252            intval( $ns ) );
1253
1254        $title = $this->importTitleFactory->createTitleFromForeignTitle(
1255            $foreignTitle );
1256
1257        if ( $title === null ) {
1258            # Invalid page title? Ignore the page
1259            $this->notice( 'import-error-invalid', $foreignTitle->getFullText() );
1260            return false;
1261        } elseif ( $title->isExternal() ) {
1262            $this->notice( 'import-error-interwiki', $title->getPrefixedText() );
1263            return false;
1264        } elseif ( !$title->canExist() ) {
1265            $this->notice( 'import-error-special', $title->getPrefixedText() );
1266            return false;
1267        } elseif ( !$this->performer->definitelyCan( 'edit', $title ) ) {
1268            # Do not import if the importing wiki user cannot edit this page
1269            $this->notice( 'import-error-edit', $title->getPrefixedText() );
1270            return false;
1271        }
1272
1273        return [ $title, $foreignTitle ];
1274    }
1275
1276    /**
1277     * Open the XMLReader connected to the source adapter id
1278     */
1279    private function openReader() {
1280        // Enable the entity loader, as it is needed for loading external URLs via
1281        // XMLReader::open (T86036)
1282        // phpcs:ignore Generic.PHP.NoSilencedErrors -- suppress deprecation per T268847
1283        $oldDisable = @libxml_disable_entity_loader( false );
1284
1285        // A static call, to avoid https://github.com/php/php-src/issues/11548
1286        $reader = XMLReader::open(
1287            'uploadsource://' . $this->sourceAdapterId, null, LIBXML_PARSEHUGE );
1288        if ( $reader instanceof XMLReader ) {
1289            $this->reader = $reader;
1290            $status = true;
1291        } else {
1292            $status = false;
1293        }
1294        if ( !$status ) {
1295            $error = libxml_get_last_error();
1296            // phpcs:ignore Generic.PHP.NoSilencedErrors
1297            @libxml_disable_entity_loader( $oldDisable );
1298            throw new RuntimeException(
1299                'Encountered an internal error while initializing WikiImporter object: ' . $error->message
1300            );
1301        }
1302        // phpcs:ignore Generic.PHP.NoSilencedErrors
1303        @libxml_disable_entity_loader( $oldDisable );
1304    }
1305
1306    /**
1307     * Check the syntax of the given xml
1308     */
1309    private function syntaxCheckXML() {
1310        if ( !UploadSourceAdapter::isSeekableSource( $this->sourceAdapterId ) ) {
1311            return;
1312        }
1313        AtEase::suppressWarnings();
1314        $oldDisable = libxml_disable_entity_loader( false );
1315        try {
1316            while ( $this->reader->read() );
1317            $error = libxml_get_last_error();
1318            if ( $error ) {
1319                $errorMessage = 'XML error at line ' . $error->line . ': ' . $error->message;
1320                wfDebug( __METHOD__ . ': Invalid xml found - ' . $errorMessage );
1321                throw new RuntimeException( $errorMessage );
1322            }
1323        } finally {
1324            libxml_disable_entity_loader( $oldDisable );
1325            AtEase::restoreWarnings();
1326            $this->reader->close();
1327        }
1328
1329        // Reopen for the real import
1330        UploadSourceAdapter::seekSource( $this->sourceAdapterId, 0 );
1331        $this->openReader();
1332    }
1333}