Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
56.25% covered (warning)
56.25%
306 / 544
29.41% covered (danger)
29.41%
15 / 51
CRAP
0.00% covered (danger)
0.00%
0 / 1
WikiImporter
56.35% covered (warning)
56.35%
306 / 543
29.41% covered (danger)
29.41%
15 / 51
3290.14
0.00% covered (danger)
0.00%
0 / 1
 __construct
96.00% covered (success)
96.00%
24 / 25
0.00% covered (danger)
0.00%
0 / 1
2
 getReader
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 throwXmlError
0.00% covered (danger)
0.00%
0 / 2
0.00% covered (danger)
0.00%
0 / 1
2
 debug
50.00% covered (danger)
50.00%
1 / 2
0.00% covered (danger)
0.00%
0 / 1
2.50
 warn
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 notice
0.00% covered (danger)
0.00%
0 / 3
0.00% covered (danger)
0.00%
0 / 1
6
 setDebug
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 setNoUpdates
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 setPageOffset
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 setNoticeCallback
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 setPageCallback
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
1
 setPageOutCallback
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
1
 setRevisionCallback
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
1
 setUploadCallback
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
1
 setLogItemCallback
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
1
 setSiteInfoCallback
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
1
 setImportTitleFactory
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 setTargetNamespace
0.00% covered (danger)
0.00%
0 / 21
0.00% covered (danger)
0.00%
0 / 1
20
 setTargetRootPage
0.00% covered (danger)
0.00%
0 / 29
0.00% covered (danger)
0.00%
0 / 1
56
 setImageBasePath
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 setImportUploads
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 setUsernamePrefix
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 disableStatisticsUpdate
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 beforeImportPage
100.00% covered (success)
100.00%
4 / 4
100.00% covered (success)
100.00%
1 / 1
1
 importRevision
17.65% covered (danger)
17.65%
3 / 17
0.00% covered (danger)
0.00%
0 / 1
8.03
 importLogItem
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 importUpload
0.00% covered (danger)
0.00%
0 / 2
0.00% covered (danger)
0.00%
0 / 1
2
 finishImportPage
72.22% covered (warning)
72.22%
13 / 18
0.00% covered (danger)
0.00%
0 / 1
5.54
 siteInfoCallback
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
2
 pageCallback
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
2
 pageOutCallback
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
2
 revisionCallback
66.67% covered (warning)
66.67%
2 / 3
0.00% covered (danger)
0.00%
0 / 1
2.15
 logItemCallback
0.00% covered (danger)
0.00%
0 / 3
0.00% covered (danger)
0.00%
0 / 1
6
 nodeAttribute
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 nodeContents
80.00% covered (warning)
80.00%
8 / 10
0.00% covered (danger)
0.00%
0 / 1
7.39
 doImport
79.17% covered (warning)
79.17%
38 / 48
0.00% covered (danger)
0.00%
0 / 1
17.03
 handleSiteInfo
100.00% covered (success)
100.00%
15 / 15
100.00% covered (success)
100.00%
1 / 1
6
 handleLogItem
0.00% covered (danger)
0.00%
0 / 17
0.00% covered (danger)
0.00%
0 / 1
72
 processLogItem
0.00% covered (danger)
0.00%
0 / 22
0.00% covered (danger)
0.00%
0 / 1
72
 handlePage
90.70% covered (success)
90.70%
39 / 43
0.00% covered (danger)
0.00%
0 / 1
17.23
 handleRevision
100.00% covered (success)
100.00%
24 / 24
100.00% covered (success)
100.00%
1 / 1
11
 handleContent
88.24% covered (warning)
88.24%
15 / 17
0.00% covered (danger)
0.00%
0 / 1
8.10
 makeContent
80.00% covered (warning)
80.00%
20 / 25
0.00% covered (danger)
0.00%
0 / 1
6.29
 processRevision
86.21% covered (warning)
86.21%
25 / 29
0.00% covered (danger)
0.00%
0 / 1
9.21
 handleUpload
0.00% covered (danger)
0.00%
0 / 31
0.00% covered (danger)
0.00%
0 / 1
240
 dumpTemp
0.00% covered (danger)
0.00%
0 / 3
0.00% covered (danger)
0.00%
0 / 1
2
 processUpload
0.00% covered (danger)
0.00%
0 / 29
0.00% covered (danger)
0.00%
0 / 1
42
 handleContributor
92.31% covered (success)
92.31%
12 / 13
0.00% covered (danger)
0.00%
0 / 1
6.02
 processTitle
65.22% covered (warning)
65.22%
15 / 23
0.00% covered (danger)
0.00%
0 / 1
7.51
 openReader
57.14% covered (warning)
57.14%
8 / 14
0.00% covered (danger)
0.00%
0 / 1
3.71
 syntaxCheckXML
92.31% covered (success)
92.31%
12 / 13
0.00% covered (danger)
0.00%
0 / 1
4.01
1<?php
2/**
3 * MediaWiki page data importer.
4 *
5 * Copyright © 2003,2005 Brooke Vibber <bvibber@wikimedia.org>
6 * https://www.mediawiki.org/
7 *
8 * @license GPL-2.0-or-later
9 * @file
10 * @ingroup SpecialPage
11 */
12
13namespace MediaWiki\Import;
14
15use InvalidArgumentException;
16use MediaWiki\Config\Config;
17use MediaWiki\Content\Content;
18use MediaWiki\Content\ContentSerializationException;
19use MediaWiki\Content\IContentHandlerFactory;
20use MediaWiki\Deferred\DeferredUpdates;
21use MediaWiki\Deferred\SiteStatsUpdate;
22use MediaWiki\HookContainer\HookContainer;
23use MediaWiki\HookContainer\HookRunner;
24use MediaWiki\Language\Language;
25use MediaWiki\MainConfigNames;
26use MediaWiki\Page\CacheKeyHelper;
27use MediaWiki\Page\PageIdentity;
28use MediaWiki\Page\WikiPageFactory;
29use MediaWiki\Permissions\Authority;
30use MediaWiki\Revision\SlotRecord;
31use MediaWiki\Revision\SlotRoleRegistry;
32use MediaWiki\Status\Status;
33use MediaWiki\Title\ForeignTitle;
34use MediaWiki\Title\ImportTitleFactory;
35use MediaWiki\Title\NaiveForeignTitleFactory;
36use MediaWiki\Title\NaiveImportTitleFactory;
37use MediaWiki\Title\NamespaceAwareForeignTitleFactory;
38use MediaWiki\Title\NamespaceImportTitleFactory;
39use MediaWiki\Title\NamespaceInfo;
40use MediaWiki\Title\SubpageImportTitleFactory;
41use MediaWiki\Title\Title;
42use MediaWiki\Title\TitleFactory;
43use MediaWiki\User\ExternalUserNames;
44use RuntimeException;
45use UnexpectedValueException;
46use Wikimedia\Message\MessageParam;
47use Wikimedia\Message\MessageSpecifier;
48use Wikimedia\NormalizedException\NormalizedException;
49use Wikimedia\Rdbms\IDBAccessObject;
50use XMLReader;
51
52/**
53 * XML file reader for the page data importer.
54 *
55 * implements Special:Import
56 * @ingroup SpecialPage
57 */
58class WikiImporter {
59    /** @var XMLReader|null */
60    private $reader;
61
62    /** @var string */
63    private $sourceAdapterId;
64
65    /** @var array|null */
66    private $foreignNamespaces = null;
67
68    /** @var callable|null */
69    private $mLogItemCallback;
70
71    /** @var callable */
72    private $mUploadCallback;
73
74    /** @var callable|null */
75    private $mRevisionCallback;
76
77    /** @var callable|null */
78    private $mPageCallback;
79
80    /** @var callable|null */
81    private $mSiteInfoCallback;
82
83    /** @var callable|null */
84    private $mPageOutCallback;
85
86    /** @var callable|null */
87    private $mNoticeCallback;
88
89    /** @var bool|null */
90    private $mDebug;
91
92    /** @var bool|null */
93    private $mImportUploads;
94
95    /** @var string|null */
96    private $mImageBasePath;
97
98    /** @var bool */
99    private $mNoUpdates = false;
100
101    /** @var int */
102    private $pageOffset = 0;
103
104    private ImportTitleFactory $importTitleFactory;
105    private ExternalUserNames $externalUserNames;
106
107    /** @var array */
108    private $countableCache = [];
109
110    /** @var bool */
111    private $disableStatisticsUpdate = false;
112
113    /**
114     * Authority used for permission checks only (to ensure that the user performing the import is
115     * allowed to edit the pages they're importing). To skip the checks, use UltimateAuthority.
116     *
117     * If you want to also log the import actions, see ImportReporter.
118     */
119    private Authority $performer;
120
121    private Config $config;
122    private HookRunner $hookRunner;
123    private Language $contentLanguage;
124    private NamespaceInfo $namespaceInfo;
125    private TitleFactory $titleFactory;
126    private WikiPageFactory $wikiPageFactory;
127    private UploadRevisionImporter $uploadRevisionImporter;
128    private IContentHandlerFactory $contentHandlerFactory;
129    private SlotRoleRegistry $slotRoleRegistry;
130
131    /**
132     * Creates an ImportXMLReader drawing from the source provided
133     */
134    public function __construct(
135        ImportSource $source,
136        Authority $performer,
137        Config $config,
138        HookContainer $hookContainer,
139        Language $contentLanguage,
140        NamespaceInfo $namespaceInfo,
141        TitleFactory $titleFactory,
142        WikiPageFactory $wikiPageFactory,
143        UploadRevisionImporter $uploadRevisionImporter,
144        IContentHandlerFactory $contentHandlerFactory,
145        SlotRoleRegistry $slotRoleRegistry
146    ) {
147        $this->performer = $performer;
148        $this->config = $config;
149        $this->hookRunner = new HookRunner( $hookContainer );
150        $this->contentLanguage = $contentLanguage;
151        $this->namespaceInfo = $namespaceInfo;
152        $this->titleFactory = $titleFactory;
153        $this->wikiPageFactory = $wikiPageFactory;
154        $this->uploadRevisionImporter = $uploadRevisionImporter;
155        $this->contentHandlerFactory = $contentHandlerFactory;
156        $this->slotRoleRegistry = $slotRoleRegistry;
157
158        if ( !in_array( 'uploadsource', stream_get_wrappers() ) ) {
159            stream_wrapper_register( 'uploadsource', UploadSourceAdapter::class );
160        }
161        $this->sourceAdapterId = UploadSourceAdapter::registerSource( $source );
162
163        $this->openReader();
164
165        // Default callbacks
166        $this->setPageCallback( $this->beforeImportPage( ... ) );
167        $this->setRevisionCallback( $this->importRevision( ... ) );
168        $this->setUploadCallback( $this->importUpload( ... ) );
169        $this->setLogItemCallback( $this->importLogItem( ... ) );
170        $this->setPageOutCallback( $this->finishImportPage( ... ) );
171
172        $this->importTitleFactory = new NaiveImportTitleFactory(
173            $this->contentLanguage,
174            $this->namespaceInfo,
175            $this->titleFactory
176        );
177        $this->externalUserNames = new ExternalUserNames( 'imported', false );
178    }
179
180    /**
181     * @return null|XMLReader
182     */
183    public function getReader() {
184        return $this->reader;
185    }
186
187    /**
188     * @param string $err
189     */
190    public function throwXmlError( $err ) {
191        $this->debug( "FAILURE: $err" );
192        wfDebug( "WikiImporter XML error: $err" );
193    }
194
195    /**
196     * @param string $data
197     */
198    public function debug( $data ) {
199        if ( $this->mDebug ) {
200            wfDebug( "IMPORT: $data" );
201        }
202    }
203
204    /**
205     * @param string $data
206     */
207    public function warn( $data ) {
208        wfDebug( "IMPORT: $data" );
209    }
210
211    /**
212     * @param string $msg
213     * @phpcs:ignore Generic.Files.LineLength
214     * @param MessageParam|MessageSpecifier|string|int|float|list<MessageParam|MessageSpecifier|string|int|float> ...$params
215     *   See Message::params()
216     */
217    public function notice( $msg, ...$params ) {
218        if ( is_callable( $this->mNoticeCallback ) ) {
219            ( $this->mNoticeCallback )( $msg, $params );
220        } else { # No ImportReporter -> CLI
221            // T177997: the command line importers should call setNoticeCallback()
222            // for their own custom callback to echo the notice
223            wfDebug( wfMessage( $msg, $params )->text() );
224        }
225    }
226
227    /**
228     * Set debug mode...
229     * @param bool $debug
230     */
231    public function setDebug( $debug ) {
232        $this->mDebug = $debug;
233    }
234
235    /**
236     * Set 'no updates' mode. In this mode, the link tables will not be updated by the importer
237     * @param bool $noupdates
238     */
239    public function setNoUpdates( $noupdates ) {
240        $this->mNoUpdates = $noupdates;
241    }
242
243    /**
244     * Sets 'pageOffset' value. So it will skip the first n-1 pages
245     * and start from the nth page. It's 1-based indexing.
246     * @param int $nthPage
247     * @since 1.29
248     */
249    public function setPageOffset( $nthPage ) {
250        $this->pageOffset = $nthPage;
251    }
252
253    /**
254     * Set a callback that displays notice messages
255     *
256     * @param callable $callback
257     * @return callable
258     */
259    public function setNoticeCallback( $callback ) {
260        return wfSetVar( $this->mNoticeCallback, $callback );
261    }
262
263    /**
264     * Sets the action to perform as each new page in the stream is reached.
265     * @param callable|null $callback
266     * @return callable|null
267     */
268    public function setPageCallback( $callback ) {
269        $previous = $this->mPageCallback;
270        $this->mPageCallback = $callback;
271        return $previous;
272    }
273
274    /**
275     * Sets the action to perform as each page in the stream is completed.
276     * Callback accepts the page title (as a Title object), a second object
277     * with the original title form (in case it's been overridden into a
278     * local namespace), and a count of revisions.
279     *
280     * @param callable|null $callback
281     * @return callable|null
282     */
283    public function setPageOutCallback( $callback ) {
284        $previous = $this->mPageOutCallback;
285        $this->mPageOutCallback = $callback;
286        return $previous;
287    }
288
289    /**
290     * Sets the action to perform as each page revision is reached.
291     * @param callable|null $callback
292     * @return callable|null
293     */
294    public function setRevisionCallback( $callback ) {
295        $previous = $this->mRevisionCallback;
296        $this->mRevisionCallback = $callback;
297        return $previous;
298    }
299
300    /**
301     * Sets the action to perform as each file upload version is reached.
302     * @param callable $callback
303     * @return callable
304     */
305    public function setUploadCallback( $callback ) {
306        $previous = $this->mUploadCallback;
307        $this->mUploadCallback = $callback;
308        return $previous;
309    }
310
311    /**
312     * Sets the action to perform as each log item reached.
313     * @param callable $callback
314     * @return callable
315     */
316    public function setLogItemCallback( $callback ) {
317        $previous = $this->mLogItemCallback;
318        $this->mLogItemCallback = $callback;
319        return $previous;
320    }
321
322    /**
323     * Sets the action to perform when site info is encountered
324     * @param callable $callback
325     * @return callable
326     */
327    public function setSiteInfoCallback( $callback ) {
328        $previous = $this->mSiteInfoCallback;
329        $this->mSiteInfoCallback = $callback;
330        return $previous;
331    }
332
333    /**
334     * Sets the factory object to use to convert ForeignTitle objects into local
335     * Title objects
336     * @param ImportTitleFactory $factory
337     */
338    public function setImportTitleFactory( $factory ) {
339        $this->importTitleFactory = $factory;
340    }
341
342    /**
343     * Set a target namespace to override the defaults
344     * @param null|int $namespace
345     * @return bool
346     */
347    public function setTargetNamespace( $namespace ) {
348        if ( $namespace === null ) {
349            // Don't override namespaces
350            $this->setImportTitleFactory(
351                new NaiveImportTitleFactory(
352                    $this->contentLanguage,
353                    $this->namespaceInfo,
354                    $this->titleFactory
355                )
356            );
357            return true;
358        } elseif (
359            $namespace >= 0 &&
360            $this->namespaceInfo->exists( intval( $namespace ) )
361        ) {
362            $namespace = intval( $namespace );
363            $this->setImportTitleFactory(
364                new NamespaceImportTitleFactory(
365                    $this->namespaceInfo,
366                    $this->titleFactory,
367                    $namespace
368                )
369            );
370            return true;
371        } else {
372            return false;
373        }
374    }
375
376    /**
377     * Set a target root page under which all pages are imported
378     * @param null|string $rootpage
379     * @return Status
380     */
381    public function setTargetRootPage( $rootpage ) {
382        $status = Status::newGood();
383        $nsInfo = $this->namespaceInfo;
384        if ( $rootpage === null ) {
385            // No rootpage
386            $this->setImportTitleFactory(
387                new NaiveImportTitleFactory(
388                    $this->contentLanguage,
389                    $nsInfo,
390                    $this->titleFactory
391                )
392            );
393        } elseif ( $rootpage !== '' ) {
394            $rootpage = rtrim( $rootpage, '/' ); // avoid double slashes
395            $title = Title::newFromText( $rootpage );
396
397            if ( !$title || $title->isExternal() ) {
398                $status->fatal( 'import-rootpage-invalid' );
399            } elseif ( !$nsInfo->hasSubpages( $title->getNamespace() ) ) {
400                $displayNSText = $title->getNamespace() === NS_MAIN
401                    ? wfMessage( 'blanknamespace' )->text()
402                    : $this->contentLanguage->getNsText( $title->getNamespace() );
403                $status->fatal( 'import-rootpage-nosubpage', $displayNSText );
404            } else {
405                // set namespace to 'all', so the namespace check in processTitle() can pass
406                $this->setTargetNamespace( null );
407                $this->setImportTitleFactory(
408                    new SubpageImportTitleFactory(
409                        $nsInfo,
410                        $this->titleFactory,
411                        $title
412                    )
413                );
414            }
415        }
416        return $status;
417    }
418
419    /**
420     * @param string $dir
421     */
422    public function setImageBasePath( $dir ) {
423        $this->mImageBasePath = $dir;
424    }
425
426    /**
427     * @param bool $import
428     */
429    public function setImportUploads( $import ) {
430        $this->mImportUploads = $import;
431    }
432
433    /**
434     * @since 1.31
435     * @param string $usernamePrefix Prefix to apply to unknown (and possibly also known) usernames
436     * @param bool $assignKnownUsers Whether to apply the prefix to usernames that exist locally
437     */
438    public function setUsernamePrefix( $usernamePrefix, $assignKnownUsers ) {
439        $this->externalUserNames = new ExternalUserNames( $usernamePrefix, $assignKnownUsers );
440    }
441
442    /**
443     * Statistics update can cause a lot of time
444     * @since 1.29
445     */
446    public function disableStatisticsUpdate() {
447        $this->disableStatisticsUpdate = true;
448    }
449
450    /**
451     * Default per-page callback. Sets up some things related to site statistics
452     * @param array $titleAndForeignTitle Two-element array, with Title object at
453     * index 0 and ForeignTitle object at index 1
454     * @return bool
455     */
456    public function beforeImportPage( $titleAndForeignTitle ) {
457        $title = $titleAndForeignTitle[0];
458        $page = $this->wikiPageFactory->newFromTitle( $title );
459        $this->countableCache['title_' . $title->getPrefixedText()] = $page->isCountable();
460        return true;
461    }
462
463    /**
464     * Default per-revision callback, performs the import.
465     * @param WikiRevision $revision
466     * @return bool
467     */
468    public function importRevision( $revision ) {
469        if ( !$revision->getContentHandler()->canBeUsedOn( $revision->getTitle() ) ) {
470            $this->notice( 'import-error-bad-location',
471                $revision->getTitle()->getPrefixedText(),
472                $revision->getID(),
473                $revision->getModel(),
474                $revision->getFormat()
475            );
476
477            return false;
478        }
479
480        try {
481            return $revision->importOldRevision();
482        } catch ( ContentSerializationException ) {
483            $this->notice( 'import-error-unserialize',
484                $revision->getTitle()->getPrefixedText(),
485                $revision->getID(),
486                $revision->getModel(),
487                $revision->getFormat()
488            );
489        }
490
491        return false;
492    }
493
494    /**
495     * Default per-revision callback, performs the import.
496     * @param WikiRevision $revision
497     * @return bool
498     */
499    public function importLogItem( $revision ) {
500        return $revision->importLogItem();
501    }
502
503    /**
504     * Dummy for now...
505     * @param WikiRevision $revision
506     * @return bool
507     */
508    public function importUpload( $revision ) {
509        $status = $this->uploadRevisionImporter->import( $revision );
510        return $status->isGood();
511    }
512
513    /**
514     * Mostly for hook use
515     * @param PageIdentity $pageIdentity
516     * @param ForeignTitle $foreignTitle
517     * @param int $revCount
518     * @param int $sRevCount
519     * @param array $pageInfo
520     * @return bool
521     */
522    public function finishImportPage( PageIdentity $pageIdentity, $foreignTitle, $revCount,
523        $sRevCount, $pageInfo
524    ) {
525        // Update article count statistics (T42009)
526        // The normal counting logic in WikiPage->doEditUpdates() is designed for
527        // one-revision-at-a-time editing, not bulk imports. In this situation it
528        // suffers from issues of replica DB lag. We let WikiPage handle the total page
529        // and revision count, and we implement our own custom logic for the
530        // article (content page) count.
531        if ( !$this->disableStatisticsUpdate ) {
532            $page = $this->wikiPageFactory->newFromTitle( $pageIdentity );
533
534            $page->loadPageData( IDBAccessObject::READ_LATEST );
535            $rev = $page->getRevisionRecord();
536            if ( $rev === null ) {
537
538                wfDebug( __METHOD__ . ': Skipping article count adjustment for ' . $pageIdentity .
539                    ' because WikiPage::getRevisionRecord() returned null' );
540            } else {
541                $update = $page->newPageUpdater( $this->performer )->prepareUpdate();
542                $countKey = 'title_' . CacheKeyHelper::getKeyForPage( $pageIdentity );
543                $countable = $update->isCountable();
544                if ( array_key_exists( $countKey, $this->countableCache ) &&
545                    $countable != $this->countableCache[$countKey] ) {
546                    DeferredUpdates::addUpdate( SiteStatsUpdate::factory( [
547                        'articles' => ( (int)$countable - (int)$this->countableCache[$countKey] )
548                    ] ) );
549                }
550            }
551        }
552
553        $title = Title::newFromPageIdentity( $pageIdentity );
554        return $this->hookRunner->onAfterImportPage( $title, $foreignTitle,
555            $revCount, $sRevCount, $pageInfo );
556    }
557
558    /**
559     * Notify the callback function of site info
560     * @param array $siteInfo
561     * @return mixed|false
562     */
563    private function siteInfoCallback( $siteInfo ) {
564        if ( $this->mSiteInfoCallback ) {
565            return ( $this->mSiteInfoCallback )( $siteInfo, $this );
566        } else {
567            return false;
568        }
569    }
570
571    /**
572     * Notify the callback function when a new "<page>" is reached.
573     * @param array $title
574     */
575    public function pageCallback( $title ) {
576        if ( $this->mPageCallback ) {
577            ( $this->mPageCallback )( $title );
578        }
579    }
580
581    /**
582     * Notify the callback function when a "</page>" is closed.
583     * @param PageIdentity $pageIdentity
584     * @param ForeignTitle $foreignTitle
585     * @param int $revCount
586     * @param int $sucCount Number of revisions for which callback returned true
587     * @param array $pageInfo Associative array of page information
588     */
589    private function pageOutCallback( PageIdentity $pageIdentity, $foreignTitle, $revCount,
590            $sucCount, $pageInfo ) {
591        if ( $this->mPageOutCallback ) {
592            ( $this->mPageOutCallback )( $pageIdentity, $foreignTitle, $revCount, $sucCount, $pageInfo );
593        }
594    }
595
596    /**
597     * Notify the callback function of a revision
598     * @param WikiRevision $revision
599     * @return bool|mixed
600     */
601    private function revisionCallback( $revision ) {
602        if ( $this->mRevisionCallback ) {
603            return ( $this->mRevisionCallback )( $revision, $this );
604        } else {
605            return false;
606        }
607    }
608
609    /**
610     * Notify the callback function of a new log item
611     * @param WikiRevision $revision
612     * @return mixed|false
613     */
614    private function logItemCallback( $revision ) {
615        if ( $this->mLogItemCallback ) {
616            return ( $this->mLogItemCallback )( $revision, $this );
617        } else {
618            return false;
619        }
620    }
621
622    /**
623     * Retrieves the contents of the named attribute of the current element.
624     * @param string $attr The name of the attribute
625     * @return string The value of the attribute or an empty string if it is not set in the current
626     * element.
627     */
628    public function nodeAttribute( $attr ) {
629        return $this->reader->getAttribute( $attr ) ?? '';
630    }
631
632    /**
633     * Shouldn't something like this be built-in to XMLReader?
634     * Fetches text contents of the current element, assuming
635     * no sub-elements or such scary things.
636     * @return string
637     * @internal
638     */
639    public function nodeContents() {
640        if ( $this->reader->isEmptyElement ) {
641            return "";
642        }
643        $buffer = "";
644        while ( $this->reader->read() ) {
645            switch ( $this->reader->nodeType ) {
646                case XMLReader::TEXT:
647                case XMLReader::CDATA:
648                case XMLReader::SIGNIFICANT_WHITESPACE:
649                    $buffer .= $this->reader->value;
650                    break;
651                case XMLReader::END_ELEMENT:
652                    return $buffer;
653            }
654        }
655
656        $this->reader->close();
657        return '';
658    }
659
660    /**
661     * Primary entry point
662     * @throws \Exception
663     * @return bool
664     */
665    public function doImport() {
666        $this->syntaxCheckXML();
667
668        // Calls to reader->read need to be wrapped in calls to
669        // libxml_disable_entity_loader() to avoid local file
670        // inclusion attacks (T48932).
671        // phpcs:ignore Generic.PHP.NoSilencedErrors -- suppress deprecation per T268847
672        $oldDisable = @libxml_disable_entity_loader( true );
673        try {
674            $this->reader->read();
675
676            if ( $this->reader->localName != 'mediawiki' ) {
677                // phpcs:ignore Generic.PHP.NoSilencedErrors
678                @libxml_disable_entity_loader( $oldDisable );
679                $error = libxml_get_last_error();
680                if ( $error ) {
681                    throw new NormalizedException( "XML error at line {line}: {message}", [
682                        'line' => $error->line,
683                        'message' => $error->message,
684                    ] );
685                } else {
686                    throw new UnexpectedValueException(
687                        "Expected '<mediawiki>' tag, got '<{$this->reader->localName}>' tag."
688                    );
689                }
690            }
691            $this->debug( "<mediawiki> tag is correct." );
692
693            $this->debug( "Starting primary dump processing loop." );
694
695            $keepReading = $this->reader->read();
696            $skip = false;
697            $pageCount = 0;
698            while ( $keepReading ) {
699                $tag = $this->reader->localName;
700                if ( $this->pageOffset ) {
701                    if ( $tag === 'page' ) {
702                        $pageCount++;
703                    }
704                    if ( $pageCount < $this->pageOffset ) {
705                        $keepReading = $this->reader->next();
706                        continue;
707                    }
708                }
709                $type = $this->reader->nodeType;
710
711                if ( !$this->hookRunner->onImportHandleToplevelXMLTag( $this ) ) {
712                    // Do nothing
713                } elseif ( $tag == 'mediawiki' && $type == XMLReader::END_ELEMENT ) {
714                    break;
715                } elseif ( $tag == 'siteinfo' ) {
716                    $this->handleSiteInfo();
717                } elseif ( $tag == 'page' ) {
718                    $this->handlePage();
719                } elseif ( $tag == 'logitem' ) {
720                    $this->handleLogItem();
721                } elseif ( $tag != '#text' ) {
722                    $this->warn( "Unhandled top-level XML tag $tag" );
723
724                    $skip = true;
725                }
726
727                if ( $skip ) {
728                    $keepReading = $this->reader->next();
729                    $skip = false;
730                    $this->debug( "Skip" );
731                } else {
732                    $keepReading = $this->reader->read();
733                }
734            }
735        } finally {
736            // phpcs:ignore Generic.PHP.NoSilencedErrors
737            @libxml_disable_entity_loader( $oldDisable );
738            $this->reader->close();
739        }
740
741        return true;
742    }
743
/**
 * Consume the <siteinfo> element the reader is currently positioned on.
 *
 * Each <namespace> child is stored in $this->foreignNamespaces under the
 * value of its "key" attribute. The sitename, base, generator and case
 * children are copied into an array that is passed to siteInfoCallback(),
 * together with the namespace map under the '_namespaces' key.
 */
private function handleSiteInfo() {
    $this->debug( "Enter site info handler." );

    // Child tags whose contents are copied into the result as-is
    $simpleTags = [ 'sitename', 'base', 'generator', 'case' ];
    $collected = [];

    while ( $this->reader->read() ) {
        $atClosingTag = $this->reader->nodeType == XMLReader::END_ELEMENT
            && $this->reader->localName == 'siteinfo';
        if ( $atClosingTag ) {
            break;
        }

        $name = $this->reader->localName;

        if ( $name == 'namespace' ) {
            // Fetch the attribute before the contents are read
            $key = $this->nodeAttribute( 'key' );
            $this->foreignNamespaces[$key] = $this->nodeContents();
        } elseif ( in_array( $name, $simpleTags ) ) {
            $collected[$name] = $this->nodeContents();
        }
    }

    $collected['_namespaces'] = $this->foreignNamespaces;
    $this->siteInfoCallback( $collected );
}
770
/**
 * Consume the <logitem> element the reader is currently positioned on and
 * pass the gathered fields to processLogItem().
 *
 * The ImportHandleLogItemXMLTag hook is consulted for every node; when it
 * returns false the node is left alone by this method.
 */
private function handleLogItem() {
    $this->debug( "Enter log item handler." );

    // Child tags whose contents are copied into the log info array as-is
    $simpleTags = [ 'id', 'comment', 'type', 'action', 'timestamp', 'logtitle', 'params' ];
    $collected = [];

    while ( $this->reader->read() ) {
        $atClosingTag = $this->reader->nodeType == XMLReader::END_ELEMENT
            && $this->reader->localName == 'logitem';
        if ( $atClosingTag ) {
            break;
        }

        $name = $this->reader->localName;

        if ( !$this->hookRunner->onImportHandleLogItemXMLTag( $this, $collected ) ) {
            // The hook asked us not to process this node
            continue;
        }

        if ( in_array( $name, $simpleTags ) ) {
            $collected[$name] = $this->nodeContents();
            continue;
        }

        if ( $name == 'contributor' ) {
            $collected['contributor'] = $this->handleContributor();
            continue;
        }

        if ( $name != '#text' ) {
            $this->warn( "Unhandled log-item XML tag $name" );
        }
    }

    $this->processLogItem( $collected );
}
800
/**
 * Turn the fields gathered for a log item into a WikiRevision and pass it
 * to logItemCallback().
 *
 * @param array $logInfo Collected fields; 'type' and 'action' are read
 *  unconditionally, every other key is optional
 * @return mixed|false Whatever logItemCallback() returns
 */
private function processLogItem( $logInfo ) {
    $entry = new WikiRevision();

    if ( isset( $logInfo['id'] ) ) {
        $entry->setID( $logInfo['id'] );
    }

    $entry->setType( $logInfo['type'] );
    $entry->setAction( $logInfo['action'] );

    if ( isset( $logInfo['timestamp'] ) ) {
        $entry->setTimestamp( $logInfo['timestamp'] );
    }

    if ( isset( $logInfo['params'] ) ) {
        $entry->setParams( $logInfo['params'] );
    }

    if ( isset( $logInfo['logtitle'] ) ) {
        // @todo Using Title for non-local titles is a recipe for disaster.
        // We should use ForeignTitle here instead.
        $logTitle = Title::newFromText( $logInfo['logtitle'] );
        $entry->setTitle( $logTitle );
    }

    $entry->setNoUpdates( $this->mNoUpdates );

    if ( isset( $logInfo['comment'] ) ) {
        $entry->setComment( $logInfo['comment'] );
    }

    // Attribute the entry: a user name wins over an IP; with neither, use a placeholder name
    $contributor = $logInfo['contributor'] ?? [];
    if ( isset( $contributor['username'] ) ) {
        $prefixedName = $this->externalUserNames->applyPrefix( $contributor['username'] );
        $entry->setUsername( $prefixedName );
    } elseif ( isset( $contributor['ip'] ) ) {
        $entry->setUserIP( $contributor['ip'] );
    } else {
        $entry->setUsername( $this->externalUserNames->addPrefix( 'Unknown user' ) );
    }

    return $this->logItemCallback( $entry );
}
843
/**
 * Consume the <page> element the reader is currently positioned on.
 *
 * Simple child tags are collected into $pageInfo. When the first <revision>
 * or <upload> child is reached, the page title is resolved through
 * processTitle(); on success pageCallback() is invoked and every revision
 * or upload is delegated to handleRevision() / handleUpload(). If the title
 * cannot be used, the remainder of the page is skipped. pageOutCallback()
 * is invoked at the end when $pageInfo carries a '_title' entry.
 */
private function handlePage() {
    $this->debug( "Enter page handler." );

    // Child tags whose value is stored directly in $pageInfo
    $simpleTags = [ 'title', 'ns', 'id', 'redirect', 'restrictions' ];
    $pageInfo = [ 'revisionCount' => 0, 'successfulRevisionCount' => 0 ];

    // Stays null until the first <revision>/<upload>; afterwards it holds the
    // result of processTitle(): an array of two titles, or false.
    $titles = null;
    $badTitle = false;
    $skip = false;

    while ( $skip ? $this->reader->next() : $this->reader->read() ) {
        if ( $this->reader->nodeType == XMLReader::END_ELEMENT
            && $this->reader->localName == 'page'
        ) {
            break;
        }

        $skip = false;
        $tag = $this->reader->localName;

        if ( $badTitle ) {
            // The title is invalid, bail out of this page
            $skip = true;
            continue;
        }

        if ( !$this->hookRunner->onImportHandlePageXMLTag( $this, $pageInfo ) ) {
            // The hook asked us not to process this node
            continue;
        }

        if ( in_array( $tag, $simpleTags ) ) {
            // <redirect title="NewTitle"/> carries its value in an attribute,
            // unlike the other simple tags, whose value is their contents.
            $pageInfo[$tag] = $tag == 'redirect'
                ? $this->nodeAttribute( 'title' )
                : $this->nodeContents();
            continue;
        }

        if ( $tag != 'revision' && $tag != 'upload' ) {
            if ( $tag != '#text' ) {
                $this->warn( "Unhandled page XML tag $tag" );
                $skip = true;
            }
            continue;
        }

        if ( $titles === null ) {
            $titles = $this->processTitle( $pageInfo['title'], $pageInfo['ns'] ?? null );

            if ( is_array( $titles ) ) {
                $this->pageCallback( $titles );
                [ $pageInfo['_title'], $foreignTitle ] = $titles;
            } else {
                $badTitle = true;
                $skip = true;
            }
        }

        if ( $titles ) {
            if ( $tag == 'revision' ) {
                $this->handleRevision( $pageInfo );
            } else {
                $this->handleUpload( $pageInfo );
            }
        }
    }

    // @note $pageInfo['_title'] is only set above, after processTitle() has
    //       returned a valid pair of titles and pageCallback() has run;
    //       $foreignTitle is assigned in that same statement, so it is set
    //       whenever the '_title' key comes from this method.
    if ( array_key_exists( '_title', $pageInfo ) ) {
        /** @var Title $localTitle */
        $localTitle = $pageInfo['_title'];
        $this->pageOutCallback(
            $localTitle,
            // @phan-suppress-next-line PhanPossiblyUndeclaredVariable Set together with _title key
            $foreignTitle,
            $pageInfo['revisionCount'],
            $pageInfo['successfulRevisionCount'],
            $pageInfo
        );
    }
}
929
/**
 * Consume the <revision> element the reader is currently positioned on,
 * then pass the gathered fields to processRevision().
 *
 * $pageInfo['revisionCount'] is incremented for every revision seen, and
 * $pageInfo['successfulRevisionCount'] only when processRevision() returns
 * a truthy value.
 *
 * @param array &$pageInfo Page fields collected so far; the two counters
 *  above are updated in place
 */
private function handleRevision( &$pageInfo ) {
    $this->debug( "Enter revision handler" );
    $revisionInfo = [];

    // Child tags whose contents are copied into $revisionInfo as-is
    $normalFields = [ 'id', 'parentid', 'timestamp', 'comment', 'minor', 'origin',
        'model', 'format', 'text', 'sha1' ];

    $skip = false;

    while ( $skip ? $this->reader->next() : $this->reader->read() ) {
        if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
                $this->reader->localName == 'revision' ) {
            break;
        }

        // Skipping applies to a single unhandled tag only. Without this
        // reset, one unknown tag would switch every later advance to
        // next(), unlike the page-level loop, which resets the flag on
        // each iteration.
        $skip = false;

        $tag = $this->reader->localName;

        if ( !$this->hookRunner->onImportHandleRevisionXMLTag(
            $this, $pageInfo, $revisionInfo )
        ) {
            // Do nothing
        } elseif ( in_array( $tag, $normalFields ) ) {
            $revisionInfo[$tag] = $this->nodeContents();
        } elseif ( $tag == 'content' ) {
            // We can have multiple content tags, so make this an array.
            $revisionInfo[$tag][] = $this->handleContent();
        } elseif ( $tag == 'contributor' ) {
            $revisionInfo['contributor'] = $this->handleContributor();
        } elseif ( $tag != '#text' ) {
            $this->warn( "Unhandled revision XML tag $tag" );
            $skip = true;
        }
    }

    $pageInfo['revisionCount']++;
    if ( $this->processRevision( $pageInfo, $revisionInfo ) ) {
        $pageInfo['successfulRevisionCount']++;
    }
}
972
/**
 * Consume the <content> element the reader is currently positioned on.
 *
 * The ImportHandleContentXMLTag hook is consulted for every node; when it
 * returns false the node is left alone by this method.
 *
 * @return array Collected fields, keyed by tag name (role, origin, model,
 *  format, text); only tags that were actually present are included
 */
private function handleContent(): array {
    $this->debug( "Enter content handler" );
    $contentInfo = [];

    // Child tags whose contents are copied into $contentInfo as-is
    $normalFields = [ 'role', 'origin', 'model', 'format', 'text' ];

    $skip = false;

    while ( $skip ? $this->reader->next() : $this->reader->read() ) {
        if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
            $this->reader->localName == 'content' ) {
            break;
        }

        // Skipping applies to a single unhandled tag only. Without this
        // reset, one unknown tag would switch every later advance to
        // next(), unlike the page-level loop, which resets the flag on
        // each iteration.
        $skip = false;

        $tag = $this->reader->localName;

        if ( !$this->hookRunner->onImportHandleContentXMLTag(
            $this, $contentInfo )
        ) {
            // Do nothing
        } elseif ( in_array( $tag, $normalFields ) ) {
            $contentInfo[$tag] = $this->nodeContents();
        } elseif ( $tag != '#text' ) {
            $this->warn( "Unhandled content XML tag $tag" );
            $skip = true;
        }
    }

    return $contentInfo;
}
1003
/**
 * Build a Content object from the serialized text of an imported slot.
 *
 * @param PageIdentity $page Page used to determine the default content
 *  model when $contentInfo names none
 * @param int $revisionId Revision ID, only used in the size error message
 *  (a falsy value produces a generic message)
 * @param array $contentInfo Slot fields; 'text' is required, 'role' and
 *  'model' are optional
 *
 * @return Content
 * @throws InvalidArgumentException When $contentInfo has no 'text' entry
 * @throws RuntimeException When the text of a size-checked model exceeds
 *  the configured maximum article size
 */
private function makeContent( PageIdentity $page, $revisionId, $contentInfo ) {
    $maxArticleSize = $this->config->get( MainConfigNames::MaxArticleSize );

    if ( !isset( $contentInfo['text'] ) ) {
        throw new InvalidArgumentException( 'Missing text field in import.' );
    }
    $serialized = $contentInfo['text'];

    // Make sure revisions won't violate $wgMaxArticleSize, which could lead to
    // database errors and instability. Only the listed content models (and a
    // missing model) are tested, as other content models might use
    // serialization formats which aren't checked against $wgMaxArticleSize.
    $sizeCheckedModels = [ 'wikitext', 'css', 'json', 'javascript', 'text', '' ];
    $isSizeChecked = !isset( $contentInfo['model'] )
        || in_array( $contentInfo['model'], $sizeCheckedModels );

    if ( $isSizeChecked && strlen( $serialized ) > $maxArticleSize * 1024 ) {
        $subject = $revisionId ? "the revision with ID $revisionId" : 'a revision';
        throw new RuntimeException(
            'The text of ' . $subject .
            " exceeds the maximum allowable size ({$maxArticleSize} KiB)"
        );
    }

    $role = $contentInfo['role'] ?? SlotRecord::MAIN;
    if ( isset( $contentInfo['model'] ) ) {
        $model = $contentInfo['model'];
    } else {
        // No model given: fall back to the default model of the slot role
        $model = $this->slotRoleRegistry
            ->getRoleHandler( $role )
            ->getDefaultModel( $page );
    }

    $handler = $this->contentHandlerFactory->getContentHandler( $model );

    return $handler->unserializeContent( $handler->importTransform( $serialized ) );
}
1050
/**
 * Turn the fields gathered for a revision into a WikiRevision and pass it
 * to revisionCallback().
 *
 * The top-level revision fields supply the main slot; every entry under
 * $revisionInfo['content'] supplies one further slot and must name its role.
 *
 * @param array $pageInfo Page fields; '_title' must be set
 * @param array $revisionInfo Revision fields collected by handleRevision()
 * @return mixed|false Whatever revisionCallback() returns
 * @throws RuntimeException When a content entry has no role
 */
private function processRevision( $pageInfo, $revisionInfo ) {
    $imported = new WikiRevision();

    $revId = $revisionInfo['id'] ?? 0;
    if ( $revId ) {
        $imported->setID( $revId );
    }

    $title = $pageInfo['_title'];
    $imported->setTitle( $title );

    // Main slot, built from the revision's own fields
    $mainContent = $this->makeContent( $title, $revId, $revisionInfo );
    $imported->setContent( SlotRecord::MAIN, $mainContent );

    // Additional slots, one per <content> element
    $slots = $revisionInfo['content'] ?? [];
    foreach ( $slots as $slotInfo ) {
        if ( !isset( $slotInfo['role'] ) ) {
            throw new RuntimeException( "Missing role for imported slot." );
        }

        $slotContent = $this->makeContent( $title, $revId, $slotInfo );
        $imported->setContent( $slotInfo['role'], $slotContent );
    }

    $imported->setTimestamp( $revisionInfo['timestamp'] ?? wfTimestampNow() );

    if ( isset( $revisionInfo['comment'] ) ) {
        $imported->setComment( $revisionInfo['comment'] );
    }

    // The presence of the field, not its value, marks a minor edit
    if ( isset( $revisionInfo['minor'] ) ) {
        $imported->setMinor( true );
    }

    // Attribute the revision: a user name wins over an IP; with neither, use a placeholder name
    $contributor = $revisionInfo['contributor'] ?? [];
    if ( isset( $contributor['username'] ) ) {
        $prefixedName = $this->externalUserNames->applyPrefix( $contributor['username'] );
        $imported->setUsername( $prefixedName );
    } elseif ( isset( $contributor['ip'] ) ) {
        $imported->setUserIP( $contributor['ip'] );
    } else {
        $imported->setUsername( $this->externalUserNames->addPrefix( 'Unknown user' ) );
    }

    if ( isset( $revisionInfo['sha1'] ) ) {
        $imported->setSha1Base36( $revisionInfo['sha1'] );
    }

    $imported->setNoUpdates( $this->mNoUpdates );

    return $this->revisionCallback( $imported );
}
1103
/**
 * Consume the <upload> element the reader is currently positioned on.
 *
 * Simple child tags are collected into an upload info array. Base64-encoded
 * <contents> are decoded into a temporary file. When an image base path is
 * configured and the file named by the 'rel' field exists below it, that
 * file is used as the source instead. The upload is only processed when
 * uploads are being imported.
 *
 * @param array &$pageInfo Page fields collected so far
 * @return mixed The result of processUpload(), or null when uploads are
 *  not being imported
 */
private function handleUpload( &$pageInfo ) {
    $this->debug( "Enter upload handler" );
    $uploadInfo = [];

    // Child tags whose contents are copied into $uploadInfo as-is
    $normalFields = [ 'timestamp', 'comment', 'filename', 'text',
        'src', 'size', 'sha1base36', 'archivename', 'rel' ];

    $skip = false;

    while ( $skip ? $this->reader->next() : $this->reader->read() ) {
        if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
                $this->reader->localName == 'upload' ) {
            break;
        }

        // Skipping applies to a single unhandled tag only. Without this
        // reset, one unknown tag would switch every later advance to
        // next(), unlike the page-level loop, which resets the flag on
        // each iteration.
        $skip = false;

        $tag = $this->reader->localName;

        if ( !$this->hookRunner->onImportHandleUploadXMLTag( $this, $pageInfo ) ) {
            // Do nothing
        } elseif ( in_array( $tag, $normalFields ) ) {
            $uploadInfo[$tag] = $this->nodeContents();
        } elseif ( $tag == 'contents' ) {
            // Read the attribute while the reader is still on the opening
            // tag. nodeContents() presumably advances the reader past it
            // (TODO confirm), so the attribute must not be looked up afterwards.
            $encoding = $this->reader->getAttribute( 'encoding' );
            $contents = $this->nodeContents();
            if ( $encoding === 'base64' ) {
                $uploadInfo['fileSrc'] = $this->dumpTemp( base64_decode( $contents ) );
                $uploadInfo['isTempSrc'] = true;
            }
        } elseif ( $tag == 'contributor' ) {
            $uploadInfo['contributor'] = $this->handleContributor();
        } elseif ( $tag != '#text' ) {
            $this->warn( "Unhandled upload XML tag $tag" );
            $skip = true;
        }
    }

    // A file found below the image base path takes precedence over embedded contents
    if ( $this->mImageBasePath && isset( $uploadInfo['rel'] ) ) {
        $path = "{$this->mImageBasePath}/{$uploadInfo['rel']}";
        if ( file_exists( $path ) ) {
            $uploadInfo['fileSrc'] = $path;
            $uploadInfo['isTempSrc'] = false;
        }
    }

    if ( $this->mImportUploads ) {
        return $this->processUpload( $pageInfo, $uploadInfo );
    }

    return null;
}
1156
/**
 * Write the given data to a newly created temporary file.
 *
 * @param string $contents Data to write
 * @return string Path of the temporary file
 * @throws RuntimeException When the temporary file cannot be created or
 *  written; continuing would import an empty or missing file
 */
private function dumpTemp( $contents ) {
    $filename = tempnam( wfTempDir(), 'importupload' );
    if ( $filename === false ) {
        throw new RuntimeException(
            'Could not create a temporary file for an imported upload.'
        );
    }

    if ( file_put_contents( $filename, $contents ) === false ) {
        throw new RuntimeException(
            "Could not write imported upload data to temporary file $filename."
        );
    }

    return $filename;
}
1166
/**
 * Turn the fields gathered for an upload into a WikiRevision and pass it to
 * the upload callback.
 *
 * @param array $pageInfo Page fields; 'id' and '_title' are read
 * @param array $uploadInfo Upload fields collected by handleUpload();
 *  'timestamp', 'filename', 'src', 'size' and 'comment' are read
 *  unconditionally, the remaining keys are optional
 * @return mixed Whatever the upload callback returns
 */
private function processUpload( $pageInfo, $uploadInfo ) {
    $pageId = $pageInfo['id'];
    $title = $pageInfo['_title'];

    // T292348: text key may be absent, force addition if null
    if ( !isset( $uploadInfo['text'] ) ) {
        $uploadInfo['text'] = '';
    }
    $content = $this->makeContent( $title, $pageId, $uploadInfo );

    $upload = new WikiRevision();
    $upload->setTitle( $title );
    $upload->setID( $pageId );
    $upload->setTimestamp( $uploadInfo['timestamp'] );
    $upload->setContent( SlotRecord::MAIN, $content );
    $upload->setFilename( $uploadInfo['filename'] );

    if ( isset( $uploadInfo['archivename'] ) ) {
        $upload->setArchiveName( $uploadInfo['archivename'] );
    }

    $upload->setSrc( $uploadInfo['src'] );

    if ( isset( $uploadInfo['fileSrc'] ) ) {
        $isTemporary = !empty( $uploadInfo['isTempSrc'] );
        $upload->setFileSrc( $uploadInfo['fileSrc'], $isTemporary );
    }

    if ( isset( $uploadInfo['sha1base36'] ) ) {
        $upload->setSha1Base36( $uploadInfo['sha1base36'] );
    }

    $upload->setSize( intval( $uploadInfo['size'] ) );
    $upload->setComment( $uploadInfo['comment'] );

    // A user name wins over an IP; with neither, no user is set on the upload
    $contributor = $uploadInfo['contributor'] ?? [];
    if ( isset( $contributor['username'] ) ) {
        $prefixedName = $this->externalUserNames->applyPrefix( $contributor['username'] );
        $upload->setUsername( $prefixedName );
    } elseif ( isset( $contributor['ip'] ) ) {
        $upload->setUserIP( $contributor['ip'] );
    }

    $upload->setNoUpdates( $this->mNoUpdates );

    return ( $this->mUploadCallback )( $upload );
}
1211
/**
 * Consume the <contributor> element the reader is currently positioned on.
 *
 * @return array Values of the id, ip and username children that were
 *  present, keyed by tag name; empty for an empty element
 */
private function handleContributor() {
    $this->debug( "Enter contributor handler." );

    $collected = [];

    // <contributor/> has no children and no closing tag to read up to
    if ( $this->reader->isEmptyElement ) {
        return $collected;
    }

    $knownTags = [ 'id', 'ip', 'username' ];

    while ( $this->reader->read() ) {
        $atClosingTag = $this->reader->nodeType == XMLReader::END_ELEMENT
            && $this->reader->localName == 'contributor';
        if ( $atClosingTag ) {
            break;
        }

        $name = $this->reader->localName;
        if ( !in_array( $name, $knownTags ) ) {
            // Anything else is ignored
            continue;
        }

        $collected[$name] = $this->nodeContents();
    }

    return $collected;
}
1240
/**
 * Resolve the title text from the dump into a local title and check that a
 * page may be imported there.
 *
 * A notice is emitted and false is returned when the title is invalid,
 * is an interwiki title, cannot exist as a page, or cannot be edited by
 * the performer.
 *
 * @param string $text Title text from the dump
 * @param string|null $ns Namespace ID from the dump, converted with intval()
 * @return array|false [ local title, foreign title ], or false when the
 *  page must not be imported
 */
private function processTitle( $text, $ns = null ) {
    // Without namespace information from the dump, fall back to the naive factory
    $factory = $this->foreignNamespaces === null
        ? new NaiveForeignTitleFactory( $this->contentLanguage )
        : new NamespaceAwareForeignTitleFactory( $this->foreignNamespaces );

    $foreignTitle = $factory->createForeignTitle( $text, intval( $ns ) );
    $title = $this->importTitleFactory->createTitleFromForeignTitle( $foreignTitle );

    if ( $title === null ) {
        # Invalid page title? Ignore the page
        $this->notice( 'import-error-invalid', $foreignTitle->getFullText() );
        return false;
    }

    // The first failing check decides which notice is emitted
    if ( $title->isExternal() ) {
        $problem = 'import-error-interwiki';
    } elseif ( !$title->canExist() ) {
        $problem = 'import-error-special';
    } elseif ( !$this->performer->definitelyCan( 'edit', $title ) ) {
        # Do not import if the importing wiki user cannot edit this page
        $problem = 'import-error-edit';
    } else {
        return [ $title, $foreignTitle ];
    }

    $this->notice( $problem, $title->getPrefixedText() );
    return false;
}
1280
/**
 * Open the XMLReader connected to the source adapter id
 *
 * On success the reader is stored in $this->reader. The entity loader
 * setting is restored on every exit path.
 *
 * @throws RuntimeException When the reader cannot be opened
 */
private function openReader() {
    // Enable the entity loader, as it is needed for loading external URLs via
    // XMLReader::open (T86036)
    // phpcs:ignore Generic.PHP.NoSilencedErrors -- suppress deprecation per T268847
    $oldDisable = @libxml_disable_entity_loader( false );

    try {
        // A static call, to avoid https://github.com/php/php-src/issues/11548
        $reader = XMLReader::open(
            'uploadsource://' . $this->sourceAdapterId, null, LIBXML_PARSEHUGE );

        if ( !( $reader instanceof XMLReader ) ) {
            // libxml_get_last_error() returns false when libxml recorded no
            // error, so the message must not be read unconditionally.
            $error = libxml_get_last_error();
            throw new RuntimeException(
                'Encountered an internal error while initializing WikiImporter object: ' .
                ( $error ? $error->message : 'unknown error' )
            );
        }

        $this->reader = $reader;
    } finally {
        // phpcs:ignore Generic.PHP.NoSilencedErrors
        @libxml_disable_entity_loader( $oldDisable );
    }
}
1310
/**
 * Check the syntax of the given xml
 *
 * Reads the whole document once and throws if libxml reported an error.
 * Afterwards the source is rewound and the reader reopened for the real
 * import. Nothing is done for sources that are not seekable.
 *
 * @throws RuntimeException When libxml reports an error while reading
 */
private function syntaxCheckXML() {
    if ( !UploadSourceAdapter::isSeekableSource( $this->sourceAdapterId ) ) {
        return;
    }
    // phpcs:ignore Generic.PHP.NoSilencedErrors.Discouraged
    $oldDisable = @libxml_disable_entity_loader( false );
    try {
        // Start from a clean slate: libxml keeps its last error until it is
        // cleared, so an error left over from earlier XML processing would
        // otherwise be mistaken for an error in this document.
        libxml_clear_errors();

        // phpcs:ignore Generic.PHP.NoSilencedErrors.Discouraged
        while ( @$this->reader->read() );
        $error = libxml_get_last_error();
        if ( $error ) {
            $errorMessage = 'XML error at line ' . $error->line . ': ' . $error->message;
            wfDebug( __METHOD__ . ': Invalid xml found - ' . $errorMessage );
            throw new RuntimeException( $errorMessage );
        }
    } finally {
        // phpcs:ignore Generic.PHP.NoSilencedErrors.Discouraged
        @libxml_disable_entity_loader( $oldDisable );
        $this->reader->close();
    }

    // Reopen for the real import
    UploadSourceAdapter::seekSource( $this->sourceAdapterId, 0 );
    $this->openReader();
}
1339}
1340
/**
 * Registers 'WikiImporter' as an alias name for the WikiImporter class.
 * NOTE(review): presumably the class is declared in a namespace and this
 * keeps the old un-namespaced name working — confirm against the file header.
 *
 * @deprecated class alias since 1.46
 */
class_alias( WikiImporter::class, 'WikiImporter' );