MediaWiki master
SpecialExport.php
Go to the documentation of this file.
1<?php
23namespace MediaWiki\Specials;
24
35use WikiExporter;
38
/** @var bool Export only the current revision; execute() starts with true and POST requests overwrite it from 'curonly' */
protected bool $curonly;

/** @var bool Whether execute() should stream an export instead of rendering the form */
protected bool $doExport;

/** @var int Link-following depth, as returned by validateLinkDepth() */
protected int $pageLinkDepth;

/** @var bool Whether the 'templates' request flag was set; doExport() then adds used templates */
protected bool $templates;

/** @var IConnectionProvider Source of the replica database connections used by the queries in this class */
private IConnectionProvider $dbProvider;

/** @var WikiExporterFactory Creates the WikiExporter used by doExport() */
private WikiExporterFactory $wikiExporterFactory;

/** @var TitleFormatter Turns hook-provided extra pages into prefixed text */
private TitleFormatter $titleFormatter;

/** @var LinksMigration Provides title fields and query info for the links tables */
private LinksMigration $linksMigration;
55
/**
 * Register the special page under the name 'Export' and store the injected services.
 *
 * @param IConnectionProvider $dbProvider
 * @param WikiExporterFactory $wikiExporterFactory
 * @param TitleFormatter $titleFormatter
 * @param LinksMigration $linksMigration
 */
public function __construct(
	IConnectionProvider $dbProvider,
	WikiExporterFactory $wikiExporterFactory,
	TitleFormatter $titleFormatter,
	LinksMigration $linksMigration
) {
	parent::__construct( 'Export' );

	$this->linksMigration = $linksMigration;
	$this->titleFormatter = $titleFormatter;
	$this->wikiExporterFactory = $wikiExporterFactory;
	$this->dbProvider = $dbProvider;
}
68
/**
 * Handle a request to Special:Export: either stream an XML dump or show the export form.
 *
 * The request is classified into one of five modes:
 *  - 'addcat': append the members of a category to the page list and show the form again;
 *  - 'addns' (only if ExportFromNamespaces is enabled): append the pages of a namespace
 *    to the page list and show the form again;
 *  - 'exportall' (only if ExportAllowAll is enabled): export every page;
 *  - a POST without subpage: export the listed pages, honouring the 'curonly',
 *    'history', 'limit', 'offset' and 'dir' parameters;
 *  - anything else (GET): export the pages named in 'pages' or in the subpage,
 *    current revision only unless 'history' is set.
 *
 * When an export is due, normal page output is disabled and the XML is streamed
 * through doExport(); otherwise the HTMLForm is rendered.
 *
 * @param string|null $par Subpage text; used as the default page list for GET requests
 */
public function execute( $par ) {
	$this->setHeaders();
	$this->outputHeader();
	$config = $this->getConfig();

	$this->curonly = true;
	$this->doExport = false;
	$request = $this->getRequest();
	$this->templates = $request->getCheck( 'templates' );
	$this->pageLinkDepth = $this->validateLinkDepth(
		$request->getIntOrNull( 'pagelink-depth' )
	);
	$nsindex = '';
	$exportall = false;

	if ( $request->getCheck( 'addcat' ) ) {
		// "Add pages from category" button: extend the list, do not export yet.
		$page = $request->getText( 'pages' );
		$catname = $request->getText( 'catname' );

		if ( $catname !== '' && $catname !== null && $catname !== false ) {
			$t = Title::makeTitleSafe( NS_MAIN, $catname );
			if ( $t ) {
				$catpages = $this->getPagesFromCategory( $t );
				if ( $catpages ) {
					if ( $page !== '' ) {
						$page .= "\n";
					}
					$page .= implode( "\n", $catpages );
				}
			}
		}
	} elseif ( $request->getCheck( 'addns' ) && $config->get( MainConfigNames::ExportFromNamespaces ) ) {
		// "Add pages from namespace" button: extend the list, do not export yet.
		$page = $request->getText( 'pages' );
		$nsindex = $request->getText( 'nsindex', '' );

		if ( strval( $nsindex ) !== '' ) {
			$nspages = $this->getPagesFromNamespace( (int)$nsindex );
			if ( $nspages ) {
				// Only insert a separator when there is something to separate,
				// consistent with the category branch above.
				if ( $page !== '' ) {
					$page .= "\n";
				}
				$page .= implode( "\n", $nspages );
			}
		}
	} elseif ( $request->getCheck( 'exportall' ) && $config->get( MainConfigNames::ExportAllowAll ) ) {
		$this->doExport = true;
		$exportall = true;

		/* Although $page and $history are not used later on, we
		nevertheless set them to avoid that PHP notices about using
		undefined variables foul up our XML output (see call to
		doExport(...) further down) */
		$page = '';
		$history = '';
	} elseif ( $request->wasPosted() && $par == '' ) {
		// Log to see if certain parameters are actually used.
		// If not, we could deprecate them and do some cleanup, here and in WikiExporter.
		LoggerFactory::getInstance( 'export' )->debug(
			'Special:Export POST, dir: [{dir}], offset: [{offset}], limit: [{limit}]', [
				'dir' => $request->getRawVal( 'dir' ),
				'offset' => $request->getRawVal( 'offset' ),
				'limit' => $request->getRawVal( 'limit' ),
			] );

		$page = $request->getText( 'pages' );
		$this->curonly = $request->getCheck( 'curonly' );
		$rawOffset = $request->getVal( 'offset' );

		if ( $rawOffset ) {
			$offset = wfTimestamp( TS_MW, $rawOffset );
		} else {
			$offset = null;
		}

		$maxHistory = $config->get( MainConfigNames::ExportMaxHistory );
		$limit = $request->getInt( 'limit' );
		$dir = $request->getVal( 'dir' );
		// Default history request: ascending, no offset, capped at ExportMaxHistory.
		$history = [
			'dir' => 'asc',
			'offset' => false,
			'limit' => $maxHistory,
		];
		$historyCheck = $request->getCheck( 'history' );

		if ( $this->curonly ) {
			$history = WikiExporter::CURRENT;
		} elseif ( !$historyCheck ) {
			// A requested limit is only honoured when it is tighter than the
			// configured maximum (a maximum of 0 accepts any positive limit).
			if ( $limit > 0 && ( $maxHistory == 0 || $limit < $maxHistory ) ) {
				$history['limit'] = $limit;
			}

			if ( $offset !== null ) {
				$history['offset'] = $offset;
			}

			if ( strtolower( $dir ?? '' ) == 'desc' ) {
				$history['dir'] = 'desc';
			}
		}

		if ( $page != '' ) {
			$this->doExport = true;
		}
	} else {
		// Default to current-only for GET requests.
		$page = $request->getText( 'pages', $par ?? '' );
		$historyCheck = $request->getCheck( 'history' );

		if ( $historyCheck ) {
			$history = WikiExporter::FULL;
		} else {
			$history = WikiExporter::CURRENT;
		}

		if ( $page != '' ) {
			$this->doExport = true;
		}
	}

	if ( !$config->get( MainConfigNames::ExportAllowHistory ) ) {
		// Override
		$history = WikiExporter::CURRENT;
	}

	// Contributor lists are only produced for current-revision exports and
	// only when the site configuration allows them.
	$list_authors = $request->getCheck( 'listauthors' );
	if ( !$this->curonly || !$config->get( MainConfigNames::ExportAllowListContributors ) ) {
		$list_authors = false;
	}

	if ( $this->doExport ) {
		$this->getOutput()->disable();

		// Cancel output buffering and gzipping if set
		// This should provide safer streaming for pages with history
		wfResetOutputBuffers();
		$request->response()->header( 'Content-type: application/xml; charset=utf-8' );
		$request->response()->header( 'X-Robots-Tag: noindex,nofollow' );

		if ( $request->getCheck( 'wpDownload' ) ) {
			// Provide a sensible filename suggestion
			$filename = urlencode( $config->get( MainConfigNames::Sitename ) . '-' .
				wfTimestampNow() . '.xml' );
			$request->response()->header( "Content-disposition: attachment;filename={$filename}" );
		}

		// @phan-suppress-next-next-line PhanPossiblyUndeclaredVariable
		// @phan-suppress-next-line PhanTypeMismatchArgumentNullable history is set when used
		$this->doExport( $page, $history, $list_authors, $exportall );

		return;
	}

	// No export requested (or nothing to export): render the form.
	$out = $this->getOutput();
	$out->addWikiMsg( 'exporttext' );

	if ( $page == '' ) {
		$categoryName = $request->getText( 'catname' );
	} else {
		$categoryName = '';
	}
	$canExportAll = $config->get( MainConfigNames::ExportAllowAll );
	// Page-selection fields are hidden while the "export all" box is ticked.
	$hideIf = $canExportAll ? [ 'hide-if' => [ '===', 'exportall', '1' ] ] : [];

	$formDescriptor = [
		'catname' => [
			'type' => 'textwithbutton',
			'name' => 'catname',
			'horizontal-label' => true,
			'label-message' => 'export-addcattext',
			'default' => $categoryName,
			'size' => 40,
			'buttontype' => 'submit',
			'buttonname' => 'addcat',
			'buttondefault' => $this->msg( 'export-addcat' )->text(),
		] + $hideIf,
	];
	if ( $config->get( MainConfigNames::ExportFromNamespaces ) ) {
		$formDescriptor += [
			'nsindex' => [
				'type' => 'namespaceselectwithbutton',
				'default' => $nsindex,
				'label-message' => 'export-addnstext',
				'horizontal-label' => true,
				'name' => 'nsindex',
				'id' => 'namespace',
				'cssclass' => 'namespaceselector',
				'buttontype' => 'submit',
				'buttonname' => 'addns',
				'buttondefault' => $this->msg( 'export-addns' )->text(),
			] + $hideIf,
		];
	}

	if ( $canExportAll ) {
		$formDescriptor += [
			'exportall' => [
				'type' => 'check',
				'label-message' => 'exportall',
				'name' => 'exportall',
				'id' => 'exportall',
				'default' => $request->wasPosted() && $request->getCheck( 'exportall' ),
			],
		];
	}

	$formDescriptor += [
		'textarea' => [
			'class' => HTMLTextAreaField::class,
			'name' => 'pages',
			'label-message' => 'export-manual',
			'nodata' => true,
			'rows' => 10,
			'default' => $page,
		] + $hideIf,
	];

	if ( $config->get( MainConfigNames::ExportAllowHistory ) ) {
		$formDescriptor += [
			'curonly' => [
				'type' => 'check',
				'label-message' => 'exportcuronly',
				'name' => 'curonly',
				'id' => 'curonly',
				'default' => !$request->wasPosted() || $request->getCheck( 'curonly' ),
			],
		];
	} else {
		$out->addWikiMsg( 'exportnohistory' );
	}

	$formDescriptor += [
		'templates' => [
			'type' => 'check',
			'label-message' => 'export-templates',
			'name' => 'templates',
			'id' => 'wpExportTemplates',
			'default' => $request->wasPosted() && $request->getCheck( 'templates' ),
		],
	];

	if ( $config->get( MainConfigNames::ExportMaxLinkDepth ) || $this->userCanOverrideExportDepth() ) {
		$formDescriptor += [
			'pagelink-depth' => [
				'type' => 'text',
				'name' => 'pagelink-depth',
				'id' => 'pagelink-depth',
				'label-message' => 'export-pagelinks',
				'default' => '0',
				'size' => 20,
			],
		];
	}

	$formDescriptor += [
		'wpDownload' => [
			'type' => 'check',
			'name' => 'wpDownload',
			'id' => 'wpDownload',
			'default' => !$request->wasPosted() || $request->getCheck( 'wpDownload' ),
			'label-message' => 'export-download',
		],
	];

	if ( $config->get( MainConfigNames::ExportAllowListContributors ) ) {
		$formDescriptor += [
			'listauthors' => [
				'type' => 'check',
				'label-message' => 'exportlistauthors',
				'default' => $request->wasPosted() && $request->getCheck( 'listauthors' ),
				'name' => 'listauthors',
				'id' => 'listauthors',
			],
		];
	}

	$htmlForm = HTMLForm::factory( 'ooui', $formDescriptor, $this->getContext() );
	$htmlForm->setSubmitTextMsg( 'export-submit' );
	$htmlForm->prepareForm()->displayForm( false );
	$this->addHelpLink( 'Help:Export' );
}
354
/**
 * Whether the current authority holds the 'override-export-depth' right.
 *
 * @return bool
 */
protected function userCanOverrideExportDepth() {
	$authority = $this->getAuthority();

	return $authority->isAllowed( 'override-export-depth' );
}
361
/**
 * Stream the XML export for the requested pages.
 *
 * @param string $page Newline-separated list of page names (ignored when $exportall is set)
 * @param int|array $history One of the WikiExporter history constants, or the
 *  dir/offset/limit array built by execute()
 * @param bool $list_authors Value assigned to the exporter's list_authors property
 * @param bool $exportall Export every page with full history, ignoring $page and $history
 */
protected function doExport( $page, $history, $list_authors, $exportall ) {
	$pages = [];

	if ( $exportall ) {
		// Grabbing everything: force full history and skip page-list processing.
		$history = WikiExporter::FULL;
	} else {
		// Keyed by prefixed text so that every page is recorded only once.
		$pageSet = [];
		foreach ( explode( "\n", $page ) as $line ) {
			$title = Title::newFromText( trim( $line ) );
			if ( !$title || $title->isExternal() || $title->getText() === '' ) {
				continue;
			}
			$pageSet[$title->getPrefixedText()] = true;
		}

		// The pages the user asked for, before any expansion.
		$inputPages = array_keys( $pageSet );

		if ( $this->templates ) {
			$pageSet = $this->getTemplates( $inputPages, $pageSet );
		}
		$pageSet = $this->getExtraPages( $inputPages, $pageSet );
		if ( $this->pageLinkDepth ) {
			$pageSet = $this->getPageLinks( $inputPages, $pageSet, $this->pageLinkDepth );
		}

		// Normalize titles to the same format and remove dupes, see T19374
		$pages = array_unique( array_map(
			static fn ( $name ) => str_replace( ' ', '_', $name ),
			array_keys( $pageSet )
		) );
	}

	$exporter = $this->wikiExporterFactory->getWikiExporter(
		$this->dbProvider->getReplicaDatabase(),
		$history
	);
	$exporter->list_authors = $list_authors;
	$exporter->openStream();

	if ( $exportall ) {
		$exporter->allPages();
	} else {
		foreach ( $pages as $pageName ) {
			$title = Title::newFromText( $pageName );
			// @todo Perhaps output an <error> tag or something for skipped pages.
			if ( $title === null ) {
				continue;
			}

			# T10824: Only export pages the user can read
			if ( !$this->getAuthority()->authorizeRead( 'read', $title ) ) {
				continue;
			}

			$exporter->pageByTitle( $title );
		}
	}

	$exporter->closeStream();
}
441
/**
 * List the pages that have a categorylinks entry for the given category,
 * limited to ExportPagelistLimit rows.
 *
 * @param PageIdentity $page Category page; only its DB key is used
 * @return string[] Page names as produced by Title::makeName()
 */
protected function getPagesFromCategory( PageIdentity $page ) {
	$config = $this->getConfig();
	$rowLimit = $config->get( MainConfigNames::ExportPagelistLimit );
	$migrationStage = $config->get( MainConfigNames::CategoryLinksSchemaMigrationStage );
	$categoryKey = $page->getDBkey();

	$query = $this->dbProvider->getReplicaDatabase()->newSelectQueryBuilder()
		->select( [ 'page_namespace', 'page_title' ] )
		->from( 'page' )
		->join( 'categorylinks', null, 'cl_from=page_id' )
		->limit( $rowLimit );

	// The category is matched either through the old cl_to column or through
	// the linktarget table, depending on the schema migration stage.
	$readOldSchema = (bool)( $migrationStage & SCHEMA_COMPAT_READ_OLD );
	if ( !$readOldSchema ) {
		$query->join( 'linktarget', null, 'cl_target_id = lt_id' )
			->where( [ 'lt_title' => $categoryKey, 'lt_namespace' => NS_CATEGORY ] );
	} else {
		$query->where( [ 'cl_to' => $categoryKey ] );
	}

	$names = [];
	foreach ( $query->caller( __METHOD__ )->fetchResultSet() as $row ) {
		$names[] = Title::makeName( $row->page_namespace, $row->page_title );
	}

	return $names;
}
474
/**
 * List pages of one namespace, limited to ExportPagelistLimit rows.
 *
 * @param int $nsindex Namespace index to match against page_namespace
 * @return string[] Page names as produced by Title::makeName()
 */
protected function getPagesFromNamespace( $nsindex ) {
	$rowLimit = $this->getConfig()->get( MainConfigNames::ExportPagelistLimit );

	$query = $this->dbProvider->getReplicaDatabase()->newSelectQueryBuilder()
		->select( [ 'page_namespace', 'page_title' ] )
		->from( 'page' )
		->where( [ 'page_namespace' => $nsindex ] )
		->limit( $rowLimit )
		->caller( __METHOD__ );

	$names = [];
	foreach ( $query->fetchResultSet() as $row ) {
		$names[] = Title::makeName( $row->page_namespace, $row->page_title );
	}

	return $names;
}
498
/**
 * Expand a page set with the templates used on the given pages.
 *
 * Builds the templatelinks query and hands it to getLinks(), which runs it
 * once per input page.
 *
 * @param string[] $inputPages Page names to look up
 * @param array $pageSet Set of page names (name => true) to extend
 * @return array The extended page set
 */
protected function getTemplates( $inputPages, $pageSet ) {
	$migration = $this->linksMigration;
	[ $nsField, $titleField ] = $migration->getTitleFields( 'templatelinks' );
	$queryInfo = $migration->getQueryInfo( 'templatelinks' );

	// 'templatelinks' is joined explicitly below, so it is removed from the extra tables.
	$extraTables = array_diff( $queryInfo['tables'], [ 'templatelinks' ] );

	$queryBuilder = $this->dbProvider->getReplicaDatabase()->newSelectQueryBuilder()
		->select( [ 'namespace' => $nsField, 'title' => $titleField ] )
		->from( 'page' )
		->join( 'templatelinks', null, 'page_id=tl_from' )
		->tables( $extraTables )
		->joinConds( $queryInfo['joins'] )
		->caller( __METHOD__ );

	return $this->getLinks( $inputPages, $pageSet, $queryBuilder );
}
518
/**
 * Add pages contributed by the SpecialExportGetExtraPages hook to the page set.
 *
 * @param string[] $inputPages Page names passed to the hook
 * @param array $pageSet Set of page names (name => true) to extend
 * @return array The extended page set
 */
private function getExtraPages( $inputPages, $pageSet ) {
	// Filled in by the hook handlers.
	$hookPages = [];
	$this->getHookRunner()->onSpecialExportGetExtraPages( $inputPages, $hookPages );

	$formatter = $this->titleFormatter;
	foreach ( $hookPages as $hookPage ) {
		$prefixedText = $formatter->getPrefixedText( $hookPage );
		$pageSet[$prefixedText] = true;
	}

	return $pageSet;
}
533
/**
 * Validate link depth setting, if available.
 *
 * The result is never negative. Users without the 'override-export-depth'
 * right are limited to ExportMaxLinkDepth, and every caller is additionally
 * limited by the hard-coded cap, including when the configured maximum is
 * larger than that cap.
 *
 * @param int|null $depth Requested depth, or null if none was given
 * @return int Depth to use, between 0 and 5 inclusive
 */
protected function validateLinkDepth( $depth ) {
	if ( $depth === null || $depth < 0 ) {
		return 0;
	}

	if ( !$this->userCanOverrideExportDepth() ) {
		$maxLinkDepth = $this->getConfig()->get( MainConfigNames::ExportMaxLinkDepth );
		// Clamp instead of returning early, so the hard cap below still
		// applies when the configured maximum exceeds it.
		$depth = min( $depth, $maxLinkDepth );
	}

	/*
	 * There's a HARD CODED limit of 5 levels of recursion here to prevent a
	 * crazy-big export from being done by someone setting the depth
	 * number too high. In other words, last resort safety net.
	 */
	$depth = intval( min( $depth, 5 ) );

	// Guard against a negative configured maximum.
	return max( 0, $depth );
}
559
/**
 * Expand a list of pages to include pages linked to from that page.
 *
 * Links are followed level by level. A page's links are queried only once:
 * each level expands only the pages in the set that have not been expanded
 * yet, and the loop stops early when a level adds nothing new.
 *
 * @param string[] $inputPages Page names to start from
 * @param array $pageSet Set of page names (name => true) to extend
 * @param int $depth Number of link levels to follow
 * @return array The extended page set
 */
protected function getPageLinks( $inputPages, $pageSet, $depth ) {
	if ( $depth <= 0 ) {
		return $pageSet;
	}

	// The query does not depend on the level, so build it once. getLinks()
	// clones the builder before adding per-page conditions.
	[ $nsField, $titleField ] = $this->linksMigration->getTitleFields( 'pagelinks' );
	$queryInfo = $this->linksMigration->getQueryInfo( 'pagelinks' );
	$queryBuilder = $this->dbProvider->getReplicaDatabase()->newSelectQueryBuilder()
		->caller( __METHOD__ )
		->select( [ 'namespace' => $nsField, 'title' => $titleField ] )
		->from( 'page' )
		->join( 'pagelinks', null, 'page_id=pl_from' )
		->tables( array_diff( $queryInfo['tables'], [ 'pagelinks' ] ) )
		->joinConds( $queryInfo['joins'] );

	// Names whose links have already been fetched (name => true).
	$queried = [];
	$frontier = $inputPages;

	for ( ; $depth > 0; --$depth ) {
		$pageSet = $this->getLinks( $frontier, $pageSet, $queryBuilder );

		foreach ( $frontier as $name ) {
			$queried[$name] = true;
		}

		// Next level: everything in the set that has not been expanded yet.
		// This includes pages that were in $pageSet but not in $inputPages.
		$frontier = array_keys( array_diff_key( $pageSet, $queried ) );
		if ( !$frontier ) {
			break;
		}
	}

	return $pageSet;
}
585
/**
 * Expand a list of pages to include items used in those pages.
 *
 * For every input page that parses to a title, the page itself is added to
 * the set, and a clone of the query, restricted to that page, is run to add
 * each row it returns.
 *
 * @param string[] $inputPages Page names to look up
 * @param array $pageSet Set of page names (name => true) to extend
 * @param SelectQueryBuilder $queryBuilder Query selecting 'namespace' and 'title';
 *  it is cloned per page and not modified
 * @return array The extended page set
 */
protected function getLinks( $inputPages, $pageSet, SelectQueryBuilder $queryBuilder ) {
	foreach ( $inputPages as $pageName ) {
		$source = Title::newFromText( $pageName );
		if ( !$source ) {
			// Unparseable names are skipped.
			continue;
		}

		$pageSet[$source->getPrefixedText()] = true;

		// One query per input page.
		$perPageQuery = clone $queryBuilder;
		$rows = $perPageQuery
			->where( [
				'page_namespace' => $source->getNamespace(),
				'page_title' => $source->getDBkey()
			] )
			->fetchResultSet();

		foreach ( $rows as $row ) {
			$target = Title::makeTitle( $row->namespace, $row->title );
			$pageSet[$target->getPrefixedText()] = true;
		}
	}

	return $pageSet;
}
616
/**
 * Name of the group under which this page is listed on Special:SpecialPages.
 *
 * @return string
 */
protected function getGroupName() {
	return 'pagetools';
}
620}
621
// Make this class also available under the global name 'SpecialExport'.
// NOTE(review): presumably kept for callers that predate the namespace — confirm.
623class_alias( SpecialExport::class, 'SpecialExport' );
const NS_MAIN
Definition Defines.php:65
const SCHEMA_COMPAT_READ_OLD
Definition Defines.php:304
const NS_CATEGORY
Definition Defines.php:79
wfTimestampNow()
Convenience function; returns MediaWiki timestamp for the present time.
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
wfResetOutputBuffers( $resetGzipEncoding=true)
Clear away any user-level output buffers, discarding contents.
Factory service for WikiExporter instances.
Object handling generic submission, CSRF protection, layout and other logic for UI forms in a reusable manner.
Definition HTMLForm.php:210
Service for compat reading of links tables.
Create PSR-3 logger objects.
A class containing constants representing the names of configuration variables.
const ExportMaxLinkDepth
Name constant for the ExportMaxLinkDepth setting, for use with Config::get()
const CategoryLinksSchemaMigrationStage
Name constant for the CategoryLinksSchemaMigrationStage setting, for use with Config::get()
const Sitename
Name constant for the Sitename setting, for use with Config::get()
const ExportAllowAll
Name constant for the ExportAllowAll setting, for use with Config::get()
const ExportMaxHistory
Name constant for the ExportMaxHistory setting, for use with Config::get()
const ExportAllowListContributors
Name constant for the ExportAllowListContributors setting, for use with Config::get()
const ExportPagelistLimit
Name constant for the ExportPagelistLimit setting, for use with Config::get()
const ExportFromNamespaces
Name constant for the ExportFromNamespaces setting, for use with Config::get()
const ExportAllowHistory
Name constant for the ExportAllowHistory setting, for use with Config::get()
Parent class for all special pages.
setHeaders()
Sets headers - this should be called from the execute() method of all derived classes!
getConfig()
Shortcut to get main config object.
getContext()
Gets the context this SpecialPage is executed in.
getRequest()
Get the WebRequest being used for this instance.
msg( $key,... $params)
Wrapper around wfMessage that sets the current context.
getOutput()
Get the OutputPage being used for this instance.
getAuthority()
Shortcut to get the Authority executing this instance.
outputHeader( $summaryMessageKey='')
Outputs a summary message on top of special pages. By default the message key is the canonical name of the special page.
addHelpLink( $to, $overrideBaseUrl=false)
Adds help link with an icon via page indicators.
A special page that allows users to export pages in a XML file.
doExport( $page, $history, $list_authors, $exportall)
Do the actual page exporting.
execute( $par)
Default execute method. Checks user permissions.
getTemplates( $inputPages, $pageSet)
Expand a list of pages to include templates used in those pages.
getPageLinks( $inputPages, $pageSet, $depth)
Expand a list of pages to include pages linked to from that page.
getLinks( $inputPages, $pageSet, SelectQueryBuilder $queryBuilder)
Expand a list of pages to include items used in those pages.
getGroupName()
Under which header this special page is listed in Special:SpecialPages. See messages 'specialpages-group-*' for valid names.
getPagesFromCategory(PageIdentity $page)
validateLinkDepth( $depth)
Validate link depth setting, if available.
__construct(IConnectionProvider $dbProvider, WikiExporterFactory $wikiExporterFactory, TitleFormatter $titleFormatter, LinksMigration $linksMigration)
A title formatter service for MediaWiki.
Represents a title within MediaWiki.
Definition Title.php:78
Build SELECT queries with a fluent interface.
Interface for objects (potentially) representing an editable wiki page.
getDBkey()
Get the page title in DB key form.
Provide primary and replica IDatabase connections.