MediaWiki master
SpecialExport.php
Go to the documentation of this file.
1<?php
9namespace MediaWiki\Specials;
10
28use Wikimedia\Timestamp\TimestampFormat as TS;
29
/** @var bool Whether only the current revision of each page is exported; reset to true at the start of execute() */
protected bool $curonly;

/** @var bool Whether execute() streams an XML export instead of rendering the form */
protected bool $doExport;

/** @var int Validated number of page-link levels to follow, as returned by validateLinkDepth() */
protected int $pageLinkDepth;

/** @var bool Whether the 'templates' checkbox was set, i.e. templates used by the pages are added */
protected bool $templates;
41
/**
 * @param IConnectionProvider $dbProvider Provides the replica connections used for
 *  page, category and link lookups
 * @param WikiExporterFactory $wikiExporterFactory Creates the WikiExporter that writes the dump
 * @param TitleFormatter $titleFormatter Formats the extra pages contributed by hook handlers
 * @param LinksMigration $linksMigration Supplies title fields and query info for the links tables
 */
public function __construct(
	private readonly IConnectionProvider $dbProvider,
	private readonly WikiExporterFactory $wikiExporterFactory,
	private readonly TitleFormatter $titleFormatter,
	private readonly LinksMigration $linksMigration,
) {
	// Hand the page name 'Export' to the parent constructor.
	parent::__construct( 'Export' );
}
50
/**
 * Entry point for the export special page.
 *
 * Depending on the request this either streams an XML dump (a page list
 * or "export all" was submitted) or renders the export form. The "addcat"
 * and "addns" buttons only extend the page list shown in the form; they
 * never start an export themselves.
 *
 * @param string|null $par Subpage text; used as the default page list when
 *  the request carries no 'pages' parameter and is not a plain form POST
 */
public function execute( $par ) {
	$this->setHeaders();
	$this->outputHeader();
	$config = $this->getConfig();

	$this->curonly = true;
	$this->doExport = false;
	$request = $this->getRequest();
	$this->templates = $request->getCheck( 'templates' );
	$this->pageLinkDepth = $this->validateLinkDepth(
		$request->getIntOrNull( 'pagelink-depth' )
	);
	$nsindex = '';
	$exportall = false;

	if ( $request->getCheck( 'addcat' ) ) {
		// "Add pages from category": append the category members to the list.
		$page = $request->getText( 'pages' );
		$catname = $request->getText( 'catname' );

		// getText() always yields a string, so an empty-string check is sufficient.
		if ( $catname !== '' ) {
			$t = Title::makeTitleSafe( NS_MAIN, $catname );
			if ( $t ) {
				$catpages = $this->getPagesFromCategory( $t );
				if ( $catpages ) {
					if ( $page !== '' ) {
						$page .= "\n";
					}
					$page .= implode( "\n", $catpages );
				}
			}
		}
	} elseif ( $request->getCheck( 'addns' ) && $config->get( MainConfigNames::ExportFromNamespaces ) ) {
		// "Add pages from namespace": append the namespace's pages to the list.
		$page = $request->getText( 'pages' );
		$nsindex = $request->getText( 'nsindex', '' );

		if ( $nsindex !== '' ) {
			$nspages = $this->getPagesFromNamespace( (int)$nsindex );
			if ( $nspages ) {
				// Only insert a separator when there is something to separate,
				// matching the category branch (no stray leading newline).
				if ( $page !== '' ) {
					$page .= "\n";
				}
				$page .= implode( "\n", $nspages );
			}
		}
	} elseif ( $request->getCheck( 'exportall' ) && $config->get( MainConfigNames::ExportAllowAll ) ) {
		$this->doExport = true;
		$exportall = true;

		/* Although $page and $history are not used later on, we
		nevertheless set them to avoid that PHP notices about using
		undefined variables foul up our XML output (see call to
		doExport(...) further down) */
		$page = '';
		$history = '';
	} elseif ( $request->wasPosted() && $par == '' ) {
		// Log to see if certain parameters are actually used.
		// If not, we could deprecate them and do some cleanup, here and in WikiExporter.
		LoggerFactory::getInstance( 'export' )->debug(
			'Special:Export POST, dir: [{dir}], offset: [{offset}], limit: [{limit}]', [
				'dir' => $request->getRawVal( 'dir' ),
				'offset' => $request->getRawVal( 'offset' ),
				'limit' => $request->getRawVal( 'limit' ),
			] );

		$page = $request->getText( 'pages' );
		$this->curonly = $request->getCheck( 'curonly' );
		$rawOffset = $request->getVal( 'offset' );

		if ( $rawOffset ) {
			$offset = wfTimestamp( TS::MW, $rawOffset );
		} else {
			$offset = null;
		}

		$maxHistory = $config->get( MainConfigNames::ExportMaxHistory );
		$limit = $request->getInt( 'limit' );
		$dir = $request->getVal( 'dir' );
		// Defaults for a history export; refined below from the request.
		$history = [
			'dir' => 'asc',
			'offset' => false,
			'limit' => $maxHistory,
		];
		$historyCheck = $request->getCheck( 'history' );

		if ( $this->curonly ) {
			$history = WikiExporter::CURRENT;
		} elseif ( !$historyCheck ) {
			// Without the 'history' checkbox, honour the limit/offset/dir parameters.
			if ( $limit > 0 && ( $maxHistory == 0 || $limit < $maxHistory ) ) {
				$history['limit'] = $limit;
			}

			if ( $offset !== null ) {
				$history['offset'] = $offset;
			}

			if ( strtolower( $dir ?? '' ) == 'desc' ) {
				$history['dir'] = 'desc';
			}
		}

		if ( $page != '' ) {
			$this->doExport = true;
		}
	} else {
		// Default to current-only for GET requests.
		$page = $request->getText( 'pages', $par ?? '' );
		$historyCheck = $request->getCheck( 'history' );

		if ( $historyCheck ) {
			$history = WikiExporter::FULL;
		} else {
			$history = WikiExporter::CURRENT;
		}

		if ( $page != '' ) {
			$this->doExport = true;
		}
	}

	if ( !$config->get( MainConfigNames::ExportAllowHistory ) ) {
		// Override
		$history = WikiExporter::CURRENT;
	}

	// The contributor list is only produced for current-revision exports
	// and only when the site configuration allows it.
	$list_authors = $request->getCheck( 'listauthors' );
	if ( !$this->curonly || !$config->get( MainConfigNames::ExportAllowListContributors ) ) {
		$list_authors = false;
	}

	if ( $this->doExport ) {
		$this->getOutput()->disable();

		// Cancel output buffering and gzipping if set
		// This should provide safer streaming for pages with history
		wfResetOutputBuffers();
		$request->response()->header( 'Content-type: application/xml; charset=utf-8' );
		$request->response()->header( 'X-Robots-Tag: noindex,nofollow' );
		ContentSecurityPolicy::sendRestrictiveHeader();

		if ( $request->getCheck( 'wpDownload' ) ) {
			// Provide a sensible filename suggestion
			$filename = urlencode( $config->get( MainConfigNames::Sitename ) . '-' .
				wfTimestampNow() . '.xml' );
			$request->response()->header( "Content-disposition: attachment;filename={$filename}" );
		}

		// @phan-suppress-next-line PhanPossiblyUndeclaredVariable
		$this->doExport( $page, $history, $list_authors, $exportall );

		return;
	}

	// No export requested (or nothing to export): render the form.
	$out = $this->getOutput();
	$out->addWikiMsg( 'exporttext' );

	if ( $page == '' ) {
		$categoryName = $request->getText( 'catname' );
	} else {
		$categoryName = '';
	}
	$canExportAll = $config->get( MainConfigNames::ExportAllowAll );
	// When "export all" is offered, the page-selection fields are hidden while it is ticked.
	$hideIf = $canExportAll ? [ 'hide-if' => [ '===', 'exportall', '1' ] ] : [];

	$formDescriptor = [
		'catname' => [
			'type' => 'textwithbutton',
			'name' => 'catname',
			'horizontal-label' => true,
			'label-message' => 'export-addcattext',
			'default' => $categoryName,
			'size' => 40,
			'buttontype' => 'submit',
			'buttonname' => 'addcat',
			'buttondefault' => $this->msg( 'export-addcat' )->text(),
		] + $hideIf,
	];
	if ( $config->get( MainConfigNames::ExportFromNamespaces ) ) {
		$formDescriptor += [
			'nsindex' => [
				'type' => 'namespaceselectwithbutton',
				'default' => $nsindex,
				'label-message' => 'export-addnstext',
				'horizontal-label' => true,
				'name' => 'nsindex',
				'id' => 'namespace',
				'cssclass' => 'namespaceselector',
				'buttontype' => 'submit',
				'buttonname' => 'addns',
				'buttondefault' => $this->msg( 'export-addns' )->text(),
			] + $hideIf,
		];
	}

	if ( $canExportAll ) {
		$formDescriptor += [
			'exportall' => [
				'type' => 'check',
				'label-message' => 'exportall',
				'name' => 'exportall',
				'id' => 'exportall',
				'default' => $request->wasPosted() && $request->getCheck( 'exportall' ),
			],
		];
	}

	$formDescriptor += [
		'textarea' => [
			'class' => HTMLTextAreaField::class,
			'name' => 'pages',
			'label-message' => 'export-manual',
			'nodata' => true,
			'rows' => 10,
			'default' => $page,
		] + $hideIf,
	];

	if ( $config->get( MainConfigNames::ExportAllowHistory ) ) {
		$formDescriptor += [
			'curonly' => [
				'type' => 'check',
				'label-message' => 'exportcuronly',
				'name' => 'curonly',
				'id' => 'curonly',
				'default' => !$request->wasPosted() || $request->getCheck( 'curonly' ),
			],
		];
	} else {
		$out->addWikiMsg( 'exportnohistory' );
	}

	$formDescriptor += [
		'templates' => [
			'type' => 'check',
			'label-message' => 'export-templates',
			'name' => 'templates',
			'id' => 'wpExportTemplates',
			'default' => $request->wasPosted() && $request->getCheck( 'templates' ),
		],
	];

	if ( $config->get( MainConfigNames::ExportMaxLinkDepth ) || $this->userCanOverrideExportDepth() ) {
		$formDescriptor += [
			'pagelink-depth' => [
				'type' => 'text',
				'name' => 'pagelink-depth',
				'id' => 'pagelink-depth',
				'label-message' => 'export-pagelinks',
				'default' => '0',
				'size' => 20,
			],
		];
	}

	$formDescriptor += [
		'wpDownload' => [
			'type' => 'check',
			'name' => 'wpDownload',
			'id' => 'wpDownload',
			'default' => !$request->wasPosted() || $request->getCheck( 'wpDownload' ),
			'label-message' => 'export-download',
		],
	];

	if ( $config->get( MainConfigNames::ExportAllowListContributors ) ) {
		$formDescriptor += [
			'listauthors' => [
				'type' => 'check',
				'label-message' => 'exportlistauthors',
				'default' => $request->wasPosted() && $request->getCheck( 'listauthors' ),
				'name' => 'listauthors',
				'id' => 'listauthors',
			],
		];
	}

	$htmlForm = HTMLForm::factory( 'ooui', $formDescriptor, $this->getContext() );
	$htmlForm->setSubmitTextMsg( 'export-submit' );
	$htmlForm->prepareForm()->displayForm( false );
	$this->addHelpLink( 'Help:Export' );
}
337
/**
 * Tell whether the current authority holds the 'override-export-depth' right.
 *
 * @return bool
 */
protected function userCanOverrideExportDepth() {
	$authority = $this->getAuthority();

	return $authority->isAllowed( 'override-export-depth' );
}
344
/**
 * Do the actual page exporting: resolve the page list and stream the dump.
 *
 * @param string $page Newline-separated list of page names; ignored when $exportall is set
 * @param int|array $history A WikiExporter history constant, or an array with
 *  'dir', 'offset' and 'limit' keys as built by execute()
 * @param bool $list_authors Value assigned to the exporter's list_authors flag
 * @param bool $exportall Export every page with full history instead of the page list
 */
protected function doExport( $page, $history, $list_authors, $exportall ) {
	// Stays empty for "export all", where the page list is not consulted.
	$pages = [];

	if ( $exportall ) {
		// If we are grabbing everything, enable full history and ignore the rest
		$history = WikiExporter::FULL;
	} else {
		// Inverted index (prefixed text => true) so each page is recorded once.
		$pageSet = [];

		// Split up and normalize input
		foreach ( explode( "\n", $page ) as $rawName ) {
			$candidate = Title::newFromText( trim( $rawName ) );
			if ( $candidate && !$candidate->isExternal() && $candidate->getText() !== '' ) {
				$pageSet[$candidate->getPrefixedText()] = true;
			}
		}

		// Set of original pages to pass on to further manipulation...
		$inputPages = array_keys( $pageSet );

		// Look up any linked pages if asked...
		if ( $this->templates ) {
			$pageSet = $this->getTemplates( $inputPages, $pageSet );
		}
		$pageSet = $this->getExtraPages( $inputPages, $pageSet );
		if ( $this->pageLinkDepth ) {
			$pageSet = $this->getPageLinks( $inputPages, $pageSet, $this->pageLinkDepth );
		}

		// Normalize titles to the same format and remove dupes, see T19374
		$pages = array_unique( array_map(
			static fn ( $name ) => str_replace( ' ', '_', $name ),
			array_keys( $pageSet )
		) );
	}

	/* Ok, let's get to it... */
	$exporter = $this->wikiExporterFactory->getWikiExporter(
		$this->dbProvider->getReplicaDatabase(),
		$history
	);
	$exporter->list_authors = $list_authors;
	$exporter->openStream();

	if ( $exportall ) {
		$exporter->allPages();
	} else {
		foreach ( $pages as $exportName ) {
			$title = Title::newFromText( $exportName );
			// @todo Perhaps output an <error> tag or something for skipped pages.
			// T10824: Only export pages the user can read
			if ( $title === null || !$this->getAuthority()->authorizeRead( 'read', $title ) ) {
				continue;
			}

			$exporter->pageByTitle( $title );
		}
	}

	$exporter->closeStream();
}
424
/**
 * List the pages that are members of a category.
 *
 * Only the DB key of $page is used; it is matched against link targets in
 * the category namespace. At most ExportPagelistLimit rows are fetched.
 *
 * @param PageIdentity $page Page whose DB key names the category
 * @return string[] Names built by Title::makeName() from each row's namespace and title
 */
protected function getPagesFromCategory( PageIdentity $page ) {
	$dbr = $this->dbProvider->getReplicaDatabase( CategoryLinksTable::VIRTUAL_DOMAIN );
	$conds = [
		'lt_title' => $page->getDBkey(),
		'lt_namespace' => NS_CATEGORY,
	];

	$rows = $dbr->newSelectQueryBuilder()
		->select( [ 'page_namespace', 'page_title' ] )
		->from( 'page' )
		->join( 'categorylinks', null, 'cl_from=page_id' )
		->join( 'linktarget', null, 'cl_target_id = lt_id' )
		->where( $conds )
		->limit( $this->getConfig()->get( MainConfigNames::ExportPagelistLimit ) )
		->caller( __METHOD__ )
		->fetchResultSet();

	$names = [];
	foreach ( $rows as $member ) {
		$names[] = Title::makeName( $member->page_namespace, $member->page_title );
	}

	return $names;
}
453
/**
 * List pages of one namespace, up to ExportPagelistLimit entries.
 *
 * @param int $nsindex Namespace index to match against page_namespace
 * @return string[] Names built by Title::makeName() from each row's namespace and title
 */
protected function getPagesFromNamespace( $nsindex ) {
	$rows = $this->dbProvider->getReplicaDatabase()
		->newSelectQueryBuilder()
		->select( [ 'page_namespace', 'page_title' ] )
		->from( 'page' )
		->where( [ 'page_namespace' => $nsindex ] )
		->limit( $this->getConfig()->get( MainConfigNames::ExportPagelistLimit ) )
		->caller( __METHOD__ )
		->fetchResultSet();

	$names = [];
	foreach ( $rows as $entry ) {
		$names[] = Title::makeName( $entry->page_namespace, $entry->page_title );
	}

	return $names;
}
477
/**
 * Expand a list of pages to include templates used in those pages.
 *
 * Builds the templatelinks query and delegates the per-page lookups to getLinks().
 *
 * @param array $inputPages List of page names to look up
 * @param array $pageSet Associative array indexed by page name
 * @return array The page set with the found templates added
 */
protected function getTemplates( $inputPages, $pageSet ) {
	$linksTable = 'templatelinks';
	[ $nsField, $titleField ] = $this->linksMigration->getTitleFields( $linksTable );
	$queryInfo = $this->linksMigration->getQueryInfo( $linksTable );

	// templatelinks is joined explicitly below, so only the remaining tables are added.
	$otherTables = array_diff( $queryInfo['tables'], [ $linksTable ] );

	$queryBuilder = $this->dbProvider
		->getReplicaDatabase( TemplateLinksTable::VIRTUAL_DOMAIN )
		->newSelectQueryBuilder()
		->caller( __METHOD__ )
		->select( [ 'namespace' => $nsField, 'title' => $titleField ] )
		->from( 'page' )
		->join( $linksTable, null, 'page_id=tl_from' )
		->tables( $otherTables )
		->joinConds( $queryInfo['joins'] );

	return $this->getLinks( $inputPages, $pageSet, $queryBuilder );
}
497
/**
 * Add the pages supplied by SpecialExportGetExtraPages hook handlers.
 *
 * @param array $inputPages List of page names passed to the hook
 * @param array $pageSet Associative array indexed by page name
 * @return array The page set with the hook-supplied pages added
 */
private function getExtraPages( $inputPages, $pageSet ) {
	$hookPages = [];
	$this->getHookRunner()->onSpecialExportGetExtraPages( $inputPages, $hookPages );

	foreach ( $hookPages as $hookPage ) {
		$prefixedText = $this->titleFormatter->getPrefixedText( $hookPage );
		$pageSet[$prefixedText] = true;
	}

	return $pageSet;
}
512
/**
 * Validate link depth setting, if available.
 *
 * A missing or negative depth becomes 0. Unless the authority may override
 * the export depth, the value is limited to ExportMaxLinkDepth. The result
 * is then always limited to the hard-coded ceiling of 5, including when the
 * configured maximum is larger than that.
 *
 * @param int|null $depth Requested depth, or null when none was given
 * @return int Depth to use, between 0 and 5 for a non-negative configured maximum
 */
protected function validateLinkDepth( $depth ) {
	if ( $depth === null || $depth < 0 ) {
		return 0;
	}

	if ( !$this->userCanOverrideExportDepth() ) {
		$maxLinkDepth = $this->getConfig()->get( MainConfigNames::ExportMaxLinkDepth );
		// Clamp rather than return early, so the hard ceiling below still
		// applies when the configured maximum exceeds it.
		$depth = min( $depth, $maxLinkDepth );
	}

	/*
	 * There's a HARD CODED limit of 5 levels of recursion here to prevent a
	 * crazy-big export from being done by someone setting the depth
	 * number too high. In other words, last resort safety net.
	 */

	return intval( min( $depth, 5 ) );
}
538
/**
 * Expand a list of pages to include pages linked to from that page.
 *
 * Each level looks up the links of the pages known so far, then uses the
 * whole accumulated set as the input of the next level.
 *
 * @param array $inputPages List of page names to start from
 * @param array $pageSet Associative array indexed by page name
 * @param int $depth Number of link levels to follow
 * @return array The page set with the linked pages added
 */
protected function getPageLinks( $inputPages, $pageSet, $depth ) {
	if ( $depth <= 0 ) {
		// Nothing to follow; skip building the query altogether.
		return $pageSet;
	}

	// The query does not depend on the level, so build it once. getLinks()
	// clones the builder before adding conditions, so reuse is safe.
	[ $nsField, $titleField ] = $this->linksMigration->getTitleFields( 'pagelinks' );
	$queryInfo = $this->linksMigration->getQueryInfo( 'pagelinks' );
	$dbr = $this->dbProvider->getReplicaDatabase( PageLinksTable::VIRTUAL_DOMAIN );
	$queryBuilder = $dbr->newSelectQueryBuilder()
		->caller( __METHOD__ )
		->select( [ 'namespace' => $nsField, 'title' => $titleField ] )
		->from( 'page' )
		->join( 'pagelinks', null, 'page_id=pl_from' )
		->tables( array_diff( $queryInfo['tables'], [ 'pagelinks' ] ) )
		->joinConds( $queryInfo['joins'] );

	for ( ; $depth > 0; --$depth ) {
		$pageSet = $this->getLinks( $inputPages, $pageSet, $queryBuilder );
		$inputPages = array_keys( $pageSet );
	}

	return $pageSet;
}
564
/**
 * Expand a list of pages to include items used in those pages.
 *
 * For every parsable input name, the page itself and each row returned by
 * the query for that page are recorded in the set.
 *
 * @param array $inputPages Array of page names
 * @param array $pageSet Associative array indexed by page name
 * @param SelectQueryBuilder $queryBuilder Prepared query; cloned per page, never modified
 * @return array The page set with the found items added
 */
protected function getLinks( $inputPages, $pageSet, SelectQueryBuilder $queryBuilder ) {
	foreach ( $inputPages as $pageName ) {
		$source = Title::newFromText( $pageName );
		if ( !$source ) {
			// Unparsable names are skipped.
			continue;
		}

		$pageSet[$source->getPrefixedText()] = true;

		$conds = [
			'page_namespace' => $source->getNamespace(),
			'page_title' => $source->getDBkey()
		];
		$rows = ( clone $queryBuilder )
			->where( $conds )
			->fetchResultSet();

		foreach ( $rows as $linkRow ) {
			$target = Title::makeTitle( $linkRow->namespace, $linkRow->title );
			$pageSet[$target->getPrefixedText()] = true;
		}
	}

	return $pageSet;
}
595
/**
 * Name of the group this special page is listed under.
 *
 * @return string Always 'pagetools'
 */
protected function getGroupName() {
	$group = 'pagetools';

	return $group;
}
600}
601
// Also register the class under the un-namespaced name 'SpecialExport'.
class_alias( SpecialExport::class, 'SpecialExport' );
const NS_MAIN
Definition Defines.php:51
const NS_CATEGORY
Definition Defines.php:65
wfTimestampNow()
Convenience function; returns MediaWiki timestamp for the present time.
wfTimestamp( $outputtype=TS::UNIX, $ts=0)
Get a timestamp string in one of various formats.
wfResetOutputBuffers( $resetGzipEncoding=true)
Clear away any user-level output buffers, discarding contents.
makeTitle( $linkId)
Convert a link ID to a Title.to override Title
Factory service for WikiExporter instances.
Object handling generic submission, CSRF protection, layout and other logic for UI forms in a reusabl...
Definition HTMLForm.php:207
Service for compat reading of links tables.
Create PSR-3 logger objects.
A class containing constants representing the names of configuration variables.
const ExportMaxLinkDepth
Name constant for the ExportMaxLinkDepth setting, for use with Config::get()
const Sitename
Name constant for the Sitename setting, for use with Config::get()
const ExportAllowAll
Name constant for the ExportAllowAll setting, for use with Config::get()
const ExportMaxHistory
Name constant for the ExportMaxHistory setting, for use with Config::get()
const ExportAllowListContributors
Name constant for the ExportAllowListContributors setting, for use with Config::get()
const ExportPagelistLimit
Name constant for the ExportPagelistLimit setting, for use with Config::get()
const ExportFromNamespaces
Name constant for the ExportFromNamespaces setting, for use with Config::get()
const ExportAllowHistory
Name constant for the ExportAllowHistory setting, for use with Config::get()
Handle sending Content-Security-Policy headers.
Parent class for all special pages.
setHeaders()
Sets headers - this should be called from the execute() method of all derived classes!
getConfig()
Shortcut to get main config object.
getContext()
Gets the context this SpecialPage is executed in.
getRequest()
Get the WebRequest being used for this instance.
msg( $key,... $params)
Wrapper around wfMessage that sets the current context.
getOutput()
Get the OutputPage being used for this instance.
getAuthority()
Shortcut to get the Authority executing this instance.
outputHeader( $summaryMessageKey='')
Outputs a summary message on top of special pages. By default the message key is the canonical name of...
addHelpLink( $to, $overrideBaseUrl=false)
Adds help link with an icon via page indicators.
A special page that allows users to export pages in an XML file.
doExport( $page, $history, $list_authors, $exportall)
Do the actual page exporting.
execute( $par)
Default execute method. Checks user permissions. This must be overridden by subclasses; it will be made...
getTemplates( $inputPages, $pageSet)
Expand a list of pages to include templates used in those pages.
getPageLinks( $inputPages, $pageSet, $depth)
Expand a list of pages to include pages linked to from that page.
__construct(private readonly IConnectionProvider $dbProvider, private readonly WikiExporterFactory $wikiExporterFactory, private readonly TitleFormatter $titleFormatter, private readonly LinksMigration $linksMigration,)
getLinks( $inputPages, $pageSet, SelectQueryBuilder $queryBuilder)
Expand a list of pages to include items used in those pages.
getGroupName()
Under which header this special page is listed in Special:SpecialPages. See messages 'specialpages-gro...
getPagesFromCategory(PageIdentity $page)
validateLinkDepth( $depth)
Validate link depth setting, if available.
A title formatter service for MediaWiki.
Represents a title within MediaWiki.
Definition Title.php:69
Build SELECT queries with a fluent interface.
Interface for objects (potentially) representing an editable wiki page.
getDBkey()
Get the page title in DB key form.
Provide primary and replica IDatabase connections.