MediaWiki master
SpecialExport.php
Go to the documentation of this file.
1<?php
9namespace MediaWiki\Specials;
10
25use WikiExporter;
28
/** @var bool Export only the current revision; reset to true at the start of execute() */
protected bool $curonly;

/** @var bool Whether this request should stream an XML export instead of showing the form */
protected bool $doExport;

/** @var int Link-following depth, as returned by validateLinkDepth() */
protected int $pageLinkDepth;

/** @var bool Whether the 'templates' request flag was set */
protected bool $templates;

/** @var IConnectionProvider Source of replica database connections */
private IConnectionProvider $dbProvider;

/** @var WikiExporterFactory Creates the WikiExporter used by doExport() */
private WikiExporterFactory $wikiExporterFactory;

/** @var TitleFormatter Used to turn hook-supplied extra pages into prefixed text */
private TitleFormatter $titleFormatter;

/** @var LinksMigration Supplies field names and query info for the links tables */
private LinksMigration $linksMigration;
45
/**
 * Register the special page under the name 'Export' and keep the
 * injected services for later use.
 *
 * @param IConnectionProvider $dbProvider
 * @param WikiExporterFactory $wikiExporterFactory
 * @param TitleFormatter $titleFormatter
 * @param LinksMigration $linksMigration
 */
public function __construct(
	IConnectionProvider $dbProvider,
	WikiExporterFactory $wikiExporterFactory,
	TitleFormatter $titleFormatter,
	LinksMigration $linksMigration
) {
	parent::__construct( 'Export' );

	// Link-table helpers
	$this->linksMigration = $linksMigration;
	$this->titleFormatter = $titleFormatter;

	// Database access and exporter construction
	$this->wikiExporterFactory = $wikiExporterFactory;
	$this->dbProvider = $dbProvider;
}
58
/**
 * Handle a request to Special:Export.
 *
 * Depending on the request this either streams an XML export (when a
 * page list or "export all" was submitted) or renders the export form.
 * The "add category" and "add namespace" buttons never export; they only
 * extend the page list that is shown again in the form.
 *
 * @param string|null $par Subpage text; used as the default page list for
 *  non-POST requests
 */
public function execute( $par ) {
	$this->setHeaders();
	$this->outputHeader();
	$config = $this->getConfig();

	$this->curonly = true;
	$this->doExport = false;
	$request = $this->getRequest();
	$this->templates = $request->getCheck( 'templates' );
	$this->pageLinkDepth = $this->validateLinkDepth(
		$request->getIntOrNull( 'pagelink-depth' )
	);
	$nsindex = '';
	$exportall = false;

	if ( $request->getCheck( 'addcat' ) ) {
		$page = $request->getText( 'pages' );
		$catname = $request->getText( 'catname' );

		if ( $catname !== '' ) {
			// Only the DB key of this title is used for the category lookup.
			$t = Title::makeTitleSafe( NS_MAIN, $catname );
			if ( $t ) {
				$catpages = $this->getPagesFromCategory( $t );
				if ( $catpages ) {
					if ( $page !== '' ) {
						$page .= "\n";
					}
					$page .= implode( "\n", $catpages );
				}
			}
		}
	} elseif ( $request->getCheck( 'addns' ) && $config->get( MainConfigNames::ExportFromNamespaces ) ) {
		$page = $request->getText( 'pages' );
		$nsindex = $request->getText( 'nsindex', '' );

		if ( $nsindex !== '' ) {
			$nspages = $this->getPagesFromNamespace( (int)$nsindex );
			if ( $nspages ) {
				// Same separator handling as the category branch: do not
				// start the list with an empty line.
				if ( $page !== '' ) {
					$page .= "\n";
				}
				$page .= implode( "\n", $nspages );
			}
		}
	} elseif ( $request->getCheck( 'exportall' ) && $config->get( MainConfigNames::ExportAllowAll ) ) {
		$this->doExport = true;
		$exportall = true;

		/* Although $page and $history are not used later on, we
		nevertheless set them to avoid that PHP notices about using
		undefined variables foul up our XML output (see call to
		doExport(...) further down) */
		$page = '';
		$history = '';
	} elseif ( $request->wasPosted() && ( $par ?? '' ) === '' ) {
		// Log to see if certain parameters are actually used.
		// If not, we could deprecate them and do some cleanup, here and in WikiExporter.
		LoggerFactory::getInstance( 'export' )->debug(
			'Special:Export POST, dir: [{dir}], offset: [{offset}], limit: [{limit}]', [
				'dir' => $request->getRawVal( 'dir' ),
				'offset' => $request->getRawVal( 'offset' ),
				'limit' => $request->getRawVal( 'limit' ),
			] );

		$page = $request->getText( 'pages' );
		$this->curonly = $request->getCheck( 'curonly' );
		$rawOffset = $request->getVal( 'offset' );

		if ( $rawOffset ) {
			$offset = wfTimestamp( TS_MW, $rawOffset );
		} else {
			$offset = null;
		}

		$maxHistory = $config->get( MainConfigNames::ExportMaxHistory );
		$limit = $request->getInt( 'limit' );
		$dir = $request->getVal( 'dir' );
		$history = [
			'dir' => 'asc',
			'offset' => false,
			'limit' => $maxHistory,
		];
		$historyCheck = $request->getCheck( 'history' );

		if ( $this->curonly ) {
			$history = WikiExporter::CURRENT;
		} elseif ( !$historyCheck ) {
			// A requested limit is honoured only if it is positive and
			// does not exceed the configured maximum (0 = no maximum).
			if ( $limit > 0 && ( $maxHistory == 0 || $limit < $maxHistory ) ) {
				$history['limit'] = $limit;
			}

			if ( $offset !== null ) {
				$history['offset'] = $offset;
			}

			if ( strtolower( $dir ?? '' ) === 'desc' ) {
				$history['dir'] = 'desc';
			}
		}

		if ( $page !== '' ) {
			$this->doExport = true;
		}
	} else {
		// Default to current-only for GET requests.
		$page = $request->getText( 'pages', $par ?? '' );
		$historyCheck = $request->getCheck( 'history' );

		if ( $historyCheck ) {
			$history = WikiExporter::FULL;
		} else {
			$history = WikiExporter::CURRENT;
		}

		if ( $page !== '' ) {
			$this->doExport = true;
		}
	}

	if ( !$config->get( MainConfigNames::ExportAllowHistory ) ) {
		// Override
		$history = WikiExporter::CURRENT;
	}

	// The contributor list is only produced for current-only exports and
	// only when the configuration allows it.
	$list_authors = $request->getCheck( 'listauthors' );
	if ( !$this->curonly || !$config->get( MainConfigNames::ExportAllowListContributors ) ) {
		$list_authors = false;
	}

	if ( $this->doExport ) {
		$this->getOutput()->disable();

		// Cancel output buffering and gzipping if set
		// This should provide safer streaming for pages with history
		wfResetOutputBuffers();
		$request->response()->header( 'Content-type: application/xml; charset=utf-8' );
		$request->response()->header( 'X-Robots-Tag: noindex,nofollow' );
		ContentSecurityPolicy::sendRestrictiveHeader();

		if ( $request->getCheck( 'wpDownload' ) ) {
			// Provide a sensible filename suggestion
			$filename = urlencode( $config->get( MainConfigNames::Sitename ) . '-' .
				wfTimestampNow() . '.xml' );
			$request->response()->header( "Content-disposition: attachment;filename={$filename}" );
		}

		// @phan-suppress-next-next-line PhanPossiblyUndeclaredVariable
		// @phan-suppress-next-line PhanTypeMismatchArgumentNullable history is set when used
		$this->doExport( $page, $history, $list_authors, $exportall );

		return;
	}

	// No export requested: render the form.
	$out = $this->getOutput();
	$out->addWikiMsg( 'exporttext' );

	if ( $page === '' ) {
		$categoryName = $request->getText( 'catname' );
	} else {
		$categoryName = '';
	}
	$canExportAll = $config->get( MainConfigNames::ExportAllowAll );
	// Fields that make no sense together with "export all" are hidden
	// while that checkbox is ticked.
	$hideIf = $canExportAll ? [ 'hide-if' => [ '===', 'exportall', '1' ] ] : [];

	$formDescriptor = [
		'catname' => [
			'type' => 'textwithbutton',
			'name' => 'catname',
			'horizontal-label' => true,
			'label-message' => 'export-addcattext',
			'default' => $categoryName,
			'size' => 40,
			'buttontype' => 'submit',
			'buttonname' => 'addcat',
			'buttondefault' => $this->msg( 'export-addcat' )->text(),
		] + $hideIf,
	];
	if ( $config->get( MainConfigNames::ExportFromNamespaces ) ) {
		$formDescriptor += [
			'nsindex' => [
				'type' => 'namespaceselectwithbutton',
				'default' => $nsindex,
				'label-message' => 'export-addnstext',
				'horizontal-label' => true,
				'name' => 'nsindex',
				'id' => 'namespace',
				'cssclass' => 'namespaceselector',
				'buttontype' => 'submit',
				'buttonname' => 'addns',
				'buttondefault' => $this->msg( 'export-addns' )->text(),
			] + $hideIf,
		];
	}

	if ( $canExportAll ) {
		$formDescriptor += [
			'exportall' => [
				'type' => 'check',
				'label-message' => 'exportall',
				'name' => 'exportall',
				'id' => 'exportall',
				'default' => $request->wasPosted() && $request->getCheck( 'exportall' ),
			],
		];
	}

	$formDescriptor += [
		'textarea' => [
			'class' => HTMLTextAreaField::class,
			'name' => 'pages',
			'label-message' => 'export-manual',
			'nodata' => true,
			'rows' => 10,
			'default' => $page,
		] + $hideIf,
	];

	if ( $config->get( MainConfigNames::ExportAllowHistory ) ) {
		$formDescriptor += [
			'curonly' => [
				'type' => 'check',
				'label-message' => 'exportcuronly',
				'name' => 'curonly',
				'id' => 'curonly',
				'default' => !$request->wasPosted() || $request->getCheck( 'curonly' ),
			],
		];
	} else {
		$out->addWikiMsg( 'exportnohistory' );
	}

	$formDescriptor += [
		'templates' => [
			'type' => 'check',
			'label-message' => 'export-templates',
			'name' => 'templates',
			'id' => 'wpExportTemplates',
			'default' => $request->wasPosted() && $request->getCheck( 'templates' ),
		],
	];

	if ( $config->get( MainConfigNames::ExportMaxLinkDepth ) || $this->userCanOverrideExportDepth() ) {
		$formDescriptor += [
			'pagelink-depth' => [
				'type' => 'text',
				'name' => 'pagelink-depth',
				'id' => 'pagelink-depth',
				'label-message' => 'export-pagelinks',
				'default' => '0',
				'size' => 20,
			],
		];
	}

	$formDescriptor += [
		'wpDownload' => [
			'type' => 'check',
			'name' => 'wpDownload',
			'id' => 'wpDownload',
			'default' => !$request->wasPosted() || $request->getCheck( 'wpDownload' ),
			'label-message' => 'export-download',
		],
	];

	if ( $config->get( MainConfigNames::ExportAllowListContributors ) ) {
		$formDescriptor += [
			'listauthors' => [
				'type' => 'check',
				'label-message' => 'exportlistauthors',
				'default' => $request->wasPosted() && $request->getCheck( 'listauthors' ),
				'name' => 'listauthors',
				'id' => 'listauthors',
			],
		];
	}

	$htmlForm = HTMLForm::factory( 'ooui', $formDescriptor, $this->getContext() );
	$htmlForm->setSubmitTextMsg( 'export-submit' );
	$htmlForm->prepareForm()->displayForm( false );
	$this->addHelpLink( 'Help:Export' );
}
346
/**
 * Check whether the current authority holds the
 * 'override-export-depth' right.
 *
 * @return bool
 */
protected function userCanOverrideExportDepth() {
	$authority = $this->getAuthority();

	return $authority->isAllowed( 'override-export-depth' );
}
353
/**
 * Do the actual page exporting.
 *
 * @param string $page Newline-separated list of page names; ignored when
 *  $exportall is set
 * @param mixed $history History selection handed to the WikiExporter factory
 *  (a WikiExporter constant or the settings array built in execute())
 * @param bool $list_authors Whether the exporter should list contributors
 * @param bool $exportall Whether to export every page
 */
protected function doExport( $page, $history, $list_authors, $exportall ) {
	// DB-key style names of the pages to export; stays empty for "export all".
	$dbKeys = [];

	if ( $exportall ) {
		// Grabbing everything: force full history and ignore the page list.
		$history = WikiExporter::FULL;
	} else {
		// Inverted index (prefixed text => true) so each page is recorded once.
		$pageSet = [];

		foreach ( explode( "\n", $page ) as $line ) {
			$candidate = Title::newFromText( trim( $line ) );
			if ( !$candidate || $candidate->isExternal() || $candidate->getText() === '' ) {
				continue;
			}
			$pageSet[$candidate->getPrefixedText()] = true;
		}

		// The pages the user actually asked for; the expansions below start here.
		$inputPages = array_keys( $pageSet );

		if ( $this->templates ) {
			$pageSet = $this->getTemplates( $inputPages, $pageSet );
		}
		$pageSet = $this->getExtraPages( $inputPages, $pageSet );
		if ( $this->pageLinkDepth ) {
			$pageSet = $this->getPageLinks( $inputPages, $pageSet, $this->pageLinkDepth );
		}

		// Normalize titles to the same format and remove dupes, see T19374
		$dbKeys = array_unique( array_map(
			static fn ( $name ) => str_replace( ' ', '_', $name ),
			array_keys( $pageSet )
		) );
	}

	$exporter = $this->wikiExporterFactory->getWikiExporter(
		$this->dbProvider->getReplicaDatabase(),
		$history
	);
	$exporter->list_authors = $list_authors;
	$exporter->openStream();

	if ( $exportall ) {
		$exporter->allPages();
	} else {
		foreach ( $dbKeys as $dbKey ) {
			$title = Title::newFromText( $dbKey );

			// T10824: Only export pages the user can read.
			// @todo Perhaps output an <error> tag or something for skipped pages.
			if ( $title === null ) {
				continue;
			}
			if ( !$this->getAuthority()->authorizeRead( 'read', $title ) ) {
				continue;
			}

			$exporter->pageByTitle( $title );
		}
	}

	$exporter->closeStream();
}
433
/**
 * List the pages that are members of a category, up to the configured
 * ExportPagelistLimit.
 *
 * @param PageIdentity $page Only its DB key is used, as the category name
 * @return string[] Page names built with Title::makeName()
 */
protected function getPagesFromCategory( PageIdentity $page ) {
	$categoryKey = $page->getDBkey();
	$limit = $this->getConfig()->get( MainConfigNames::ExportPagelistLimit );

	$rows = $this->dbProvider
		->getReplicaDatabase( CategoryLinksTable::VIRTUAL_DOMAIN )
		->newSelectQueryBuilder()
		->select( [ 'page_namespace', 'page_title' ] )
		->from( 'page' )
		->join( 'categorylinks', null, 'cl_from=page_id' )
		->join( 'linktarget', null, 'cl_target_id = lt_id' )
		->where( [ 'lt_title' => $categoryKey, 'lt_namespace' => NS_CATEGORY ] )
		->limit( $limit )
		->caller( __METHOD__ )
		->fetchResultSet();

	$names = [];
	foreach ( $rows as $member ) {
		$names[] = Title::makeName( $member->page_namespace, $member->page_title );
	}

	return $names;
}
462
/**
 * List the pages of one namespace, up to the configured
 * ExportPagelistLimit.
 *
 * @param int $nsindex Namespace index to match against page_namespace
 * @return string[] Page names built with Title::makeName()
 */
protected function getPagesFromNamespace( $nsindex ) {
	$limit = $this->getConfig()->get( MainConfigNames::ExportPagelistLimit );

	$rows = $this->dbProvider
		->getReplicaDatabase()
		->newSelectQueryBuilder()
		->select( [ 'page_namespace', 'page_title' ] )
		->from( 'page' )
		->where( [ 'page_namespace' => $nsindex ] )
		->limit( $limit )
		->caller( __METHOD__ )
		->fetchResultSet();

	$names = [];
	foreach ( $rows as $entry ) {
		$names[] = Title::makeName( $entry->page_namespace, $entry->page_title );
	}

	return $names;
}
486
/**
 * Expand a list of pages to include templates used in those pages.
 *
 * @param array $inputPages Names of the pages whose templates are looked up
 * @param array $pageSet Map of prefixed page text => true collected so far
 * @return array The page set, extended by getLinks()
 */
protected function getTemplates( $inputPages, $pageSet ) {
	$table = 'templatelinks';
	$info = $this->linksMigration->getQueryInfo( $table );
	[ $nsField, $titleField ] = $this->linksMigration->getTitleFields( $table );

	// 'templatelinks' is joined explicitly below, so it is dropped from the
	// table list reported by LinksMigration.
	$extraTables = array_diff( $info['tables'], [ 'templatelinks' ] );

	$builder = $this->dbProvider
		->getReplicaDatabase( TemplateLinksTable::VIRTUAL_DOMAIN )
		->newSelectQueryBuilder()
		->caller( __METHOD__ )
		->select( [ 'namespace' => $nsField, 'title' => $titleField ] )
		->from( 'page' )
		->join( 'templatelinks', null, 'page_id=tl_from' )
		->tables( $extraTables )
		->joinConds( $info['joins'] );

	return $this->getLinks( $inputPages, $pageSet, $builder );
}
506
/**
 * Add the pages contributed by the SpecialExportGetExtraPages hook to
 * the page set.
 *
 * @param array $inputPages Names of the originally requested pages
 * @param array $pageSet Map of prefixed page text => true collected so far
 * @return array The page set including the hook-supplied pages
 */
private function getExtraPages( $inputPages, $pageSet ) {
	// Filled in by the hook handlers.
	$additions = [];
	$this->getHookRunner()->onSpecialExportGetExtraPages( $inputPages, $additions );

	foreach ( $additions as $addition ) {
		$prefixed = $this->titleFormatter->getPrefixedText( $addition );
		$pageSet[$prefixed] = true;
	}

	return $pageSet;
}
521
/**
 * Validate link depth setting, if available.
 *
 * The result is never above the hard-coded limit of 5. For users without
 * the 'override-export-depth' right it is also never above the configured
 * ExportMaxLinkDepth.
 *
 * @param int|null $depth Requested depth, or null if none was given
 * @return int Depth to use; 0 when $depth is null or negative
 */
protected function validateLinkDepth( $depth ) {
	if ( $depth === null || $depth < 0 ) {
		return 0;
	}

	if ( !$this->userCanOverrideExportDepth() ) {
		$maxLinkDepth = $this->getConfig()->get( MainConfigNames::ExportMaxLinkDepth );
		// Clamp instead of returning early, so that a configured maximum
		// above the hard limit cannot bypass the safety net below.
		$depth = min( $depth, $maxLinkDepth );
	}

	/*
	 * There's a HARD CODED limit of 5 levels of recursion here to prevent a
	 * crazy-big export from being done by someone setting the depth
	 * number too high. In other words, last resort safety net.
	 */
	$hardLimit = 5;

	return intval( min( $depth, $hardLimit ) );
}
547
/**
 * Expand a list of pages to include pages linked to from that page.
 *
 * The first level expands $inputPages; every further level expands the
 * pages of $pageSet that have not been expanded yet. Pages expanded on an
 * earlier level are not queried again, because their links are already
 * part of the set.
 *
 * @param array $inputPages Names of the pages to start from
 * @param array $pageSet Map of prefixed page text => true collected so far
 * @param int $depth Number of link levels to follow
 * @return array The extended page set
 */
protected function getPageLinks( $inputPages, $pageSet, $depth ) {
	if ( $depth <= 0 ) {
		return $pageSet;
	}

	// The query template does not depend on the level, so build it once;
	// getLinks() clones it for every page it looks up.
	[ $nsField, $titleField ] = $this->linksMigration->getTitleFields( 'pagelinks' );
	$queryInfo = $this->linksMigration->getQueryInfo( 'pagelinks' );
	$dbr = $this->dbProvider->getReplicaDatabase( PageLinksTable::VIRTUAL_DOMAIN );
	$queryBuilder = $dbr->newSelectQueryBuilder()
		->caller( __METHOD__ )
		->select( [ 'namespace' => $nsField, 'title' => $titleField ] )
		->from( 'page' )
		->join( 'pagelinks', null, 'page_id=pl_from' )
		->tables( array_diff( $queryInfo['tables'], [ 'pagelinks' ] ) )
		->joinConds( $queryInfo['joins'] );

	$expanded = [];
	$frontier = $inputPages;

	while ( $depth > 0 ) {
		$pageSet = $this->getLinks( $frontier, $pageSet, $queryBuilder );

		foreach ( $frontier as $name ) {
			$expanded[$name] = true;
		}

		// Next level: everything in the set that has not been expanded yet.
		$frontier = array_keys( array_diff_key( $pageSet, $expanded ) );
		--$depth;

		if ( !$frontier ) {
			// Nothing new was found, so deeper levels cannot add anything.
			break;
		}
	}

	return $pageSet;
}
573
/**
 * Expand a list of pages to include items used in those pages.
 *
 * For every input name that parses as a title, the page itself and every
 * row returned by the query for that page are added to the set.
 *
 * @param array $inputPages Names of the pages to look up
 * @param array $pageSet Map of prefixed page text => true collected so far
 * @param SelectQueryBuilder $queryBuilder Query template; it is cloned for
 *  each page and must select the 'namespace' and 'title' fields
 * @return array The extended page set
 */
protected function getLinks( $inputPages, $pageSet, SelectQueryBuilder $queryBuilder ) {
	foreach ( $inputPages as $pageName ) {
		$source = Title::newFromText( $pageName );
		if ( !$source ) {
			continue;
		}

		$pageSet[$source->getPrefixedText()] = true;

		$conds = [
			'page_namespace' => $source->getNamespace(),
			'page_title' => $source->getDBkey()
		];
		// Clone so the condition for this page does not leak into the
		// query for the next one.
		$rows = ( clone $queryBuilder )->where( $conds )->fetchResultSet();

		foreach ( $rows as $row ) {
			$target = Title::makeTitle( $row->namespace, $row->title );
			$pageSet[$target->getPrefixedText()] = true;
		}
	}

	return $pageSet;
}
604
/**
 * Name of the group under which this page is listed on
 * Special:SpecialPages.
 *
 * @return string
 */
protected function getGroupName() {
	return 'pagetools';
}
609}
610
// Register the un-namespaced name 'SpecialExport' as an alias of this class.
class_alias( SpecialExport::class, 'SpecialExport' );
const NS_MAIN
Definition Defines.php:51
const NS_CATEGORY
Definition Defines.php:65
wfTimestampNow()
Convenience function; returns MediaWiki timestamp for the present time.
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
wfResetOutputBuffers( $resetGzipEncoding=true)
Clear away any user-level output buffers, discarding contents.
makeTitle( $linkId)
Convert a link ID to a Title. (To override Title.)
Factory service for WikiExporter instances.
Object handling generic submission, CSRF protection, layout and other logic for UI forms in a reusabl...
Definition HTMLForm.php:195
Service for compat reading of links tables.
Create PSR-3 logger objects.
A class containing constants representing the names of configuration variables.
const ExportMaxLinkDepth
Name constant for the ExportMaxLinkDepth setting, for use with Config::get()
const Sitename
Name constant for the Sitename setting, for use with Config::get()
const ExportAllowAll
Name constant for the ExportAllowAll setting, for use with Config::get()
const ExportMaxHistory
Name constant for the ExportMaxHistory setting, for use with Config::get()
const ExportAllowListContributors
Name constant for the ExportAllowListContributors setting, for use with Config::get()
const ExportPagelistLimit
Name constant for the ExportPagelistLimit setting, for use with Config::get()
const ExportFromNamespaces
Name constant for the ExportFromNamespaces setting, for use with Config::get()
const ExportAllowHistory
Name constant for the ExportAllowHistory setting, for use with Config::get()
Handle sending Content-Security-Policy headers.
Parent class for all special pages.
setHeaders()
Sets headers - this should be called from the execute() method of all derived classes!
getConfig()
Shortcut to get main config object.
getContext()
Gets the context this SpecialPage is executed in.
getRequest()
Get the WebRequest being used for this instance.
msg( $key,... $params)
Wrapper around wfMessage that sets the current context.
getOutput()
Get the OutputPage being used for this instance.
getAuthority()
Shortcut to get the Authority executing this instance.
outputHeader( $summaryMessageKey='')
Outputs a summary message on top of special pages. By default the message key is the canonical name of...
addHelpLink( $to, $overrideBaseUrl=false)
Adds help link with an icon via page indicators.
A special page that allows users to export pages in an XML file.
doExport( $page, $history, $list_authors, $exportall)
Do the actual page exporting.
execute( $par)
Default execute method. Checks user permissions. This must be overridden by subclasses; it will be made...
getTemplates( $inputPages, $pageSet)
Expand a list of pages to include templates used in those pages.
getPageLinks( $inputPages, $pageSet, $depth)
Expand a list of pages to include pages linked to from that page.
getLinks( $inputPages, $pageSet, SelectQueryBuilder $queryBuilder)
Expand a list of pages to include items used in those pages.
getGroupName()
Under which header this special page is listed in Special:SpecialPages. See messages 'specialpages-gro...
getPagesFromCategory(PageIdentity $page)
validateLinkDepth( $depth)
Validate link depth setting, if available.
__construct(IConnectionProvider $dbProvider, WikiExporterFactory $wikiExporterFactory, TitleFormatter $titleFormatter, LinksMigration $linksMigration)
A title formatter service for MediaWiki.
Represents a title within MediaWiki.
Definition Title.php:69
Build SELECT queries with a fluent interface.
Interface for objects (potentially) representing an editable wiki page.
getDBkey()
Get the page title in DB key form.
Provide primary and replica IDatabase connections.