MediaWiki master
SpecialExport.php
Go to the documentation of this file.
1<?php
26namespace MediaWiki\Specials;
27
28use HTMLTextAreaField;
37use WikiExporter;
40
/** Whether only the current revision is exported; execute() defaults it to true and reads 'curonly' on POST. */
47 protected bool $curonly;
/** Whether execute() should stream an export instead of rendering the form. */
48 protected bool $doExport;
/** Link-expansion depth, as returned by validateLinkDepth() for the 'pagelink-depth' request value. */
49 protected int $pageLinkDepth;
/** Whether the 'templates' request flag was set; doExport() then adds the result of getTemplates(). */
50 protected bool $templates;
51
/** Supplies the replica database connection used by the lookups and the exporter. */
52 private IConnectionProvider $dbProvider;
/** Creates the WikiExporter used in doExport(). */
53 private WikiExporterFactory $wikiExporterFactory;
/** Produces the prefixed text of the extra pages collected in getExtraPages(). */
54 private TitleFormatter $titleFormatter;
/** Provides title fields and query info for the templatelinks/pagelinks lookups. */
55 private LinksMigration $linksMigration;
56
/**
 * Register the page under the name 'Export' and keep the injected services.
 *
 * @param IConnectionProvider $dbProvider Source of database connections
 * @param WikiExporterFactory $wikiExporterFactory Factory for the exporter used by doExport()
 * @param TitleFormatter $titleFormatter Formatter used for extra pages added by hooks
 * @param LinksMigration $linksMigration Helper for reading the links tables
 */
public function __construct(
	IConnectionProvider $dbProvider,
	WikiExporterFactory $wikiExporterFactory,
	TitleFormatter $titleFormatter,
	LinksMigration $linksMigration
) {
	parent::__construct( 'Export' );

	// Plain service wiring; the assignments are independent of each other.
	$this->linksMigration = $linksMigration;
	$this->titleFormatter = $titleFormatter;
	$this->wikiExporterFactory = $wikiExporterFactory;
	$this->dbProvider = $dbProvider;
}
75
/**
 * Handle a request to Special:Export.
 *
 * Depending on the request this either
 *  - extends the page list with the members of a category ('addcat') or a
 *    namespace ('addns') and shows the form again,
 *  - streams an XML export (all pages, a POSTed page list, or a GET page list), or
 *  - renders the empty export form.
 *
 * @param string|null $par Subpage text; used as the default page list for GET requests
 */
public function execute( $par ) {
	$this->setHeaders();
	$this->outputHeader();
	$config = $this->getConfig();

	$this->curonly = true;
	$this->doExport = false;
	$request = $this->getRequest();
	$this->templates = $request->getCheck( 'templates' );
	$this->pageLinkDepth = $this->validateLinkDepth(
		$request->getIntOrNull( 'pagelink-depth' )
	);
	$nsindex = '';
	$exportall = false;
	// Defined up front so every branch below leaves $history in a usable state.
	$history = WikiExporter::CURRENT;

	if ( $request->getCheck( 'addcat' ) ) {
		$page = $request->getText( 'pages' );
		$catname = $request->getText( 'catname' );

		if ( $catname !== '' ) {
			$t = Title::makeTitleSafe( NS_MAIN, $catname );
			if ( $t ) {
				$catpages = $this->getPagesFromCategory( $t );
				if ( $catpages ) {
					if ( $page !== '' ) {
						$page .= "\n";
					}
					$page .= implode( "\n", $catpages );
				}
			}
		}
	} elseif ( $request->getCheck( 'addns' ) && $config->get( MainConfigNames::ExportFromNamespaces ) ) {
		$page = $request->getText( 'pages' );
		$nsindex = $request->getText( 'nsindex', '' );

		if ( $nsindex !== '' ) {
			$nspages = $this->getPagesFromNamespace( (int)$nsindex );
			if ( $nspages ) {
				// Same separator rule as the category branch: no leading blank
				// line when the list was empty so far.
				if ( $page !== '' ) {
					$page .= "\n";
				}
				$page .= implode( "\n", $nspages );
			}
		}
	} elseif ( $request->getCheck( 'exportall' ) && $config->get( MainConfigNames::ExportAllowAll ) ) {
		$this->doExport = true;
		$exportall = true;

		// $page is not used for a full export, but it must be defined for the
		// doExport() call further down. doExport() replaces $history itself.
		$page = '';
	} elseif ( $request->wasPosted() && ( $par ?? '' ) === '' ) {
		// Log to see if certain parameters are actually used.
		// If not, we could deprecate them and do some cleanup, here and in WikiExporter.
		LoggerFactory::getInstance( 'export' )->debug(
			'Special:Export POST, dir: [{dir}], offset: [{offset}], limit: [{limit}]', [
				'dir' => $request->getRawVal( 'dir' ),
				'offset' => $request->getRawVal( 'offset' ),
				'limit' => $request->getRawVal( 'limit' ),
			] );

		$page = $request->getText( 'pages' );
		$this->curonly = $request->getCheck( 'curonly' );
		$rawOffset = $request->getVal( 'offset' );
		$offset = $rawOffset ? wfTimestamp( TS_MW, $rawOffset ) : null;

		$maxHistory = $config->get( MainConfigNames::ExportMaxHistory );
		$limit = $request->getInt( 'limit' );
		// 'dir' may be absent; normalise to a string before it reaches strtolower().
		$dir = $request->getVal( 'dir' ) ?? '';
		$history = [
			'dir' => 'asc',
			'offset' => false,
			'limit' => $maxHistory,
		];
		$historyCheck = $request->getCheck( 'history' );

		if ( $this->curonly ) {
			$history = WikiExporter::CURRENT;
		} elseif ( !$historyCheck ) {
			if ( $limit > 0 && ( $maxHistory == 0 || $limit < $maxHistory ) ) {
				$history['limit'] = $limit;
			}

			if ( $offset !== null ) {
				$history['offset'] = $offset;
			}

			if ( strtolower( $dir ) === 'desc' ) {
				$history['dir'] = 'desc';
			}
		}

		if ( $page !== '' ) {
			$this->doExport = true;
		}
	} else {
		// Default to current-only for GET requests.
		$page = $request->getText( 'pages', $par ?? '' );
		$history = $request->getCheck( 'history' )
			? WikiExporter::FULL
			: WikiExporter::CURRENT;

		if ( $page !== '' ) {
			$this->doExport = true;
		}
	}

	if ( !$config->get( MainConfigNames::ExportAllowHistory ) ) {
		// Override
		$history = WikiExporter::CURRENT;
	}

	// The author list is only offered for current-only exports and when enabled.
	$list_authors = $request->getCheck( 'listauthors' );
	if ( !$this->curonly || !$config->get( MainConfigNames::ExportAllowListContributors ) ) {
		$list_authors = false;
	}

	if ( $this->doExport ) {
		$this->getOutput()->disable();

		// Cancel output buffering and gzipping if set
		// This should provide safer streaming for pages with history
		wfResetOutputBuffers();
		$request->response()->header( 'Content-type: application/xml; charset=utf-8' );
		$request->response()->header( 'X-Robots-Tag: noindex,nofollow' );

		if ( $request->getCheck( 'wpDownload' ) ) {
			// Provide a sensible filename suggestion
			$filename = urlencode( $config->get( MainConfigNames::Sitename ) . '-' .
				wfTimestampNow() . '.xml' );
			$request->response()->header( "Content-disposition: attachment;filename={$filename}" );
		}

		$this->doExport( $page, $history, $list_authors, $exportall );

		return;
	}

	$out = $this->getOutput();
	$out->addWikiMsg( 'exporttext' );

	// Only pre-fill the category box while the page list is still empty.
	$categoryName = $page === '' ? $request->getText( 'catname' ) : '';

	$canExportAll = $config->get( MainConfigNames::ExportAllowAll );
	// Page-selection fields are hidden while the "export all" box is ticked.
	$hideIf = $canExportAll ? [ 'hide-if' => [ '===', 'exportall', '1' ] ] : [];

	$formDescriptor = [
		'catname' => [
			'type' => 'textwithbutton',
			'name' => 'catname',
			'horizontal-label' => true,
			'label-message' => 'export-addcattext',
			'default' => $categoryName,
			'size' => 40,
			'buttontype' => 'submit',
			'buttonname' => 'addcat',
			'buttondefault' => $this->msg( 'export-addcat' )->text(),
		] + $hideIf,
	];
	if ( $config->get( MainConfigNames::ExportFromNamespaces ) ) {
		$formDescriptor += [
			'nsindex' => [
				'type' => 'namespaceselectwithbutton',
				'default' => $nsindex,
				'label-message' => 'export-addnstext',
				'horizontal-label' => true,
				'name' => 'nsindex',
				'id' => 'namespace',
				'cssclass' => 'namespaceselector',
				'buttontype' => 'submit',
				'buttonname' => 'addns',
				'buttondefault' => $this->msg( 'export-addns' )->text(),
			] + $hideIf,
		];
	}

	if ( $canExportAll ) {
		$formDescriptor += [
			'exportall' => [
				'type' => 'check',
				'label-message' => 'exportall',
				'name' => 'exportall',
				'id' => 'exportall',
				'default' => $request->wasPosted() && $request->getCheck( 'exportall' ),
			],
		];
	}

	$formDescriptor += [
		'textarea' => [
			'class' => HTMLTextAreaField::class,
			'name' => 'pages',
			'label-message' => 'export-manual',
			'nodata' => true,
			'rows' => 10,
			'default' => $page,
		] + $hideIf,
	];

	if ( $config->get( MainConfigNames::ExportAllowHistory ) ) {
		$formDescriptor += [
			'curonly' => [
				'type' => 'check',
				'label-message' => 'exportcuronly',
				'name' => 'curonly',
				'id' => 'curonly',
				'default' => !$request->wasPosted() || $request->getCheck( 'curonly' ),
			],
		];
	} else {
		$out->addWikiMsg( 'exportnohistory' );
	}

	$formDescriptor += [
		'templates' => [
			'type' => 'check',
			'label-message' => 'export-templates',
			'name' => 'templates',
			'id' => 'wpExportTemplates',
			'default' => $request->wasPosted() && $request->getCheck( 'templates' ),
		],
	];

	if ( $config->get( MainConfigNames::ExportMaxLinkDepth ) || $this->userCanOverrideExportDepth() ) {
		$formDescriptor += [
			'pagelink-depth' => [
				'type' => 'text',
				'name' => 'pagelink-depth',
				'id' => 'pagelink-depth',
				'label-message' => 'export-pagelinks',
				'default' => '0',
				'size' => 20,
			],
		];
	}

	$formDescriptor += [
		'wpDownload' => [
			'type' => 'check',
			'name' => 'wpDownload',
			'id' => 'wpDownload',
			'default' => !$request->wasPosted() || $request->getCheck( 'wpDownload' ),
			'label-message' => 'export-download',
		],
	];

	if ( $config->get( MainConfigNames::ExportAllowListContributors ) ) {
		$formDescriptor += [
			'listauthors' => [
				'type' => 'check',
				'label-message' => 'exportlistauthors',
				'default' => $request->wasPosted() && $request->getCheck( 'listauthors' ),
				'name' => 'listauthors',
				'id' => 'listauthors',
			],
		];
	}

	$htmlForm = HTMLForm::factory( 'ooui', $formDescriptor, $this->getContext() );
	$htmlForm->setSubmitTextMsg( 'export-submit' );
	$htmlForm->prepareForm()->displayForm( false );
	$this->addHelpLink( 'Help:Export' );
}
361
/**
 * Tell whether the acting authority holds the 'override-export-depth' right.
 *
 * @return bool
 */
protected function userCanOverrideExportDepth() {
	$authority = $this->getAuthority();

	return $authority->isAllowed( 'override-export-depth' );
}
368
/**
 * Stream the export for the requested pages.
 *
 * @param string $page Newline-separated list of page names (ignored when $exportall is set)
 * @param mixed $history History selection handed to the exporter factory
 * @param bool $list_authors Value assigned to the exporter's list_authors flag
 * @param bool $exportall Export every page with full history instead of a page list
 */
protected function doExport( $page, $history, $list_authors, $exportall ) {
	$pages = [];

	if ( $exportall ) {
		// A complete dump always carries the full history.
		$history = WikiExporter::FULL;
	} else {
		// Page names are used as keys so that every page is recorded only once.
		$pageSet = [];
		foreach ( explode( "\n", $page ) as $line ) {
			$candidate = Title::newFromText( trim( $line ) );
			if ( !$candidate || $candidate->isExternal() || $candidate->getText() === '' ) {
				continue;
			}
			$pageSet[$candidate->getPrefixedText()] = true;
		}

		// The pages the user asked for, before any expansion.
		$inputPages = array_keys( $pageSet );

		if ( $this->templates ) {
			$pageSet = $this->getTemplates( $inputPages, $pageSet );
		}
		$pageSet = $this->getExtraPages( $inputPages, $pageSet );
		if ( $this->pageLinkDepth ) {
			$pageSet = $this->getPageLinks( $inputPages, $pageSet, $this->pageLinkDepth );
		}

		// Normalize titles to the same format and remove dupes, see T19374
		$pages = array_unique( array_map(
			static fn ( $name ) => str_replace( ' ', '_', $name ),
			array_keys( $pageSet )
		) );
	}

	$exporter = $this->wikiExporterFactory->getWikiExporter(
		$this->dbProvider->getReplicaDatabase(),
		$history
	);
	$exporter->list_authors = $list_authors;
	$exporter->openStream();

	if ( $exportall ) {
		$exporter->allPages();
	} else {
		foreach ( $pages as $pageName ) {
			$title = Title::newFromText( $pageName );
			// T10824: Only export pages the user can read.
			// @todo Perhaps output an <error> tag or something for skipped pages.
			if ( $title === null || !$this->getAuthority()->authorizeRead( 'read', $title ) ) {
				continue;
			}

			$exporter->pageByTitle( $title );
		}
	}

	$exporter->closeStream();
}
448
/**
 * List the pages that are members of a category.
 *
 * The number of rows is capped by the ExportPagelistLimit setting.
 *
 * @param Title $title Title whose DB key is matched against cl_to
 * @return string[] Page names as built by Title::makeName()
 */
protected function getPagesFromCategory( $title ) {
	$limit = $this->getConfig()->get( MainConfigNames::ExportPagelistLimit );

	$rows = $this->dbProvider->getReplicaDatabase()
		->newSelectQueryBuilder()
		->select( [ 'page_namespace', 'page_title' ] )
		->from( 'page' )
		->join( 'categorylinks', null, 'cl_from=page_id' )
		->where( [ 'cl_to' => $title->getDBkey() ] )
		->limit( $limit )
		->caller( __METHOD__ )
		->fetchResultSet();

	$names = [];
	foreach ( $rows as $row ) {
		$names[] = Title::makeName( $row->page_namespace, $row->page_title );
	}

	return $names;
}
475
/**
 * List the pages of one namespace.
 *
 * The number of rows is capped by the ExportPagelistLimit setting.
 *
 * @param int $nsindex Namespace index matched against page_namespace
 * @return string[] Page names as built by Title::makeName()
 */
protected function getPagesFromNamespace( $nsindex ) {
	$limit = $this->getConfig()->get( MainConfigNames::ExportPagelistLimit );

	$rows = $this->dbProvider->getReplicaDatabase()
		->newSelectQueryBuilder()
		->select( [ 'page_namespace', 'page_title' ] )
		->from( 'page' )
		->where( [ 'page_namespace' => $nsindex ] )
		->limit( $limit )
		->caller( __METHOD__ )
		->fetchResultSet();

	$names = [];
	foreach ( $rows as $row ) {
		$names[] = Title::makeName( $row->page_namespace, $row->page_title );
	}

	return $names;
}
499
/**
 * Expand a set of pages with the templates used on the given input pages.
 *
 * @param string[] $inputPages Page names whose template links are looked up
 * @param array $pageSet Map of page name => true collected so far
 * @return array The page set including the templates that were found
 */
protected function getTemplates( $inputPages, $pageSet ) {
	[ $nsField, $titleField ] = $this->linksMigration->getTitleFields( 'templatelinks' );
	$queryInfo = $this->linksMigration->getQueryInfo( 'templatelinks' );
	$dbr = $this->dbProvider->getReplicaDatabase();
	$queryBuilder = $dbr->newSelectQueryBuilder()
		->select( [ 'namespace' => $nsField, 'title' => $titleField ] )
		->from( 'page' )
		->join( 'templatelinks', null, 'page_id=tl_from' )
		// 'templatelinks' is already joined above; add only the remaining tables.
		->tables( array_diff( $queryInfo['tables'], [ 'templatelinks' ] ) )
		->joinConds( $queryInfo['joins'] )
		// Attribute the per-page queries run by getLinks() to this method,
		// like the other queries in this class do.
		->caller( __METHOD__ );
	return $this->getLinks( $inputPages, $pageSet, $queryBuilder );
}
518
/**
 * Let the SpecialExportGetExtraPages hook add further pages to the set.
 *
 * @param string[] $inputPages Page names passed to the hook
 * @param array $pageSet Map of page name => true collected so far
 * @return array The page set including the pages supplied by the hook
 */
private function getExtraPages( $inputPages, $pageSet ) {
	$added = [];
	$this->getHookRunner()->onSpecialExportGetExtraPages( $inputPages, $added );

	foreach ( $added as $linkTarget ) {
		$name = $this->titleFormatter->getPrefixedText( $linkTarget );
		$pageSet[$name] = true;
	}

	return $pageSet;
}
533
/**
 * Clamp a requested link depth to what the current user may use.
 *
 * @param int|null $depth Requested depth, or null when none was given
 * @return int Depth between 0 and 5, unless the configured maximum is negative
 */
protected function validateLinkDepth( $depth ) {
	if ( $depth === null || $depth < 0 ) {
		return 0;
	}

	if ( !$this->userCanOverrideExportDepth() ) {
		$maxLinkDepth = $this->getConfig()->get( MainConfigNames::ExportMaxLinkDepth );
		// Clamp instead of returning early, so that the hard limit below also
		// applies when the configured maximum itself is larger than 5.
		$depth = min( $depth, $maxLinkDepth );
	}

	/*
	 * There's a HARD CODED limit of 5 levels of recursion here to prevent a
	 * crazy-big export from being done by someone setting the depth
	 * number too high. In other words, last resort safety net.
	 */

	return intval( min( $depth, 5 ) );
}
559
/**
 * Expand a set of pages with the pages they link to, up to a given depth.
 *
 * Each round looks up the links of the pages that have not been expanded in
 * an earlier round and adds the targets to the set.
 *
 * @param string[] $inputPages Page names to start from
 * @param array $pageSet Map of page name => true collected so far
 * @param int $depth Number of rounds to follow links
 * @return array The page set including all linked pages that were found
 */
protected function getPageLinks( $inputPages, $pageSet, $depth ) {
	if ( $depth <= 0 ) {
		return $pageSet;
	}

	// The query does not depend on the round, so build it once. getLinks()
	// works on clones and leaves this builder untouched.
	[ $nsField, $titleField ] = $this->linksMigration->getTitleFields( 'pagelinks' );
	$queryInfo = $this->linksMigration->getQueryInfo( 'pagelinks' );
	$dbr = $this->dbProvider->getReplicaDatabase();
	$queryBuilder = $dbr->newSelectQueryBuilder()
		->select( [ 'namespace' => $nsField, 'title' => $titleField ] )
		->from( 'page' )
		->join( 'pagelinks', null, 'page_id=pl_from' )
		// 'pagelinks' is already joined above; add only the remaining tables.
		->tables( array_diff( $queryInfo['tables'], [ 'pagelinks' ] ) )
		->joinConds( $queryInfo['joins'] )
		->caller( __METHOD__ );

	// Names that were already handed to getLinks(); asking for their links
	// again in a later round cannot add anything new.
	$expanded = [];

	for ( ; $depth > 0; --$depth ) {
		$pageSet = $this->getLinks( $inputPages, $pageSet, $queryBuilder );
		$expanded += array_fill_keys( $inputPages, true );

		// Next round: every page in the set that has not been expanded yet.
		$inputPages = array_keys( array_diff_key( $pageSet, $expanded ) );
		if ( !$inputPages ) {
			break;
		}
	}

	return $pageSet;
}
584
/**
 * Add each input page and the targets returned by the link query to the set.
 *
 * @param string[] $inputPages Page names to look up
 * @param array $pageSet Map of page name => true collected so far
 * @param SelectQueryBuilder $queryBuilder Query selecting 'namespace' and 'title';
 *  it is cloned for every page and restricted to that page
 * @return array The extended page set
 */
protected function getLinks( $inputPages, $pageSet, SelectQueryBuilder $queryBuilder ) {
	foreach ( $inputPages as $name ) {
		$source = Title::newFromText( $name );
		if ( !$source ) {
			// Unparsable names are skipped.
			continue;
		}

		$pageSet[$source->getPrefixedText()] = true;

		// @todo FIXME: May or may not be more efficient to batch these
		// by namespace when given multiple input pages.
		$perPageQuery = clone $queryBuilder;
		$perPageQuery->where( [
			'page_namespace' => $source->getNamespace(),
			'page_title' => $source->getDBkey()
		] );

		foreach ( $perPageQuery->fetchResultSet() as $row ) {
			$target = Title::makeTitle( $row->namespace, $row->title );
			$pageSet[$target->getPrefixedText()] = true;
		}
	}

	return $pageSet;
}
615
/**
 * Name of the group this special page is listed under.
 *
 * @return string Always 'pagetools'
 */
protected function getGroupName() {
	$group = 'pagetools';

	return $group;
}
619}
620
// Also register the class under the un-namespaced name 'SpecialExport'.
622class_alias( SpecialExport::class, 'SpecialExport' );
const NS_MAIN
Definition Defines.php:64
wfTimestampNow()
Convenience function; returns MediaWiki timestamp for the present time.
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
wfResetOutputBuffers( $resetGzipEncoding=true)
Clear away any user-level output buffers, discarding contents.
Factory service for WikiExporter instances.
Object handling generic submission, CSRF protection, layout and other logic for UI forms in a reusabl...
Definition HTMLForm.php:206
Service for compat reading of links tables.
Create PSR-3 logger objects.
A class containing constants representing the names of configuration variables.
const ExportMaxLinkDepth
Name constant for the ExportMaxLinkDepth setting, for use with Config::get()
const Sitename
Name constant for the Sitename setting, for use with Config::get()
const ExportAllowAll
Name constant for the ExportAllowAll setting, for use with Config::get()
const ExportMaxHistory
Name constant for the ExportMaxHistory setting, for use with Config::get()
const ExportAllowListContributors
Name constant for the ExportAllowListContributors setting, for use with Config::get()
const ExportPagelistLimit
Name constant for the ExportPagelistLimit setting, for use with Config::get()
const ExportFromNamespaces
Name constant for the ExportFromNamespaces setting, for use with Config::get()
const ExportAllowHistory
Name constant for the ExportAllowHistory setting, for use with Config::get()
Parent class for all special pages.
setHeaders()
Sets headers - this should be called from the execute() method of all derived classes!
getConfig()
Shortcut to get main config object.
getContext()
Gets the context this SpecialPage is executed in.
getRequest()
Get the WebRequest being used for this instance.
msg( $key,... $params)
Wrapper around wfMessage that sets the current context.
getOutput()
Get the OutputPage being used for this instance.
getAuthority()
Shortcut to get the Authority executing this instance.
outputHeader( $summaryMessageKey='')
Outputs a summary message on top of special pages. By default the message key is the canonical name of...
addHelpLink( $to, $overrideBaseUrl=false)
Adds help link with an icon via page indicators.
A special page that allows users to export pages in a XML file.
doExport( $page, $history, $list_authors, $exportall)
Do the actual page exporting.
execute( $par)
Default execute method. Checks user permissions.
getTemplates( $inputPages, $pageSet)
Expand a list of pages to include templates used in those pages.
getPageLinks( $inputPages, $pageSet, $depth)
Expand a list of pages to include pages linked to from that page.
getLinks( $inputPages, $pageSet, SelectQueryBuilder $queryBuilder)
Expand a list of pages to include items used in those pages.
getGroupName()
Under which header this special page is listed in Special:SpecialPages See messages 'specialpages-gro...
validateLinkDepth( $depth)
Validate link depth setting, if available.
__construct(IConnectionProvider $dbProvider, WikiExporterFactory $wikiExporterFactory, TitleFormatter $titleFormatter, LinksMigration $linksMigration)
Represents a title within MediaWiki.
Definition Title.php:78
Build SELECT queries with a fluent interface.
A title formatter service for MediaWiki.
Provide primary and replica IDatabase connections.
This program is free software; you can redistribute it and/or modify it under the terms of the GNU Ge...