MediaWiki master
SpecialExport.php
Go to the documentation of this file.
1<?php
23namespace MediaWiki\Specials;
24
35use WikiExporter;
38
/** Whether only current revisions are exported; execute() defaults it to true and reads the 'curonly' checkbox on POST. */
46 protected bool $curonly;
/** Whether this request should stream an XML export instead of rendering the form. */
47 protected bool $doExport;
/** Link depth to follow from the input pages, as returned by validateLinkDepth(). */
48 protected int $pageLinkDepth;
/** Whether the 'templates' request parameter was present, i.e. templates used by the pages are exported too. */
49 protected bool $templates;
50
/** Source of the replica database connections used for every query in this class. */
51 private IConnectionProvider $dbProvider;
/** Creates the WikiExporter used by doExport(). */
52 private WikiExporterFactory $wikiExporterFactory;
/** Turns the extra pages returned by the SpecialExportGetExtraPages hook into prefixed text. */
53 private TitleFormatter $titleFormatter;
/** Supplies title fields and query info for the templatelinks and pagelinks tables. */
54 private LinksMigration $linksMigration;
55
/**
 * Register the special page under the name 'Export' and store the injected services.
 *
 * @param IConnectionProvider $dbProvider
 * @param WikiExporterFactory $wikiExporterFactory
 * @param TitleFormatter $titleFormatter
 * @param LinksMigration $linksMigration
 */
public function __construct(
	IConnectionProvider $dbProvider,
	WikiExporterFactory $wikiExporterFactory,
	TitleFormatter $titleFormatter,
	LinksMigration $linksMigration
) {
	parent::__construct( 'Export' );

	// Link-table and title helpers
	$this->linksMigration = $linksMigration;
	$this->titleFormatter = $titleFormatter;

	// Database access and exporter construction
	$this->wikiExporterFactory = $wikiExporterFactory;
	$this->dbProvider = $dbProvider;
}
74
/**
 * Entry point of the special page.
 *
 * Depending on the request this either streams an XML export of the
 * requested pages (and returns early), or renders the export form,
 * pre-filled with the page list built so far.
 *
 * @param string|null $par Subpage text; used as the default page list on
 *  GET requests and required to be empty for the POST form handling.
 */
public function execute( $par ) {
	$this->setHeaders();
	$this->outputHeader();
	$config = $this->getConfig();
	$request = $this->getRequest();

	$this->curonly = true;
	$this->doExport = false;
	$this->templates = $request->getCheck( 'templates' );
	$this->pageLinkDepth = $this->validateLinkDepth(
		$request->getIntOrNull( 'pagelink-depth' )
	);
	$nsindex = '';
	$exportall = false;
	// Always defined, so no branch below can leave it unset before use.
	$history = WikiExporter::CURRENT;

	if ( $request->getCheck( 'addcat' ) ) {
		// "Add pages from category" button: extend the page list, then redisplay the form.
		$page = $request->getText( 'pages' );
		$catname = $request->getText( 'catname' );

		if ( $catname !== '' ) {
			$t = Title::makeTitleSafe( NS_MAIN, $catname );
			if ( $t ) {
				$catpages = $this->getPagesFromCategory( $t );
				if ( $catpages ) {
					if ( $page !== '' ) {
						$page .= "\n";
					}
					$page .= implode( "\n", $catpages );
				}
			}
		}
	} elseif ( $request->getCheck( 'addns' ) && $config->get( MainConfigNames::ExportFromNamespaces ) ) {
		// "Add pages from namespace" button: extend the page list, then redisplay the form.
		$page = $request->getText( 'pages' );
		$nsindex = $request->getText( 'nsindex', '' );

		if ( $nsindex !== '' ) {
			$nspages = $this->getPagesFromNamespace( (int)$nsindex );
			if ( $nspages ) {
				// Same separator handling as the category branch: no leading blank line.
				if ( $page !== '' ) {
					$page .= "\n";
				}
				$page .= implode( "\n", $nspages );
			}
		}
	} elseif ( $request->getCheck( 'exportall' ) && $config->get( MainConfigNames::ExportAllowAll ) ) {
		$this->doExport = true;
		$exportall = true;

		// The page list is ignored when exporting everything, but it must
		// be defined for the doExport() call further down.
		$page = '';
	} elseif ( $request->wasPosted() && ( $par ?? '' ) === '' ) {
		// Log to see if certain parameters are actually used.
		// If not, we could deprecate them and do some cleanup, here and in WikiExporter.
		LoggerFactory::getInstance( 'export' )->debug(
			'Special:Export POST, dir: [{dir}], offset: [{offset}], limit: [{limit}]', [
				'dir' => $request->getRawVal( 'dir' ),
				'offset' => $request->getRawVal( 'offset' ),
				'limit' => $request->getRawVal( 'limit' ),
			] );

		$page = $request->getText( 'pages' );
		$this->curonly = $request->getCheck( 'curonly' );
		$rawOffset = $request->getVal( 'offset' );

		if ( $rawOffset ) {
			$offset = wfTimestamp( TS_MW, $rawOffset );
		} else {
			$offset = null;
		}

		$maxHistory = $config->get( MainConfigNames::ExportMaxHistory );
		$limit = $request->getInt( 'limit' );
		$dir = $request->getVal( 'dir' );
		$history = [
			'dir' => 'asc',
			'offset' => false,
			'limit' => $maxHistory,
		];
		$historyCheck = $request->getCheck( 'history' );

		if ( $this->curonly ) {
			$history = WikiExporter::CURRENT;
		} elseif ( !$historyCheck ) {
			// Only honour limit/offset/dir when 'history' was not requested.
			if ( $limit > 0 && ( $maxHistory == 0 || $limit < $maxHistory ) ) {
				$history['limit'] = $limit;
			}

			if ( $offset !== null ) {
				$history['offset'] = $offset;
			}

			if ( strtolower( $dir ?? '' ) == 'desc' ) {
				$history['dir'] = 'desc';
			}
		}

		if ( $page !== '' ) {
			$this->doExport = true;
		}
	} else {
		// Default to current-only for GET requests.
		$page = $request->getText( 'pages', $par ?? '' );
		$historyCheck = $request->getCheck( 'history' );

		if ( $historyCheck ) {
			$history = WikiExporter::FULL;
		} else {
			$history = WikiExporter::CURRENT;
		}

		if ( $page !== '' ) {
			$this->doExport = true;
		}
	}

	if ( !$config->get( MainConfigNames::ExportAllowHistory ) ) {
		// Override
		$history = WikiExporter::CURRENT;
	}

	// Contributor lists are only offered for current-revision exports.
	$list_authors = $request->getCheck( 'listauthors' );
	if ( !$this->curonly || !$config->get( MainConfigNames::ExportAllowListContributors ) ) {
		$list_authors = false;
	}

	if ( $this->doExport ) {
		$this->getOutput()->disable();

		// Cancel output buffering and gzipping if set
		// This should provide safer streaming for pages with history
		wfResetOutputBuffers();
		$request->response()->header( 'Content-type: application/xml; charset=utf-8' );
		$request->response()->header( 'X-Robots-Tag: noindex,nofollow' );

		if ( $request->getCheck( 'wpDownload' ) ) {
			// Provide a sensible filename suggestion
			$filename = urlencode( $config->get( MainConfigNames::Sitename ) . '-' .
				wfTimestampNow() . '.xml' );
			$request->response()->header( "Content-disposition: attachment;filename={$filename}" );
		}

		$this->doExport( $page, $history, $list_authors, $exportall );

		return;
	}

	$out = $this->getOutput();
	$out->addWikiMsg( 'exporttext' );

	// Only carry the category name over while the page list is still empty.
	if ( $page === '' ) {
		$categoryName = $request->getText( 'catname' );
	} else {
		$categoryName = '';
	}
	$canExportAll = $config->get( MainConfigNames::ExportAllowAll );
	// Page-selection fields are hidden while the "export all" box is ticked.
	$hideIf = $canExportAll ? [ 'hide-if' => [ '===', 'exportall', '1' ] ] : [];

	$formDescriptor = [
		'catname' => [
			'type' => 'textwithbutton',
			'name' => 'catname',
			'horizontal-label' => true,
			'label-message' => 'export-addcattext',
			'default' => $categoryName,
			'size' => 40,
			'buttontype' => 'submit',
			'buttonname' => 'addcat',
			'buttondefault' => $this->msg( 'export-addcat' )->text(),
		] + $hideIf,
	];
	if ( $config->get( MainConfigNames::ExportFromNamespaces ) ) {
		$formDescriptor += [
			'nsindex' => [
				'type' => 'namespaceselectwithbutton',
				'default' => $nsindex,
				'label-message' => 'export-addnstext',
				'horizontal-label' => true,
				'name' => 'nsindex',
				'id' => 'namespace',
				'cssclass' => 'namespaceselector',
				'buttontype' => 'submit',
				'buttonname' => 'addns',
				'buttondefault' => $this->msg( 'export-addns' )->text(),
			] + $hideIf,
		];
	}

	if ( $canExportAll ) {
		$formDescriptor += [
			'exportall' => [
				'type' => 'check',
				'label-message' => 'exportall',
				'name' => 'exportall',
				'id' => 'exportall',
				'default' => $request->wasPosted() && $request->getCheck( 'exportall' ),
			],
		];
	}

	$formDescriptor += [
		'textarea' => [
			'class' => HTMLTextAreaField::class,
			'name' => 'pages',
			'label-message' => 'export-manual',
			'nodata' => true,
			'rows' => 10,
			'default' => $page,
		] + $hideIf,
	];

	if ( $config->get( MainConfigNames::ExportAllowHistory ) ) {
		$formDescriptor += [
			'curonly' => [
				'type' => 'check',
				'label-message' => 'exportcuronly',
				'name' => 'curonly',
				'id' => 'curonly',
				'default' => !$request->wasPosted() || $request->getCheck( 'curonly' ),
			],
		];
	} else {
		$out->addWikiMsg( 'exportnohistory' );
	}

	$formDescriptor += [
		'templates' => [
			'type' => 'check',
			'label-message' => 'export-templates',
			'name' => 'templates',
			'id' => 'wpExportTemplates',
			'default' => $request->wasPosted() && $request->getCheck( 'templates' ),
		],
	];

	if ( $config->get( MainConfigNames::ExportMaxLinkDepth ) || $this->userCanOverrideExportDepth() ) {
		$formDescriptor += [
			'pagelink-depth' => [
				'type' => 'text',
				'name' => 'pagelink-depth',
				'id' => 'pagelink-depth',
				'label-message' => 'export-pagelinks',
				'default' => '0',
				'size' => 20,
			],
		];
	}

	$formDescriptor += [
		'wpDownload' => [
			'type' => 'check',
			'name' => 'wpDownload',
			'id' => 'wpDownload',
			'default' => !$request->wasPosted() || $request->getCheck( 'wpDownload' ),
			'label-message' => 'export-download',
		],
	];

	if ( $config->get( MainConfigNames::ExportAllowListContributors ) ) {
		$formDescriptor += [
			'listauthors' => [
				'type' => 'check',
				'label-message' => 'exportlistauthors',
				'default' => $request->wasPosted() && $request->getCheck( 'listauthors' ),
				'name' => 'listauthors',
				'id' => 'listauthors',
			],
		];
	}

	$htmlForm = HTMLForm::factory( 'ooui', $formDescriptor, $this->getContext() );
	$htmlForm->setSubmitTextMsg( 'export-submit' );
	$htmlForm->prepareForm()->displayForm( false );
	$this->addHelpLink( 'Help:Export' );
}
360
/**
 * Check whether the current authority holds the 'override-export-depth' right.
 *
 * @return bool
 */
protected function userCanOverrideExportDepth() {
	$authority = $this->getAuthority();

	return $authority->isAllowed( 'override-export-depth' );
}
367
/**
 * Do the actual page exporting.
 *
 * @param string $page Newline-separated list of page names to export
 *  (ignored when $exportall is set)
 * @param mixed $history History selection, handed to
 *  WikiExporterFactory::getWikiExporter(); forced to WikiExporter::FULL
 *  when $exportall is set
 * @param bool $list_authors Value assigned to the exporter's list_authors flag
 * @param bool $exportall Whether to export all pages instead of the list in $page
 */
protected function doExport( $page, $history, $list_authors, $exportall ) {
	$titlesToExport = [];

	if ( $exportall ) {
		// Grabbing everything: full history, and the page list is ignored.
		$history = WikiExporter::FULL;
	} else {
		// Inverted index (prefixed text => true), so each page is recorded once.
		$pageSet = [];

		foreach ( explode( "\n", $page ) as $line ) {
			$candidate = Title::newFromText( trim( $line ) );
			if ( !$candidate || $candidate->isExternal() || $candidate->getText() === '' ) {
				continue;
			}
			$pageSet[$candidate->getPrefixedText()] = true;
		}

		// The pages the user actually asked for; the expansions below start from these.
		$inputPages = array_keys( $pageSet );

		if ( $this->templates ) {
			$pageSet = $this->getTemplates( $inputPages, $pageSet );
		}
		$pageSet = $this->getExtraPages( $inputPages, $pageSet );
		if ( $this->pageLinkDepth ) {
			$pageSet = $this->getPageLinks( $inputPages, $pageSet, $this->pageLinkDepth );
		}

		// Normalize titles to the same format and remove dupes, see T19374
		$titlesToExport = array_unique( array_map(
			static fn ( $name ) => str_replace( ' ', '_', $name ),
			array_keys( $pageSet )
		) );
	}

	$exporter = $this->wikiExporterFactory->getWikiExporter(
		$this->dbProvider->getReplicaDatabase(),
		$history
	);
	$exporter->list_authors = $list_authors;
	$exporter->openStream();

	if ( $exportall ) {
		$exporter->allPages();
	} else {
		foreach ( $titlesToExport as $name ) {
			$title = Title::newFromText( $name );

			# T10824: Only export pages the user can read
			// @todo Perhaps output an <error> tag or something for skipped pages.
			if ( $title === null || !$this->getAuthority()->authorizeRead( 'read', $title ) ) {
				continue;
			}

			$exporter->pageByTitle( $title );
		}
	}

	$exporter->closeStream();
}
447
/**
 * List the pages that have a categorylinks row pointing at the given title's DB key.
 *
 * @param PageIdentity $page Page whose DB key is matched against cl_to
 * @return string[] Names built with Title::makeName(), at most
 *  ExportPagelistLimit entries
 */
protected function getPagesFromCategory( PageIdentity $page ) {
	$limit = $this->getConfig()->get( MainConfigNames::ExportPagelistLimit );
	$categoryKey = $page->getDBkey();

	$queryBuilder = $this->dbProvider->getReplicaDatabase()->newSelectQueryBuilder()
		->select( [ 'page_namespace', 'page_title' ] )
		->from( 'page' )
		->join( 'categorylinks', null, 'cl_from=page_id' )
		->where( [ 'cl_to' => $categoryKey ] )
		->limit( $limit )
		->caller( __METHOD__ );

	$names = [];
	foreach ( $queryBuilder->fetchResultSet() as $member ) {
		$names[] = Title::makeName( $member->page_namespace, $member->page_title );
	}

	return $names;
}
474
/**
 * List the pages of one namespace.
 *
 * @param int $nsindex Namespace index matched against page_namespace
 * @return string[] Names built with Title::makeName(), at most
 *  ExportPagelistLimit entries
 */
protected function getPagesFromNamespace( $nsindex ) {
	$limit = $this->getConfig()->get( MainConfigNames::ExportPagelistLimit );

	$queryBuilder = $this->dbProvider->getReplicaDatabase()->newSelectQueryBuilder()
		->select( [ 'page_namespace', 'page_title' ] )
		->from( 'page' )
		->where( [ 'page_namespace' => $nsindex ] )
		->limit( $limit )
		->caller( __METHOD__ );

	$names = [];
	foreach ( $queryBuilder->fetchResultSet() as $entry ) {
		$names[] = Title::makeName( $entry->page_namespace, $entry->page_title );
	}

	return $names;
}
498
/**
 * Expand a list of pages to include templates used in those pages.
 *
 * @param array $inputPages Page names to look up
 * @param array $pageSet Set of pages collected so far (name => true)
 * @return array The page set, extended via getLinks() over templatelinks
 */
protected function getTemplates( $inputPages, $pageSet ) {
	$table = 'templatelinks';
	[ $nsField, $titleField ] = $this->linksMigration->getTitleFields( $table );
	$queryInfo = $this->linksMigration->getQueryInfo( $table );

	$queryBuilder = $this->dbProvider->getReplicaDatabase()->newSelectQueryBuilder();
	$queryBuilder->caller( __METHOD__ )
		->select( [ 'namespace' => $nsField, 'title' => $titleField ] )
		->from( 'page' )
		->join( 'templatelinks', null, 'page_id=tl_from' )
		// templatelinks is already joined above; add only the remaining tables.
		->tables( array_diff( $queryInfo['tables'], [ 'templatelinks' ] ) )
		->joinConds( $queryInfo['joins'] );

	return $this->getLinks( $inputPages, $pageSet, $queryBuilder );
}
518
/**
 * Add the pages supplied by the SpecialExportGetExtraPages hook to the page set.
 *
 * @param array $inputPages Page names handed to the hook
 * @param array $pageSet Set of pages collected so far (name => true)
 * @return array The page set including the hook-provided pages
 */
private function getExtraPages( $inputPages, $pageSet ) {
	// Filled in by the hook handlers.
	$fromHook = [];
	$this->getHookRunner()->onSpecialExportGetExtraPages( $inputPages, $fromHook );

	foreach ( $fromHook as $linkTarget ) {
		$prefixedText = $this->titleFormatter->getPrefixedText( $linkTarget );
		$pageSet[$prefixedText] = true;
	}

	return $pageSet;
}
533
/**
 * Validate link depth setting, if available.
 *
 * The result is always within 0..5. Users without the right checked by
 * userCanOverrideExportDepth() are additionally capped at the
 * ExportMaxLinkDepth setting.
 *
 * @param int|null $depth Requested depth; null or a negative value yields 0
 * @return int Depth to use
 */
protected function validateLinkDepth( $depth ) {
	if ( $depth === null || $depth < 0 ) {
		return 0;
	}

	if ( !$this->userCanOverrideExportDepth() ) {
		$maxLinkDepth = (int)$this->getConfig()->get( MainConfigNames::ExportMaxLinkDepth );
		// Cap at the configured maximum, but keep going: the hard limit
		// below must also apply when the configured maximum exceeds it.
		$depth = min( $depth, $maxLinkDepth );
	}

	/*
	 * There's a HARD CODED limit of 5 levels of recursion here to prevent a
	 * crazy-big export from being done by someone setting the depth
	 * number too high. In other words, last resort safety net.
	 * The lower bound guards against a negative configured maximum.
	 */
	return max( 0, (int)min( $depth, 5 ) );
}
559
/**
 * Expand a list of pages to include pages linked to from that page.
 *
 * Each pass follows the links of the pages found so far, $depth times at
 * most. A page's links are looked up only once: later passes query just
 * the pages added by the previous pass, and the loop stops as soon as a
 * pass adds nothing new.
 *
 * @param array $inputPages Page names to start from
 * @param array $pageSet Set of pages collected so far (name => true)
 * @param int $depth Maximum number of link levels to follow
 * @return array The extended page set
 */
protected function getPageLinks( $inputPages, $pageSet, $depth ) {
	if ( $depth <= 0 ) {
		return $pageSet;
	}

	// The query does not depend on the pass, so build it once; getLinks()
	// clones it for every page it looks up.
	[ $nsField, $titleField ] = $this->linksMigration->getTitleFields( 'pagelinks' );
	$queryInfo = $this->linksMigration->getQueryInfo( 'pagelinks' );
	$queryBuilder = $this->dbProvider->getReplicaDatabase()->newSelectQueryBuilder()
		->caller( __METHOD__ )
		->select( [ 'namespace' => $nsField, 'title' => $titleField ] )
		->from( 'page' )
		->join( 'pagelinks', null, 'page_id=pl_from' )
		->tables( array_diff( $queryInfo['tables'], [ 'pagelinks' ] ) )
		->joinConds( $queryInfo['joins'] );

	// Names whose links were already collected (name => true).
	$expanded = [];
	$pending = $inputPages;

	for ( ; $depth > 0; --$depth ) {
		foreach ( $pending as $name ) {
			$expanded[$name] = true;
		}

		$pageSet = $this->getLinks( $pending, $pageSet, $queryBuilder );

		// Next level: everything in the set that has not been expanded yet.
		$pending = array_keys( array_diff_key( $pageSet, $expanded ) );
		if ( !$pending ) {
			// Fixpoint reached, further passes would find nothing new.
			break;
		}
	}

	return $pageSet;
}
585
/**
 * Expand a list of pages to include items used in those pages.
 *
 * @param array $inputPages Page names to look up; names that do not parse
 *  as a title are skipped
 * @param array $pageSet Set of pages collected so far (name => true)
 * @param SelectQueryBuilder $queryBuilder Query selecting 'namespace' and
 *  'title' columns; cloned per page and restricted to that page
 * @return array The page set including each input page and its link targets
 */
protected function getLinks( $inputPages, $pageSet, SelectQueryBuilder $queryBuilder ) {
	foreach ( $inputPages as $name ) {
		$source = Title::newFromText( $name );
		if ( !$source ) {
			continue;
		}

		$pageSet[$source->getPrefixedText()] = true;

		// Work on a copy so the conditions do not accumulate across pages.
		$perPageQuery = clone $queryBuilder;
		$perPageQuery->where( [
			'page_namespace' => $source->getNamespace(),
			'page_title' => $source->getDBkey()
		] );

		foreach ( $perPageQuery->fetchResultSet() as $row ) {
			$target = Title::makeTitle( $row->namespace, $row->title );
			$pageSet[$target->getPrefixedText()] = true;
		}
	}

	return $pageSet;
}
616
/**
 * Name of the Special:SpecialPages group this page is listed under.
 *
 * @return string
 */
protected function getGroupName() {
	$group = 'pagetools';

	return $group;
}
620}
621
// Make the class also reachable under the un-namespaced name 'SpecialExport'.
623class_alias( SpecialExport::class, 'SpecialExport' );
const NS_MAIN
Definition Defines.php:65
wfTimestampNow()
Convenience function; returns MediaWiki timestamp for the present time.
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
wfResetOutputBuffers( $resetGzipEncoding=true)
Clear away any user-level output buffers, discarding contents.
Factory service for WikiExporter instances.
Object handling generic submission, CSRF protection, layout and other logic for UI forms in a reusable manner.
Definition HTMLForm.php:209
Service for compat reading of links tables.
Create PSR-3 logger objects.
A class containing constants representing the names of configuration variables.
const ExportMaxLinkDepth
Name constant for the ExportMaxLinkDepth setting, for use with Config::get()
const Sitename
Name constant for the Sitename setting, for use with Config::get()
const ExportAllowAll
Name constant for the ExportAllowAll setting, for use with Config::get()
const ExportMaxHistory
Name constant for the ExportMaxHistory setting, for use with Config::get()
const ExportAllowListContributors
Name constant for the ExportAllowListContributors setting, for use with Config::get()
const ExportPagelistLimit
Name constant for the ExportPagelistLimit setting, for use with Config::get()
const ExportFromNamespaces
Name constant for the ExportFromNamespaces setting, for use with Config::get()
const ExportAllowHistory
Name constant for the ExportAllowHistory setting, for use with Config::get()
Parent class for all special pages.
setHeaders()
Sets headers - this should be called from the execute() method of all derived classes!
getConfig()
Shortcut to get main config object.
getContext()
Gets the context this SpecialPage is executed in.
getRequest()
Get the WebRequest being used for this instance.
msg( $key,... $params)
Wrapper around wfMessage that sets the current context.
getOutput()
Get the OutputPage being used for this instance.
getAuthority()
Shortcut to get the Authority executing this instance.
outputHeader( $summaryMessageKey='')
Outputs a summary message on top of special pages. By default the message key is the canonical name of the special page.
addHelpLink( $to, $overrideBaseUrl=false)
Adds help link with an icon via page indicators.
A special page that allows users to export pages in a XML file.
doExport( $page, $history, $list_authors, $exportall)
Do the actual page exporting.
execute( $par)
Default execute method. Checks user permissions.
getTemplates( $inputPages, $pageSet)
Expand a list of pages to include templates used in those pages.
getPageLinks( $inputPages, $pageSet, $depth)
Expand a list of pages to include pages linked to from that page.
getLinks( $inputPages, $pageSet, SelectQueryBuilder $queryBuilder)
Expand a list of pages to include items used in those pages.
getGroupName()
Under which header this special page is listed in Special:SpecialPages. See messages 'specialpages-group-*' for valid names.
getPagesFromCategory(PageIdentity $page)
validateLinkDepth( $depth)
Validate link depth setting, if available.
__construct(IConnectionProvider $dbProvider, WikiExporterFactory $wikiExporterFactory, TitleFormatter $titleFormatter, LinksMigration $linksMigration)
Represents a title within MediaWiki.
Definition Title.php:78
Build SELECT queries with a fluent interface.
Interface for objects (potentially) representing an editable wiki page.
getDBkey()
Get the page title in DB key form.
A title formatter service for MediaWiki.
Provide primary and replica IDatabase connections.