MediaWiki master
SpecialExport.php
Go to the documentation of this file.
1<?php
23namespace MediaWiki\Specials;
24
25use HTMLTextAreaField;
34use WikiExporter;
37
/** @var bool Export only the current revision; execute() defaults this to true */
protected bool $curonly;

/** @var bool Set by execute() when the request should produce an XML dump */
protected bool $doExport;

/** @var int Link depth as returned by validateLinkDepth() */
protected int $pageLinkDepth;

/** @var bool Whether the 'templates' request parameter was present */
protected bool $templates;

/** @var IConnectionProvider Source of replica database connections */
private IConnectionProvider $dbProvider;

/** @var WikiExporterFactory Creates the WikiExporter used by doExport() */
private WikiExporterFactory $wikiExporterFactory;

/** @var TitleFormatter Formats the extra pages supplied by hooks */
private TitleFormatter $titleFormatter;

/** @var LinksMigration Supplies field names and joins for the links tables */
private LinksMigration $linksMigration;
54
/**
 * Register the special page under the name 'Export' and store the
 * injected services.
 *
 * @param IConnectionProvider $dbProvider
 * @param WikiExporterFactory $wikiExporterFactory
 * @param TitleFormatter $titleFormatter
 * @param LinksMigration $linksMigration
 */
public function __construct(
	IConnectionProvider $dbProvider,
	WikiExporterFactory $wikiExporterFactory,
	TitleFormatter $titleFormatter,
	LinksMigration $linksMigration
) {
	parent::__construct( 'Export' );

	// Link-table helpers
	$this->linksMigration = $linksMigration;
	$this->titleFormatter = $titleFormatter;

	// Database access and dump generation
	$this->wikiExporterFactory = $wikiExporterFactory;
	$this->dbProvider = $dbProvider;
}
73
/**
 * Handle a request to the special page.
 *
 * Depending on the request this either streams an XML dump (and returns
 * without rendering anything else) or shows the export form, possibly
 * with the page list pre-filled from a category or a namespace.
 *
 * @param string|null $par Subpage text; used as the default page list for
 *  non-POST requests
 */
public function execute( $par ) {
	$this->setHeaders();
	$this->outputHeader();
	$config = $this->getConfig();

	$this->curonly = true;
	$this->doExport = false;
	$request = $this->getRequest();
	$this->templates = $request->getCheck( 'templates' );
	$this->pageLinkDepth = $this->validateLinkDepth(
		$request->getIntOrNull( 'pagelink-depth' )
	);
	$nsindex = '';
	$exportall = false;

	// Give these a value on every code path so that no branch below can
	// leave them undefined when they reach doExport() or the form.
	$page = '';
	$history = WikiExporter::CURRENT;

	if ( $request->getCheck( 'addcat' ) ) {
		// "Add pages from category": extend the list and show the form again.
		$page = $request->getText( 'pages' );
		$catname = $request->getText( 'catname' );

		if ( $catname !== '' && $catname !== null && $catname !== false ) {
			$t = Title::makeTitleSafe( NS_MAIN, $catname );
			if ( $t ) {
				$catpages = $this->getPagesFromCategory( $t );
				if ( $catpages ) {
					if ( $page !== '' ) {
						$page .= "\n";
					}
					$page .= implode( "\n", $catpages );
				}
			}
		}
	} elseif ( $request->getCheck( 'addns' ) && $config->get( MainConfigNames::ExportFromNamespaces ) ) {
		// "Add pages from namespace": extend the list and show the form again.
		$page = $request->getText( 'pages' );
		$nsindex = $request->getText( 'nsindex', '' );

		if ( strval( $nsindex ) !== '' ) {
			$nspages = $this->getPagesFromNamespace( (int)$nsindex );
			if ( $nspages ) {
				// Only insert a separator when there is something to
				// separate from, matching the category branch above.
				if ( $page !== '' ) {
					$page .= "\n";
				}
				$page .= implode( "\n", $nspages );
			}
		}
	} elseif ( $request->getCheck( 'exportall' ) && $config->get( MainConfigNames::ExportAllowAll ) ) {
		// $page and $history keep their defaults; doExport() ignores the
		// page list and switches to full history when exporting everything.
		$this->doExport = true;
		$exportall = true;
	} elseif ( $request->wasPosted() && $par == '' ) {
		// Log to see if certain parameters are actually used.
		// If not, we could deprecate them and do some cleanup, here and in WikiExporter.
		LoggerFactory::getInstance( 'export' )->debug(
			'Special:Export POST, dir: [{dir}], offset: [{offset}], limit: [{limit}]', [
				'dir' => $request->getRawVal( 'dir' ),
				'offset' => $request->getRawVal( 'offset' ),
				'limit' => $request->getRawVal( 'limit' ),
			] );

		$page = $request->getText( 'pages' );
		$this->curonly = $request->getCheck( 'curonly' );
		$rawOffset = $request->getVal( 'offset' );

		if ( $rawOffset ) {
			$offset = wfTimestamp( TS_MW, $rawOffset );
		} else {
			$offset = null;
		}

		$maxHistory = $config->get( MainConfigNames::ExportMaxHistory );
		$limit = $request->getInt( 'limit' );
		$dir = $request->getVal( 'dir' );
		$history = [
			'dir' => 'asc',
			'offset' => false,
			'limit' => $maxHistory,
		];
		$historyCheck = $request->getCheck( 'history' );

		if ( $this->curonly ) {
			$history = WikiExporter::CURRENT;
		} elseif ( !$historyCheck ) {
			if ( $limit > 0 && ( $maxHistory == 0 || $limit < $maxHistory ) ) {
				$history['limit'] = $limit;
			}

			if ( $offset !== null ) {
				$history['offset'] = $offset;
			}

			// 'dir' is null when the parameter is absent; passing null to
			// strtolower() is deprecated as of PHP 8.1.
			if ( strtolower( $dir ?? '' ) == 'desc' ) {
				$history['dir'] = 'desc';
			}
		}

		if ( $page != '' ) {
			$this->doExport = true;
		}
	} else {
		// Default to current-only for GET requests.
		$page = $request->getText( 'pages', $par ?? '' );
		$historyCheck = $request->getCheck( 'history' );

		if ( $historyCheck ) {
			$history = WikiExporter::FULL;
		} else {
			$history = WikiExporter::CURRENT;
		}

		if ( $page != '' ) {
			$this->doExport = true;
		}
	}

	if ( !$config->get( MainConfigNames::ExportAllowHistory ) ) {
		// Override
		$history = WikiExporter::CURRENT;
	}

	$list_authors = $request->getCheck( 'listauthors' );
	if ( !$this->curonly || !$config->get( MainConfigNames::ExportAllowListContributors ) ) {
		$list_authors = false;
	}

	if ( $this->doExport ) {
		$this->getOutput()->disable();

		// Cancel output buffering and gzipping if set
		// This should provide safer streaming for pages with history
		wfResetOutputBuffers();
		$request->response()->header( 'Content-type: application/xml; charset=utf-8' );
		$request->response()->header( 'X-Robots-Tag: noindex,nofollow' );

		if ( $request->getCheck( 'wpDownload' ) ) {
			// Provide a sensible filename suggestion
			$filename = urlencode( $config->get( MainConfigNames::Sitename ) . '-' .
				wfTimestampNow() . '.xml' );
			$request->response()->header( "Content-disposition: attachment;filename={$filename}" );
		}

		$this->doExport( $page, $history, $list_authors, $exportall );

		return;
	}

	$out = $this->getOutput();
	$out->addWikiMsg( 'exporttext' );

	if ( $page == '' ) {
		$categoryName = $request->getText( 'catname' );
	} else {
		$categoryName = '';
	}
	$canExportAll = $config->get( MainConfigNames::ExportAllowAll );
	// When "export all" is offered, the page-selection fields are hidden
	// while that checkbox is ticked.
	$hideIf = $canExportAll ? [ 'hide-if' => [ '===', 'exportall', '1' ] ] : [];

	$formDescriptor = [
		'catname' => [
			'type' => 'textwithbutton',
			'name' => 'catname',
			'horizontal-label' => true,
			'label-message' => 'export-addcattext',
			'default' => $categoryName,
			'size' => 40,
			'buttontype' => 'submit',
			'buttonname' => 'addcat',
			'buttondefault' => $this->msg( 'export-addcat' )->text(),
		] + $hideIf,
	];
	if ( $config->get( MainConfigNames::ExportFromNamespaces ) ) {
		$formDescriptor += [
			'nsindex' => [
				'type' => 'namespaceselectwithbutton',
				'default' => $nsindex,
				'label-message' => 'export-addnstext',
				'horizontal-label' => true,
				'name' => 'nsindex',
				'id' => 'namespace',
				'cssclass' => 'namespaceselector',
				'buttontype' => 'submit',
				'buttonname' => 'addns',
				'buttondefault' => $this->msg( 'export-addns' )->text(),
			] + $hideIf,
		];
	}

	if ( $canExportAll ) {
		$formDescriptor += [
			'exportall' => [
				'type' => 'check',
				'label-message' => 'exportall',
				'name' => 'exportall',
				'id' => 'exportall',
				'default' => $request->wasPosted() && $request->getCheck( 'exportall' ),
			],
		];
	}

	$formDescriptor += [
		'textarea' => [
			'class' => HTMLTextAreaField::class,
			'name' => 'pages',
			'label-message' => 'export-manual',
			'nodata' => true,
			'rows' => 10,
			'default' => $page,
		] + $hideIf,
	];

	if ( $config->get( MainConfigNames::ExportAllowHistory ) ) {
		$formDescriptor += [
			'curonly' => [
				'type' => 'check',
				'label-message' => 'exportcuronly',
				'name' => 'curonly',
				'id' => 'curonly',
				'default' => !$request->wasPosted() || $request->getCheck( 'curonly' ),
			],
		];
	} else {
		$out->addWikiMsg( 'exportnohistory' );
	}

	$formDescriptor += [
		'templates' => [
			'type' => 'check',
			'label-message' => 'export-templates',
			'name' => 'templates',
			'id' => 'wpExportTemplates',
			'default' => $request->wasPosted() && $request->getCheck( 'templates' ),
		],
	];

	if ( $config->get( MainConfigNames::ExportMaxLinkDepth ) || $this->userCanOverrideExportDepth() ) {
		$formDescriptor += [
			'pagelink-depth' => [
				'type' => 'text',
				'name' => 'pagelink-depth',
				'id' => 'pagelink-depth',
				'label-message' => 'export-pagelinks',
				'default' => '0',
				'size' => 20,
			],
		];
	}

	$formDescriptor += [
		'wpDownload' => [
			'type' => 'check',
			'name' => 'wpDownload',
			'id' => 'wpDownload',
			'default' => !$request->wasPosted() || $request->getCheck( 'wpDownload' ),
			'label-message' => 'export-download',
		],
	];

	if ( $config->get( MainConfigNames::ExportAllowListContributors ) ) {
		$formDescriptor += [
			'listauthors' => [
				'type' => 'check',
				'label-message' => 'exportlistauthors',
				'default' => $request->wasPosted() && $request->getCheck( 'listauthors' ),
				'name' => 'listauthors',
				'id' => 'listauthors',
			],
		];
	}

	$htmlForm = HTMLForm::factory( 'ooui', $formDescriptor, $this->getContext() );
	$htmlForm->setSubmitTextMsg( 'export-submit' );
	$htmlForm->prepareForm()->displayForm( false );
	$this->addHelpLink( 'Help:Export' );
}
359
/**
 * Whether the current authority holds the 'override-export-depth' right.
 *
 * @return bool
 */
protected function userCanOverrideExportDepth() {
	$authority = $this->getAuthority();

	return $authority->isAllowed( 'override-export-depth' );
}
366
/**
 * Do the actual page exporting: resolve the page list, then stream the
 * dump through a WikiExporter.
 *
 * @param string $page Newline-separated page names; ignored when $exportall is set
 * @param int|array $history History mode handed to the exporter; replaced by
 *  WikiExporter::FULL when $exportall is set
 * @param bool $list_authors Copied to the exporter's list_authors property
 * @param bool $exportall Export every page instead of the given list
 */
protected function doExport( $page, $history, $list_authors, $exportall ) {
	$pages = [];

	if ( $exportall ) {
		// If we are grabbing everything, enable full history and ignore the rest
		$history = WikiExporter::FULL;
	} else {
		// Inverted index (prefixed text => true) so each page is recorded once
		$pageSet = [];

		// Split up and normalize input
		foreach ( explode( "\n", $page ) as $line ) {
			$candidate = Title::newFromText( trim( $line ) );
			if ( !$candidate || $candidate->isExternal() || $candidate->getText() === '' ) {
				continue;
			}
			$pageSet[$candidate->getPrefixedText()] = true;
		}

		// The pages the user asked for, before any expansion
		$inputPages = array_keys( $pageSet );

		// Look up any linked pages if asked...
		if ( $this->templates ) {
			$pageSet = $this->getTemplates( $inputPages, $pageSet );
		}
		$pageSet = $this->getExtraPages( $inputPages, $pageSet );
		if ( $this->pageLinkDepth ) {
			$pageSet = $this->getPageLinks( $inputPages, $pageSet, $this->pageLinkDepth );
		}

		// Normalize titles to the same format and remove dupes, see T19374
		$pages = array_unique( str_replace( ' ', '_', array_keys( $pageSet ) ) );
	}

	/* Ok, let's get to it... */
	$exporter = $this->wikiExporterFactory->getWikiExporter(
		$this->dbProvider->getReplicaDatabase(),
		$history
	);
	$exporter->list_authors = $list_authors;
	$exporter->openStream();

	if ( $exportall ) {
		$exporter->allPages();
	} else {
		foreach ( $pages as $pageName ) {
			$title = Title::newFromText( $pageName );

			// T10824: Only export pages the user can read.
			// @todo Perhaps output an <error> tag or something for skipped pages.
			if ( $title === null || !$this->getAuthority()->authorizeRead( 'read', $title ) ) {
				continue;
			}

			$exporter->pageByTitle( $title );
		}
	}

	$exporter->closeStream();
}
446
/**
 * List the pages that are members of a category, up to the configured
 * ExportPagelistLimit.
 *
 * @param Title $title Title whose DB key is matched against cl_to
 * @return string[] Names built with Title::makeName()
 */
protected function getPagesFromCategory( $title ) {
	$limit = $this->getConfig()->get( MainConfigNames::ExportPagelistLimit );
	$categoryKey = $title->getDBkey();

	$rows = $this->dbProvider->getReplicaDatabase()
		->newSelectQueryBuilder()
		->select( [ 'page_namespace', 'page_title' ] )
		->from( 'page' )
		->join( 'categorylinks', null, 'cl_from=page_id' )
		->where( [ 'cl_to' => $categoryKey ] )
		->limit( $limit )
		->caller( __METHOD__ )
		->fetchResultSet();

	$names = [];
	foreach ( $rows as $member ) {
		$names[] = Title::makeName( $member->page_namespace, $member->page_title );
	}

	return $names;
}
473
/**
 * List the pages in a namespace, up to the configured ExportPagelistLimit.
 *
 * @param int $nsindex Namespace index to match against page_namespace
 * @return string[] Names built with Title::makeName()
 */
protected function getPagesFromNamespace( $nsindex ) {
	$limit = $this->getConfig()->get( MainConfigNames::ExportPagelistLimit );

	$rows = $this->dbProvider->getReplicaDatabase()
		->newSelectQueryBuilder()
		->select( [ 'page_namespace', 'page_title' ] )
		->from( 'page' )
		->where( [ 'page_namespace' => $nsindex ] )
		->limit( $limit )
		->caller( __METHOD__ )
		->fetchResultSet();

	$names = [];
	foreach ( $rows as $entry ) {
		$names[] = Title::makeName( $entry->page_namespace, $entry->page_title );
	}

	return $names;
}
497
/**
 * Expand a list of pages to include templates used in those pages.
 *
 * @param string[] $inputPages Page names to look up
 * @param array $pageSet Map of prefixed page text => true collected so far
 * @return array The page set with the template targets added
 */
protected function getTemplates( $inputPages, $pageSet ) {
	$migration = $this->linksMigration;
	$queryInfo = $migration->getQueryInfo( 'templatelinks' );
	[ $nsField, $titleField ] = $migration->getTitleFields( 'templatelinks' );

	// templatelinks is joined explicitly below, so it is removed from the
	// table list supplied by the migration helper.
	$extraTables = array_diff( $queryInfo['tables'], [ 'templatelinks' ] );

	$queryBuilder = $this->dbProvider->getReplicaDatabase()
		->newSelectQueryBuilder()
		->caller( __METHOD__ )
		->select( [ 'namespace' => $nsField, 'title' => $titleField ] )
		->from( 'page' )
		->join( 'templatelinks', null, 'page_id=tl_from' )
		->tables( $extraTables )
		->joinConds( $queryInfo['joins'] );

	return $this->getLinks( $inputPages, $pageSet, $queryBuilder );
}
517
/**
 * Add the pages contributed by the SpecialExportGetExtraPages hook.
 *
 * @param string[] $inputPages Page names passed to the hook
 * @param array $pageSet Map of prefixed page text => true collected so far
 * @return array The page set with the hook-supplied pages added
 */
private function getExtraPages( $inputPages, $pageSet ) {
	// Filled in by the hook handlers.
	$additions = [];
	$this->getHookRunner()->onSpecialExportGetExtraPages( $inputPages, $additions );

	foreach ( $additions as $target ) {
		$key = $this->titleFormatter->getPrefixedText( $target );
		$pageSet[$key] = true;
	}

	return $pageSet;
}
532
/**
 * Validate link depth setting, if available.
 *
 * A missing or negative depth becomes 0. Unless the user may override the
 * export depth, the value is limited to the configured ExportMaxLinkDepth.
 * The result is then always limited to 5, including when the configured
 * maximum itself is larger than 5.
 *
 * @param int|null $depth Requested depth
 * @return int Depth in the range 0..5
 */
protected function validateLinkDepth( $depth ) {
	if ( $depth === null || $depth < 0 ) {
		return 0;
	}

	$depth = intval( $depth );

	if ( !$this->userCanOverrideExportDepth() ) {
		$maxLinkDepth = intval( $this->getConfig()->get( MainConfigNames::ExportMaxLinkDepth ) );
		// Clamp instead of returning early, so the hard limit below is
		// still applied to a large configured maximum.
		$depth = min( $depth, $maxLinkDepth );
	}

	/*
	 * There's a HARD CODED limit of 5 levels of recursion here to prevent a
	 * crazy-big export from being done by someone setting the depth
	 * number too high. In other words, last resort safety net.
	 */
	$hardLimit = 5;

	// max() guards against a negative configured maximum.
	return max( 0, min( $depth, $hardLimit ) );
}
558
/**
 * Expand a list of pages to include pages linked to from that page.
 *
 * Links are followed level by level, up to $depth levels. After the first
 * level only pages that have not been looked up yet are queried; the links
 * of pages handled on an earlier level are already part of the set.
 *
 * @param string[] $inputPages Page names to start from
 * @param array $pageSet Map of prefixed page text => true collected so far
 * @param int $depth Number of link levels to follow
 * @return array The page set with the linked pages added
 */
protected function getPageLinks( $inputPages, $pageSet, $depth ) {
	if ( $depth <= 0 ) {
		return $pageSet;
	}

	// The query shape does not depend on the level, so build it once.
	[ $nsField, $titleField ] = $this->linksMigration->getTitleFields( 'pagelinks' );
	$queryInfo = $this->linksMigration->getQueryInfo( 'pagelinks' );
	$queryBuilder = $this->dbProvider->getReplicaDatabase()
		->newSelectQueryBuilder()
		->caller( __METHOD__ )
		->select( [ 'namespace' => $nsField, 'title' => $titleField ] )
		->from( 'page' )
		->join( 'pagelinks', null, 'page_id=pl_from' )
		->tables( array_diff( $queryInfo['tables'], [ 'pagelinks' ] ) )
		->joinConds( $queryInfo['joins'] );

	// Page names whose outgoing links have already been looked up
	$expanded = [];

	for ( ; $depth > 0; --$depth ) {
		// Hand over a copy so each level starts from the pristine builder.
		$pageSet = $this->getLinks( $inputPages, $pageSet, clone $queryBuilder );

		if ( $inputPages ) {
			$expanded += array_fill_keys( $inputPages, true );
		}

		// Next level: everything in the set that has not been expanded yet
		$inputPages = array_keys( array_diff_key( $pageSet, $expanded ) );
		if ( !$inputPages ) {
			break;
		}
	}

	return $pageSet;
}
584
/**
 * Expand a list of pages to include items used in those pages.
 *
 * One query is run per input page, on a clone of $queryBuilder restricted
 * to that page's namespace and DB key.
 *
 * @param string[] $inputPages Page names to look up; unparsable names are skipped
 * @param array $pageSet Map of prefixed page text => true collected so far
 * @param SelectQueryBuilder $queryBuilder Query selecting 'namespace' and 'title'
 * @return array The page set with the input pages and their targets added
 */
protected function getLinks( $inputPages, $pageSet, SelectQueryBuilder $queryBuilder ) {
	foreach ( $inputPages as $pageName ) {
		$source = Title::newFromText( $pageName );
		if ( !$source ) {
			continue;
		}

		$pageSet[$source->getPrefixedText()] = true;

		$conds = [
			'page_namespace' => $source->getNamespace(),
			'page_title' => $source->getDBkey()
		];
		// Clone so the caller's builder is not narrowed to this page.
		$perPageQuery = clone $queryBuilder;
		$rows = $perPageQuery->where( $conds )->fetchResultSet();

		foreach ( $rows as $row ) {
			$target = Title::makeTitle( $row->namespace, $row->title );
			$pageSet[$target->getPrefixedText()] = true;
		}
	}

	return $pageSet;
}
615
/**
 * Name of the group under which this special page is listed on
 * Special:SpecialPages.
 *
 * @return string
 */
protected function getGroupName() {
	$group = 'pagetools';

	return $group;
}
619}
620
// Keep the un-namespaced class name available as an alias.
class_alias(
	SpecialExport::class,
	'SpecialExport'
);
const NS_MAIN
Definition Defines.php:65
wfTimestampNow()
Convenience function; returns MediaWiki timestamp for the present time.
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
wfResetOutputBuffers( $resetGzipEncoding=true)
Clear away any user-level output buffers, discarding contents.
Factory service for WikiExporter instances.
Object handling generic submission, CSRF protection, layout and other logic for UI forms in a reusabl...
Definition HTMLForm.php:206
Service for compat reading of links tables.
Create PSR-3 logger objects.
A class containing constants representing the names of configuration variables.
const ExportMaxLinkDepth
Name constant for the ExportMaxLinkDepth setting, for use with Config::get()
const Sitename
Name constant for the Sitename setting, for use with Config::get()
const ExportAllowAll
Name constant for the ExportAllowAll setting, for use with Config::get()
const ExportMaxHistory
Name constant for the ExportMaxHistory setting, for use with Config::get()
const ExportAllowListContributors
Name constant for the ExportAllowListContributors setting, for use with Config::get()
const ExportPagelistLimit
Name constant for the ExportPagelistLimit setting, for use with Config::get()
const ExportFromNamespaces
Name constant for the ExportFromNamespaces setting, for use with Config::get()
const ExportAllowHistory
Name constant for the ExportAllowHistory setting, for use with Config::get()
Parent class for all special pages.
setHeaders()
Sets headers - this should be called from the execute() method of all derived classes!
getConfig()
Shortcut to get main config object.
getContext()
Gets the context this SpecialPage is executed in.
getRequest()
Get the WebRequest being used for this instance.
msg( $key,... $params)
Wrapper around wfMessage that sets the current context.
getOutput()
Get the OutputPage being used for this instance.
getAuthority()
Shortcut to get the Authority executing this instance.
outputHeader( $summaryMessageKey='')
Outputs a summary message on top of special pages By default the message key is the canonical name of...
addHelpLink( $to, $overrideBaseUrl=false)
Adds help link with an icon via page indicators.
A special page that allows users to export pages in a XML file.
doExport( $page, $history, $list_authors, $exportall)
Do the actual page exporting.
execute( $par)
Default execute method Checks user permissions.
getTemplates( $inputPages, $pageSet)
Expand a list of pages to include templates used in those pages.
getPageLinks( $inputPages, $pageSet, $depth)
Expand a list of pages to include pages linked to from that page.
getLinks( $inputPages, $pageSet, SelectQueryBuilder $queryBuilder)
Expand a list of pages to include items used in those pages.
getGroupName()
Under which header this special page is listed in Special:SpecialPages See messages 'specialpages-gro...
validateLinkDepth( $depth)
Validate link depth setting, if available.
__construct(IConnectionProvider $dbProvider, WikiExporterFactory $wikiExporterFactory, TitleFormatter $titleFormatter, LinksMigration $linksMigration)
Represents a title within MediaWiki.
Definition Title.php:79
Build SELECT queries with a fluent interface.
A title formatter service for MediaWiki.
Provide primary and replica IDatabase connections.