MediaWiki REL1_41
SpecialExport.php
Go to the documentation of this file.
1<?php
26namespace MediaWiki\Specials;
27
28use HTMLForm;
37use WikiExporter;
39
47
/** @var IConnectionProvider Supplies the replica database connection used by the lookups and the export */
private IConnectionProvider $dbProvider;

/** @var WikiExporterFactory Builds the WikiExporter that writes the XML stream */
private WikiExporterFactory $wikiExporterFactory;

/** @var TitleFormatter Turns the extra pages supplied by hooks into prefixed page names */
private TitleFormatter $titleFormatter;

/** @var LinksMigration Supplies field names and query info for the templatelinks/pagelinks tables */
private LinksMigration $linksMigration;

/**
 * Registers the special page under the name 'Export' and stores the injected services.
 *
 * @param IConnectionProvider $dbProvider
 * @param WikiExporterFactory $wikiExporterFactory
 * @param TitleFormatter $titleFormatter
 * @param LinksMigration $linksMigration
 */
public function __construct(
	IConnectionProvider $dbProvider,
	WikiExporterFactory $wikiExporterFactory,
	TitleFormatter $titleFormatter,
	LinksMigration $linksMigration
) {
	parent::__construct( 'Export' );

	// Plain assignments: constructor promotion is not available on PHP 7.4.
	$this->linksMigration = $linksMigration;
	$this->titleFormatter = $titleFormatter;
	$this->wikiExporterFactory = $wikiExporterFactory;
	$this->dbProvider = $dbProvider;
}
71
/**
 * Handle a request to Special:Export.
 *
 * Works out which pages were asked for (the textarea, a category, a namespace,
 * the "export all" checkbox, or the subpage parameter) and then either streams
 * the XML through doExport() or renders the export form.
 *
 * @param string|null $par Subpage parameter; used as the default page list for
 *  requests that fall through to the GET branch
 */
public function execute( $par ) {
	$this->setHeaders();
	$this->outputHeader();
	$config = $this->getConfig();

	$this->curonly = true;
	$this->doExport = false;
	$request = $this->getRequest();
	$this->templates = $request->getCheck( 'templates' );
	$this->pageLinkDepth = $this->validateLinkDepth(
		$request->getIntOrNull( 'pagelink-depth' )
	);
	$nsindex = '';
	$exportall = false;

	if ( $request->getCheck( 'addcat' ) ) {
		// "Add pages from category" button: extend the textarea content and
		// redisplay the form (doExport stays false).
		$page = $request->getText( 'pages' );
		$catname = $request->getText( 'catname' );

		if ( $catname !== '' && $catname !== null && $catname !== false ) {
			$t = Title::makeTitleSafe( NS_MAIN, $catname );
			if ( $t ) {
				$catpages = $this->getPagesFromCategory( $t );
				if ( $catpages ) {
					if ( $page !== '' ) {
						$page .= "\n";
					}
					$page .= implode( "\n", $catpages );
				}
			}
		}
	} elseif ( $request->getCheck( 'addns' ) &&
		$config->get( MainConfigNames::ExportFromNamespaces ) ) {
		// "Add pages from namespace" button: same idea as the category branch.
		$page = $request->getText( 'pages' );
		$nsindex = $request->getText( 'nsindex', '' );

		if ( strval( $nsindex ) !== '' ) {
			$nspages = $this->getPagesFromNamespace( (int)$nsindex );
			if ( $nspages ) {
				// Only insert a separator when there is existing text, as the
				// category branch does; otherwise the list would start with
				// an empty line.
				if ( $page !== '' ) {
					$page .= "\n";
				}
				$page .= implode( "\n", $nspages );
			}
		}
	} elseif ( $request->getCheck( 'exportall' ) &&
		$config->get( MainConfigNames::ExportAllowAll ) ) {
		$this->doExport = true;
		$exportall = true;

		/* Although $page and $history are not used later on, we
		nevertheless set them to avoid that PHP notices about using
		undefined variables foul up our XML output (see call to
		doExport(...) further down) */
		$page = '';
		$history = '';
	} elseif ( $request->wasPosted() && $par == '' ) {
		// Log to see if certain parameters are actually used.
		// If not, we could deprecate them and do some cleanup, here and in WikiExporter.
		LoggerFactory::getInstance( 'export' )->debug(
			'Special:Export POST, dir: [{dir}], offset: [{offset}], limit: [{limit}]', [
				'dir' => $request->getRawVal( 'dir' ),
				'offset' => $request->getRawVal( 'offset' ),
				'limit' => $request->getRawVal( 'limit' ),
			] );

		$page = $request->getText( 'pages' );
		$this->curonly = $request->getCheck( 'curonly' );
		$rawOffset = $request->getVal( 'offset' );

		if ( $rawOffset ) {
			$offset = wfTimestamp( TS_MW, $rawOffset );
		} else {
			$offset = null;
		}

		$maxHistory = $config->get( MainConfigNames::ExportMaxHistory );
		$limit = $request->getInt( 'limit' );
		$dir = $request->getVal( 'dir' );
		// Defaults for a history export; refined below from the request.
		$history = [
			'dir' => 'asc',
			'offset' => false,
			'limit' => $maxHistory,
		];
		$historyCheck = $request->getCheck( 'history' );

		if ( $this->curonly ) {
			$history = WikiExporter::CURRENT;
		} elseif ( !$historyCheck ) {
			// A requested limit is honoured only when it is positive and
			// below the configured maximum (0 meaning no maximum).
			if ( $limit > 0 && ( $maxHistory == 0 || $limit < $maxHistory ) ) {
				$history['limit'] = $limit;
			}

			if ( $offset !== null ) {
				$history['offset'] = $offset;
			}

			if ( strtolower( $dir ?? '' ) == 'desc' ) {
				$history['dir'] = 'desc';
			}
		}

		if ( $page != '' ) {
			$this->doExport = true;
		}
	} else {
		// Default to current-only for GET requests.
		$page = $request->getText( 'pages', $par ?? '' );
		$historyCheck = $request->getCheck( 'history' );

		if ( $historyCheck ) {
			$history = WikiExporter::FULL;
		} else {
			$history = WikiExporter::CURRENT;
		}

		if ( $page != '' ) {
			$this->doExport = true;
		}
	}

	if ( !$config->get( MainConfigNames::ExportAllowHistory ) ) {
		// Override
		$history = WikiExporter::CURRENT;
	}

	// Listing contributors is only offered for current-revision exports and
	// only when the configuration allows it.
	$list_authors = $request->getCheck( 'listauthors' );
	if ( !$this->curonly || !$config->get( MainConfigNames::ExportAllowListContributors ) ) {
		$list_authors = false;
	}

	if ( $this->doExport ) {
		$this->getOutput()->disable();

		// Cancel output buffering and gzipping if set
		// This should provide safer streaming for pages with history
		wfResetOutputBuffers();
		$request->response()->header( 'Content-type: application/xml; charset=utf-8' );
		$request->response()->header( 'X-Robots-Tag: noindex,nofollow' );

		if ( $request->getCheck( 'wpDownload' ) ) {
			// Provide a sensible filename suggestion
			$filename = urlencode( $config->get( MainConfigNames::Sitename ) . '-' .
				wfTimestampNow() . '.xml' );
			$request->response()->header( "Content-disposition: attachment;filename={$filename}" );
		}

		// @phan-suppress-next-next-line PhanPossiblyUndeclaredVariable
		// @phan-suppress-next-line PhanTypeMismatchArgumentNullable history is set when used
		$this->doExport( $page, $history, $list_authors, $exportall );

		return;
	}

	// No export requested: build and show the form.
	$out = $this->getOutput();
	$out->addWikiMsg( 'exporttext' );

	if ( $page == '' ) {
		$categoryName = $request->getText( 'catname' );
	} else {
		$categoryName = '';
	}
	$canExportAll = $config->get( MainConfigNames::ExportAllowAll );
	// When "export all" is available, the page-selection fields are hidden
	// while that checkbox is ticked.
	$hideIf = $canExportAll ? [ 'hide-if' => [ '===', 'exportall', '1' ] ] : [];

	$formDescriptor = [
		'catname' => [
			'type' => 'textwithbutton',
			'name' => 'catname',
			'horizontal-label' => true,
			'label-message' => 'export-addcattext',
			'default' => $categoryName,
			'size' => 40,
			'buttontype' => 'submit',
			'buttonname' => 'addcat',
			'buttondefault' => $this->msg( 'export-addcat' )->text(),
		] + $hideIf,
	];
	if ( $config->get( MainConfigNames::ExportFromNamespaces ) ) {
		$formDescriptor += [
			'nsindex' => [
				'type' => 'namespaceselectwithbutton',
				'default' => $nsindex,
				'label-message' => 'export-addnstext',
				'horizontal-label' => true,
				'name' => 'nsindex',
				'id' => 'namespace',
				'cssclass' => 'namespaceselector',
				'buttontype' => 'submit',
				'buttonname' => 'addns',
				'buttondefault' => $this->msg( 'export-addns' )->text(),
			] + $hideIf,
		];
	}

	if ( $canExportAll ) {
		$formDescriptor += [
			'exportall' => [
				'type' => 'check',
				'label-message' => 'exportall',
				'name' => 'exportall',
				'id' => 'exportall',
				'default' => $request->wasPosted() && $request->getCheck( 'exportall' ),
			],
		];
	}

	$formDescriptor += [
		'textarea' => [
			'class' => HTMLTextAreaField::class,
			'name' => 'pages',
			'label-message' => 'export-manual',
			'nodata' => true,
			'rows' => 10,
			'default' => $page,
		] + $hideIf,
	];

	if ( $config->get( MainConfigNames::ExportAllowHistory ) ) {
		$formDescriptor += [
			'curonly' => [
				'type' => 'check',
				'label-message' => 'exportcuronly',
				'name' => 'curonly',
				'id' => 'curonly',
				'default' => !$request->wasPosted() || $request->getCheck( 'curonly' ),
			],
		];
	} else {
		$out->addWikiMsg( 'exportnohistory' );
	}

	$formDescriptor += [
		'templates' => [
			'type' => 'check',
			'label-message' => 'export-templates',
			'name' => 'templates',
			'id' => 'wpExportTemplates',
			'default' => $request->wasPosted() && $request->getCheck( 'templates' ),
		],
	];

	if ( $config->get( MainConfigNames::ExportMaxLinkDepth ) ||
		$this->userCanOverrideExportDepth() ) {
		$formDescriptor += [
			'pagelink-depth' => [
				'type' => 'text',
				'name' => 'pagelink-depth',
				'id' => 'pagelink-depth',
				'label-message' => 'export-pagelinks',
				'default' => '0',
				'size' => 20,
			],
		];
	}

	$formDescriptor += [
		'wpDownload' => [
			'type' => 'check',
			'name' => 'wpDownload',
			'id' => 'wpDownload',
			'default' => !$request->wasPosted() || $request->getCheck( 'wpDownload' ),
			'label-message' => 'export-download',
		],
	];

	if ( $config->get( MainConfigNames::ExportAllowListContributors ) ) {
		$formDescriptor += [
			'listauthors' => [
				'type' => 'check',
				'label-message' => 'exportlistauthors',
				'default' => $request->wasPosted() && $request->getCheck( 'listauthors' ),
				'name' => 'listauthors',
				'id' => 'listauthors',
			],
		];
	}

	$htmlForm = HTMLForm::factory( 'ooui', $formDescriptor, $this->getContext() );
	$htmlForm->setSubmitTextMsg( 'export-submit' );
	$htmlForm->prepareForm()->displayForm( false );
	$this->addHelpLink( 'Help:Export' );
}
360
/**
 * Tell whether the authority executing this page holds the
 * 'override-export-depth' right.
 *
 * @return bool
 */
protected function userCanOverrideExportDepth() {
	$authority = $this->getAuthority();

	return $authority->isAllowed( 'override-export-depth' );
}
367
/**
 * Stream the XML export for the requested pages (or for every page).
 *
 * @param string $page Newline-separated list of page names; ignored when
 *  $exportall is set
 * @param mixed $history History selection handed to the exporter factory;
 *  replaced by WikiExporter::FULL when $exportall is set
 * @param bool $list_authors Value assigned to the exporter's list_authors flag
 * @param bool $exportall Export all pages instead of the given list
 */
protected function doExport( $page, $history, $list_authors, $exportall ) {
	if ( $exportall ) {
		// Grabbing everything: force full history and skip page-list handling.
		$history = WikiExporter::FULL;
	} else {
		// Keyed by prefixed title, so each page is recorded only once.
		$pageSet = [];

		foreach ( explode( "\n", $page ) as $line ) {
			$candidate = Title::newFromText( trim( $line ) );
			if ( !$candidate || $candidate->isExternal() || $candidate->getText() === '' ) {
				// Unparseable, interwiki, or empty-text titles are dropped.
				continue;
			}
			$pageSet[$candidate->getPrefixedText()] = true;
		}

		// The pages the user actually named; the expansions below start from these.
		$inputPages = array_keys( $pageSet );

		if ( $this->templates ) {
			$pageSet = $this->getTemplates( $inputPages, $pageSet );
		}
		$pageSet = $this->getExtraPages( $inputPages, $pageSet );
		if ( $this->pageLinkDepth ) {
			$pageSet = $this->getPageLinks( $inputPages, $pageSet, $this->pageLinkDepth );
		}

		// Normalize titles to the same format and remove dupes, see T19374
		$pages = array_unique( array_map(
			static fn ( $name ) => str_replace( ' ', '_', $name ),
			array_keys( $pageSet )
		) );
	}

	$db = $this->dbProvider->getReplicaDatabase();

	$exporter = $this->wikiExporterFactory->getWikiExporter( $db, $history );
	$exporter->list_authors = $list_authors;
	$exporter->openStream();

	if ( $exportall ) {
		$exporter->allPages();
	} else {
		// @phan-suppress-next-line PhanPossiblyUndeclaredVariable
		foreach ( $pages as $pageName ) {
			$title = Title::newFromText( $pageName );

			# T10824: Only export pages the user can read
			// @todo Perhaps output an <error> tag or something for skipped pages.
			if ( $title === null || !$this->getAuthority()->authorizeRead( 'read', $title ) ) {
				continue;
			}

			$exporter->pageByTitle( $title );
		}
	}

	$exporter->closeStream();
}
447
/**
 * List the pages that have a categorylinks entry pointing at the given title.
 *
 * At most ExportPagelistLimit rows are read.
 *
 * @param Title $title Title whose DB key is matched against cl_to
 * @return string[] Page names built by Title::makeName()
 */
protected function getPagesFromCategory( $title ) {
	$limit = $this->getConfig()->get( MainConfigNames::ExportPagelistLimit );
	$categoryKey = $title->getDBkey();

	$rows = $this->dbProvider->getReplicaDatabase()
		->newSelectQueryBuilder()
		->select( [ 'page_namespace', 'page_title' ] )
		->from( 'page' )
		->join( 'categorylinks', null, 'cl_from=page_id' )
		->where( [ 'cl_to' => $categoryKey ] )
		->limit( $limit )
		->caller( __METHOD__ )
		->fetchResultSet();

	$names = [];
	foreach ( $rows as $member ) {
		$names[] = Title::makeName( $member->page_namespace, $member->page_title );
	}

	return $names;
}
474
/**
 * List the pages that live in the given namespace.
 *
 * At most ExportPagelistLimit rows are read.
 *
 * @param int $nsindex Namespace index matched against page_namespace
 * @return string[] Page names built by Title::makeName()
 */
protected function getPagesFromNamespace( $nsindex ) {
	$limit = $this->getConfig()->get( MainConfigNames::ExportPagelistLimit );

	$rows = $this->dbProvider->getReplicaDatabase()
		->newSelectQueryBuilder()
		->select( [ 'page_namespace', 'page_title' ] )
		->from( 'page' )
		->where( [ 'page_namespace' => $nsindex ] )
		->limit( $limit )
		->caller( __METHOD__ )
		->fetchResultSet();

	$names = [];
	foreach ( $rows as $entry ) {
		$names[] = Title::makeName( $entry->page_namespace, $entry->page_title );
	}

	return $names;
}
498
/**
 * Expand a list of pages to include templates used in those pages.
 *
 * @param array $inputPages Page names to look up
 * @param array $pageSet Set keyed by prefixed page name
 * @return array The page set with the template targets added
 */
protected function getTemplates( $inputPages, $pageSet ) {
	[ $namespaceColumn, $titleColumn ] = $this->linksMigration->getTitleFields( 'templatelinks' );
	$linkQuery = $this->linksMigration->getQueryInfo( 'templatelinks' );

	// Aliased to 'namespace' and 'title', the row properties getLinks() reads.
	$aliases = [
		'namespace' => $namespaceColumn,
		'title' => $titleColumn,
	];
	$joinConds = array_merge(
		[ 'templatelinks' => [ 'JOIN', [ 'page_id=tl_from' ] ] ],
		$linkQuery['joins']
	);

	return $this->getLinks( $inputPages, $pageSet, $linkQuery['tables'], $aliases, $joinConds );
}
517
/**
 * Add pages contributed by the SpecialExportGetExtraPages hook to the page set.
 *
 * @param array $inputPages Page names passed to the hook
 * @param array $pageSet Set keyed by prefixed page name
 * @return array The page set including the hook-supplied pages
 */
private function getExtraPages( $inputPages, $pageSet ) {
	// Filled in by the hook handlers.
	$hookPages = [];
	$this->getHookRunner()->onSpecialExportGetExtraPages( $inputPages, $hookPages );

	foreach ( $hookPages as $hookPage ) {
		$prefixedName = $this->titleFormatter->getPrefixedText( $hookPage );
		$pageSet[$prefixedName] = true;
	}

	return $pageSet;
}
532
/**
 * Validate link depth setting, if available.
 *
 * The result is never larger than 5. Users without the
 * 'override-export-depth' right are additionally limited to the configured
 * ExportMaxLinkDepth.
 *
 * @param int|null $depth Requested depth; null or a negative value yields 0
 * @return int Depth to use for page-link expansion
 */
protected function validateLinkDepth( $depth ) {
	if ( $depth === null || $depth < 0 ) {
		return 0;
	}

	if ( !$this->userCanOverrideExportDepth() ) {
		$maxLinkDepth = $this->getConfig()->get( MainConfigNames::ExportMaxLinkDepth );
		if ( $depth > $maxLinkDepth ) {
			// Clamp rather than return here, so the hard limit below still
			// applies when the configured maximum is larger than it.
			$depth = $maxLinkDepth;
		}
	}

	/*
	 * There's a HARD CODED limit of 5 levels of recursion here to prevent a
	 * crazy-big export from being done by someone setting the depth
	 * number too high. In other words, last resort safety net.
	 */
	return intval( min( $depth, 5 ) );
}
558
/**
 * Expand a list of pages to include pages linked to from that page.
 *
 * Each pass follows the links of the pages given to it; the following pass
 * then starts from every page collected so far.
 *
 * @param array $inputPages Page names to start from
 * @param array $pageSet Set keyed by prefixed page name
 * @param int $depth Number of passes to run
 * @return array The page set with the link targets added
 */
protected function getPageLinks( $inputPages, $pageSet, $depth ) {
	if ( !( $depth > 0 ) ) {
		// Nothing to expand; skip building the query description.
		return $pageSet;
	}

	// The query description does not depend on the pass, so build it once.
	[ $nsField, $titleField ] = $this->linksMigration->getTitleFields( 'pagelinks' );
	$queryInfo = $this->linksMigration->getQueryInfo( 'pagelinks' );
	$fields = [ 'namespace' => $nsField, 'title' => $titleField ];
	$joins = array_merge(
		[ 'pagelinks' => [ 'JOIN', [ 'page_id=pl_from' ] ] ],
		$queryInfo['joins']
	);

	for ( ; $depth > 0; --$depth ) {
		$pageSet = $this->getLinks( $inputPages, $pageSet, $queryInfo['tables'], $fields, $joins );
		$inputPages = array_keys( $pageSet );
	}

	return $pageSet;
}
580
/**
 * Expand a list of pages to include items used in those pages.
 *
 * Runs one query per input page and adds every returned target to the set.
 *
 * @param array $inputPages Page names to look up
 * @param array $pageSet Set keyed by prefixed page name
 * @param array $table Tables to select from; 'page' is appended
 * @param array $fields Fields to select, aliased as 'namespace' and 'title'
 * @param array $join Join conditions for the query
 * @return array The page set with the input pages and their targets added
 */
protected function getLinks( $inputPages, $pageSet, $table, $fields, $join ) {
	$dbr = $this->dbProvider->getReplicaDatabase();
	$table[] = 'page';

	foreach ( $inputPages as $pageName ) {
		$source = Title::newFromText( $pageName );
		if ( !$source ) {
			// Names that do not parse as titles are skipped.
			continue;
		}

		$pageSet[$source->getPrefixedText()] = true;

		$conds = [
			'page_namespace' => $source->getNamespace(),
			'page_title' => $source->getDBkey()
		];
		$rows = $dbr->select( $table, $fields, $conds, __METHOD__, [], $join );

		foreach ( $rows as $row ) {
			$target = Title::makeTitle( $row->namespace, $row->title );
			$pageSet[$target->getPrefixedText()] = true;
		}
	}

	return $pageSet;
}
621
/**
 * Name of the Special:SpecialPages group this page is listed under.
 *
 * @return string
 */
protected function getGroupName() {
	$group = 'pagetools';

	return $group;
}
625}
626
// Make the un-namespaced name 'SpecialExport' resolve to the namespaced class
// (presumably so that code still using the global class name keeps working).
\class_alias( SpecialExport::class, 'SpecialExport' );
const NS_MAIN
Definition Defines.php:64
wfTimestampNow()
Convenience function; returns MediaWiki timestamp for the present time.
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
wfResetOutputBuffers( $resetGzipEncoding=true)
Clear away any user-level output buffers, discarding contents.
Object handling generic submission, CSRF protection, layout and other logic for UI forms in a reusabl...
Definition HTMLForm.php:158
Factory service for WikiExporter instances.
Service for compat reading of links tables.
Create PSR-3 logger objects.
A class containing constants representing the names of configuration variables.
const ExportMaxLinkDepth
Name constant for the ExportMaxLinkDepth setting, for use with Config::get()
const Sitename
Name constant for the Sitename setting, for use with Config::get()
const ExportAllowAll
Name constant for the ExportAllowAll setting, for use with Config::get()
const ExportMaxHistory
Name constant for the ExportMaxHistory setting, for use with Config::get()
const ExportAllowListContributors
Name constant for the ExportAllowListContributors setting, for use with Config::get()
const ExportPagelistLimit
Name constant for the ExportPagelistLimit setting, for use with Config::get()
const ExportFromNamespaces
Name constant for the ExportFromNamespaces setting, for use with Config::get()
const ExportAllowHistory
Name constant for the ExportAllowHistory setting, for use with Config::get()
Parent class for all special pages.
setHeaders()
Sets headers - this should be called from the execute() method of all derived classes!
getConfig()
Shortcut to get main config object.
getContext()
Gets the context this SpecialPage is executed in.
getRequest()
Get the WebRequest being used for this instance.
msg( $key,... $params)
Wrapper around wfMessage that sets the current context.
getOutput()
Get the OutputPage being used for this instance.
getAuthority()
Shortcut to get the Authority executing this instance.
outputHeader( $summaryMessageKey='')
Outputs a summary message on top of special pages. By default the message key is the canonical name o...
addHelpLink( $to, $overrideBaseUrl=false)
Adds help link with an icon via page indicators.
A special page that allows users to export pages in an XML file.
doExport( $page, $history, $list_authors, $exportall)
Do the actual page exporting.
getLinks( $inputPages, $pageSet, $table, $fields, $join)
Expand a list of pages to include items used in those pages.
execute( $par)
Default execute method. Checks user permissions.
getTemplates( $inputPages, $pageSet)
Expand a list of pages to include templates used in those pages.
getPageLinks( $inputPages, $pageSet, $depth)
Expand a list of pages to include pages linked to from that page.
getGroupName()
Under which header this special page is listed in Special:SpecialPages. See messages 'specialpages-gro...
validateLinkDepth( $depth)
Validate link depth setting, if available.
__construct(IConnectionProvider $dbProvider, WikiExporterFactory $wikiExporterFactory, TitleFormatter $titleFormatter, LinksMigration $linksMigration)
Represents a title within MediaWiki.
Definition Title.php:76
A title formatter service for MediaWiki.
Provide primary and replica IDatabase connections.
This program is free software; you can redistribute it and/or modify it under the terms of the GNU Ge...