MediaWiki  master
SpecialExport.php
Go to the documentation of this file.
1 <?php
29 
35 class SpecialExport extends SpecialPage {
37 
// Per-request state filled in by execute() and read back by doExport().
// NOTE(review): the declaration line for these four properties is absent from
// this listing; private visibility is assumed — confirm against upstream.
private $curonly, $doExport, $pageLinkDepth, $templates;

/** @var ILoadBalancer */
private $loadBalancer;

/** @var WikiExporterFactory */
private $wikiExporterFactory;

/** @var TitleFormatter */
private $titleFormatter;
46 
/**
 * Store the injected services and register the page under the name 'Export'.
 *
 * @param ILoadBalancer $loadBalancer Source of the replica connections used for lookups
 * @param WikiExporterFactory $wikiExporterFactory Creates the exporter used by doExport()
 * @param TitleFormatter $titleFormatter Formats the extra pages supplied by hooks
 */
public function __construct(
	ILoadBalancer $loadBalancer,
	WikiExporterFactory $wikiExporterFactory,
	TitleFormatter $titleFormatter
) {
	parent::__construct( 'Export' );
	$this->loadBalancer = $loadBalancer;
	$this->wikiExporterFactory = $wikiExporterFactory;
	$this->titleFormatter = $titleFormatter;
}
62 
/**
 * Entry point of the special page: either streams an XML export or renders
 * the export form.
 *
 * The request is classified into one of five cases: "add category", "add
 * namespace", "export all", a POST without subpage, or anything else (treated
 * as a GET-style request). Only the last three can trigger an export; the
 * first two merely extend the page list that is shown again in the form.
 *
 * @param string|null $par Subpage text; for GET-style requests it is the
 *  fallback value of the 'pages' parameter
 */
public function execute( $par ) {
	$this->setHeaders();
	$this->outputHeader();
	$config = $this->getConfig();

	// Set some variables
	$this->curonly = true;
	$this->doExport = false;
	$request = $this->getRequest();
	$this->templates = $request->getCheck( 'templates' );
	$this->pageLinkDepth = $this->validateLinkDepth(
		$request->getIntOrNull( 'pagelink-depth' )
	);
	$nsindex = '';
	$exportall = false;
	// The addcat/addns branches never assign $history. It is only consumed
	// when an export runs, but give it a defined value on every path anyway.
	$history = WikiExporter::CURRENT;

	if ( $request->getCheck( 'addcat' ) ) {
		$page = $request->getText( 'pages' );
		$catname = $request->getText( 'catname' );

		if ( $catname !== '' && $catname !== null && $catname !== false ) {
			$t = Title::makeTitleSafe( NS_MAIN, $catname );
			if ( $t ) {
				$catpages = $this->getPagesFromCategory( $t );
				if ( $catpages ) {
					if ( $page !== '' ) {
						$page .= "\n";
					}
					$page .= implode( "\n", $catpages );
				}
			}
		}
	} elseif ( $request->getCheck( 'addns' ) && $config->get( 'ExportFromNamespaces' ) ) {
		$page = $request->getText( 'pages' );
		$nsindex = $request->getText( 'nsindex', '' );

		if ( strval( $nsindex ) !== '' ) {
			$nspages = $this->getPagesFromNamespace( $nsindex );
			if ( $nspages ) {
				// Same separator handling as the category branch: no leading
				// blank line when the textarea was empty.
				if ( $page !== '' ) {
					$page .= "\n";
				}
				$page .= implode( "\n", $nspages );
			}
		}
	} elseif ( $request->getCheck( 'exportall' ) && $config->get( 'ExportAllowAll' ) ) {
		$this->doExport = true;
		$exportall = true;

		/* Although $page and $history are not used later on, we
		nevertheless set them to avoid that PHP notices about using
		undefined variables foul up our XML output (see call to
		doExport(...) further down) */
		$page = '';
		$history = '';
	} elseif ( $request->wasPosted() && $par == '' ) {
		// Log to see if certain parameters are actually used.
		// If not, we could deprecate them and do some cleanup, here and in WikiExporter.
		LoggerFactory::getInstance( 'export' )->debug(
			'Special:Export POST, dir: [{dir}], offset: [{offset}], limit: [{limit}]', [
				'dir' => $request->getRawVal( 'dir' ),
				'offset' => $request->getRawVal( 'offset' ),
				'limit' => $request->getRawVal( 'limit' ),
			] );

		$page = $request->getText( 'pages' );
		$this->curonly = $request->getCheck( 'curonly' );
		$rawOffset = $request->getVal( 'offset' );

		if ( $rawOffset ) {
			$offset = wfTimestamp( TS_MW, $rawOffset );
		} else {
			$offset = null;
		}

		$maxHistory = $config->get( 'ExportMaxHistory' );
		$limit = $request->getInt( 'limit' );
		$dir = $request->getVal( 'dir' );
		$history = [
			'dir' => 'asc',
			'offset' => false,
			'limit' => $maxHistory,
		];
		$historyCheck = $request->getCheck( 'history' );

		if ( $this->curonly ) {
			$history = WikiExporter::CURRENT;
		} elseif ( !$historyCheck ) {
			if ( $limit > 0 && ( $maxHistory == 0 || $limit < $maxHistory ) ) {
				$history['limit'] = $limit;
			}

			if ( $offset !== null ) {
				$history['offset'] = $offset;
			}

			// 'dir' is optional; guard against null before lower-casing it.
			if ( $dir !== null && strtolower( $dir ) == 'desc' ) {
				$history['dir'] = 'desc';
			}
		}

		if ( $page != '' ) {
			$this->doExport = true;
		}
	} else {
		// Default to current-only for GET requests.
		$page = $request->getText( 'pages', $par );
		$historyCheck = $request->getCheck( 'history' );

		if ( $historyCheck ) {
			$history = WikiExporter::FULL;
		} else {
			$history = WikiExporter::CURRENT;
		}

		if ( $page != '' ) {
			$this->doExport = true;
		}
	}

	if ( !$config->get( 'ExportAllowHistory' ) ) {
		// Override
		$history = WikiExporter::CURRENT;
	}

	$list_authors = $request->getCheck( 'listauthors' );
	if ( !$this->curonly || !$config->get( 'ExportAllowListContributors' ) ) {
		$list_authors = false;
	}

	if ( $this->doExport ) {
		$this->getOutput()->disable();

		// Cancel output buffering and gzipping if set
		// This should provide safer streaming for pages with history
		wfResetOutputBuffers();
		$request->response()->header( 'Content-type: application/xml; charset=utf-8' );
		$request->response()->header( 'X-Robots-Tag: noindex,nofollow' );

		if ( $request->getCheck( 'wpDownload' ) ) {
			// Provide a sensible filename suggestion
			$filename = urlencode( $config->get( 'Sitename' ) . '-' . wfTimestampNow() . '.xml' );
			$request->response()->header( "Content-disposition: attachment;filename={$filename}" );
		}

		$this->doExport( $page, $history, $list_authors, $exportall );

		return;
	}

	// No export requested (or nothing to export): render the form instead.
	$out = $this->getOutput();
	$out->addWikiMsg( 'exporttext' );

	if ( $page == '' ) {
		$categoryName = $request->getText( 'catname' );
	} else {
		$categoryName = '';
	}

	$formDescriptor = [
		'catname' => [
			'type' => 'textwithbutton',
			'name' => 'catname',
			'horizontal-label' => true,
			'label-message' => 'export-addcattext',
			'default' => $categoryName,
			'size' => 40,
			'buttontype' => 'submit',
			'buttonname' => 'addcat',
			'buttondefault' => $this->msg( 'export-addcat' )->text(),
			'hide-if' => [ '===', 'exportall', '1' ],
		],
	];
	if ( $config->get( 'ExportFromNamespaces' ) ) {
		$formDescriptor += [
			'nsindex' => [
				'type' => 'namespaceselectwithbutton',
				'default' => $nsindex,
				'label-message' => 'export-addnstext',
				'horizontal-label' => true,
				'name' => 'nsindex',
				'id' => 'namespace',
				'cssclass' => 'namespaceselector',
				'buttontype' => 'submit',
				'buttonname' => 'addns',
				'buttondefault' => $this->msg( 'export-addns' )->text(),
				'hide-if' => [ '===', 'exportall', '1' ],
			],
		];
	}

	if ( $config->get( 'ExportAllowAll' ) ) {
		$formDescriptor += [
			'exportall' => [
				'type' => 'check',
				'label-message' => 'exportall',
				'name' => 'exportall',
				'id' => 'exportall',
				'default' => $request->wasPosted() ? $request->getCheck( 'exportall' ) : false,
			],
		];
	}

	$formDescriptor += [
		'textarea' => [
			'class' => HTMLTextAreaField::class,
			'name' => 'pages',
			'label-message' => 'export-manual',
			'nodata' => true,
			'rows' => 10,
			'default' => $page,
			'hide-if' => [ '===', 'exportall', '1' ],
		],
	];

	if ( $config->get( 'ExportAllowHistory' ) ) {
		$formDescriptor += [
			'curonly' => [
				'type' => 'check',
				'label-message' => 'exportcuronly',
				'name' => 'curonly',
				'id' => 'curonly',
				'default' => $request->wasPosted() ? $request->getCheck( 'curonly' ) : true,
			],
		];
	} else {
		$out->addWikiMsg( 'exportnohistory' );
	}

	$formDescriptor += [
		'templates' => [
			'type' => 'check',
			'label-message' => 'export-templates',
			'name' => 'templates',
			'id' => 'wpExportTemplates',
			'default' => $request->wasPosted() ? $request->getCheck( 'templates' ) : false,
		],
	];

	if ( $config->get( 'ExportMaxLinkDepth' ) || $this->userCanOverrideExportDepth() ) {
		$formDescriptor += [
			'pagelink-depth' => [
				'type' => 'text',
				'name' => 'pagelink-depth',
				'id' => 'pagelink-depth',
				'label-message' => 'export-pagelinks',
				'default' => '0',
				'size' => 20,
			],
		];
	}

	$formDescriptor += [
		'wpDownload' => [
			'type' => 'check',
			'name' => 'wpDownload',
			'id' => 'wpDownload',
			'default' => $request->wasPosted() ? $request->getCheck( 'wpDownload' ) : true,
			'label-message' => 'export-download',
		],
	];

	if ( $config->get( 'ExportAllowListContributors' ) ) {
		$formDescriptor += [
			'listauthors' => [
				'type' => 'check',
				'label-message' => 'exportlistauthors',
				'default' => $request->wasPosted() ? $request->getCheck( 'listauthors' ) : false,
				'name' => 'listauthors',
				'id' => 'listauthors',
			],
		];
	}

	$htmlForm = HTMLForm::factory( 'ooui', $formDescriptor, $this->getContext() );
	$htmlForm->setSubmitTextMsg( 'export-submit' );
	$htmlForm->prepareForm()->displayForm( false );
	$this->addHelpLink( 'Help:Export' );
}
347 
/**
 * Tell whether the current authority holds the 'override-export-depth' right.
 *
 * @return bool
 */
protected function userCanOverrideExportDepth() {
	$authority = $this->getAuthority();

	return $authority->isAllowed( 'override-export-depth' );
}
354 
/**
 * Resolve the requested page list and stream it through a WikiExporter.
 *
 * @param string $page Newline-separated page names; ignored when $exportall is set
 * @param mixed $history History selection handed to the exporter factory;
 *  replaced by WikiExporter::FULL when $exportall is set
 * @param bool $list_authors Value copied onto the exporter's list_authors property
 * @param bool $exportall Export every page instead of the names in $page
 */
protected function doExport( $page, $history, $list_authors, $exportall ) {
	if ( $exportall ) {
		// If we are grabbing everything, enable full history and ignore the rest
		$history = WikiExporter::FULL;
	} else {
		// Keys are prefixed titles, so each page is only recorded once.
		$collected = [];

		foreach ( explode( "\n", $page ) as $rawName ) {
			$candidate = Title::newFromText( trim( $rawName ) );
			if ( !$candidate || $candidate->isExternal() || $candidate->getText() === '' ) {
				continue;
			}
			$collected[$candidate->getPrefixedText()] = true;
		}

		// The pages the user actually asked for; the expansions below start from these.
		$requested = array_keys( $collected );

		if ( $this->templates ) {
			$collected = $this->getTemplates( $requested, $collected );
		}
		$collected = $this->getExtraPages( $requested, $collected );
		$maxDepth = $this->pageLinkDepth;
		if ( $maxDepth ) {
			$collected = $this->getPageLinks( $requested, $collected, $maxDepth );
		}

		// Normalize titles to the same format and remove dupes, see T19374
		$underscored = array_map(
			static function ( $name ) {
				return str_replace( ' ', '_', $name );
			},
			array_keys( $collected )
		);
		$pages = array_unique( $underscored );
	}

	$replica = $this->loadBalancer->getConnectionRef( ILoadBalancer::DB_REPLICA );

	$exporter = $this->wikiExporterFactory->getWikiExporter( $replica, $history );
	$exporter->list_authors = $list_authors;
	$exporter->openStream();

	if ( $exportall ) {
		$exporter->allPages();
	} else {
		foreach ( $pages as $name ) {
			$title = Title::newFromText( $name );

			// T10824: Only export pages the user can read.
			// @todo Perhaps output an <error> tag or something for skipped entries.
			if ( $title === null || !$this->getAuthority()->authorizeRead( 'read', $title ) ) {
				continue;
			}

			$exporter->pageByTitle( $title );
		}
	}

	$exporter->closeStream();
}
433 
/**
 * List the pages that have a categorylinks row pointing at the given category.
 *
 * @param Title $title Category title; only its DB key is used
 * @return string[] Prefixed names built with Title::makeName(), at most
 *  'ExportPagelistLimit' entries
 */
protected function getPagesFromCategory( $title ) {
	$limit = $this->getConfig()->get( 'ExportPagelistLimit' );
	$categoryKey = $title->getDBkey();

	$replica = $this->loadBalancer->getConnectionRef( ILoadBalancer::DB_REPLICA );
	$rows = $replica->select(
		[ 'page', 'categorylinks' ],
		[ 'page_namespace', 'page_title' ],
		[ 'cl_from=page_id', 'cl_to' => $categoryKey ],
		__METHOD__,
		[ 'LIMIT' => $limit ]
	);

	$names = [];
	foreach ( $rows as $row ) {
		$names[] = Title::makeName( $row->page_namespace, $row->page_title );
	}

	return $names;
}
460 
/**
 * List the pages stored in one namespace.
 *
 * The index originates from the request, so anything that is not a plain
 * integer is rejected instead of being sent to the database as a string.
 *
 * @param int|string $nsindex Namespace index, as an integer or a string of digits
 * @return string[] Prefixed names built with Title::makeName(), at most
 *  'ExportPagelistLimit' entries; empty when $nsindex is not an integer
 */
protected function getPagesFromNamespace( $nsindex ) {
	if ( !is_int( $nsindex ) && !preg_match( '/^-?\d+$/D', (string)$nsindex ) ) {
		return [];
	}
	$nsindex = (int)$nsindex;

	$maxPages = $this->getConfig()->get( 'ExportPagelistLimit' );

	$dbr = $this->loadBalancer->getConnectionRef( ILoadBalancer::DB_REPLICA );
	$res = $dbr->select(
		'page',
		[ 'page_namespace', 'page_title' ],
		[ 'page_namespace' => $nsindex ],
		__METHOD__,
		[ 'LIMIT' => $maxPages ]
	);

	$pages = [];

	foreach ( $res as $row ) {
		$pages[] = Title::makeName( $row->page_namespace, $row->page_title );
	}

	return $pages;
}
485 
/**
 * Add to the page set everything the input pages reference through the
 * templatelinks table.
 *
 * @param string[] $inputPages Page names to expand
 * @param array $pageSet Set of pages collected so far, keyed by prefixed title
 * @return array The enlarged page set
 */
protected function getTemplates( $inputPages, $pageSet ) {
	$columns = [ 'namespace' => 'tl_namespace', 'title' => 'tl_title' ];
	$joinConds = [ 'page_id=tl_from' ];

	return $this->getLinks( $inputPages, $pageSet, 'templatelinks', $columns, $joinConds );
}
499 
/**
 * Let the SpecialExportGetExtraPages hook contribute further pages.
 *
 * @param string[] $inputPages Page names handed to the hook
 * @param array $pageSet Set of pages collected so far, keyed by prefixed title
 * @return array The page set with one key added per page supplied by the hook
 */
private function getExtraPages( $inputPages, $pageSet ) {
	$additions = [];
	$this->getHookRunner()->onSpecialExportGetExtraPages( $inputPages, $additions );

	foreach ( $additions as $addition ) {
		$key = $this->titleFormatter->getPrefixedText( $addition );
		$pageSet[$key] = true;
	}

	return $pageSet;
}
514 
/**
 * Clamp the requested link depth to what the user and the site allow.
 *
 * A missing or negative depth becomes 0. Users who cannot override the
 * export depth are limited by 'ExportMaxLinkDepth'. Whatever remains is
 * finally capped at 5.
 *
 * @param int|null $depth Requested depth, or null when none was supplied
 * @return int Depth between 0 and 5
 */
protected function validateLinkDepth( $depth ) {
	if ( $depth === null || $depth < 0 ) {
		return 0;
	}

	if ( !$this->userCanOverrideExportDepth() ) {
		// Lower the value instead of returning early, so the configured
		// maximum is itself still subject to the hard cap below.
		$depth = min( $depth, $this->getConfig()->get( 'ExportMaxLinkDepth' ) );
	}

	/*
	 * There's a HARD CODED limit of 5 levels of recursion here to prevent a
	 * crazy-big export from being done by someone setting the depth
	 * number too high. In other words, last resort safety net.
	 */
	return max( 0, intval( min( $depth, 5 ) ) );
}
540 
/**
 * Follow the pagelinks table outwards for the given number of rounds.
 *
 * After every round the whole page set becomes the input of the next one.
 *
 * @param string[] $inputPages Page names to start from
 * @param array $pageSet Set of pages collected so far, keyed by prefixed title
 * @param int $depth Number of rounds to run; nothing happens when it is not positive
 * @return array The enlarged page set
 */
protected function getPageLinks( $inputPages, $pageSet, $depth ) {
	$columns = [ 'namespace' => 'pl_namespace', 'title' => 'pl_title' ];
	$joinConds = [ 'page_id=pl_from' ];

	$roundsLeft = $depth;
	while ( $roundsLeft > 0 ) {
		$pageSet = $this->getLinks( $inputPages, $pageSet, 'pagelinks', $columns, $joinConds );
		$inputPages = array_keys( $pageSet );
		--$roundsLeft;
	}

	return $pageSet;
}
560 
/**
 * Add to the page set every target that the input pages reference in a
 * given link table.
 *
 * One query is issued per input page. Names that do not parse as a title
 * are skipped.
 *
 * @param string[] $inputPages Page names to expand
 * @param array $pageSet Set of pages collected so far, keyed by prefixed title
 * @param string $table Link table that is selected together with 'page'
 * @param array $fields Field list for the query; it must expose the aliases
 *  'namespace' and 'title', which are read from each row
 * @param array $join Conditions tying $table to 'page'
 * @return array The enlarged page set
 */
protected function getLinks( $inputPages, $pageSet, $table, $fields, $join ) {
	$replica = $this->loadBalancer->getConnectionRef( ILoadBalancer::DB_REPLICA );

	foreach ( $inputPages as $pageName ) {
		$source = Title::newFromText( $pageName );
		if ( !$source ) {
			continue;
		}

		// The source page itself always belongs to the set.
		$pageSet[$source->getPrefixedText()] = true;

		$conditions = array_merge(
			$join,
			[
				'page_namespace' => $source->getNamespace(),
				'page_title' => $source->getDBkey()
			]
		);
		$rows = $replica->select( [ 'page', $table ], $fields, $conditions, __METHOD__ );

		foreach ( $rows as $row ) {
			$target = Title::makeTitle( $row->namespace, $row->title );
			$pageSet[$target->getPrefixedText()] = true;
		}
	}

	return $pageSet;
}
602 
/**
 * Name of the group this special page is filed under.
 *
 * @return string
 */
protected function getGroupName() {
	$group = 'pagetools';

	return $group;
}
606 }
SpecialPage\msg
msg( $key,... $params)
Wrapper around wfMessage that sets the current context.
Definition: SpecialPage.php:936
Title\newFromText
static newFromText( $text, $defaultNamespace=NS_MAIN)
Create a new Title from text, such as what one would find in a link.
Definition: Title.php:377
SpecialExport\getLinks
getLinks( $inputPages, $pageSet, $table, $fields, $join)
Expand a list of pages to include items used in those pages.
Definition: SpecialExport.php:570
wfResetOutputBuffers
wfResetOutputBuffers( $resetGzipEncoding=true)
Clear away any user-level output buffers, discarding contents.
Definition: GlobalFunctions.php:1588
Title\makeName
static makeName( $ns, $title, $fragment='', $interwiki='', $canonicalNamespace=false)
Make a prefixed DB key from a DB key and a namespace index.
Definition: Title.php:854
SpecialExport
A special page that allows users to export pages in an XML file.
Definition: SpecialExport.php:35
SpecialPage\getOutput
getOutput()
Get the OutputPage being used for this instance.
Definition: SpecialPage.php:814
SpecialExport\execute
execute( $par)
Default execute method. Checks user permissions.
Definition: SpecialExport.php:63
SpecialExport\$loadBalancer
ILoadBalancer $loadBalancer
Definition: SpecialExport.php:39
WikiExporter\CURRENT
const CURRENT
Definition: WikiExporter.php:57
SpecialExport\getTemplates
getTemplates( $inputPages, $pageSet)
Expand a list of pages to include templates used in those pages.
Definition: SpecialExport.php:492
wfTimestamp
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
Definition: GlobalFunctions.php:1649
SpecialExport\doExport
doExport( $page, $history, $list_authors, $exportall)
Do the actual page exporting.
Definition: SpecialExport.php:364
SpecialPage\getAuthority
getAuthority()
Shortcut to get the Authority executing this instance.
Definition: SpecialPage.php:834
SpecialExport\$curonly
$curonly
Definition: SpecialExport.php:36
SpecialExport\$templates
$templates
Definition: SpecialExport.php:36
$res
$res
Definition: testCompression.php:57
SpecialExport\getExtraPages
getExtraPages( $inputPages, $pageSet)
Add extra pages to the list of pages to export.
Definition: SpecialExport.php:506
NS_MAIN
const NS_MAIN
Definition: Defines.php:64
$dbr
$dbr
Definition: testCompression.php:54
SpecialExport\getGroupName
getGroupName()
Under which header this special page is listed in Special:SpecialPages See messages 'specialpages-gro...
Definition: SpecialExport.php:603
MediaWiki\Export\WikiExporterFactory
Factory service for WikiExporter instances.
Definition: WikiExporterFactory.php:35
SpecialPage\addHelpLink
addHelpLink( $to, $overrideBaseUrl=false)
Adds help link with an icon via page indicators.
Definition: SpecialPage.php:972
SpecialExport\getPagesFromCategory
getPagesFromCategory( $title)
Definition: SpecialExport.php:438
SpecialExport\$doExport
$doExport
Definition: SpecialExport.php:36
SpecialPage\getHookRunner
getHookRunner()
Definition: SpecialPage.php:1119
SpecialPage\getConfig
getConfig()
Shortcut to get main config object.
Definition: SpecialPage.php:902
MediaWiki\Logger\LoggerFactory
PSR-3 logger instance factory.
Definition: LoggerFactory.php:45
SpecialExport\getPageLinks
getPageLinks( $inputPages, $pageSet, $depth)
Expand a list of pages to include pages linked to from that page.
Definition: SpecialExport.php:548
$title
$title
Definition: testCompression.php:38
SpecialPage\setHeaders
setHeaders()
Sets headers - this should be called from the execute() method of all derived classes!
Definition: SpecialPage.php:642
Title\makeTitle
static makeTitle( $ns, $title, $fragment='', $interwiki='')
Create a new Title from a namespace index and a DB key.
Definition: Title.php:648
SpecialExport\$titleFormatter
TitleFormatter $titleFormatter
Definition: SpecialExport.php:45
DB_REPLICA
const DB_REPLICA
Definition: defines.php:25
wfTimestampNow
wfTimestampNow()
Convenience function; returns MediaWiki timestamp for the present time.
Definition: GlobalFunctions.php:1678
SpecialPage\getContext
getContext()
Gets the context this SpecialPage is executed in.
Definition: SpecialPage.php:788
SpecialExport\$pageLinkDepth
$pageLinkDepth
Definition: SpecialExport.php:36
Title\makeTitleSafe
static makeTitleSafe( $ns, $title, $fragment='', $interwiki='')
Create a new Title from a namespace index and a DB key.
Definition: Title.php:674
SpecialExport\__construct
__construct(ILoadBalancer $loadBalancer, WikiExporterFactory $wikiExporterFactory, TitleFormatter $titleFormatter)
Definition: SpecialExport.php:52
SpecialPage
Parent class for all special pages.
Definition: SpecialPage.php:43
SpecialPage\getRequest
getRequest()
Get the WebRequest being used for this instance.
Definition: SpecialPage.php:804
WikiExporter\FULL
const FULL
Definition: WikiExporter.php:56
SpecialExport\validateLinkDepth
validateLinkDepth( $depth)
Validate link depth setting, if available.
Definition: SpecialExport.php:520
SpecialExport\getPagesFromNamespace
getPagesFromNamespace( $nsindex)
Definition: SpecialExport.php:465
SpecialExport\$wikiExporterFactory
WikiExporterFactory $wikiExporterFactory
Definition: SpecialExport.php:42
TitleFormatter
A title formatter service for MediaWiki.
Definition: TitleFormatter.php:35
$t
$t
Definition: testCompression.php:74
HTMLForm\factory
static factory( $displayFormat,... $arguments)
Construct a HTMLForm object for given display type.
Definition: HTMLForm.php:332
SpecialPage\outputHeader
outputHeader( $summaryMessageKey='')
Outputs a summary message on top of special pages Per default the message key is the canonical name o...
Definition: SpecialPage.php:733
SpecialExport\userCanOverrideExportDepth
userCanOverrideExportDepth()
Definition: SpecialExport.php:351
Wikimedia\Rdbms\ILoadBalancer
Database cluster connection, tracking, load balancing, and transaction manager interface.
Definition: ILoadBalancer.php:81