MediaWiki  master
SpecialExport.php
Go to the documentation of this file.
1 <?php
29 
35 class SpecialExport extends SpecialPage {
37 
/** @var PermissionManager Used for the 'override-export-depth' right and per-title read checks */
private $permManager;

/** @var ILoadBalancer Provides the replica database connections used by the lookups */
private $loadBalancer;
43 
/**
 * Store the injected services and register the page under the name 'Export'.
 *
 * Type names are fully qualified so the signature resolves regardless of
 * which `use` statements the file carries.
 *
 * @param PermissionManager $permManager
 * @param ILoadBalancer $loadBalancer
 */
public function __construct(
	\MediaWiki\Permissions\PermissionManager $permManager,
	\Wikimedia\Rdbms\ILoadBalancer $loadBalancer
) {
	parent::__construct( 'Export' );
	$this->permManager = $permManager;
	$this->loadBalancer = $loadBalancer;
}
56 
/**
 * Entry point of the special page.
 *
 * Reads the request, decides which of the supported modes applies
 * (add pages from a category, add pages from a namespace, export
 * everything, POSTed export form, or the default path that also accepts
 * the page list via $par) and then either streams the XML dump or renders
 * the export form.
 *
 * @param string|null $par Subpage text. An empty value together with a POST
 *   selects the full form handling; otherwise it is the default for the
 *   'pages' request parameter.
 */
public function execute( $par ) {
	$this->setHeaders();
	$this->outputHeader();
	$config = $this->getConfig();

	// Set some variables
	$this->curonly = true;
	$this->doExport = false;
	$request = $this->getRequest();
	$this->templates = $request->getCheck( 'templates' );
	$this->pageLinkDepth = $this->validateLinkDepth(
		$request->getIntOrNull( 'pagelink-depth' )
	);
	$nsindex = '';
	$exportall = false;

	if ( $request->getCheck( 'addcat' ) ) {
		// "Add pages from category": extend the page list and show the form again.
		$page = $request->getText( 'pages' );
		$catname = $request->getText( 'catname' );

		if ( $catname !== '' ) {
			$t = Title::makeTitleSafe( NS_MAIN, $catname );
			if ( $t ) {
				$catpages = $this->getPagesFromCategory( $t );
				if ( $catpages ) {
					if ( $page !== '' ) {
						$page .= "\n";
					}
					$page .= implode( "\n", $catpages );
				}
			}
		}
	} elseif ( $request->getCheck( 'addns' ) && $config->get( 'ExportFromNamespaces' ) ) {
		// "Add pages from namespace": extend the page list and show the form again.
		$page = $request->getText( 'pages' );
		$nsindex = $request->getText( 'nsindex', '' );

		if ( strval( $nsindex ) !== '' ) {
			$nspages = $this->getPagesFromNamespace( $nsindex );
			if ( $nspages ) {
				// Only separate with a newline when there is something to
				// separate from, matching the category branch above.
				if ( $page !== '' ) {
					$page .= "\n";
				}
				$page .= implode( "\n", $nspages );
			}
		}
	} elseif ( $request->getCheck( 'exportall' ) && $config->get( 'ExportAllowAll' ) ) {
		$this->doExport = true;
		$exportall = true;

		/* $page and $history are not used for a full export, but they are
		 passed to doExport() further down. Define them so that no PHP
		 notice about undefined variables ends up in the XML output. */
		$page = '';
		$history = '';
	} elseif ( $request->wasPosted() && $par == '' ) {
		// Log to see if certain parameters are actually used.
		// If not, we could deprecate them and do some cleanup, here and in WikiExporter.
		LoggerFactory::getInstance( 'export' )->debug(
			'Special:Export POST, dir: [{dir}], offset: [{offset}], limit: [{limit}]', [
				'dir' => $request->getRawVal( 'dir' ),
				'offset' => $request->getRawVal( 'offset' ),
				'limit' => $request->getRawVal( 'limit' ),
			] );

		$page = $request->getText( 'pages' );
		$this->curonly = $request->getCheck( 'curonly' );
		$rawOffset = $request->getVal( 'offset' );

		if ( $rawOffset ) {
			$offset = wfTimestamp( TS_MW, $rawOffset );
		} else {
			$offset = null;
		}

		$maxHistory = $config->get( 'ExportMaxHistory' );
		$limit = $request->getInt( 'limit' );
		$dir = $request->getVal( 'dir' );
		$history = [
			'dir' => 'asc',
			'offset' => false,
			'limit' => $maxHistory,
		];
		$historyCheck = $request->getCheck( 'history' );

		if ( $this->curonly ) {
			$history = WikiExporter::CURRENT;
		} elseif ( !$historyCheck ) {
			if ( $limit > 0 && ( $maxHistory == 0 || $limit < $maxHistory ) ) {
				$history['limit'] = $limit;
			}

			if ( $offset !== null ) {
				$history['offset'] = $offset;
			}

			// 'dir' may be absent (null); cast so strtolower() never
			// receives null, which is deprecated as of PHP 8.1.
			if ( strtolower( (string)$dir ) == 'desc' ) {
				$history['dir'] = 'desc';
			}
		}

		if ( $page != '' ) {
			$this->doExport = true;
		}
	} else {
		// Default to current-only for GET requests.
		$page = $request->getText( 'pages', $par );
		$historyCheck = $request->getCheck( 'history' );

		if ( $historyCheck ) {
			$history = WikiExporter::FULL;
		} else {
			$history = WikiExporter::CURRENT;
		}

		if ( $page != '' ) {
			$this->doExport = true;
		}
	}

	if ( !$config->get( 'ExportAllowHistory' ) ) {
		// Override
		$history = WikiExporter::CURRENT;
	}

	// The contributor list is only offered for current-revision exports
	// and only when the site configuration allows it.
	$list_authors = $request->getCheck( 'listauthors' );
	if ( !$this->curonly || !$config->get( 'ExportAllowListContributors' ) ) {
		$list_authors = false;
	}

	if ( $this->doExport ) {
		$this->getOutput()->disable();

		// Cancel output buffering and gzipping if set
		// This should provide safer streaming for pages with history
		wfResetOutputBuffers();
		$request->response()->header( 'Content-type: application/xml; charset=utf-8' );
		$request->response()->header( 'X-Robots-Tag: noindex,nofollow' );

		if ( $request->getCheck( 'wpDownload' ) ) {
			// Provide a sane filename suggestion
			$filename = urlencode( $config->get( 'Sitename' ) . '-' . wfTimestampNow() . '.xml' );
			$request->response()->header( "Content-disposition: attachment;filename={$filename}" );
		}

		$this->doExport( $page, $history, $list_authors, $exportall );

		return;
	}

	// No export requested (or nothing to export): render the form.
	$out = $this->getOutput();
	$out->addWikiMsg( 'exporttext' );

	if ( $page == '' ) {
		$categoryName = $request->getText( 'catname' );
	} else {
		$categoryName = '';
	}

	$formDescriptor = [
		'catname' => [
			'type' => 'textwithbutton',
			'name' => 'catname',
			'horizontal-label' => true,
			'label-message' => 'export-addcattext',
			'default' => $categoryName,
			'size' => 40,
			'buttontype' => 'submit',
			'buttonname' => 'addcat',
			'buttondefault' => $this->msg( 'export-addcat' )->text(),
			'hide-if' => [ '===', 'exportall', '1' ],
		],
	];
	if ( $config->get( 'ExportFromNamespaces' ) ) {
		$formDescriptor += [
			'nsindex' => [
				'type' => 'namespaceselectwithbutton',
				'default' => $nsindex,
				'label-message' => 'export-addnstext',
				'horizontal-label' => true,
				'name' => 'nsindex',
				'id' => 'namespace',
				'cssclass' => 'namespaceselector',
				'buttontype' => 'submit',
				'buttonname' => 'addns',
				'buttondefault' => $this->msg( 'export-addns' )->text(),
				'hide-if' => [ '===', 'exportall', '1' ],
			],
		];
	}

	if ( $config->get( 'ExportAllowAll' ) ) {
		$formDescriptor += [
			'exportall' => [
				'type' => 'check',
				'label-message' => 'exportall',
				'name' => 'exportall',
				'id' => 'exportall',
				'default' => $request->wasPosted() ? $request->getCheck( 'exportall' ) : false,
			],
		];
	}

	$formDescriptor += [
		'textarea' => [
			'class' => HTMLTextAreaField::class,
			'name' => 'pages',
			'label-message' => 'export-manual',
			'nodata' => true,
			'rows' => 10,
			'default' => $page,
			'hide-if' => [ '===', 'exportall', '1' ],
		],
	];

	if ( $config->get( 'ExportAllowHistory' ) ) {
		$formDescriptor += [
			'curonly' => [
				'type' => 'check',
				'label-message' => 'exportcuronly',
				'name' => 'curonly',
				'id' => 'curonly',
				'default' => $request->wasPosted() ? $request->getCheck( 'curonly' ) : true,
			],
		];
	} else {
		$out->addWikiMsg( 'exportnohistory' );
	}

	$formDescriptor += [
		'templates' => [
			'type' => 'check',
			'label-message' => 'export-templates',
			'name' => 'templates',
			'id' => 'wpExportTemplates',
			'default' => $request->wasPosted() ? $request->getCheck( 'templates' ) : false,
		],
	];

	if ( $config->get( 'ExportMaxLinkDepth' ) || $this->userCanOverrideExportDepth() ) {
		$formDescriptor += [
			'pagelink-depth' => [
				'type' => 'text',
				'name' => 'pagelink-depth',
				'id' => 'pagelink-depth',
				'label-message' => 'export-pagelinks',
				'default' => '0',
				'size' => 20,
			],
		];
	}

	$formDescriptor += [
		'wpDownload' => [
			'type' => 'check',
			'name' => 'wpDownload',
			'id' => 'wpDownload',
			'default' => $request->wasPosted() ? $request->getCheck( 'wpDownload' ) : true,
			'label-message' => 'export-download',
		],
	];

	if ( $config->get( 'ExportAllowListContributors' ) ) {
		$formDescriptor += [
			'listauthors' => [
				'type' => 'check',
				'label-message' => 'exportlistauthors',
				'default' => $request->wasPosted() ? $request->getCheck( 'listauthors' ) : false,
				'name' => 'listauthors',
				'id' => 'listauthors',
			],
		];
	}

	$htmlForm = HTMLForm::factory( 'ooui', $formDescriptor, $this->getContext() );
	$htmlForm->setSubmitTextMsg( 'export-submit' );
	$htmlForm->prepareForm()->displayForm( false );
	$this->addHelpLink( 'Help:Export' );
}
341 
/**
 * Tell whether the current user holds the 'override-export-depth' right.
 *
 * @return bool
 */
protected function userCanOverrideExportDepth() {
	$user = $this->getUser();

	return $this->permManager->userHasRight( $user, 'override-export-depth' );
}
348 
/**
 * Do the actual page exporting.
 *
 * @param string $page Newline-separated page names; ignored when $exportall is set
 * @param mixed $history History selection handed to WikiExporter; replaced by
 *   WikiExporter::FULL when $exportall is set
 * @param bool $list_authors Value assigned to the exporter's list_authors property
 * @param bool $exportall Export every page instead of the listed ones
 */
protected function doExport( $page, $history, $list_authors, $exportall ) {
	if ( $exportall ) {
		// If we are grabbing everything, enable full history and ignore the rest
		$history = WikiExporter::FULL;
	} else {
		// Inverted index (prefixed text => true) so each page is recorded once
		$pageSet = [];

		// Split up and normalize input
		foreach ( explode( "\n", $page ) as $line ) {
			$candidate = Title::newFromText( trim( $line ) );
			if ( !$candidate || $candidate->isExternal() || $candidate->getText() === '' ) {
				continue;
			}
			$pageSet[$candidate->getPrefixedText()] = true;
		}

		// Set of original pages to pass on to further manipulation...
		$inputPages = array_keys( $pageSet );

		// Look up any linked pages if asked...
		if ( $this->templates ) {
			$pageSet = $this->getTemplates( $inputPages, $pageSet );
		}
		if ( $this->pageLinkDepth ) {
			$pageSet = $this->getPageLinks( $inputPages, $pageSet, $this->pageLinkDepth );
		}

		// Normalize titles to the same format and remove dupes, see T19374
		$pages = array_unique( array_map(
			static function ( $name ) {
				return str_replace( ' ', '_', $name );
			},
			array_keys( $pageSet )
		) );
	}

	/* Ok, let's get to it... */
	$db = $this->loadBalancer->getConnectionRef( ILoadBalancer::DB_REPLICA );

	$exporter = new WikiExporter( $db, $history );
	$exporter->list_authors = $list_authors;
	$exporter->openStream();

	if ( $exportall ) {
		$exporter->allPages();
	} else {
		foreach ( $pages as $name ) {
			$title = Title::newFromText( $name );

			# T10824: Only export pages the user can read
			// @todo Perhaps output an <error> tag or something for skipped pages.
			if ( $title !== null
				&& $this->permManager->userCan( 'read', $this->getUser(), $title )
			) {
				$exporter->pageByTitle( $title );
			}
		}
	}

	$exporter->closeStream();
}
426 
/**
 * List the pages that have a categorylinks row pointing at the given title.
 *
 * The query is limited to the 'ExportPagelistLimit' configuration value.
 *
 * @param Title $title Title whose DB key is matched against cl_to
 * @return string[] Page names as built by Title::makeName()
 */
protected function getPagesFromCategory( $title ) {
	$limit = $this->getConfig()->get( 'ExportPagelistLimit' );
	$categoryKey = $title->getDBkey();

	$rows = $this->loadBalancer
		->getConnectionRef( ILoadBalancer::DB_REPLICA )
		->select(
			[ 'page', 'categorylinks' ],
			[ 'page_namespace', 'page_title' ],
			[ 'cl_from=page_id', 'cl_to' => $categoryKey ],
			__METHOD__,
			[ 'LIMIT' => $limit ]
		);

	$names = [];
	foreach ( $rows as $row ) {
		$names[] = Title::makeName( $row->page_namespace, $row->page_title );
	}

	return $names;
}
453 
/**
 * List the pages whose page_namespace equals the given index.
 *
 * The query is limited to the 'ExportPagelistLimit' configuration value.
 *
 * @param int|string $nsindex Namespace index as received from the request
 * @return string[] Page names as built by Title::makeName()
 */
protected function getPagesFromNamespace( $nsindex ) {
	$limit = $this->getConfig()->get( 'ExportPagelistLimit' );

	$rows = $this->loadBalancer
		->getConnectionRef( ILoadBalancer::DB_REPLICA )
		->select(
			'page',
			[ 'page_namespace', 'page_title' ],
			[ 'page_namespace' => $nsindex ],
			__METHOD__,
			[ 'LIMIT' => $limit ]
		);

	$names = [];
	foreach ( $rows as $row ) {
		$names[] = Title::makeName( $row->page_namespace, $row->page_title );
	}

	return $names;
}
478 
/**
 * Expand a list of pages to include templates used in those pages.
 *
 * @param array $inputPages Page names to look up
 * @param array $pageSet Associative array indexed by page names, values are true
 * @return array The page set with the template pages added
 */
protected function getTemplates( $inputPages, $pageSet ) {
	$fields = [ 'namespace' => 'tl_namespace', 'title' => 'tl_title' ];
	$join = [ 'page_id=tl_from' ];

	return $this->getLinks( $inputPages, $pageSet, 'templatelinks', $fields, $join );
}
492 
/**
 * Validate link depth setting, if available.
 *
 * A missing or negative depth yields 0. Users without the
 * 'override-export-depth' right are limited to 'ExportMaxLinkDepth'.
 * The hard limit of 5 is applied to every result, including the case
 * where the configured maximum itself is larger than 5.
 *
 * @param int|null $depth Requested depth, null when the parameter is absent
 * @return int Depth to use, never more than 5
 */
protected function validateLinkDepth( $depth ) {
	if ( $depth === null || $depth < 0 ) {
		return 0;
	}

	if ( !$this->userCanOverrideExportDepth() ) {
		// Clamp instead of returning early so the hard limit below
		// still applies to a large configured maximum.
		$maxLinkDepth = $this->getConfig()->get( 'ExportMaxLinkDepth' );
		$depth = min( $depth, $maxLinkDepth );
	}

	/*
	 * There's a HARD CODED limit of 5 levels of recursion here to prevent a
	 * crazy-big export from being done by someone setting the depth
	 * number too high. In other words, last resort safety net.
	 */
	return intval( min( $depth, 5 ) );
}
518 
/**
 * Expand a list of pages to include pages linked to from that page.
 *
 * Runs one pagelinks lookup per level; each level uses every page
 * collected so far as its input.
 *
 * @param array $inputPages Page names to start from
 * @param array $pageSet Associative array indexed by page names, values are true
 * @param int $depth Number of levels to follow
 * @return array The expanded page set
 */
protected function getPageLinks( $inputPages, $pageSet, $depth ) {
	$fields = [ 'namespace' => 'pl_namespace', 'title' => 'pl_title' ];
	$join = [ 'page_id=pl_from' ];

	while ( $depth > 0 ) {
		$pageSet = $this->getLinks( $inputPages, $pageSet, 'pagelinks', $fields, $join );
		$inputPages = array_keys( $pageSet );
		--$depth;
	}

	return $pageSet;
}
538 
/**
 * Expand a list of pages to include items used in those pages.
 *
 * Issues one query per input page that parses as a title.
 *
 * @param array $inputPages Page names to look up
 * @param array $pageSet Associative array indexed by page names, values are true
 * @param string $table Link table joined against 'page'
 * @param array $fields Select list; must alias the target columns as
 *   'namespace' and 'title'
 * @param array $join Join condition(s) between 'page' and $table
 * @return array The page set with every input page and its link targets added
 */
protected function getLinks( $inputPages, $pageSet, $table, $fields, $join ) {
	$dbr = $this->loadBalancer->getConnectionRef( ILoadBalancer::DB_REPLICA );

	foreach ( $inputPages as $page ) {
		$title = Title::newFromText( $page );
		if ( !$title ) {
			continue;
		}

		$pageSet[$title->getPrefixedText()] = true;

		$conds = array_merge(
			$join,
			[
				'page_namespace' => $title->getNamespace(),
				'page_title' => $title->getDBkey()
			]
		);
		$rows = $dbr->select( [ 'page', $table ], $fields, $conds, __METHOD__ );

		foreach ( $rows as $row ) {
			$linked = Title::makeTitle( $row->namespace, $row->title );
			$pageSet[$linked->getPrefixedText()] = true;
		}
	}

	return $pageSet;
}
580 
/**
 * Name of the group this page is listed under on Special:SpecialPages.
 *
 * @return string
 */
protected function getGroupName() {
	return 'pagetools';
}
584 }
SpecialPage\msg
msg( $key,... $params)
Wrapper around wfMessage that sets the current context.
Definition: SpecialPage.php:900
Title\newFromText
static newFromText( $text, $defaultNamespace=NS_MAIN)
Create a new Title from text, such as what one would find in a link.
Definition: Title.php:328
SpecialExport\getLinks
getLinks( $inputPages, $pageSet, $table, $fields, $join)
Expand a list of pages to include items used in those pages.
Definition: SpecialExport.php:548
wfResetOutputBuffers
wfResetOutputBuffers( $resetGzipEncoding=true)
Clear away any user-level output buffers, discarding contents.
Definition: GlobalFunctions.php:1642
Title\makeName
static makeName( $ns, $title, $fragment='', $interwiki='', $canonicalNamespace=false)
Make a prefixed DB key from a DB key and a namespace index.
Definition: Title.php:822
SpecialExport
A special page that allows users to export pages in an XML file.
Definition: SpecialExport.php:35
SpecialPage\getOutput
getOutput()
Get the OutputPage being used for this instance.
Definition: SpecialPage.php:788
SpecialExport\execute
execute( $par)
Default execute method. Checks user permissions.
Definition: SpecialExport.php:57
SpecialExport\$loadBalancer
ILoadBalancer $loadBalancer
Definition: SpecialExport.php:42
WikiExporter\CURRENT
const CURRENT
Definition: WikiExporter.php:54
SpecialExport\getTemplates
getTemplates( $inputPages, $pageSet)
Expand a list of pages to include templates used in those pages.
Definition: SpecialExport.php:485
wfTimestamp
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
Definition: GlobalFunctions.php:1815
SpecialExport\doExport
doExport( $page, $history, $list_authors, $exportall)
Do the actual page exporting.
Definition: SpecialExport.php:358
SpecialExport\$curonly
$curonly
Definition: SpecialExport.php:36
SpecialExport\$templates
$templates
Definition: SpecialExport.php:36
$res
$res
Definition: testCompression.php:57
$dbr
$dbr
Definition: testCompression.php:54
SpecialExport\getGroupName
getGroupName()
Under which header this special page is listed in Special:SpecialPages See messages 'specialpages-gro...
Definition: SpecialExport.php:581
NS_MAIN
const NS_MAIN
Definition: Defines.php:69
SpecialPage\addHelpLink
addHelpLink( $to, $overrideBaseUrl=false)
Adds help link with an icon via page indicators.
Definition: SpecialPage.php:936
SpecialExport\getPagesFromCategory
getPagesFromCategory( $title)
Definition: SpecialExport.php:431
SpecialExport\$doExport
$doExport
Definition: SpecialExport.php:36
SpecialPage\getConfig
getConfig()
Shortcut to get main config object.
Definition: SpecialPage.php:866
MediaWiki\Logger\LoggerFactory
PSR-3 logger instance factory.
Definition: LoggerFactory.php:45
SpecialExport\getPageLinks
getPageLinks( $inputPages, $pageSet, $depth)
Expand a list of pages to include pages linked to from that page.
Definition: SpecialExport.php:526
$title
$title
Definition: testCompression.php:38
SpecialPage\setHeaders
setHeaders()
Sets headers - this should be called from the execute() method of all derived classes!...
Definition: SpecialPage.php:616
SpecialPage\getUser
getUser()
Shortcut to get the User executing this instance.
Definition: SpecialPage.php:798
Title\makeTitle
static makeTitle( $ns, $title, $fragment='', $interwiki='')
Create a new Title from a namespace index and a DB key.
Definition: Title.php:591
DB_REPLICA
const DB_REPLICA
Definition: defines.php:25
wfTimestampNow
wfTimestampNow()
Convenience function; returns MediaWiki timestamp for the present time.
Definition: GlobalFunctions.php:1844
WikiExporter
Definition: WikiExporter.php:40
SpecialPage\getContext
getContext()
Gets the context this SpecialPage is executed in.
Definition: SpecialPage.php:762
SpecialExport\$pageLinkDepth
$pageLinkDepth
Definition: SpecialExport.php:36
Title\makeTitleSafe
static makeTitleSafe( $ns, $title, $fragment='', $interwiki='')
Create a new Title from a namespace index and a DB key.
Definition: Title.php:617
SpecialExport\__construct
__construct(PermissionManager $permManager, ILoadBalancer $loadBalancer)
Definition: SpecialExport.php:48
MediaWiki\Permissions\PermissionManager
A service class for checking permissions. To obtain an instance, use MediaWikiServices::getInstance()-...
Definition: PermissionManager.php:50
SpecialPage
Parent class for all special pages.
Definition: SpecialPage.php:42
SpecialPage\getRequest
getRequest()
Get the WebRequest being used for this instance.
Definition: SpecialPage.php:778
WikiExporter\FULL
const FULL
Definition: WikiExporter.php:53
SpecialExport\validateLinkDepth
validateLinkDepth( $depth)
Validate link depth setting, if available.
Definition: SpecialExport.php:498
SpecialExport\getPagesFromNamespace
getPagesFromNamespace( $nsindex)
Definition: SpecialExport.php:458
SpecialExport\$permManager
PermissionManager $permManager
Definition: SpecialExport.php:39
$t
$t
Definition: testCompression.php:74
HTMLForm\factory
static factory( $displayFormat,... $arguments)
Construct a HTMLForm object for given display type.
Definition: HTMLForm.php:316
SpecialPage\outputHeader
outputHeader( $summaryMessageKey='')
Outputs a summary message on top of special pages. Per default the message key is the canonical name o...
Definition: SpecialPage.php:707
SpecialExport\userCanOverrideExportDepth
userCanOverrideExportDepth()
Definition: SpecialExport.php:345
Wikimedia\Rdbms\ILoadBalancer
Database cluster connection, tracking, load balancing, and transaction manager interface.
Definition: ILoadBalancer.php:81