MediaWiki  master
SpecialExport.php
Go to the documentation of this file.
1 <?php
28 
34 class SpecialExport extends SpecialPage {
36 
/**
 * Set up the special page, handing the name 'Export' to the parent
 * SpecialPage constructor.
 */
public function __construct() {
	parent::__construct( 'Export' );
}
40 
/**
 * Handle a request to Special:Export.
 *
 * Depending on the request this does one of three things:
 *  - 'addcat' / 'addns': append the pages of a category or namespace to the
 *    page list and redisplay the form;
 *  - a page list was supplied, or 'exportall' is set and allowed by
 *    configuration: stream the XML dump and return without rendering HTML;
 *  - otherwise: render the export form.
 *
 * @param string|null $par Subpage part of the title; used as the default
 *  value of 'pages' for requests that are not handled as a POST submission
 */
public function execute( $par ) {
	$this->setHeaders();
	$this->outputHeader();
	$config = $this->getConfig();

	// Set some variables
	$this->curonly = true;
	$this->doExport = false;
	$request = $this->getRequest();
	$this->templates = $request->getCheck( 'templates' );
	$this->pageLinkDepth = $this->validateLinkDepth(
		$request->getIntOrNull( 'pagelink-depth' )
	);
	$nsindex = '';
	$exportall = false;

	if ( $request->getCheck( 'addcat' ) ) {
		// "Add pages from category" button: extend the list, do not export yet
		$page = $request->getText( 'pages' );
		$catname = $request->getText( 'catname' );

		if ( $catname !== '' && $catname !== null && $catname !== false ) {
			$t = Title::makeTitleSafe( NS_MAIN, $catname );
			if ( $t ) {
				$catpages = $this->getPagesFromCategory( $t );
				if ( $catpages ) {
					if ( $page !== '' ) {
						$page .= "\n";
					}
					$page .= implode( "\n", $catpages );
				}
			}
		}
	} elseif ( $request->getCheck( 'addns' ) && $config->get( 'ExportFromNamespaces' ) ) {
		// "Add pages from namespace" button: extend the list, do not export yet
		$page = $request->getText( 'pages' );
		$nsindex = $request->getText( 'nsindex', '' );

		if ( strval( $nsindex ) !== '' ) {
			$nspages = $this->getPagesFromNamespace( $nsindex );
			if ( $nspages ) {
				// Same separator rule as the 'addcat' branch, so an empty
				// list does not end up starting with a blank line
				if ( $page !== '' ) {
					$page .= "\n";
				}
				$page .= implode( "\n", $nspages );
			}
		}
	} elseif ( $request->getCheck( 'exportall' ) && $config->get( 'ExportAllowAll' ) ) {
		$this->doExport = true;
		$exportall = true;

		/* Although $page and $history are not used later on, we
		nevertheless set them to avoid that PHP notices about using
		undefined variables foul up our XML output (see call to
		doExport(...) further down) */
		$page = '';
		$history = '';
	} elseif ( $request->wasPosted() && $par == '' ) {
		// Log to see if certain parameters are actually used.
		// If not, we could deprecate them and do some cleanup, here and in WikiExporter.
		LoggerFactory::getInstance( 'export' )->debug(
			'Special:Export POST, dir: [{dir}], offset: [{offset}], limit: [{limit}]', [
				'dir' => $request->getRawVal( 'dir' ),
				'offset' => $request->getRawVal( 'offset' ),
				'limit' => $request->getRawVal( 'limit' ),
			] );

		$page = $request->getText( 'pages' );
		$this->curonly = $request->getCheck( 'curonly' );
		$rawOffset = $request->getVal( 'offset' );

		if ( $rawOffset ) {
			$offset = wfTimestamp( TS_MW, $rawOffset );
		} else {
			$offset = null;
		}

		$maxHistory = $config->get( 'ExportMaxHistory' );
		$limit = $request->getInt( 'limit' );
		$dir = $request->getVal( 'dir' );
		$history = [
			'dir' => 'asc',
			'offset' => false,
			'limit' => $maxHistory,
		];
		$historyCheck = $request->getCheck( 'history' );

		if ( $this->curonly ) {
			$history = WikiExporter::CURRENT;
		} elseif ( !$historyCheck ) {
			if ( $limit > 0 && ( $maxHistory == 0 || $limit < $maxHistory ) ) {
				$history['limit'] = $limit;
			}

			if ( !is_null( $offset ) ) {
				$history['offset'] = $offset;
			}

			// 'dir' may be absent from the request (null); cast so that
			// strtolower() is never handed null
			if ( strtolower( (string)$dir ) == 'desc' ) {
				$history['dir'] = 'desc';
			}
		}

		if ( $page != '' ) {
			$this->doExport = true;
		}
	} else {
		// Default to current-only for GET requests.
		$page = $request->getText( 'pages', $par );
		$historyCheck = $request->getCheck( 'history' );

		if ( $historyCheck ) {
			$history = WikiExporter::FULL;
		} else {
			$history = WikiExporter::CURRENT;
		}

		if ( $page != '' ) {
			$this->doExport = true;
		}
	}

	if ( !$config->get( 'ExportAllowHistory' ) ) {
		// Override
		$history = WikiExporter::CURRENT;
	}

	// The contributor list is only offered for current-revision exports
	// and only when the configuration allows it
	$list_authors = $request->getCheck( 'listauthors' );
	if ( !$this->curonly || !$config->get( 'ExportAllowListContributors' ) ) {
		$list_authors = false;
	}

	if ( $this->doExport ) {
		$this->getOutput()->disable();

		// Cancel output buffering and gzipping if set
		// This should provide safer streaming for pages with history
		wfResetOutputBuffers();
		$request->response()->header( "Content-type: application/xml; charset=utf-8" );
		$request->response()->header( "X-Robots-Tag: noindex,nofollow" );

		if ( $request->getCheck( 'wpDownload' ) ) {
			// Provide a sane filename suggestion
			$filename = urlencode( $config->get( 'Sitename' ) . '-' . wfTimestampNow() . '.xml' );
			$request->response()->header( "Content-disposition: attachment;filename={$filename}" );
		}

		$this->doExport( $page, $history, $list_authors, $exportall );

		return;
	}

	// No export requested: build and show the form
	$out = $this->getOutput();
	$out->addWikiMsg( 'exporttext' );

	// Only carry the category name over while the page list is still empty
	if ( $page == '' ) {
		$categoryName = $request->getText( 'catname' );
	} else {
		$categoryName = '';
	}

	$formDescriptor = [
		'catname' => [
			'type' => 'textwithbutton',
			'name' => 'catname',
			'horizontal-label' => true,
			'label-message' => 'export-addcattext',
			'default' => $categoryName,
			'size' => 40,
			'buttontype' => 'submit',
			'buttonname' => 'addcat',
			'buttondefault' => $this->msg( 'export-addcat' )->text(),
			'hide-if' => [ '===', 'exportall', '1' ],
		],
	];
	if ( $config->get( 'ExportFromNamespaces' ) ) {
		$formDescriptor += [
			'nsindex' => [
				'type' => 'namespaceselectwithbutton',
				'default' => $nsindex,
				'label-message' => 'export-addnstext',
				'horizontal-label' => true,
				'name' => 'nsindex',
				'id' => 'namespace',
				'cssclass' => 'namespaceselector',
				'buttontype' => 'submit',
				'buttonname' => 'addns',
				'buttondefault' => $this->msg( 'export-addns' )->text(),
				'hide-if' => [ '===', 'exportall', '1' ],
			],
		];
	}

	if ( $config->get( 'ExportAllowAll' ) ) {
		$formDescriptor += [
			'exportall' => [
				'type' => 'check',
				'label-message' => 'exportall',
				'name' => 'exportall',
				'id' => 'exportall',
				'default' => $request->wasPosted() ? $request->getCheck( 'exportall' ) : false,
			],
		];
	}

	$formDescriptor += [
		'textarea' => [
			'class' => HTMLTextAreaField::class,
			'name' => 'pages',
			'label-message' => 'export-manual',
			'nodata' => true,
			'rows' => 10,
			'default' => $page,
			'hide-if' => [ '===', 'exportall', '1' ],
		],
	];

	if ( $config->get( 'ExportAllowHistory' ) ) {
		$formDescriptor += [
			'curonly' => [
				'type' => 'check',
				'label-message' => 'exportcuronly',
				'name' => 'curonly',
				'id' => 'curonly',
				'default' => $request->wasPosted() ? $request->getCheck( 'curonly' ) : true,
			],
		];
	} else {
		$out->addWikiMsg( 'exportnohistory' );
	}

	$formDescriptor += [
		'templates' => [
			'type' => 'check',
			'label-message' => 'export-templates',
			'name' => 'templates',
			'id' => 'wpExportTemplates',
			'default' => $request->wasPosted() ? $request->getCheck( 'templates' ) : false,
		],
	];

	if ( $config->get( 'ExportMaxLinkDepth' ) || $this->userCanOverrideExportDepth() ) {
		$formDescriptor += [
			'pagelink-depth' => [
				'type' => 'text',
				'name' => 'pagelink-depth',
				'id' => 'pagelink-depth',
				'label-message' => 'export-pagelinks',
				'default' => '0',
				'size' => 20,
			],
		];
	}

	$formDescriptor += [
		'wpDownload' => [
			'type' => 'check',
			'name' => 'wpDownload',
			'id' => 'wpDownload',
			'default' => $request->wasPosted() ? $request->getCheck( 'wpDownload' ) : true,
			'label-message' => 'export-download',
		],
	];

	if ( $config->get( 'ExportAllowListContributors' ) ) {
		$formDescriptor += [
			'listauthors' => [
				'type' => 'check',
				'label-message' => 'exportlistauthors',
				'default' => $request->wasPosted() ? $request->getCheck( 'listauthors' ) : false,
				'name' => 'listauthors',
				'id' => 'listauthors',
			],
		];
	}

	$htmlForm = HTMLForm::factory( 'ooui', $formDescriptor, $this->getContext() );
	$htmlForm->setSubmitTextMsg( 'export-submit' );
	$htmlForm->prepareForm()->displayForm( false );
	$this->addHelpLink( 'Help:Export' );
}
325 
/**
 * Tell whether the current user holds the 'override-export-depth' right.
 *
 * @return bool Result of PermissionManager::userHasRight() for that right
 */
private function userCanOverrideExportDepth() {
	$permissionManager = MediaWikiServices::getInstance()->getPermissionManager();

	return $permissionManager->userHasRight( $this->getUser(), 'override-export-depth' );
}
334 
/**
 * Do the actual page exporting: resolve the requested pages and write
 * them through a WikiExporter stream.
 *
 * @param string $page Newline-separated list of page names (ignored when
 *  $exportall is set)
 * @param mixed $history History setting handed to the WikiExporter
 *  constructor; replaced by WikiExporter::FULL when $exportall is set
 * @param bool $list_authors Value assigned to the exporter's list_authors flag
 * @param bool $exportall Export every page instead of the given list
 */
private function doExport( $page, $history, $list_authors, $exportall ) {
	if ( $exportall ) {
		// Grabbing everything: force full history, the page list is irrelevant
		$history = WikiExporter::FULL;
	} else {
		// Pages to look up, keyed by prefixed text so each is recorded once
		$pageSet = [];

		foreach ( explode( "\n", $page ) as $rawLine ) {
			$candidate = Title::newFromText( trim( $rawLine ) );
			// Skip unparsable lines, interwiki titles and empty page names
			if ( !$candidate || $candidate->isExternal() || $candidate->getText() === '' ) {
				continue;
			}
			$pageSet[$candidate->getPrefixedText()] = true;
		}

		// The pages the user actually asked for; the starting point for expansion
		$inputPages = array_keys( $pageSet );

		if ( $this->templates ) {
			$pageSet = $this->getTemplates( $inputPages, $pageSet );
		}

		$linkDepth = $this->pageLinkDepth;
		if ( $linkDepth ) {
			$pageSet = $this->getPageLinks( $inputPages, $pageSet, $linkDepth );
		}

		// Normalize titles to the same format and remove dupes, see T19374
		$pages = array_unique( str_replace( " ", "_", array_keys( $pageSet ) ) );
	}

	$exporter = new WikiExporter( wfGetDB( DB_REPLICA ), $history );
	$exporter->list_authors = $list_authors;
	$exporter->openStream();

	if ( $exportall ) {
		$exporter->allPages();
	} else {
		$permissionManager = MediaWikiServices::getInstance()->getPermissionManager();

		foreach ( $pages as $pageName ) {
			$title = Title::newFromText( $pageName );
			if ( $title === null ) {
				// @todo Perhaps output an <error> tag or something.
				continue;
			}

			# T10824: Only export pages the user can read
			if ( !$permissionManager->userCan( 'read', $this->getUser(), $title ) ) {
				// @todo Perhaps output an <error> tag or something.
				continue;
			}

			$exporter->pageByTitle( $title );
		}
	}

	$exporter->closeStream();
}
414 
/**
 * List the pages that are members of a category.
 *
 * @param Title $title Title whose DB key is matched against cl_to
 * @return string[] Prefixed DB keys, at most 'ExportPagelistLimit' entries
 */
private function getPagesFromCategory( $title ) {
	$rowLimit = $this->getConfig()->get( 'ExportPagelistLimit' );
	$categoryKey = $title->getDBkey();

	$rows = wfGetDB( DB_REPLICA )->select(
		[ 'page', 'categorylinks' ],
		[ 'page_namespace', 'page_title' ],
		[ 'cl_from=page_id', 'cl_to' => $categoryKey ],
		__METHOD__,
		[ 'LIMIT' => $rowLimit ]
	);

	$names = [];
	foreach ( $rows as $member ) {
		$names[] = Title::makeName( $member->page_namespace, $member->page_title );
	}

	return $names;
}
441 
/**
 * List the pages that live in a namespace.
 *
 * @param int|string $nsindex Namespace index matched against page_namespace
 * @return string[] Prefixed DB keys, at most 'ExportPagelistLimit' entries
 */
private function getPagesFromNamespace( $nsindex ) {
	$rowLimit = $this->getConfig()->get( 'ExportPagelistLimit' );

	$rows = wfGetDB( DB_REPLICA )->select(
		'page',
		[ 'page_namespace', 'page_title' ],
		[ 'page_namespace' => $nsindex ],
		__METHOD__,
		[ 'LIMIT' => $rowLimit ]
	);

	$names = [];
	foreach ( $rows as $entry ) {
		$names[] = Title::makeName( $entry->page_namespace, $entry->page_title );
	}

	return $names;
}
466 
/**
 * Expand a list of pages to include templates used in those pages.
 *
 * @param array $inputPages Page names whose template links are looked up
 * @param array $pageSet Set of pages collected so far, keyed by prefixed text
 * @return array The page set returned by getLinks() for 'templatelinks'
 */
private function getTemplates( $inputPages, $pageSet ) {
	// Alias the templatelinks columns to the names getLinks() reads
	$fieldAliases = [ 'namespace' => 'tl_namespace', 'title' => 'tl_title' ];
	$joinConds = [ 'page_id=tl_from' ];

	return $this->getLinks( $inputPages, $pageSet, 'templatelinks', $fieldAliases, $joinConds );
}
480 
/**
 * Validate link depth setting, if available.
 *
 * Negative values become 0. Users who may not override the export depth
 * are capped at 'ExportMaxLinkDepth'. Anything that passes those checks
 * is additionally capped at 5.
 *
 * @param int|null $depth Requested depth (execute() passes getIntOrNull())
 * @return int Depth to use
 */
private function validateLinkDepth( $depth ) {
	if ( $depth < 0 ) {
		return 0;
	}

	$mayOverride = $this->userCanOverrideExportDepth();
	if ( !$mayOverride ) {
		$configuredMax = $this->getConfig()->get( 'ExportMaxLinkDepth' );
		if ( $depth > $configuredMax ) {
			return $configuredMax;
		}
	}

	// Last-resort safety net: a HARD CODED ceiling of 5 levels of recursion,
	// so nobody triggers a crazy-big export by setting the depth too high.
	return (int)min( $depth, 5 );
}
506 
/**
 * Expand a list of pages to include pages linked to from that page.
 *
 * Runs getLinks() on 'pagelinks' once per level; each round starts from
 * every page collected so far.
 *
 * @param array $inputPages Page names to start from
 * @param array $pageSet Set of pages collected so far, keyed by prefixed text
 * @param int $depth Number of link levels to follow
 * @return array The expanded page set
 */
private function getPageLinks( $inputPages, $pageSet, $depth ) {
	// Alias the pagelinks columns to the names getLinks() reads
	$fieldAliases = [ 'namespace' => 'pl_namespace', 'title' => 'pl_title' ];
	$joinConds = [ 'page_id=pl_from' ];

	while ( $depth > 0 ) {
		$pageSet = $this->getLinks( $inputPages, $pageSet, 'pagelinks', $fieldAliases, $joinConds );
		$inputPages = array_keys( $pageSet );
		--$depth;
	}

	return $pageSet;
}
526 
/**
 * Expand a list of pages to include items used in those pages.
 *
 * For every input page that parses as a title, the page itself and every
 * target found in the given link table are added to the set.
 *
 * @param array $inputPages Page names to look up
 * @param array $pageSet Set of pages collected so far, keyed by prefixed text
 * @param string $table Link table joined against 'page'
 * @param array $fields Field list; must expose 'namespace' and 'title' aliases
 * @param array $join Join condition(s) between 'page' and $table
 * @return array The expanded page set
 */
private function getLinks( $inputPages, $pageSet, $table, $fields, $join ) {
	$dbr = wfGetDB( DB_REPLICA );

	foreach ( $inputPages as $pageName ) {
		$source = Title::newFromText( $pageName );
		if ( !$source ) {
			continue;
		}

		$pageSet[$source->getPrefixedText()] = true;

		// Restrict the join to the rows that belong to this source page
		$conds = array_merge( $join, [
			'page_namespace' => $source->getNamespace(),
			'page_title' => $source->getDBkey()
		] );
		$rows = $dbr->select( [ 'page', $table ], $fields, $conds, __METHOD__ );

		foreach ( $rows as $row ) {
			$target = Title::makeTitle( $row->namespace, $row->title );
			$pageSet[$target->getPrefixedText()] = true;
		}
	}

	return $pageSet;
}
568 
/**
 * Name the special-page group this page is filed under.
 *
 * @return string Always 'pagetools'
 */
protected function getGroupName() {
	return 'pagetools';
}
572 }
getContext()
Gets the context this SpecialPage is executed in.
const NS_MAIN
Definition: Defines.php:60
A special page that allows users to export pages in an XML file.
wfGetDB( $db, $groups=[], $wiki=false)
Get a Database object.
addHelpLink( $to, $overrideBaseUrl=false)
Adds help link with an icon via page indicators.
getOutput()
Get the OutputPage being used for this instance.
wfResetOutputBuffers( $resetGzipEncoding=true)
Clear away any user-level output buffers, discarding contents.
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
msg( $key,... $params)
Wrapper around wfMessage that sets the current context.
static factory( $displayFormat,... $arguments)
Construct a HTMLForm object for given display type.
Definition: HTMLForm.php:307
doExport( $page, $history, $list_authors, $exportall)
Do the actual page exporting.
getPagesFromCategory( $title)
wfTimestampNow()
Convenience function; returns MediaWiki timestamp for the present time.
getPagesFromNamespace( $nsindex)
setHeaders()
Sets headers - this should be called from the execute() method of all derived classes! ...
static makeTitleSafe( $ns, $title, $fragment='', $interwiki='')
Create a new Title from a namespace index and a DB key.
Definition: Title.php:612
static makeTitle( $ns, $title, $fragment='', $interwiki='')
Create a new Title from a namespace index and a DB key.
Definition: Title.php:586
validateLinkDepth( $depth)
Validate link depth setting, if available.
getUser()
Shortcut to get the User executing this instance.
getConfig()
Shortcut to get main config object.
getLinks( $inputPages, $pageSet, $table, $fields, $join)
Expand a list of pages to include items used in those pages.
const DB_REPLICA
Definition: defines.php:25
getRequest()
Get the WebRequest being used for this instance.
getPageLinks( $inputPages, $pageSet, $depth)
Expand a list of pages to include pages linked to from that page.
static makeName( $ns, $title, $fragment='', $interwiki='', $canonicalNamespace=false)
Make a prefixed DB key from a DB key and a namespace index.
Definition: Title.php:813
outputHeader( $summaryMessageKey='')
Outputs a summary message on top of special pages. By default the message key is the canonical name o...
getTemplates( $inputPages, $pageSet)
Expand a list of pages to include templates used in those pages.
static newFromText( $text, $defaultNamespace=NS_MAIN)
Create a new Title from text, such as what one would find in a link.
Definition: Title.php:319