MediaWiki  1.34.0
SpecialExport.php
Go to the documentation of this file.
1 <?php
28 
34 class SpecialExport extends SpecialPage {
36 
/**
 * Register this special page under the name 'Export'.
 */
public function __construct() {
	$specialPageName = 'Export';
	parent::__construct( $specialPageName );
}
40 
/**
 * Handle a request to Special:Export.
 *
 * The request is routed through one of five branches:
 *  - 'addcat': append the members of a category to the page list and
 *    redisplay the form;
 *  - 'addns' (only if ExportFromNamespaces is enabled): append the pages of
 *    a namespace to the page list and redisplay the form;
 *  - 'exportall' (only if ExportAllowAll is enabled): export everything;
 *  - a POST without a subpage: export the listed pages using the history
 *    options (curonly / history / limit / offset / dir) from the request;
 *  - anything else (GET, or a subpage was given): export the listed pages,
 *    current revision only unless 'history' is set.
 *
 * When an export is performed, normal page output is disabled and the XML
 * is streamed directly; otherwise the export form is rendered.
 *
 * @param string|null $par Subpage text; used as the default page list for
 *  non-POST requests (presumably null when no subpage was given -- confirm
 *  against SpecialPage)
 */
public function execute( $par ) {
	$this->setHeaders();
	$this->outputHeader();
	$config = $this->getConfig();

	// Set some variables
	$this->curonly = true;
	$this->doExport = false;
	$request = $this->getRequest();
	$this->templates = $request->getCheck( 'templates' );
	$this->pageLinkDepth = $this->validateLinkDepth(
		$request->getIntOrNull( 'pagelink-depth' )
	);
	$nsindex = '';
	$exportall = false;

	if ( $request->getCheck( 'addcat' ) ) {
		$page = $request->getText( 'pages' );
		$catname = $request->getText( 'catname' );

		if ( $catname !== '' && $catname !== null && $catname !== false ) {
			$t = Title::makeTitleSafe( NS_MAIN, $catname );
			if ( $t ) {
				$catpages = $this->getPagesFromCategory( $t );
				if ( $catpages ) {
					if ( $page !== '' ) {
						$page .= "\n";
					}
					$page .= implode( "\n", $catpages );
				}
			}
		}
	} elseif ( $request->getCheck( 'addns' ) && $config->get( 'ExportFromNamespaces' ) ) {
		$page = $request->getText( 'pages' );
		$nsindex = $request->getText( 'nsindex', '' );

		if ( strval( $nsindex ) !== '' ) {
			$nspages = $this->getPagesFromNamespace( $nsindex );
			if ( $nspages ) {
				// Only separate with a newline when there is already a list,
				// matching the 'addcat' branch; otherwise the textarea would
				// start with an empty line.
				if ( $page !== '' ) {
					$page .= "\n";
				}
				$page .= implode( "\n", $nspages );
			}
		}
	} elseif ( $request->getCheck( 'exportall' ) && $config->get( 'ExportAllowAll' ) ) {
		$this->doExport = true;
		$exportall = true;

		/* Although $page and $history are not used later on, we
		nevertheless set them to avoid that PHP notices about using
		undefined variables foul up our XML output (see call to
		doExport(...) further down) */
		$page = '';
		$history = '';
	} elseif ( $request->wasPosted() && $par == '' ) {
		// Log to see if certain parameters are actually used.
		// If not, we could deprecate them and do some cleanup, here and in WikiExporter.
		LoggerFactory::getInstance( 'export' )->debug(
			'Special:Export POST, dir: [{dir}], offset: [{offset}], limit: [{limit}]', [
				'dir' => $request->getRawVal( 'dir' ),
				'offset' => $request->getRawVal( 'offset' ),
				'limit' => $request->getRawVal( 'limit' ),
			] );

		$page = $request->getText( 'pages' );
		$this->curonly = $request->getCheck( 'curonly' );
		$rawOffset = $request->getVal( 'offset' );

		if ( $rawOffset ) {
			$offset = wfTimestamp( TS_MW, $rawOffset );
		} else {
			$offset = null;
		}

		$maxHistory = $config->get( 'ExportMaxHistory' );
		$limit = $request->getInt( 'limit' );
		$dir = $request->getVal( 'dir' );
		// Default history options; narrowed below from the request.
		$history = [
			'dir' => 'asc',
			'offset' => false,
			'limit' => $maxHistory,
		];
		$historyCheck = $request->getCheck( 'history' );

		if ( $this->curonly ) {
			$history = WikiExporter::CURRENT;
		} elseif ( !$historyCheck ) {
			// A requested limit is honoured only if it is positive and does
			// not exceed the configured maximum (0 meaning "no maximum").
			if ( $limit > 0 && ( $maxHistory == 0 || $limit < $maxHistory ) ) {
				$history['limit'] = $limit;
			}

			if ( $offset !== null ) {
				$history['offset'] = $offset;
			}

			// Cast first: 'dir' may be absent from the request.
			if ( strtolower( (string)$dir ) === 'desc' ) {
				$history['dir'] = 'desc';
			}
		}

		if ( $page != '' ) {
			$this->doExport = true;
		}
	} else {
		// Default to current-only for GET requests.
		$page = $request->getText( 'pages', $par );
		$historyCheck = $request->getCheck( 'history' );

		if ( $historyCheck ) {
			$history = WikiExporter::FULL;
		} else {
			$history = WikiExporter::CURRENT;
		}

		if ( $page != '' ) {
			$this->doExport = true;
		}
	}

	if ( !$config->get( 'ExportAllowHistory' ) ) {
		// Override
		$history = WikiExporter::CURRENT;
	}

	// Author lists are only produced for current-only exports, and only
	// when the wiki allows it.
	$list_authors = $request->getCheck( 'listauthors' );
	if ( !$this->curonly || !$config->get( 'ExportAllowListContributors' ) ) {
		$list_authors = false;
	}

	if ( $this->doExport ) {
		$this->getOutput()->disable();

		// Cancel output buffering and gzipping if set
		// This should provide safer streaming for pages with history
		wfResetOutputBuffers();
		$request->response()->header( "Content-type: application/xml; charset=utf-8" );
		$request->response()->header( "X-Robots-Tag: noindex,nofollow" );

		if ( $request->getCheck( 'wpDownload' ) ) {
			// Provide a sane filename suggestion
			$filename = urlencode( $config->get( 'Sitename' ) . '-' . wfTimestampNow() . '.xml' );
			$request->response()->header( "Content-disposition: attachment;filename={$filename}" );
		}

		$this->doExport( $page, $history, $list_authors, $exportall );

		return;
	}

	// No export requested (or nothing to export): show the form.
	$out = $this->getOutput();
	$out->addWikiMsg( 'exporttext' );

	// The category box is only pre-filled while the page list is still empty.
	if ( $page == '' ) {
		$categoryName = $request->getText( 'catname' );
	} else {
		$categoryName = '';
	}

	$formDescriptor = [
		'catname' => [
			'type' => 'textwithbutton',
			'name' => 'catname',
			'horizontal-label' => true,
			'label-message' => 'export-addcattext',
			'default' => $categoryName,
			'size' => 40,
			'buttontype' => 'submit',
			'buttonname' => 'addcat',
			'buttondefault' => $this->msg( 'export-addcat' )->text(),
			'hide-if' => [ '===', 'exportall', '1' ],
		],
	];
	if ( $config->get( 'ExportFromNamespaces' ) ) {
		$formDescriptor += [
			'nsindex' => [
				'type' => 'namespaceselectwithbutton',
				'default' => $nsindex,
				'label-message' => 'export-addnstext',
				'horizontal-label' => true,
				'name' => 'nsindex',
				'id' => 'namespace',
				'cssclass' => 'namespaceselector',
				'buttontype' => 'submit',
				'buttonname' => 'addns',
				'buttondefault' => $this->msg( 'export-addns' )->text(),
				'hide-if' => [ '===', 'exportall', '1' ],
			],
		];
	}

	if ( $config->get( 'ExportAllowAll' ) ) {
		$formDescriptor += [
			'exportall' => [
				'type' => 'check',
				'label-message' => 'exportall',
				'name' => 'exportall',
				'id' => 'exportall',
				'default' => $request->wasPosted() ? $request->getCheck( 'exportall' ) : false,
			],
		];
	}

	$formDescriptor += [
		'textarea' => [
			'class' => HTMLTextAreaField::class,
			'name' => 'pages',
			'label-message' => 'export-manual',
			'nodata' => true,
			'rows' => 10,
			'default' => $page,
			'hide-if' => [ '===', 'exportall', '1' ],
		],
	];

	if ( $config->get( 'ExportAllowHistory' ) ) {
		$formDescriptor += [
			'curonly' => [
				'type' => 'check',
				'label-message' => 'exportcuronly',
				'name' => 'curonly',
				'id' => 'curonly',
				'default' => $request->wasPosted() ? $request->getCheck( 'curonly' ) : true,
			],
		];
	} else {
		$out->addWikiMsg( 'exportnohistory' );
	}

	$formDescriptor += [
		'templates' => [
			'type' => 'check',
			'label-message' => 'export-templates',
			'name' => 'templates',
			'id' => 'wpExportTemplates',
			'default' => $request->wasPosted() ? $request->getCheck( 'templates' ) : false,
		],
	];

	if ( $config->get( 'ExportMaxLinkDepth' ) || $this->userCanOverrideExportDepth() ) {
		$formDescriptor += [
			'pagelink-depth' => [
				'type' => 'text',
				'name' => 'pagelink-depth',
				'id' => 'pagelink-depth',
				'label-message' => 'export-pagelinks',
				'default' => '0',
				'size' => 20,
			],
		];
	}

	$formDescriptor += [
		'wpDownload' => [
			'type' => 'check',
			'name' => 'wpDownload',
			'id' => 'wpDownload',
			'default' => $request->wasPosted() ? $request->getCheck( 'wpDownload' ) : true,
			'label-message' => 'export-download',
		],
	];

	if ( $config->get( 'ExportAllowListContributors' ) ) {
		$formDescriptor += [
			'listauthors' => [
				'type' => 'check',
				'label-message' => 'exportlistauthors',
				'default' => $request->wasPosted() ? $request->getCheck( 'listauthors' ) : false,
				'name' => 'listauthors',
				'id' => 'listauthors',
			],
		];
	}

	$htmlForm = HTMLForm::factory( 'ooui', $formDescriptor, $this->getContext() );
	$htmlForm->setSubmitTextMsg( 'export-submit' );
	$htmlForm->prepareForm()->displayForm( false );
	$this->addHelpLink( 'Help:Export' );
}
325 
/**
 * Check whether the current user holds the 'override-export-depth' right.
 *
 * @return bool Result of PermissionManager::userHasRight() for this user
 */
private function userCanOverrideExportDepth() {
	$permissionManager = MediaWikiServices::getInstance()->getPermissionManager();
	$currentUser = $this->getUser();

	return $permissionManager->userHasRight( $currentUser, 'override-export-depth' );
}
334 
/**
 * Do the actual page exporting.
 *
 * @param string $page Newline-separated page titles; ignored when $exportall is set
 * @param mixed $history History selection handed to the WikiExporter constructor;
 *  replaced by WikiExporter::FULL when $exportall is set
 * @param bool $list_authors Value assigned to the exporter's list_authors property
 * @param bool $exportall Export every page instead of the titles in $page
 */
private function doExport( $page, $history, $list_authors, $exportall ) {
	if ( $exportall ) {
		// If we are grabbing everything, enable full history and ignore the rest
		$history = WikiExporter::FULL;
	} else {
		// Inverted index (prefixed title text => true) of all pages to look up
		$pageSet = [];

		// Split up and normalize input, skipping anything that is not a
		// usable local title
		foreach ( explode( "\n", $page ) as $rawName ) {
			$candidate = Title::newFromText( trim( $rawName ) );
			if ( !$candidate || $candidate->isExternal() || $candidate->getText() === '' ) {
				continue;
			}
			// Keyed by title, so each page is only recorded once
			$pageSet[$candidate->getPrefixedText()] = true;
		}

		// The originally requested pages are the seed for link expansion
		$inputPages = array_keys( $pageSet );

		// Look up any linked pages if asked...
		if ( $this->templates ) {
			$pageSet = $this->getTemplates( $inputPages, $pageSet );
		}
		$linkDepth = $this->pageLinkDepth;
		if ( $linkDepth ) {
			$pageSet = $this->getPageLinks( $inputPages, $pageSet, $linkDepth );
		}

		// Normalize titles to the same format and remove dupes, see T19374
		$pages = array_unique( str_replace( " ", "_", array_keys( $pageSet ) ) );
	}

	/* Ok, let's get to it... */
	$db = wfGetDB( DB_REPLICA );

	$exporter = new WikiExporter( $db, $history );
	$exporter->list_authors = $list_authors;
	$exporter->openStream();

	if ( $exportall ) {
		$exporter->allPages();
	} else {
		$permissionManager = MediaWikiServices::getInstance()->getPermissionManager();

		foreach ( $pages as $pageName ) {
			$title = Title::newFromText( $pageName );

			# T10824: Only export pages the user can read
			// @todo Perhaps output an <error> tag or something for skipped pages.
			if (
				$title !== null &&
				$permissionManager->userCan( 'read', $this->getUser(), $title )
			) {
				$exporter->pageByTitle( $title );
			}
		}
	}

	$exporter->closeStream();
}
414 
/**
 * List pages that have a categorylinks row pointing at the given title.
 *
 * At most ExportPagelistLimit rows are fetched.
 *
 * @param Title $title Title whose DB key is matched against cl_to
 * @return string[] Names built by Title::makeName() from each row's namespace and title
 */
private function getPagesFromCategory( $title ) {
	$limit = $this->getConfig()->get( 'ExportPagelistLimit' );
	$categoryKey = $title->getDBkey();

	$rows = wfGetDB( DB_REPLICA )->select(
		[ 'page', 'categorylinks' ],
		[ 'page_namespace', 'page_title' ],
		[ 'cl_from=page_id', 'cl_to' => $categoryKey ],
		__METHOD__,
		[ 'LIMIT' => $limit ]
	);

	$names = [];
	foreach ( $rows as $pageRow ) {
		$names[] = Title::makeName( $pageRow->page_namespace, $pageRow->page_title );
	}

	return $names;
}
441 
/**
 * List pages belonging to the given namespace.
 *
 * At most ExportPagelistLimit rows are fetched.
 *
 * @param int|string $nsindex Namespace index matched against page_namespace
 * @return string[] Names built by Title::makeName() from each row's namespace and title
 */
private function getPagesFromNamespace( $nsindex ) {
	$limit = $this->getConfig()->get( 'ExportPagelistLimit' );

	$rows = wfGetDB( DB_REPLICA )->select(
		'page',
		[ 'page_namespace', 'page_title' ],
		[ 'page_namespace' => $nsindex ],
		__METHOD__,
		[ 'LIMIT' => $limit ]
	);

	$names = [];
	foreach ( $rows as $pageRow ) {
		$names[] = Title::makeName( $pageRow->page_namespace, $pageRow->page_title );
	}

	return $names;
}
466 
/**
 * Expand a list of pages to include templates used in those pages.
 *
 * Delegates to getLinks() using the templatelinks table.
 *
 * @param string[] $inputPages Page titles to look up
 * @param array $pageSet Existing set, keyed by prefixed title text
 * @return array The page set with the templates added
 */
private function getTemplates( $inputPages, $pageSet ) {
	$linkTable = 'templatelinks';
	$fieldAliases = [ 'namespace' => 'tl_namespace', 'title' => 'tl_title' ];
	$joinConds = [ 'page_id=tl_from' ];

	return $this->getLinks( $inputPages, $pageSet, $linkTable, $fieldAliases, $joinConds );
}
480 
/**
 * Validate link depth setting, if available.
 *
 * Negative depths become 0. Users without the override right are limited to
 * ExportMaxLinkDepth. In every case the result is additionally capped at 5.
 *
 * @param int|null $depth Requested depth (the caller passes the result of
 *  getIntOrNull(), so this may be null)
 * @return int Depth to use, always within 0..5 for a non-negative configured maximum
 */
private function validateLinkDepth( $depth ) {
	if ( $depth < 0 ) {
		return 0;
	}

	if ( !$this->userCanOverrideExportDepth() ) {
		$maxLinkDepth = $this->getConfig()->get( 'ExportMaxLinkDepth' );
		if ( $depth > $maxLinkDepth ) {
			// Clamp rather than return: returning here would skip the hard
			// cap below whenever the configured maximum is larger than 5.
			$depth = $maxLinkDepth;
		}
	}

	/*
	 * There's a HARD CODED limit of 5 levels of recursion here to prevent a
	 * crazy-big export from being done by someone setting the depth
	 * number too high. In other words, last resort safety net.
	 */

	return intval( min( $depth, 5 ) );
}
506 
/**
 * Expand a list of pages to include pages linked to from that page.
 *
 * Runs getLinks() on the pagelinks table once per level; after each level
 * the whole accumulated set becomes the input for the next one.
 *
 * @param string[] $inputPages Page titles to start from
 * @param array $pageSet Existing set, keyed by prefixed title text
 * @param int $depth Number of levels to follow
 * @return array The expanded page set
 */
private function getPageLinks( $inputPages, $pageSet, $depth ) {
	$fieldAliases = [ 'namespace' => 'pl_namespace', 'title' => 'pl_title' ];
	$joinConds = [ 'page_id=pl_from' ];

	$levelsLeft = $depth;
	while ( $levelsLeft > 0 ) {
		$pageSet = $this->getLinks( $inputPages, $pageSet, 'pagelinks', $fieldAliases, $joinConds );
		$inputPages = array_keys( $pageSet );
		--$levelsLeft;
	}

	return $pageSet;
}
526 
/**
 * Expand a list of pages to include items used in those pages.
 *
 * One query is issued per input page that parses to a valid title.
 *
 * @param string[] $inputPages Page titles to look up
 * @param array $pageSet Existing set, keyed by prefixed title text
 * @param string $table Link table joined against 'page'
 * @param array $fields Field list; must alias the target columns as 'namespace' and 'title'
 * @param array $join Conditions joining $table to 'page'
 * @return array The page set with each input page and its link targets added
 */
private function getLinks( $inputPages, $pageSet, $table, $fields, $join ) {
	$dbr = wfGetDB( DB_REPLICA );

	foreach ( $inputPages as $pageName ) {
		$source = Title::newFromText( $pageName );
		if ( !$source ) {
			// Unparseable titles are silently skipped
			continue;
		}

		$pageSet[$source->getPrefixedText()] = true;

		// Restrict the join to the row of the source page itself
		$conds = array_merge(
			$join,
			[
				'page_namespace' => $source->getNamespace(),
				'page_title' => $source->getDBkey()
			]
		);
		$rows = $dbr->select( [ 'page', $table ], $fields, $conds, __METHOD__ );

		foreach ( $rows as $linkRow ) {
			$target = Title::makeTitle( $linkRow->namespace, $linkRow->title );
			$pageSet[$target->getPrefixedText()] = true;
		}
	}

	return $pageSet;
}
568 
/**
 * Name of the group under which this special page is listed on
 * Special:SpecialPages.
 *
 * @return string Always 'pagetools'
 */
protected function getGroupName() {
	$groupName = 'pagetools';

	return $groupName;
}
572 }
SpecialPage\msg
msg( $key,... $params)
Wrapper around wfMessage that sets the current context.
Definition: SpecialPage.php:792
Title\newFromText
static newFromText( $text, $defaultNamespace=NS_MAIN)
Create a new Title from text, such as what one would find in a link.
Definition: Title.php:316
SpecialExport\getLinks
getLinks( $inputPages, $pageSet, $table, $fields, $join)
Expand a list of pages to include items used in those pages.
Definition: SpecialExport.php:536
wfResetOutputBuffers
wfResetOutputBuffers( $resetGzipEncoding=true)
Clear away any user-level output buffers, discarding contents.
Definition: GlobalFunctions.php:1696
Title\makeName
static makeName( $ns, $title, $fragment='', $interwiki='', $canonicalNamespace=false)
Make a prefixed DB key from a DB key and a namespace index.
Definition: Title.php:814
SpecialExport
A special page that allows users to export pages in an XML file.
Definition: SpecialExport.php:34
SpecialPage\getOutput
getOutput()
Get the OutputPage being used for this instance.
Definition: SpecialPage.php:719
MediaWiki\MediaWikiServices
MediaWikiServices is the service locator for the application scope of MediaWiki.
Definition: MediaWikiServices.php:117
SpecialExport\execute
execute( $par)
Default execute method. Checks user permissions.
Definition: SpecialExport.php:41
WikiExporter\CURRENT
const CURRENT
Definition: WikiExporter.php:52
SpecialExport\getTemplates
getTemplates( $inputPages, $pageSet)
Expand a list of pages to include templates used in those pages.
Definition: SpecialExport.php:473
wfTimestamp
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
Definition: GlobalFunctions.php:1869
SpecialExport\doExport
doExport( $page, $history, $list_authors, $exportall)
Do the actual page exporting.
Definition: SpecialExport.php:344
SpecialExport\$curonly
$curonly
Definition: SpecialExport.php:35
SpecialExport\$templates
$templates
Definition: SpecialExport.php:35
$res
$res
Definition: testCompression.php:52
$dbr
$dbr
Definition: testCompression.php:50
SpecialExport\getGroupName
getGroupName()
Under which header this special page is listed in Special:SpecialPages See messages 'specialpages-gro...
Definition: SpecialExport.php:569
NS_MAIN
const NS_MAIN
Definition: Defines.php:60
SpecialPage\addHelpLink
addHelpLink( $to, $overrideBaseUrl=false)
Adds help link with an icon via page indicators.
Definition: SpecialPage.php:828
SpecialExport\getPagesFromCategory
getPagesFromCategory( $title)
Definition: SpecialExport.php:419
SpecialExport\$doExport
$doExport
Definition: SpecialExport.php:35
SpecialPage\getConfig
getConfig()
Shortcut to get main config object.
Definition: SpecialPage.php:758
MediaWiki\Logger\LoggerFactory
PSR-3 logger instance factory.
Definition: LoggerFactory.php:45
wfGetDB
wfGetDB( $db, $groups=[], $wiki=false)
Get a Database object.
Definition: GlobalFunctions.php:2575
SpecialExport\getPageLinks
getPageLinks( $inputPages, $pageSet, $depth)
Expand a list of pages to include pages linked to from that page.
Definition: SpecialExport.php:514
$t
$t
Definition: make-normalization-table.php:143
$title
$title
Definition: testCompression.php:34
SpecialPage\setHeaders
setHeaders()
Sets headers - this should be called from the execute() method of all derived classes!
Definition: SpecialPage.php:537
SpecialPage\getUser
getUser()
Shortcut to get the User executing this instance.
Definition: SpecialPage.php:729
Title\makeTitle
static makeTitle( $ns, $title, $fragment='', $interwiki='')
Create a new Title from a namespace index and a DB key.
Definition: Title.php:586
DB_REPLICA
const DB_REPLICA
Definition: defines.php:25
wfTimestampNow
wfTimestampNow()
Convenience function; returns MediaWiki timestamp for the present time.
Definition: GlobalFunctions.php:1898
WikiExporter
Definition: WikiExporter.php:38
SpecialPage\getContext
getContext()
Gets the context this SpecialPage is executed in.
Definition: SpecialPage.php:692
SpecialExport\$pageLinkDepth
$pageLinkDepth
Definition: SpecialExport.php:35
Title\makeTitleSafe
static makeTitleSafe( $ns, $title, $fragment='', $interwiki='')
Create a new Title from a namespace index and a DB key.
Definition: Title.php:613
SpecialPage
Parent class for all special pages.
Definition: SpecialPage.php:37
SpecialPage\getRequest
getRequest()
Get the WebRequest being used for this instance.
Definition: SpecialPage.php:709
WikiExporter\FULL
const FULL
Definition: WikiExporter.php:51
SpecialExport\validateLinkDepth
validateLinkDepth( $depth)
Validate link depth setting, if available.
Definition: SpecialExport.php:486
SpecialExport\getPagesFromNamespace
getPagesFromNamespace( $nsindex)
Definition: SpecialExport.php:446
HTMLForm\factory
static factory( $displayFormat,... $arguments)
Construct a HTMLForm object for given display type.
Definition: HTMLForm.php:303
SpecialPage\outputHeader
outputHeader( $summaryMessageKey='')
Outputs a summary message on top of special pages Per default the message key is the canonical name o...
Definition: SpecialPage.php:639
SpecialExport\userCanOverrideExportDepth
userCanOverrideExportDepth()
Definition: SpecialExport.php:329
SpecialExport\__construct
__construct()
Definition: SpecialExport.php:37