MediaWiki  master
SpecialExport.php
Go to the documentation of this file.
1 <?php
28 
/**
 * A special page that allows users to export pages in an XML file.
 *
 * execute() reads the request, decides whether an export was requested and
 * either streams the XML dump through WikiExporter or renders the export form.
 */
class SpecialExport extends SpecialPage {
	// NOTE(review): the four option properties below were missing from this
	// copy of the file; their visibility is assumed to match $loadBalancer
	// (private) — confirm against the upstream source.

	/** @var bool Whether only the current revision of each page is exported */
	private $curonly;

	/** @var bool Whether this request produces an XML dump rather than the form */
	private $doExport;

	/** @var int Validated number of page-link levels to follow (see validateLinkDepth()) */
	private $pageLinkDepth;

	/** @var bool Whether templates used by the requested pages are included */
	private $templates;

	/** @var ILoadBalancer */
	private $loadBalancer;

	/**
	 * @param ILoadBalancer $loadBalancer Used to obtain replica DB connections
	 */
	public function __construct(
		ILoadBalancer $loadBalancer
	) {
		parent::__construct( 'Export' );
		$this->loadBalancer = $loadBalancer;
	}

	/**
	 * Handle a request to Special:Export.
	 *
	 * Depending on the request this either adds pages of a category or
	 * namespace to the page list and redisplays the form, or streams an XML
	 * export of the requested pages (or of all pages).
	 *
	 * @param string|null $par Subpage text; used as the default page list for
	 *  non-POST requests
	 */
	public function execute( $par ) {
		$this->setHeaders();
		$this->outputHeader();
		$config = $this->getConfig();

		// Set some variables
		$this->curonly = true;
		$this->doExport = false;
		$request = $this->getRequest();
		$this->templates = $request->getCheck( 'templates' );
		$this->pageLinkDepth = $this->validateLinkDepth(
			$request->getIntOrNull( 'pagelink-depth' )
		);
		$nsindex = '';
		$exportall = false;

		if ( $request->getCheck( 'addcat' ) ) {
			// "Add pages from category" button: extend the list, then show the form again.
			$page = $request->getText( 'pages' );
			$catname = $request->getText( 'catname' );

			if ( $catname !== '' && $catname !== null && $catname !== false ) {
				$t = Title::makeTitleSafe( NS_MAIN, $catname );
				if ( $t ) {
					$catpages = $this->getPagesFromCategory( $t );
					if ( $catpages ) {
						if ( $page !== '' ) {
							$page .= "\n";
						}
						$page .= implode( "\n", $catpages );
					}
				}
			}
		} elseif ( $request->getCheck( 'addns' ) && $config->get( 'ExportFromNamespaces' ) ) {
			// "Add pages from namespace" button: extend the list, then show the form again.
			$page = $request->getText( 'pages' );
			$nsindex = $request->getText( 'nsindex', '' );

			if ( strval( $nsindex ) !== '' ) {
				$nspages = $this->getPagesFromNamespace( $nsindex );
				if ( $nspages ) {
					// Only insert a separator when there is existing input (as the
					// "addcat" branch does), so the list never starts with a blank line.
					if ( $page !== '' ) {
						$page .= "\n";
					}
					$page .= implode( "\n", $nspages );
				}
			}
		} elseif ( $request->getCheck( 'exportall' ) && $config->get( 'ExportAllowAll' ) ) {
			$this->doExport = true;
			$exportall = true;

			/* Although $page and $history are not used later on, we
			nevertheless set them to avoid that PHP notices about using
			undefined variables foul up our XML output (see call to
			doExport(...) further down) */
			$page = '';
			$history = '';
		} elseif ( $request->wasPosted() && $par == '' ) {
			// Log to see if certain parameters are actually used.
			// If not, we could deprecate them and do some cleanup, here and in WikiExporter.
			LoggerFactory::getInstance( 'export' )->debug(
				'Special:Export POST, dir: [{dir}], offset: [{offset}], limit: [{limit}]', [
					'dir' => $request->getRawVal( 'dir' ),
					'offset' => $request->getRawVal( 'offset' ),
					'limit' => $request->getRawVal( 'limit' ),
				] );

			$page = $request->getText( 'pages' );
			$this->curonly = $request->getCheck( 'curonly' );
			$rawOffset = $request->getVal( 'offset' );

			if ( $rawOffset ) {
				$offset = wfTimestamp( TS_MW, $rawOffset );
			} else {
				$offset = null;
			}

			$maxHistory = $config->get( 'ExportMaxHistory' );
			$limit = $request->getInt( 'limit' );
			$dir = $request->getVal( 'dir' );
			$history = [
				'dir' => 'asc',
				'offset' => false,
				'limit' => $maxHistory,
			];
			$historyCheck = $request->getCheck( 'history' );

			if ( $this->curonly ) {
				$history = WikiExporter::CURRENT;
			} elseif ( !$historyCheck ) {
				// A requested limit is honoured only if it is below the configured
				// maximum (a maximum of 0 is treated as "no maximum").
				if ( $limit > 0 && ( $maxHistory == 0 || $limit < $maxHistory ) ) {
					$history['limit'] = $limit;
				}

				if ( $offset !== null ) {
					$history['offset'] = $offset;
				}

				// 'dir' may be absent (null); guard before strtolower(), which
				// no longer accepts null without a deprecation notice on PHP 8.1+.
				if ( $dir !== null && strtolower( $dir ) == 'desc' ) {
					$history['dir'] = 'desc';
				}
			}

			if ( $page != '' ) {
				$this->doExport = true;
			}
		} else {
			// Default to current-only for GET requests.
			$page = $request->getText( 'pages', $par );
			$historyCheck = $request->getCheck( 'history' );

			if ( $historyCheck ) {
				$history = WikiExporter::FULL;
			} else {
				$history = WikiExporter::CURRENT;
			}

			if ( $page != '' ) {
				$this->doExport = true;
			}
		}

		if ( !$config->get( 'ExportAllowHistory' ) ) {
			// Override
			$history = WikiExporter::CURRENT;
		}

		// The contributor list is only offered for current-revision exports.
		$list_authors = $request->getCheck( 'listauthors' );
		if ( !$this->curonly || !$config->get( 'ExportAllowListContributors' ) ) {
			$list_authors = false;
		}

		if ( $this->doExport ) {
			$this->getOutput()->disable();

			// Cancel output buffering and gzipping if set
			// This should provide safer streaming for pages with history
			wfResetOutputBuffers();
			$request->response()->header( 'Content-type: application/xml; charset=utf-8' );
			$request->response()->header( 'X-Robots-Tag: noindex,nofollow' );

			if ( $request->getCheck( 'wpDownload' ) ) {
				// Provide a sane filename suggestion
				$filename = urlencode( $config->get( 'Sitename' ) . '-' . wfTimestampNow() . '.xml' );
				$request->response()->header( "Content-disposition: attachment;filename={$filename}" );
			}

			$this->doExport( $page, $history, $list_authors, $exportall );

			return;
		}

		// No export requested (or nothing to export): render the form.
		$out = $this->getOutput();
		$out->addWikiMsg( 'exporttext' );

		if ( $page == '' ) {
			$categoryName = $request->getText( 'catname' );
		} else {
			$categoryName = '';
		}

		$formDescriptor = [
			'catname' => [
				'type' => 'textwithbutton',
				'name' => 'catname',
				'horizontal-label' => true,
				'label-message' => 'export-addcattext',
				'default' => $categoryName,
				'size' => 40,
				'buttontype' => 'submit',
				'buttonname' => 'addcat',
				'buttondefault' => $this->msg( 'export-addcat' )->text(),
				'hide-if' => [ '===', 'exportall', '1' ],
			],
		];
		if ( $config->get( 'ExportFromNamespaces' ) ) {
			$formDescriptor += [
				'nsindex' => [
					'type' => 'namespaceselectwithbutton',
					'default' => $nsindex,
					'label-message' => 'export-addnstext',
					'horizontal-label' => true,
					'name' => 'nsindex',
					'id' => 'namespace',
					'cssclass' => 'namespaceselector',
					'buttontype' => 'submit',
					'buttonname' => 'addns',
					'buttondefault' => $this->msg( 'export-addns' )->text(),
					'hide-if' => [ '===', 'exportall', '1' ],
				],
			];
		}

		if ( $config->get( 'ExportAllowAll' ) ) {
			$formDescriptor += [
				'exportall' => [
					'type' => 'check',
					'label-message' => 'exportall',
					'name' => 'exportall',
					'id' => 'exportall',
					'default' => $request->wasPosted() ? $request->getCheck( 'exportall' ) : false,
				],
			];
		}

		$formDescriptor += [
			'textarea' => [
				'class' => HTMLTextAreaField::class,
				'name' => 'pages',
				'label-message' => 'export-manual',
				'nodata' => true,
				'rows' => 10,
				'default' => $page,
				'hide-if' => [ '===', 'exportall', '1' ],
			],
		];

		if ( $config->get( 'ExportAllowHistory' ) ) {
			$formDescriptor += [
				'curonly' => [
					'type' => 'check',
					'label-message' => 'exportcuronly',
					'name' => 'curonly',
					'id' => 'curonly',
					'default' => $request->wasPosted() ? $request->getCheck( 'curonly' ) : true,
				],
			];
		} else {
			$out->addWikiMsg( 'exportnohistory' );
		}

		$formDescriptor += [
			'templates' => [
				'type' => 'check',
				'label-message' => 'export-templates',
				'name' => 'templates',
				'id' => 'wpExportTemplates',
				'default' => $request->wasPosted() ? $request->getCheck( 'templates' ) : false,
			],
		];

		if ( $config->get( 'ExportMaxLinkDepth' ) || $this->userCanOverrideExportDepth() ) {
			$formDescriptor += [
				'pagelink-depth' => [
					'type' => 'text',
					'name' => 'pagelink-depth',
					'id' => 'pagelink-depth',
					'label-message' => 'export-pagelinks',
					'default' => '0',
					'size' => 20,
				],
			];
		}

		$formDescriptor += [
			'wpDownload' => [
				'type' => 'check',
				'name' => 'wpDownload',
				'id' => 'wpDownload',
				'default' => $request->wasPosted() ? $request->getCheck( 'wpDownload' ) : true,
				'label-message' => 'export-download',
			],
		];

		if ( $config->get( 'ExportAllowListContributors' ) ) {
			$formDescriptor += [
				'listauthors' => [
					'type' => 'check',
					'label-message' => 'exportlistauthors',
					'default' => $request->wasPosted() ? $request->getCheck( 'listauthors' ) : false,
					'name' => 'listauthors',
					'id' => 'listauthors',
				],
			];
		}

		$htmlForm = HTMLForm::factory( 'ooui', $formDescriptor, $this->getContext() );
		$htmlForm->setSubmitTextMsg( 'export-submit' );
		$htmlForm->prepareForm()->displayForm( false );
		$this->addHelpLink( 'Help:Export' );
	}

	/**
	 * Whether the current user holds the 'override-export-depth' right.
	 *
	 * @return bool
	 */
	protected function userCanOverrideExportDepth() {
		return $this->getAuthority()->isAllowed( 'override-export-depth' );
	}

	/**
	 * Do the actual page exporting.
	 *
	 * @param string $page Newline-separated list of page names to export
	 * @param mixed $history History selector handed to WikiExporter: one of the
	 *  WikiExporter::CURRENT / WikiExporter::FULL constants or the
	 *  dir/offset/limit array built in execute()
	 * @param bool $list_authors Value assigned to the exporter's list_authors flag
	 * @param bool $exportall Export every page instead of the listed ones
	 */
	protected function doExport( $page, $history, $list_authors, $exportall ) {
		// Stays empty on the export-all path, where it is not consulted.
		$pages = [];

		// If we are grabbing everything, enable full history and ignore the rest
		if ( $exportall ) {
			$history = WikiExporter::FULL;
		} else {
			$pageSet = []; // Inverted index of all pages to look up

			// Split up and normalize input
			foreach ( explode( "\n", $page ) as $pageName ) {
				$pageName = trim( $pageName );
				$title = Title::newFromText( $pageName );
				if ( $title && !$title->isExternal() && $title->getText() !== '' ) {
					// Only record each page once!
					$pageSet[$title->getPrefixedText()] = true;
				}
			}

			// Set of original pages to pass on to further manipulation...
			$inputPages = array_keys( $pageSet );

			// Look up any linked pages if asked...
			if ( $this->templates ) {
				$pageSet = $this->getTemplates( $inputPages, $pageSet );
			}
			$linkDepth = $this->pageLinkDepth;
			if ( $linkDepth ) {
				$pageSet = $this->getPageLinks( $inputPages, $pageSet, $linkDepth );
			}

			$pages = array_keys( $pageSet );

			// Normalize titles to the same format and remove dupes, see T19374
			foreach ( $pages as $k => $v ) {
				$pages[$k] = str_replace( ' ', '_', $v );
			}

			$pages = array_unique( $pages );
		}

		/* Ok, let's get to it... */
		$db = $this->loadBalancer->getConnectionRef( ILoadBalancer::DB_REPLICA );

		$exporter = new WikiExporter( $db, $history );
		$exporter->list_authors = $list_authors;
		$exporter->openStream();

		if ( $exportall ) {
			$exporter->allPages();
		} else {
			foreach ( $pages as $pageName ) {
				# T10824: Only export pages the user can read
				$title = Title::newFromText( $pageName );
				if ( $title === null ) {
					// @todo Perhaps output an <error> tag or something.
					continue;
				}

				if ( !$this->getAuthority()->authorizeRead( 'read', $title ) ) {
					// @todo Perhaps output an <error> tag or something.
					continue;
				}

				$exporter->pageByTitle( $title );
			}
		}

		$exporter->closeStream();
	}

	/**
	 * List the pages that are members of a category.
	 *
	 * At most $wgExportPagelistLimit rows are fetched.
	 *
	 * @param Title $title Title whose DB key is matched against cl_to
	 * @return string[] Prefixed page names as built by Title::makeName()
	 */
	protected function getPagesFromCategory( $title ) {
		$maxPages = $this->getConfig()->get( 'ExportPagelistLimit' );

		$name = $title->getDBkey();

		$dbr = $this->loadBalancer->getConnectionRef( ILoadBalancer::DB_REPLICA );
		$res = $dbr->select(
			[ 'page', 'categorylinks' ],
			[ 'page_namespace', 'page_title' ],
			[ 'cl_from=page_id', 'cl_to' => $name ],
			__METHOD__,
			[ 'LIMIT' => $maxPages ]
		);

		$pages = [];

		foreach ( $res as $row ) {
			$pages[] = Title::makeName( $row->page_namespace, $row->page_title );
		}

		return $pages;
	}

	/**
	 * List the pages that live in a namespace.
	 *
	 * At most $wgExportPagelistLimit rows are fetched.
	 *
	 * @param int|string $nsindex Namespace index, as received from the request
	 * @return string[] Prefixed page names as built by Title::makeName()
	 */
	protected function getPagesFromNamespace( $nsindex ) {
		$maxPages = $this->getConfig()->get( 'ExportPagelistLimit' );

		$dbr = $this->loadBalancer->getConnectionRef( ILoadBalancer::DB_REPLICA );
		$res = $dbr->select(
			'page',
			[ 'page_namespace', 'page_title' ],
			[ 'page_namespace' => $nsindex ],
			__METHOD__,
			[ 'LIMIT' => $maxPages ]
		);

		$pages = [];

		foreach ( $res as $row ) {
			$pages[] = Title::makeName( $row->page_namespace, $row->page_title );
		}

		return $pages;
	}

	/**
	 * Expand a list of pages to include templates used in those pages.
	 *
	 * @param string[] $inputPages Page names to look up
	 * @param array $pageSet Map of prefixed page name => true
	 * @return array The page set extended with the templates found
	 */
	protected function getTemplates( $inputPages, $pageSet ) {
		return $this->getLinks( $inputPages, $pageSet,
			'templatelinks',
			[ 'namespace' => 'tl_namespace', 'title' => 'tl_title' ],
			[ 'page_id=tl_from' ]
		);
	}

	/**
	 * Validate link depth setting, if available.
	 *
	 * The result is never negative, never above 5, and never above
	 * $wgExportMaxLinkDepth unless the user may override the export depth.
	 *
	 * @param int|null $depth Requested depth; null when none was supplied
	 * @return int
	 */
	protected function validateLinkDepth( $depth ) {
		// A missing or negative depth means "do not follow links".
		if ( $depth === null || $depth < 0 ) {
			return 0;
		}

		if ( !$this->userCanOverrideExportDepth() ) {
			$maxLinkDepth = $this->getConfig()->get( 'ExportMaxLinkDepth' );
			if ( $depth > $maxLinkDepth ) {
				return $maxLinkDepth;
			}
		}

		/*
		 * There's a HARD CODED limit of 5 levels of recursion here to prevent a
		 * crazy-big export from being done by someone setting the depth
		 * number too high. In other words, last resort safety net.
		 */

		return intval( min( $depth, 5 ) );
	}

	/**
	 * Expand a list of pages to include pages linked to from that page.
	 *
	 * Runs one getLinks() pass per level; each pass starts from every page
	 * collected so far.
	 *
	 * @param string[] $inputPages Page names to start from
	 * @param array $pageSet Map of prefixed page name => true
	 * @param int $depth Number of link levels to follow
	 * @return array The extended page set
	 */
	protected function getPageLinks( $inputPages, $pageSet, $depth ) {
		for ( ; $depth > 0; --$depth ) {
			$pageSet = $this->getLinks(
				$inputPages, $pageSet, 'pagelinks',
				[ 'namespace' => 'pl_namespace', 'title' => 'pl_title' ],
				[ 'page_id=pl_from' ]
			);
			$inputPages = array_keys( $pageSet );
		}

		return $pageSet;
	}

	/**
	 * Expand a list of pages to include items used in those pages.
	 *
	 * Note that one query is issued per input page.
	 *
	 * @param string[] $inputPages Page names to look up
	 * @param array $pageSet Map of prefixed page name => true
	 * @param string $table Link table joined against 'page'
	 * @param array $fields Field list; must alias the target columns as
	 *  'namespace' and 'title'
	 * @param array $join Conditions tying $table to 'page'
	 * @return array The page set extended with the input pages and their targets
	 */
	protected function getLinks( $inputPages, $pageSet, $table, $fields, $join ) {
		$dbr = $this->loadBalancer->getConnectionRef( ILoadBalancer::DB_REPLICA );

		foreach ( $inputPages as $page ) {
			$title = Title::newFromText( $page );

			if ( $title ) {
				$pageSet[$title->getPrefixedText()] = true;
				$result = $dbr->select(
					[ 'page', $table ],
					$fields,
					array_merge(
						$join,
						[
							'page_namespace' => $title->getNamespace(),
							'page_title' => $title->getDBkey()
						]
					),
					__METHOD__
				);

				foreach ( $result as $row ) {
					$template = Title::makeTitle( $row->namespace, $row->title );
					$pageSet[$template->getPrefixedText()] = true;
				}
			}
		}

		return $pageSet;
	}

	/**
	 * Name of the Special:SpecialPages group this page is listed under.
	 *
	 * @return string
	 */
	protected function getGroupName() {
		return 'pagetools';
	}
}
SpecialPage\msg
msg( $key,... $params)
Wrapper around wfMessage that sets the current context.
Definition: SpecialPage.php:911
Title\newFromText
static newFromText( $text, $defaultNamespace=NS_MAIN)
Create a new Title from text, such as what one would find in a link.
Definition: Title.php:363
SpecialExport\getLinks
getLinks( $inputPages, $pageSet, $table, $fields, $join)
Expand a list of pages to include items used in those pages.
Definition: SpecialExport.php:541
wfResetOutputBuffers
wfResetOutputBuffers( $resetGzipEncoding=true)
Clear away any user-level output buffers, discarding contents.
Definition: GlobalFunctions.php:1653
Title\makeName
static makeName( $ns, $title, $fragment='', $interwiki='', $canonicalNamespace=false)
Make a prefixed DB key from a DB key and a namespace index.
Definition: Title.php:856
SpecialExport\__construct
__construct(ILoadBalancer $loadBalancer)
Definition: SpecialExport.php:43
SpecialExport
A special page that allows users to export pages in an XML file.
Definition: SpecialExport.php:34
SpecialPage\getOutput
getOutput()
Get the OutputPage being used for this instance.
Definition: SpecialPage.php:789
SpecialExport\execute
execute( $par)
Default execute method. Checks user permissions.
Definition: SpecialExport.php:50
SpecialExport\$loadBalancer
ILoadBalancer $loadBalancer
Definition: SpecialExport.php:38
WikiExporter\CURRENT
const CURRENT
Definition: WikiExporter.php:54
SpecialExport\getTemplates
getTemplates( $inputPages, $pageSet)
Expand a list of pages to include templates used in those pages.
Definition: SpecialExport.php:478
wfTimestamp
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
Definition: GlobalFunctions.php:1832
SpecialExport\doExport
doExport( $page, $history, $list_authors, $exportall)
Do the actual page exporting.
Definition: SpecialExport.php:351
SpecialPage\getAuthority
getAuthority()
Shortcut to get the Authority executing this instance.
Definition: SpecialPage.php:809
SpecialExport\$curonly
$curonly
Definition: SpecialExport.php:35
SpecialExport\$templates
$templates
Definition: SpecialExport.php:35
$res
$res
Definition: testCompression.php:57
NS_MAIN
const NS_MAIN
Definition: Defines.php:64
$dbr
$dbr
Definition: testCompression.php:54
SpecialExport\getGroupName
getGroupName()
Under which header this special page is listed in Special:SpecialPages. See messages 'specialpages-group-*' for valid names.
Definition: SpecialExport.php:574
SpecialPage\addHelpLink
addHelpLink( $to, $overrideBaseUrl=false)
Adds help link with an icon via page indicators.
Definition: SpecialPage.php:947
SpecialExport\getPagesFromCategory
getPagesFromCategory( $title)
Definition: SpecialExport.php:424
SpecialExport\$doExport
$doExport
Definition: SpecialExport.php:35
SpecialPage\getConfig
getConfig()
Shortcut to get main config object.
Definition: SpecialPage.php:877
MediaWiki\Logger\LoggerFactory
PSR-3 logger instance factory.
Definition: LoggerFactory.php:45
SpecialExport\getPageLinks
getPageLinks( $inputPages, $pageSet, $depth)
Expand a list of pages to include pages linked to from that page.
Definition: SpecialExport.php:519
$title
$title
Definition: testCompression.php:38
SpecialPage\setHeaders
setHeaders()
Sets headers - this should be called from the execute() method of all derived classes!
Definition: SpecialPage.php:617
Title\makeTitle
static makeTitle( $ns, $title, $fragment='', $interwiki='')
Create a new Title from a namespace index and a DB key.
Definition: Title.php:626
DB_REPLICA
const DB_REPLICA
Definition: defines.php:25
wfTimestampNow
wfTimestampNow()
Convenience function; returns MediaWiki timestamp for the present time.
Definition: GlobalFunctions.php:1861
WikiExporter
Definition: WikiExporter.php:40
SpecialPage\getContext
getContext()
Gets the context this SpecialPage is executed in.
Definition: SpecialPage.php:763
SpecialExport\$pageLinkDepth
$pageLinkDepth
Definition: SpecialExport.php:35
Title\makeTitleSafe
static makeTitleSafe( $ns, $title, $fragment='', $interwiki='')
Create a new Title from a namespace index and a DB key.
Definition: Title.php:652
SpecialPage
Parent class for all special pages.
Definition: SpecialPage.php:43
SpecialPage\getRequest
getRequest()
Get the WebRequest being used for this instance.
Definition: SpecialPage.php:779
WikiExporter\FULL
const FULL
Definition: WikiExporter.php:53
SpecialExport\validateLinkDepth
validateLinkDepth( $depth)
Validate link depth setting, if available.
Definition: SpecialExport.php:491
SpecialExport\getPagesFromNamespace
getPagesFromNamespace( $nsindex)
Definition: SpecialExport.php:451
$t
$t
Definition: testCompression.php:74
HTMLForm\factory
static factory( $displayFormat,... $arguments)
Construct a HTMLForm object for given display type.
Definition: HTMLForm.php:322
SpecialPage\outputHeader
outputHeader( $summaryMessageKey='')
Outputs a summary message on top of special pages. By default the message key is the canonical name of the special page.
Definition: SpecialPage.php:708
SpecialExport\userCanOverrideExportDepth
userCanOverrideExportDepth()
Definition: SpecialExport.php:338
Wikimedia\Rdbms\ILoadBalancer
Database cluster connection, tracking, load balancing, and transaction manager interface.
Definition: ILoadBalancer.php:81