MediaWiki  master
SpecialExport.php
Go to the documentation of this file.
1 <?php
31 
/**
 * A special page that allows users to export pages in an XML file.
 *
 * With no pages to export the page renders an HTML form; otherwise it disables
 * the normal page output and streams the XML produced by a WikiExporter.
 */
class SpecialExport extends SpecialPage {
	/** @var bool Whether only the current revision is requested; set by execute() */
	protected $curonly;

	/** @var bool Whether this request streams an export instead of showing the form */
	protected $doExport;

	/** @var int Page link depth, as returned by validateLinkDepth() */
	protected $pageLinkDepth;

	/** @var bool State of the 'templates' request checkbox */
	protected $templates;

	/** @var ILoadBalancer */
	private $loadBalancer;

	/** @var WikiExporterFactory */
	private $wikiExporterFactory;

	/** @var TitleFormatter */
	private $titleFormatter;

	/** @var LinksMigration */
	private $linksMigration;

	/**
	 * @param ILoadBalancer $loadBalancer
	 * @param WikiExporterFactory $wikiExporterFactory
	 * @param TitleFormatter $titleFormatter
	 * @param LinksMigration $linksMigration
	 */
	public function __construct(
		ILoadBalancer $loadBalancer,
		WikiExporterFactory $wikiExporterFactory,
		TitleFormatter $titleFormatter,
		LinksMigration $linksMigration
	) {
		parent::__construct( 'Export' );
		$this->loadBalancer = $loadBalancer;
		$this->wikiExporterFactory = $wikiExporterFactory;
		$this->titleFormatter = $titleFormatter;
		$this->linksMigration = $linksMigration;
	}

	/**
	 * Entry point: interpret the request, then either stream an export or show the form.
	 *
	 * The request is handled as one of five mutually exclusive modes, checked in
	 * this order: 'addcat', 'addns', 'exportall', a POST without subpage, and
	 * everything else (treated as a current-only GET-style request).
	 *
	 * @param string|null $par Subpage; used as the default page list for non-POST requests
	 */
	public function execute( $par ) {
		$this->setHeaders();
		$this->outputHeader();
		$config = $this->getConfig();

		// Set some variables
		$this->curonly = true;
		$this->doExport = false;
		$request = $this->getRequest();
		$this->templates = $request->getCheck( 'templates' );
		$this->pageLinkDepth = $this->validateLinkDepth(
			$request->getIntOrNull( 'pagelink-depth' )
		);
		$nsindex = '';
		$exportall = false;

		if ( $request->getCheck( 'addcat' ) ) {
			// "Add pages from category": extend the page list, then fall through to the form.
			$page = $request->getText( 'pages' );
			$catname = $request->getText( 'catname' );

			if ( $catname !== '' && $catname !== null && $catname !== false ) {
				$t = Title::makeTitleSafe( NS_MAIN, $catname );
				if ( $t ) {
					$catpages = $this->getPagesFromCategory( $t );
					if ( $catpages ) {
						if ( $page !== '' ) {
							$page .= "\n";
						}
						$page .= implode( "\n", $catpages );
					}
				}
			}
		} elseif ( $request->getCheck( 'addns' ) &&
			$config->get( MainConfigNames::ExportFromNamespaces ) ) {
			// "Add pages from namespace": extend the page list, then fall through to the form.
			$page = $request->getText( 'pages' );
			$nsindex = $request->getText( 'nsindex', '' );

			if ( strval( $nsindex ) !== '' ) {
				$nspages = $this->getPagesFromNamespace( (int)$nsindex );
				if ( $nspages ) {
					// Same separator handling as the category branch, so an empty
					// list does not gain a leading blank line.
					if ( $page !== '' ) {
						$page .= "\n";
					}
					$page .= implode( "\n", $nspages );
				}
			}
		} elseif ( $request->getCheck( 'exportall' ) &&
			$config->get( MainConfigNames::ExportAllowAll ) ) {
			$this->doExport = true;
			$exportall = true;

			/* Although $page and $history are not used later on, we
			nevertheless set them to avoid that PHP notices about using
			undefined variables foul up our XML output (see call to
			doExport(...) further down) */
			$page = '';
			$history = '';
		} elseif ( $request->wasPosted() && $par == '' ) {
			// Log to see if certain parameters are actually used.
			// If not, we could deprecate them and do some cleanup, here and in WikiExporter.
			LoggerFactory::getInstance( 'export' )->debug(
				'Special:Export POST, dir: [{dir}], offset: [{offset}], limit: [{limit}]', [
					'dir' => $request->getRawVal( 'dir' ),
					'offset' => $request->getRawVal( 'offset' ),
					'limit' => $request->getRawVal( 'limit' ),
				] );

			$page = $request->getText( 'pages' );
			$this->curonly = $request->getCheck( 'curonly' );
			$rawOffset = $request->getVal( 'offset' );

			if ( $rawOffset ) {
				$offset = wfTimestamp( TS_MW, $rawOffset );
			} else {
				$offset = null;
			}

			$maxHistory = $config->get( MainConfigNames::ExportMaxHistory );
			$limit = $request->getInt( 'limit' );
			$dir = $request->getVal( 'dir' );
			$history = [
				'dir' => 'asc',
				'offset' => false,
				'limit' => $maxHistory,
			];
			$historyCheck = $request->getCheck( 'history' );

			if ( $this->curonly ) {
				$history = WikiExporter::CURRENT;
			} elseif ( !$historyCheck ) {
				// Only accept a limit that tightens the configured maximum
				// (a maximum of 0 accepts any positive limit).
				if ( $limit > 0 && ( $maxHistory == 0 || $limit < $maxHistory ) ) {
					$history['limit'] = $limit;
				}

				if ( $offset !== null ) {
					$history['offset'] = $offset;
				}

				// 'dir' may be absent (null); strtolower() must not be given null.
				if ( strtolower( $dir ?? '' ) == 'desc' ) {
					$history['dir'] = 'desc';
				}
			}

			if ( $page != '' ) {
				$this->doExport = true;
			}
		} else {
			// Default to current-only for GET requests.
			$page = $request->getText( 'pages', $par ?? '' );
			$historyCheck = $request->getCheck( 'history' );

			if ( $historyCheck ) {
				$history = WikiExporter::FULL;
			} else {
				$history = WikiExporter::CURRENT;
			}

			if ( $page != '' ) {
				$this->doExport = true;
			}
		}

		if ( !$config->get( MainConfigNames::ExportAllowHistory ) ) {
			// Override
			$history = WikiExporter::CURRENT;
		}

		$list_authors = $request->getCheck( 'listauthors' );
		if ( !$this->curonly || !$config->get( MainConfigNames::ExportAllowListContributors ) ) {
			$list_authors = false;
		}

		if ( $this->doExport ) {
			$this->getOutput()->disable();

			// Cancel output buffering and gzipping if set
			// This should provide safer streaming for pages with history
			wfResetOutputBuffers();
			$request->response()->header( 'Content-type: application/xml; charset=utf-8' );
			$request->response()->header( 'X-Robots-Tag: noindex,nofollow' );

			if ( $request->getCheck( 'wpDownload' ) ) {
				// Provide a sensible filename suggestion
				$filename = urlencode( $config->get( MainConfigNames::Sitename ) . '-' .
					wfTimestampNow() . '.xml' );
				$request->response()->header( "Content-disposition: attachment;filename={$filename}" );
			}

			// @phan-suppress-next-next-line PhanPossiblyUndeclaredVariable
			// @phan-suppress-next-line PhanTypeMismatchArgumentNullable history is set when used
			$this->doExport( $page, $history, $list_authors, $exportall );

			return;
		}

		$this->showExportForm( $page, $nsindex );
	}

	/**
	 * Render the export form into the output page.
	 *
	 * @param string $page Newline-separated page list used to pre-fill the textarea
	 * @param string $nsindex Default selection for the namespace selector
	 */
	private function showExportForm( $page, $nsindex ) {
		$request = $this->getRequest();
		$config = $this->getConfig();

		$out = $this->getOutput();
		$out->addWikiMsg( 'exporttext' );

		// Keep the category box filled only while no pages are listed.
		if ( $page == '' ) {
			$categoryName = $request->getText( 'catname' );
		} else {
			$categoryName = '';
		}
		$canExportAll = $config->get( MainConfigNames::ExportAllowAll );
		// Fields carrying this rule are hidden while the 'exportall' box is ticked.
		$hideIf = $canExportAll ? [ 'hide-if' => [ '===', 'exportall', '1' ] ] : [];

		$formDescriptor = [
			'catname' => [
				'type' => 'textwithbutton',
				'name' => 'catname',
				'horizontal-label' => true,
				'label-message' => 'export-addcattext',
				'default' => $categoryName,
				'size' => 40,
				'buttontype' => 'submit',
				'buttonname' => 'addcat',
				'buttondefault' => $this->msg( 'export-addcat' )->text(),
			] + $hideIf,
		];
		if ( $config->get( MainConfigNames::ExportFromNamespaces ) ) {
			$formDescriptor += [
				'nsindex' => [
					'type' => 'namespaceselectwithbutton',
					'default' => $nsindex,
					'label-message' => 'export-addnstext',
					'horizontal-label' => true,
					'name' => 'nsindex',
					'id' => 'namespace',
					'cssclass' => 'namespaceselector',
					'buttontype' => 'submit',
					'buttonname' => 'addns',
					'buttondefault' => $this->msg( 'export-addns' )->text(),
				] + $hideIf,
			];
		}

		if ( $canExportAll ) {
			$formDescriptor += [
				'exportall' => [
					'type' => 'check',
					'label-message' => 'exportall',
					'name' => 'exportall',
					'id' => 'exportall',
					'default' => $request->wasPosted() ? $request->getCheck( 'exportall' ) : false,
				],
			];
		}

		$formDescriptor += [
			'textarea' => [
				'class' => HTMLTextAreaField::class,
				'name' => 'pages',
				'label-message' => 'export-manual',
				'nodata' => true,
				'rows' => 10,
				'default' => $page,
			] + $hideIf,
		];

		if ( $config->get( MainConfigNames::ExportAllowHistory ) ) {
			$formDescriptor += [
				'curonly' => [
					'type' => 'check',
					'label-message' => 'exportcuronly',
					'name' => 'curonly',
					'id' => 'curonly',
					'default' => $request->wasPosted() ? $request->getCheck( 'curonly' ) : true,
				],
			];
		} else {
			$out->addWikiMsg( 'exportnohistory' );
		}

		$formDescriptor += [
			'templates' => [
				'type' => 'check',
				'label-message' => 'export-templates',
				'name' => 'templates',
				'id' => 'wpExportTemplates',
				'default' => $request->wasPosted() ? $request->getCheck( 'templates' ) : false,
			],
		];

		if ( $config->get( MainConfigNames::ExportMaxLinkDepth ) ||
			$this->userCanOverrideExportDepth() ) {
			$formDescriptor += [
				'pagelink-depth' => [
					'type' => 'text',
					'name' => 'pagelink-depth',
					'id' => 'pagelink-depth',
					'label-message' => 'export-pagelinks',
					'default' => '0',
					'size' => 20,
				],
			];
		}

		$formDescriptor += [
			'wpDownload' => [
				'type' => 'check',
				'name' => 'wpDownload',
				'id' => 'wpDownload',
				'default' => $request->wasPosted() ? $request->getCheck( 'wpDownload' ) : true,
				'label-message' => 'export-download',
			],
		];

		if ( $config->get( MainConfigNames::ExportAllowListContributors ) ) {
			$formDescriptor += [
				'listauthors' => [
					'type' => 'check',
					'label-message' => 'exportlistauthors',
					'default' => $request->wasPosted() ? $request->getCheck( 'listauthors' ) : false,
					'name' => 'listauthors',
					'id' => 'listauthors',
				],
			];
		}

		$htmlForm = HTMLForm::factory( 'ooui', $formDescriptor, $this->getContext() );
		$htmlForm->setSubmitTextMsg( 'export-submit' );
		$htmlForm->prepareForm()->displayForm( false );
		$this->addHelpLink( 'Help:Export' );
	}

	/**
	 * Whether the current authority holds the 'override-export-depth' right.
	 *
	 * @return bool
	 */
	protected function userCanOverrideExportDepth() {
		return $this->getAuthority()->isAllowed( 'override-export-depth' );
	}

	/**
	 * Do the actual page exporting.
	 *
	 * @param string $page Newline-separated list of page titles; ignored when $exportall is set
	 * @param mixed $history History specification handed to the WikiExporter factory;
	 *   replaced by WikiExporter::FULL when $exportall is set
	 * @param bool $list_authors Value assigned to the exporter's list_authors property
	 * @param bool $exportall Export every page instead of the given list
	 */
	protected function doExport( $page, $history, $list_authors, $exportall ) {
		// If we are grabbing everything, enable full history and ignore the rest
		if ( $exportall ) {
			$history = WikiExporter::FULL;
		} else {
			$pageSet = []; // Inverted index of all pages to look up

			// Split up and normalize input
			foreach ( explode( "\n", $page ) as $pageName ) {
				$pageName = trim( $pageName );
				$title = Title::newFromText( $pageName );
				if ( $title && !$title->isExternal() && $title->getText() !== '' ) {
					// Only record each page once!
					$pageSet[$title->getPrefixedText()] = true;
				}
			}

			// Set of original pages to pass on to further manipulation...
			$inputPages = array_keys( $pageSet );

			// Look up any linked pages if asked...
			if ( $this->templates ) {
				$pageSet = $this->getTemplates( $inputPages, $pageSet );
			}
			$pageSet = $this->getExtraPages( $inputPages, $pageSet );
			$linkDepth = $this->pageLinkDepth;
			if ( $linkDepth ) {
				$pageSet = $this->getPageLinks( $inputPages, $pageSet, $linkDepth );
			}

			$pages = array_keys( $pageSet );

			// Normalize titles to the same format and remove dupes, see T19374
			foreach ( $pages as $k => $v ) {
				$pages[$k] = str_replace( ' ', '_', $v );
			}

			$pages = array_unique( $pages );
		}

		/* Ok, let's get to it... */
		$db = $this->loadBalancer->getConnectionRef( ILoadBalancer::DB_REPLICA );

		$exporter = $this->wikiExporterFactory->getWikiExporter( $db, $history );
		$exporter->list_authors = $list_authors;
		$exporter->openStream();

		if ( $exportall ) {
			$exporter->allPages();
		} else {
			// @phan-suppress-next-line PhanPossiblyUndeclaredVariable
			foreach ( $pages as $page ) {
				# T10824: Only export pages the user can read
				$title = Title::newFromText( $page );
				if ( $title === null ) {
					// @todo Perhaps output an <error> tag or something.
					continue;
				}

				if ( !$this->getAuthority()->authorizeRead( 'read', $title ) ) {
					// @todo Perhaps output an <error> tag or something.
					continue;
				}

				$exporter->pageByTitle( $title );
			}
		}

		$exporter->closeStream();
	}

	/**
	 * List pages that are members of a category, up to ExportPagelistLimit rows.
	 *
	 * @param Title $title Title whose DB key is matched against cl_to
	 * @return string[] Prefixed names built with Title::makeName()
	 */
	protected function getPagesFromCategory( $title ) {
		$maxPages = $this->getConfig()->get( MainConfigNames::ExportPagelistLimit );

		$name = $title->getDBkey();

		$dbr = $this->loadBalancer->getConnectionRef( ILoadBalancer::DB_REPLICA );
		$res = $dbr->select(
			[ 'page', 'categorylinks' ],
			[ 'page_namespace', 'page_title' ],
			[ 'cl_from=page_id', 'cl_to' => $name ],
			__METHOD__,
			[ 'LIMIT' => $maxPages ]
		);

		$pages = [];

		foreach ( $res as $row ) {
			$pages[] = Title::makeName( $row->page_namespace, $row->page_title );
		}

		return $pages;
	}

	/**
	 * List pages in a namespace, up to ExportPagelistLimit rows.
	 *
	 * @param int $nsindex Namespace index matched against page_namespace
	 * @return string[] Prefixed names built with Title::makeName()
	 */
	protected function getPagesFromNamespace( $nsindex ) {
		$maxPages = $this->getConfig()->get( MainConfigNames::ExportPagelistLimit );

		$dbr = $this->loadBalancer->getConnectionRef( ILoadBalancer::DB_REPLICA );
		$res = $dbr->select(
			'page',
			[ 'page_namespace', 'page_title' ],
			[ 'page_namespace' => $nsindex ],
			__METHOD__,
			[ 'LIMIT' => $maxPages ]
		);

		$pages = [];

		foreach ( $res as $row ) {
			$pages[] = Title::makeName( $row->page_namespace, $row->page_title );
		}

		return $pages;
	}

	/**
	 * Expand a list of pages to include templates used in those pages.
	 *
	 * @param string[] $inputPages Page titles to look up
	 * @param array $pageSet Map of prefixed title text => true
	 * @return array The page set with the template targets added
	 */
	protected function getTemplates( $inputPages, $pageSet ) {
		// Table list, join conditions and column names all come from LinksMigration.
		[ $nsField, $titleField ] = $this->linksMigration->getTitleFields( 'templatelinks' );
		$queryInfo = $this->linksMigration->getQueryInfo( 'templatelinks' );
		return $this->getLinks( $inputPages, $pageSet,
			$queryInfo['tables'],
			[ 'namespace' => $nsField, 'title' => $titleField ],
			array_merge(
				[ 'templatelinks' => [ 'JOIN', [ 'page_id=tl_from' ] ] ],
				$queryInfo['joins']
			)
		);
	}

	/**
	 * Add any pages contributed by the SpecialExportGetExtraPages hook.
	 *
	 * @param string[] $inputPages Page titles passed to the hook
	 * @param array $pageSet Map of prefixed title text => true
	 * @return array The page set with the hook-provided pages added
	 */
	private function getExtraPages( $inputPages, $pageSet ) {
		$extraPages = [];
		$this->getHookRunner()->onSpecialExportGetExtraPages( $inputPages, $extraPages );
		foreach ( $extraPages as $extraPage ) {
			$pageSet[$this->titleFormatter->getPrefixedText( $extraPage )] = true;
		}
		return $pageSet;
	}

	/**
	 * Validate link depth setting, if available.
	 *
	 * Missing or negative values become 0. Users without the override right are
	 * capped at ExportMaxLinkDepth, and the result never exceeds 5.
	 *
	 * @param int|null $depth Requested depth, or null when not supplied
	 * @return int
	 */
	protected function validateLinkDepth( $depth ) {
		// A missing value used to reach 0 via min()/intval() coercion; be explicit.
		if ( $depth === null || $depth < 0 ) {
			return 0;
		}

		if ( !$this->userCanOverrideExportDepth() ) {
			$maxLinkDepth = $this->getConfig()->get( MainConfigNames::ExportMaxLinkDepth );
			if ( $depth > $maxLinkDepth ) {
				return $maxLinkDepth;
			}
		}

		/*
		 * There's a HARD CODED limit of 5 levels of recursion here to prevent a
		 * crazy-big export from being done by someone setting the depth
		 * number too high. In other words, last resort safety net.
		 */

		return intval( min( $depth, 5 ) );
	}

	/**
	 * Expand a list of pages to include pages linked to from that page.
	 *
	 * Each pass feeds the whole accumulated page set back in as the next input.
	 *
	 * @param string[] $inputPages Page titles to start from
	 * @param array $pageSet Map of prefixed title text => true
	 * @param int $depth Number of passes to run
	 * @return array The expanded page set
	 */
	protected function getPageLinks( $inputPages, $pageSet, $depth ) {
		for ( ; $depth > 0; --$depth ) {
			$pageSet = $this->getLinks(
				$inputPages, $pageSet, [ 'pagelinks' ],
				[ 'namespace' => 'pl_namespace', 'title' => 'pl_title' ],
				[ 'pagelinks' => [ 'JOIN', [ 'page_id=pl_from' ] ] ]
			);
			$inputPages = array_keys( $pageSet );
		}

		return $pageSet;
	}

	/**
	 * Expand a list of pages to include items used in those pages.
	 *
	 * Runs one query per input page that parses to a valid title.
	 *
	 * @param string[] $inputPages Page titles to look up
	 * @param array $pageSet Map of prefixed title text => true
	 * @param string[] $table Link table(s) to query; 'page' is appended here
	 * @param array $fields Field list; must alias the target columns as
	 *   'namespace' and 'title', which is how the result rows are read
	 * @param array $join Join conditions passed to the query
	 * @return array The page set with the input pages and their targets added
	 */
	protected function getLinks( $inputPages, $pageSet, $table, $fields, $join ) {
		$dbr = $this->loadBalancer->getConnectionRef( ILoadBalancer::DB_REPLICA );
		$table[] = 'page';

		foreach ( $inputPages as $page ) {
			$title = Title::newFromText( $page );
			if ( $title ) {
				$pageSet[$title->getPrefixedText()] = true;
				$result = $dbr->select(
					$table,
					$fields,
					[
						'page_namespace' => $title->getNamespace(),
						'page_title' => $title->getDBkey()
					],
					__METHOD__,
					[],
					$join
				);

				foreach ( $result as $row ) {
					$linked = Title::makeTitle( $row->namespace, $row->title );
					$pageSet[$linked->getPrefixedText()] = true;
				}
			}
		}

		return $pageSet;
	}

	/**
	 * Group under which this page is listed on Special:SpecialPages.
	 *
	 * @return string
	 */
	protected function getGroupName() {
		return 'pagetools';
	}
}
const NS_MAIN
Definition: Defines.php:64
wfTimestampNow()
Convenience function; returns MediaWiki timestamp for the present time.
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
wfResetOutputBuffers( $resetGzipEncoding=true)
Clear away any user-level output buffers, discarding contents.
static factory( $displayFormat, $descriptor, IContextSource $context, $messagePrefix='')
Construct a HTMLForm object for given display type.
Definition: HTMLForm.php:344
WikiExporterFactory
Factory service for WikiExporter instances.
LinksMigration
Service for compat reading of links tables.
LoggerFactory
PSR-3 logger instance factory.
MainConfigNames
A class containing constants representing the names of configuration variables.
SpecialExport
A special page that allows users to export pages in an XML file.
getGroupName()
Under which header this special page is listed in Special:SpecialPages See messages 'specialpages-gro...
doExport( $page, $history, $list_authors, $exportall)
Do the actual page exporting.
getPageLinks( $inputPages, $pageSet, $depth)
Expand a list of pages to include pages linked to from that page.
getLinks( $inputPages, $pageSet, $table, $fields, $join)
Expand a list of pages to include items used in those pages.
__construct(ILoadBalancer $loadBalancer, WikiExporterFactory $wikiExporterFactory, TitleFormatter $titleFormatter, LinksMigration $linksMigration)
getPagesFromNamespace( $nsindex)
execute( $par)
Default execute method. Checks user permissions.
validateLinkDepth( $depth)
Validate link depth setting, if available.
getTemplates( $inputPages, $pageSet)
Expand a list of pages to include templates used in those pages.
getPagesFromCategory( $title)
SpecialPage
Parent class for all special pages.
Definition: SpecialPage.php:44
outputHeader( $summaryMessageKey='')
Outputs a summary message on top of special pages Per default the message key is the canonical name o...
setHeaders()
Sets headers - this should be called from the execute() method of all derived classes!
getOutput()
Get the OutputPage being used for this instance.
getContext()
Gets the context this SpecialPage is executed in.
msg( $key,... $params)
Wrapper around wfMessage that sets the current context.
getAuthority()
Shortcut to get the Authority executing this instance.
getConfig()
Shortcut to get main config object.
getRequest()
Get the WebRequest being used for this instance.
addHelpLink( $to, $overrideBaseUrl=false)
Adds help link with an icon via page indicators.
static makeName( $ns, $title, $fragment='', $interwiki='', $canonicalNamespace=false)
Make a prefixed DB key from a DB key and a namespace index.
Definition: Title.php:854
static newFromText( $text, $defaultNamespace=NS_MAIN)
Create a new Title from text, such as what one would find in a link.
Definition: Title.php:370
static makeTitleSafe( $ns, $title, $fragment='', $interwiki='')
Create a new Title from a namespace index and a DB key.
Definition: Title.php:664
static makeTitle( $ns, $title, $fragment='', $interwiki='')
Create a new Title from a namespace index and a DB key.
Definition: Title.php:638
TitleFormatter
A title formatter service for MediaWiki.
ILoadBalancer
Create and track the database connections and transactions for a given database cluster.
const DB_REPLICA
Definition: defines.php:26