MediaWiki  master
SpecialExport.php
Go to the documentation of this file.
1 <?php
26 namespace MediaWiki\Specials;
27 
28 use HTMLForm;
37 use WikiExporter;
39 
45 class SpecialExport extends SpecialPage {
47 
48  private IConnectionProvider $dbProvider;
49  private WikiExporterFactory $wikiExporterFactory;
50  private TitleFormatter $titleFormatter;
51  private LinksMigration $linksMigration;
52 
59  public function __construct(
60  IConnectionProvider $dbProvider,
61  WikiExporterFactory $wikiExporterFactory,
62  TitleFormatter $titleFormatter,
63  LinksMigration $linksMigration
64  ) {
65  parent::__construct( 'Export' );
66  $this->dbProvider = $dbProvider;
67  $this->wikiExporterFactory = $wikiExporterFactory;
68  $this->titleFormatter = $titleFormatter;
69  $this->linksMigration = $linksMigration;
70  }
71 
72  public function execute( $par ) {
73  $this->setHeaders();
74  $this->outputHeader();
75  $config = $this->getConfig();
76 
77  $this->curonly = true;
78  $this->doExport = false;
79  $request = $this->getRequest();
80  $this->templates = $request->getCheck( 'templates' );
81  $this->pageLinkDepth = $this->validateLinkDepth(
82  $request->getIntOrNull( 'pagelink-depth' )
83  );
84  $nsindex = '';
85  $exportall = false;
86 
87  if ( $request->getCheck( 'addcat' ) ) {
88  $page = $request->getText( 'pages' );
89  $catname = $request->getText( 'catname' );
90 
91  if ( $catname !== '' && $catname !== null && $catname !== false ) {
92  $t = Title::makeTitleSafe( NS_MAIN, $catname );
93  if ( $t ) {
99  $catpages = $this->getPagesFromCategory( $t );
100  if ( $catpages ) {
101  if ( $page !== '' ) {
102  $page .= "\n";
103  }
104  $page .= implode( "\n", $catpages );
105  }
106  }
107  }
108  } elseif ( $request->getCheck( 'addns' ) &&
109  $config->get( MainConfigNames::ExportFromNamespaces ) ) {
110  $page = $request->getText( 'pages' );
111  $nsindex = $request->getText( 'nsindex', '' );
112 
113  if ( strval( $nsindex ) !== '' ) {
117  $nspages = $this->getPagesFromNamespace( (int)$nsindex );
118  if ( $nspages ) {
119  $page .= "\n" . implode( "\n", $nspages );
120  }
121  }
122  } elseif ( $request->getCheck( 'exportall' ) &&
123  $config->get( MainConfigNames::ExportAllowAll ) ) {
124  $this->doExport = true;
125  $exportall = true;
126 
127  /* Although $page and $history are not used later on, we
128  nevertheless set them to avoid that PHP notices about using
129  undefined variables foul up our XML output (see call to
130  doExport(...) further down) */
131  $page = '';
132  $history = '';
133  } elseif ( $request->wasPosted() && $par == '' ) {
134  // Log to see if certain parameters are actually used.
135  // If not, we could deprecate them and do some cleanup, here and in WikiExporter.
136  LoggerFactory::getInstance( 'export' )->debug(
137  'Special:Export POST, dir: [{dir}], offset: [{offset}], limit: [{limit}]', [
138  'dir' => $request->getRawVal( 'dir' ),
139  'offset' => $request->getRawVal( 'offset' ),
140  'limit' => $request->getRawVal( 'limit' ),
141  ] );
142 
143  $page = $request->getText( 'pages' );
144  $this->curonly = $request->getCheck( 'curonly' );
145  $rawOffset = $request->getVal( 'offset' );
146 
147  if ( $rawOffset ) {
148  $offset = wfTimestamp( TS_MW, $rawOffset );
149  } else {
150  $offset = null;
151  }
152 
153  $maxHistory = $config->get( MainConfigNames::ExportMaxHistory );
154  $limit = $request->getInt( 'limit' );
155  $dir = $request->getVal( 'dir' );
156  $history = [
157  'dir' => 'asc',
158  'offset' => false,
159  'limit' => $maxHistory,
160  ];
161  $historyCheck = $request->getCheck( 'history' );
162 
163  if ( $this->curonly ) {
164  $history = WikiExporter::CURRENT;
165  } elseif ( !$historyCheck ) {
166  if ( $limit > 0 && ( $maxHistory == 0 || $limit < $maxHistory ) ) {
167  $history['limit'] = $limit;
168  }
169 
170  if ( $offset !== null ) {
171  $history['offset'] = $offset;
172  }
173 
174  if ( strtolower( $dir ) == 'desc' ) {
175  $history['dir'] = 'desc';
176  }
177  }
178 
179  if ( $page != '' ) {
180  $this->doExport = true;
181  }
182  } else {
183  // Default to current-only for GET requests.
184  $page = $request->getText( 'pages', $par ?? '' );
185  $historyCheck = $request->getCheck( 'history' );
186 
187  if ( $historyCheck ) {
188  $history = WikiExporter::FULL;
189  } else {
190  $history = WikiExporter::CURRENT;
191  }
192 
193  if ( $page != '' ) {
194  $this->doExport = true;
195  }
196  }
197 
198  if ( !$config->get( MainConfigNames::ExportAllowHistory ) ) {
199  // Override
200  $history = WikiExporter::CURRENT;
201  }
202 
203  $list_authors = $request->getCheck( 'listauthors' );
204  if ( !$this->curonly || !$config->get( MainConfigNames::ExportAllowListContributors ) ) {
205  $list_authors = false;
206  }
207 
208  if ( $this->doExport ) {
209  $this->getOutput()->disable();
210 
211  // Cancel output buffering and gzipping if set
212  // This should provide safer streaming for pages with history
214  $request->response()->header( 'Content-type: application/xml; charset=utf-8' );
215  $request->response()->header( 'X-Robots-Tag: noindex,nofollow' );
216 
217  if ( $request->getCheck( 'wpDownload' ) ) {
218  // Provide a sensible filename suggestion
219  $filename = urlencode( $config->get( MainConfigNames::Sitename ) . '-' .
220  wfTimestampNow() . '.xml' );
221  $request->response()->header( "Content-disposition: attachment;filename={$filename}" );
222  }
223 
224  // @phan-suppress-next-next-line PhanPossiblyUndeclaredVariable
225  // @phan-suppress-next-line PhanTypeMismatchArgumentNullable history is set when used
226  $this->doExport( $page, $history, $list_authors, $exportall );
227 
228  return;
229  }
230 
231  $out = $this->getOutput();
232  $out->addWikiMsg( 'exporttext' );
233 
234  if ( $page == '' ) {
235  $categoryName = $request->getText( 'catname' );
236  } else {
237  $categoryName = '';
238  }
239  $canExportAll = $config->get( MainConfigNames::ExportAllowAll );
240  $hideIf = $canExportAll ? [ 'hide-if' => [ '===', 'exportall', '1' ] ] : [];
241 
242  $formDescriptor = [
243  'catname' => [
244  'type' => 'textwithbutton',
245  'name' => 'catname',
246  'horizontal-label' => true,
247  'label-message' => 'export-addcattext',
248  'default' => $categoryName,
249  'size' => 40,
250  'buttontype' => 'submit',
251  'buttonname' => 'addcat',
252  'buttondefault' => $this->msg( 'export-addcat' )->text(),
253  ] + $hideIf,
254  ];
255  if ( $config->get( MainConfigNames::ExportFromNamespaces ) ) {
256  $formDescriptor += [
257  'nsindex' => [
258  'type' => 'namespaceselectwithbutton',
259  'default' => $nsindex,
260  'label-message' => 'export-addnstext',
261  'horizontal-label' => true,
262  'name' => 'nsindex',
263  'id' => 'namespace',
264  'cssclass' => 'namespaceselector',
265  'buttontype' => 'submit',
266  'buttonname' => 'addns',
267  'buttondefault' => $this->msg( 'export-addns' )->text(),
268  ] + $hideIf,
269  ];
270  }
271 
272  if ( $canExportAll ) {
273  $formDescriptor += [
274  'exportall' => [
275  'type' => 'check',
276  'label-message' => 'exportall',
277  'name' => 'exportall',
278  'id' => 'exportall',
279  'default' => $request->wasPosted() && $request->getCheck( 'exportall' ),
280  ],
281  ];
282  }
283 
284  $formDescriptor += [
285  'textarea' => [
286  'class' => HTMLTextAreaField::class,
287  'name' => 'pages',
288  'label-message' => 'export-manual',
289  'nodata' => true,
290  'rows' => 10,
291  'default' => $page,
292  ] + $hideIf,
293  ];
294 
295  if ( $config->get( MainConfigNames::ExportAllowHistory ) ) {
296  $formDescriptor += [
297  'curonly' => [
298  'type' => 'check',
299  'label-message' => 'exportcuronly',
300  'name' => 'curonly',
301  'id' => 'curonly',
302  'default' => !$request->wasPosted() || $request->getCheck( 'curonly' ),
303  ],
304  ];
305  } else {
306  $out->addWikiMsg( 'exportnohistory' );
307  }
308 
309  $formDescriptor += [
310  'templates' => [
311  'type' => 'check',
312  'label-message' => 'export-templates',
313  'name' => 'templates',
314  'id' => 'wpExportTemplates',
315  'default' => $request->wasPosted() && $request->getCheck( 'templates' ),
316  ],
317  ];
318 
319  if ( $config->get( MainConfigNames::ExportMaxLinkDepth ) ||
320  $this->userCanOverrideExportDepth() ) {
321  $formDescriptor += [
322  'pagelink-depth' => [
323  'type' => 'text',
324  'name' => 'pagelink-depth',
325  'id' => 'pagelink-depth',
326  'label-message' => 'export-pagelinks',
327  'default' => '0',
328  'size' => 20,
329  ],
330  ];
331  }
332 
333  $formDescriptor += [
334  'wpDownload' => [
335  'type' => 'check',
336  'name' => 'wpDownload',
337  'id' => 'wpDownload',
338  'default' => !$request->wasPosted() || $request->getCheck( 'wpDownload' ),
339  'label-message' => 'export-download',
340  ],
341  ];
342 
343  if ( $config->get( MainConfigNames::ExportAllowListContributors ) ) {
344  $formDescriptor += [
345  'listauthors' => [
346  'type' => 'check',
347  'label-message' => 'exportlistauthors',
348  'default' => $request->wasPosted() && $request->getCheck( 'listauthors' ),
349  'name' => 'listauthors',
350  'id' => 'listauthors',
351  ],
352  ];
353  }
354 
355  $htmlForm = HTMLForm::factory( 'ooui', $formDescriptor, $this->getContext() );
356  $htmlForm->setSubmitTextMsg( 'export-submit' );
357  $htmlForm->prepareForm()->displayForm( false );
358  $this->addHelpLink( 'Help:Export' );
359  }
360 
364  protected function userCanOverrideExportDepth() {
365  return $this->getAuthority()->isAllowed( 'override-export-depth' );
366  }
367 
377  protected function doExport( $page, $history, $list_authors, $exportall ) {
378  // If we are grabbing everything, enable full history and ignore the rest
379  if ( $exportall ) {
380  $history = WikiExporter::FULL;
381  } else {
382  $pageSet = []; // Inverted index of all pages to look up
383 
384  // Split up and normalize input
385  foreach ( explode( "\n", $page ) as $pageName ) {
386  $pageName = trim( $pageName );
387  $title = Title::newFromText( $pageName );
388  if ( $title && !$title->isExternal() && $title->getText() !== '' ) {
389  // Only record each page once!
390  $pageSet[$title->getPrefixedText()] = true;
391  }
392  }
393 
394  // Set of original pages to pass on to further manipulation...
395  $inputPages = array_keys( $pageSet );
396 
397  // Look up any linked pages if asked...
398  if ( $this->templates ) {
399  $pageSet = $this->getTemplates( $inputPages, $pageSet );
400  }
401  $pageSet = $this->getExtraPages( $inputPages, $pageSet );
402  $linkDepth = $this->pageLinkDepth;
403  if ( $linkDepth ) {
404  $pageSet = $this->getPageLinks( $inputPages, $pageSet, $linkDepth );
405  }
406 
407  $pages = array_keys( $pageSet );
408 
409  // Normalize titles to the same format and remove dupes, see T19374
410  foreach ( $pages as $k => $v ) {
411  $pages[$k] = str_replace( ' ', '_', $v );
412  }
413 
414  $pages = array_unique( $pages );
415  }
416 
417  /* Ok, let's get to it... */
418  $db = $this->dbProvider->getReplicaDatabase();
419 
420  $exporter = $this->wikiExporterFactory->getWikiExporter( $db, $history );
421  $exporter->list_authors = $list_authors;
422  $exporter->openStream();
423 
424  if ( $exportall ) {
425  $exporter->allPages();
426  } else {
427  // @phan-suppress-next-line PhanPossiblyUndeclaredVariable
428  foreach ( $pages as $page ) {
429  # T10824: Only export pages the user can read
430  $title = Title::newFromText( $page );
431  if ( $title === null ) {
432  // @todo Perhaps output an <error> tag or something.
433  continue;
434  }
435 
436  if ( !$this->getAuthority()->authorizeRead( 'read', $title ) ) {
437  // @todo Perhaps output an <error> tag or something.
438  continue;
439  }
440 
441  $exporter->pageByTitle( $title );
442  }
443  }
444 
445  $exporter->closeStream();
446  }
447 
452  protected function getPagesFromCategory( $title ) {
453  $maxPages = $this->getConfig()->get( MainConfigNames::ExportPagelistLimit );
454 
455  $name = $title->getDBkey();
456 
457  $dbr = $this->dbProvider->getReplicaDatabase();
458  $res = $dbr->newSelectQueryBuilder()
459  ->select( [ 'page_namespace', 'page_title' ] )
460  ->from( 'page' )
461  ->join( 'categorylinks', null, 'cl_from=page_id' )
462  ->where( [ 'cl_to' => $name ] )
463  ->limit( $maxPages )
464  ->caller( __METHOD__ )->fetchResultSet();
465 
466  $pages = [];
467 
468  foreach ( $res as $row ) {
469  $pages[] = Title::makeName( $row->page_namespace, $row->page_title );
470  }
471 
472  return $pages;
473  }
474 
479  protected function getPagesFromNamespace( $nsindex ) {
480  $maxPages = $this->getConfig()->get( MainConfigNames::ExportPagelistLimit );
481 
482  $dbr = $this->dbProvider->getReplicaDatabase();
483  $res = $dbr->newSelectQueryBuilder()
484  ->select( [ 'page_namespace', 'page_title' ] )
485  ->from( 'page' )
486  ->where( [ 'page_namespace' => $nsindex ] )
487  ->limit( $maxPages )
488  ->caller( __METHOD__ )->fetchResultSet();
489 
490  $pages = [];
491 
492  foreach ( $res as $row ) {
493  $pages[] = Title::makeName( $row->page_namespace, $row->page_title );
494  }
495 
496  return $pages;
497  }
498 
505  protected function getTemplates( $inputPages, $pageSet ) {
506  [ $nsField, $titleField ] = $this->linksMigration->getTitleFields( 'templatelinks' );
507  $queryInfo = $this->linksMigration->getQueryInfo( 'templatelinks' );
508  return $this->getLinks( $inputPages, $pageSet,
509  $queryInfo['tables'],
510  [ 'namespace' => $nsField, 'title' => $titleField ],
511  array_merge(
512  [ 'templatelinks' => [ 'JOIN', [ 'page_id=tl_from' ] ] ],
513  $queryInfo['joins']
514  )
515  );
516  }
517 
524  private function getExtraPages( $inputPages, $pageSet ) {
525  $extraPages = [];
526  $this->getHookRunner()->onSpecialExportGetExtraPages( $inputPages, $extraPages );
527  foreach ( $extraPages as $extraPage ) {
528  $pageSet[$this->titleFormatter->getPrefixedText( $extraPage )] = true;
529  }
530  return $pageSet;
531  }
532 
538  protected function validateLinkDepth( $depth ) {
539  if ( $depth === null || $depth < 0 ) {
540  return 0;
541  }
542 
543  if ( !$this->userCanOverrideExportDepth() ) {
544  $maxLinkDepth = $this->getConfig()->get( MainConfigNames::ExportMaxLinkDepth );
545  if ( $depth > $maxLinkDepth ) {
546  return $maxLinkDepth;
547  }
548  }
549 
550  /*
551  * There's a HARD CODED limit of 5 levels of recursion here to prevent a
552  * crazy-big export from being done by someone setting the depth
553  * number too high. In other words, last resort safety net.
554  */
555 
556  return intval( min( $depth, 5 ) );
557  }
558 
566  protected function getPageLinks( $inputPages, $pageSet, $depth ) {
567  for ( ; $depth > 0; --$depth ) {
568  $pageSet = $this->getLinks(
569  $inputPages, $pageSet, [ 'pagelinks' ],
570  [ 'namespace' => 'pl_namespace', 'title' => 'pl_title' ],
571  [ 'pagelinks' => [ 'JOIN', [ 'page_id=pl_from' ] ] ]
572  );
573  $inputPages = array_keys( $pageSet );
574  }
575 
576  return $pageSet;
577  }
578 
588  protected function getLinks( $inputPages, $pageSet, $table, $fields, $join ) {
589  $dbr = $this->dbProvider->getReplicaDatabase();
590  $table[] = 'page';
591 
592  foreach ( $inputPages as $page ) {
593  $title = Title::newFromText( $page );
594  if ( $title ) {
595  $pageSet[$title->getPrefixedText()] = true;
598  $result = $dbr->select(
599  $table,
600  $fields,
601  [
602  'page_namespace' => $title->getNamespace(),
603  'page_title' => $title->getDBkey()
604  ],
605  __METHOD__,
606  [],
607  $join
608  );
609 
610  foreach ( $result as $row ) {
611  $template = Title::makeTitle( $row->namespace, $row->title );
612  $pageSet[$template->getPrefixedText()] = true;
613  }
614  }
615  }
616 
617  return $pageSet;
618  }
619 
620  protected function getGroupName() {
621  return 'pagetools';
622  }
623 }
624 
// Make the namespaced class also reachable under the global name 'SpecialExport'.
class_alias(
	SpecialExport::class,
	'SpecialExport'
);
const NS_MAIN
Definition: Defines.php:64
wfTimestampNow()
Convenience function; returns MediaWiki timestamp for the present time.
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
wfResetOutputBuffers( $resetGzipEncoding=true)
Clear away any user-level output buffers, discarding contents.
Object handling generic submission, CSRF protection, layout and other logic for UI forms in a reusable manner.
Definition: HTMLForm.php:158
static factory( $displayFormat, $descriptor, IContextSource $context, $messagePrefix='')
Construct a HTMLForm object for given display type.
Definition: HTMLForm.php:360
Factory service for WikiExporter instances.
Service for compat reading of links tables.
PSR-3 logger instance factory.
static getInstance( $channel)
Get a named logger instance from the currently configured logger factory.
A class containing constants representing the names of configuration variables.
const ExportMaxLinkDepth
Name constant for the ExportMaxLinkDepth setting, for use with Config::get()
const Sitename
Name constant for the Sitename setting, for use with Config::get()
const ExportAllowAll
Name constant for the ExportAllowAll setting, for use with Config::get()
const ExportMaxHistory
Name constant for the ExportMaxHistory setting, for use with Config::get()
const ExportAllowListContributors
Name constant for the ExportAllowListContributors setting, for use with Config::get()
const ExportPagelistLimit
Name constant for the ExportPagelistLimit setting, for use with Config::get()
const ExportFromNamespaces
Name constant for the ExportFromNamespaces setting, for use with Config::get()
const ExportAllowHistory
Name constant for the ExportAllowHistory setting, for use with Config::get()
Parent class for all special pages.
Definition: SpecialPage.php:65
setHeaders()
Sets headers - this should be called from the execute() method of all derived classes!
getConfig()
Shortcut to get main config object.
getContext()
Gets the context this SpecialPage is executed in.
getRequest()
Get the WebRequest being used for this instance.
msg( $key,... $params)
Wrapper around wfMessage that sets the current context.
getOutput()
Get the OutputPage being used for this instance.
getAuthority()
Shortcut to get the Authority executing this instance.
outputHeader( $summaryMessageKey='')
Outputs a summary message on top of special pages. By default, the message key is the canonical name of the special page.
addHelpLink( $to, $overrideBaseUrl=false)
Adds help link with an icon via page indicators.
A special page that allows users to export pages in an XML file.
doExport( $page, $history, $list_authors, $exportall)
Do the actual page exporting.
getLinks( $inputPages, $pageSet, $table, $fields, $join)
Expand a list of pages to include items used in those pages.
execute( $par)
Default execute method. Checks user permissions.
getTemplates( $inputPages, $pageSet)
Expand a list of pages to include templates used in those pages.
getPageLinks( $inputPages, $pageSet, $depth)
Expand a list of pages to include pages linked to from that page.
getGroupName()
Under which header this special page is listed in Special:SpecialPages. See messages 'specialpages-group-*' for valid names.
validateLinkDepth( $depth)
Validate link depth setting, if available.
__construct(IConnectionProvider $dbProvider, WikiExporterFactory $wikiExporterFactory, TitleFormatter $titleFormatter, LinksMigration $linksMigration)
Represents a title within MediaWiki.
Definition: Title.php:76
static makeName( $ns, $title, $fragment='', $interwiki='', $canonicalNamespace=false)
Make a prefixed DB key from a DB key and a namespace index.
Definition: Title.php:840
static newFromText( $text, $defaultNamespace=NS_MAIN)
Create a new Title from text, such as what one would find in a link.
Definition: Title.php:400
static makeTitleSafe( $ns, $title, $fragment='', $interwiki='')
Create a new Title from a namespace index and a DB key.
Definition: Title.php:650
static makeTitle( $ns, $title, $fragment='', $interwiki='')
Create a new Title from a namespace index and a DB key.
Definition: Title.php:624
A title formatter service for MediaWiki.
Provide primary and replica IDatabase connections.