MediaWiki 1.39.10
SpecialExport.php
Go to the documentation of this file.
1<?php
31
39
/** @var ILoadBalancer Injected through the constructor; used to obtain DB_REPLICA connections. */
41 private $loadBalancer;
42
/** @var WikiExporterFactory Injected through the constructor; creates the exporter in doExport(). */
44 private $wikiExporterFactory;
45
/** @var TitleFormatter Injected through the constructor; formats hook-supplied pages in getExtraPages(). */
47 private $titleFormatter;
48
/** @var LinksMigration Injected through the constructor; supplies the templatelinks query info in getTemplates(). */
50 private $linksMigration;
51
/**
 * Register the special page under the name 'Export' and keep the injected services.
 *
 * @param ILoadBalancer $loadBalancer
 * @param WikiExporterFactory $wikiExporterFactory
 * @param TitleFormatter $titleFormatter
 * @param LinksMigration $linksMigration
 */
public function __construct(
	ILoadBalancer $loadBalancer,
	WikiExporterFactory $wikiExporterFactory,
	TitleFormatter $titleFormatter,
	LinksMigration $linksMigration
) {
	parent::__construct( 'Export' );

	// Plain service storage; nothing is resolved or queried at construction time.
	$this->linksMigration = $linksMigration;
	$this->titleFormatter = $titleFormatter;
	$this->wikiExporterFactory = $wikiExporterFactory;
	$this->loadBalancer = $loadBalancer;
}
70
/**
 * Entry point of the special page.
 *
 * Reads the request, decides which pages and how much history were asked
 * for, and then either streams the XML dump (when an export was requested)
 * or renders the export form.
 *
 * Sets $this->curonly, $this->doExport, $this->templates and
 * $this->pageLinkDepth as a side effect; doExport() reads the latter two.
 *
 * @param string|null $par Subpage text; used as the default page list on GET requests
 */
public function execute( $par ) {
	$this->setHeaders();
	$this->outputHeader();
	$config = $this->getConfig();

	// Set some variables
	$this->curonly = true;
	$this->doExport = false;
	$request = $this->getRequest();
	$this->templates = $request->getCheck( 'templates' );
	$this->pageLinkDepth = $this->validateLinkDepth(
		$request->getIntOrNull( 'pagelink-depth' )
	);
	$nsindex = '';
	$exportall = false;
	// The "add category" / "add namespace" branches never start an export and
	// therefore never choose a history mode; give the variable a defined value
	// so that nothing below can read an undefined variable.
	$history = WikiExporter::CURRENT;

	if ( $request->getCheck( 'addcat' ) ) {
		// "Add pages from category" button: extend the page list and redisplay the form.
		$page = $request->getText( 'pages' );
		$catname = $request->getText( 'catname' );

		if ( $catname !== '' && $catname !== null && $catname !== false ) {
			$t = Title::makeTitleSafe( NS_MAIN, $catname );
			if ( $t ) {
				$catpages = $this->getPagesFromCategory( $t );
				if ( $catpages ) {
					if ( $page !== '' ) {
						$page .= "\n";
					}
					$page .= implode( "\n", $catpages );
				}
			}
		}
	} elseif ( $request->getCheck( 'addns' ) &&
		$config->get( MainConfigNames::ExportFromNamespaces ) ) {
		// "Add pages from namespace" button: extend the page list and redisplay the form.
		$page = $request->getText( 'pages' );
		$nsindex = $request->getText( 'nsindex', '' );

		if ( strval( $nsindex ) !== '' ) {
			$nspages = $this->getPagesFromNamespace( (int)$nsindex );
			if ( $nspages ) {
				// Same separator handling as the category branch: no leading
				// blank line when the list was empty so far.
				if ( $page !== '' ) {
					$page .= "\n";
				}
				$page .= implode( "\n", $nspages );
			}
		}
	} elseif ( $request->getCheck( 'exportall' ) &&
		$config->get( MainConfigNames::ExportAllowAll ) ) {
		$this->doExport = true;
		$exportall = true;

		/* Although $page and $history are not used later on, we
		nevertheless set them to avoid that PHP notices about using
		undefined variables foul up our XML output (see call to
		doExport(...) further down) */
		$page = '';
		$history = '';
	} elseif ( $request->wasPosted() && $par == '' ) {
		// Log to see if certain parameters are actually used.
		// If not, we could deprecate them and do some cleanup, here and in WikiExporter.
		LoggerFactory::getInstance( 'export' )->debug(
			'Special:Export POST, dir: [{dir}], offset: [{offset}], limit: [{limit}]', [
				'dir' => $request->getRawVal( 'dir' ),
				'offset' => $request->getRawVal( 'offset' ),
				'limit' => $request->getRawVal( 'limit' ),
			] );

		$page = $request->getText( 'pages' );
		$this->curonly = $request->getCheck( 'curonly' );
		$rawOffset = $request->getVal( 'offset' );

		if ( $rawOffset ) {
			$offset = wfTimestamp( TS_MW, $rawOffset );
		} else {
			$offset = null;
		}

		$maxHistory = $config->get( MainConfigNames::ExportMaxHistory );
		$limit = $request->getInt( 'limit' );
		$dir = $request->getVal( 'dir' );
		$history = [
			'dir' => 'asc',
			'offset' => false,
			'limit' => $maxHistory,
		];
		$historyCheck = $request->getCheck( 'history' );

		if ( $this->curonly ) {
			$history = WikiExporter::CURRENT;
		} elseif ( !$historyCheck ) {
			// A $maxHistory of 0 places no upper bound on the requested limit.
			if ( $limit > 0 && ( $maxHistory == 0 || $limit < $maxHistory ) ) {
				$history['limit'] = $limit;
			}

			if ( $offset !== null ) {
				$history['offset'] = $offset;
			}

			if ( strtolower( $dir ?? '' ) == 'desc' ) {
				$history['dir'] = 'desc';
			}
		}

		if ( $page != '' ) {
			$this->doExport = true;
		}
	} else {
		// Default to current-only for GET requests.
		$page = $request->getText( 'pages', $par ?? '' );
		$historyCheck = $request->getCheck( 'history' );

		if ( $historyCheck ) {
			$history = WikiExporter::FULL;
		} else {
			$history = WikiExporter::CURRENT;
		}

		if ( $page != '' ) {
			$this->doExport = true;
		}
	}

	if ( !$config->get( MainConfigNames::ExportAllowHistory ) ) {
		// Override
		$history = WikiExporter::CURRENT;
	}

	// Contributor lists are only produced for current-revision exports and
	// only when the configuration allows them.
	$list_authors = $request->getCheck( 'listauthors' );
	if ( !$this->curonly || !$config->get( MainConfigNames::ExportAllowListContributors ) ) {
		$list_authors = false;
	}

	if ( $this->doExport ) {
		$this->getOutput()->disable();

		// Cancel output buffering and gzipping if set
		// This should provide safer streaming for pages with history
		wfResetOutputBuffers();
		$request->response()->header( 'Content-type: application/xml; charset=utf-8' );
		$request->response()->header( 'X-Robots-Tag: noindex,nofollow' );

		if ( $request->getCheck( 'wpDownload' ) ) {
			// Provide a sensible filename suggestion
			$filename = urlencode( $config->get( MainConfigNames::Sitename ) . '-' .
				wfTimestampNow() . '.xml' );
			$request->response()->header( "Content-disposition: attachment;filename={$filename}" );
		}

		$this->doExport( $page, $history, $list_authors, $exportall );

		return;
	}

	$out = $this->getOutput();
	$out->addWikiMsg( 'exporttext' );

	if ( !$config->get( MainConfigNames::ExportAllowHistory ) ) {
		// No "current revision only" checkbox is offered in this case; say why.
		$out->addWikiMsg( 'exportnohistory' );
	}

	// The category box is only pre-filled while the page list is still empty.
	if ( $page == '' ) {
		$categoryName = $request->getText( 'catname' );
	} else {
		$categoryName = '';
	}

	$formDescriptor = $this->buildExportFormDescriptor( $page, $nsindex, $categoryName );

	$htmlForm = HTMLForm::factory( 'ooui', $formDescriptor, $this->getContext() );
	$htmlForm->setSubmitTextMsg( 'export-submit' );
	$htmlForm->prepareForm()->displayForm( false );
	$this->addHelpLink( 'Help:Export' );
}

/**
 * Build the HTMLForm descriptor for the export form.
 *
 * Fields are added in display order; several of them only exist when the
 * corresponding configuration setting (or user right) enables the feature.
 *
 * @param string $page Current contents of the page list textarea
 * @param string $nsindex Currently selected namespace index, '' for none
 * @param string $categoryName Value to pre-fill the category box with
 * @return array HTMLForm field descriptor
 */
private function buildExportFormDescriptor( $page, $nsindex, $categoryName ) {
	$config = $this->getConfig();
	$request = $this->getRequest();
	// Checkbox states are only taken from the request when the form was actually submitted.
	$wasPosted = $request->wasPosted();

	$canExportAll = $config->get( MainConfigNames::ExportAllowAll );
	// Page-selection widgets carry a hide-if condition on the "export all" box.
	$hideIf = $canExportAll ? [ 'hide-if' => [ '===', 'exportall', '1' ] ] : [];

	$formDescriptor = [
		'catname' => [
			'type' => 'textwithbutton',
			'name' => 'catname',
			'horizontal-label' => true,
			'label-message' => 'export-addcattext',
			'default' => $categoryName,
			'size' => 40,
			'buttontype' => 'submit',
			'buttonname' => 'addcat',
			'buttondefault' => $this->msg( 'export-addcat' )->text(),
		] + $hideIf,
	];

	if ( $config->get( MainConfigNames::ExportFromNamespaces ) ) {
		$formDescriptor['nsindex'] = [
			'type' => 'namespaceselectwithbutton',
			'default' => $nsindex,
			'label-message' => 'export-addnstext',
			'horizontal-label' => true,
			'name' => 'nsindex',
			'id' => 'namespace',
			'cssclass' => 'namespaceselector',
			'buttontype' => 'submit',
			'buttonname' => 'addns',
			'buttondefault' => $this->msg( 'export-addns' )->text(),
		] + $hideIf;
	}

	if ( $canExportAll ) {
		$formDescriptor['exportall'] = [
			'type' => 'check',
			'label-message' => 'exportall',
			'name' => 'exportall',
			'id' => 'exportall',
			'default' => $wasPosted ? $request->getCheck( 'exportall' ) : false,
		];
	}

	$formDescriptor['textarea'] = [
		'class' => HTMLTextAreaField::class,
		'name' => 'pages',
		'label-message' => 'export-manual',
		'nodata' => true,
		'rows' => 10,
		'default' => $page,
	] + $hideIf;

	if ( $config->get( MainConfigNames::ExportAllowHistory ) ) {
		$formDescriptor['curonly'] = [
			'type' => 'check',
			'label-message' => 'exportcuronly',
			'name' => 'curonly',
			'id' => 'curonly',
			'default' => $wasPosted ? $request->getCheck( 'curonly' ) : true,
		];
	}

	$formDescriptor['templates'] = [
		'type' => 'check',
		'label-message' => 'export-templates',
		'name' => 'templates',
		'id' => 'wpExportTemplates',
		'default' => $wasPosted ? $request->getCheck( 'templates' ) : false,
	];

	if ( $config->get( MainConfigNames::ExportMaxLinkDepth ) ||
		$this->userCanOverrideExportDepth() ) {
		$formDescriptor['pagelink-depth'] = [
			'type' => 'text',
			'name' => 'pagelink-depth',
			'id' => 'pagelink-depth',
			'label-message' => 'export-pagelinks',
			'default' => '0',
			'size' => 20,
		];
	}

	$formDescriptor['wpDownload'] = [
		'type' => 'check',
		'name' => 'wpDownload',
		'id' => 'wpDownload',
		'default' => $wasPosted ? $request->getCheck( 'wpDownload' ) : true,
		'label-message' => 'export-download',
	];

	if ( $config->get( MainConfigNames::ExportAllowListContributors ) ) {
		$formDescriptor['listauthors'] = [
			'type' => 'check',
			'label-message' => 'exportlistauthors',
			'default' => $wasPosted ? $request->getCheck( 'listauthors' ) : false,
			'name' => 'listauthors',
			'id' => 'listauthors',
		];
	}

	return $formDescriptor;
}
360
/**
 * Whether the current authority holds the 'override-export-depth' right.
 *
 * @return bool
 */
protected function userCanOverrideExportDepth() {
	$authority = $this->getAuthority();

	return $authority->isAllowed( 'override-export-depth' );
}
367
/**
 * Do the actual page exporting.
 *
 * @param string $page Newline-separated page titles; ignored when $exportall is set
 * @param mixed $history History selection handed to the exporter factory
 * @param bool $list_authors Value assigned to the exporter's list_authors property
 * @param bool $exportall Dump every page instead of the ones named in $page
 */
protected function doExport( $page, $history, $list_authors, $exportall ) {
	$pages = [];

	if ( $exportall ) {
		// If we are grabbing everything, enable full history and ignore the rest
		$history = WikiExporter::FULL;
	} else {
		// Titles double as array keys, so every page is recorded only once.
		$pageSet = [];
		foreach ( explode( "\n", $page ) as $line ) {
			$candidate = Title::newFromText( trim( $line ) );
			if ( !$candidate || $candidate->isExternal() || $candidate->getText() === '' ) {
				continue;
			}
			$pageSet[$candidate->getPrefixedText()] = true;
		}

		// The pages the user actually asked for; every expansion below starts from these.
		$inputPages = array_keys( $pageSet );

		if ( $this->templates ) {
			$pageSet = $this->getTemplates( $inputPages, $pageSet );
		}

		$pageSet = $this->getExtraPages( $inputPages, $pageSet );

		if ( $this->pageLinkDepth ) {
			$pageSet = $this->getPageLinks( $inputPages, $pageSet, $this->pageLinkDepth );
		}

		// Normalize titles to the same format and remove dupes, see T19374
		$underscored = array_map(
			static function ( $name ) {
				return str_replace( ' ', '_', $name );
			},
			array_keys( $pageSet )
		);
		$pages = array_unique( $underscored );
	}

	/* Ok, let's get to it... */
	$replica = $this->loadBalancer->getConnectionRef( ILoadBalancer::DB_REPLICA );

	$exporter = $this->wikiExporterFactory->getWikiExporter( $replica, $history );
	$exporter->list_authors = $list_authors;
	$exporter->openStream();

	if ( $exportall ) {
		$exporter->allPages();
	} else {
		foreach ( $pages as $pageName ) {
			$title = Title::newFromText( $pageName );

			# T10824: Only export pages the user can read
			// @todo Perhaps output an <error> tag or something for skipped pages.
			if ( $title === null || !$this->getAuthority()->authorizeRead( 'read', $title ) ) {
				continue;
			}

			$exporter->pageByTitle( $title );
		}
	}

	$exporter->closeStream();
}
447
/**
 * List pages that have a categorylinks row pointing at the given title's DB key.
 *
 * @param Title $title Category to look up; only its DB key is used
 * @return string[] Names built with Title::makeName(), limited to ExportPagelistLimit rows
 */
protected function getPagesFromCategory( $title ) {
	$limit = $this->getConfig()->get( MainConfigNames::ExportPagelistLimit );
	$categoryKey = $title->getDBkey();

	$replica = $this->loadBalancer->getConnectionRef( ILoadBalancer::DB_REPLICA );
	$rows = $replica->select(
		[ 'page', 'categorylinks' ],
		[ 'page_namespace', 'page_title' ],
		[ 'cl_from=page_id', 'cl_to' => $categoryKey ],
		__METHOD__,
		[ 'LIMIT' => $limit ]
	);

	$names = [];
	foreach ( $rows as $row ) {
		$names[] = Title::makeName( $row->page_namespace, $row->page_title );
	}

	return $names;
}
474
/**
 * List pages that live in the given namespace.
 *
 * @param int $nsindex Namespace index matched against page_namespace
 * @return string[] Names built with Title::makeName(), limited to ExportPagelistLimit rows
 */
protected function getPagesFromNamespace( $nsindex ) {
	$limit = $this->getConfig()->get( MainConfigNames::ExportPagelistLimit );

	$replica = $this->loadBalancer->getConnectionRef( ILoadBalancer::DB_REPLICA );
	$rows = $replica->select(
		'page',
		[ 'page_namespace', 'page_title' ],
		[ 'page_namespace' => $nsindex ],
		__METHOD__,
		[ 'LIMIT' => $limit ]
	);

	$names = [];
	foreach ( $rows as $row ) {
		$names[] = Title::makeName( $row->page_namespace, $row->page_title );
	}

	return $names;
}
499
/**
 * Expand a list of pages to include templates used in those pages.
 *
 * @param string[] $inputPages Page names to look up
 * @param array $pageSet Set of pages collected so far, keyed by page name
 * @return array The page set with the linked templates added
 */
protected function getTemplates( $inputPages, $pageSet ) {
	$migration = $this->linksMigration;

	// Field names and tables/joins come from LinksMigration rather than being hard-coded here.
	list( $nsField, $titleField ) = $migration->getTitleFields( 'templatelinks' );
	$queryInfo = $migration->getQueryInfo( 'templatelinks' );

	// getLinks() reads its result rows through the 'namespace' / 'title' aliases.
	$fields = [ 'namespace' => $nsField, 'title' => $titleField ];
	$joins = array_merge(
		[ 'templatelinks' => [ 'JOIN', [ 'page_id=tl_from' ] ] ],
		$queryInfo['joins']
	);

	return $this->getLinks( $inputPages, $pageSet, $queryInfo['tables'], $fields, $joins );
}
518
/**
 * Add pages contributed by the SpecialExportGetExtraPages hook to the page set.
 *
 * @param string[] $inputPages Page names handed to the hook
 * @param array $pageSet Set of pages collected so far, keyed by page name
 * @return array The page set with the hook-supplied pages added
 */
private function getExtraPages( $inputPages, $pageSet ) {
	$hookPages = [];
	$this->getHookRunner()->onSpecialExportGetExtraPages( $inputPages, $hookPages );

	foreach ( $hookPages as $hookPage ) {
		$prefixedText = $this->titleFormatter->getPrefixedText( $hookPage );
		$pageSet[$prefixedText] = true;
	}

	return $pageSet;
}
533
/**
 * Validate link depth setting, if available.
 *
 * The result is always an integer between 0 and the hard limit of 5.
 * Users without the override right are additionally limited to the
 * configured ExportMaxLinkDepth.
 *
 * @param int|null $depth Requested depth; null (parameter not supplied) is treated as 0
 * @return int Depth to use for the export
 */
protected function validateLinkDepth( $depth ) {
	/*
	 * There's a HARD CODED limit of 5 levels of recursion here to prevent a
	 * crazy-big export from being done by someone setting the depth
	 * number too high. In other words, last resort safety net.
	 */
	$hardLimit = 5;

	$depth = (int)$depth;
	if ( $depth <= 0 ) {
		return 0;
	}

	if ( !$this->userCanOverrideExportDepth() ) {
		$maxLinkDepth = (int)$this->getConfig()->get( MainConfigNames::ExportMaxLinkDepth );
		// Clamp instead of returning the configured value directly, so that a
		// configured maximum above the hard limit cannot bypass the safety net.
		$depth = min( $depth, $maxLinkDepth );
	}

	// max() guards against a negative configured maximum leaking through.
	return max( 0, min( $depth, $hardLimit ) );
}
559
/**
 * Expand a list of pages to include pages linked to from that page.
 *
 * Each round follows the links of everything collected so far, so the
 * input of round N+1 is the full page set produced by round N.
 *
 * @param string[] $inputPages Page names to start from
 * @param array $pageSet Set of pages collected so far, keyed by page name
 * @param int $depth Number of rounds of link-following to perform
 * @return array The expanded page set
 */
protected function getPageLinks( $inputPages, $pageSet, $depth ) {
	$fields = [ 'namespace' => 'pl_namespace', 'title' => 'pl_title' ];
	$joins = [ 'pagelinks' => [ 'JOIN', [ 'page_id=pl_from' ] ] ];

	while ( $depth > 0 ) {
		$pageSet = $this->getLinks( $inputPages, $pageSet, [ 'pagelinks' ], $fields, $joins );
		$inputPages = array_keys( $pageSet );
		--$depth;
	}

	return $pageSet;
}
579
/**
 * Expand a list of pages to include items used in those pages.
 *
 * Runs one query per input page against the given link table(s) joined
 * with 'page', and adds every row found to the page set.
 *
 * @param string[] $inputPages Page names to look up; unparseable names are skipped
 * @param array $pageSet Set of pages collected so far, keyed by page name
 * @param string[] $table Link table(s) to query; 'page' is appended here
 * @param array $fields Select fields; rows are read through the 'namespace' and 'title' aliases
 * @param array $join Join conditions passed to the query
 * @return array The expanded page set
 */
protected function getLinks( $inputPages, $pageSet, $table, $fields, $join ) {
	$replica = $this->loadBalancer->getConnectionRef( ILoadBalancer::DB_REPLICA );
	$table[] = 'page';

	foreach ( $inputPages as $pageName ) {
		$source = Title::newFromText( $pageName );
		if ( !$source ) {
			continue;
		}

		// The source page itself always belongs to the set.
		$pageSet[$source->getPrefixedText()] = true;

		$conds = [
			'page_namespace' => $source->getNamespace(),
			'page_title' => $source->getDBkey()
		];
		$rows = $replica->select( $table, $fields, $conds, __METHOD__, [], $join );

		foreach ( $rows as $row ) {
			$linked = Title::makeTitle( $row->namespace, $row->title );
			$pageSet[$linked->getPrefixedText()] = true;
		}
	}

	return $pageSet;
}
620
/**
 * Name of the group under which this special page is listed; always 'pagetools'.
 *
 * @return string
 */
621 protected function getGroupName() {
622 return 'pagetools';
623 }
624}
const NS_MAIN
Definition Defines.php:64
wfTimestampNow()
Convenience function; returns MediaWiki timestamp for the present time.
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
wfResetOutputBuffers( $resetGzipEncoding=true)
Clear away any user-level output buffers, discarding contents.
Factory service for WikiExporter instances.
Service for compat reading of links tables.
PSR-3 logger instance factory.
A class containing constants representing the names of configuration variables.
A special page that allows users to export pages in an XML file.
getGroupName()
Under which header this special page is listed in Special:SpecialPages See messages 'specialpages-gro...
doExport( $page, $history, $list_authors, $exportall)
Do the actual page exporting.
getPageLinks( $inputPages, $pageSet, $depth)
Expand a list of pages to include pages linked to from that page.
getLinks( $inputPages, $pageSet, $table, $fields, $join)
Expand a list of pages to include items used in those pages.
__construct(ILoadBalancer $loadBalancer, WikiExporterFactory $wikiExporterFactory, TitleFormatter $titleFormatter, LinksMigration $linksMigration)
getPagesFromNamespace( $nsindex)
execute( $par)
Default execute method. Checks user permissions.
validateLinkDepth( $depth)
Validate link depth setting, if available.
getTemplates( $inputPages, $pageSet)
Expand a list of pages to include templates used in those pages.
getPagesFromCategory( $title)
Parent class for all special pages.
outputHeader( $summaryMessageKey='')
Outputs a summary message on top of special pages Per default the message key is the canonical name o...
setHeaders()
Sets headers - this should be called from the execute() method of all derived classes!
getOutput()
Get the OutputPage being used for this instance.
getContext()
Gets the context this SpecialPage is executed in.
msg( $key,... $params)
Wrapper around wfMessage that sets the current context.
getAuthority()
Shortcut to get the Authority executing this instance.
getConfig()
Shortcut to get main config object.
getRequest()
Get the WebRequest being used for this instance.
addHelpLink( $to, $overrideBaseUrl=false)
Adds help link with an icon via page indicators.
A title formatter service for MediaWiki.
Create and track the database connections and transactions for a given database cluster.