MediaWiki REL1_37
SpecialExport.php
Go to the documentation of this file.
1<?php
28
36
39
/**
 * Register the special page under the name 'Export' and keep the load
 * balancer for later database access.
 *
 * @param ILoadBalancer $loadBalancer Load balancer from which the replica
 *   database connections used by this page are obtained.
 */
public function __construct(
	ILoadBalancer $loadBalancer
) {
	parent::__construct( 'Export' );
	$this->loadBalancer = $loadBalancer;
}
49
/**
 * Entry point of the special page.
 *
 * Depending on the request this either
 *  - appends the members of a category ('addcat') or of a namespace
 *    ('addns') to the page list and re-displays the form,
 *  - streams an XML export of the requested pages (or of all pages when
 *    'exportall' is set and allowed by configuration), or
 *  - displays the export form.
 *
 * @param string|null $par Used as the default page list for GET requests
 *   when no 'pages' parameter is supplied (presumably the subpage part of
 *   the requested title).
 */
public function execute( $par ) {
	$this->setHeaders();
	$this->outputHeader();
	$config = $this->getConfig();

	// Set some variables
	$this->curonly = true;
	$this->doExport = false;
	$request = $this->getRequest();
	$this->templates = $request->getCheck( 'templates' );
	$this->pageLinkDepth = $this->validateLinkDepth(
		$request->getIntOrNull( 'pagelink-depth' )
	);
	$nsindex = '';
	$exportall = false;

	if ( $request->getCheck( 'addcat' ) ) {
		// "Add pages from category" button: extend the list, do not export yet.
		$page = $request->getText( 'pages' );
		$catname = $request->getText( 'catname' );

		if ( $catname !== '' && $catname !== null && $catname !== false ) {
			$t = Title::makeTitleSafe( NS_MAIN, $catname );
			if ( $t ) {
				$catpages = $this->getPagesFromCategory( $t );
				if ( $catpages ) {
					if ( $page !== '' ) {
						$page .= "\n";
					}
					$page .= implode( "\n", $catpages );
				}
			}
		}
	} elseif ( $request->getCheck( 'addns' ) && $config->get( 'ExportFromNamespaces' ) ) {
		// "Add pages from namespace" button: extend the list, do not export yet.
		$page = $request->getText( 'pages' );
		$nsindex = $request->getText( 'nsindex', '' );

		if ( strval( $nsindex ) !== '' ) {
			$nspages = $this->getPagesFromNamespace( $nsindex );
			if ( $nspages ) {
				// Only insert a separator when there is something to separate,
				// matching the 'addcat' branch (avoids a leading blank line).
				if ( $page !== '' ) {
					$page .= "\n";
				}
				$page .= implode( "\n", $nspages );
			}
		}
	} elseif ( $request->getCheck( 'exportall' ) && $config->get( 'ExportAllowAll' ) ) {
		$this->doExport = true;
		$exportall = true;

		/* Although $page and $history are not used later on, we
		nevertheless set them to avoid that PHP notices about using
		undefined variables foul up our XML output (see call to
		doExport(...) further down) */
		$page = '';
		$history = '';
	} elseif ( $request->wasPosted() && $par == '' ) {
		// Log to see if certain parameters are actually used.
		// If not, we could deprecate them and do some cleanup, here and in WikiExporter.
		LoggerFactory::getInstance( 'export' )->debug(
			'Special:Export POST, dir: [{dir}], offset: [{offset}], limit: [{limit}]', [
				'dir' => $request->getRawVal( 'dir' ),
				'offset' => $request->getRawVal( 'offset' ),
				'limit' => $request->getRawVal( 'limit' ),
			] );

		$page = $request->getText( 'pages' );
		$this->curonly = $request->getCheck( 'curonly' );
		$rawOffset = $request->getVal( 'offset' );

		if ( $rawOffset ) {
			$offset = wfTimestamp( TS_MW, $rawOffset );
		} else {
			$offset = null;
		}

		$maxHistory = $config->get( 'ExportMaxHistory' );
		$limit = $request->getInt( 'limit' );
		$dir = $request->getVal( 'dir' );
		$history = [
			'dir' => 'asc',
			'offset' => false,
			'limit' => $maxHistory,
		];
		$historyCheck = $request->getCheck( 'history' );

		if ( $this->curonly ) {
			$history = WikiExporter::CURRENT;
		} elseif ( !$historyCheck ) {
			if ( $limit > 0 && ( $maxHistory == 0 || $limit < $maxHistory ) ) {
				$history['limit'] = $limit;
			}

			if ( $offset !== null ) {
				$history['offset'] = $offset;
			}

			// 'dir' is null when the parameter is absent; guard before
			// strtolower(), which must not be handed null on PHP 8.1+.
			if ( $dir !== null && strtolower( $dir ) == 'desc' ) {
				$history['dir'] = 'desc';
			}
		}

		if ( $page != '' ) {
			$this->doExport = true;
		}
	} else {
		// Default to current-only for GET requests.
		$page = $request->getText( 'pages', $par );
		$historyCheck = $request->getCheck( 'history' );

		if ( $historyCheck ) {
			$history = WikiExporter::FULL;
		} else {
			$history = WikiExporter::CURRENT;
		}

		if ( $page != '' ) {
			$this->doExport = true;
		}
	}

	if ( !$config->get( 'ExportAllowHistory' ) ) {
		// Override
		$history = WikiExporter::CURRENT;
	}

	// The contributor list is only offered for current-revision exports
	// and only when the configuration allows it.
	$list_authors = $request->getCheck( 'listauthors' );
	if ( !$this->curonly || !$config->get( 'ExportAllowListContributors' ) ) {
		$list_authors = false;
	}

	if ( $this->doExport ) {
		$this->getOutput()->disable();

		// Cancel output buffering and gzipping if set
		// This should provide safer streaming for pages with history
		wfResetOutputBuffers();
		$request->response()->header( 'Content-type: application/xml; charset=utf-8' );
		$request->response()->header( 'X-Robots-Tag: noindex,nofollow' );

		if ( $request->getCheck( 'wpDownload' ) ) {
			// Provide a sane filename suggestion
			$filename = urlencode( $config->get( 'Sitename' ) . '-' . wfTimestampNow() . '.xml' );
			$request->response()->header( "Content-disposition: attachment;filename={$filename}" );
		}

		$this->doExport( $page, $history, $list_authors, $exportall );

		return;
	}

	// No export requested (or nothing to export): show the form.
	$out = $this->getOutput();
	$out->addWikiMsg( 'exporttext' );

	if ( $page == '' ) {
		$categoryName = $request->getText( 'catname' );
	} else {
		$categoryName = '';
	}

	$formDescriptor = [
		'catname' => [
			'type' => 'textwithbutton',
			'name' => 'catname',
			'horizontal-label' => true,
			'label-message' => 'export-addcattext',
			'default' => $categoryName,
			'size' => 40,
			'buttontype' => 'submit',
			'buttonname' => 'addcat',
			'buttondefault' => $this->msg( 'export-addcat' )->text(),
			'hide-if' => [ '===', 'exportall', '1' ],
		],
	];
	if ( $config->get( 'ExportFromNamespaces' ) ) {
		$formDescriptor += [
			'nsindex' => [
				'type' => 'namespaceselectwithbutton',
				'default' => $nsindex,
				'label-message' => 'export-addnstext',
				'horizontal-label' => true,
				'name' => 'nsindex',
				'id' => 'namespace',
				'cssclass' => 'namespaceselector',
				'buttontype' => 'submit',
				'buttonname' => 'addns',
				'buttondefault' => $this->msg( 'export-addns' )->text(),
				'hide-if' => [ '===', 'exportall', '1' ],
			],
		];
	}

	if ( $config->get( 'ExportAllowAll' ) ) {
		$formDescriptor += [
			'exportall' => [
				'type' => 'check',
				'label-message' => 'exportall',
				'name' => 'exportall',
				'id' => 'exportall',
				'default' => $request->wasPosted() ? $request->getCheck( 'exportall' ) : false,
			],
		];
	}

	$formDescriptor += [
		'textarea' => [
			'class' => HTMLTextAreaField::class,
			'name' => 'pages',
			'label-message' => 'export-manual',
			'nodata' => true,
			'rows' => 10,
			'default' => $page,
			'hide-if' => [ '===', 'exportall', '1' ],
		],
	];

	if ( $config->get( 'ExportAllowHistory' ) ) {
		$formDescriptor += [
			'curonly' => [
				'type' => 'check',
				'label-message' => 'exportcuronly',
				'name' => 'curonly',
				'id' => 'curonly',
				'default' => $request->wasPosted() ? $request->getCheck( 'curonly' ) : true,
			],
		];
	} else {
		$out->addWikiMsg( 'exportnohistory' );
	}

	$formDescriptor += [
		'templates' => [
			'type' => 'check',
			'label-message' => 'export-templates',
			'name' => 'templates',
			'id' => 'wpExportTemplates',
			'default' => $request->wasPosted() ? $request->getCheck( 'templates' ) : false,
		],
	];

	if ( $config->get( 'ExportMaxLinkDepth' ) || $this->userCanOverrideExportDepth() ) {
		$formDescriptor += [
			'pagelink-depth' => [
				'type' => 'text',
				'name' => 'pagelink-depth',
				'id' => 'pagelink-depth',
				'label-message' => 'export-pagelinks',
				'default' => '0',
				'size' => 20,
			],
		];
	}

	$formDescriptor += [
		'wpDownload' => [
			'type' => 'check',
			'name' => 'wpDownload',
			'id' => 'wpDownload',
			'default' => $request->wasPosted() ? $request->getCheck( 'wpDownload' ) : true,
			'label-message' => 'export-download',
		],
	];

	if ( $config->get( 'ExportAllowListContributors' ) ) {
		$formDescriptor += [
			'listauthors' => [
				'type' => 'check',
				'label-message' => 'exportlistauthors',
				'default' => $request->wasPosted() ? $request->getCheck( 'listauthors' ) : false,
				'name' => 'listauthors',
				'id' => 'listauthors',
			],
		];
	}

	$htmlForm = HTMLForm::factory( 'ooui', $formDescriptor, $this->getContext() );
	$htmlForm->setSubmitTextMsg( 'export-submit' );
	$htmlForm->prepareForm()->displayForm( false );
	$this->addHelpLink( 'Help:Export' );
}
334
/**
 * Tell whether the current authority holds the 'override-export-depth' right.
 *
 * @return bool Result of the permission check.
 */
protected function userCanOverrideExportDepth() {
	$authority = $this->getAuthority();

	return $authority->isAllowed( 'override-export-depth' );
}
341
/**
 * Do the actual page exporting: resolve the requested titles and stream
 * them through a WikiExporter.
 *
 * @param string $page Newline-separated list of page titles; ignored when
 *   $exportall is set.
 * @param mixed $history History setting handed to WikiExporter; replaced
 *   by WikiExporter::FULL when $exportall is set.
 * @param bool $list_authors Value assigned to the exporter's list_authors flag.
 * @param bool $exportall Export every page instead of the given list.
 */
protected function doExport( $page, $history, $list_authors, $exportall ) {
	$dbKeys = [];

	if ( $exportall ) {
		// Grabbing everything: force full history, the page list is irrelevant.
		$history = WikiExporter::FULL;
	} else {
		// Inverted index (prefixed text => true) so each page is recorded once.
		$titleIndex = [];

		foreach ( explode( "\n", $page ) as $rawLine ) {
			$candidate = Title::newFromText( trim( $rawLine ) );
			if ( !$candidate || $candidate->isExternal() || $candidate->getText() === '' ) {
				continue;
			}
			$titleIndex[$candidate->getPrefixedText()] = true;
		}

		// The pages the user actually asked for; link expansion starts here.
		$seedPages = array_keys( $titleIndex );

		if ( $this->templates ) {
			$titleIndex = $this->getTemplates( $seedPages, $titleIndex );
		}

		$linkDepth = $this->pageLinkDepth;
		if ( $linkDepth ) {
			$titleIndex = $this->getPageLinks( $seedPages, $titleIndex, $linkDepth );
		}

		// Normalize titles to the same format and remove dupes, see T19374
		$dbKeys = array_unique(
			str_replace( ' ', '_', array_keys( $titleIndex ) )
		);
	}

	$db = $this->loadBalancer->getConnectionRef( ILoadBalancer::DB_REPLICA );

	$exporter = new WikiExporter( $db, $history );
	$exporter->list_authors = $list_authors;
	$exporter->openStream();

	if ( $exportall ) {
		$exporter->allPages();
	} else {
		foreach ( $dbKeys as $dbKey ) {
			$title = Title::newFromText( $dbKey );

			// T10824: Only export pages the user can read. Unparsable and
			// unreadable titles are skipped silently.
			// @todo Perhaps output an <error> tag or something.
			if ( $title === null || !$this->getAuthority()->authorizeRead( 'read', $title ) ) {
				continue;
			}

			$exporter->pageByTitle( $title );
		}
	}

	$exporter->closeStream();
}
419
/**
 * List the pages that are members of a category.
 *
 * @param Title $title Title whose DB key is matched against cl_to.
 * @return string[] Page names built by Title::makeName(), at most
 *   'ExportPagelistLimit' entries.
 */
protected function getPagesFromCategory( $title ) {
	$limit = $this->getConfig()->get( 'ExportPagelistLimit' );
	$categoryKey = $title->getDBkey();

	$rows = $this->loadBalancer
		->getConnectionRef( ILoadBalancer::DB_REPLICA )
		->select(
			[ 'page', 'categorylinks' ],
			[ 'page_namespace', 'page_title' ],
			[ 'cl_from=page_id', 'cl_to' => $categoryKey ],
			__METHOD__,
			[ 'LIMIT' => $limit ]
		);

	$names = [];
	foreach ( $rows as $member ) {
		$names[] = Title::makeName( $member->page_namespace, $member->page_title );
	}

	return $names;
}
446
/**
 * List pages that live in the given namespace.
 *
 * @param int|string $nsindex Namespace index matched against page_namespace.
 * @return string[] Page names built by Title::makeName(), at most
 *   'ExportPagelistLimit' entries.
 */
protected function getPagesFromNamespace( $nsindex ) {
	$limit = $this->getConfig()->get( 'ExportPagelistLimit' );

	$rows = $this->loadBalancer
		->getConnectionRef( ILoadBalancer::DB_REPLICA )
		->select(
			'page',
			[ 'page_namespace', 'page_title' ],
			[ 'page_namespace' => $nsindex ],
			__METHOD__,
			[ 'LIMIT' => $limit ]
		);

	$names = [];
	foreach ( $rows as $entry ) {
		$names[] = Title::makeName( $entry->page_namespace, $entry->page_title );
	}

	return $names;
}
471
/**
 * Expand a list of pages to include templates used in those pages.
 *
 * @param array $inputPages List of titles to look up
 * @param array $pageSet Associative array indexed by titles for output
 * @return array Associative array indexed by titles
 */
protected function getTemplates( $inputPages, $pageSet ) {
	// Column aliases expected by getLinks() and the join to the page table.
	$columns = [ 'namespace' => 'tl_namespace', 'title' => 'tl_title' ];
	$joinConds = [ 'page_id=tl_from' ];

	return $this->getLinks( $inputPages, $pageSet, 'templatelinks', $columns, $joinConds );
}
485
/**
 * Validate link depth setting, if available.
 *
 * The requested depth is clamped to [0, 5]. Users who may not override
 * the export depth are additionally limited to 'ExportMaxLinkDepth'.
 *
 * @param int|null $depth Requested depth; null (parameter absent) is
 *   treated as 0.
 * @return int Depth to use.
 */
protected function validateLinkDepth( $depth ) {
	if ( $depth === null || $depth < 0 ) {
		return 0;
	}

	if ( !$this->userCanOverrideExportDepth() ) {
		// Clamp to the configured maximum, but keep going: the hard cap
		// below must still apply when the configured maximum exceeds it.
		$depth = min( $depth, $this->getConfig()->get( 'ExportMaxLinkDepth' ) );
	}

	/*
	 * There's a HARD CODED limit of 5 levels of recursion here to prevent a
	 * crazy-big export from being done by someone setting the depth
	 * number too high. In other words, last resort safety net.
	 */

	return intval( min( $depth, 5 ) );
}
511
/**
 * Expand a list of pages to include pages linked to from that page.
 *
 * One round of link lookup is done per depth level; each round starts from
 * every page collected so far.
 *
 * @param array $inputPages Titles to start from
 * @param array $pageSet Associative array indexed by titles
 * @param int $depth Number of rounds to perform
 * @return array Associative array indexed by titles
 */
protected function getPageLinks( $inputPages, $pageSet, $depth ) {
	$columns = [ 'namespace' => 'pl_namespace', 'title' => 'pl_title' ];
	$joinConds = [ 'page_id=pl_from' ];

	$roundsLeft = $depth;
	while ( $roundsLeft > 0 ) {
		$pageSet = $this->getLinks( $inputPages, $pageSet, 'pagelinks', $columns, $joinConds );
		// The next round follows links from everything found so far.
		$inputPages = array_keys( $pageSet );
		--$roundsLeft;
	}

	return $pageSet;
}
531
/**
 * Expand a list of pages to include items used in those pages.
 *
 * @param array $inputPages Titles to look up; entries that do not parse as
 *   a title are skipped
 * @param array $pageSet Associative array indexed by titles, extended and returned
 * @param string $table Link table joined against 'page'
 * @param array $fields Selected columns; must provide the aliases
 *   'namespace' and 'title'
 * @param array $join Conditions joining $table to 'page'
 * @return array Associative array indexed by titles
 */
protected function getLinks( $inputPages, $pageSet, $table, $fields, $join ) {
	$dbr = $this->loadBalancer->getConnectionRef( ILoadBalancer::DB_REPLICA );

	foreach ( $inputPages as $pageName ) {
		$source = Title::newFromText( $pageName );
		if ( !$source ) {
			continue;
		}

		// The source page itself always belongs to the set.
		$pageSet[$source->getPrefixedText()] = true;

		$conds = array_merge(
			$join,
			[
				'page_namespace' => $source->getNamespace(),
				'page_title' => $source->getDBkey()
			]
		);
		$rows = $dbr->select( [ 'page', $table ], $fields, $conds, __METHOD__ );

		foreach ( $rows as $link ) {
			$target = Title::makeTitle( $link->namespace, $link->title );
			$pageSet[$target->getPrefixedText()] = true;
		}
	}

	return $pageSet;
}
573
/**
 * Name of the group under which this special page is listed.
 *
 * @return string Always 'pagetools'.
 */
protected function getGroupName() {
	$group = 'pagetools';

	return $group;
}
577}
const NS_MAIN
Definition Defines.php:64
wfTimestampNow()
Convenience function; returns MediaWiki timestamp for the present time.
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
wfResetOutputBuffers( $resetGzipEncoding=true)
Clear away any user-level output buffers, discarding contents.
PSR-3 logger instance factory.
A special page that allows users to export pages in an XML file.
getGroupName()
Under which header this special page is listed in Special:SpecialPages See messages 'specialpages-gro...
doExport( $page, $history, $list_authors, $exportall)
Do the actual page exporting.
getPageLinks( $inputPages, $pageSet, $depth)
Expand a list of pages to include pages linked to from that page.
getLinks( $inputPages, $pageSet, $table, $fields, $join)
Expand a list of pages to include items used in those pages.
__construct(ILoadBalancer $loadBalancer)
getPagesFromNamespace( $nsindex)
execute( $par)
Default execute method. Checks user permissions.
validateLinkDepth( $depth)
Validate link depth setting, if available.
getTemplates( $inputPages, $pageSet)
Expand a list of pages to include templates used in those pages.
getPagesFromCategory( $title)
ILoadBalancer $loadBalancer
Parent class for all special pages.
outputHeader( $summaryMessageKey='')
Outputs a summary message on top of special pages Per default the message key is the canonical name o...
setHeaders()
Sets headers - this should be called from the execute() method of all derived classes!
getOutput()
Get the OutputPage being used for this instance.
getContext()
Gets the context this SpecialPage is executed in.
msg( $key,... $params)
Wrapper around wfMessage that sets the current context.
getAuthority()
Shortcut to get the Authority executing this instance.
getConfig()
Shortcut to get main config object.
getRequest()
Get the WebRequest being used for this instance.
addHelpLink( $to, $overrideBaseUrl=false)
Adds help link with an icon via page indicators.
Database cluster connection, tracking, load balancing, and transaction manager interface.