49 public $list_authors =
false;
52 public $dumpUploads =
false;
55 public $dumpUploadFileContents =
false;
58 public $author_list =
"";
66 public const TEXT = XmlDumpWriter::WRITE_CONTENT;
67 public const STUB = XmlDumpWriter::WRITE_STUB;
90 private $revisionStore;
99 private $commentStore;
106 return MediaWikiServices::getInstance()->getMainConfig()->get(
107 MainConfigNames::XmlDumpSchemaVersion );
130 $history = self::CURRENT,
132 $limitNamespaces =
null
135 $this->commentStore = $commentStore;
136 $this->history = $history;
139 self::schemaVersion(),
146 $this->hookRunner =
new HookRunner( $hookContainer );
147 $this->revisionStore = $revisionStore;
148 $this->titleParser = $titleParser;
157 $this->writer =
new XmlDumpWriter( $this->text, $schemaVersion );
168 $this->sink =& $sink;
172 $output = $this->writer->openStream();
173 $this->sink->writeOpenStream( $output );
177 $output = $this->writer->closeStream();
178 $this->sink->writeCloseStream( $output );
200 $condition =
'rev_page >= ' . intval( $start );
202 $condition .=
' AND rev_page < ' . intval( $end );
205 $condition =
'page_id >= ' . intval( $start );
207 $condition .=
' AND page_id < ' . intval( $end );
210 $this->
dumpFrom( $condition, $orderRevs );
221 $condition =
'rev_id >= ' . intval( $start );
223 $condition .=
' AND rev_id < ' . intval( $end );
234 ' AND page_title=' . $this->db->addQuotes( $page->
getDBkey() ) );
243 $link = $this->titleParser->parseTitle( $name );
245 'page_namespace=' . $link->getNamespace() .
246 ' AND page_title=' . $this->db->addQuotes( $link->getDBkey() ) );
248 throw new MWException(
"Can't export invalid title" );
256 foreach ( $names as $name ) {
270 $condition =
'log_id >= ' . intval( $start );
272 $condition .=
' AND log_id < ' . intval( $end );
285 $this->author_list =
"<contributors>";
288 $res = $this->revisionStore->newSelectQueryBuilder( $this->db )
291 ->where( $this->db->bitAnd(
'rev_deleted', RevisionRecord::DELETED_USER ) .
' = 0' )
293 ->caller( __METHOD__ )->fetchResultSet();
295 foreach ( $res as $row ) {
296 $this->author_list .=
"<contributor>" .
298 htmlspecialchars( $row->rev_user_text ) .
301 ( (int)$row->rev_user ) .
305 $this->author_list .=
"</contributors>";
314 protected function dumpFrom( $cond =
'', $orderRevs =
false ) {
315 if ( is_int( $this->history ) && ( $this->history & self::LOGS ) ) {
329 $hideLogs = LogEventsList::getExcludeClause( $this->db );
331 $where[] = $hideLogs;
333 # Add on any caller specified conditions
338 $commentQuery = $this->commentStore->getJoin(
'log_comment' );
340 $tables = array_merge(
341 [
'logging',
'actor' ], $commentQuery[
'tables']
344 'log_id',
'log_type',
'log_action',
'log_timestamp',
'log_namespace',
345 'log_title',
'log_params',
'log_deleted',
'actor_user',
'actor_name'
346 ] + $commentQuery[
'fields'];
348 'ORDER BY' =>
'log_id',
349 'USE INDEX' => [
'logging' =>
'PRIMARY' ],
350 'LIMIT' => self::BATCH_SIZE,
353 'actor' => [
'JOIN',
'actor_id=log_actor' ]
354 ] + $commentQuery[
'joins'];
358 $result = $this->db->select(
361 array_merge( $where, [
'log_id > ' . intval( $lastLogId ) ] ),
367 if ( !$result->numRows() ) {
372 $this->reloadDBConfig();
383 $revQuery = $this->revisionStore->getQueryInfo( [
'page' ] );
384 $slotQuery = $this->revisionStore->getSlotsQueryInfo( [
'content' ] );
391 $tables = array_merge( [
'page' ], array_diff( $revQuery[
'tables'], [
'page' ] ) );
392 $tables = array_merge( $tables, array_diff( $slotQuery[
'tables'], $tables ) );
393 $join = $revQuery[
'joins'] + [
394 'revision' => $revQuery[
'joins'][
'page'],
395 'slots' => [
'JOIN', [
'slot_revision_id = rev_id' ] ],
396 'content' => [
'JOIN', [
'content_id = slot_content_id' ] ],
398 unset( $join[
'page'] );
400 $fields = array_merge( $revQuery[
'fields'], $slotQuery[
'fields'] );
402 if ( $this->text != self::STUB ) {
403 $fields[
'_load_content'] =
'1';
407 if ( $cond !==
'' ) {
410 $opts = [
'ORDER BY' => [
'rev_page ASC',
'rev_id ASC' ] ];
411 $opts[
'USE INDEX'] = [];
414 if ( is_array( $this->history ) ) {
415 # Time offset/limit for all pages/history...
417 if ( $this->history[
'dir'] ==
'asc' ) {
418 $opts[
'ORDER BY'] =
'rev_timestamp ASC';
421 $opts[
'ORDER BY'] =
'rev_timestamp DESC';
424 if ( !empty( $this->history[
'offset'] ) ) {
425 $conds[] =
"rev_timestamp $op " .
426 $this->db->addQuotes( $this->db->timestamp( $this->history[
'offset'] ) );
429 if ( !empty( $this->history[
'limit'] ) ) {
430 $maxRowCount = intval( $this->history[
'limit'] );
432 } elseif ( $this->history & self::FULL ) {
433 # Full history dumps...
434 # query optimization for history stub dumps
435 if ( $this->text == self::STUB ) {
436 $opts[] =
'STRAIGHT_JOIN';
437 unset( $join[
'revision'] );
438 $join[
'page'] = [
'JOIN',
'rev_page=page_id' ];
440 } elseif ( $this->history & self::CURRENT ) {
441 # Latest revision dumps...
442 if ( $this->list_authors && $cond !=
'' ) {
445 $join[
'revision'] = [
'JOIN',
'page_id=rev_page AND page_latest=rev_id' ];
446 $opts[
'ORDER BY' ] = [
'page_id ASC' ];
447 } elseif ( $this->history & self::STABLE ) {
448 # "Stable" revision dumps...
449 # Default JOIN, to be overridden...
450 $join[
'revision'] = [
'JOIN',
'page_id=rev_page AND page_latest=rev_id' ];
451 # One, and only one hook should set this, and return false
452 if ( $this->hookRunner->onWikiExporter__dumpStableQuery( $tables, $opts, $join ) ) {
453 throw new MWException( __METHOD__ .
" given invalid history dump type." );
455 } elseif ( $this->history & self::RANGE ) {
456 # Dump of revisions within a specified range. Condition already set in revsByRange().
458 # Unknown history specification parameter?
459 throw new MWException( __METHOD__ .
" given invalid history dump type." );
468 $opts[
'LIMIT'] = self::BATCH_SIZE;
470 $this->hookRunner->onModifyExportQuery(
471 $this->db, $tables, $cond, $opts, $join, $conds );
475 if ( !empty( $maxRowCount ) && $rowCount + self::BATCH_SIZE > $maxRowCount ) {
476 $opts[
'LIMIT'] = $maxRowCount - $rowCount;
480 $queryConds = $conds;
481 $queryConds[] =
'rev_page>' . intval( $revPage ) .
' OR (rev_page=' .
482 intval( $revPage ) .
' AND rev_id' . $op . intval( $revId ) .
')';
484 # Do the query and process any results, remembering max ids for the next iteration.
485 $result = $this->db->select(
493 if ( $result->numRows() > 0 ) {
495 $rowCount += $result->numRows();
496 $revPage = $lastRow->rev_page;
497 $revId = $lastRow->rev_id;
503 if ( $done && $lastRow ) {
508 $this->reloadDBConfig();
533 $revRow = $slotRows[0];
535 if ( $this->limitNamespaces &&
536 !in_array( $revRow->page_namespace, $this->limitNamespaces ) ) {
541 if ( $lastRow ===
null ||
542 $lastRow->page_namespace !== $revRow->page_namespace ||
543 $lastRow->page_title !== $revRow->page_title ) {
544 if ( $lastRow !==
null ) {
546 if ( $this->dumpUploads ) {
547 $output .= $this->writer->writeUploads( $lastRow, $this->dumpUploadFileContents );
549 $output .= $this->writer->closePage();
550 $this->sink->writeClosePage( $output );
552 $output = $this->writer->openPage( $revRow );
553 $this->sink->writeOpenPage( $revRow, $output );
556 $output = $this->writer->writeRevision( $revRow, $slotRows );
557 $this->sink->writeRevision( $revRow, $output );
559 MWDebug::warning(
'Problem encountered retrieving rev and slot metadata for'
560 .
' revision ' . $revRow->rev_id .
': ' . $ex->getMessage() );
566 throw new LogicException(
'Error while processing a stream of slot rows' );
587 $slotRows[] = $carry;
592 while ( $row = $results->fetchObject() ) {
593 if ( $prev && $prev->rev_id !== $row->rev_id ) {
611 if ( $this->dumpUploads ) {
612 $output .= $this->writer->writeUploads( $lastRow, $this->dumpUploadFileContents );
614 $output .= $this->author_list;
615 $output .= $this->writer->closePage();
616 $this->sink->writeClosePage( $output );
624 foreach ( $resultset as $row ) {
625 $output = $this->writer->writeLogItem( $row );
626 $this->sink->writeLogItem( $row, $output );
628 return $row->log_id ??
null;
637 private function reloadDBConfig() {
638 MediaWikiServices::getInstance()->getDBLoadBalancerFactory()
A class containing constants representing the names of configuration variables.
revsByRange( $start, $end)
Dumps a series of page and revision records for those pages in the database with revisions falling within the given rev_id range (from $start inclusive up to, but not including, $end).
dumpFrom( $cond='', $orderRevs=false)
finishPageStreamOutput( $lastRow)
Final page stream output, after all batches are complete.
getSlotRowBatch( $results, &$carry=null)
Returns all slot rows for a revision.
setOutputSink(&$sink)
Set the DumpOutput or DumpFilter object which will receive various row objects and XML output for filtering.
pagesByRange( $start, $end, $orderRevs)
Dumps a series of page and revision records for those pages in the database falling within the given page_id range (from $start inclusive up to, but not including, $end).
outputPageStreamBatch( $results, $lastRow)
Runs through a query result set dumping page, revision, and slot records.
bool $dumpUploadFileContents
__construct( $db, CommentStore $commentStore, HookContainer $hookContainer, RevisionStore $revisionStore, TitleParser $titleParser, $history=self::CURRENT, $text=self::TEXT, $limitNamespaces=null)
allPages()
Dumps a series of page and revision records for all pages in the database, either including complete history or only the most current revision.
logsByRange( $start, $end)
do_list_authors( $cond)
Generates the distinct list of authors of an article. Not called by default (depends on $this->list_authors).
array|null $limitNamespaces
dumpPages( $cond, $orderRevs)
pageByTitle(PageIdentity $page)
outputLogStream( $resultset)
setSchemaVersion( $schemaVersion)
static schemaVersion()
Returns the default export schema version, as defined by the XmlDumpSchemaVersion setting.
bool $list_authors
Return distinct author list (when not returning full history)
Interface for objects (potentially) representing an editable wiki page.