49 public $list_authors =
false;
52 public $dumpUploads =
false;
55 public $dumpUploadFileContents =
false;
58 public $author_list =
"";
66 public const TEXT = XmlDumpWriter::WRITE_CONTENT;
67 public const STUB = XmlDumpWriter::WRITE_STUB;
90 private $revisionStore;
99 private $commentStore;
106 return MediaWikiServices::getInstance()->getMainConfig()->get(
107 MainConfigNames::XmlDumpSchemaVersion );
130 $history = self::CURRENT,
132 $limitNamespaces =
null
135 $this->commentStore = $commentStore;
136 $this->history = $history;
139 self::schemaVersion(),
146 $this->hookRunner =
new HookRunner( $hookContainer );
147 $this->revisionStore = $revisionStore;
148 $this->titleParser = $titleParser;
157 $this->writer =
new XmlDumpWriter( $this->text, $schemaVersion );
168 $this->sink =& $sink;
172 $output = $this->writer->openStream();
173 $this->sink->writeOpenStream( $output );
177 $output = $this->writer->closeStream();
178 $this->sink->writeCloseStream( $output );
200 $condition =
'rev_page >= ' . intval( $start );
202 $condition .=
' AND rev_page < ' . intval( $end );
205 $condition =
'page_id >= ' . intval( $start );
207 $condition .=
' AND page_id < ' . intval( $end );
210 $this->
dumpFrom( $condition, $orderRevs );
221 $condition =
'rev_id >= ' . intval( $start );
223 $condition .=
' AND rev_id < ' . intval( $end );
234 ' AND page_title=' . $this->db->addQuotes( $page->
getDBkey() ) );
242 $link = $this->titleParser->parseTitle( $name );
244 'page_namespace=' . $link->getNamespace() .
245 ' AND page_title=' . $this->db->addQuotes( $link->getDBkey() ) );
247 throw new RuntimeException(
"Can't export invalid title" );
255 foreach ( $names as $name ) {
269 $condition =
'log_id >= ' . intval( $start );
271 $condition .=
' AND log_id < ' . intval( $end );
284 $this->author_list =
"<contributors>";
287 $res = $this->revisionStore->newSelectQueryBuilder( $this->db )
290 ->where( $this->db->bitAnd(
'rev_deleted', RevisionRecord::DELETED_USER ) .
' = 0' )
292 ->caller( __METHOD__ )->fetchResultSet();
294 foreach ( $res as $row ) {
295 $this->author_list .=
"<contributor>" .
297 htmlspecialchars( $row->rev_user_text ) .
300 ( (int)$row->rev_user ) .
304 $this->author_list .=
"</contributors>";
311 protected function dumpFrom( $cond =
'', $orderRevs =
false ) {
312 if ( is_int( $this->history ) && ( $this->history & self::LOGS ) ) {
325 $hideLogs = LogEventsList::getExcludeClause( $this->db );
327 $where[] = $hideLogs;
329 # Add on any caller specified conditions
334 $commentQuery = $this->commentStore->getJoin(
'log_comment' );
336 $tables = array_merge(
337 [
'logging',
'actor' ], $commentQuery[
'tables']
340 'log_id',
'log_type',
'log_action',
'log_timestamp',
'log_namespace',
341 'log_title',
'log_params',
'log_deleted',
'actor_user',
'actor_name'
342 ] + $commentQuery[
'fields'];
344 'ORDER BY' =>
'log_id',
345 'USE INDEX' => [
'logging' =>
'PRIMARY' ],
346 'LIMIT' => self::BATCH_SIZE,
349 'actor' => [
'JOIN',
'actor_id=log_actor' ]
350 ] + $commentQuery[
'joins'];
354 $result = $this->db->select(
357 array_merge( $where, [
'log_id > ' . intval( $lastLogId ) ] ),
363 if ( !$result->numRows() ) {
368 $this->reloadDBConfig();
377 $revQuery = $this->revisionStore->getQueryInfo( [
'page' ] );
378 $slotQuery = $this->revisionStore->getSlotsQueryInfo( [
'content' ] );
385 $tables = array_merge( [
'page' ], array_diff( $revQuery[
'tables'], [
'page' ] ) );
386 $tables = array_merge( $tables, array_diff( $slotQuery[
'tables'], $tables ) );
387 $join = $revQuery[
'joins'] + [
388 'revision' => $revQuery[
'joins'][
'page'],
389 'slots' => [
'JOIN', [
'slot_revision_id = rev_id' ] ],
390 'content' => [
'JOIN', [
'content_id = slot_content_id' ] ],
392 unset( $join[
'page'] );
394 $fields = array_merge( $revQuery[
'fields'], $slotQuery[
'fields'] );
396 if ( $this->text != self::STUB ) {
397 $fields[
'_load_content'] =
'1';
401 if ( $cond !==
'' ) {
404 $opts = [
'ORDER BY' => [
'rev_page ASC',
'rev_id ASC' ] ];
405 $opts[
'USE INDEX'] = [];
408 if ( is_array( $this->history ) ) {
409 # Time offset/limit for all pages/history...
411 if ( $this->history[
'dir'] ==
'asc' ) {
412 $opts[
'ORDER BY'] =
'rev_timestamp ASC';
415 $opts[
'ORDER BY'] =
'rev_timestamp DESC';
418 if ( !empty( $this->history[
'offset'] ) ) {
419 $conds[] =
"rev_timestamp $op " .
420 $this->db->addQuotes( $this->db->timestamp( $this->history[
'offset'] ) );
423 if ( !empty( $this->history[
'limit'] ) ) {
424 $maxRowCount = intval( $this->history[
'limit'] );
426 } elseif ( $this->history & self::FULL ) {
427 # Full history dumps...
428 # query optimization for history stub dumps
429 if ( $this->text == self::STUB ) {
430 $opts[] =
'STRAIGHT_JOIN';
431 unset( $join[
'revision'] );
432 $join[
'page'] = [
'JOIN',
'rev_page=page_id' ];
434 } elseif ( $this->history & self::CURRENT ) {
435 # Latest revision dumps...
436 if ( $this->list_authors && $cond !=
'' ) {
439 $join[
'revision'] = [
'JOIN',
'page_id=rev_page AND page_latest=rev_id' ];
440 $opts[
'ORDER BY' ] = [
'page_id ASC' ];
441 } elseif ( $this->history & self::STABLE ) {
442 # "Stable" revision dumps...
443 # Default JOIN, to be overridden...
444 $join[
'revision'] = [
'JOIN',
'page_id=rev_page AND page_latest=rev_id' ];
445 # One, and only one hook should set this, and return false
446 if ( $this->hookRunner->onWikiExporter__dumpStableQuery( $tables, $opts, $join ) ) {
447 throw new LogicException( __METHOD__ .
" given invalid history dump type." );
449 } elseif ( $this->history & self::RANGE ) {
450 # Dump of revisions within a specified range. Condition already set in revsByRange().
452 # Unknown history specification parameter?
453 throw new UnexpectedValueException( __METHOD__ .
" given invalid history dump type." );
462 $opts[
'LIMIT'] = self::BATCH_SIZE;
464 $this->hookRunner->onModifyExportQuery(
465 $this->db, $tables, $cond, $opts, $join, $conds );
469 if ( !empty( $maxRowCount ) && $rowCount + self::BATCH_SIZE > $maxRowCount ) {
470 $opts[
'LIMIT'] = $maxRowCount - $rowCount;
474 $queryConds = $conds;
475 $queryConds[] =
'rev_page>' . intval( $revPage ) .
' OR (rev_page=' .
476 intval( $revPage ) .
' AND rev_id' . $op . intval( $revId ) .
')';
478 # Do the query and process any results, remembering max ids for the next iteration.
479 $result = $this->db->select(
487 if ( $result->numRows() > 0 ) {
489 $rowCount += $result->numRows();
490 $revPage = $lastRow->rev_page;
491 $revId = $lastRow->rev_id;
497 if ( $done && $lastRow ) {
502 $this->reloadDBConfig();
527 $revRow = $slotRows[0];
529 if ( $this->limitNamespaces &&
530 !in_array( $revRow->page_namespace, $this->limitNamespaces ) ) {
535 if ( $lastRow ===
null ||
536 $lastRow->page_namespace !== $revRow->page_namespace ||
537 $lastRow->page_title !== $revRow->page_title ) {
538 if ( $lastRow !==
null ) {
540 if ( $this->dumpUploads ) {
541 $output .= $this->writer->writeUploads( $lastRow, $this->dumpUploadFileContents );
543 $output .= $this->writer->closePage();
544 $this->sink->writeClosePage( $output );
546 $output = $this->writer->openPage( $revRow );
547 $this->sink->writeOpenPage( $revRow, $output );
550 $output = $this->writer->writeRevision( $revRow, $slotRows );
551 $this->sink->writeRevision( $revRow, $output );
553 MWDebug::warning(
'Problem encountered retrieving rev and slot metadata for'
554 .
' revision ' . $revRow->rev_id .
': ' . $ex->getMessage() );
560 throw new LogicException(
'Error while processing a stream of slot rows' );
581 $slotRows[] = $carry;
586 while ( $row = $results->fetchObject() ) {
587 if ( $prev && $prev->rev_id !== $row->rev_id ) {
605 if ( $this->dumpUploads ) {
606 $output .= $this->writer->writeUploads( $lastRow, $this->dumpUploadFileContents );
608 $output .= $this->author_list;
609 $output .= $this->writer->closePage();
610 $this->sink->writeClosePage( $output );
618 foreach ( $resultset as $row ) {
619 $output = $this->writer->writeLogItem( $row );
620 $this->sink->writeLogItem( $row, $output );
622 return $row->log_id ??
null;
631 private function reloadDBConfig() {
632 MediaWikiServices::getInstance()->getDBLoadBalancerFactory()
A class containing constants representing the names of configuration variables.
revsByRange( $start, $end)
Dumps a series of page and revision records for those pages in the database with revisions falling within the rev_id range given.
dumpFrom( $cond='', $orderRevs=false)
finishPageStreamOutput( $lastRow)
Final page stream output, after all batches are complete.
getSlotRowBatch( $results, &$carry=null)
Returns all slot rows for a revision.
setOutputSink(&$sink)
Set the DumpOutput or DumpFilter object which will receive various row objects and XML output for fil...
pagesByRange( $start, $end, $orderRevs)
Dumps a series of page and revision records for those pages in the database falling within the page_id range given.
outputPageStreamBatch( $results, $lastRow)
Runs through a query result set dumping page, revision, and slot records.
bool $dumpUploadFileContents
__construct( $db, CommentStore $commentStore, HookContainer $hookContainer, RevisionStore $revisionStore, TitleParser $titleParser, $history=self::CURRENT, $text=self::TEXT, $limitNamespaces=null)
allPages()
Dumps a series of page and revision records for all pages in the database, either including complete ...
logsByRange( $start, $end)
do_list_authors( $cond)
Generates the distinct list of authors of an article. Not called by default (depends on $this->list_authors)...
array|null $limitNamespaces
dumpPages( $cond, $orderRevs)
pageByTitle(PageIdentity $page)
outputLogStream( $resultset)
setSchemaVersion( $schemaVersion)
static schemaVersion()
Returns the default export schema version, as defined by the XmlDumpSchemaVersion setting.
bool $list_authors
Return distinct author list (when not returning full history)
Interface for objects (potentially) representing an editable wiki page.