46 public $list_authors =
false;
49 public $dumpUploads =
false;
52 public $dumpUploadFileContents =
false;
55 public $author_list =
"";
63 public const TEXT = XmlDumpWriter::WRITE_CONTENT;
64 public const STUB = XmlDumpWriter::WRITE_STUB;
87 private $revisionStore;
100 return MediaWikiServices::getInstance()->getMainConfig()->get(
101 MainConfigNames::XmlDumpSchemaVersion );
122 $history = self::CURRENT,
124 $limitNamespaces =
null
127 $this->history = $history;
130 $this->writer =
new XmlDumpWriter( $text, self::schemaVersion() );
134 $this->hookRunner =
new HookRunner( $hookContainer );
135 $this->revisionStore = $revisionStore;
136 $this->titleParser = $titleParser;
145 $this->writer =
new XmlDumpWriter( $this->text, $schemaVersion );
156 $this->sink =& $sink;
160 $output = $this->writer->openStream();
161 $this->sink->writeOpenStream( $output );
165 $output = $this->writer->closeStream();
166 $this->sink->writeCloseStream( $output );
188 $condition =
'rev_page >= ' . intval( $start );
190 $condition .=
' AND rev_page < ' . intval( $end );
193 $condition =
'page_id >= ' . intval( $start );
195 $condition .=
' AND page_id < ' . intval( $end );
198 $this->
dumpFrom( $condition, $orderRevs );
209 $condition =
'rev_id >= ' . intval( $start );
211 $condition .=
' AND rev_id < ' . intval( $end );
222 ' AND page_title=' . $this->db->addQuotes( $page->
getDBkey() ) );
231 $link = $this->titleParser->parseTitle( $name );
233 'page_namespace=' . $link->getNamespace() .
234 ' AND page_title=' . $this->db->addQuotes( $link->getDBkey() ) );
236 throw new MWException(
"Can't export invalid title" );
244 foreach ( $names as $name ) {
258 $condition =
'log_id >= ' . intval( $start );
260 $condition .=
' AND log_id < ' . intval( $end );
273 $this->author_list =
"<contributors>";
276 $revQuery = $this->revisionStore->getQueryInfo( [
'page' ] );
277 $res = $this->db->select(
280 'rev_user_text' =>
$revQuery[
'fields'][
'rev_user_text'],
281 'rev_user' =>
$revQuery[
'fields'][
'rev_user'],
284 $this->db->bitAnd(
'rev_deleted', RevisionRecord::DELETED_USER ) .
' = 0',
292 foreach (
$res as $row ) {
293 $this->author_list .=
"<contributor>" .
295 htmlspecialchars( $row->rev_user_text ) .
298 ( (int)$row->rev_user ) .
302 $this->author_list .=
"</contributors>";
311 protected function dumpFrom( $cond =
'', $orderRevs =
false ) {
312 if ( is_int( $this->history ) && ( $this->history & self::LOGS ) ) {
326 $hideLogs = LogEventsList::getExcludeClause( $this->db );
328 $where[] = $hideLogs;
330 # Add on any caller specified conditions
336 $commentQuery = CommentStore::getStore()->getJoin(
'log_comment' );
338 $tables = array_merge(
339 [
'logging',
'actor' ], $commentQuery[
'tables']
342 'log_id',
'log_type',
'log_action',
'log_timestamp',
'log_namespace',
343 'log_title',
'log_params',
'log_deleted',
'actor_user',
'actor_name'
344 ] + $commentQuery[
'fields'];
346 'ORDER BY' =>
'log_id',
347 'USE INDEX' => [
'logging' =>
'PRIMARY' ],
348 'LIMIT' => self::BATCH_SIZE,
351 'actor' => [
'JOIN',
'actor_id=log_actor' ]
352 ] + $commentQuery[
'joins'];
356 $result = $this->db->select(
359 array_merge( $where, [
'log_id > ' . intval( $lastLogId ) ] ),
365 if ( !$result->numRows() ) {
370 $this->reloadDBConfig();
381 $revQuery = $this->revisionStore->getQueryInfo( [
'page' ] );
382 $slotQuery = $this->revisionStore->getSlotsQueryInfo( [
'content' ] );
389 $tables = array_merge( [
'page' ], array_diff(
$revQuery[
'tables'], [
'page' ] ) );
390 $tables = array_merge( $tables, array_diff( $slotQuery[
'tables'], $tables ) );
392 'revision' =>
$revQuery[
'joins'][
'page'],
393 'slots' => [
'JOIN', [
'slot_revision_id = rev_id' ] ],
394 'content' => [
'JOIN', [
'content_id = slot_content_id' ] ],
396 unset( $join[
'page'] );
398 $fields = array_merge(
$revQuery[
'fields'], $slotQuery[
'fields'] );
400 if ( $this->text != self::STUB ) {
401 $fields[
'_load_content'] =
'1';
405 if ( $cond !==
'' ) {
408 $opts = [
'ORDER BY' => [
'rev_page ASC',
'rev_id ASC' ] ];
409 $opts[
'USE INDEX'] = [];
412 if ( is_array( $this->history ) ) {
413 # Time offset/limit for all pages/history...
415 if ( $this->history[
'dir'] ==
'asc' ) {
416 $opts[
'ORDER BY'] =
'rev_timestamp ASC';
419 $opts[
'ORDER BY'] =
'rev_timestamp DESC';
422 if ( !empty( $this->history[
'offset'] ) ) {
423 $conds[] =
"rev_timestamp $op " .
424 $this->db->addQuotes( $this->db->timestamp( $this->history[
'offset'] ) );
427 if ( !empty( $this->history[
'limit'] ) ) {
428 $maxRowCount = intval( $this->history[
'limit'] );
430 } elseif ( $this->history & self::FULL ) {
431 # Full history dumps...
432 # query optimization for history stub dumps
433 if ( $this->text == self::STUB ) {
434 $opts[] =
'STRAIGHT_JOIN';
435 unset( $join[
'revision'] );
436 $join[
'page'] = [
'JOIN',
'rev_page=page_id' ];
438 } elseif ( $this->history & self::CURRENT ) {
439 # Latest revision dumps...
440 if ( $this->list_authors && $cond !=
'' ) {
443 $join[
'revision'] = [
'JOIN',
'page_id=rev_page AND page_latest=rev_id' ];
444 $opts[
'ORDER BY' ] = [
'page_id ASC' ];
445 } elseif ( $this->history & self::STABLE ) {
446 # "Stable" revision dumps...
447 # Default JOIN, to be overridden...
448 $join[
'revision'] = [
'JOIN',
'page_id=rev_page AND page_latest=rev_id' ];
449 # One, and only one hook should set this, and return false
450 if ( $this->hookRunner->onWikiExporter__dumpStableQuery( $tables, $opts, $join ) ) {
451 throw new MWException( __METHOD__ .
" given invalid history dump type." );
453 } elseif ( $this->history & self::RANGE ) {
454 # Dump of revisions within a specified range. Condition already set in revsByRange().
456 # Unknown history specification parameter?
457 throw new MWException( __METHOD__ .
" given invalid history dump type." );
467 $opts[
'LIMIT'] = self::BATCH_SIZE;
469 $this->hookRunner->onModifyExportQuery(
470 $this->db, $tables, $cond, $opts, $join, $conds );
474 if ( !empty( $maxRowCount ) && $rowCount + self::BATCH_SIZE > $maxRowCount ) {
475 $opts[
'LIMIT'] = $maxRowCount - $rowCount;
479 $queryConds = $conds;
480 $queryConds[] =
'rev_page>' . intval( $revPage ) .
' OR (rev_page=' .
481 intval( $revPage ) .
' AND rev_id' . $op . intval( $revId ) .
')';
483 # Do the query and process any results, remembering max ids for the next iteration.
484 $result = $this->db->select(
492 if ( $result->numRows() > 0 ) {
494 $rowCount += $result->numRows();
495 $revPage = $lastRow->rev_page;
496 $revId = $lastRow->rev_id;
502 if ( $done && $lastRow ) {
507 $this->reloadDBConfig();
532 $revRow = $slotRows[0];
534 if ( $this->limitNamespaces &&
535 !in_array( $revRow->page_namespace, $this->limitNamespaces ) ) {
540 if ( $lastRow ===
null ||
541 $lastRow->page_namespace !== $revRow->page_namespace ||
542 $lastRow->page_title !== $revRow->page_title ) {
543 if ( $lastRow !==
null ) {
545 if ( $this->dumpUploads ) {
546 $output .= $this->writer->writeUploads( $lastRow, $this->dumpUploadFileContents );
548 $output .= $this->writer->closePage();
549 $this->sink->writeClosePage( $output );
551 $output = $this->writer->openPage( $revRow );
552 $this->sink->writeOpenPage( $revRow, $output );
555 $output = $this->writer->writeRevision( $revRow, $slotRows );
556 $this->sink->writeRevision( $revRow, $output );
558 MWDebug::warning(
'Problem encountered retrieving rev and slot metadata for'
559 .
' revision ' . $revRow->rev_id .
': ' . $ex->getMessage() );
565 throw new LogicException(
'Error while processing a stream of slot rows' );
586 $slotRows[] = $carry;
591 while ( $row = $results->fetchObject() ) {
592 if ( $prev && $prev->rev_id !== $row->rev_id ) {
610 if ( $this->dumpUploads ) {
611 $output .= $this->writer->writeUploads( $lastRow, $this->dumpUploadFileContents );
613 $output .= $this->author_list;
614 $output .= $this->writer->closePage();
615 $this->sink->writeClosePage( $output );
623 foreach ( $resultset as $row ) {
624 $output = $this->writer->writeLogItem( $row );
625 $this->sink->writeLogItem( $row, $output );
627 return $row->log_id ??
null;
636 private function reloadDBConfig() {
637 MediaWikiServices::getInstance()->getDBLoadBalancerFactory()
A class containing constants representing the names of configuration variables.
revsByRange( $start, $end)
Dumps a series of page and revision records for those pages in the database with revisions falling wi...
__construct( $db, HookContainer $hookContainer, RevisionStore $revisionStore, TitleParser $titleParser, $history=self::CURRENT, $text=self::TEXT, $limitNamespaces=null)
dumpFrom( $cond='', $orderRevs=false)
finishPageStreamOutput( $lastRow)
Final page stream output, after all batches are complete.
getSlotRowBatch( $results, &$carry=null)
Returns all slot rows for a revision.
setOutputSink(&$sink)
Set the DumpOutput or DumpFilter object which will receive various row objects and XML output for fil...
pagesByRange( $start, $end, $orderRevs)
Dumps a series of page and revision records for those pages in the database falling within the page_i...
outputPageStreamBatch( $results, $lastRow)
Runs through a query result set dumping page, revision, and slot records.
bool $dumpUploadFileContents
allPages()
Dumps a series of page and revision records for all pages in the database, either including complete ...
logsByRange( $start, $end)
do_list_authors( $cond)
Generates the distinct list of authors of an article. Not called by default (depends on $this->list_au...
array|null $limitNamespaces
dumpPages( $cond, $orderRevs)
pageByTitle(PageIdentity $page)
outputLogStream( $resultset)
setSchemaVersion( $schemaVersion)
static schemaVersion()
Returns the default export schema version, as defined by the XmlDumpSchemaVersion setting.
bool $list_authors
Return distinct author list (when not returning full history)
Interface for objects (potentially) representing an editable wiki page.
A title parser service for MediaWiki.