47 public $list_authors =
false;
50 public $dumpUploads =
false;
53 public $dumpUploadFileContents =
false;
56 public $author_list =
"";
64 public const TEXT = XmlDumpWriter::WRITE_CONTENT;
65 public const STUB = XmlDumpWriter::WRITE_STUB;
88 private $revisionStore;
97 private $commentStore;
104 return MediaWikiServices::getInstance()->getMainConfig()->get(
105 MainConfigNames::XmlDumpSchemaVersion );
128 $history = self::CURRENT,
130 $limitNamespaces =
null
133 $this->commentStore = $commentStore;
134 $this->history = $history;
137 self::schemaVersion(),
144 $this->hookRunner =
new HookRunner( $hookContainer );
145 $this->revisionStore = $revisionStore;
146 $this->titleParser = $titleParser;
155 $this->writer =
new XmlDumpWriter( $this->text, $schemaVersion );
166 $this->sink =& $sink;
170 $output = $this->writer->openStream();
171 $this->sink->writeOpenStream( $output );
175 $output = $this->writer->closeStream();
176 $this->sink->writeCloseStream( $output );
198 $condition =
'rev_page >= ' . intval( $start );
200 $condition .=
' AND rev_page < ' . intval( $end );
203 $condition =
'page_id >= ' . intval( $start );
205 $condition .=
' AND page_id < ' . intval( $end );
208 $this->
dumpFrom( $condition, $orderRevs );
219 $condition =
'rev_id >= ' . intval( $start );
221 $condition .=
' AND rev_id < ' . intval( $end );
232 ' AND page_title=' . $this->db->addQuotes( $page->
getDBkey() ) );
241 $link = $this->titleParser->parseTitle( $name );
243 'page_namespace=' . $link->getNamespace() .
244 ' AND page_title=' . $this->db->addQuotes( $link->getDBkey() ) );
246 throw new MWException(
"Can't export invalid title" );
254 foreach ( $names as $name ) {
268 $condition =
'log_id >= ' . intval( $start );
270 $condition .=
' AND log_id < ' . intval( $end );
283 $this->author_list =
"<contributors>";
286 $revQuery = $this->revisionStore->getQueryInfo( [
'page' ] );
287 $res = $this->db->newSelectQueryBuilder()
289 'rev_user_text' =>
$revQuery[
'fields'][
'rev_user_text'],
290 'rev_user' =>
$revQuery[
'fields'][
'rev_user'],
294 $this->db->bitAnd(
'rev_deleted', RevisionRecord::DELETED_USER ) .
' = 0',
299 ->caller( __METHOD__ )
302 foreach (
$res as $row ) {
303 $this->author_list .=
"<contributor>" .
305 htmlspecialchars( $row->rev_user_text ) .
308 ( (int)$row->rev_user ) .
312 $this->author_list .=
"</contributors>";
321 protected function dumpFrom( $cond =
'', $orderRevs =
false ) {
322 if ( is_int( $this->history ) && ( $this->history & self::LOGS ) ) {
336 $hideLogs = LogEventsList::getExcludeClause( $this->db );
338 $where[] = $hideLogs;
340 # Add on any caller specified conditions
345 $commentQuery = $this->commentStore->getJoin(
'log_comment' );
347 $tables = array_merge(
348 [
'logging',
'actor' ], $commentQuery[
'tables']
351 'log_id',
'log_type',
'log_action',
'log_timestamp',
'log_namespace',
352 'log_title',
'log_params',
'log_deleted',
'actor_user',
'actor_name'
353 ] + $commentQuery[
'fields'];
355 'ORDER BY' =>
'log_id',
356 'USE INDEX' => [
'logging' =>
'PRIMARY' ],
357 'LIMIT' => self::BATCH_SIZE,
360 'actor' => [
'JOIN',
'actor_id=log_actor' ]
361 ] + $commentQuery[
'joins'];
365 $result = $this->db->select(
368 array_merge( $where, [
'log_id > ' . intval( $lastLogId ) ] ),
374 if ( !$result->numRows() ) {
379 $this->reloadDBConfig();
390 $revQuery = $this->revisionStore->getQueryInfo( [
'page' ] );
391 $slotQuery = $this->revisionStore->getSlotsQueryInfo( [
'content' ] );
398 $tables = array_merge( [
'page' ], array_diff(
$revQuery[
'tables'], [
'page' ] ) );
399 $tables = array_merge( $tables, array_diff( $slotQuery[
'tables'], $tables ) );
401 'revision' =>
$revQuery[
'joins'][
'page'],
402 'slots' => [
'JOIN', [
'slot_revision_id = rev_id' ] ],
403 'content' => [
'JOIN', [
'content_id = slot_content_id' ] ],
405 unset( $join[
'page'] );
407 $fields = array_merge(
$revQuery[
'fields'], $slotQuery[
'fields'] );
409 if ( $this->text != self::STUB ) {
410 $fields[
'_load_content'] =
'1';
414 if ( $cond !==
'' ) {
417 $opts = [
'ORDER BY' => [
'rev_page ASC',
'rev_id ASC' ] ];
418 $opts[
'USE INDEX'] = [];
421 if ( is_array( $this->history ) ) {
422 # Time offset/limit for all pages/history...
424 if ( $this->history[
'dir'] ==
'asc' ) {
425 $opts[
'ORDER BY'] =
'rev_timestamp ASC';
428 $opts[
'ORDER BY'] =
'rev_timestamp DESC';
431 if ( !empty( $this->history[
'offset'] ) ) {
432 $conds[] =
"rev_timestamp $op " .
433 $this->db->addQuotes( $this->db->timestamp( $this->history[
'offset'] ) );
436 if ( !empty( $this->history[
'limit'] ) ) {
437 $maxRowCount = intval( $this->history[
'limit'] );
439 } elseif ( $this->history & self::FULL ) {
440 # Full history dumps...
441 # query optimization for history stub dumps
442 if ( $this->text == self::STUB ) {
443 $opts[] =
'STRAIGHT_JOIN';
444 unset( $join[
'revision'] );
445 $join[
'page'] = [
'JOIN',
'rev_page=page_id' ];
447 } elseif ( $this->history & self::CURRENT ) {
448 # Latest revision dumps...
449 if ( $this->list_authors && $cond !=
'' ) {
452 $join[
'revision'] = [
'JOIN',
'page_id=rev_page AND page_latest=rev_id' ];
453 $opts[
'ORDER BY' ] = [
'page_id ASC' ];
454 } elseif ( $this->history & self::STABLE ) {
455 # "Stable" revision dumps...
456 # Default JOIN, to be overridden...
457 $join[
'revision'] = [
'JOIN',
'page_id=rev_page AND page_latest=rev_id' ];
458 # One, and only one hook should set this, and return false
459 if ( $this->hookRunner->onWikiExporter__dumpStableQuery( $tables, $opts, $join ) ) {
460 throw new MWException( __METHOD__ .
" given invalid history dump type." );
462 } elseif ( $this->history & self::RANGE ) {
463 # Dump of revisions within a specified range. Condition already set in revsByRange().
465 # Unknown history specification parameter?
466 throw new MWException( __METHOD__ .
" given invalid history dump type." );
475 $opts[
'LIMIT'] = self::BATCH_SIZE;
477 $this->hookRunner->onModifyExportQuery(
478 $this->db, $tables, $cond, $opts, $join, $conds );
482 if ( !empty( $maxRowCount ) && $rowCount + self::BATCH_SIZE > $maxRowCount ) {
483 $opts[
'LIMIT'] = $maxRowCount - $rowCount;
487 $queryConds = $conds;
488 $queryConds[] =
'rev_page>' . intval( $revPage ) .
' OR (rev_page=' .
489 intval( $revPage ) .
' AND rev_id' . $op . intval( $revId ) .
')';
491 # Do the query and process any results, remembering max ids for the next iteration.
492 $result = $this->db->select(
500 if ( $result->numRows() > 0 ) {
502 $rowCount += $result->numRows();
503 $revPage = $lastRow->rev_page;
504 $revId = $lastRow->rev_id;
510 if ( $done && $lastRow ) {
515 $this->reloadDBConfig();
540 $revRow = $slotRows[0];
542 if ( $this->limitNamespaces &&
543 !in_array( $revRow->page_namespace, $this->limitNamespaces ) ) {
548 if ( $lastRow ===
null ||
549 $lastRow->page_namespace !== $revRow->page_namespace ||
550 $lastRow->page_title !== $revRow->page_title ) {
551 if ( $lastRow !==
null ) {
553 if ( $this->dumpUploads ) {
554 $output .= $this->writer->writeUploads( $lastRow, $this->dumpUploadFileContents );
556 $output .= $this->writer->closePage();
557 $this->sink->writeClosePage( $output );
559 $output = $this->writer->openPage( $revRow );
560 $this->sink->writeOpenPage( $revRow, $output );
563 $output = $this->writer->writeRevision( $revRow, $slotRows );
564 $this->sink->writeRevision( $revRow, $output );
566 MWDebug::warning(
'Problem encountered retrieving rev and slot metadata for'
567 .
' revision ' . $revRow->rev_id .
': ' . $ex->getMessage() );
573 throw new LogicException(
'Error while processing a stream of slot rows' );
594 $slotRows[] = $carry;
599 while ( $row = $results->fetchObject() ) {
600 if ( $prev && $prev->rev_id !== $row->rev_id ) {
618 if ( $this->dumpUploads ) {
619 $output .= $this->writer->writeUploads( $lastRow, $this->dumpUploadFileContents );
621 $output .= $this->author_list;
622 $output .= $this->writer->closePage();
623 $this->sink->writeClosePage( $output );
631 foreach ( $resultset as $row ) {
632 $output = $this->writer->writeLogItem( $row );
633 $this->sink->writeLogItem( $row, $output );
635 return $row->log_id ??
null;
644 private function reloadDBConfig() {
645 MediaWikiServices::getInstance()->getDBLoadBalancerFactory()
A class containing constants representing the names of configuration variables.
revsByRange( $start, $end)
Dumps a series of page and revision records for those pages in the database with revisions falling wi...
dumpFrom( $cond='', $orderRevs=false)
finishPageStreamOutput( $lastRow)
Final page stream output, after all batches are complete.
getSlotRowBatch( $results, &$carry=null)
Returns all slot rows for a revision.
setOutputSink(&$sink)
Set the DumpOutput or DumpFilter object which will receive various row objects and XML output for fil...
pagesByRange( $start, $end, $orderRevs)
Dumps a series of page and revision records for those pages in the database falling within the page_i...
outputPageStreamBatch( $results, $lastRow)
Runs through a query result set dumping page, revision, and slot records.
bool $dumpUploadFileContents
__construct( $db, CommentStore $commentStore, HookContainer $hookContainer, RevisionStore $revisionStore, TitleParser $titleParser, $history=self::CURRENT, $text=self::TEXT, $limitNamespaces=null)
allPages()
Dumps a series of page and revision records for all pages in the database, either including complete ...
logsByRange( $start, $end)
do_list_authors( $cond)
Generates the distinct list of authors of an article. Not called by default (depends on $this->list_au...
array|null $limitNamespaces
dumpPages( $cond, $orderRevs)
pageByTitle(PageIdentity $page)
outputLogStream( $resultset)
setSchemaVersion( $schemaVersion)
static schemaVersion()
Returns the default export schema version, as defined by the XmlDumpSchemaVersion setting.
bool $list_authors
Return distinct author list (when not returning full history)
Interface for objects (potentially) representing an editable wiki page.
A title parser service for MediaWiki.