61 public const TEXT = XmlDumpWriter::WRITE_CONTENT;
62 public const STUB = XmlDumpWriter::WRITE_STUB;
125 $services = MediaWikiServices::getInstance();
126 $this->hookRunner =
new HookRunner( $services->getHookContainer() );
127 $this->revisionStore = $services->getRevisionStore();
128 $this->titleParser = $services->getTitleParser();
137 $this->writer =
new XmlDumpWriter( $this->text, $schemaVersion );
148 $this->sink =&
$sink;
152 $output = $this->writer->openStream();
153 $this->sink->writeOpenStream( $output );
157 $output = $this->writer->closeStream();
158 $this->sink->writeCloseStream( $output );
180 $condition =
'rev_page >= ' . intval( $start );
182 $condition .=
' AND rev_page < ' . intval( $end );
185 $condition =
'page_id >= ' . intval( $start );
187 $condition .=
' AND page_id < ' . intval( $end );
190 $this->
dumpFrom( $condition, $orderRevs );
201 $condition =
'rev_id >= ' . intval( $start );
203 $condition .=
' AND rev_id < ' . intval( $end );
214 ' AND page_title=' . $this->db->addQuotes( $page->
getDBkey() ) );
223 $link = $this->titleParser->parseTitle( $name );
225 'page_namespace=' . $link->getNamespace() .
226 ' AND page_title=' . $this->db->addQuotes( $link->getDBkey() ) );
228 throw new MWException(
"Can't export invalid title" );
236 foreach ( $names as $name ) {
250 $condition =
'log_id >= ' . intval( $start );
252 $condition .=
' AND log_id < ' . intval( $end );
265 $this->author_list =
"<contributors>";
268 $revQuery = $this->revisionStore->getQueryInfo( [
'page' ] );
269 $res = $this->db->select(
272 'rev_user_text' =>
$revQuery[
'fields'][
'rev_user_text'],
273 'rev_user' =>
$revQuery[
'fields'][
'rev_user'],
276 $this->db->bitAnd(
'rev_deleted', RevisionRecord::DELETED_USER ) .
' = 0',
284 foreach (
$res as $row ) {
285 $this->author_list .=
"<contributor>" .
287 htmlspecialchars( $row->rev_user_text ) .
290 ( (int)$row->rev_user ) .
294 $this->author_list .=
"</contributors>";
303 protected function dumpFrom( $cond =
'', $orderRevs =
false ) {
304 if ( $this->history & self::LOGS ) {
318 $hideLogs = LogEventsList::getExcludeClause( $this->db );
320 $where[] = $hideLogs;
322 # Add on any caller specified conditions
328 $commentQuery = CommentStore::getStore()->getJoin(
'log_comment' );
330 $tables = array_merge(
331 [
'logging',
'actor' ], $commentQuery[
'tables']
334 'log_id',
'log_type',
'log_action',
'log_timestamp',
'log_namespace',
335 'log_title',
'log_params',
'log_deleted',
'actor_user',
'actor_name'
336 ] + $commentQuery[
'fields'];
338 'ORDER BY' =>
'log_id',
339 'USE INDEX' => [
'logging' =>
'PRIMARY' ],
343 'actor' => [
'JOIN',
'actor_id=log_actor' ]
344 ] + $commentQuery[
'joins'];
348 $result = $this->db->select(
351 array_merge( $where, [
'log_id > ' . intval( $lastLogId ) ] ),
357 if ( !$result->numRows() ) {
372 $revQuery = $this->revisionStore->getQueryInfo( [
'page' ] );
373 $slotQuery = $this->revisionStore->getSlotsQueryInfo( [
'content' ] );
380 $tables = array_merge( [
'page' ], array_diff(
$revQuery[
'tables'], [
'page' ] ) );
381 $tables = array_merge( $tables, array_diff( $slotQuery[
'tables'], $tables ) );
383 'revision' =>
$revQuery[
'joins'][
'page'],
384 'slots' => [
'JOIN', [
'slot_revision_id = rev_id' ] ],
385 'content' => [
'JOIN', [
'content_id = slot_content_id' ] ],
387 unset( $join[
'page'] );
389 $fields = array_merge(
$revQuery[
'fields'], $slotQuery[
'fields'] );
390 $fields[] =
'page_restrictions';
392 if ( $this->text != self::STUB ) {
393 $fields[
'_load_content'] =
'1';
397 if ( $cond !==
'' ) {
400 $opts = [
'ORDER BY' => [
'rev_page ASC',
'rev_id ASC' ] ];
401 $opts[
'USE INDEX'] = [];
404 if ( is_array( $this->history ) ) {
405 # Time offset/limit for all pages/history...
407 if ( $this->history[
'dir'] ==
'asc' ) {
408 $opts[
'ORDER BY'] =
'rev_timestamp ASC';
411 $opts[
'ORDER BY'] =
'rev_timestamp DESC';
414 if ( !empty( $this->history[
'offset'] ) ) {
415 $conds[] =
"rev_timestamp $op " .
416 $this->db->addQuotes( $this->db->timestamp( $this->history[
'offset'] ) );
419 if ( !empty( $this->history[
'limit'] ) ) {
420 $maxRowCount = intval( $this->history[
'limit'] );
422 } elseif ( $this->history & self::FULL ) {
423 # Full history dumps...
424 # query optimization for history stub dumps
425 if ( $this->text == self::STUB ) {
426 $opts[] =
'STRAIGHT_JOIN';
427 $opts[
'USE INDEX'][
'revision'] =
'rev_page_id';
428 unset( $join[
'revision'] );
429 $join[
'page'] = [
'JOIN',
'rev_page=page_id' ];
431 } elseif ( $this->history & self::CURRENT ) {
432 # Latest revision dumps...
433 if ( $this->list_authors && $cond !=
'' ) {
436 $join[
'revision'] = [
'JOIN',
'page_id=rev_page AND page_latest=rev_id' ];
437 $opts[
'ORDER BY' ] = [
'page_id ASC' ];
438 } elseif ( $this->history & self::STABLE ) {
439 # "Stable" revision dumps...
440 # Default JOIN, to be overridden...
441 $join[
'revision'] = [
'JOIN',
'page_id=rev_page AND page_latest=rev_id' ];
442 # One, and only one hook should set this, and return false
443 if ( $this->hookRunner->onWikiExporter__dumpStableQuery( $tables, $opts, $join ) ) {
444 throw new MWException( __METHOD__ .
" given invalid history dump type." );
446 } elseif ( $this->history & self::RANGE ) {
447 # Dump of revisions within a specified range. Condition already set in revsByRange().
449 # Unknown history specification parameter?
450 throw new MWException( __METHOD__ .
" given invalid history dump type." );
462 $this->hookRunner->onModifyExportQuery(
463 $this->db, $tables, $cond, $opts, $join, $conds );
467 if ( !empty( $maxRowCount ) && $rowCount + self::BATCH_SIZE > $maxRowCount ) {
468 $opts[
'LIMIT'] = $maxRowCount - $rowCount;
472 $queryConds = $conds;
473 $queryConds[] =
'rev_page>' . intval( $revPage ) .
' OR (rev_page=' .
474 intval( $revPage ) .
' AND rev_id' . $op . intval( $revId ) .
')';
476 # Do the query and process any results, remembering max ids for the next iteration.
477 $result = $this->db->select(
485 if ( $result->numRows() > 0 ) {
487 $rowCount += $result->numRows();
488 $revPage = $lastRow->rev_page;
489 $revId = $lastRow->rev_id;
495 if ( $done && $lastRow ) {
521 $revRow = $slotRows[0];
523 if ( $this->limitNamespaces &&
524 !in_array( $revRow->page_namespace, $this->limitNamespaces ) ) {
529 if ( $lastRow ===
null ||
530 $lastRow->page_namespace !== $revRow->page_namespace ||
531 $lastRow->page_title !== $revRow->page_title ) {
532 if ( $lastRow !==
null ) {
534 if ( $this->dumpUploads ) {
535 $output .= $this->writer->writeUploads( $lastRow, $this->dumpUploadFileContents );
537 $output .= $this->writer->closePage();
538 $this->sink->writeClosePage( $output );
540 $output = $this->writer->openPage( $revRow );
541 $this->sink->writeOpenPage( $revRow, $output );
544 $output = $this->writer->writeRevision( $revRow, $slotRows );
545 $this->sink->writeRevision( $revRow, $output );
547 MWDebug::warning(
'Problem encountered retrieving rev and slot metadata for'
548 .
' revision ' . $revRow->rev_id .
': ' . $ex->getMessage() );
554 throw new LogicException(
'Error while processing a stream of slot rows' );
574 $slotRows[] = $carry;
579 while ( $row = $results->fetchObject() ) {
580 if ( $prev && $prev->rev_id !== $row->rev_id ) {
598 if ( $this->dumpUploads ) {
599 $output .= $this->writer->writeUploads( $lastRow, $this->dumpUploadFileContents );
602 $output .= $this->writer->closePage();
603 $this->sink->writeClosePage( $output );
611 foreach ( $resultset as $row ) {
612 $output = $this->writer->writeLogItem( $row );
613 $this->sink->writeLogItem( $row, $output );
615 return $row->log_id ??
null;
$wgXmlDumpSchemaVersion
The schema to use per default when generating XML dumps.
revsByRange( $start, $end)
Dumps a series of page and revision records for those pages in the database with revisions falling wi...
RevisionStore $revisionStore
dumpFrom( $cond='', $orderRevs=false)
finishPageStreamOutput( $lastRow)
Final page stream output, after all batches are complete.
getSlotRowBatch( $results, &$carry=null)
Returns all slot rows for a revision.
setOutputSink(&$sink)
Set the DumpOutput or DumpFilter object which will receive various row objects and XML output for fil...
pagesByRange( $start, $end, $orderRevs)
Dumps a series of page and revision records for those pages in the database falling within the page_i...
__construct( $db, $history=self::CURRENT, $text=self::TEXT, $limitNamespaces=null)
outputPageStreamBatch( $results, $lastRow)
Runs through a query result set dumping page, revision, and slot records.
bool $dumpUploadFileContents
allPages()
Dumps a series of page and revision records for all pages in the database, either including complete ...
logsByRange( $start, $end)
do_list_authors( $cond)
Generates the distinct list of authors of an article. Not called by default (depends on $this->list_au...
array|null $limitNamespaces
dumpPages( $cond, $orderRevs)
pageByTitle(PageIdentity $page)
outputLogStream( $resultset)
setSchemaVersion( $schemaVersion)
static schemaVersion()
Returns the default export schema version, as defined by $wgXmlDumpSchemaVersion.
bool $list_authors
Return distinct author list (when not returning full history)
Interface for objects (potentially) representing an editable wiki page.
A title parser service for MediaWiki.