59 public const TEXT = XmlDumpWriter::WRITE_CONTENT;
60 public const STUB = XmlDumpWriter::WRITE_STUB;
120 $services = MediaWikiServices::getInstance();
121 $this->hookRunner =
new HookRunner( $services->getHookContainer() );
122 $this->revisionStore = $services->getRevisionStore();
131 $this->writer =
new XmlDumpWriter( $this->text, $schemaVersion );
142 $this->sink =&
$sink;
146 $output = $this->writer->openStream();
147 $this->sink->writeOpenStream( $output );
151 $output = $this->writer->closeStream();
152 $this->sink->writeCloseStream( $output );
174 $condition =
'rev_page >= ' . intval( $start );
176 $condition .=
' AND rev_page < ' . intval( $end );
179 $condition =
'page_id >= ' . intval( $start );
181 $condition .=
' AND page_id < ' . intval( $end );
184 $this->
dumpFrom( $condition, $orderRevs );
195 $condition =
'rev_id >= ' . intval( $start );
197 $condition .=
' AND rev_id < ' . intval( $end );
207 'page_namespace=' .
$title->getNamespace() .
208 ' AND page_title=' . $this->db->addQuotes(
$title->getDBkey() ) );
216 $title = Title::newFromText( $name );
218 throw new MWException(
"Can't export invalid title" );
228 foreach ( $names as $name ) {
242 $condition =
'log_id >= ' . intval( $start );
244 $condition .=
' AND log_id < ' . intval( $end );
257 $this->author_list =
"<contributors>";
260 $revQuery = $this->revisionStore->getQueryInfo( [
'page' ] );
261 $res = $this->db->select(
264 'rev_user_text' =>
$revQuery[
'fields'][
'rev_user_text'],
265 'rev_user' =>
$revQuery[
'fields'][
'rev_user'],
268 $this->db->bitAnd(
'rev_deleted', RevisionRecord::DELETED_USER ) .
' = 0',
276 foreach (
$res as $row ) {
277 $this->author_list .=
"<contributor>" .
279 htmlspecialchars( $row->rev_user_text ) .
282 ( (int)$row->rev_user ) .
286 $this->author_list .=
"</contributors>";
295 protected function dumpFrom( $cond =
'', $orderRevs =
false ) {
296 if ( $this->history & self::LOGS ) {
310 $hideLogs = LogEventsList::getExcludeClause( $this->db );
312 $where[] = $hideLogs;
314 # Add on any caller specified conditions
320 $commentQuery = CommentStore::getStore()->getJoin(
'log_comment' );
321 $actorQuery = ActorMigration::newMigration()->getJoin(
'log_user' );
323 $tables = array_merge(
324 [
'logging' ], $commentQuery[
'tables'], $actorQuery[
'tables'], [
'user' ]
327 'log_id',
'log_type',
'log_action',
'log_timestamp',
'log_namespace',
328 'log_title',
'log_params',
'log_deleted',
'user_name'
329 ] + $commentQuery[
'fields'] + $actorQuery[
'fields'];
331 'ORDER BY' =>
'log_id',
332 'USE INDEX' => [
'logging' =>
'PRIMARY' ],
336 'user' => [
'JOIN',
'user_id = ' . $actorQuery[
'fields'][
'log_user'] ]
337 ] + $commentQuery[
'joins'] + $actorQuery[
'joins'];
341 $result = $this->db->select(
344 array_merge( $where, [
'log_id > ' . intval( $lastLogId ) ] ),
350 if ( !$result->numRows() ) {
365 $revQuery = $this->revisionStore->getQueryInfo( [
'page' ] );
366 $slotQuery = $this->revisionStore->getSlotsQueryInfo( [
'content' ] );
373 $tables = array_merge( [
'page' ], array_diff(
$revQuery[
'tables'], [
'page' ] ) );
374 $tables = array_merge( $tables, array_diff( $slotQuery[
'tables'], $tables ) );
376 'revision' =>
$revQuery[
'joins'][
'page'],
377 'slots' => [
'JOIN', [
'slot_revision_id = rev_id' ] ],
378 'content' => [
'JOIN', [
'content_id = slot_content_id' ] ],
380 unset( $join[
'page'] );
382 $fields = array_merge(
$revQuery[
'fields'], $slotQuery[
'fields'] );
383 $fields[] =
'page_restrictions';
385 if ( $this->text != self::STUB ) {
386 $fields[
'_load_content'] =
'1';
390 if ( $cond !==
'' ) {
393 $opts = [
'ORDER BY' => [
'rev_page ASC',
'rev_id ASC' ] ];
394 $opts[
'USE INDEX'] = [];
397 if ( is_array( $this->history ) ) {
398 # Time offset/limit for all pages/history...
400 if ( $this->history[
'dir'] ==
'asc' ) {
401 $opts[
'ORDER BY'] =
'rev_timestamp ASC';
404 $opts[
'ORDER BY'] =
'rev_timestamp DESC';
407 if ( !empty( $this->history[
'offset'] ) ) {
408 $conds[] =
"rev_timestamp $op " .
409 $this->db->addQuotes( $this->db->timestamp( $this->history[
'offset'] ) );
412 if ( !empty( $this->history[
'limit'] ) ) {
413 $maxRowCount = intval( $this->history[
'limit'] );
415 } elseif ( $this->history & self::FULL ) {
416 # Full history dumps...
417 # query optimization for history stub dumps
418 if ( $this->text == self::STUB ) {
419 $opts[] =
'STRAIGHT_JOIN';
420 $opts[
'USE INDEX'][
'revision'] =
'rev_page_id';
421 unset( $join[
'revision'] );
422 $join[
'page'] = [
'JOIN',
'rev_page=page_id' ];
424 } elseif ( $this->history & self::CURRENT ) {
425 # Latest revision dumps...
426 if ( $this->list_authors && $cond !=
'' ) {
429 $join[
'revision'] = [
'JOIN',
'page_id=rev_page AND page_latest=rev_id' ];
430 $opts[
'ORDER BY' ] = [
'page_id ASC' ];
431 } elseif ( $this->history & self::STABLE ) {
432 # "Stable" revision dumps...
433 # Default JOIN, to be overridden...
434 $join[
'revision'] = [
'JOIN',
'page_id=rev_page AND page_latest=rev_id' ];
435 # One, and only one hook should set this, and return false
436 if ( $this->hookRunner->onWikiExporter__dumpStableQuery( $tables, $opts, $join ) ) {
437 throw new MWException( __METHOD__ .
" given invalid history dump type." );
439 } elseif ( $this->history & self::RANGE ) {
440 # Dump of revisions within a specified range. Condition already set in revsByRange().
442 # Unknown history specification parameter?
443 throw new MWException( __METHOD__ .
" given invalid history dump type." );
455 $this->hookRunner->onModifyExportQuery(
456 $this->db, $tables, $cond, $opts, $join, $conds );
460 if ( !empty( $maxRowCount ) && $rowCount + self::BATCH_SIZE > $maxRowCount ) {
461 $opts[
'LIMIT'] = $maxRowCount - $rowCount;
465 $queryConds = $conds;
466 $queryConds[] =
'rev_page>' . intval( $revPage ) .
' OR (rev_page=' .
467 intval( $revPage ) .
' AND rev_id' . $op . intval( $revId ) .
')';
469 # Do the query and process any results, remembering max ids for the next iteration.
470 $result = $this->db->select(
478 if ( $result->numRows() > 0 ) {
480 $rowCount += $result->numRows();
481 $revPage = $lastRow->rev_page;
482 $revId = $lastRow->rev_id;
488 if ( $done && $lastRow ) {
514 $revRow = $slotRows[0];
516 if ( $this->limitNamespaces &&
517 !in_array( $revRow->page_namespace, $this->limitNamespaces ) ) {
522 if ( $lastRow ===
null ||
523 $lastRow->page_namespace !== $revRow->page_namespace ||
524 $lastRow->page_title !== $revRow->page_title ) {
525 if ( $lastRow !==
null ) {
527 if ( $this->dumpUploads ) {
528 $output .= $this->writer->writeUploads( $lastRow, $this->dumpUploadFileContents );
530 $output .= $this->writer->closePage();
531 $this->sink->writeClosePage( $output );
533 $output = $this->writer->openPage( $revRow );
534 $this->sink->writeOpenPage( $revRow, $output );
536 $output = $this->writer->writeRevision( $revRow, $slotRows );
537 $this->sink->writeRevision( $revRow, $output );
542 throw new LogicException(
'Error while processing a stream of slot rows' );
562 $slotRows[] = $carry;
567 while ( $row = $results->fetchObject() ) {
568 if ( $prev && $prev->rev_id !== $row->rev_id ) {
586 if ( $this->dumpUploads ) {
587 $output .= $this->writer->writeUploads( $lastRow, $this->dumpUploadFileContents );
590 $output .= $this->writer->closePage();
591 $this->sink->writeClosePage( $output );
599 foreach ( $resultset as $row ) {
600 $output = $this->writer->writeLogItem( $row );
601 $this->sink->writeLogItem( $row, $output );
603 return $row->log_id ??
null;
$wgXmlDumpSchemaVersion
The schema to use by default when generating XML dumps.
revsByRange( $start, $end)
Dumps a series of page and revision records for those pages in the database with revisions falling wi...
RevisionStore $revisionStore
dumpFrom( $cond='', $orderRevs=false)
finishPageStreamOutput( $lastRow)
Final page stream output, after all batches are complete.
getSlotRowBatch( $results, &$carry=null)
Returns all slot rows for a revision.
setOutputSink(&$sink)
Set the DumpOutput or DumpFilter object which will receive various row objects and XML output for fil...
pagesByRange( $start, $end, $orderRevs)
Dumps a series of page and revision records for those pages in the database falling within the page_i...
__construct( $db, $history=self::CURRENT, $text=self::TEXT, $limitNamespaces=null)
outputPageStreamBatch( $results, $lastRow)
Runs through a query result set dumping page, revision, and slot records.
bool $dumpUploadFileContents
allPages()
Dumps a series of page and revision records for all pages in the database, either including complete ...
logsByRange( $start, $end)
do_list_authors( $cond)
Generates the distinct list of authors of an article. Not called by default (depends on $this->list_au...
array|null $limitNamespaces
dumpPages( $cond, $orderRevs)
outputLogStream( $resultset)
setSchemaVersion( $schemaVersion)
static schemaVersion()
Returns the default export schema version, as defined by $wgXmlDumpSchemaVersion.
bool $list_authors
Return distinct author list (when not returning full history)