120 $this->writer =
new XmlDumpWriter( $this->text, $schemaVersion );
131 $this->sink =&
$sink;
135 $output = $this->writer->openStream();
136 $this->sink->writeOpenStream( $output );
140 $output = $this->writer->closeStream();
141 $this->sink->writeCloseStream( $output );
163 $condition =
'rev_page >= ' . intval( $start );
165 $condition .=
' AND rev_page < ' . intval( $end );
168 $condition =
'page_id >= ' . intval( $start );
170 $condition .=
' AND page_id < ' . intval( $end );
173 $this->
dumpFrom( $condition, $orderRevs );
184 $condition =
'rev_id >= ' . intval( $start );
186 $condition .=
' AND rev_id < ' . intval( $end );
196 'page_namespace=' .
$title->getNamespace() .
197 ' AND page_title=' . $this->db->addQuotes(
$title->getDBkey() ) );
205 $title = Title::newFromText( $name );
206 if ( is_null(
$title ) ) {
207 throw new MWException(
"Can't export invalid title" );
217 foreach ( $names as $name ) {
231 $condition =
'log_id >= ' . intval( $start );
233 $condition .=
' AND log_id < ' . intval( $end );
246 $this->author_list =
"<contributors>";
250 $res = $this->db->select(
253 'rev_user_text' =>
$revQuery[
'fields'][
'rev_user_text'],
254 'rev_user' =>
$revQuery[
'fields'][
'rev_user'],
257 $this->db->bitAnd(
'rev_deleted', RevisionRecord::DELETED_USER ) .
' = 0',
265 foreach (
$res as $row ) {
266 $this->author_list .=
"<contributor>" .
268 htmlspecialchars( $row->rev_user_text ) .
271 ( (int)$row->rev_user ) .
275 $this->author_list .=
"</contributors>";
284 protected function dumpFrom( $cond =
'', $orderRevs =
false ) {
285 if ( $this->history & self::LOGS ) {
301 $where[] = $hideLogs;
303 # Add on any caller specified conditions
309 $commentQuery = CommentStore::getStore()->getJoin(
'log_comment' );
310 $actorQuery = ActorMigration::newMigration()->getJoin(
'log_user' );
312 $tables = array_merge(
313 [
'logging' ], $commentQuery[
'tables'], $actorQuery[
'tables'], [
'user' ]
316 'log_id',
'log_type',
'log_action',
'log_timestamp',
'log_namespace',
317 'log_title',
'log_params',
'log_deleted',
'user_name'
318 ] + $commentQuery[
'fields'] + $actorQuery[
'fields'];
320 'ORDER BY' =>
'log_id',
321 'USE INDEX' => [
'logging' =>
'PRIMARY' ],
325 'user' => [
'JOIN',
'user_id = ' . $actorQuery[
'fields'][
'log_user'] ]
326 ] + $commentQuery[
'joins'] + $actorQuery[
'joins'];
330 $result = $this->db->select(
333 array_merge( $where, [
'log_id > ' . intval( $lastLogId ) ] ),
339 if ( !$result->numRows() ) {
366 $tables = array_merge( [
'page' ], array_diff(
$revQuery[
'tables'], [
'page' ] ) );
367 $tables = array_merge( $tables, array_diff( $slotQuery[
'tables'], $tables ) );
369 'revision' =>
$revQuery[
'joins'][
'page'],
370 'slots' => [
'JOIN', [
'slot_revision_id = rev_id' ] ],
371 'content' => [
'JOIN', [
'content_id = slot_content_id' ] ],
373 unset( $join[
'page'] );
375 $fields = array_merge(
$revQuery[
'fields'], $slotQuery[
'fields'] );
376 $fields[] =
'page_restrictions';
378 if ( $this->text != self::STUB ) {
379 $fields[
'_load_content'] =
'1';
383 if ( $cond !==
'' ) {
386 $opts = [
'ORDER BY' => [
'rev_page ASC',
'rev_id ASC' ] ];
387 $opts[
'USE INDEX'] = [];
390 if ( is_array( $this->history ) ) {
391 # Time offset/limit for all pages/history...
393 if ( $this->history[
'dir'] ==
'asc' ) {
394 $opts[
'ORDER BY'] =
'rev_timestamp ASC';
397 $opts[
'ORDER BY'] =
'rev_timestamp DESC';
400 if ( !empty( $this->history[
'offset'] ) ) {
401 $conds[] =
"rev_timestamp $op " .
402 $this->db->addQuotes( $this->db->timestamp( $this->history[
'offset'] ) );
405 if ( !empty( $this->history[
'limit'] ) ) {
406 $maxRowCount = intval( $this->history[
'limit'] );
408 } elseif ( $this->history & self::FULL ) {
409 # Full history dumps...
410 # query optimization for history stub dumps
411 if ( $this->text == self::STUB ) {
412 $opts[] =
'STRAIGHT_JOIN';
413 $opts[
'USE INDEX'][
'revision'] =
'rev_page_id';
414 unset( $join[
'revision'] );
415 $join[
'page'] = [
'JOIN',
'rev_page=page_id' ];
417 } elseif ( $this->history & self::CURRENT ) {
418 # Latest revision dumps...
419 if ( $this->list_authors && $cond !=
'' ) {
422 $join[
'revision'] = [
'JOIN',
'page_id=rev_page AND page_latest=rev_id' ];
423 $opts[
'ORDER BY' ] = [
'page_id ASC' ];
424 } elseif ( $this->history & self::STABLE ) {
425 # "Stable" revision dumps...
426 # Default JOIN, to be overridden...
427 $join[
'revision'] = [
'JOIN',
'page_id=rev_page AND page_latest=rev_id' ];
428 # One, and only one hook should set this, and return false
429 if ( Hooks::run(
'WikiExporter::dumpStableQuery', [ &$tables, &$opts, &$join ] ) ) {
430 throw new MWException( __METHOD__ .
" given invalid history dump type." );
432 } elseif ( $this->history & self::RANGE ) {
433 # Dump of revisions within a specified range. Condition already set in revsByRange().
435 # Unknown history specification parameter?
436 throw new MWException( __METHOD__ .
" given invalid history dump type." );
448 Hooks::run(
'ModifyExportQuery',
449 [ $this->db, &$tables, &$cond, &$opts, &$join ] );
453 if ( !empty( $maxRowCount ) && $rowCount + self::BATCH_SIZE > $maxRowCount ) {
454 $opts[
'LIMIT'] = $maxRowCount - $rowCount;
458 $queryConds = $conds;
459 $queryConds[] =
'rev_page>' . intval( $revPage ) .
' OR (rev_page=' .
460 intval( $revPage ) .
' AND rev_id' . $op . intval( $revId ) .
')';
462 # Do the query and process any results, remembering max ids for the next iteration.
463 $result = $this->db->select(
471 if ( $result->numRows() > 0 ) {
473 $rowCount += $result->numRows();
474 $revPage = $lastRow->rev_page;
475 $revId = $lastRow->rev_id;
481 if ( $done && $lastRow ) {
507 $revRow = $slotRows[0];
509 if ( $this->limitNamespaces &&
510 !in_array( $revRow->page_namespace, $this->limitNamespaces ) ) {
515 if ( $lastRow ===
null ||
516 $lastRow->page_namespace !== $revRow->page_namespace ||
517 $lastRow->page_title !== $revRow->page_title ) {
518 if ( $lastRow !==
null ) {
520 if ( $this->dumpUploads ) {
521 $output .= $this->writer->writeUploads( $lastRow, $this->dumpUploadFileContents );
523 $output .= $this->writer->closePage();
524 $this->sink->writeClosePage( $output );
526 $output = $this->writer->openPage( $revRow );
527 $this->sink->writeOpenPage( $revRow, $output );
529 $output = $this->writer->writeRevision( $revRow, $slotRows );
530 $this->sink->writeRevision( $revRow, $output );
535 throw new LogicException(
'Error while processing a stream of slot rows' );
555 $slotRows[] = $carry;
560 while ( $row = $results->fetchObject() ) {
561 if ( $prev && $prev->rev_id !== $row->rev_id ) {
579 if ( $this->dumpUploads ) {
580 $output .= $this->writer->writeUploads( $lastRow, $this->dumpUploadFileContents );
583 $output .= $this->writer->closePage();
584 $this->sink->writeClosePage( $output );
592 foreach ( $resultset as $row ) {
593 $output = $this->writer->writeLogItem( $row );
594 $this->sink->writeLogItem( $row, $output );
596 return isset( $row ) ? $row->log_id :
null;
$wgXmlDumpSchemaVersion
The schema to use by default when generating XML dumps.
static getExcludeClause( $db, $audience='public', User $user=null)
SQL clause to skip forbidden log types for this user.
static getQueryInfo( $options=[])
Return the tables, fields, and join conditions to be selected to create a new revision object.
revsByRange( $start, $end)
Dumps a series of page and revision records for those pages in the database with revisions falling wi...
dumpFrom( $cond='', $orderRevs=false)
finishPageStreamOutput( $lastRow)
Final page stream output, after all batches are complete.
getSlotRowBatch( $results, &$carry=null)
Returns all slot rows for a revision.
setOutputSink(&$sink)
Set the DumpOutput or DumpFilter object which will receive various row objects and XML output for fil...
pagesByRange( $start, $end, $orderRevs)
Dumps a series of page and revision records for those pages in the database falling within the page_i...
__construct( $db, $history=self::CURRENT, $text=self::TEXT, $limitNamespaces=null)
outputPageStreamBatch( $results, $lastRow)
Runs through a query result set dumping page, revision, and slot records.
bool $dumpUploadFileContents
allPages()
Dumps a series of page and revision records for all pages in the database, either including complete ...
logsByRange( $start, $end)
do_list_authors( $cond)
Generates the distinct list of authors of an article. Not called by default (depends on $this->list_au...
array null $limitNamespaces
dumpPages( $cond, $orderRevs)
outputLogStream( $resultset)
setSchemaVersion( $schemaVersion)
static schemaVersion()
Returns the default export schema version, as defined by $wgXmlDumpSchemaVersion.
bool $list_authors
Return distinct author list (when not returning full history)
const WRITE_STUB
Only output stubs for revision content.
const WRITE_CONTENT
Output serialized revision content.