Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
53.85% |
126 / 234 |
|
43.48% |
10 / 23 |
CRAP | |
0.00% |
0 / 1 |
| WikiExporter | |
54.08% |
126 / 233 |
|
43.48% |
10 / 23 |
619.76 | |
0.00% |
0 / 1 |
| schemaVersion | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| __construct | |
100.00% |
15 / 15 |
|
100.00% |
1 / 1 |
1 | |||
| setSchemaVersion | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| setOutputSink | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| openStream | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| closeStream | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| allPages | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| pagesByRange | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
20 | |||
| revsByRange | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
| pageByTitle | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
| pageByName | |
66.67% |
4 / 6 |
|
0.00% |
0 / 1 |
2.15 | |||
| pagesByName | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
2 | |||
| allLogs | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| logsByRange | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
| do_list_authors | |
0.00% |
0 / 17 |
|
0.00% |
0 / 1 |
6 | |||
| dumpFrom | |
66.67% |
2 / 3 |
|
0.00% |
0 / 1 |
3.33 | |||
| dumpLogs | |
0.00% |
0 / 28 |
|
0.00% |
0 / 1 |
30 | |||
| dumpPages | |
67.95% |
53 / 78 |
|
0.00% |
0 / 1 |
37.94 | |||
| outputPageStreamBatch | |
63.33% |
19 / 30 |
|
0.00% |
0 / 1 |
19.10 | |||
| getSlotRowBatch | |
100.00% |
13 / 13 |
|
100.00% |
1 / 1 |
5 | |||
| finishPageStreamOutput | |
83.33% |
5 / 6 |
|
0.00% |
0 / 1 |
2.02 | |||
| outputLogStream | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
| reloadDBConfig | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| 1 | <?php |
| 2 | /** |
| 3 | * Base class for exporting |
| 4 | * |
| 5 | * Copyright © 2003, 2005, 2006 Brooke Vibber <bvibber@wikimedia.org> |
| 6 | * https://www.mediawiki.org/ |
| 7 | * |
| 8 | * @license GPL-2.0-or-later |
| 9 | * @file |
| 10 | */ |
| 11 | |
| 12 | namespace MediaWiki\Export; |
| 13 | |
| 14 | use LogicException; |
| 15 | use MediaWiki\CommentStore\CommentStore; |
| 16 | use MediaWiki\Debug\MWDebug; |
| 17 | use MediaWiki\HookContainer\HookContainer; |
| 18 | use MediaWiki\HookContainer\HookRunner; |
| 19 | use MediaWiki\Logging\LogEventsList; |
| 20 | use MediaWiki\MainConfigNames; |
| 21 | use MediaWiki\MediaWikiServices; |
| 22 | use MediaWiki\Page\PageIdentity; |
| 23 | use MediaWiki\Revision\RevisionAccessException; |
| 24 | use MediaWiki\Revision\RevisionRecord; |
| 25 | use MediaWiki\Revision\RevisionStore; |
| 26 | use MediaWiki\Title\MalformedTitleException; |
| 27 | use MediaWiki\Title\TitleParser; |
| 28 | use RuntimeException; |
| 29 | use UnexpectedValueException; |
| 30 | use Wikimedia\Rdbms\IReadableDatabase; |
| 31 | use Wikimedia\Rdbms\IResultWrapper; |
| 32 | |
| 33 | /** |
| 34 | * @defgroup Dump Dump |
| 35 | */ |
| 36 | |
| 37 | /** |
| 38 | * @ingroup SpecialPage Dump |
| 39 | */ |
| 40 | class WikiExporter {
| 41 | /** @var bool Return distinct author list (when not returning full history) */
| 42 | public $list_authors = false;
| 43 | 
| 44 | /** @var bool Whether to append XmlDumpWriter::writeUploads() output when a page element is closed */
| 45 | public $dumpUploads = false;
| 46 | 
| 47 | /** @var bool Passed as the second argument to XmlDumpWriter::writeUploads() when $dumpUploads is set */
| 48 | public $dumpUploadFileContents = false;
| 49 | 
| 50 | /** @var string XML contributors fragment built by do_list_authors(); emitted by finishPageStreamOutput() */
| 51 | public $author_list = "";
| 52 | 
| 53 | public const FULL = 1; // full history
| 54 | public const CURRENT = 2; // latest revision of each page
| 55 | public const STABLE = 4; // extension defined
| 56 | public const LOGS = 8; // makes dumpFrom() dump log entries instead of pages
| 57 | public const RANGE = 16; // revisions within a range; the condition comes from the caller
| 58 | 
| 59 | public const TEXT = XmlDumpWriter::WRITE_CONTENT;
| 60 | public const STUB = XmlDumpWriter::WRITE_STUB;
| 61 | 
| 62 | protected const BATCH_SIZE = 10000; // row LIMIT applied to each query in dumpLogs() and dumpPages()
| 63 | 
| 64 | /** @var int One of self::TEXT or self::STUB */
| 65 | public $text;
| 66 | 
| 67 | /** @var DumpOutput Receiver of the generated output; a DumpFilter may be installed via setOutputSink() */
| 68 | public $sink;
| 69 | 
| 70 | /** @var XmlDumpWriter Produces the XML strings that are handed to $sink */
| 71 | private $writer;
| 72 | 
| 73 | /** @var IReadableDatabase Connection used for all dump queries */
| 74 | protected $db;
| 75 | 
| 76 | /** @var array|int Dump mode flag(s), or an offset/limit/dir array; see __construct() */
| 77 | protected $history;
| 78 | 
| 79 | /** @var array|null Namespace numbers that page output is restricted to; no restriction when empty or null */
| 80 | protected $limitNamespaces;
| 81 | 
| 82 | /** @var RevisionStore Supplies the revision/slot query info and the revision query builder */
| 83 | private $revisionStore;
| 84 | 
| 85 | /** @var TitleParser Parses the title strings given to pageByName() */
| 86 | private $titleParser;
| 87 | 
| 88 | /** @var HookRunner Used to call onWikiExporter__dumpStableQuery() and onModifyExportQuery() */
| 89 | private $hookRunner;
| 90 | 
| 91 | /** @var CommentStore Provides the log_comment join in dumpLogs(); also passed to the XmlDumpWriter */
| 92 | private $commentStore;
| 93 | 
| 94 | /**
| 95 | * Returns the default export schema version, read from the main config key MainConfigNames::XmlDumpSchemaVersion.
| 96 | * @return string The configured schema version; __construct() passes this value to the XmlDumpWriter
| 97 | */
| 98 | public static function schemaVersion() {
| 99 | return MediaWikiServices::getInstance()->getMainConfig()->get(
| 100 | MainConfigNames::XmlDumpSchemaVersion );
| 101 | }
| 102 | |
| 103 | /** Creates an exporter with a new XmlDumpWriter (default schema version) and a default DumpOutput sink.
| 104 | * @param IReadableDatabase $db Connection used for all dump queries
| 105 | * @param CommentStore $commentStore Stored for log dumps and passed to the XmlDumpWriter
| 106 | * @param HookContainer $hookContainer Passed to the XmlDumpWriter and wrapped in a HookRunner
| 107 | * @param RevisionStore $revisionStore Source of revision and slot query info
| 108 | * @param TitleParser $titleParser Used by pageByName() to parse title strings
| 109 | * @param int|array $history One of WikiExporter::FULL, WikiExporter::CURRENT,
| 110 | * WikiExporter::RANGE or WikiExporter::STABLE, or an associative array:
| 111 | * - offset: non-inclusive offset at which to start the query
| 112 | * - limit: maximum number of rows to return
| 113 | * - dir: "asc" or "desc" timestamp order
| 114 | * @param int $text One of WikiExporter::TEXT or WikiExporter::STUB
| 115 | * @param null|array $limitNamespaces List of namespace numbers to limit results
| 116 | */
| 117 | public function __construct(
| 118 | $db,
| 119 | CommentStore $commentStore,
| 120 | HookContainer $hookContainer,
| 121 | RevisionStore $revisionStore,
| 122 | TitleParser $titleParser,
| 123 | $history = self::CURRENT,
| 124 | $text = self::TEXT,
| 125 | $limitNamespaces = null
| 126 | ) {
| 127 | $this->db = $db;
| 128 | $this->commentStore = $commentStore;
| 129 | $this->history = $history;
| 130 | $this->writer = new XmlDumpWriter(
| 131 | $text,
| 132 | self::schemaVersion(),
| 133 | $hookContainer,
| 134 | $commentStore
| 135 | );
| 136 | $this->sink = new DumpOutput(); // default sink; replace it with setOutputSink()
| 137 | $this->text = $text;
| 138 | $this->limitNamespaces = $limitNamespaces;
| 139 | $this->hookRunner = new HookRunner( $hookContainer );
| 140 | $this->revisionStore = $revisionStore;
| 141 | $this->titleParser = $titleParser;
| 142 | }
| 143 | |
| 144 | /** Replaces the XML writer with a new one for the given schema version, keeping the current $this->text mode.
| 145 | * @param string $schemaVersion which schema version the generated XML should comply to.
| 146 | * One of the XML_DUMP_SCHEMA_VERSION_XX constants (the supported list is presumably
| 147 | * XmlDumpWriter::$supportedSchemas; this class declares no such property - TODO confirm).
| 148 | */
| 149 | public function setSchemaVersion( $schemaVersion ) {
| 150 | $this->writer = new XmlDumpWriter( $this->text, $schemaVersion ); // NOTE(review): unlike __construct(), no HookContainer/CommentStore is passed - confirm the writer's defaults are intended
| 151 | }
| 152 | |
| 153 | /**
| 154 | * Set the DumpOutput or DumpFilter object which will receive
| 155 | * various row objects and XML output for filtering. Filters
| 156 | * can be chained or used as callbacks.
| 157 | *
| 158 | * @param DumpOutput|DumpFilter &$sink Taken and stored by reference, replacing the sink created in __construct()
| 159 | */
| 160 | public function setOutputSink( &$sink ) {
| 161 | $this->sink =& $sink;
| 162 | }
| 163 | |
| 164 | public function openStream() { // Passes the writer's stream-opening output to the sink
| 165 | $output = $this->writer->openStream();
| 166 | $this->sink->writeOpenStream( $output );
| 167 | }
| 168 | |
| 169 | public function closeStream() { // Passes the writer's stream-closing output to the sink
| 170 | $output = $this->writer->closeStream();
| 171 | $this->sink->writeCloseStream( $output );
| 172 | }
| 173 | |
| 174 | /**
| 175 | * Dumps a series of page and revision records for all pages
| 176 | * in the database, either including complete history or only
| 177 | * the most recent version, depending on $this->history.
| 178 | */
| 179 | public function allPages() {
| 180 | $this->dumpFrom( '' ); // empty condition: no extra restriction; dumpFrom() dumps logs instead if the LOGS flag is set
| 181 | }
| 182 | |
| 183 | /**
| 184 | * Dumps a series of page and revision records for those pages
| 185 | * in the database falling within the page_id range given.
| 186 | * @param int $start Inclusive lower limit (this id is included)
| 187 | * @param int $end Exclusive upper limit (this id is not included)
| 188 | * If 0, no upper limit.
| 189 | * @param bool $orderRevs order revisions within pages in ascending order; when true the range is applied to rev_page instead of page_id
| 190 | */
| 191 | public function pagesByRange( $start, $end, $orderRevs ) {
| 192 | if ( $orderRevs ) {
| 193 | $condition = 'rev_page >= ' . intval( $start ); // intval() keeps the hand-built SQL fragment numeric
| 194 | if ( $end ) {
| 195 | $condition .= ' AND rev_page < ' . intval( $end );
| 196 | }
| 197 | } else {
| 198 | $condition = 'page_id >= ' . intval( $start );
| 199 | if ( $end ) {
| 200 | $condition .= ' AND page_id < ' . intval( $end );
| 201 | }
| 202 | }
| 203 | $this->dumpFrom( $condition, $orderRevs );
| 204 | }
| 205 | |
| 206 | /**
| 207 | * Dumps a series of page and revision records for those pages
| 208 | * in the database with revisions falling within the rev_id range given.
| 209 | * @param int $start Inclusive lower limit (this id is included)
| 210 | * @param int $end Exclusive upper limit (this id is not included)
| 211 | * If 0 (or otherwise falsy), no upper limit is added.
| 212 | */
| 213 | public function revsByRange( $start, $end ) {
| 214 | $condition = 'rev_id >= ' . intval( $start ); // intval() keeps the hand-built SQL fragment numeric
| 215 | if ( $end ) {
| 216 | $condition .= ' AND rev_id < ' . intval( $end );
| 217 | }
| 218 | $this->dumpFrom( $condition );
| 219 | }
| 220 | |
| 221 | public function pageByTitle( PageIdentity $page ) { // Dumps the page matching $page's namespace and DB key
| 222 | $this->dumpFrom(
| 223 | 'page_namespace=' . $page->getNamespace() .
| 224 | ' AND page_title=' . $this->db->addQuotes( $page->getDBkey() ) ); // DB key is quoted by the database layer
| 225 | }
| 226 | |
| 227 | /** Dumps the page named by a title string; a malformed title is reported as a RuntimeException.
| 228 | * @param string $name Title text, parsed with the injected TitleParser
| 229 | */
| 230 | public function pageByName( $name ) {
| 231 | try {
| 232 | $link = $this->titleParser->parseTitle( $name );
| 233 | $this->dumpFrom(
| 234 | 'page_namespace=' . $link->getNamespace() .
| 235 | ' AND page_title=' . $this->db->addQuotes( $link->getDBkey() ) );
| 236 | } catch ( MalformedTitleException ) { // the original exception is not chained to the one thrown below
| 237 | throw new RuntimeException( "Can't export invalid title" );
| 238 | }
| 239 | }
| 240 | |
| 241 | /** Calls pageByName() for each name in order; an exception from one name aborts the remaining ones.
| 242 | * @param string[] $names Title strings to export
| 243 | */
| 244 | public function pagesByName( $names ) {
| 245 | foreach ( $names as $name ) {
| 246 | $this->pageByName( $name );
| 247 | }
| 248 | }
| 249 | |
| 250 | public function allLogs() { // Dumps with no extra condition; logs are only dumped when $this->history has the LOGS flag
| 251 | $this->dumpFrom( '' ); // without the LOGS flag, dumpFrom() falls through to dumpPages()
| 252 | }
| 253 | |
| 254 | /** Dumps via dumpFrom() with a log_id range condition.
| 255 | * @param int $start Inclusive lower log_id limit
| 256 | * @param int $end Exclusive upper log_id limit; no upper limit is added when falsy
| 257 | */
| 258 | public function logsByRange( $start, $end ) {
| 259 | $condition = 'log_id >= ' . intval( $start ); // intval() keeps the hand-built SQL fragment numeric
| 260 | if ( $end ) {
| 261 | $condition .= ' AND log_id < ' . intval( $end );
| 262 | }
| 263 | $this->dumpFrom( $condition );
| 264 | }
| 265 | |
| 266 | /**
| 267 | * Generates the distinct list of authors of an article and stores it as XML in $this->author_list.
| 268 | * Not called by default (depends on $this->list_authors)
| 269 | * Can be set by Special:Export when not exporting whole history
| 270 | *
| 271 | * @param string $cond Raw SQL condition restricting which revisions are considered
| 272 | */
| 273 | protected function do_list_authors( $cond ) {
| 274 | $this->author_list = "<contributors>";
| 275 | // Revisions whose rev_deleted has the DELETED_USER bit set are excluded by the first condition below
| 276 | 
| 277 | $res = $this->revisionStore->newSelectQueryBuilder( $this->db )
| 278 | ->joinPage()
| 279 | ->distinct()
| 280 | ->where( $this->db->bitAnd( 'rev_deleted', RevisionRecord::DELETED_USER ) . ' = 0' )
| 281 | ->andWhere( $cond )
| 282 | ->caller( __METHOD__ )->fetchResultSet();
| 283 | 
| 284 | foreach ( $res as $row ) {
| 285 | $this->author_list .= "<contributor>" .
| 286 | "<username>" .
| 287 | htmlspecialchars( $row->rev_user_text ) . // user name is escaped before being embedded in the XML
| 288 | "</username>" .
| 289 | "<id>" .
| 290 | ( (int)$row->rev_user ) .
| 291 | "</id>" .
| 292 | "</contributor>";
| 293 | }
| 294 | $this->author_list .= "</contributors>";
| 295 | }
| 296 | |
| 297 | /** Dispatches to dumpLogs() when $this->history is an int with the LOGS bit set, otherwise to dumpPages().
| 298 | * @param string $cond Raw SQL condition, or '' for none
| 299 | * @param bool $orderRevs Forwarded to dumpPages() only; not used for log dumps
| 300 | */
| 301 | protected function dumpFrom( $cond = '', $orderRevs = false ) {
| 302 | if ( is_int( $this->history ) && ( $this->history & self::LOGS ) ) {
| 303 | $this->dumpLogs( $cond );
| 304 | } else {
| 305 | $this->dumpPages( $cond, $orderRevs );
| 306 | }
| 307 | }
| 308 | |
| 309 | /** Streams log rows to the sink in batches of BATCH_SIZE, ordered by log_id, until a batch comes back empty.
| 310 | * @param string $cond Raw SQL condition added to the WHERE clause when non-empty
| 311 | */
| 312 | protected function dumpLogs( $cond ) {
| 313 | $where = [];
| 314 | # Hide private logs
| 315 | $hideLogs = LogEventsList::getExcludeClause( $this->db );
| 316 | if ( $hideLogs ) {
| 317 | $where[] = $hideLogs;
| 318 | }
| 319 | # Add on any caller specified conditions
| 320 | if ( $cond ) {
| 321 | $where[] = $cond;
| 322 | }
| 323 | 
| 324 | $commentQuery = $this->commentStore->getJoin( 'log_comment' );
| 325 | 
| 326 | $lastLogId = 0; // each batch only selects rows with log_id greater than this
| 327 | while ( true ) {
| 328 | $result = $this->db->newSelectQueryBuilder()
| 329 | ->select( [
| 330 | 'log_id', 'log_type', 'log_action', 'log_timestamp', 'log_namespace',
| 331 | 'log_title', 'log_params', 'log_deleted', 'actor_user', 'actor_name'
| 332 | ] )
| 333 | ->from( 'logging' )
| 334 | ->join( 'actor', null, 'actor_id=log_actor' )
| 335 | ->where( $where )
| 336 | ->andWhere( $this->db->expr( 'log_id', '>', intval( $lastLogId ) ) )
| 337 | ->orderBy( 'log_id' )
| 338 | ->useIndex( [ 'logging' => 'PRIMARY' ] )
| 339 | ->limit( self::BATCH_SIZE )
| 340 | ->queryInfo( $commentQuery )
| 341 | ->caller( __METHOD__ )
| 342 | ->fetchResultSet();
| 343 | 
| 344 | if ( !$result->numRows() ) {
| 345 | break;
| 346 | }
| 347 | 
| 348 | $lastLogId = $this->outputLogStream( $result ); // log_id of the last row written; the result is non-empty here
| 349 | $this->reloadDBConfig(); // between batches, let database configuration changes take effect
| 350 | }
| 351 | }
| 352 | |
| 353 | /** Streams page/revision/slot rows to the sink in batches of BATCH_SIZE; the query shape depends on $this->history.
| 354 | * @param string $cond Raw SQL condition added to the query; skipped when it is the empty string
| 355 | * @param bool $orderRevs Accepted from dumpFrom() but not referenced anywhere in this method body
| 356 | */
| 357 | protected function dumpPages( $cond, $orderRevs ) {
| 358 | $revQuery = $this->revisionStore->getQueryInfo( [ 'page' ] );
| 359 | $slotQuery = $this->revisionStore->getSlotsQueryInfo( [ 'content' ] );
| 360 | 
| 361 | // We want page primary rather than revision.
| 362 | // We also want to join in the slots and content tables.
| 363 | // NOTE: This means we may get multiple rows per revision, and more rows
| 364 | // than the batch size! Should be ok, since the max number of slots is
| 365 | // fixed and low (dozens at worst).
| 366 | $tables = array_merge( [ 'page' ], array_diff( $revQuery['tables'], [ 'page' ] ) );
| 367 | $tables = array_merge( $tables, array_diff( $slotQuery['tables'], $tables ) );
| 368 | $join = $revQuery['joins'] + [
| 369 | 'revision' => $revQuery['joins']['page'],
| 370 | 'slots' => [ 'JOIN', [ 'slot_revision_id = rev_id' ] ],
| 371 | 'content' => [ 'JOIN', [ 'content_id = slot_content_id' ] ],
| 372 | ];
| 373 | unset( $join['page'] );
| 374 | 
| 375 | $fields = array_merge( $revQuery['fields'], $slotQuery['fields'] );
| 376 | 
| 377 | if ( $this->text != self::STUB ) {
| 378 | $fields['_load_content'] = '1'; // extra constant field selected for non-stub dumps; presumably read by the writer - TODO confirm
| 379 | }
| 380 | 
| 381 | $conds = [];
| 382 | if ( $cond !== '' ) {
| 383 | $conds[] = $cond;
| 384 | }
| 385 | $opts = [ 'ORDER BY' => [ 'rev_page ASC', 'rev_id ASC' ] ];
| 386 | $opts['USE INDEX'] = [];
| 387 | 
| 388 | $op = '>'; // comparison operator for rev_timestamp offsets and for rev_id in the batch cursor below
| 389 | if ( is_array( $this->history ) ) {
| 390 | # Time offset/limit for all pages/history...
| 391 | # Set time order
| 392 | if ( $this->history['dir'] == 'asc' ) {
| 393 | $opts['ORDER BY'] = 'rev_timestamp ASC';
| 394 | } else {
| 395 | $op = '<';
| 396 | $opts['ORDER BY'] = 'rev_timestamp DESC';
| 397 | }
| 398 | # Set offset
| 399 | if ( !empty( $this->history['offset'] ) ) {
| 400 | $conds[] = "rev_timestamp $op " .
| 401 | $this->db->addQuotes( $this->db->timestamp( $this->history['offset'] ) );
| 402 | }
| 403 | # Set query limit
| 404 | if ( !empty( $this->history['limit'] ) ) {
| 405 | $maxRowCount = intval( $this->history['limit'] ); // only defined on this path; read with empty() in the loop below
| 406 | }
| 407 | } elseif ( $this->history & self::FULL ) {
| 408 | # Full history dumps...
| 409 | # query optimization for history stub dumps
| 410 | if ( $this->text == self::STUB ) {
| 411 | $opts[] = 'STRAIGHT_JOIN';
| 412 | unset( $join['revision'] );
| 413 | $join['page'] = [ 'JOIN', 'rev_page=page_id' ];
| 414 | }
| 415 | } elseif ( $this->history & self::CURRENT ) {
| 416 | # Latest revision dumps...
| 417 | if ( $this->list_authors && $cond != '' ) { // List authors, if so desired
| 418 | $this->do_list_authors( $cond );
| 419 | }
| 420 | $join['revision'] = [ 'JOIN', 'page_id=rev_page AND page_latest=rev_id' ];
| 421 | $opts[ 'ORDER BY' ] = [ 'page_id ASC' ];
| 422 | } elseif ( $this->history & self::STABLE ) {
| 423 | # "Stable" revision dumps...
| 424 | # Default JOIN, to be overridden...
| 425 | $join['revision'] = [ 'JOIN', 'page_id=rev_page AND page_latest=rev_id' ];
| 426 | # One, and only one hook should set this, and return false
| 427 | if ( $this->hookRunner->onWikiExporter__dumpStableQuery( $tables, $opts, $join ) ) {
| 428 | throw new LogicException( __METHOD__ . " given invalid history dump type." );
| 429 | }
| 430 | } elseif ( $this->history & self::RANGE ) {
| 431 | # Dump of revisions within a specified range. Condition already set in revsByRange().
| 432 | } else {
| 433 | # Unknown history specification parameter?
| 434 | throw new UnexpectedValueException( __METHOD__ . " given invalid history dump type." );
| 435 | }
| 436 | 
| 437 | $done = false;
| 438 | $lastRow = null;
| 439 | $revPage = 0; // batch cursor: rev_page of the last row written so far
| 440 | $revId = 0; // batch cursor: rev_id of the last row written so far
| 441 | $rowCount = 0;
| 442 | 
| 443 | $opts['LIMIT'] = self::BATCH_SIZE;
| 444 | 
| 445 | $this->hookRunner->onModifyExportQuery(
| 446 | $this->db, $tables, $cond, $opts, $join, $conds );
| 447 | 
| 448 | while ( !$done ) {
| 449 | // If necessary, impose the overall maximum and stop looping after this iteration.
| 450 | if ( !empty( $maxRowCount ) && $rowCount + self::BATCH_SIZE > $maxRowCount ) { // empty() tolerates $maxRowCount being undefined
| 451 | $opts['LIMIT'] = $maxRowCount - $rowCount;
| 452 | $done = true;
| 453 | }
| 454 | 
| 455 | # Do the query and process any results, remembering max ids for the next iteration.
| 456 | $result = $this->db->newSelectQueryBuilder()
| 457 | ->tables( $tables )
| 458 | ->fields( $fields )
| 459 | ->where( $conds )
| 460 | ->andWhere( $this->db->expr( 'rev_page', '>', intval( $revPage ) )->orExpr(
| 461 | $this->db->expr( 'rev_page', '=', intval( $revPage ) )->and( 'rev_id', $op, intval( $revId ) )
| 462 | ) )
| 463 | ->caller( __METHOD__ )
| 464 | ->options( $opts )
| 465 | ->joinConds( $join )
| 466 | ->fetchResultSet();
| 467 | if ( $result->numRows() > 0 ) {
| 468 | $lastRow = $this->outputPageStreamBatch( $result, $lastRow );
| 469 | $rowCount += $result->numRows();
| 470 | $revPage = $lastRow->rev_page;
| 471 | $revId = $lastRow->rev_id;
| 472 | } else {
| 473 | $done = true; // an empty batch ends the loop
| 474 | }
| 475 | 
| 476 | // If we are finished, close off final page element (if any).
| 477 | if ( $done && $lastRow ) {
| 478 | $this->finishPageStreamOutput( $lastRow );
| 479 | }
| 480 | 
| 481 | if ( !$done ) {
| 482 | $this->reloadDBConfig(); // between batches, let database configuration changes take effect
| 483 | }
| 484 | }
| 485 | }
| 486 | |
| 487 | /**
| 488 | * Runs through a query result set dumping page, revision, and slot records.
| 489 | * The result set should join the page, revision, slots, and content tables,
| 490 | * and be sorted/grouped by page and revision to avoid duplicate page records in the output.
| 491 | *
| 492 | * @param IResultWrapper $results
| 493 | * @param \stdClass|null $lastRow the last row output from the previous call (or null if none)
| 494 | * @return \stdClass the last row processed (the $lastRow argument is returned unchanged if $results yields no rows)
| 495 | */
| 496 | protected function outputPageStreamBatch( $results, $lastRow ) {
| 497 | $rowCarry = null; // first row of the next revision, handed from one getSlotRowBatch() call to the next
| 498 | while ( true ) {
| 499 | $slotRows = $this->getSlotRowBatch( $results, $rowCarry );
| 500 | 
| 501 | if ( !$slotRows ) {
| 502 | break;
| 503 | }
| 504 | 
| 505 | // All revision info is present in all slot rows.
| 506 | // Use the first slot row as the revision row.
| 507 | $revRow = $slotRows[0];
| 508 | 
| 509 | if ( $this->limitNamespaces &&
| 510 | !in_array( $revRow->page_namespace, $this->limitNamespaces ) ) { // loose comparison, so the row value need not have the same type as the list entries
| 511 | $lastRow = $revRow; // skipped revisions still become $lastRow, although nothing is written for them
| 512 | continue;
| 513 | }
| 514 | 
| 515 | if ( $lastRow === null ||
| 516 | $lastRow->page_namespace !== $revRow->page_namespace ||
| 517 | $lastRow->page_title !== $revRow->page_title ) { // a different page has started
| 518 | if ( $lastRow !== null ) {
| 519 | $output = '';
| 520 | if ( $this->dumpUploads ) {
| 521 | $output .= $this->writer->writeUploads( $lastRow, $this->dumpUploadFileContents );
| 522 | }
| 523 | $output .= $this->writer->closePage();
| 524 | $this->sink->writeClosePage( $output );
| 525 | }
| 526 | $output = $this->writer->openPage( $revRow );
| 527 | $this->sink->writeOpenPage( $revRow, $output );
| 528 | }
| 529 | try {
| 530 | $output = $this->writer->writeRevision( $revRow, $slotRows );
| 531 | $this->sink->writeRevision( $revRow, $output );
| 532 | } catch ( RevisionAccessException $ex ) { // the failing revision is skipped with a warning; the dump continues
| 533 | MWDebug::warning( 'Problem encountered retrieving rev and slot metadata for'
| 534 | . ' revision ' . $revRow->rev_id . ': ' . $ex->getMessage() );
| 535 | }
| 536 | $lastRow = $revRow;
| 537 | }
| 538 | 
| 539 | if ( $rowCarry ) { // a row left in the carry after the loop ended would never have been written
| 540 | throw new LogicException( 'Error while processing a stream of slot rows' );
| 541 | }
| 542 | 
| 543 | // @phan-suppress-next-line PhanTypeMismatchReturnNullable False positive
| 544 | return $lastRow;
| 545 | }
| 546 | |
| 547 | /**
| 548 | * Returns all slot rows for a revision, i.e. consecutive rows sharing the same rev_id.
| 549 | * Takes a carry row from the previous call and sets a new one when the next revision's first row has been read.
| 550 | *
| 551 | * @param IResultWrapper|array $results Source of rows; read with fetchObject()
| 552 | * @param null|\stdClass &$carry A row carried over from the last call to getSlotRowBatch()
| 553 | *
| 554 | * @return \stdClass[] Rows for one revision; empty when there is no carry and no further rows
| 555 | */
| 556 | protected function getSlotRowBatch( $results, &$carry = null ) {
| 557 | $slotRows = [];
| 558 | $prev = null;
| 559 | 
| 560 | if ( $carry ) { // the carried row is the first row of this revision
| 561 | $slotRows[] = $carry;
| 562 | $prev = $carry;
| 563 | $carry = null;
| 564 | }
| 565 | 
| 566 | // Reading further rows from the result set for the same rev id
| 567 | // phpcs:ignore Generic.CodeAnalysis.AssignmentInCondition.FoundInWhileCondition
| 568 | while ( $row = $results->fetchObject() ) {
| 569 | if ( $prev && $prev->rev_id !== $row->rev_id ) { // a different revision starts: hand this row to the next call
| 570 | $carry = $row;
| 571 | break;
| 572 | }
| 573 | $slotRows[] = $row;
| 574 | $prev = $row;
| 575 | }
| 576 | 
| 577 | return $slotRows;
| 578 | }
| 579 | |
| 580 | /**
| 581 | * Final page stream output, after all batches are complete: optional uploads, the author list, then the page close.
| 582 | *
| 583 | * @param \stdClass $lastRow the last row output from the last batch; dumpPages() only calls this when it is not null
| 584 | */
| 585 | protected function finishPageStreamOutput( $lastRow ) {
| 586 | $output = '';
| 587 | if ( $this->dumpUploads ) {
| 588 | $output .= $this->writer->writeUploads( $lastRow, $this->dumpUploadFileContents );
| 589 | }
| 590 | $output .= $this->author_list; // empty string unless do_list_authors() has run
| 591 | $output .= $this->writer->closePage();
| 592 | $this->sink->writeClosePage( $output );
| 593 | }
| 594 | |
| 595 | /** Writes every row of the result set to the sink as a log item.
| 596 | * @param IResultWrapper $resultset
| 597 | * @return int|null the log_id value of the last item output, or null if none
| 598 | */
| 599 | protected function outputLogStream( $resultset ) {
| 600 | foreach ( $resultset as $row ) {
| 601 | $output = $this->writer->writeLogItem( $row );
| 602 | $this->sink->writeLogItem( $row, $output );
| 603 | }
| 604 | return $row->log_id ?? null; // $row is unset when the result set is empty; ?? then yields null
| 605 | }
| 606 | |
| 607 | /**
| 608 | * Attempt to reload the database configuration, so any changes can take effect.
| 609 | * Dynamic reloading can be enabled by setting $wgLBFactoryConf['configCallback']
| 610 | * to a function that returns an array of any keys that should be updated
| 611 | * in LBFactoryConf. Called between query batches by dumpLogs() and dumpPages().
| 612 | */
| 613 | private function reloadDBConfig() {
| 614 | MediaWikiServices::getInstance()->getDBLoadBalancerFactory()
| 615 | ->autoReconfigure();
| 616 | }
| 617 | } |
| 618 | |
| 619 | /** @deprecated class alias since 1.46; the global name WikiExporter is kept as an alias of MediaWiki\Export\WikiExporter */
| 620 | class_alias( WikiExporter::class, 'WikiExporter' );