Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
0.00% |
0 / 63 |
|
0.00% |
0 / 3 |
CRAP | |
0.00% |
0 / 1 |
| DumpRenderer | |
0.00% |
0 / 63 |
|
0.00% |
0 / 3 |
42 | |
0.00% |
0 / 1 |
| __construct | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
2 | |||
| execute | |
0.00% |
0 / 22 |
|
0.00% |
0 / 1 |
12 | |||
| handleRevision | |
0.00% |
0 / 35 |
|
0.00% |
0 / 1 |
6 | |||
| 1 | <?php |
| 2 | /** |
| 3 | * Take page text out of an XML dump file and render basic HTML out to files. |
| 4 | * This is *NOT* suitable for publishing or offline use; it's intended for |
| 5 | * running comparative tests of parsing behavior using real-world data. |
| 6 | * |
| 7 | * Templates etc are pulled from the local wiki database, not from the dump. |
| 8 | * |
| 9 | * Copyright (C) 2006 Brooke Vibber <bvibber@wikimedia.org> |
| 10 | * https://www.mediawiki.org/ |
| 11 | * |
| 12 | * @license GPL-2.0-or-later |
| 13 | * @file |
| 14 | * @ingroup Maintenance |
| 15 | */ |
| 16 | |
| 17 | use MediaWiki\Maintenance\Maintenance; |
| 18 | use MediaWiki\Parser\ParserOptions; |
| 19 | use MediaWiki\Permissions\UltimateAuthority; |
| 20 | use MediaWiki\Revision\MutableRevisionRecord; |
| 21 | use MediaWiki\User\User; |
| 22 | |
| 23 | // @codeCoverageIgnoreStart |
| 24 | require_once __DIR__ . '/Maintenance.php'; |
| 25 | // @codeCoverageIgnoreEnd |
| 26 | |
/**
 * Maintenance script that takes page text out of an XML dump file
 * and renders basic HTML out to files.
 *
 * The dump is read from standard input; one HTML file per revision is
 * written into the directory given by the 'output-dir' option.
 *
 * @ingroup Maintenance
 */
class DumpRenderer extends Maintenance {

	/** Number of revisions handled so far; also used to number the output files. */
	private int $count = 0;
	/** Directory the HTML files are written to ('output-dir' option). */
	private string $outputDirectory;
	/** Value of microtime( true ) taken when execute() started. */
	private float $startTime;
	/** Filename prefix for the rendered files ('prefix' option, default "wiki"). */
	private string $prefix;

	public function __construct() {
		parent::__construct();
		$this->addDescription(
			'Take page text out of an XML dump file and render basic HTML out to files' );
		$this->addOption( 'output-dir', 'The directory to output the HTML files to', true, true );
		$this->addOption( 'prefix', 'Prefix for the rendered files (defaults to wiki)', false, true );
		$this->addOption( 'parser', 'Use an alternative parser class', false, true );
	}

	/**
	 * Import the XML dump from stdin, passing every revision to handleRevision(),
	 * then report how many pages were rendered and how long it took.
	 */
	public function execute() {
		$this->outputDirectory = $this->getOption( 'output-dir' );
		$this->prefix = $this->getOption( 'prefix', 'wiki' );
		$this->startTime = microtime( true );

		if ( $this->hasOption( 'parser' ) ) {
			$this->prefix .= '-' . $this->getOption( 'parser' );
			// T236809: We'll need to provide an alternate ParserFactory
			// service to make this work.
			$this->fatalError( 'Parser class configuration temporarily disabled.' );
		}

		$user = User::newSystemUser( User::MAINTENANCE_SCRIPT_USER, [ 'steal' => true ] );

		$source = new ImportStreamSource( $this->getStdin() );
		$importer = $this->getServiceContainer()
			->getWikiImporterFactory()
			->getWikiImporter( $source, new UltimateAuthority( $user ) );

		$importer->setRevisionCallback(
			$this->handleRevision( ... ) );
		$importer->setNoticeCallback( static function ( $msg, $params ) {
			echo wfMessage( $msg, $params )->text() . "\n";
		} );

		$importer->doImport();

		// Summary statistics are reported through error(), separately from the
		// per-page lines that handleRevision() sends through output().
		$delta = microtime( true ) - $this->startTime;
		$this->error( "Rendered {$this->count} pages in " . round( $delta, 2 ) . " seconds " );
		if ( $delta > 0 ) {
			// Guard against division by zero when the run took no measurable time.
			$this->error( round( $this->count / $delta, 2 ) . " pages/sec" );
		}
		$this->error( "\n" );
	}

	/**
	 * Callback function for each revision, turn into HTML and save
	 *
	 * Revisions without a title are reported and skipped. A failure to write
	 * the output file is reported through error() and does not stop the import.
	 *
	 * @param WikiRevision $rev Revision handed over by the importer
	 */
	public function handleRevision( WikiRevision $rev ) {
		$title = $rev->getTitle();
		if ( !$title ) {
			$this->error( "Got bogus revision with null title!" );

			return;
		}
		$display = $title->getPrefixedText();

		$this->count++;

		// The title is URL-encoded so it can be embedded in the file name.
		$filename = sprintf( "%s/%s-%07d-%s.html",
			$this->outputDirectory,
			$this->prefix,
			$this->count,
			rawurlencode( $display ) );
		$this->output( sprintf( "%s\t%s\n", $filename, $display ) );

		$options = ParserOptions::newFromUser( new User() );

		$content = $rev->getContent();
		$contentRenderer = $this->getServiceContainer()->getContentRenderer();
		// ContentRenderer expects a RevisionRecord, and all we have is a
		// WikiRevision from the dump. Make a fake MutableRevisionRecord to
		// satisfy it -- the only thing ::getParserOutput actually needs is
		// the revision ID and revision timestamp.
		$mutableRev = new MutableRevisionRecord( $title );
		$mutableRev->setId( $rev->getID() );
		$mutableRev->setTimestamp( $rev->getTimestamp() );
		$output = $contentRenderer->getParserOutput(
			$content, $title, $mutableRev, $options
		);

		// TODO T371004 move runOutputPipeline out of $parserOutput
		$bodyHtml = $output->runOutputPipeline( $options, [] )->getContentHolderText();

		if ( file_put_contents( $filename, $this->wrapInHtmlDocument( $display, $bodyHtml ) ) === false ) {
			// Report the failed write explicitly instead of ignoring the return value.
			$this->error( "Failed to write rendered page to $filename" );
		}
	}

	/**
	 * Wrap rendered page HTML in a minimal standalone HTML document.
	 *
	 * @param string $titleText Plain-text page title; HTML-escaped here
	 * @param string $bodyHtml HTML to place inside the body element, inserted as-is
	 * @return string The complete HTML document
	 */
	private function wrapInHtmlDocument( string $titleText, string $bodyHtml ): string {
		return "<!DOCTYPE html>\n" .
			"<html lang=\"en\" dir=\"ltr\">\n" .
			"<head>\n" .
			"<meta charset=\"UTF-8\" />\n" .
			"<meta name=\"color-scheme\" content=\"light dark\">" .
			"<title>" . htmlspecialchars( $titleText, ENT_COMPAT ) . "</title>\n" .
			"</head>\n" .
			"<body>\n" .
			$bodyHtml .
			"</body>\n" .
			"</html>";
	}
}
| 139 | |
| 140 | // @codeCoverageIgnoreStart |
// Stores this script's class name in $maintClass and then loads the file named
// by RUN_MAINTENANCE_IF_MAIN; presumably that file runs $maintClass when this
// script is the entry point -- confirm against Maintenance.php.
| 141 | $maintClass = DumpRenderer::class; |
| 142 | require_once RUN_MAINTENANCE_IF_MAIN; |
| 143 | // @codeCoverageIgnoreEnd |