24require_once __DIR__ .
'/dumpIterator.php';
38 private $saveFailed =
false;
40 private $stripParametersEnabled;
42 private $showParsedOutput;
51 parent::__construct();
52 $this->
addDescription(
'Run a file or dump with several parsers' );
53 $this->
addOption(
'parser1',
'The first parser to compare.',
true,
true );
54 $this->
addOption(
'parser2',
'The second parser to compare.',
true,
true );
57 'Folder in which articles which differ will be stored.',
61 $this->
addOption(
'show-diff',
'Show a diff of the two renderings.',
false,
false );
64 'Binary to use for diffing (can also be provided by DIFF env var).',
70 'Remove parameters of html tags to increase readability.',
76 'Show the parsed html if both Parsers give the same output.',
83 if ( $this->
hasOption(
'save-failed' ) ) {
84 $this->saveFailed = $this->
getOption(
'save-failed' );
87 $this->stripParametersEnabled = $this->
hasOption(
'strip-parameters' );
88 $this->showParsedOutput = $this->
hasOption(
'show-parsed-output' );
90 $this->showDiff = $this->
hasOption(
'show-diff' );
91 if ( $this->showDiff ) {
92 $bin = $this->
getOption(
'diff-bin', getenv(
'DIFF' ) );
100 $this->options = ParserOptions::newFromUser( $user );
106 $this->
error(
"{$this->failed} failed revisions out of {$this->count}" );
107 if ( $this->count > 0 ) {
108 $this->
output(
" (" . ( $this->failed / $this->count ) .
"%)\n" );
112 private function stripParameters(
string $text ): string {
113 if ( !$this->stripParametersEnabled ) {
117 return preg_replace(
'/(<a) [^>]+>/',
'$1>', $text );
126 $parser1Name = $this->getOption(
'parser1' );
127 $parser2Name = $this->getOption(
'parser2' );
129 self::checkParserLocally( $parser1Name );
130 self::checkParserLocally( $parser2Name );
132 $parser1 =
new $parser1Name();
133 $parser2 =
new $parser2Name();
138 $this->error(
"Page {$title->getPrefixedText()} does not contain wikitext "
139 .
"but {$content->getModel()}\n" );
145 '@phan-var WikitextContent $content';
146 $text = strval( $content->getText() );
148 $output1 = $parser1->parse( $text, $title, $this->options );
149 $output2 = $parser2->parse( $text, $title, $this->options );
151 if ( $output1->getText() != $output2->getText() ) {
153 $this->error(
"Parsing for {$title->getPrefixedText()} differs\n" );
155 if ( $this->saveFailed ) {
157 $this->saveFailed .
'/' . rawurlencode( $title->getPrefixedText() ) .
".txt",
161 if ( $this->showDiff ) {
163 explode(
"\n", $this->stripParameters( $output1->getText() ) ),
164 explode(
"\n", $this->stripParameters( $output2->getText() ) )
167 $unifiedDiff = $formatter->format( $diffs );
169 $this->output( $unifiedDiff );
172 $this->output( $title->getPrefixedText() .
"\tOK\n" );
174 if ( $this->showParsedOutput ) {
175 $this->output( $this->stripParameters( $output1->getText() ) );
180 private static function checkParserLocally(
string $parserName ) {
182 if ( !class_exists( $parserName ) && file_exists(
"$parserName.php" ) ) {
191require_once RUN_MAINTENANCE_IF_MAIN;
const CONTENT_MODEL_WIKITEXT
if(!defined('MW_SETUP_CALLBACK'))
Maintenance script to take page text out of an XML dump file and render basic HTML out to files.
conclusions()
Stub function for giving data about what was computed.
processRevision(WikiRevision $rev)
Callback function for each revision, parse with both parsers and compare.
__construct()
Default constructor.
checkOptions()
Stub function for processing additional options.
Base class for iterating over a dump.
Content object for wiki text pages.
output( $out, $channel=null)
Throw some output to the user.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
hasOption( $name)
Checks to see if a particular option was set.
getOption( $name, $default=null)
Get an option, or return the default.
error( $err, $die=0)
Throw an error to the user.
addDescription( $text)
Set the description text.
Represents a revision, log entry or upload during the import process.
getContent( $role=SlotRecord::MAIN)
$wgDiff
Config variable stub for the Diff setting, for use by phpdoc and IDEs.