MediaWiki 1.39.10
compareParsers.php
Go to the documentation of this file.
1<?php
31require_once __DIR__ . '/dumpIterator.php';
32
40
/**
 * @var int Total used as the denominator in conclusions().
 * NOTE(review): nothing in the visible code increments this field — confirm where it is updated.
 */
41 private $count = 0;
/** @var string|false Folder given via --save-failed, or false when the option is not set. */
43 private $saveFailed = false;
/** @var bool Whether --strip-parameters was given; gates stripParameters(). */
45 private $stripParametersEnabled;
/** @var bool Whether --show-parsed-output was given; prints the HTML when both outputs match. */
47 private $showParsedOutput;
/** @var bool Whether --show-diff was given; prints a unified diff when the outputs differ. */
49 private $showDiff;
/** @var ParserOptions Options built in checkOptions() and passed to both parsers. */
51 private $options;
/** @var int Number of revisions whose two renderings differed; reset to 0 in checkOptions(). */
53 private $failed;
54
/**
 * Register the script description and the command-line options it accepts.
 *
 * The addOption() arguments used here are, in order: option name,
 * description, whether the option is required, whether it takes a value.
 */
public function __construct() {
	parent::__construct();
	$this->addDescription( 'Run a file or dump with several parsers' );
	$this->addOption( 'parser1', 'The first parser to compare.', true, true );
	$this->addOption( 'parser2', 'The second parser to compare.', true, true );
	$this->addOption(
		'save-failed',
		'Folder in which articles which differ will be stored.',
		false,
		true
	);
	$this->addOption( 'show-diff', 'Show a diff of the two renderings.', false, false );
	$this->addOption(
		'diff-bin',
		'Binary to use for diffing (can also be provided by DIFF env var).',
		false,
		// Takes a value: checkOptions() reads this option as the path of the
		// diff binary, so it must not be registered as a bare flag.
		true
	);
	$this->addOption(
		'strip-parameters',
		'Remove parameters of html tags to increase readability.',
		false,
		false
	);
	$this->addOption(
		'show-parsed-output',
		'Show the parsed html if both Parsers give the same output.',
		false,
		false
	);
}
86
/**
 * Copy the command-line options into the object's fields and prepare the
 * state used while revisions are processed.
 *
 * When --show-diff is given and a diff binary is supplied (through
 * --diff-bin or the DIFF environment variable), the global $wgDiff is
 * overwritten with it.
 */
public function checkOptions() {
	// Only replace the initial value when a folder was actually supplied.
	if ( $this->hasOption( 'save-failed' ) ) {
		$this->saveFailed = $this->getOption( 'save-failed' );
	}

	$this->stripParametersEnabled = $this->hasOption( 'strip-parameters' );
	$this->showParsedOutput = $this->hasOption( 'show-parsed-output' );
	$this->showDiff = $this->hasOption( 'show-diff' );

	// The diff binary is only looked up when a diff was requested.
	$diffBinary = $this->showDiff
		? $this->getOption( 'diff-bin', getenv( 'DIFF' ) )
		: '';
	if ( $diffBinary != '' ) {
		global $wgDiff;
		$wgDiff = $diffBinary;
	}

	// Both parsers are run with options derived from a freshly constructed User.
	$this->options = ParserOptions::newFromUser( new User() );

	$this->failed = 0;
}
109
/**
 * Report how many revisions produced differing output, and the share of
 * the total they represent.
 *
 * The counts are sent through error(); the percentage is sent through
 * output() and is skipped when no revision was counted, to avoid dividing
 * by zero.
 */
public function conclusions() {
	$this->error( "{$this->failed} failed revisions out of {$this->count}" );
	if ( $this->count > 0 ) {
		// Scale the ratio to an actual percentage; it is printed with a "%" sign.
		$percentage = round( 100 * $this->failed / $this->count, 2 );
		$this->output( " (" . $percentage . "%)\n" );
	}
}
116
/**
 * Optionally simplify HTML before it is shown or diffed.
 *
 * When the strip-parameters option is enabled, every "<a ...>" opening tag
 * is reduced to a bare "<a>". Only anchor tags are matched by the pattern.
 * When the option is disabled the text is returned untouched.
 *
 * @param string $text HTML to simplify
 * @return string
 */
private function stripParameters( $text ) {
	return $this->stripParametersEnabled
		? preg_replace( '/(<a) [^>]+>/', '$1>', $text )
		: $text;
}
124
/**
 * Callback for each revision: parse its text with both parsers and compare
 * the resulting HTML.
 *
 * Revisions whose content model is not wikitext are reported through
 * error() and skipped. When the two renderings differ, the failure counter
 * is incremented, the wikitext is optionally saved to the --save-failed
 * folder and a unified diff is optionally printed. When they match, the
 * title is printed followed by "OK", optionally with the parsed HTML.
 *
 * @param WikiRevision $rev
 */
public function processRevision( WikiRevision $rev ) {
	$title = $rev->getTitle();

	$parser1Name = $this->getOption( 'parser1' );
	$parser2Name = $this->getOption( 'parser2' );

	self::checkParserLocally( $parser1Name );
	self::checkParserLocally( $parser2Name );

	$parser1 = new $parser1Name();
	$parser2 = new $parser2Name();

	$content = $rev->getContent();

	if ( $content->getModel() !== CONTENT_MODEL_WIKITEXT ) {
		$this->error( "Page {$title->getPrefixedText()} does not contain wikitext "
			. "but {$content->getModel()}\n" );

		return;
	}

	'@phan-var WikitextContent $content';
	$text = strval( $content->getText() );

	$output1 = $parser1->parse( $text, $title, $this->options );
	$output2 = $parser2->parse( $text, $title, $this->options );

	// Fetch each rendering once and reuse it for the comparison, the diff
	// and the display below.
	$html1 = $output1->getText();
	$html2 = $output2->getText();

	// Strict comparison: with loose "!=" PHP compares numeric-looking
	// strings numerically, so different strings could be treated as equal.
	if ( $html1 !== $html2 ) {
		$this->failed++;
		$this->error( "Parsing for {$title->getPrefixedText()} differs\n" );

		if ( $this->saveFailed ) {
			$path = $this->saveFailed . '/' . rawurlencode( $title->getPrefixedText() ) . ".txt";
			// Report a failed write instead of silently losing the sample.
			if ( file_put_contents( $path, $text ) === false ) {
				$this->error( "Could not save {$title->getPrefixedText()} to {$path}\n" );
			}
		}
		if ( $this->showDiff ) {
			$diffs = new Diff(
				explode( "\n", $this->stripParameters( $html1 ) ),
				explode( "\n", $this->stripParameters( $html2 ) )
			);
			$formatter = new UnifiedDiffFormatter();
			$unifiedDiff = $formatter->format( $diffs );

			$this->output( $unifiedDiff );
		}
	} else {
		$this->output( $title->getPrefixedText() . "\tOK\n" );

		if ( $this->showParsedOutput ) {
			$this->output( $this->stripParameters( $html1 ) );
		}
	}
}
185
/**
 * Make a parser class loadable from a same-named PHP file in the current
 * working directory.
 *
 * Nothing happens when the class already exists or when no
 * "<parserName>.php" file is found. Otherwise the file's absolute path is
 * registered for the class in the global $wgAutoloadClasses.
 *
 * @param string $parserName Class name of the parser
 */
private static function checkParserLocally( $parserName ) {
	if ( class_exists( $parserName ) ) {
		return;
	}
	if ( !file_exists( "$parserName.php" ) ) {
		return;
	}

	global $wgAutoloadClasses;
	$currentDir = realpath( '.' );
	$wgAutoloadClasses[$parserName] = $currentDir . "/$parserName.php";
}
193}
194
// Name the class implementing this script, then include the file named by
// RUN_MAINTENANCE_IF_MAIN. That constant is defined outside this file;
// presumably the included file runs $maintClass when this script is the
// entry point — TODO confirm.
195$maintClass = CompareParsers::class;
196require_once RUN_MAINTENANCE_IF_MAIN;
const CONTENT_MODEL_WIKITEXT
Definition Defines.php:211
$wgAutoloadClasses
Definition Setup.php:141
Maintenance script to take page text out of a file or XML dump, parse it with two parsers and report the pages whose renderings differ.
conclusions()
Stub function for giving data about what was computed.
processRevision(WikiRevision $rev)
Callback function for each revision, parse with both parsers and compare.
__construct()
Default constructor.
checkOptions()
Stub function for processing additional options.
Class representing a 'diff' between two sequences of strings.
Definition Diff.php:32
Base class for iterating over a dump.
error( $err, $die=0)
Throw an error to the user.
output( $out, $channel=null)
Throw some output to the user.
hasOption( $name)
Checks to see if a particular option was set.
addDescription( $text)
Set the description text.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
Set options of the Parser.
A formatter that outputs unified diffs.
internal since 1.36
Definition User.php:70
Represents a revision, log entry or upload during the import process.
getContent( $role=SlotRecord::MAIN)
$maintClass
$wgDiff
Config variable stub for the Diff setting, for use by phpdoc and IDEs.
$content
Definition router.php:76