MediaWiki master
compareParsers.php
Go to the documentation of this file.
1<?php
36
37// @codeCoverageIgnoreStart
38require_once __DIR__ . '/dumpIterator.php';
39// @codeCoverageIgnoreEnd
40
48
/** @var int Revision total used as the denominator in conclusions() */
private $count = 0;
/** @var string|false Folder given with --save-failed, or false when the option is absent */
private $saveFailed = false;
/** @var bool Whether --strip-parameters was given; gates stripParameters() */
private $stripParametersEnabled;
/** @var bool Whether --show-parsed-output was given */
private $showParsedOutput;
/** @var bool Whether --show-diff was given */
private $showDiff;
/** @var ParserOptions Options handed to both parsers; built in checkOptions() */
private $options;
/** @var int Number of revisions whose two renderings differed */
private $failed = 0;
63
/**
 * Register the description and the command-line options of this script.
 */
public function __construct() {
	parent::__construct();
	$this->addDescription( 'Run a file or dump with several parsers' );

	// Both parser class names are mandatory and take a value.
	$this->addOption( 'parser1', 'The first parser to compare.', true, true );
	$this->addOption( 'parser2', 'The second parser to compare.', true, true );
	$this->addOption(
		'save-failed',
		'Folder in which articles which differ will be stored.',
		false,
		true
	);
	$this->addOption( 'show-diff', 'Show a diff of the two renderings.', false, false );
	// The value of this option is read as a path in checkOptions(), so it has
	// to be declared as taking an argument; otherwise "--diff-bin /path" is
	// treated as a bare flag and the path is not bound to the option.
	$this->addOption(
		'diff-bin',
		'Binary to use for diffing (can also be provided by DIFF env var).',
		false,
		true
	);
	$this->addOption(
		'strip-parameters',
		'Remove parameters of html tags to increase readability.',
		false,
		false
	);
	$this->addOption(
		'show-parsed-output',
		'Show the parsed html if both Parsers give the same output.',
		false,
		false
	);
}
95
/**
 * Copy the command-line switches onto the instance, optionally override the
 * diff binary, and build the parser options shared by both parsers.
 */
public function checkOptions() {
	// --save-failed carries a folder name; the property keeps its default otherwise.
	if ( $this->hasOption( 'save-failed' ) ) {
		$this->saveFailed = $this->getOption( 'save-failed' );
	}

	// Plain presence flags.
	$this->stripParametersEnabled = $this->hasOption( 'strip-parameters' );
	$this->showParsedOutput = $this->hasOption( 'show-parsed-output' );
	$this->showDiff = $this->hasOption( 'show-diff' );

	if ( $this->showDiff ) {
		// Command line first, DIFF environment variable as the fallback.
		$diffBinary = $this->getOption( 'diff-bin', getenv( 'DIFF' ) );
		// Loose comparison on purpose: getenv() yields false when DIFF is unset.
		if ( $diffBinary != '' ) {
			global $wgDiff;
			$wgDiff = $diffBinary;
		}
	}

	$anonymousUser = new User();
	$this->options = ParserOptions::newFromUser( $anonymousUser );

	// Start the failure counter from zero.
	$this->failed = 0;
}
118
/**
 * Report how many revisions failed, followed by the failure rate in percent
 * when at least one revision was counted.
 */
public function conclusions() {
	$this->error( "{$this->failed} failed revisions out of {$this->count}" );
	if ( $this->count > 0 ) {
		// failed / count is a ratio in [0, 1]; scale it so the "%" suffix is truthful.
		$percentage = round( 100 * $this->failed / $this->count, 2 );
		$this->output( " (" . $percentage . "%)\n" );
	}
}
125
/**
 * Drop the attributes of opening <a> tags when --strip-parameters is active.
 *
 * @param string $text Rendered HTML
 * @return string The text unchanged when stripping is disabled, otherwise
 *  the text with every "<a ...>" reduced to "<a>"
 */
private function stripParameters( $text ) {
	return $this->stripParametersEnabled
		? preg_replace( '/(<a) [^>]+>/', '$1>', $text )
		: $text;
}
133
/**
 * Callback for each revision: render its wikitext with both parsers and
 * compare the resulting HTML.
 *
 * Revisions whose content model is not wikitext are reported and skipped;
 * they are not included in the totals shown by conclusions().
 *
 * @param WikiRevision $rev
 */
public function processRevision( WikiRevision $rev ) {
	$title = $rev->getTitle();
	$pageName = $title->getPrefixedText();

	$parser1Name = $this->getOption( 'parser1' );
	$parser2Name = $this->getOption( 'parser2' );

	// Allow parser classes that live in a same-named file in the current folder.
	self::checkParserLocally( $parser1Name );
	self::checkParserLocally( $parser2Name );

	$parser1 = new $parser1Name();
	$parser2 = new $parser2Name();

	$content = $rev->getContent();

	if ( $content->getModel() !== CONTENT_MODEL_WIKITEXT ) {
		$this->error( "Page {$pageName} does not contain wikitext "
			. "but {$content->getModel()}\n" );

		return;
	}

	'@phan-var WikitextContent $content';
	$text = strval( $content->getText() );

	// The counter is private to this class and nothing else updates it, so it
	// must be advanced here for conclusions() to report a meaningful total.
	$this->count++;

	$output1 = $parser1->parse( $text, $title, $this->options );
	$output2 = $parser2->parse( $text, $title, $this->options );

	// Fetch each rendering once and reuse it below.
	$html1 = $output1->getText();
	$html2 = $output2->getText();

	// Strict comparison: with "!=" two different numeric strings such as
	// "1e3" and "1000" would be considered equal.
	if ( $html1 !== $html2 ) {
		$this->failed++;
		$this->error( "Parsing for {$pageName} differs\n" );

		if ( $this->saveFailed ) {
			// Keep the offending wikitext so the difference can be reproduced later.
			file_put_contents(
				$this->saveFailed . '/' . rawurlencode( $pageName ) . ".txt",
				$text
			);
		}
		if ( $this->showDiff ) {
			$diffs = new Diff(
				explode( "\n", $this->stripParameters( $html1 ) ),
				explode( "\n", $this->stripParameters( $html2 ) )
			);
			$formatter = new UnifiedDiffFormatter();
			$unifiedDiff = $formatter->format( $diffs );

			$this->output( $unifiedDiff );
		}
	} else {
		$this->output( $pageName . "\tOK\n" );

		if ( $this->showParsedOutput ) {
			$this->output( $this->stripParameters( $html1 ) );
		}
	}
}
193
/**
 * Register an autoload entry for a parser class that is not loadable yet but
 * has a same-named PHP file in the current working directory.
 *
 * @param string $parserName Class name of the parser
 */
private static function checkParserLocally( $parserName ) {
	// Already loadable, or no "<name>.php" next to us: nothing to register.
	if ( class_exists( $parserName ) || !file_exists( "$parserName.php" ) ) {
		return;
	}

	global $wgAutoloadClasses;
	$localFile = realpath( '.' ) . "/$parserName.php";
	$wgAutoloadClasses[$parserName] = $localFile;
}
201}
202
203// @codeCoverageIgnoreStart
204$maintClass = CompareParsers::class;
205require_once RUN_MAINTENANCE_IF_MAIN;
206// @codeCoverageIgnoreEnd
const CONTENT_MODEL_WIKITEXT
Definition Defines.php:228
$wgAutoloadClasses
Definition Setup.php:153
Maintenance script to take page text out of an XML dump file and render basic HTML out to files.
conclusions()
Stub function for giving data about what was computed.
processRevision(WikiRevision $rev)
Callback function for each revision, parse with both parsers and compare.
__construct()
Default constructor.
checkOptions()
Stub function for processing additional options.
Base class for iterating over a dump.
Content object for wiki text pages.
output( $out, $channel=null)
Throw some output to the user.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
hasOption( $name)
Checks to see if a particular option was set.
getOption( $name, $default=null)
Get an option, or return the default.
error( $err, $die=0)
Throw an error to the user.
addDescription( $text)
Set the description text.
Set options of the Parser.
User class for the MediaWiki software.
Definition User.php:119
Represents a revision, log entry or upload during the import process.
getContent( $role=SlotRecord::MAIN)
Class representing a 'diff' between two sequences of strings.
Definition Diff.php:34
A formatter that outputs unified diffs.
$maintClass
$wgDiff
Config variable stub for the Diff setting, for use by phpdoc and IDEs.