MediaWiki master
compareParsers.php
Go to the documentation of this file.
1<?php
34
35require_once __DIR__ . '/dumpIterator.php';
36
44
45 private $count = 0;
47 private $saveFailed = false;
49 private $stripParametersEnabled;
51 private $showParsedOutput;
53 private $showDiff;
55 private $options;
57 private $failed;
58
/**
 * Register the script description and every command-line option it accepts.
 *
 * The third and fourth arguments of addOption() are $required and $withArg.
 * Options whose value is later read with getOption() are declared with
 * $withArg = true; pure on/off switches are declared with $withArg = false.
 */
public function __construct() {
	parent::__construct();
	$this->addDescription( 'Run a file or dump with several parsers' );

	// Required: the class names of the two parsers to compare.
	$this->addOption( 'parser1', 'The first parser to compare.', true, true );
	$this->addOption( 'parser2', 'The second parser to compare.', true, true );

	// Optional, takes a directory path.
	$this->addOption(
		'save-failed',
		'Folder in which articles which differ will be stored.',
		false,
		true
	);

	// Optional switch.
	$this->addOption( 'show-diff', 'Show a diff of the two renderings.', false, false );

	// Optional, takes the path of the diff binary. checkOptions() assigns the
	// value of this option to $wgDiff, so it has to accept an argument.
	$this->addOption(
		'diff-bin',
		'Binary to use for diffing (can also be provided by DIFF env var).',
		false,
		true
	);

	// Optional switches.
	$this->addOption(
		'strip-parameters',
		'Remove parameters of html tags to increase readability.',
		false,
		false
	);
	$this->addOption(
		'show-parsed-output',
		'Show the parsed html if both Parsers give the same output.',
		false,
		false
	);
}
90
/**
 * Copy the parsed command-line options into instance state.
 *
 * When --show-diff is given and a diff binary is named (through --diff-bin
 * or the DIFF environment variable), the global $wgDiff is overridden.
 * The ParserOptions used for every parse are built from a new User object.
 */
public function checkOptions() {
	// Start every run with a clean failure counter.
	$this->failed = 0;

	if ( $this->hasOption( 'save-failed' ) ) {
		$this->saveFailed = $this->getOption( 'save-failed' );
	}

	$this->stripParametersEnabled = $this->hasOption( 'strip-parameters' );
	$this->showParsedOutput = $this->hasOption( 'show-parsed-output' );
	$this->showDiff = $this->hasOption( 'show-diff' );

	// The diff binary is only looked up when a diff will actually be shown.
	// getenv() yields false for an unset variable, hence the loose comparison.
	$diffBinary = $this->showDiff
		? $this->getOption( 'diff-bin', getenv( 'DIFF' ) )
		: '';
	if ( $diffBinary != '' ) {
		global $wgDiff;
		$wgDiff = $diffBinary;
	}

	$this->options = ParserOptions::newFromUser( new User() );
}
113
/**
 * Report how many revisions produced differing output.
 *
 * The summary line is sent through error(); the failure rate is appended
 * through output() only when the revision counter is positive, which also
 * guards the division against a zero denominator.
 */
public function conclusions() {
	$this->error( "{$this->failed} failed revisions out of {$this->count}" );

	// NOTE(review): $this->count is private to this class; confirm that
	// processRevision() keeps it up to date, otherwise this branch never runs.
	if ( $this->count > 0 ) {
		// failed / count is a 0..1 fraction; scale it so the "%" sign is accurate.
		$percentage = round( $this->failed / $this->count * 100, 2 );
		$this->output( " ({$percentage}%)\n" );
	}
}
120
/**
 * Optionally drop the attributes of anchor tags from rendered HTML.
 *
 * When the strip-parameters switch is off the text is returned untouched.
 *
 * NOTE(review): the pattern only matches "<a" followed by a space, while the
 * option description speaks of "html tags" in general -- confirm the intent.
 *
 * @param string $text Rendered HTML
 * @return string
 */
private function stripParameters( $text ) {
	return $this->stripParametersEnabled
		? preg_replace( '/(<a) [^>]+>/', '$1>', $text )
		: $text;
}
128
/**
 * Callback for each revision: parse it with both parsers and compare.
 *
 * Revisions whose content model is not wikitext are reported and skipped.
 * For every compared revision the private counter is incremented; when the
 * two renderings differ the failure counter is incremented as well, the
 * wikitext is optionally saved to the save-failed folder and a unified diff
 * is optionally printed. When they match, "OK" is printed, optionally
 * followed by the rendered HTML.
 *
 * @param WikiRevision $rev
 */
public function processRevision( WikiRevision $rev ) {
	$title = $rev->getTitle();
	$pageName = $title->getPrefixedText();

	$parser1Name = $this->getOption( 'parser1' );
	$parser2Name = $this->getOption( 'parser2' );

	// Make parser classes living in the current directory autoloadable.
	self::checkParserLocally( $parser1Name );
	self::checkParserLocally( $parser2Name );

	$parser1 = new $parser1Name();
	$parser2 = new $parser2Name();

	$content = $rev->getContent();
	$model = $content->getModel();

	if ( $model !== CONTENT_MODEL_WIKITEXT ) {
		$this->error( "Page {$pageName} does not contain wikitext "
			. "but {$model}\n" );

		return;
	}

	// @phan-suppress-next-line PhanNoopString
	'@phan-var WikitextContent $content';
	$text = strval( $content->getText() );

	// $count is private to this class and is what conclusions() divides by;
	// nothing else in the class updates it, so count each compared revision here.
	$this->count++;

	$output1 = $parser1->parse( $text, $title, $this->options );
	$output2 = $parser2->parse( $text, $title, $this->options );

	$html1 = $output1->getText();
	$html2 = $output2->getText();

	// Strict comparison: with loose != PHP compares numeric-looking strings
	// by value (e.g. "1e3" == "1000"), which would hide a real difference.
	if ( $html1 !== $html2 ) {
		$this->failed++;
		$this->error( "Parsing for {$pageName} differs\n" );

		if ( $this->saveFailed ) {
			$target = $this->saveFailed . '/' . rawurlencode( $pageName ) . ".txt";
			// Saving is best-effort: report a failed write but keep going.
			if ( file_put_contents( $target, $text ) === false ) {
				$this->error( "Could not save wikitext to {$target}\n" );
			}
		}
		if ( $this->showDiff ) {
			$diffs = new Diff(
				explode( "\n", $this->stripParameters( $html1 ) ),
				explode( "\n", $this->stripParameters( $html2 ) )
			);
			$formatter = new UnifiedDiffFormatter();
			$unifiedDiff = $formatter->format( $diffs );

			$this->output( $unifiedDiff );
		}
	} else {
		$this->output( $pageName . "\tOK\n" );

		if ( $this->showParsedOutput ) {
			$this->output( $this->stripParameters( $html1 ) );
		}
	}
}
189
/**
 * Make a parser class autoloadable from the current working directory.
 *
 * If the class is not already known and a file named "<parserName>.php"
 * exists in the current folder, that file is registered for the class in
 * the global $wgAutoloadClasses map. Otherwise nothing happens.
 *
 * @param string $parserName Class name of the parser
 */
private static function checkParserLocally( $parserName ) {
	if ( class_exists( $parserName ) ) {
		return;
	}

	$fileName = "$parserName.php";
	if ( !file_exists( $fileName ) ) {
		return;
	}

	global $wgAutoloadClasses;
	$wgAutoloadClasses[$parserName] = realpath( '.' ) . "/$fileName";
}
197}
198
// Name the class that implements this maintenance script, then include the
// file referenced by the RUN_MAINTENANCE_IF_MAIN constant.
// NOTE(review): presumably that include runs $maintClass when this file is
// the entry point -- confirm against the Maintenance framework.
199$maintClass = CompareParsers::class;
200require_once RUN_MAINTENANCE_IF_MAIN;
const CONTENT_MODEL_WIKITEXT
Definition Defines.php:220
$wgAutoloadClasses
Definition Setup.php:149
Maintenance script to take page text out of an XML dump file and render basic HTML out to files.
conclusions()
Stub function for giving data about what was computed.
processRevision(WikiRevision $rev)
Callback function for each revision, parse with both parsers and compare.
__construct()
Default constructor.
checkOptions()
Stub function for processing additional options.
Base class for iterating over a dump.
error( $err, $die=0)
Throw an error to the user.
output( $out, $channel=null)
Throw some output to the user.
hasOption( $name)
Checks to see if a particular option was set.
addDescription( $text)
Set the description text.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
internal since 1.36
Definition User.php:93
Set options of the Parser.
Represents a revision, log entry or upload during the import process.
getContent( $role=SlotRecord::MAIN)
Class representing a 'diff' between two sequences of strings.
Definition Diff.php:34
A formatter that outputs unified diffs.
$maintClass
$wgDiff
Config variable stub for the Diff setting, for use by phpdoc and IDEs.