MediaWiki master
compareParsers.php
Go to the documentation of this file.
1<?php
35
36// @codeCoverageIgnoreStart
37require_once __DIR__ . '/dumpIterator.php';
38// @codeCoverageIgnoreEnd
39
47
/**
 * @var int Total used as the denominator in conclusions().
 * NOTE(review): nothing visible in this file increments this counter — confirm
 * whether the parent iterator's own count was meant to be used instead.
 */
private $count = 0;
/** @var string|false Folder given via --save-failed, or false when differing articles are not saved */
private $saveFailed = false;
/** @var bool Whether --strip-parameters was given */
private $stripParametersEnabled;
/** @var bool Whether --show-parsed-output was given */
private $showParsedOutput;
/** @var bool Whether --show-diff was given */
private $showDiff;
/** @var ParserOptions Options handed to both parsers; built in checkOptions() */
private $options;
/** @var int Number of revisions whose two renderings differed; starts at 0 so it is never null */
private $failed = 0;
61
/**
 * Register the script description and its command-line options.
 *
 * Each addOption() call below passes
 * ( $name, $description, $required, $withArg ).
 */
public function __construct() {
	parent::__construct();
	$this->addDescription( 'Run a file or dump with several parsers' );
	$this->addOption( 'parser1', 'The first parser to compare.', true, true );
	$this->addOption( 'parser2', 'The second parser to compare.', true, true );
	$this->addOption(
		'save-failed',
		'Folder in which articles which differ will be stored.',
		false,
		true
	);
	$this->addOption( 'show-diff', 'Show a diff of the two renderings.', false, false );
	$this->addOption(
		'diff-bin',
		'Binary to use for diffing (can also be provided by DIFF env var).',
		false,
		// Must accept a value: checkOptions() reads this option with getOption()
		// and installs the result as the diff binary.
		true
	);
	$this->addOption(
		'strip-parameters',
		'Remove parameters of html tags to increase readability.',
		false,
		false
	);
	$this->addOption(
		'show-parsed-output',
		'Show the parsed html if both Parsers give the same output.',
		false,
		false
	);
}
93
/**
 * Copy the command-line switches into instance state, optionally override
 * the global diff binary, and build the ParserOptions used for every parse.
 */
public function checkOptions() {
	$this->stripParametersEnabled = $this->hasOption( 'strip-parameters' );
	$this->showParsedOutput = $this->hasOption( 'show-parsed-output' );
	$this->showDiff = $this->hasOption( 'show-diff' );

	// Only overwrite the default when a target folder was actually supplied.
	if ( $this->hasOption( 'save-failed' ) ) {
		$this->saveFailed = $this->getOption( 'save-failed' );
	}

	// The diff binary is only looked up when diffs were requested; the
	// --diff-bin option takes precedence over the DIFF environment variable.
	$diffBinary = $this->showDiff
		? $this->getOption( 'diff-bin', getenv( 'DIFF' ) )
		: '';
	if ( $diffBinary != '' ) {
		global $wgDiff;
		$wgDiff = $diffBinary;
	}

	$this->options = ParserOptions::newFromUser( new User() );

	$this->failed = 0;
}
116
/**
 * Report how many revisions rendered differently and, when any revisions
 * were counted, the failure rate as a percentage.
 */
public function conclusions() {
	$this->error( "{$this->failed} failed revisions out of {$this->count}" );
	if ( $this->count > 0 ) {
		// Scale the ratio to the 0-100 range so the number matches its "%" suffix.
		$percentage = round( $this->failed / $this->count * 100, 2 );
		$this->output( " (" . $percentage . "%)\n" );
	}
}
123
/**
 * Drop the attributes of opening <a ...> tags when --strip-parameters is on;
 * otherwise hand the text back untouched.
 *
 * @param string $text
 * @return string|null Result of preg_replace() when stripping is enabled
 */
private function stripParameters( $text ) {
	return $this->stripParametersEnabled
		? preg_replace( '/(<a) [^>]+>/', '$1>', $text )
		: $text;
}
131
/**
 * Callback function for each revision: parse the wikitext with both
 * configured parsers and compare the rendered text.
 *
 * Revisions whose content model is not wikitext are reported and skipped.
 * When the renderings differ, the failure counter is incremented and,
 * depending on the options, the source text is saved and/or a unified
 * diff is printed.
 *
 * @param WikiRevision $rev
 */
public function processRevision( WikiRevision $rev ) {
	$title = $rev->getTitle();

	$parser1Name = $this->getOption( 'parser1' );
	$parser2Name = $this->getOption( 'parser2' );

	// Allow parser classes that live in a same-named file in the current folder.
	self::checkParserLocally( $parser1Name );
	self::checkParserLocally( $parser2Name );

	$parser1 = new $parser1Name();
	$parser2 = new $parser2Name();

	$content = $rev->getContent();

	if ( $content->getModel() !== CONTENT_MODEL_WIKITEXT ) {
		$this->error( "Page {$title->getPrefixedText()} does not contain wikitext "
			. "but {$content->getModel()}\n" );

		return;
	}

	'@phan-var WikitextContent $content';
	$text = strval( $content->getText() );

	$output1 = $parser1->parse( $text, $title, $this->options );
	$output2 = $parser2->parse( $text, $title, $this->options );

	// Fetch each rendering once and reuse it for comparison, diffing and display.
	$html1 = $output1->getText();
	$html2 = $output2->getText();

	// Strict comparison: loose != treats different numeric-looking strings
	// (e.g. "1e3" and "1000") as equal and would hide a real difference.
	if ( $html1 !== $html2 ) {
		$this->failed++;
		$this->error( "Parsing for {$title->getPrefixedText()} differs\n" );

		if ( $this->saveFailed ) {
			$path = $this->saveFailed . '/' . rawurlencode( $title->getPrefixedText() ) . ".txt";
			// Report a failed write instead of silently losing the article.
			if ( file_put_contents( $path, $text ) === false ) {
				$this->error( "Could not save {$title->getPrefixedText()} to {$path}\n" );
			}
		}
		if ( $this->showDiff ) {
			$diffs = new Diff(
				explode( "\n", $this->stripParameters( $html1 ) ),
				explode( "\n", $this->stripParameters( $html2 ) )
			);
			$formatter = new UnifiedDiffFormatter();
			$unifiedDiff = $formatter->format( $diffs );

			$this->output( $unifiedDiff );
		}
	} else {
		$this->output( $title->getPrefixedText() . "\tOK\n" );

		if ( $this->showParsedOutput ) {
			$this->output( $this->stripParameters( $html1 ) );
		}
	}
}
192
/**
 * Register an autoload entry for a parser class that is not loaded yet but
 * has a same-named .php file in the current working directory.
 *
 * @param string $parserName Class name of the parser
 */
private static function checkParserLocally( $parserName ) {
	// Nothing to do when the class is already known or no local file exists.
	if ( class_exists( $parserName ) || !file_exists( "$parserName.php" ) ) {
		return;
	}

	global $wgAutoloadClasses;
	$localPath = realpath( '.' ) . "/$parserName.php";
	$wgAutoloadClasses[$parserName] = $localPath;
}
200}
201
202// @codeCoverageIgnoreStart
203$maintClass = CompareParsers::class;
204require_once RUN_MAINTENANCE_IF_MAIN;
205// @codeCoverageIgnoreEnd
const CONTENT_MODEL_WIKITEXT
Definition Defines.php:222
$wgAutoloadClasses
Definition Setup.php:153
Maintenance script to take page text out of an XML dump file, parse each revision with two parsers, and report the pages whose rendered output differs.
conclusions()
Stub function for giving data about what was computed.
processRevision(WikiRevision $rev)
Callback function for each revision, parse with both parsers and compare.
__construct()
Default constructor.
checkOptions()
Stub function for processing additional options.
Base class for iterating over a dump.
error( $err, $die=0)
Throw an error to the user.
output( $out, $channel=null)
Throw some output to the user.
hasOption( $name)
Checks to see if a particular option was set.
addDescription( $text)
Set the description text.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
Content object for wiki text pages.
internal since 1.36
Definition User.php:93
Set options of the Parser.
Represents a revision, log entry or upload during the import process.
getContent( $role=SlotRecord::MAIN)
Class representing a 'diff' between two sequences of strings.
Definition Diff.php:34
A formatter that outputs unified diffs.
$maintClass
$wgDiff
Config variable stub for the Diff setting, for use by phpdoc and IDEs.