MediaWiki  master
compareParsers.php
Go to the documentation of this file.
1 <?php
34 
35 require_once __DIR__ . '/dumpIterator.php';
36 
44 
/**
 * @var int Denominator used by conclusions(); starts at 0.
 * NOTE(review): nothing in the visible code increments this private
 * counter - confirm where it is updated.
 */
45  private $count = 0;
/** @var string|false Folder given via --save-failed, or false when the option is absent */
47  private $saveFailed = false;
/** @var bool Whether --strip-parameters was passed (consulted by stripParameters()) */
49  private $stripParametersEnabled;
/** @var bool Whether --show-parsed-output was passed */
51  private $showParsedOutput;
/** @var bool Whether --show-diff was passed */
53  private $showDiff;
/** @var ParserOptions Options handed to both parsers, built in checkOptions() */
55  private $options;
/** @var int Number of revisions whose two renderings differed; reset in checkOptions() */
57  private $failed;
58 
/**
 * Set the script description and declare every command-line option
 * this script understands.
 */
public function __construct() {
	parent::__construct();
	$this->addDescription( 'Run a file or dump with several parsers' );

	// One row per option: name, help text, required?, takes an argument?
	// The rows are registered in order, so the order here is the order
	// in which the options are declared.
	$optionTable = [
		[ 'parser1', 'The first parser to compare.', true, true ],
		[ 'parser2', 'The second parser to compare.', true, true ],
		[ 'save-failed', 'Folder in which articles which differ will be stored.', false, true ],
		[ 'show-diff', 'Show a diff of the two renderings.', false, false ],
		[
			'diff-bin',
			'Binary to use for diffing (can also be provided by DIFF env var).',
			false,
			false
		],
		[
			'strip-parameters',
			'Remove parameters of html tags to increase readability.',
			false,
			false
		],
		[
			'show-parsed-output',
			'Show the parsed html if both Parsers give the same output.',
			false,
			false
		],
	];

	foreach ( $optionTable as [ $name, $help, $required, $withArg ] ) {
		$this->addOption( $name, $help, $required, $withArg );
	}
}
90 
/**
 * Copy the command-line switches into properties, build the parser
 * options shared by both parsers and reset the failure counter.
 */
public function checkOptions() {
	$this->failed = 0;
	$this->options = ParserOptions::newFromUser( new User() );

	$this->stripParametersEnabled = $this->hasOption( 'strip-parameters' );
	$this->showParsedOutput = $this->hasOption( 'show-parsed-output' );
	$this->showDiff = $this->hasOption( 'show-diff' );

	if ( $this->hasOption( 'save-failed' ) ) {
		$this->saveFailed = $this->getOption( 'save-failed' );
	}

	// The diff binary is only looked up when a diff was actually requested.
	if ( !$this->showDiff ) {
		return;
	}

	// --diff-bin wins; otherwise fall back to the DIFF environment variable.
	// The loose comparison is deliberate: getenv() yields false when unset.
	$diffBinary = $this->getOption( 'diff-bin', getenv( 'DIFF' ) );
	if ( $diffBinary != '' ) {
		global $wgDiff;
		$wgDiff = $diffBinary;
	}
}
113 
/**
 * Report how many revisions rendered differently out of the total,
 * followed by the failure rate as a percentage when the total is
 * non-zero.
 */
public function conclusions() {
	$this->error( "{$this->failed} failed revisions out of {$this->count}" );
	if ( $this->count > 0 ) {
		// The value is printed with a '%' sign, so the ratio has to be
		// scaled to a percentage (0.25 must read as 25%). Rounded to keep
		// the output short for ratios like 1/3.
		$percentage = round( 100 * $this->failed / $this->count, 2 );
		$this->output( " (" . $percentage . "%)\n" );
	}
}
120 
/**
 * Drop the attributes of anchor tags when --strip-parameters is active.
 *
 * Only "<a ...>" tags are rewritten (to a bare "<a>"); with the switch
 * off the text is handed back untouched.
 *
 * @param string $text HTML to clean up
 * @return string|null Result of preg_replace(), or $text itself when stripping is disabled
 */
private function stripParameters( $text ) {
	return $this->stripParametersEnabled
		? preg_replace( '/(<a) [^>]+>/', '$1>', $text )
		: $text;
}
128 
/**
 * Callback function for each revision, parse with both parsers and compare.
 *
 * Revisions whose content model is not wikitext are reported and skipped.
 * When the two renderings differ, the failure counter is incremented, the
 * wikitext is optionally saved to the --save-failed folder and a unified
 * diff is optionally printed. When they match, the title is reported as OK
 * and the rendering is optionally printed.
 *
 * @param WikiRevision $rev Revision to render with both parsers
 */
public function processRevision( WikiRevision $rev ) {
	$title = $rev->getTitle();

	$parser1Name = $this->getOption( 'parser1' );
	$parser2Name = $this->getOption( 'parser2' );

	// Give parsers that live in the current folder a chance to be autoloaded.
	self::checkParserLocally( $parser1Name );
	self::checkParserLocally( $parser2Name );

	$parser1 = new $parser1Name();
	$parser2 = new $parser2Name();

	$content = $rev->getContent();

	if ( $content->getModel() !== CONTENT_MODEL_WIKITEXT ) {
		$this->error( "Page {$title->getPrefixedText()} does not contain wikitext "
			. "but {$content->getModel()}\n" );

		return;
	}

	'@phan-var WikitextContent $content';
	$text = strval( $content->getText() );

	$output1 = $parser1->parse( $text, $title, $this->options );
	$output2 = $parser2->parse( $text, $title, $this->options );

	// Render each output once and reuse the strings below.
	$html1 = $output1->getText();
	$html2 = $output2->getText();

	// Strict comparison: a loose one would treat numeric-looking strings
	// such as "1e3" and "1000" as equal and hide a real difference.
	if ( $html1 === $html2 ) {
		$this->output( $title->getPrefixedText() . "\tOK\n" );

		if ( $this->showParsedOutput ) {
			$this->output( $this->stripParameters( $html1 ) );
		}

		return;
	}

	$this->failed++;
	$this->error( "Parsing for {$title->getPrefixedText()} differs\n" );

	if ( $this->saveFailed ) {
		$path = $this->saveFailed . '/' . rawurlencode( $title->getPrefixedText() ) . ".txt";
		// Report a failed write instead of silently losing the sample.
		if ( file_put_contents( $path, $text ) === false ) {
			$this->error( "Could not save wikitext to $path\n" );
		}
	}

	if ( $this->showDiff ) {
		$diffs = new Diff(
			explode( "\n", $this->stripParameters( $html1 ) ),
			explode( "\n", $this->stripParameters( $html2 ) )
		);
		$formatter = new UnifiedDiffFormatter();
		$unifiedDiff = $formatter->format( $diffs );

		$this->output( $unifiedDiff );
	}
}
189 
/**
 * Register an autoload entry for a parser class that is not yet known
 * but has a matching "<name>.php" file in the current folder.
 *
 * @param string $parserName Class name of the parser
 */
private static function checkParserLocally( $parserName ) {
	// Nothing to do when the class can already be resolved.
	if ( class_exists( $parserName ) ) {
		return;
	}

	// Look for the parser in a file appropriately named in the current folder.
	if ( !file_exists( "$parserName.php" ) ) {
		return;
	}

	global $wgAutoloadClasses;
	$wgAutoloadClasses[$parserName] = realpath( '.' ) . "/$parserName.php";
}
197 }
198 
199 $maintClass = CompareParsers::class;
200 require_once RUN_MAINTENANCE_IF_MAIN;
const CONTENT_MODEL_WIKITEXT
Definition: Defines.php:209
$wgAutoloadClasses
Definition: Setup.php:146
CompareParsers
Maintenance script to take page text out of an XML dump file, render it with two parsers and compare the resulting HTML.
conclusions()
Stub function for giving data about what was computed.
processRevision(WikiRevision $rev)
Callback function for each revision, parse with both parsers and compare.
__construct()
Default constructor.
checkOptions()
Stub function for processing additional options.
DumpIterator
Base class for iterating over a dump.
error( $err, $die=0)
Throw an error to the user.
output( $out, $channel=null)
Throw some output to the user.
hasOption( $name)
Checks to see if a particular option was set.
addDescription( $text)
Set the description text.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
internal since 1.36
Definition: User.php:98
static newFromUser( $user)
Get a ParserOptions object from a given user.
WikiRevision
Represents a revision, log entry or upload during the import process.
getContent( $role=SlotRecord::MAIN)
Diff
Class representing a 'diff' between two sequences of strings.
Definition: Diff.php:34
UnifiedDiffFormatter
A formatter that outputs unified diffs.
$maintClass
$wgDiff
Config variable stub for the Diff setting, for use by phpdoc and IDEs.
$content
Definition: router.php:76