MediaWiki  master
compareParsers.php
Go to the documentation of this file.
1 <?php
31 require_once __DIR__ . '/dumpIterator.php';
32 
40 
/** @var int Revision total that conclusions() prints as the "out of" figure */
41  private $count = 0;
/** @var bool|string False, or the folder given via --save-failed where differing articles are written */
43  private $saveFailed = false;
/** @var bool Whether --strip-parameters was passed; gates stripParameters() */
45  private $stripParametersEnabled;
/** @var bool Whether --show-parsed-output was passed */
47  private $showParsedOutput;
/** @var bool Whether --show-diff was passed */
49  private $showDiff;
/** @var ParserOptions Options built in checkOptions() and handed to both parsers */
51  private $options;
/** @var int Number of revisions whose two renderings differed; reset to 0 in checkOptions() */
53  private $failed;
54 
/**
 * Register the script description and every command-line option.
 *
 * 'parser1' and 'parser2' are mandatory and take a value. 'save-failed'
 * and 'diff-bin' are optional and take a value. The remaining options
 * are optional boolean flags.
 */
public function __construct() {
	parent::__construct();
	$this->addDescription( 'Run a file or dump with several parsers' );
	$this->addOption( 'parser1', 'The first parser to compare.', true, true );
	$this->addOption( 'parser2', 'The second parser to compare.', true, true );
	$this->addOption(
		'save-failed',
		'Folder in which articles which differ will be stored.',
		false,
		true
	);
	$this->addOption( 'show-diff', 'Show a diff of the two renderings.', false, false );
	$this->addOption(
		'diff-bin',
		'Binary to use for diffing (can also be provided by DIFF env var).',
		false,
		// Must accept a value: checkOptions() copies it into $wgDiff as the
		// path of the diff binary, so a bare flag would be meaningless.
		true
	);
	$this->addOption(
		'strip-parameters',
		'Remove parameters of html tags to increase readability.',
		false,
		false
	);
	$this->addOption(
		'show-parsed-output',
		'Show the parsed html if both Parsers give the same output.',
		false,
		false
	);
}
86 
/**
 * Copy the command-line options into instance state before processing starts.
 *
 * Also overrides the global $wgDiff when --show-diff is active and a
 * non-empty binary was supplied via --diff-bin or the DIFF environment
 * variable, builds the ParserOptions shared by both parsers, and resets
 * the failure counter.
 */
public function checkOptions() {
	$this->showDiff = $this->hasOption( 'show-diff' );
	$this->showParsedOutput = $this->hasOption( 'show-parsed-output' );
	$this->stripParametersEnabled = $this->hasOption( 'strip-parameters' );

	// Leave the declared default (false) untouched unless a folder was given.
	if ( $this->hasOption( 'save-failed' ) ) {
		$this->saveFailed = $this->getOption( 'save-failed' );
	}

	// The diff binary is only looked up when a diff was actually requested.
	$diffBinary = $this->showDiff
		? $this->getOption( 'diff-bin', getenv( 'DIFF' ) )
		: '';
	if ( $diffBinary != '' ) {
		global $wgDiff;
		$wgDiff = $diffBinary;
	}

	$this->options = ParserOptions::newFromUser( new User() );
	$this->failed = 0;
}
109 
/**
 * Report how many revisions produced differing output.
 *
 * The summary line goes through error(); when at least one revision was
 * counted, the failure rate follows through output() as a percentage.
 */
public function conclusions() {
	$this->error( "{$this->failed} failed revisions out of {$this->count}" );
	if ( $this->count > 0 ) {
		// Scale the ratio to 0..100 so the value matches the '%' sign printed after it.
		$percentage = round( 100 * $this->failed / $this->count, 2 );
		$this->output( " ({$percentage}%)\n" );
	}
}
116 
/**
 * Optionally reduce every "<a ...>" opening tag to a bare "<a>".
 *
 * @param string $text HTML to clean up
 * @return string $text unchanged when stripping is disabled, otherwise the
 *  result of the replacement
 */
private function stripParameters( $text ) {
	return $this->stripParametersEnabled
		? preg_replace( '/(<a) [^>]+>/', '$1>', $text )
		: $text;
}
124 
/**
 * Callback function for each revision, parse with both parsers and compare.
 *
 * Revisions whose content model is not wikitext are reported and skipped.
 * For every compared revision the counter is advanced; when the two
 * renderings differ the failure counter is advanced too, the wikitext is
 * optionally saved to the --save-failed folder and a unified diff is
 * optionally printed. When they match, the title is printed with "OK"
 * and, if requested, the rendered HTML.
 *
 * @param WikiRevision $rev Revision to render with both parsers
 */
public function processRevision( WikiRevision $rev ) {
	$title = $rev->getTitle();

	$parser1Name = $this->getOption( 'parser1' );
	$parser2Name = $this->getOption( 'parser2' );

	// Make parser classes stored as "<name>.php" in the working directory loadable.
	self::checkParserLocally( $parser1Name );
	self::checkParserLocally( $parser2Name );

	$parser1 = new $parser1Name();
	$parser2 = new $parser2Name();

	$content = $rev->getContent();

	if ( $content->getModel() !== CONTENT_MODEL_WIKITEXT ) {
		$this->error( "Page {$title->getPrefixedText()} does not contain wikitext "
			. "but {$content->getModel()}\n" );

		return;
	}

	// $count is private to this class, so it has to be advanced here;
	// otherwise conclusions() would always report "out of 0".
	$this->count++;

	'@phan-var WikitextContent $content';
	$text = strval( $content->getText() );
	$pageName = $title->getPrefixedText();

	$output1 = $parser1->parse( $text, $title, $this->options );
	$output2 = $parser2->parse( $text, $title, $this->options );

	// Fetch each rendering once instead of re-requesting it for every use.
	$html1 = $output1->getText();
	$html2 = $output2->getText();

	// Strict comparison: loose "!=" treats distinct numeric strings
	// (for example "1e3" and "1000") as equal and would hide a difference.
	if ( $html1 !== $html2 ) {
		$this->failed++;
		$this->error( "Parsing for {$pageName} differs\n" );

		if ( $this->saveFailed ) {
			$path = $this->saveFailed . '/' . rawurlencode( $pageName ) . ".txt";
			if ( file_put_contents( $path, $text ) === false ) {
				// Keep going: a failed save must not abort the comparison run.
				$this->error( "Could not save {$pageName} to {$path}\n" );
			}
		}
		if ( $this->showDiff ) {
			$diffs = new Diff(
				explode( "\n", $this->stripParameters( $html1 ) ),
				explode( "\n", $this->stripParameters( $html2 ) )
			);
			$formatter = new UnifiedDiffFormatter();
			$unifiedDiff = $formatter->format( $diffs );

			$this->output( $unifiedDiff );
		}
	} else {
		$this->output( $pageName . "\tOK\n" );

		if ( $this->showParsedOutput ) {
			$this->output( $this->stripParameters( $html1 ) );
		}
	}
}
185 
/**
 * Register a parser class for autoloading when it is not yet known and a
 * file named "<parserName>.php" exists in the current working directory.
 *
 * @param string $parserName Class name of the parser to look for
 */
private static function checkParserLocally( $parserName ) {
	// Nothing to do when the class is already available.
	if ( class_exists( $parserName ) ) {
		return;
	}

	// No appropriately named file in the current folder: leave autoloading alone.
	if ( !file_exists( "$parserName.php" ) ) {
		return;
	}

	global $wgAutoloadClasses;
	$wgAutoloadClasses[$parserName] = realpath( '.' ) . "/$parserName.php";
}
193 }
194 
// Record this script's class name in $maintClass.
195 $maintClass = CompareParsers::class;
// NOTE(review): RUN_MAINTENANCE_IF_MAIN is defined outside this file; presumably it
// names the bootstrap that runs $maintClass when this script is the entry point — confirm.
196 require_once RUN_MAINTENANCE_IF_MAIN;
const CONTENT_MODEL_WIKITEXT
Definition: Defines.php:211
$wgAutoloadClasses
Definition: Setup.php:141
Maintenance script to take page text out of an XML dump file and render basic HTML out to files.
conclusions()
Stub function for giving data about what was computed.
processRevision(WikiRevision $rev)
Callback function for each revision, parse with both parsers and compare.
__construct()
Default constructor.
checkOptions()
Stub function for processing additional options.
Class representing a 'diff' between two sequences of strings.
Definition: Diff.php:32
Base class for iterating over a dump.
error( $err, $die=0)
Throw an error to the user.
output( $out, $channel=null)
Throw some output to the user.
hasOption( $name)
Checks to see if a particular option was set.
addDescription( $text)
Set the description text.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
static newFromUser( $user)
Get a ParserOptions object from a given user.
A formatter that outputs unified diffs.
The User object encapsulates all of the user-specific settings (user_id, name, rights,...
Definition: User.php:70
Represents a revision, log entry or upload during the import process.
getContent( $role=SlotRecord::MAIN)
$maintClass
$wgDiff
Config variable stub for the Diff setting, for use by phpdoc and IDEs.
$content
Definition: router.php:76