MediaWiki  1.23.0
compareParsers.php
Go to the documentation of this file.
1 <?php
31 require_once __DIR__ . '/dumpIterator.php';
32 
40 
41  private $count = 0;
42 
/**
 * Set up the script description and declare its command-line options.
 *
 * Every addOption() call passes, in order: option name, help text,
 * whether the option is required, and whether it takes an argument.
 */
public function __construct() {
	parent::__construct();
	// No folder for differing articles unless --save-failed is given.
	$this->saveFailed = false;
	$this->mDescription = "Run a file or dump with several parsers";
	$this->addOption( 'parser1', 'The first parser to compare.', true, true );
	$this->addOption( 'parser2', 'The second parser to compare.', true, true );
	$this->addOption( 'tidy', 'Run tidy on the articles.', false, false );
	$this->addOption( 'save-failed', 'Folder in which articles which differ will be stored.', false, true );
	$this->addOption( 'show-diff', 'Show a diff of the two renderings.', false, false );
	// 'diff-bin' has to take an argument: checkOptions() reads its value with
	// getOption() and stores it in $wgDiff as the diff binary to run.
	$this->addOption( 'diff-bin', 'Binary to use for diffing (can also be provided by DIFF env var).', false, true );
	$this->addOption( 'strip-parameters', 'Remove parameters of html tags to increase readability.', false, false );
	$this->addOption( 'show-parsed-output', 'Show the parsed html if both Parsers give the same output.', false, false );
}
56 
/**
 * Copy the command-line options into the object's state and prepare the
 * parser options shared by both parsers.
 *
 * Aborts through error( ..., true ) when --tidy is requested while
 * $wgUseTidy is not enabled.
 */
public function checkOptions() {
	if ( $this->hasOption( 'save-failed' ) ) {
		$this->saveFailed = $this->getOption( 'save-failed' );
	}

	$this->stripParametersEnabled = $this->hasOption( 'strip-parameters' );
	$this->showParsedOutput = $this->hasOption( 'show-parsed-output' );

	$this->showDiff = $this->hasOption( 'show-diff' );
	if ( $this->showDiff ) {
		// --diff-bin takes precedence; the DIFF environment variable is the fallback.
		$bin = $this->getOption( 'diff-bin', getenv( 'DIFF' ) );
		// Loose comparison on purpose: getenv() yields false when DIFF is unset,
		// and both false and '' must leave $wgDiff untouched.
		if ( $bin != '' ) {
			global $wgDiff;
			$wgDiff = $bin;
		}
	}

	// Build the parser options that processRevision() hands to both parsers.
	// Without this assignment $this->options is never set and the setTidy()
	// call below (as well as every parse() call) would operate on null.
	$user = new User();
	$this->options = ParserOptions::newFromUser( $user );

	if ( $this->hasOption( 'tidy' ) ) {
		global $wgUseTidy;
		if ( !$wgUseTidy ) {
			$this->error( 'Tidy was requested but $wgUseTidy is not set in LocalSettings.php', true );
		}
		$this->options->setTidy( true );
	}

	// Counter of revisions whose two renderings differ.
	$this->failed = 0;
}
87 
/**
 * Report how many revisions differed, followed by the failure rate as a
 * percentage when at least one revision was counted.
 */
public function conclusions() {
	$this->error( "{$this->failed} failed revisions out of {$this->count}" );
	if ( $this->count > 0 ) {
		// failed / count is a ratio in [0, 1]; scale it by 100 so the value
		// printed in front of the "%" sign really is a percentage.
		$percentage = round( 100 * $this->failed / $this->count, 2 );
		$this->output( " (" . $percentage . "%)\n" );
	}
}
94 
/**
 * Drop the attributes of <a> tags from the given HTML when the
 * strip-parameters mode is enabled; otherwise hand the text back untouched.
 *
 * @param string $text HTML to clean up
 * @return string The (possibly) stripped HTML
 */
function stripParameters( $text ) {
	return $this->stripParametersEnabled
		? preg_replace( '/(<a) [^>]+>/', '$1>', $text )
		: $text;
}
101 
/**
 * Callback for each revision: parse its wikitext with both parsers and
 * compare the two renderings.
 *
 * A mismatch increments the failure counter, is reported through error(),
 * and - depending on the options - the wikitext is saved and/or a diff is
 * printed. A match prints "<title>\tOK" and optionally the parsed HTML.
 *
 * @param $rev Revision object; must provide getTitle() and getContent()
 */
public function processRevision( $rev ) {
	$title = $rev->getTitle();

	$parser1Name = $this->getOption( 'parser1' );
	$parser2Name = $this->getOption( 'parser2' );

	// Give both class names a chance to be autoloaded from the current folder.
	self::checkParserLocally( $parser1Name );
	self::checkParserLocally( $parser2Name );

	$parser1 = new $parser1Name();
	$parser2 = new $parser2Name();

	$content = $rev->getContent();

	// Only wikitext can be fed to the parsers; skip every other content model.
	if ( $content->getModel() !== CONTENT_MODEL_WIKITEXT ) {
		$this->error( "Page {$title->getPrefixedText()} does not contain wikitext but {$content->getModel()}\n" );
		return;
	}

	$text = strval( $content->getNativeData() );

	$output1 = $parser1->parse( $text, $title, $this->options );
	$output2 = $parser2->parse( $text, $title, $this->options );

	// Fetch each rendering once and reuse it below.
	$html1 = $output1->getText();
	$html2 = $output2->getText();

	// Strict comparison: with a loose != PHP compares numeric-looking strings
	// by value (e.g. "1e1" == "10"), which would hide a real difference.
	if ( $html1 !== $html2 ) {
		$this->failed++;
		$this->error( "Parsing for {$title->getPrefixedText()} differs\n" );

		if ( $this->saveFailed ) {
			// Keep the offending wikitext, named after the URL-encoded title.
			file_put_contents( $this->saveFailed . '/' . rawurlencode( $title->getPrefixedText() ) . ".txt", $text );
		}
		if ( $this->showDiff ) {
			$this->output( wfDiff( $this->stripParameters( $html1 ), $this->stripParameters( $html2 ), '' ) );
		}
	} else {
		$this->output( $title->getPrefixedText() . "\tOK\n" );
		if ( $this->showParsedOutput ) {
			$this->output( $this->stripParameters( $html1 ) );
		}
	}
}
147 
/**
 * Register a parser class for autoloading when it is not known yet but a
 * file called "<parserName>.php" exists in the current working directory.
 *
 * @param string $parserName Class name of the parser
 */
private static function checkParserLocally( $parserName ) {
	/* Look for the parser in a file appropriately named in the current folder */
	if ( !class_exists( $parserName ) && file_exists( "$parserName.php" ) ) {
		// The autoload map is a global; without this declaration the
		// assignment would only fill a local array and have no effect.
		global $wgAutoloadClasses;
		$wgAutoloadClasses[$parserName] = realpath( '.' ) . "/$parserName.php";
	}
}
155 
156 }
157 
// Name of the maintenance class defined in this file.
// NOTE(review): presumably picked up by the file behind RUN_MAINTENANCE_IF_MAIN - confirm.
$maintClass = 'CompareParsers';
require_once RUN_MAINTENANCE_IF_MAIN;
CompareParsers\$count
$count
Definition: compareParsers.php:41
CompareParsers\checkOptions
checkOptions()
Definition: compareParsers.php:57
php
skin txt MediaWiki includes four core it has been set as the default in MediaWiki since the replacing Monobook it had been been the default skin since before being replaced by Vector largely rewritten in while keeping its appearance Several legacy skins were removed in the as the burden of supporting them became too heavy to bear Those in etc for skin dependent CSS etc for skin dependent JavaScript These can also be customised on a per user by etc This feature has led to a wide variety of user styles becoming that gallery is a good place to ending in php
Definition: skin.txt:62
wfDiff
wfDiff( $before, $after, $params='-u')
Returns unified plain-text diff of two texts.
Definition: GlobalFunctions.php:3164
CompareParsers\stripParameters
stripParameters( $text)
Definition: compareParsers.php:95
$wgAutoloadClasses
global $wgAutoloadClasses
Definition: TestsAutoLoader.php:24
CompareParsers\checkParserLocally
static checkParserLocally( $parserName)
Definition: compareParsers.php:148
Maintenance\addOption
addOption( $name, $description, $required=false, $withArg=false, $shortName=false)
Add a parameter to the script.
Definition: Maintenance.php:169
RUN_MAINTENANCE_IF_MAIN
require_once RUN_MAINTENANCE_IF_MAIN
Definition: maintenance.txt:50
CONTENT_MODEL_WIKITEXT
const CONTENT_MODEL_WIKITEXT
Definition: Defines.php:283
CompareParsers\conclusions
conclusions()
Definition: compareParsers.php:88
$maintClass
$maintClass
Definition: compareParsers.php:158
CompareParsers\__construct
__construct()
Default constructor.
Definition: compareParsers.php:43
CompareParsers\processRevision
processRevision( $rev)
Callback function for each revision, parse with both parsers and compare.
Definition: compareParsers.php:106
global
when a variable name is used in a it is silently declared as a new masking the global
Definition: design.txt:93
$title
presenting them properly to the user as errors is done by the caller $title
Definition: hooks.txt:1324
options
We ve cleaned up the code here by removing clumps of infrequently used code and moving them off somewhere else It s much easier for someone working with this code to see what s _really_ going and make changes or fix bugs In we can take all the code that deals with the little used title reversing options(say) and put it in one place. Instead of having little title-reversing if-blocks spread all over the codebase in showAnArticle
$user
please add to it if you re going to add events to the MediaWiki code where normally authentication against an external auth plugin would be creating a account $user
Definition: hooks.txt:237
DumpIterator
Base class for iterating over a dump.
Definition: dumpIterator.php:36
$rev
presenting them properly to the user as errors is done by the caller return true use this to change the list i e etc $rev
Definition: hooks.txt:1337
Maintenance\getOption
getOption( $name, $default=null)
Get an option, or return the default.
Definition: Maintenance.php:191
CompareParsers
Maintenance script to take page text out of a file or an XML dump, parse each revision with two parsers and compare their renderings.
Definition: compareParsers.php:39
User
User
Definition: All_system_messages.txt:425
Maintenance\error
error( $err, $die=0)
Throw an error to the user.
Definition: Maintenance.php:333
Maintenance\output
output( $out, $channel=null)
Throw some output to the user.
Definition: Maintenance.php:314
Maintenance\hasOption
hasOption( $name)
Checks to see if a particular param exists.
Definition: Maintenance.php:181
ParserOptions\newFromUser
static newFromUser( $user)
Get a ParserOptions object from a given user.
Definition: ParserOptions.php:375