MediaWiki  master
compareParserCache.php
Go to the documentation of this file.
1 <?php
22 require_once __DIR__ . '/Maintenance.php';
23 
27 
/**
 * Set up the script description and its command-line options.
 *
 * Both options are registered as required and as taking an argument.
 */
public function __construct() {
	parent::__construct();
	$this->addDescription( 'Parse random pages and compare output to cache.' );

	// Option name => help text; registration order is preserved.
	$requiredOptions = [
		'namespace' => 'Page namespace number',
		'maxpages' => 'Number of pages to try',
	];
	foreach ( $requiredOptions as $optionName => $helpText ) {
		// Third argument: required; fourth argument: takes a value.
		$this->addOption( $optionName, $helpText, true, true );
	}
}
38 
/**
 * Re-parse randomly selected pages and compare the fresh output with the
 * parser cache.
 *
 * Makes up to `maxpages` attempts. Each attempt selects one non-redirect
 * page in the requested namespace, looks up its parser cache entry and,
 * when one exists, renders the current revision again and prints a unified
 * diff of the two HTML outputs (with HTML comments stripped). A summary
 * with the counters and the average parse time is printed at the end.
 */
public function execute() {
	// Cast to int: the option arrives as a string, and a non-numeric string
	// is not changed by `--` while still comparing greater than 0 on PHP 8,
	// which would make the loop below spin forever.
	$maxPages = (int)$this->getOption( 'maxpages' );
	// Loop-invariant, so look it up once.
	$namespace = $this->getOption( 'namespace' );

	$dbr = $this->getDB( DB_REPLICA );

	$totalsec = 0.0;
	$scanned = 0;
	$withcache = 0;
	$withdiff = 0;

	$services = $this->getServiceContainer();
	$parserCache = $services->getParserCache();
	$renderer = $services->getRevisionRenderer();
	$wikiPageFactory = $services->getWikiPageFactory();

	// Matches HTML comments (ungreedy, dot matches newlines).
	// NOTE(review): presumably these are per-parse report comments that
	// would otherwise always differ between two renders - confirm.
	$commentPattern = '#<!-- .+-->#Us';

	for ( $attempt = 0; $attempt < $maxPages; $attempt++ ) {
		// Pick the first page whose page_random is at or above a random
		// threshold.
		$row = $dbr->newSelectQueryBuilder()
			// @todo Title::selectFields() or Title::getQueryInfo() or something
			->select( [
				'page_namespace',
				'page_title',
				'page_id',
				'page_len',
				'page_is_redirect',
				'page_latest',
			] )
			->from( 'page' )
			->where( [
				'page_namespace' => $namespace,
				'page_is_redirect' => 0,
				'page_random >= ' . wfRandom()
			] )
			->orderBy( 'page_random' )
			->caller( __METHOD__ )->fetchRow();

		if ( !$row ) {
			// No page at or above the threshold; this attempt is still
			// counted against maxpages.
			continue;
		}
		++$scanned;

		$title = Title::newFromRow( $row );
		$page = $wikiPageFactory->newFromTitle( $title );
		$revision = $page->getRevisionRecord();
		$parserOptions = $page->makeParserOptions( 'canonical' );

		$parserOutputOld = $parserCache->get( $page, $parserOptions );

		if ( !$parserOutputOld ) {
			$this->output( "No parser cache entry found for '{$title->getPrefixedText()}'.\n" );
			continue;
		}

		// Time only the fresh render of the current revision.
		$t1 = microtime( true );
		$parserOutputNew = $renderer->getRenderedRevision( $revision, $parserOptions )
			->getRevisionParserOutput();
		$sec = microtime( true ) - $t1;
		$totalsec += $sec;

		$this->output( "Parsed '{$title->getPrefixedText()}' in $sec seconds.\n" );

		// No trailing newline: the verdict printed below completes this line.
		$this->output( "Found cache entry for '{$title->getPrefixedText()}'..." );

		$oldHtml = trim( preg_replace( $commentPattern, '', $parserOutputOld->getText() ) );
		$newHtml = trim( preg_replace( $commentPattern, '', $parserOutputNew->getText() ) );
		$diffs = new Diff( explode( "\n", $oldHtml ), explode( "\n", $newHtml ) );
		$formatter = new UnifiedDiffFormatter();
		$unifiedDiff = $formatter->format( $diffs );

		if ( strlen( $unifiedDiff ) ) {
			$this->output( "differences found:\n\n$unifiedDiff\n\n" );
			++$withdiff;
		} else {
			$this->output( "No differences found.\n" );
		}
		++$withcache;
	}

	// Parse time is accumulated only for pages that had a cache entry, so
	// average over those pages rather than over every scanned page.
	$ave = $withcache > 0 ? $totalsec / $withcache : 0;
	$this->output( "Checked $scanned pages; $withcache had prior cache entries.\n" );
	$this->output( "Pages with differences found: $withdiff\n" );
	$this->output( "Average parse time: $ave sec\n" );
}
119 }
120 
// Record this script's class name in $maintClass before loading the runner file.
// NOTE(review): presumably the file named by RUN_MAINTENANCE_IF_MAIN reads
// $maintClass and executes the script when this file is the entry point -
// confirm against Maintenance.php.
121 $maintClass = CompareParserCache::class;
122 require_once RUN_MAINTENANCE_IF_MAIN;
wfRandom()
Get a random decimal value in the domain of [0, 1), in a way not likely to give duplicate values for ...
execute()
Do the actual work.
__construct()
Default constructor.
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
Definition: Maintenance.php:66
getDB( $db, $groups=[], $dbDomain=false)
Returns a database to be used by current maintenance script.
output( $out, $channel=null)
Throw some output to the user.
getServiceContainer()
Returns the main service container.
addDescription( $text)
Set the description text.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
Represents a title within MediaWiki.
Definition: Title.php:76
Class representing a 'diff' between two sequences of strings.
Definition: Diff.php:34
A formatter that outputs unified diffs.
const DB_REPLICA
Definition: defines.php:26