MediaWiki  master
testCompression.php
Go to the documentation of this file.
1 <?php
27 
28 require_once __DIR__ . '/../Maintenance.php';
29 
/**
 * Declare the command-line interface of the script: one positional
 * argument naming the page, plus three options that each take a value
 * and are not required.
 */
public function __construct() {
	parent::__construct();

	$this->addArg( 'title', 'The page to test' );

	// Option name => help text. Every entry is registered the same way:
	// not required ($required = false) and expecting a value ($withArg = true).
	$valueOptions = [
		'type' => 'The HistoryBlob subclass to use',
		'start' => 'The start date',
		'limit' => 'Maximum number of revisions to process',
	];
	foreach ( $valueOptions as $optionName => $helpText ) {
		$this->addOption( $optionName, $helpText, false, true );
	}
}
40 
/**
 * Compress the revisions of one page into a single blob object and report
 * the compression ratio together with compression/decompression timings.
 *
 * Revisions of the page named by the first argument whose timestamp is
 * later than --start (default 19700101000000) are added to a new instance
 * of the class given by --type (default ConcatenatedGzipHistoryBlob).
 * With --limit, that many rows are requested and all of them are added;
 * without it, up to 1000 rows are requested and the loop stops as soon as
 * the blob's isHappy() returns false. The blob is then serialized,
 * unserialized again, and every stored item is checked against the MD5
 * hash of the text that was originally added.
 *
 * Invalid input (unparseable title or start date, non-positive limit,
 * unknown blob class) aborts the script through fatalError() before any
 * database access.
 */
public function execute() {
	$lang = $this->getServiceContainer()->getLanguageFactory()->getLanguage( 'en' );

	// Title::newFromText() yields null for text that is not a valid title;
	// fail with a message instead of a fatal "member function on null".
	$titleText = $this->getArg( 0 );
	$title = Title::newFromText( $titleText );
	if ( $title === null ) {
		$this->fatalError( "Invalid title: $titleText" );
	}

	if ( $this->hasOption( 'start' ) ) {
		// strtotime() returns false when it cannot parse its input.
		$startUnix = strtotime( $this->getOption( 'start' ) );
		if ( $startUnix === false ) {
			$this->fatalError( "Unable to parse start date: " . $this->getOption( 'start' ) );
		}
		$start = wfTimestamp( TS_MW, $startUnix );
		echo "Starting from " . $lang->timeanddate( $start ) . "\n";
	} else {
		$start = '19700101000000';
	}

	if ( $this->hasOption( 'limit' ) ) {
		// Options arrive as strings; normalise to an integer and reject
		// values that cannot select any revision.
		$limit = (int)$this->getOption( 'limit' );
		if ( $limit < 1 ) {
			$this->fatalError( "The limit must be a positive integer" );
		}
		$untilHappy = false;
	} else {
		$limit = 1000;
		$untilHappy = true;
	}

	// The class name comes from the command line, so make sure it can be
	// instantiated before doing any work.
	$type = $this->getOption( 'type', ConcatenatedGzipHistoryBlob::class );
	if ( !class_exists( $type ) ) {
		$this->fatalError( "Unknown blob class: $type" );
	}

	$dbr = $this->getDB( DB_REPLICA );

	$revStore = $this->getServiceContainer()->getRevisionStore();
	$res = $revStore->newSelectQueryBuilder( $dbr )
		->joinComment()
		->joinPage()
		->where( [
			'page_namespace' => $title->getNamespace(),
			'page_title' => $title->getDBkey(),
			'rev_timestamp > ' . $dbr->addQuotes( $dbr->timestamp( $start ) ),
		] )
		->limit( $limit )
		->caller( __FILE__ )->fetchResultSet();

	$blob = new $type;
	$hashes = [];
	$keys = [];
	$uncompressedSize = 0;
	// Timer idiom: start from the negated clock, add the clock again later;
	// the sum is the elapsed time in seconds.
	$t = -microtime( true );
	foreach ( $res as $row ) {
		$revRecord = $revStore->newRevisionFromRow( $row );
		$text = $revRecord->getSlot( SlotRecord::MAIN, RevisionRecord::RAW )
			->getContent()
			->serialize();
		$uncompressedSize += strlen( $text );
		// Remember the hash of the input and the key returned by the blob,
		// both indexed by revision ID, for the round-trip check below.
		$hashes[$row->rev_id] = md5( $text );
		$keys[$row->rev_id] = $blob->addItem( $text );
		if ( $untilHappy && !$blob->isHappy() ) {
			break;
		}
	}

	// Serialization is part of the measured compression time.
	$serialized = serialize( $blob );
	$t += microtime( true );

	printf( "%s\nCompression ratio for %d revisions: %5.2f, %s -> %d\n",
		$type,
		count( $hashes ),
		$uncompressedSize / strlen( $serialized ),
		$lang->formatSize( $uncompressedSize ),
		strlen( $serialized )
	);
	printf( "Compression time: %5.2f ms\n", $t * 1000 );

	$t = -microtime( true );
	$blob = unserialize( $serialized );
	foreach ( $keys as $id => $key ) {
		$text = $blob->getItem( $key );
		// Strict comparison: with loose "!=" two different digests that both
		// look like "0e<digits>" are compared as numbers and treated as equal,
		// which would hide a corrupted item.
		if ( md5( $text ) !== $hashes[$id] ) {
			echo "Content hash mismatch for rev_id $id\n";
		}
	}
	$t += microtime( true );
	printf( "Decompression time: %5.2f ms\n", $t * 1000 );
}
117 }
118 
// Store this script's class name in $maintClass; presumably it is read by the
// file required on the next line — TODO confirm, the consumer is not in this file.
119 $maintClass = TestCompression::class;
// RUN_MAINTENANCE_IF_MAIN is a constant defined outside this file.
// NOTE(review): it appears to name the bootstrap that runs $maintClass when
// this script is invoked directly — confirm against Maintenance.php.
120 require_once RUN_MAINTENANCE_IF_MAIN;
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
Definition: Maintenance.php:66
getDB( $db, $groups=[], $dbDomain=false)
Returns a database to be used by current maintenance script.
addArg( $arg, $description, $required=true, $multi=false)
Add some args that are needed.
hasOption( $name)
Checks to see if a particular option was set.
getServiceContainer()
Returns the main service container.
getArg( $argId=0, $default=null)
Get an argument.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
Page revision base class.
Value object representing a content slot associated with a page revision.
Definition: SlotRecord.php:40
Represents a title within MediaWiki.
Definition: Title.php:76
execute()
Do the actual work.
__construct()
Default constructor.
const DB_REPLICA
Definition: defines.php:26
$maintClass