MediaWiki  master
testCompression.php
Go to the documentation of this file.
<?php
/**
 * Test revision text compression and decompression.
 *
 * Loads revisions of one page, adds their serialized text to a history blob
 * of the requested class, and reports the compression ratio together with
 * the time spent compressing and decompressing. Each item read back from the
 * blob is checked against an md5 digest of the original text.
 *
 * Usage: php testCompression.php [--type=<type>] [--start=<start-date>]
 *        [--limit=<num-revs>] <page-title>
 *
 * @file
 */

use MediaWiki\MediaWikiServices;

$optionsWithArgs = [ 'start', 'limit', 'type' ];
require __DIR__ . '/../commandLine.inc';

if ( !isset( $args[0] ) ) {
	echo "Usage: php testCompression.php [--type=<type>] [--start=<start-date>] " .
		"[--limit=<num-revs>] <page-title>\n";
	exit( 1 );
}

$lang = MediaWikiServices::getInstance()->getLanguageFactory()->getLanguage( 'en' );

// Resolve the page title up front; the query below calls methods on it.
$title = Title::newFromText( $args[0] );
if ( !$title ) {
	echo "Invalid page title: {$args[0]}\n";
	exit( 1 );
}

if ( isset( $options['start'] ) ) {
	// strtotime() returns false for input it cannot parse; refuse that
	// instead of handing a bogus value to wfTimestamp().
	$startUnix = strtotime( $options['start'] );
	if ( $startUnix === false ) {
		echo "Invalid start date: {$options['start']}\n";
		exit( 1 );
	}
	$start = wfTimestamp( TS_MW, $startUnix );
	echo "Starting from " . $lang->timeanddate( $start ) . "\n";
} else {
	$start = '19700101000000';
}

if ( isset( $options['limit'] ) ) {
	// Explicit limit: compress exactly that many revisions.
	$limit = $options['limit'];
	$untilHappy = false;
} else {
	// No limit given: fetch up to 1000 rows, but stop adding as soon as the
	// blob reports that it is no longer "happy".
	$limit = 1000;
	$untilHappy = true;
}
$type = $options['type'] ?? ConcatenatedGzipHistoryBlob::class;

$dbr = wfGetDB( DB_REPLICA );
// The 'page' join is required by the page_namespace/page_title conditions.
// NOTE(review): 'text' is presumably needed so getSerializedData() can read
// the revision text from the row — confirm against Revision::getQueryInfo().
$revQuery = Revision::getQueryInfo( [ 'page', 'text' ] );
$res = $dbr->select(
	$revQuery['tables'],
	$revQuery['fields'],
	[
		'page_namespace' => $title->getNamespace(),
		'page_title' => $title->getDBkey(),
		'rev_timestamp > ' . $dbr->addQuotes( $dbr->timestamp( $start ) ),
	],
	__FILE__,
	[ 'LIMIT' => $limit ],
	$revQuery['joins']
);

$blob = new $type;
$hashes = [];
$keys = [];
$uncompressedSize = 0;

// Timing idiom: start at -now, add +now afterwards to get the elapsed time.
$t = -microtime( true );
foreach ( $res as $row ) {
	$revision = new Revision( $row );
	$text = $revision->getSerializedData();
	$uncompressedSize += strlen( $text );
	$hashes[$row->rev_id] = md5( $text );
	$keys[$row->rev_id] = $blob->addItem( $text );
	if ( $untilHappy && !$blob->isHappy() ) {
		break;
	}
}

// Serialization is counted as part of the compression time.
$serialized = serialize( $blob );
$t += microtime( true );
// print_r( $blob->mDiffMap );

printf( "%s\nCompression ratio for %d revisions: %5.2f, %s -> %d\n",
	$type,
	count( $hashes ),
	$uncompressedSize / strlen( $serialized ),
	$lang->formatSize( $uncompressedSize ),
	strlen( $serialized )
);
printf( "Compression time: %5.2f ms\n", $t * 1000 );

$t = -microtime( true );
// Round-trip through the serialized form so that decompression is measured
// on a freshly restored blob. The payload was produced by this script a few
// lines above, so it is not untrusted input.
$blob = unserialize( $serialized );
foreach ( $keys as $id => $key ) {
	$text = $blob->getItem( $key );
	// Strict comparison: loose != can treat two different numeric-looking
	// hex digests (e.g. "0e...") as equal.
	if ( md5( $text ) !== $hashes[$id] ) {
		echo "Content hash mismatch for rev_id $id\n";
		// var_dump( $text );
	}
}
$t += microtime( true );
printf( "Decompression time: %5.2f ms\n", $t * 1000 );
serialize()
$optionsWithArgs
if(!isset( $args[0])) $lang
wfGetDB( $db, $groups=[], $wiki=false)
Get a Database object.
$untilHappy
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
if( $line===false) $args
Definition: mcc.php:124
unserialize( $serialized)
static getQueryInfo( $options=[])
Return the tables, fields, and join conditions to be selected to create a new revision object...
Definition: Revision.php:316
$revQuery
$uncompressedSize
const DB_REPLICA
Definition: defines.php:25
foreach( $res as $row) $serialized
static newFromText( $text, $defaultNamespace=NS_MAIN)
Create a new Title from text, such as what one would find in a link.
Definition: Title.php:317