MediaWiki  master
testCompression.php
Go to the documentation of this file.
<?php
/**
 * Command-line script that measures how well a history blob class compresses
 * the revision texts of one page, then verifies that every text can be read
 * back from the blob unchanged.
 *
 * Usage: php testCompression.php [--type=<type>] [--start=<start-date>]
 *        [--limit=<num-revs>] <page-title>
 *
 * Options:
 *  --type   Name of the blob class to instantiate
 *           (default: ConcatenatedGzipHistoryBlob).
 *  --start  Only revisions with a timestamp later than this date are used.
 *           Parsed with strtotime().
 *  --limit  Maximum number of revisions to read. When omitted, up to 1000
 *           revisions are read and the loop stops as soon as the blob's
 *           isHappy() returns false.
 */

$optionsWithArgs = [ 'start', 'limit', 'type' ];
require __DIR__ . '/../commandLine.inc';

if ( !isset( $args[0] ) ) {
	echo "Usage: php testCompression.php [--type=<type>] [--start=<start-date>] " .
		"[--limit=<num-revs>] <page-title>\n";
	exit( 1 );
}

$lang = Language::factory( 'en' );
$title = Title::newFromText( $args[0] );
if ( !$title ) {
	// NOTE(review): assumes newFromText() yields a falsy value for titles it
	// cannot parse; without this guard the query below would fatal on a
	// method call on a non-object.
	echo "Invalid page title: {$args[0]}\n";
	exit( 1 );
}

if ( isset( $options['start'] ) ) {
	$startUnix = strtotime( $options['start'] );
	if ( $startUnix === false ) {
		// strtotime() returns false for input it cannot parse; stop here
		// instead of handing a bogus timestamp to the query.
		echo "Unable to parse start date: {$options['start']}\n";
		exit( 1 );
	}
	$start = wfTimestamp( TS_MW, $startUnix );
	echo "Starting from " . $lang->timeanddate( $start ) . "\n";
} else {
	$start = '19700101000000';
}

if ( isset( $options['limit'] ) ) {
	$limit = $options['limit'];
	$untilHappy = false;
} else {
	$limit = 1000;
	$untilHappy = true;
}

$type = $options['type'] ?? ConcatenatedGzipHistoryBlob::class;
if ( !class_exists( $type ) ) {
	// "new $type" on an unknown class is a fatal error; report it cleanly.
	echo "Unknown blob class: $type\n";
	exit( 1 );
}

$dbr = wfGetDB( DB_REPLICA );
// The conditions below filter on page_namespace/page_title, so the page
// table has to be part of the revision query.
$revQuery = Revision::getQueryInfo( [ 'page' ] );
$res = $dbr->select(
	$revQuery['tables'],
	$revQuery['fields'],
	[
		'page_namespace' => $title->getNamespace(),
		'page_title' => $title->getDBkey(),
		'rev_timestamp > ' . $dbr->addQuotes( $dbr->timestamp( $start ) ),
	],
	__FILE__,
	[ 'LIMIT' => $limit ],
	$revQuery['joins']
);

$blob = new $type;
// rev_id => MD5 of the original text, used to verify the round trip.
$hashes = [];
// rev_id => key returned by addItem(), used to fetch the text back.
$keys = [];
$uncompressedSize = 0;

// Timing idiom: start at -now, add now at the end to obtain the elapsed time.
$t = -microtime( true );
foreach ( $res as $row ) {
	$revision = new Revision( $row );
	$text = $revision->getSerializedData();
	$uncompressedSize += strlen( $text );
	$hashes[$row->rev_id] = md5( $text );
	$keys[$row->rev_id] = $blob->addItem( $text );
	if ( $untilHappy && !$blob->isHappy() ) {
		break;
	}
}

// Serialization is counted as part of the compression time.
$serialized = serialize( $blob );
$t += microtime( true );
# print_r( $blob->mDiffMap );

printf( "%s\nCompression ratio for %d revisions: %5.2f, %s -> %d\n",
	$type,
	count( $hashes ),
	$uncompressedSize / strlen( $serialized ),
	$lang->formatSize( $uncompressedSize ),
	strlen( $serialized )
);
printf( "Compression time: %5.2f ms\n", $t * 1000 );

$t = -microtime( true );
// Round-trip through the serialized form so that decompression is measured
// on a freshly restored object. The input was produced by this script a few
// lines above, so it is not untrusted data.
$blob = unserialize( $serialized );
foreach ( $keys as $id => $key ) {
	$text = $blob->getItem( $key );
	// Strict comparison: with "!=" two different digests that both look like
	// "0e<digits>" are compared as numbers and treated as equal, which would
	// hide a real mismatch.
	if ( md5( $text ) !== $hashes[$id] ) {
		echo "Content hash mismatch for rev_id $id\n";
		# var_dump( $text );
	}
}
$t += microtime( true );
printf( "Decompression time: %5.2f ms\n", $t * 1000 );
serialize()
$optionsWithArgs
if(!isset( $args[0])) $lang
wfGetDB( $db, $groups=[], $wiki=false)
Get a Database object.
$untilHappy
if( $line===false) $args
Definition: cdb.php:64
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
unserialize( $serialized)
static getQueryInfo( $options=[])
Return the tables, fields, and join conditions to be selected to create a new revision object...
Definition: Revision.php:315
static factory( $code)
Get a cached or new language object for a given language code.
Definition: Language.php:218
$revQuery
$uncompressedSize
const DB_REPLICA
Definition: defines.php:25
foreach( $res as $row) $serialized
static newFromText( $text, $defaultNamespace=NS_MAIN)
Create a new Title from text, such as what one would find in a link.
Definition: Title.php:316