MediaWiki REL1_31
testCompression.php
Go to the documentation of this file.
1<?php
/**
 * Measure how well a history-blob class compresses the revisions of one page.
 *
 * Usage: php testCompression.php [--type=<type>] [--start=<start-date>]
 *        [--limit=<num-revs>] <page-title>
 *
 * The script loads revisions of the given page, adds their serialized text to
 * a blob object of class <type>, serializes the blob, and prints the
 * compression ratio and timing. It then unserializes the blob again and checks
 * every item against the md5 hash of the original text.
 */

// Must be assigned before commandLine.inc is loaded (presumably consumed there
// to decide which options take a value -- confirm against commandLine.inc).
$optionsWithArgs = [ 'start', 'limit', 'type' ];
require __DIR__ . '/../commandLine.inc';

if ( !isset( $args[0] ) ) {
	echo "Usage: php testCompression.php [--type=<type>] [--start=<start-date>] " .
		"[--limit=<num-revs>] <page-title>\n";
	exit( 1 );
}

$lang = Language::factory( 'en' );
$title = Title::newFromText( $args[0] );
if ( !$title ) {
	// Without a Title object the getNamespace()/getDBkey() calls below would be fatal.
	echo "Invalid page title: {$args[0]}\n";
	exit( 1 );
}

if ( isset( $options['start'] ) ) {
	$startUnix = strtotime( $options['start'] );
	if ( $startUnix === false ) {
		// strtotime() returns false for text it cannot parse.
		echo "Invalid start date: {$options['start']}\n";
		exit( 1 );
	}
	$start = wfTimestamp( TS_MW, $startUnix );
	echo "Starting from " . $lang->timeanddate( $start ) . "\n";
} else {
	$start = '19700101000000';
}

if ( isset( $options['limit'] ) ) {
	// Explicit limit: add exactly that many revisions, ignoring isHappy().
	$limit = $options['limit'];
	$untilHappy = false;
} else {
	// No explicit limit: fetch up to 1000 revisions but stop adding as soon
	// as $blob->isHappy() returns false.
	$limit = 1000;
	$untilHappy = true;
}
$type = isset( $options['type'] ) ? $options['type'] : ConcatenatedGzipHistoryBlob::class;

// This is a plain script, not a Maintenance subclass, so there is no $this;
// use the global accessor to obtain the replica connection.
$dbr = wfGetDB( DB_REPLICA );
$revQuery = Revision::getQueryInfo( [ 'page', 'text' ] );
$res = $dbr->select(
	$revQuery['tables'],
	$revQuery['fields'],
	[
		'page_namespace' => $title->getNamespace(),
		'page_title' => $title->getDBkey(),
		'rev_timestamp > ' . $dbr->addQuotes( $dbr->timestamp( $start ) ),
	],
	__FILE__,
	[ 'LIMIT' => $limit ],
	$revQuery['joins']
);

$blob = new $type;
// rev_id => md5 of the original text, used for verification after the round trip.
$hashes = [];
// rev_id => key returned by $blob->addItem(), used to fetch the item back.
$keys = [];
$uncompressedSize = 0;

// Timing idiom: start with the negated start time, add the end time afterwards.
$t = -microtime( true );
foreach ( $res as $row ) {
	$revision = new Revision( $row );
	$text = $revision->getSerializedData();
	$uncompressedSize += strlen( $text );
	$hashes[$row->rev_id] = md5( $text );
	$keys[$row->rev_id] = $blob->addItem( $text );
	if ( $untilHappy && !$blob->isHappy() ) {
		break;
	}
}

$serialized = serialize( $blob );
$t += microtime( true );
# print_r( $blob->mDiffMap );

printf( "%s\nCompression ratio for %d revisions: %5.2f, %s -> %d\n",
	$type,
	count( $hashes ),
	$uncompressedSize / strlen( $serialized ),
	$lang->formatSize( $uncompressedSize ),
	strlen( $serialized )
);
printf( "Compression time: %5.2f ms\n", $t * 1000 );

$t = -microtime( true );
// $serialized was produced by serialize() a few lines above, so it is trusted input.
$blob = unserialize( $serialized );
foreach ( $keys as $id => $key ) {
	$text = $blob->getItem( $key );
	// Strict comparison: loose != would compare two all-numeric "0e..." digests as numbers.
	if ( md5( $text ) !== $hashes[$id] ) {
		echo "Content hash mismatch for rev_id $id\n";
		# var_dump( $text );
	}
}
$t += microtime( true );
printf( "Decompression time: %5.2f ms\n", $t * 1000 );
serialize()
unserialize( $serialized)
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
if( $line===false) $args
Definition cdb.php:64
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped & $options
Definition hooks.txt:2001
const DB_REPLICA
Definition defines.php:25
foreach( $res as $row) $serialized
$optionsWithArgs
if(!isset( $args[0])) $lang
$uncompressedSize
$untilHappy