MediaWiki REL1_33
testCompression.php
Go to the documentation of this file.
<?php
/**
 * Test revision text compression and decompression.
 *
 * Selects revisions of a single page, adds the serialized text of each one
 * to a history blob of the requested class, serializes the blob, and prints
 * the resulting compression ratio and timing. The blob is then unserialized
 * and every stored item is read back and compared against the MD5 hash of
 * the text that was originally added.
 *
 * Usage: php testCompression.php [--type=<type>] [--start=<start-date>]
 *   [--limit=<num-revs>] <page-title>
 *
 *  --type   Name of the blob class to instantiate
 *           (default: ConcatenatedGzipHistoryBlob).
 *  --start  Only use revisions with a timestamp later than this date
 *           (parsed with strtotime()).
 *  --limit  Maximum number of revisions to select. When omitted, at most
 *           1000 revisions are selected and the loop stops as soon as the
 *           blob's isHappy() returns false.
 */

$optionsWithArgs = [ 'start', 'limit', 'type' ];
require __DIR__ . '/../commandLine.inc';

if ( !isset( $args[0] ) ) {
	echo "Usage: php testCompression.php [--type=<type>] [--start=<start-date>] " .
		"[--limit=<num-revs>] <page-title>\n";
	exit( 1 );
}

$lang = Language::factory( 'en' );
$title = Title::newFromText( $args[0] );
if ( $title === null ) {
	// Title::newFromText() yields null for text that is not a valid title;
	// bail out here instead of failing on a method call further down.
	echo "Invalid page title: {$args[0]}\n";
	exit( 1 );
}

if ( isset( $options['start'] ) ) {
	$startUnix = strtotime( $options['start'] );
	if ( $startUnix === false ) {
		// strtotime() returns false for input it cannot parse.
		echo "Unable to parse start date: {$options['start']}\n";
		exit( 1 );
	}
	$start = wfTimestamp( TS_MW, $startUnix );
	echo "Starting from " . $lang->timeanddate( $start ) . "\n";
} else {
	$start = '19700101000000';
}

if ( isset( $options['limit'] ) ) {
	// An explicit limit means "compress exactly this many revisions".
	$limit = $options['limit'];
	$untilHappy = false;
} else {
	// No explicit limit: cap the query and let the blob decide when to stop.
	$limit = 1000;
	$untilHappy = true;
}

$type = $options['type'] ?? ConcatenatedGzipHistoryBlob::class;
if ( !class_exists( $type ) ) {
	echo "Unknown blob type: $type\n";
	exit( 1 );
}

// This is a plain script, not a class method, so there is no $this to ask
// for a connection; use the global accessor instead.
$dbr = wfGetDB( DB_REPLICA );
$revQuery = Revision::getQueryInfo( [ 'page', 'text' ] );
$res = $dbr->select(
	$revQuery['tables'],
	$revQuery['fields'],
	[
		'page_namespace' => $title->getNamespace(),
		'page_title' => $title->getDBkey(),
		'rev_timestamp > ' . $dbr->addQuotes( $dbr->timestamp( $start ) ),
	],
	__FILE__,
	[ 'LIMIT' => $limit ],
	$revQuery['joins']
);

$blob = new $type;
// rev_id => MD5 of the text that was added, used to verify the round trip.
$hashes = [];
// rev_id => key returned by the blob for that text.
$keys = [];
$uncompressedSize = 0;

// Start the timer negative so that adding the end time yields the elapsed time.
$t = -microtime( true );
foreach ( $res as $row ) {
	$revision = new Revision( $row );
	$text = $revision->getSerializedData();
	$uncompressedSize += strlen( $text );
	$hashes[$row->rev_id] = md5( $text );
	$keys[$row->rev_id] = $blob->addItem( $text );
	if ( $untilHappy && !$blob->isHappy() ) {
		break;
	}
}

// Serialization is counted as part of the compression time.
$serialized = serialize( $blob );
$t += microtime( true );
# print_r( $blob->mDiffMap );

printf( "%s\nCompression ratio for %d revisions: %5.2f, %s -> %d\n",
	$type,
	count( $hashes ),
	$uncompressedSize / strlen( $serialized ),
	$lang->formatSize( $uncompressedSize ),
	strlen( $serialized )
);
printf( "Compression time: %5.2f ms\n", $t * 1000 );

$t = -microtime( true );
// $serialized was produced by serialize() a few lines above, so it is
// trusted input for unserialize().
$blob = unserialize( $serialized );
foreach ( $keys as $id => $key ) {
	$text = $blob->getItem( $key );
	if ( md5( $text ) !== $hashes[$id] ) {
		echo "Content hash mismatch for rev_id $id\n";
		# var_dump( $text );
	}
}
$t += microtime( true );
printf( "Decompression time: %5.2f ms\n", $t * 1000 );
serialize()
unserialize( $serialized)
and that you know you can do these things. To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it. For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software.
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
if( $line===false) $args
Definition cdb.php:64
static getQueryInfo( $options=[])
Return the tables, fields, and join conditions to be selected to create a new revision object.
Definition Revision.php:511
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped & $options
Definition hooks.txt:1999
const DB_REPLICA
Definition defines.php:25
foreach( $res as $row) $serialized
$optionsWithArgs
if(!isset( $args[0])) $lang
$uncompressedSize
$untilHappy