MediaWiki REL1_35
testCompression.php
Go to the documentation of this file.
<?php
/**
 * Test revision text compression and decompression.
 *
 * Loads revisions of one page (optionally only those newer than --start, and
 * at most --limit of them), adds their main-slot text to a history blob of
 * the class given by --type, and reports the compression ratio and the time
 * spent compressing. The blob is then serialized, unserialized again, and
 * every item is read back and compared against the MD5 of the original text.
 *
 * Usage: php testCompression.php [--type=<type>] [--start=<start-date>]
 *        [--limit=<num-revs>] <page-title>
 */

use MediaWiki\MediaWikiServices;
use MediaWiki\Revision\RevisionRecord;
use MediaWiki\Revision\SlotRecord;

$optionsWithArgs = [ 'start', 'limit', 'type' ];
require __DIR__ . '/../commandLine.inc';

if ( !isset( $args[0] ) ) {
	echo "Usage: php testCompression.php [--type=<type>] [--start=<start-date>] " .
		"[--limit=<num-revs>] <page-title>\n";
	exit( 1 );
}

$lang = MediaWikiServices::getInstance()->getLanguageFactory()->getLanguage( 'en' );

$title = Title::newFromText( $args[0] );
if ( !$title ) {
	// Title::newFromText() yields no object for an unparsable title; bail out
	// before calling methods on it.
	echo "Invalid page title: {$args[0]}\n";
	exit( 1 );
}

if ( isset( $options['start'] ) ) {
	$startUnix = strtotime( $options['start'] );
	if ( $startUnix === false ) {
		// strtotime() returns false for input it cannot parse.
		echo "Invalid start date: {$options['start']}\n";
		exit( 1 );
	}
	$start = wfTimestamp( TS_MW, $startUnix );
	echo "Starting from " . $lang->timeanddate( $start ) . "\n";
} else {
	$start = '19700101000000';
}

if ( isset( $options['limit'] ) ) {
	// An explicit limit means "compress exactly this many revisions".
	$limit = $options['limit'];
	$untilHappy = false;
} else {
	// Without a limit, stop as soon as the blob reports it is no longer happy.
	$limit = 1000;
	$untilHappy = true;
}

$type = $options['type'] ?? ConcatenatedGzipHistoryBlob::class;
if ( !class_exists( $type ) ) {
	echo "Unknown blob type: $type\n";
	exit( 1 );
}

$dbr = wfGetDB( DB_REPLICA );
$revStore = MediaWikiServices::getInstance()->getRevisionStore();
$revQuery = $revStore->getQueryInfo( [ 'page' ] );
$res = $dbr->select(
	$revQuery['tables'],
	$revQuery['fields'],
	[
		'page_namespace' => $title->getNamespace(),
		'page_title' => $title->getDBkey(),
		'rev_timestamp > ' . $dbr->addQuotes( $dbr->timestamp( $start ) ),
	],
	__FILE__,
	[ 'LIMIT' => $limit ],
	$revQuery['joins']
);

$blob = new $type;
// rev_id => MD5 of the original text, used to verify the round trip.
$hashes = [];
// rev_id => key returned by the blob for that text.
$keys = [];
$uncompressedSize = 0;

// Timing idiom: start at -now, add now afterwards to obtain the elapsed time.
$t = -microtime( true );
foreach ( $res as $row ) {
	$revRecord = $revStore->newRevisionFromRow( $row );
	$text = $revRecord->getSlot( SlotRecord::MAIN, RevisionRecord::RAW )
		->getContent()
		->serialize();
	$uncompressedSize += strlen( $text );
	$hashes[$row->rev_id] = md5( $text );
	$keys[$row->rev_id] = $blob->addItem( $text );
	if ( $untilHappy && !$blob->isHappy() ) {
		break;
	}
}

$serialized = serialize( $blob );
$t += microtime( true );
# print_r( $blob->mDiffMap );

printf( "%s\nCompression ratio for %d revisions: %5.2f, %s -> %d\n",
	$type,
	count( $hashes ),
	$uncompressedSize / strlen( $serialized ),
	$lang->formatSize( $uncompressedSize ),
	strlen( $serialized )
);
printf( "Compression time: %5.2f ms\n", $t * 1000 );

$t = -microtime( true );
// Rebuild the blob from its serialized form so that the verification below
// exercises real decompression. $serialized was produced by this script a few
// lines above, so it is not untrusted input.
$blob = unserialize( $serialized );
foreach ( $keys as $id => $key ) {
	$text = $blob->getItem( $key );
	// Strict comparison: with a loose one, two different hex digests that both
	// look like numeric strings (e.g. "0e...") could compare equal.
	if ( md5( $text ) !== $hashes[$id] ) {
		echo "Content hash mismatch for rev_id $id\n";
		# var_dump( $text );
	}
}
$t += microtime( true );
printf( "Decompression time: %5.2f ms\n", $t * 1000 );
serialize()
unserialize( $serialized)
wfGetDB( $db, $groups=[], $wiki=false)
Get a Database object.
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
MediaWikiServices is the service locator for the application scope of MediaWiki.
Page revision base class.
Value object representing a content slot associated with a page revision.
if( $line===false) $args
Definition mcc.php:124
const DB_REPLICA
Definition defines.php:25
foreach( $res as $row) $serialized
$optionsWithArgs
if(!isset( $args[0])) $lang
$uncompressedSize
$untilHappy