MediaWiki  master
testCompression.php
Go to the documentation of this file.
1 <?php
28 
// Options that take a value. Set before including CommandLineInc.php,
// which presumably reads it while populating $options and $args
// (NOTE(review): confirm against CommandLineInc.php).
$optionsWithArgs = [ 'start', 'limit', 'type' ];
require __DIR__ . '/../CommandLineInc.php';

if ( !isset( $args[0] ) ) {
	echo "Usage: php testCompression.php [--type=<type>] [--start=<start-date>] " .
		"[--limit=<num-revs>] <page-title>\n";
	exit( 1 );
}

// English is used for all human-readable output (dates, sizes).
$lang = MediaWikiServices::getInstance()->getLanguageFactory()->getLanguage( 'en' );

// Title::newFromText() yields null for text that is not a valid title;
// bail out here instead of failing later on a null method call.
$title = Title::newFromText( $args[0] );
if ( $title === null ) {
	echo "Invalid page title: {$args[0]}\n";
	exit( 1 );
}

// --start: only revisions newer than this timestamp are considered.
if ( isset( $options['start'] ) ) {
	// strtotime() returns false for unparseable input; do not pass that
	// on to wfTimestamp().
	$startUnix = strtotime( $options['start'] );
	if ( $startUnix === false ) {
		echo "Could not parse --start value: {$options['start']}\n";
		exit( 1 );
	}
	$start = wfTimestamp( TS_MW, $startUnix );
	echo "Starting from " . $lang->timeanddate( $start ) . "\n";
} else {
	// No start given: begin at the Unix epoch, i.e. take everything.
	$start = '19700101000000';
}

// --limit: with an explicit limit, compress exactly that many revisions.
// Without one, fetch up to 1000 rows and let the blob decide when to stop
// ($untilHappy is checked in the compression loop).
if ( isset( $options['limit'] ) ) {
	$limit = $options['limit'];
	$untilHappy = false;
} else {
	$limit = 1000;
	$untilHappy = true;
}

// --type: name of the blob class to instantiate and test.
$type = $options['type'] ?? ConcatenatedGzipHistoryBlob::class;
54 
// Select the page's revisions newer than $start, at most $limit rows.
// $dbr was used below without ever being assigned; obtain a replica
// connection explicitly.
$dbr = wfGetDB( DB_REPLICA );
$revStore = MediaWikiServices::getInstance()->getRevisionStore();
// Tables, fields and joins needed to build revision objects, including
// the page table so the query can filter on namespace and title.
$revQuery = $revStore->getQueryInfo( [ 'page' ] );
$res = $dbr->select(
	$revQuery['tables'],
	$revQuery['fields'],
	[
		'page_namespace' => $title->getNamespace(),
		'page_title' => $title->getDBkey(),
		'rev_timestamp > ' . $dbr->addQuotes( $dbr->timestamp( $start ) ),
	],
	__FILE__,
	[ 'LIMIT' => $limit ],
	$revQuery['joins']
);
70 
// Feed each revision's main-slot text into a fresh blob of the requested
// class, remembering a hash of every text so it can be verified after
// decompression.
$blob = new $type;
// rev_id => md5 of the original text
$hashes = [];
// rev_id => key returned by the blob for that text
$keys = [];
// Total bytes of raw text added. Must start at 0: it is accumulated with
// += below and read by the report even when no rows were returned.
$uncompressedSize = 0;
// Timer idiom: start negative, add the end time later to get the elapsed time.
$t = -microtime( true );
foreach ( $res as $row ) {
	$revRecord = $revStore->newRevisionFromRow( $row );
	// RAW audience: read the content without permission filtering.
	$text = $revRecord->getSlot( SlotRecord::MAIN, RevisionRecord::RAW )
		->getContent()
		->serialize();
	$uncompressedSize += strlen( $text );
	$hashes[$row->rev_id] = md5( $text );
	$keys[$row->rev_id] = $blob->addItem( $text );
	// Without an explicit --limit, stop as soon as the blob reports it is
	// no longer "happy" (presumably: it considers itself full enough —
	// confirm against the blob class).
	if ( $untilHappy && !$blob->isHappy() ) {
		break;
	}
}
88 
// Serialising the blob is counted as part of the compression time, so the
// timer is stopped only afterwards.
$serialized = serialize( $blob );
$t += microtime( true );
# Debugging aid: print_r( $blob->mDiffMap );

$compressedSize = strlen( $serialized );
$revisionCount = count( $hashes );

// Report: blob class, revision count, ratio (raw bytes / serialised bytes),
// human-readable raw size and serialised size in bytes.
printf(
	"%s\nCompression ratio for %d revisions: %5.2f, %s -> %d\n",
	$type,
	$revisionCount,
	$uncompressedSize / $compressedSize,
	$lang->formatSize( $uncompressedSize ),
	$compressedSize
);
// $t holds seconds; convert to milliseconds for display.
printf( "Compression time: %5.2f ms\n", $t * 1000 );
101 
// Round-trip check: rebuild the blob from its serialised form, extract every
// stored text and compare it with the hash recorded before compression.
$t = -microtime( true );
// The input was produced by serialize() earlier in this script, not by an
// external source.
$blob = unserialize( $serialized );
foreach ( $keys as $id => $key ) {
	$text = $blob->getItem( $key );
	// Strict comparison: with loose != two different digests that both look
	// like numeric strings (e.g. "0e" followed only by digits) would compare
	// equal and hide a real mismatch.
	if ( md5( $text ) !== $hashes[$id] ) {
		echo "Content hash mismatch for rev_id $id\n";
		# Debugging aid: var_dump( $text );
	}
}
$t += microtime( true );
// $t holds seconds; convert to milliseconds for display.
printf( "Decompression time: %5.2f ms\n", $t * 1000 );
wfGetDB( $db, $groups=[], $wiki=false)
Get a Database object.
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
Service locator for MediaWiki core services.
Page revision base class.
Value object representing a content slot associated with a page revision.
Definition: SlotRecord.php:40
Represents a title within MediaWiki.
Definition: Title.php:82
const DB_REPLICA
Definition: defines.php:26
foreach( $res as $row) $serialized
$optionsWithArgs
if(!isset( $args[0])) $lang
$revStore
$revQuery
$uncompressedSize
$untilHappy