MediaWiki  master
testCompression.php
Go to the documentation of this file.
<?php
/**
 * Test revision text compression and decompression.
 *
 * Usage: php testCompression.php [--type=<type>] [--start=<start-date>]
 *            [--limit=<num-revs>] <page-title>
 *
 * Loads revisions of the given page that are newer than the start date, adds
 * the serialized main-slot content of each one to a history blob object of
 * the requested class, and reports the resulting compression ratio and the
 * time taken. The blob is then unserialized again and every item is read back
 * and checked against the MD5 hash of the text that was originally added.
 *
 * Without --limit, at most 1000 revisions are fetched and items are added
 * until the blob's isHappy() returns false. With --limit, exactly the fetched
 * revisions are added regardless of isHappy().
 */

use MediaWiki\MediaWikiServices;
use MediaWiki\Revision\RevisionRecord;
use MediaWiki\Revision\SlotRecord;

$optionsWithArgs = [ 'start', 'limit', 'type' ];
require __DIR__ . '/../CommandLineInc.php';

if ( !isset( $args[0] ) ) {
	echo "Usage: php testCompression.php [--type=<type>] [--start=<start-date>] " .
		"[--limit=<num-revs>] <page-title>\n";
	exit( 1 );
}

$lang = MediaWikiServices::getInstance()->getLanguageFactory()->getLanguage( 'en' );

$title = Title::newFromText( $args[0] );
if ( !$title ) {
	// Fail with a readable message instead of a fatal error on $title->getNamespace().
	echo "Invalid page title: {$args[0]}\n";
	exit( 1 );
}

if ( isset( $options['start'] ) ) {
	$startUnix = strtotime( $options['start'] );
	if ( $startUnix === false ) {
		// strtotime() returns false for input it cannot parse; do not pass that on.
		echo "Unable to parse start date: {$options['start']}\n";
		exit( 1 );
	}
	$start = wfTimestamp( TS_MW, $startUnix );
	echo "Starting from " . $lang->timeanddate( $start ) . "\n";
} else {
	$start = '19700101000000';
}

if ( isset( $options['limit'] ) ) {
	$limit = $options['limit'];
	$untilHappy = false;
} else {
	$limit = 1000;
	$untilHappy = true;
}
$type = $options['type'] ?? ConcatenatedGzipHistoryBlob::class;

// Fetch the revision rows of the page that are newer than $start.
$dbr = wfGetDB( DB_REPLICA );
$revStore = MediaWikiServices::getInstance()->getRevisionStore();
$revQuery = $revStore->getQueryInfo( [ 'page' ] );
$res = $dbr->select(
	$revQuery['tables'],
	$revQuery['fields'],
	[
		'page_namespace' => $title->getNamespace(),
		'page_title' => $title->getDBkey(),
		'rev_timestamp > ' . $dbr->addQuotes( $dbr->timestamp( $start ) ),
	],
	__FILE__,
	[ 'LIMIT' => $limit ],
	$revQuery['joins']
);

$blob = new $type();
// rev_id => MD5 of the text that was added, used to verify the round trip.
$hashes = [];
// rev_id => key returned by addItem(), used to read the item back.
$keys = [];
$uncompressedSize = 0;

// Compression pass: the timed section covers loading, adding and serializing.
$t = -microtime( true );
foreach ( $res as $row ) {
	$revRecord = $revStore->newRevisionFromRow( $row );
	$text = $revRecord->getSlot( SlotRecord::MAIN, RevisionRecord::RAW )
		->getContent()
		->serialize();
	$uncompressedSize += strlen( $text );
	$hashes[$row->rev_id] = md5( $text );
	$keys[$row->rev_id] = $blob->addItem( $text );
	if ( $untilHappy && !$blob->isHappy() ) {
		break;
	}
}

$serialized = serialize( $blob );
$t += microtime( true );
# print_r( $blob->mDiffMap );

printf( "%s\nCompression ratio for %d revisions: %5.2f, %s -> %d\n",
	$type,
	count( $hashes ),
	$uncompressedSize / strlen( $serialized ),
	$lang->formatSize( $uncompressedSize ),
	strlen( $serialized )
);
printf( "Compression time: %5.2f ms\n", $t * 1000 );

// Decompression pass: restore the blob from its serialized form and verify every item.
$t = -microtime( true );
$blob = unserialize( $serialized );
foreach ( $keys as $id => $key ) {
	$text = $blob->getItem( $key );
	// Strict comparison: with "!=", two different digests that both look like
	// "0e<digits>" would be compared as numbers and wrongly treated as equal.
	if ( md5( $text ) !== $hashes[$id] ) {
		echo "Content hash mismatch for rev_id $id\n";
		# var_dump( $text );
	}
}
$t += microtime( true );
printf( "Decompression time: %5.2f ms\n", $t * 1000 );
serialize()
unserialize( $serialized)
wfGetDB( $db, $groups=[], $wiki=false)
Get a Database object.
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
Service locator for MediaWiki core services.
Page revision base class.
Value object representing a content slot associated with a page revision.
Definition: SlotRecord.php:40
static newFromText( $text, $defaultNamespace=NS_MAIN)
Create a new Title from text, such as what one would find in a link.
Definition: Title.php:370
if( $line===false) $args
Definition: mcc.php:124
const DB_REPLICA
Definition: defines.php:26
foreach( $res as $row) $serialized
$optionsWithArgs
if(!isset( $args[0])) $lang
$revStore
$revQuery
$uncompressedSize
$untilHappy