MediaWiki REL1_40
testCompression.php
Go to the documentation of this file.
<?php
/**
 * Test revision text compression and decompression.
 *
 * Loads revisions of a single page, adds their serialized main-slot content to
 * a history blob of the requested class, and reports the compression ratio and
 * the time taken to compress and decompress. Every item is read back from the
 * unserialized blob and checked against an MD5 of the original text.
 *
 * Usage: php testCompression.php [--type=<type>] [--start=<start-date>]
 *        [--limit=<num-revs>] <page-title>
 *
 * @file
 */

use MediaWiki\MediaWikiServices;
use MediaWiki\Revision\RevisionRecord;
use MediaWiki\Revision\SlotRecord;

$optionsWithArgs = [ 'start', 'limit', 'type' ];
require __DIR__ . '/../CommandLineInc.php';

if ( !isset( $args[0] ) ) {
	echo "Usage: php testCompression.php [--type=<type>] [--start=<start-date>] " .
		"[--limit=<num-revs>] <page-title>\n";
	exit( 1 );
}

$lang = MediaWikiServices::getInstance()->getLanguageFactory()->getLanguage( 'en' );

$title = Title::newFromText( $args[0] );
if ( $title === null ) {
	// Title::newFromText() returns null for unparseable titles; fail with a
	// message instead of a fatal error on the method calls below.
	echo "Invalid page title: {$args[0]}\n";
	exit( 1 );
}

if ( isset( $options['start'] ) ) {
	$startUnix = strtotime( $options['start'] );
	if ( $startUnix === false ) {
		// strtotime() returns false for unparseable input; do not pass that on
		// to wfTimestamp() as if it were a real timestamp.
		echo "Invalid start date: {$options['start']}\n";
		exit( 1 );
	}
	$start = wfTimestamp( TS_MW, $startUnix );
	echo "Starting from " . $lang->timeanddate( $start ) . "\n";
} else {
	$start = '19700101000000';
}

if ( isset( $options['limit'] ) ) {
	// An explicit limit was given: process exactly that many revisions.
	$limit = $options['limit'];
	$untilHappy = false;
} else {
	// No explicit limit: cap the query at 1000 rows and stop early as soon as
	// the blob reports that it is no longer "happy".
	$limit = 1000;
	$untilHappy = true;
}
$type = $options['type'] ?? ConcatenatedGzipHistoryBlob::class;

$dbr = wfGetDB( DB_REPLICA );
$revStore = MediaWikiServices::getInstance()->getRevisionStore();
$revQuery = $revStore->getQueryInfo( [ 'page' ] );
$res = $dbr->select(
	$revQuery['tables'],
	$revQuery['fields'],
	[
		'page_namespace' => $title->getNamespace(),
		'page_title' => $title->getDBkey(),
		'rev_timestamp > ' . $dbr->addQuotes( $dbr->timestamp( $start ) ),
	],
	__FILE__,
	[ 'LIMIT' => $limit ],
	$revQuery['joins']
);

$blob = new $type;
// rev_id => MD5 of the original text, used to verify the round trip.
$hashes = [];
// rev_id => key returned by the blob's addItem(), used to fetch the text back.
$keys = [];
$uncompressedSize = 0;

// Start from the negated clock so that adding the clock again afterwards
// leaves the elapsed time in seconds.
$t = -microtime( true );
foreach ( $res as $row ) {
	$revRecord = $revStore->newRevisionFromRow( $row );
	$text = $revRecord->getSlot( SlotRecord::MAIN, RevisionRecord::RAW )
		->getContent()
		->serialize();
	$uncompressedSize += strlen( $text );
	$hashes[$row->rev_id] = md5( $text );
	$keys[$row->rev_id] = $blob->addItem( $text );
	if ( $untilHappy && !$blob->isHappy() ) {
		break;
	}
}

$serialized = serialize( $blob );
$t += microtime( true );
# print_r( $blob->mDiffMap );

printf( "%s\nCompression ratio for %d revisions: %5.2f, %s -> %d\n",
	$type,
	count( $hashes ),
	$uncompressedSize / strlen( $serialized ),
	$lang->formatSize( $uncompressedSize ),
	strlen( $serialized )
);
printf( "Compression time: %5.2f ms\n", $t * 1000 );

$t = -microtime( true );
// $serialized was produced by this script a few lines above, so unserialize()
// is not being run on untrusted input here.
$blob = unserialize( $serialized );
foreach ( $keys as $id => $key ) {
	$text = $blob->getItem( $key );
	if ( md5( $text ) !== $hashes[$id] ) {
		echo "Content hash mismatch for rev_id $id\n";
		# var_dump( $text );
	}
}
$t += microtime( true );
printf( "Decompression time: %5.2f ms\n", $t * 1000 );
wfGetDB( $db, $groups=[], $wiki=false)
Get a Database object.
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
Service locator for MediaWiki core services.
Page revision base class.
Value object representing a content slot associated with a page revision.
Represents a title within MediaWiki.
Definition Title.php:82
const DB_REPLICA
Definition defines.php:26
foreach( $res as $row) $serialized
$optionsWithArgs
if(!isset( $args[0])) $lang
$uncompressedSize
$untilHappy