MediaWiki  master
testCompression.php
Go to the documentation of this file.
<?php
/**
 * Command-line test of history-blob compression for the revisions of one page.
 *
 * Usage: php testCompression.php [--type=<type>] [--start=<start-date>]
 *        [--limit=<num-revs>] <page-title>
 *
 * The script loads revisions of the given page, adds their main-slot text to a
 * history blob of the requested class, reports the compression ratio and the
 * time taken, then reads every item back and checks it against an MD5 digest
 * of the original text.
 *
 * NOTE(review): the listing this was taken from elides the file header
 * (licence comment and, presumably, the `use` imports) and a handful of
 * one-line assignments. The imports and the assignments of $title, $dbr,
 * $uncompressedSize, $serialized and the unserialize() round-trip below were
 * restored from the listing's symbol index -- confirm against the repository.
 */

use MediaWiki\MediaWikiServices;
use MediaWiki\Revision\RevisionRecord;
use MediaWiki\Revision\SlotRecord;

// Options that take a value; read by commandLine.inc when it parses argv.
$optionsWithArgs = [ 'start', 'limit', 'type' ];
require __DIR__ . '/../commandLine.inc';

if ( !isset( $args[0] ) ) {
	echo "Usage: php testCompression.php [--type=<type>] [--start=<start-date>] " .
		"[--limit=<num-revs>] <page-title>\n";
	exit( 1 );
}

$lang = MediaWikiServices::getInstance()->getLanguageFactory()->getLanguage( 'en' );

$title = Title::newFromText( $args[0] );
if ( !$title ) {
	// Without a usable title the query conditions below cannot be built.
	echo "Invalid page title: {$args[0]}\n";
	exit( 1 );
}

if ( isset( $options['start'] ) ) {
	// strtotime() returns false for text it cannot parse; reject that
	// explicitly instead of handing false on to wfTimestamp().
	$startUnix = strtotime( $options['start'] );
	if ( $startUnix === false ) {
		echo "Invalid start date: {$options['start']}\n";
		exit( 1 );
	}
	$start = wfTimestamp( TS_MW, $startUnix );
	echo "Starting from " . $lang->timeanddate( $start ) . "\n";
} else {
	$start = '19700101000000';
}

if ( isset( $options['limit'] ) ) {
	// An explicit limit means "compress exactly this many revisions".
	if ( !ctype_digit( (string)$options['limit'] ) || (int)$options['limit'] < 1 ) {
		echo "Invalid limit (expected a positive integer): {$options['limit']}\n";
		exit( 1 );
	}
	$limit = (int)$options['limit'];
	$untilHappy = false;
} else {
	// No limit given: fetch up to 1000 revisions and stop adding as soon as
	// the blob reports, via isHappy(), that it does not want more.
	$limit = 1000;
	$untilHappy = true;
}

$type = $options['type'] ?? ConcatenatedGzipHistoryBlob::class;
if ( !class_exists( $type ) ) {
	// `new $type` would otherwise abort with a fatal "class not found" error.
	echo "Unknown blob type: $type\n";
	exit( 1 );
}

$dbr = wfGetDB( DB_REPLICA );
$revStore = MediaWikiServices::getInstance()->getRevisionStore();
$revQuery = $revStore->getQueryInfo( [ 'page' ] );
// NOTE(review): the query has a LIMIT but no ORDER BY, so which revisions are
// returned (and in what order) is left to the database -- confirm intended.
$res = $dbr->select(
	$revQuery['tables'],
	$revQuery['fields'],
	[
		'page_namespace' => $title->getNamespace(),
		'page_title' => $title->getDBkey(),
		'rev_timestamp > ' . $dbr->addQuotes( $dbr->timestamp( $start ) ),
	],
	__FILE__,
	[ 'LIMIT' => $limit ],
	$revQuery['joins']
);

$blob = new $type;
$hashes = [];
$keys = [];
$uncompressedSize = 0;

// Timing idiom: store the negated start time, add the end time afterwards;
// the sum is the elapsed time in seconds.
$t = -microtime( true );
foreach ( $res as $row ) {
	$revRecord = $revStore->newRevisionFromRow( $row );
	$text = $revRecord->getSlot( SlotRecord::MAIN, RevisionRecord::RAW )
		->getContent()
		->serialize();
	$uncompressedSize += strlen( $text );
	// Remember a digest of the original text and the key the blob assigned
	// to it, both indexed by revision ID, for the verification pass below.
	$hashes[$row->rev_id] = md5( $text );
	$keys[$row->rev_id] = $blob->addItem( $text );
	if ( $untilHappy && !$blob->isHappy() ) {
		break;
	}
}

// Serializing is counted as part of the compression time.
$serialized = serialize( $blob );
$t += microtime( true );
# print_r( $blob->mDiffMap );

printf( "%s\nCompression ratio for %d revisions: %5.2f, %s -> %d\n",
	$type,
	count( $hashes ),
	$uncompressedSize / strlen( $serialized ),
	$lang->formatSize( $uncompressedSize ),
	strlen( $serialized )
);
printf( "Compression time: %5.2f ms\n", $t * 1000 );

$t = -microtime( true );
// Read items back from a freshly unserialized copy so that the full
// serialize/unserialize round trip is what gets verified and timed.
$blob = unserialize( $serialized );
foreach ( $keys as $id => $key ) {
	$text = $blob->getItem( $key );
	// Strict comparison: with a loose `!=`, two different hex digests that
	// both look like "0e<digits>" are compared as numbers and treated as
	// equal, which would hide a real mismatch.
	if ( md5( $text ) !== $hashes[$id] ) {
		echo "Content hash mismatch for rev_id $id\n";
		# var_dump( $text );
	}
}
$t += microtime( true );
printf( "Decompression time: %5.2f ms\n", $t * 1000 );
Title\newFromText
static newFromText( $text, $defaultNamespace=NS_MAIN)
Create a new Title from text, such as what one would find in a link.
Definition: Title.php:332
Revision\RevisionRecord
Page revision base class.
Definition: RevisionRecord.php:46
MediaWiki\MediaWikiServices
MediaWikiServices is the service locator for the application scope of MediaWiki.
Definition: MediaWikiServices.php:144
$lang
if(!isset( $args[0])) $lang
Definition: testCompression.php:37
wfTimestamp
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
Definition: GlobalFunctions.php:1806
$serialized
foreach( $res as $row) $serialized
Definition: testCompression.php:88
$res
$res
Definition: testCompression.php:57
serialize
serialize()
Definition: ApiMessageTrait.php:138
$revQuery
$revQuery
Definition: testCompression.php:56
$dbr
$dbr
Definition: testCompression.php:54
$blob
$blob
Definition: testCompression.php:70
wfGetDB
wfGetDB( $db, $groups=[], $wiki=false)
Get a Database object.
Definition: GlobalFunctions.php:2463
$args
if( $line===false) $args
Definition: mcc.php:124
$title
$title
Definition: testCompression.php:38
DB_REPLICA
const DB_REPLICA
Definition: defines.php:25
$revStore
$revStore
Definition: testCompression.php:55
unserialize
unserialize( $serialized)
Definition: ApiMessageTrait.php:146
$keys
$keys
Definition: testCompression.php:72
$uncompressedSize
$uncompressedSize
Definition: testCompression.php:73
$hashes
$hashes
Definition: testCompression.php:71
$t
$t
Definition: testCompression.php:74
$optionsWithArgs
$optionsWithArgs
Definition: testCompression.php:28
Revision\SlotRecord
Value object representing a content slot associated with a page revision.
Definition: SlotRecord.php:39
$untilHappy
$untilHappy
Definition: testCompression.php:50
$type
$type
Definition: testCompression.php:52