MediaWiki master
testCompression.php
Go to the documentation of this file.
1<?php
14use Wikimedia\Timestamp\TimestampFormat as TS;
15
16// @codeCoverageIgnoreStart
17require_once __DIR__ . '/../Maintenance.php';
18// @codeCoverageIgnoreEnd
19
/**
 * Declare the command-line interface of the script: one positional
 * argument naming the page, plus three optional switches that each take
 * a value.
 */
public function __construct() {
	parent::__construct();

	// Positional argument 0: the page whose revisions get compressed.
	$this->addArg( 'title', 'The page to test' );

	// Every switch below is optional (required = false) and expects a
	// value (withArg = true); they are registered in this order.
	$valueOptions = [
		'type' => 'The HistoryBlob subclass to use',
		'start' => 'The start date',
		'limit' => 'Maximum number of revisions to process',
	];
	foreach ( $valueOptions as $optionName => $optionHelp ) {
		$this->addOption( $optionName, $optionHelp, false, true );
	}
}
30
/**
 * Pack the revisions of one page into a single history blob and report
 * the compression ratio plus the time spent compressing and decompressing.
 *
 * Revisions with a timestamp later than the 'start' option (default:
 * 19700101000000) are selected. With an explicit 'limit' option exactly
 * that many rows are requested; without it at most 1000 rows are requested
 * and the loop stops as soon as the blob's isHappy() returns false.
 *
 * After a serialize()/unserialize() round trip every stored item is read
 * back and its MD5 digest is compared with the digest of the original
 * text; mismatches are reported per revision ID.
 *
 * Invalid input (unparsable title or start date, non-positive limit,
 * unknown blob class) aborts the script through fatalError().
 */
public function execute() {
	$lang = $this->getServiceContainer()->getLanguageFactory()->getLanguage( 'en' );

	// Title::newFromText() yields null for an invalid title; bail out
	// before dereferencing it.
	$titleText = $this->getArg( 0 );
	$title = Title::newFromText( $titleText );
	if ( $title === null ) {
		$this->fatalError( "Invalid title: $titleText" );
	}

	if ( $this->hasOption( 'start' ) ) {
		$startOption = $this->getOption( 'start' );
		// strtotime() returns false when it cannot parse the date.
		$startUnix = strtotime( $startOption );
		if ( $startUnix === false ) {
			$this->fatalError( "Invalid start date: $startOption" );
		}
		$start = wfTimestamp( TS::MW, $startUnix );
		echo "Starting from " . $lang->timeanddate( $start ) . "\n";
	} else {
		$start = '19700101000000';
	}

	if ( $this->hasOption( 'limit' ) ) {
		// Options arrive as strings; the query builder wants an integer.
		$limit = (int)$this->getOption( 'limit' );
		if ( $limit <= 0 ) {
			$this->fatalError( 'The limit must be a positive integer' );
		}
		$untilHappy = false;
	} else {
		$limit = 1000;
		$untilHappy = true;
	}

	$type = $this->getOption( 'type', ConcatenatedGzipHistoryBlob::class );
	// The class name comes from the command line; make sure it can be
	// instantiated before running the query.
	if ( !class_exists( $type ) ) {
		$this->fatalError( "Unknown HistoryBlob class: $type" );
	}

	$dbr = $this->getReplicaDB();

	$revStore = $this->getServiceContainer()->getRevisionStore();
	$res = $revStore->newSelectQueryBuilder( $dbr )
		->joinComment()
		->joinPage()
		->where( [
			'page_namespace' => $title->getNamespace(),
			'page_title' => $title->getDBkey(),
			$dbr->expr( 'rev_timestamp', '>', $dbr->timestamp( $start ) ),
		] )
		->limit( $limit )
		->caller( __FILE__ )->fetchResultSet();

	$blob = new $type;
	$hashes = [];
	$keys = [];
	$uncompressedSize = 0;

	// Start at minus "now" and add "now" afterwards to get the elapsed time.
	$t = -microtime( true );
	foreach ( $res as $row ) {
		$revRecord = $revStore->newRevisionFromRow( $row );
		$text = $revRecord->getSlot( SlotRecord::MAIN, RevisionRecord::RAW )
			->getContent()
			->serialize();
		$uncompressedSize += strlen( $text );
		$hashes[$row->rev_id] = md5( $text );
		$keys[$row->rev_id] = $blob->addItem( $text );
		if ( $untilHappy && !$blob->isHappy() ) {
			break;
		}
	}

	// Serialization is part of the measured compression time.
	$serialized = serialize( $blob );
	$t += microtime( true );
	$serializedSize = strlen( $serialized );

	printf( "%s\nCompression ratio for %d revisions: %5.2f, %s -> %d\n",
		$type,
		count( $hashes ),
		$uncompressedSize / $serializedSize,
		$lang->formatSize( $uncompressedSize ),
		$serializedSize
	);
	printf( "Compression time: %5.2f ms\n", $t * 1000 );

	$t = -microtime( true );
	$blob = unserialize( $serialized );
	foreach ( $keys as $id => $key ) {
		$text = $blob->getItem( $key );
		// Strict comparison: with a loose one, two different digests of
		// the form "0e<digits>" are treated as equal numeric strings and
		// a real mismatch would go unreported.
		if ( md5( $text ) !== $hashes[$id] ) {
			echo "Content hash mismatch for rev_id $id\n";
		}
	}
	$t += microtime( true );
	printf( "Decompression time: %5.2f ms\n", $t * 1000 );
}
107}
108
109// @codeCoverageIgnoreStart
110$maintClass = TestCompression::class;
111require_once RUN_MAINTENANCE_IF_MAIN;
112// @codeCoverageIgnoreEnd
wfTimestamp( $outputtype=TS::UNIX, $ts=0)
Get a timestamp string in one of various formats.
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
addArg( $arg, $description, $required=true, $multi=false)
Add some args that are needed.
getArg( $argId=0, $default=null)
Get an argument.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
hasOption( $name)
Checks to see if a particular option was set.
getOption( $name, $default=null)
Get an option, or return the default.
getReplicaDB(string|false $virtualDomain=false)
getServiceContainer()
Returns the main service container.
Page revision base class.
Value object representing a content slot associated with a page revision.
Represents a title within MediaWiki.
Definition Title.php:69
execute()
Do the actual work.
__construct()
Default constructor.