Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
0.00% covered (danger)
0.00%
0 / 146
0.00% covered (danger)
0.00%
0 / 13
CRAP
0.00% covered (danger)
0.00%
0 / 1
DiffHistoryBlob
0.00% covered (danger)
0.00%
0 / 146
0.00% covered (danger)
0.00%
0 / 13
1980
0.00% covered (danger)
0.00%
0 / 1
 __construct
0.00% covered (danger)
0.00%
0 / 2
0.00% covered (danger)
0.00%
0 / 1
6
 addItem
0.00% covered (danger)
0.00%
0 / 6
0.00% covered (danger)
0.00%
0 / 1
6
 getItem
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 setText
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 getText
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 compress
0.00% covered (danger)
0.00%
0 / 49
0.00% covered (danger)
0.00%
0 / 1
132
 diff
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 patch
0.00% covered (danger)
0.00%
0 / 33
0.00% covered (danger)
0.00%
0 / 1
90
 xdiffAdler32
0.00% covered (danger)
0.00%
0 / 3
0.00% covered (danger)
0.00%
0 / 1
2
 uncompress
0.00% covered (danger)
0.00%
0 / 9
0.00% covered (danger)
0.00%
0 / 1
12
 __sleep
0.00% covered (danger)
0.00%
0 / 18
0.00% covered (danger)
0.00%
0 / 1
30
 __wakeup
0.00% covered (danger)
0.00%
0 / 20
0.00% covered (danger)
0.00%
0 / 1
30
 isHappy
0.00% covered (danger)
0.00%
0 / 2
0.00% covered (danger)
0.00%
0 / 1
6
1<?php
2/**
3 * Efficient concatenated text storage.
4 *
5 * @license GPL-2.0-or-later
6 * @file
7 */
8
9/**
10 * Diff-based history compression
11 * Requires xdiff and zlib
12 *
13 * WARNING: Objects of this class are serialized and permanently stored in the DB.
14 * Do not change the name or visibility of any property!
15 */
16class DiffHistoryBlob implements HistoryBlob {
17    /** @var string[] Uncompressed item cache */
18    public $mItems = [];
19
20    /** @var int Total uncompressed size */
21    public $mSize = 0;
22
23    /**
24     * @var array|null Array of diffs. If a diff D from A to B is notated D = B - A,
25     * and Z is an empty string:
26     *
27     *              { item[map[i]] - item[map[i-1]]   where i > 0
28     *    diff[i] = {
29     *              { item[map[i]] - Z                where i = 0
30     */
31    public $mDiffs;
32
33    /** @var array The diff map, see above */
34    public $mDiffMap;
35
36    /** @var string|null The key for getText()
37     */
38    public $mDefaultKey;
39
40    /** @var string|null Compressed storage */
41    public $mCompressed;
42
43    /** @var bool True if the object is locked against further writes */
44    public $mFrozen = false;
45
46    /**
47     * @var int The maximum uncompressed size before the object becomes sad
48     * Should be less than max_allowed_packet
49     */
50    public $mMaxSize = 10_000_000;
51
52    /** @var int The maximum number of text items before the object becomes sad */
53    public $mMaxCount = 100;
54
55    /** Constants from xdiff.h */
56    private const XDL_BDOP_INS = 1;
57    private const XDL_BDOP_CPY = 2;
58    private const XDL_BDOP_INSB = 3;
59
60    public function __construct() {
61        if ( !function_exists( 'gzdeflate' ) ) {
62            throw new RuntimeException( "Need zlib support to read or write DiffHistoryBlob\n" );
63        }
64    }
65
66    /**
67     * @param string $text
68     * @return string
69     */
70    public function addItem( $text ) {
71        if ( $this->mFrozen ) {
72            throw new BadMethodCallException( __METHOD__ . ": Cannot add more items after sleep/wakeup" );
73        }
74
75        $this->mItems[] = $text;
76        $this->mSize += strlen( $text );
77        $this->mDiffs = null; // later
78        return (string)( count( $this->mItems ) - 1 );
79    }
80
81    /**
82     * @param string $key
83     * @return string
84     */
85    public function getItem( $key ) {
86        return $this->mItems[(int)$key];
87    }
88
89    /**
90     * @param string $text
91     */
92    public function setText( $text ) {
93        $this->mDefaultKey = $this->addItem( $text );
94    }
95
96    /**
97     * @return string
98     */
99    public function getText() {
100        return $this->getItem( $this->mDefaultKey );
101    }
102
103    private function compress() {
104        if ( !function_exists( 'xdiff_string_rabdiff' ) ) {
105            throw new RuntimeException( "Need xdiff support to write DiffHistoryBlob\n" );
106        }
107        if ( $this->mDiffs !== null ) {
108            // Already compressed
109            return;
110        }
111        if ( $this->mItems === [] ) {
112            return;
113        }
114
115        // Create two diff sequences: one for main text and one for small text
116        $sequences = [
117            'small' => [
118                'tail' => '',
119                'diffs' => [],
120                'map' => [],
121            ],
122            'main' => [
123                'tail' => '',
124                'diffs' => [],
125                'map' => [],
126            ],
127        ];
128        $smallFactor = 0.5;
129
130        $mItemsCount = count( $this->mItems );
131        for ( $i = 0; $i < $mItemsCount; $i++ ) {
132            $text = $this->mItems[$i];
133            if ( $i == 0 ) {
134                $seqName = 'main';
135            } else {
136                $mainTail = $sequences['main']['tail'];
137                if ( strlen( $text ) < strlen( $mainTail ) * $smallFactor ) {
138                    $seqName = 'small';
139                } else {
140                    $seqName = 'main';
141                }
142            }
143
144            $tail = $sequences[$seqName]['tail'];
145            $diff = $this->diff( $tail, $text );
146            $sequences[$seqName]['diffs'][] = $diff;
147            $sequences[$seqName]['map'][] = $i;
148            $sequences[$seqName]['tail'] = $text;
149        }
150
151        // Knit the sequences together
152        $tail = '';
153        $this->mDiffs = [];
154        $this->mDiffMap = [];
155        foreach ( $sequences as $seq ) {
156            if ( $seq['diffs'] === [] ) {
157                continue;
158            }
159            if ( $tail === '' ) {
160                $this->mDiffs[] = $seq['diffs'][0];
161            } else {
162                $head = $this->patch( '', $seq['diffs'][0] );
163                $this->mDiffs[] = $this->diff( $tail, $head );
164            }
165            $this->mDiffMap[] = $seq['map'][0];
166            $diffsCount = count( $seq['diffs'] );
167            for ( $i = 1; $i < $diffsCount; $i++ ) {
168                $this->mDiffs[] = $seq['diffs'][$i];
169                $this->mDiffMap[] = $seq['map'][$i];
170            }
171            $tail = $seq['tail'];
172        }
173    }
174
175    /**
176     * @param string $t1
177     * @param string $t2
178     * @return string
179     */
180    private function diff( $t1, $t2 ) {
181        return xdiff_string_rabdiff( $t1, $t2 );
182    }
183
184    /**
185     * @param string $base
186     * @param string $diff
187     * @return bool|string
188     */
189    private function patch( $base, $diff ) {
190        if ( function_exists( 'xdiff_string_bpatch' ) ) {
191            return xdiff_string_bpatch( $base, $diff );
192        }
193
194        # Pure PHP implementation
195
196        $header = unpack( 'Vofp/Vcsize', substr( $diff, 0, 8 ) );
197
198        # Check the checksum
199        $ofp = $this->xdiffAdler32( $base );
200        if ( $ofp !== substr( $diff, 0, 4 ) ) {
201            wfDebug( __METHOD__ . ": incorrect base checksum" );
202            return false;
203        }
204        if ( $header['csize'] != strlen( $base ) ) {
205            wfDebug( __METHOD__ . ": incorrect base length" );
206            return false;
207        }
208
209        $p = 8;
210        $out = '';
211        while ( $p < strlen( $diff ) ) {
212            $x = unpack( 'Cop', substr( $diff, $p, 1 ) );
213            $op = $x['op'];
214            ++$p;
215            switch ( $op ) {
216                case self::XDL_BDOP_INS:
217                    $x = unpack( 'Csize', substr( $diff, $p, 1 ) );
218                    $p++;
219                    $out .= substr( $diff, $p, $x['size'] );
220                    $p += $x['size'];
221                    break;
222                case self::XDL_BDOP_INSB:
223                    $x = unpack( 'Vcsize', substr( $diff, $p, 4 ) );
224                    $p += 4;
225                    $out .= substr( $diff, $p, $x['csize'] );
226                    $p += $x['csize'];
227                    break;
228                case self::XDL_BDOP_CPY:
229                    $x = unpack( 'Voff/Vcsize', substr( $diff, $p, 8 ) );
230                    $p += 8;
231                    $out .= substr( $base, $x['off'], $x['csize'] );
232                    break;
233                default:
234                    wfDebug( __METHOD__ . ": invalid op" );
235                    return false;
236            }
237        }
238        return $out;
239    }
240
241    /**
242     * Compute a binary "Adler-32" checksum as defined by LibXDiff, i.e. with
243     * the bytes backwards and initialised with 0 instead of 1. See T36428.
244     *
245     * @param string $s
246     * @return string
247     */
248    public function xdiffAdler32( $s ) {
249        static $init = null;
250        $init ??= str_repeat( "\xf0", 205 ) . "\xee" . str_repeat( "\xf0", 67 ) . "\x02";
251
252        // The real Adler-32 checksum of $init is zero, so it initialises the
253        // state to zero, as it is at the start of LibXDiff's checksum
254        // algorithm. Appending the subject string then simulates LibXDiff.
255        return strrev( hash( 'adler32', $init . $s, true ) );
256    }
257
258    private function uncompress() {
259        if ( !$this->mDiffs ) {
260            return;
261        }
262        $tail = '';
263        $mDiffsCount = count( $this->mDiffs );
264        for ( $diffKey = 0; $diffKey < $mDiffsCount; $diffKey++ ) {
265            $textKey = $this->mDiffMap[$diffKey];
266            $text = $this->patch( $tail, $this->mDiffs[$diffKey] );
267            $this->mItems[$textKey] = $text;
268            $tail = $text;
269        }
270    }
271
272    /**
273     * @return array
274     */
275    public function __sleep() {
276        $this->compress();
277        if ( $this->mItems === [] ) {
278            $info = false;
279        } else {
280            // Take forward differences to improve the compression ratio for sequences
281            $map = '';
282            $prev = 0;
283            foreach ( $this->mDiffMap as $i ) {
284                if ( $map !== '' ) {
285                    $map .= ',';
286                }
287                $map .= $i - $prev;
288                $prev = $i;
289            }
290            $info = [
291                'diffs' => $this->mDiffs,
292                'map' => $map
293            ];
294        }
295        if ( $this->mDefaultKey !== null ) {
296            $info['default'] = $this->mDefaultKey;
297        }
298        $this->mCompressed = gzdeflate( serialize( $info ) );
299        return [ 'mCompressed' ];
300    }
301
302    public function __wakeup() {
303        // addItem() doesn't work if mItems is partially filled from mDiffs
304        $this->mFrozen = true;
305        $info = HistoryBlobUtils::unserializeArray( gzinflate( $this->mCompressed ) );
306        $this->mCompressed = null;
307
308        if ( !$info ) {
309            // Empty object
310            return;
311        }
312
313        if ( isset( $info['default'] ) ) {
314            $this->mDefaultKey = $info['default'];
315        }
316        $this->mDiffs = $info['diffs'];
317        if ( isset( $info['base'] ) ) {
318            // Old format
319            $this->mDiffMap = range( 0, count( $this->mDiffs ) - 1 );
320            array_unshift( $this->mDiffs,
321                pack( 'VVCV', 0, 0, self::XDL_BDOP_INSB, strlen( $info['base'] ) ) .
322                $info['base'] );
323        } else {
324            // New format
325            $map = explode( ',', $info['map'] );
326            $cur = 0;
327            $this->mDiffMap = [];
328            foreach ( $map as $i ) {
329                $cur += $i;
330                $this->mDiffMap[] = $cur;
331            }
332        }
333        $this->uncompress();
334    }
335
336    /** @inheritDoc */
337    public function isHappy(): bool {
338        return $this->mSize < $this->mMaxSize
339            && count( $this->mItems ) < $this->mMaxCount;
340    }
341
342}