Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
0.00% covered (danger)
0.00%
0 / 147
0.00% covered (danger)
0.00%
0 / 13
CRAP
0.00% covered (danger)
0.00%
0 / 1
DiffHistoryBlob
0.00% covered (danger)
0.00%
0 / 147
0.00% covered (danger)
0.00%
0 / 13
2070
0.00% covered (danger)
0.00%
0 / 1
 __construct
0.00% covered (danger)
0.00%
0 / 2
0.00% covered (danger)
0.00%
0 / 1
6
 addItem
0.00% covered (danger)
0.00%
0 / 6
0.00% covered (danger)
0.00%
0 / 1
6
 getItem
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 setText
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 getText
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 compress
0.00% covered (danger)
0.00%
0 / 49
0.00% covered (danger)
0.00%
0 / 1
132
 diff
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 patch
0.00% covered (danger)
0.00%
0 / 33
0.00% covered (danger)
0.00%
0 / 1
90
 xdiffAdler32
0.00% covered (danger)
0.00%
0 / 4
0.00% covered (danger)
0.00%
0 / 1
6
 uncompress
0.00% covered (danger)
0.00%
0 / 9
0.00% covered (danger)
0.00%
0 / 1
12
 __sleep
0.00% covered (danger)
0.00%
0 / 18
0.00% covered (danger)
0.00%
0 / 1
30
 __wakeup
0.00% covered (danger)
0.00%
0 / 20
0.00% covered (danger)
0.00%
0 / 1
30
 isHappy
0.00% covered (danger)
0.00%
0 / 2
0.00% covered (danger)
0.00%
0 / 1
6
1<?php
2/**
3 * Efficient concatenated text storage.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @file
21 */
22
23/**
24 * Diff-based history compression
25 * Requires xdiff and zlib
26 *
27 * WARNING: Objects of this class are serialized and permanently stored in the DB.
28 * Do not change the name or visibility of any property!
29 */
30class DiffHistoryBlob implements HistoryBlob {
31    /** @var string[] Uncompressed item cache */
32    public $mItems = [];
33
34    /** @var int Total uncompressed size */
35    public $mSize = 0;
36
37    /**
38     * @var array|null Array of diffs. If a diff D from A to B is notated D = B - A,
39     * and Z is an empty string:
40     *
41     *              { item[map[i]] - item[map[i-1]]   where i > 0
42     *    diff[i] = {
43     *              { item[map[i]] - Z                where i = 0
44     */
45    public $mDiffs;
46
47    /** @var array The diff map, see above */
48    public $mDiffMap;
49
50    /** @var string The key for getText()
51     */
52    public $mDefaultKey;
53
54    /** @var string|null Compressed storage */
55    public $mCompressed;
56
57    /** @var bool True if the object is locked against further writes */
58    public $mFrozen = false;
59
60    /**
61     * @var int The maximum uncompressed size before the object becomes sad
62     * Should be less than max_allowed_packet
63     */
64    public $mMaxSize = 10_000_000;
65
66    /** @var int The maximum number of text items before the object becomes sad */
67    public $mMaxCount = 100;
68
69    /** Constants from xdiff.h */
70    private const XDL_BDOP_INS = 1;
71    private const XDL_BDOP_CPY = 2;
72    private const XDL_BDOP_INSB = 3;
73
74    public function __construct() {
75        if ( !function_exists( 'gzdeflate' ) ) {
76            throw new RuntimeException( "Need zlib support to read or write DiffHistoryBlob\n" );
77        }
78    }
79
80    /**
81     * @param string $text
82     * @return string
83     */
84    public function addItem( $text ) {
85        if ( $this->mFrozen ) {
86            throw new BadMethodCallException( __METHOD__ . ": Cannot add more items after sleep/wakeup" );
87        }
88
89        $this->mItems[] = $text;
90        $this->mSize += strlen( $text );
91        $this->mDiffs = null; // later
92        return (string)( count( $this->mItems ) - 1 );
93    }
94
95    /**
96     * @param string $key
97     * @return string
98     */
99    public function getItem( $key ) {
100        return $this->mItems[(int)$key];
101    }
102
103    /**
104     * @param string $text
105     */
106    public function setText( $text ) {
107        $this->mDefaultKey = $this->addItem( $text );
108    }
109
110    /**
111     * @return string
112     */
113    public function getText() {
114        return $this->getItem( $this->mDefaultKey );
115    }
116
117    private function compress() {
118        if ( !function_exists( 'xdiff_string_rabdiff' ) ) {
119            throw new RuntimeException( "Need xdiff support to write DiffHistoryBlob\n" );
120        }
121        if ( isset( $this->mDiffs ) ) {
122            // Already compressed
123            return;
124        }
125        if ( $this->mItems === [] ) {
126            return;
127        }
128
129        // Create two diff sequences: one for main text and one for small text
130        $sequences = [
131            'small' => [
132                'tail' => '',
133                'diffs' => [],
134                'map' => [],
135            ],
136            'main' => [
137                'tail' => '',
138                'diffs' => [],
139                'map' => [],
140            ],
141        ];
142        $smallFactor = 0.5;
143
144        $mItemsCount = count( $this->mItems );
145        for ( $i = 0; $i < $mItemsCount; $i++ ) {
146            $text = $this->mItems[$i];
147            if ( $i == 0 ) {
148                $seqName = 'main';
149            } else {
150                $mainTail = $sequences['main']['tail'];
151                if ( strlen( $text ) < strlen( $mainTail ) * $smallFactor ) {
152                    $seqName = 'small';
153                } else {
154                    $seqName = 'main';
155                }
156            }
157
158            $tail = $sequences[$seqName]['tail'];
159            $diff = $this->diff( $tail, $text );
160            $sequences[$seqName]['diffs'][] = $diff;
161            $sequences[$seqName]['map'][] = $i;
162            $sequences[$seqName]['tail'] = $text;
163        }
164
165        // Knit the sequences together
166        $tail = '';
167        $this->mDiffs = [];
168        $this->mDiffMap = [];
169        foreach ( $sequences as $seq ) {
170            if ( $seq['diffs'] === [] ) {
171                continue;
172            }
173            if ( $tail === '' ) {
174                $this->mDiffs[] = $seq['diffs'][0];
175            } else {
176                $head = $this->patch( '', $seq['diffs'][0] );
177                $this->mDiffs[] = $this->diff( $tail, $head );
178            }
179            $this->mDiffMap[] = $seq['map'][0];
180            $diffsCount = count( $seq['diffs'] );
181            for ( $i = 1; $i < $diffsCount; $i++ ) {
182                $this->mDiffs[] = $seq['diffs'][$i];
183                $this->mDiffMap[] = $seq['map'][$i];
184            }
185            $tail = $seq['tail'];
186        }
187    }
188
189    /**
190     * @param string $t1
191     * @param string $t2
192     * @return string
193     */
194    private function diff( $t1, $t2 ) {
195        return xdiff_string_rabdiff( $t1, $t2 );
196    }
197
198    /**
199     * @param string $base
200     * @param string $diff
201     * @return bool|string
202     */
203    private function patch( $base, $diff ) {
204        if ( function_exists( 'xdiff_string_bpatch' ) ) {
205            return xdiff_string_bpatch( $base, $diff );
206        }
207
208        # Pure PHP implementation
209
210        $header = unpack( 'Vofp/Vcsize', substr( $diff, 0, 8 ) );
211
212        # Check the checksum
213        $ofp = $this->xdiffAdler32( $base );
214        if ( $ofp !== substr( $diff, 0, 4 ) ) {
215            wfDebug( __METHOD__ . ": incorrect base checksum" );
216            return false;
217        }
218        if ( $header['csize'] != strlen( $base ) ) {
219            wfDebug( __METHOD__ . ": incorrect base length" );
220            return false;
221        }
222
223        $p = 8;
224        $out = '';
225        while ( $p < strlen( $diff ) ) {
226            $x = unpack( 'Cop', substr( $diff, $p, 1 ) );
227            $op = $x['op'];
228            ++$p;
229            switch ( $op ) {
230                case self::XDL_BDOP_INS:
231                    $x = unpack( 'Csize', substr( $diff, $p, 1 ) );
232                    $p++;
233                    $out .= substr( $diff, $p, $x['size'] );
234                    $p += $x['size'];
235                    break;
236                case self::XDL_BDOP_INSB:
237                    $x = unpack( 'Vcsize', substr( $diff, $p, 4 ) );
238                    $p += 4;
239                    $out .= substr( $diff, $p, $x['csize'] );
240                    $p += $x['csize'];
241                    break;
242                case self::XDL_BDOP_CPY:
243                    $x = unpack( 'Voff/Vcsize', substr( $diff, $p, 8 ) );
244                    $p += 8;
245                    $out .= substr( $base, $x['off'], $x['csize'] );
246                    break;
247                default:
248                    wfDebug( __METHOD__ . ": invalid op" );
249                    return false;
250            }
251        }
252        return $out;
253    }
254
255    /**
256     * Compute a binary "Adler-32" checksum as defined by LibXDiff, i.e. with
257     * the bytes backwards and initialised with 0 instead of 1. See T36428.
258     *
259     * @param string $s
260     * @return string
261     */
262    public function xdiffAdler32( $s ) {
263        static $init;
264        if ( $init === null ) {
265            $init = str_repeat( "\xf0", 205 ) . "\xee" . str_repeat( "\xf0", 67 ) . "\x02";
266        }
267
268        // The real Adler-32 checksum of $init is zero, so it initialises the
269        // state to zero, as it is at the start of LibXDiff's checksum
270        // algorithm. Appending the subject string then simulates LibXDiff.
271        return strrev( hash( 'adler32', $init . $s, true ) );
272    }
273
274    private function uncompress() {
275        if ( !$this->mDiffs ) {
276            return;
277        }
278        $tail = '';
279        $mDiffsCount = count( $this->mDiffs );
280        for ( $diffKey = 0; $diffKey < $mDiffsCount; $diffKey++ ) {
281            $textKey = $this->mDiffMap[$diffKey];
282            $text = $this->patch( $tail, $this->mDiffs[$diffKey] );
283            $this->mItems[$textKey] = $text;
284            $tail = $text;
285        }
286    }
287
288    /**
289     * @return array
290     */
291    public function __sleep() {
292        $this->compress();
293        if ( $this->mItems === [] ) {
294            $info = false;
295        } else {
296            // Take forward differences to improve the compression ratio for sequences
297            $map = '';
298            $prev = 0;
299            foreach ( $this->mDiffMap as $i ) {
300                if ( $map !== '' ) {
301                    $map .= ',';
302                }
303                $map .= $i - $prev;
304                $prev = $i;
305            }
306            $info = [
307                'diffs' => $this->mDiffs,
308                'map' => $map
309            ];
310        }
311        if ( isset( $this->mDefaultKey ) ) {
312            $info['default'] = $this->mDefaultKey;
313        }
314        $this->mCompressed = gzdeflate( serialize( $info ) );
315        return [ 'mCompressed' ];
316    }
317
318    public function __wakeup() {
319        // addItem() doesn't work if mItems is partially filled from mDiffs
320        $this->mFrozen = true;
321        $info = HistoryBlobUtils::unserializeArray( gzinflate( $this->mCompressed ) );
322        $this->mCompressed = null;
323
324        if ( !$info ) {
325            // Empty object
326            return;
327        }
328
329        if ( isset( $info['default'] ) ) {
330            $this->mDefaultKey = $info['default'];
331        }
332        $this->mDiffs = $info['diffs'];
333        if ( isset( $info['base'] ) ) {
334            // Old format
335            $this->mDiffMap = range( 0, count( $this->mDiffs ) - 1 );
336            array_unshift( $this->mDiffs,
337                pack( 'VVCV', 0, 0, self::XDL_BDOP_INSB, strlen( $info['base'] ) ) .
338                $info['base'] );
339        } else {
340            // New format
341            $map = explode( ',', $info['map'] );
342            $cur = 0;
343            $this->mDiffMap = [];
344            foreach ( $map as $i ) {
345                $cur += $i;
346                $this->mDiffMap[] = $cur;
347            }
348        }
349        $this->uncompress();
350    }
351
352    /**
353     * Helper function for compression jobs
354     * Returns true until the object is "full" and ready to be committed
355     *
356     * @return bool
357     */
358    public function isHappy() {
359        return $this->mSize < $this->mMaxSize
360            && count( $this->mItems ) < $this->mMaxCount;
361    }
362
363}