Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
0.00% covered (danger)
0.00%
0 / 85
0.00% covered (danger)
0.00%
0 / 4
CRAP
0.00% covered (danger)
0.00%
0 / 1
FlowReserializeRevisionContent
0.00% covered (danger)
0.00%
0 / 79
0.00% covered (danger)
0.00%
0 / 4
272
0.00% covered (danger)
0.00%
0 / 1
 __construct
0.00% covered (danger)
0.00%
0 / 6
0.00% covered (danger)
0.00%
0 / 1
2
 getBodyContent
0.00% covered (danger)
0.00%
0 / 3
0.00% covered (danger)
0.00%
0 / 1
2
 makeContentUpdatesAllowed
0.00% covered (danger)
0.00%
0 / 15
0.00% covered (danger)
0.00%
0 / 1
2
 execute
0.00% covered (danger)
0.00%
0 / 55
0.00% covered (danger)
0.00%
0 / 1
182
1<?php
2
3namespace Flow\Maintenance;
4
5use BatchRowIterator;
6use Flow\Container;
7use Flow\Conversion\Utils;
8use Flow\Data\ManagerGroup;
9use Flow\Data\ObjectManager;
10use Flow\DbFactory;
11use Flow\Model\AbstractRevision;
12use Flow\Model\UUID;
13use Flow\Parsoid\ContentFixer;
14use Maintenance;
15use MediaWiki\WikiMap\WikiMap;
16use ReflectionClass;
17use ReflectionMethod;
18use Wikimedia\Diff\Diff;
19use Wikimedia\Diff\UnifiedDiffFormatter;
20use Wikimedia\Rdbms\IExpression;
21use Wikimedia\Rdbms\LikeValue;
22
// Resolve the MediaWiki installation directory: use the MW_INSTALL_PATH
// environment variable when it is set, otherwise fall back to the directory
// three levels above this script.
$IP = getenv( 'MW_INSTALL_PATH' );
$IP = ( $IP !== false ) ? $IP : __DIR__ . '/../../..';

require_once $IP . '/maintenance/Maintenance.php';
29
/**
 * Reserializes HTML revision contents to the latest Parsoid version.
 *
 * Every row of flow_revision that belongs to the current wiki and whose
 * rev_flags contain 'html' is loaded; if its stored Parsoid version differs
 * from Utils::PARSOID_VERSION the HTML is converted to wikitext and back to
 * HTML, and the result is either written back or (with --dry-run) shown as a
 * diff.
 *
 * @ingroup Maintenance
 */
class FlowReserializeRevisionContent extends Maintenance {
    /**
     * Reflection handle on AbstractRevision::setContentRaw(), made accessible in execute().
     *
     * @var ReflectionMethod
     */
    private $setContentRawMethod;

    /**
     * @var DbFactory
     */
    private $dbFactory;

    /**
     * @var ManagerGroup
     */
    private $storage;

    public function __construct() {
        parent::__construct();
        $this->addDescription( "Reserializes HTML revision contents to the latest Parsoid version." );
        $this->addOption( 'dry-run', 'Log hypothetical updates but don\'t write them to the database' );
        $this->addOption( 'raw-diff', 'In dry-run mode, show diffs of raw HTML rather than just the <body> (noisy)' );
        $this->setBatchSize( 50 );
        $this->requireExtension( 'Flow' );
    }

    /**
     * Extract the inner HTML of the first <body> element of an HTML document.
     *
     * @param string $html Full HTML document
     * @return string Inner HTML of the first <body> element
     */
    protected function getBodyContent( $html ) {
        $dom = ContentFixer::createDOM( $html );
        $body = $dom->getElementsByTagName( 'body' )->item( 0 );
        return Utils::getInnerHtml( $body );
    }

    /**
     * Add the content-related columns to the list of columns that the storage
     * object behind the given ObjectManager allows to be updated.
     *
     * This reaches into non-public properties via reflection. Calling it more
     * than once for the same manager is harmless: the column list is merged
     * and de-duplicated each time.
     *
     * @param ObjectManager $om Manager whose underlying storage should be unlocked
     */
    protected function makeContentUpdatesAllowed( ObjectManager $om ) {
        // Do reflection hackery to unblock updates to rev_content
        $omClass = new ReflectionClass( get_class( $om ) );
        $storageProp = $omClass->getProperty( 'storage' );
        $storageProp->setAccessible( true );
        $storageObj = $storageProp->getValue( $om );

        $storageClass = new ReflectionClass( get_class( $storageObj ) );
        $allowedUpdateColumnsProp = $storageClass->getProperty( 'allowedUpdateColumns' );
        $allowedUpdateColumnsProp->setAccessible( true );
        $allowedUpdateColumnsValue = $allowedUpdateColumnsProp->getValue( $storageObj );

        $newAllowedUpdateColumnsValue = array_unique( array_merge( $allowedUpdateColumnsValue, [
            'rev_content',
            'rev_content_length',
            'rev_flags',
            'rev_previous_content_length',
        ] ) );
        $allowedUpdateColumnsProp->setValue( $storageObj, $newAllowedUpdateColumnsValue );
    }

    /**
     * Iterate over all HTML revisions of the current wiki and reserialize the
     * ones whose stored Parsoid version differs from the current one.
     */
    public function execute() {
        // Reflection hackery: make setContentRaw() callable
        $this->setContentRawMethod = new ReflectionMethod( AbstractRevision::class, 'setContentRaw' );
        $this->setContentRawMethod->setAccessible( true );

        $this->dbFactory = Container::get( 'db.factory' );
        $this->storage = Container::get( 'storage' );

        $dbr = $this->dbFactory->getDb( DB_REPLICA );
        $dbw = $this->dbFactory->getDb( DB_PRIMARY );
        $newVersion = Utils::PARSOID_VERSION;

        // The options cannot change while the script runs; look them up once.
        $dryRun = $this->hasOption( 'dry-run' );
        $rawDiff = $this->hasOption( 'raw-diff' );

        // NOTE(review): rows are read through the primary connection while the
        // replica handle is only used to build the LIKE expression; presumably
        // intentional so that fresh data is read - confirm.
        $iterator = new BatchRowIterator( $dbw, 'flow_revision', 'rev_id', $this->getBatchSize() );
        $iterator->addConditions( [
            'rev_user_wiki' => WikiMap::getCurrentWikiId(),
            $dbr->expr( 'rev_flags', IExpression::LIKE, new LikeValue( $dbr->anyString(), 'html', $dbr->anyString() ) ),
        ] );
        $iterator->setFetchColumns( [ 'rev_id', 'rev_type', 'rev_content', 'rev_flags' ] );
        $iterator->setCaller( __METHOD__ );

        foreach ( $iterator as $batch ) {
            foreach ( $batch as $row ) {
                $revId = UUID::create( $row->rev_id );
                $om = $this->storage->getStorage( $row->rev_type );
                $rev = $om->get( $revId );
                $revIdAlpha = $revId->getAlphadecimal();
                if ( !$rev ) {
                    $this->error( 'Could not load revision: ' . $revIdAlpha );
                    continue;
                }
                if ( $rev->getContentFormat() !== 'html' ) {
                    // Paranoia: we check rev_flags LIKE '%html%', protect against that picking up non-HTML
                    // revisions that have a flag that contains the substring 'html'
                    continue;
                }
                $storedHtml = $rev->getContent();
                $storedVersion = Utils::getParsoidVersion( $storedHtml );
                if ( $storedVersion === $newVersion ) {
                    // Already serialized by the current version
                    continue;
                }
                if ( $storedHtml === '' || $storedHtml === '<html><head></head><body></body></html>' ) {
                    // Nothing to reserialize in an empty document
                    continue;
                }

                $title = $rev->getCollection()->getTitle();
                // Convert from HTML to wikitext then back to HTML
                $wikitext = Utils::convert( 'html', 'wikitext', $storedHtml, $title );
                $convertedHtml = Utils::convert( 'wikitext', 'html', $wikitext, $title );
                if ( $convertedHtml === $storedHtml ) {
                    continue;
                }

                if ( $dryRun ) {
                    $this->reportDryRunChange(
                        $revIdAlpha,
                        $storedVersion,
                        $newVersion,
                        $storedHtml,
                        $convertedHtml,
                        $rawDiff
                    );
                    continue;
                }

                $this->makeContentUpdatesAllowed( $om );
                $this->setContentRawMethod->invoke( $rev, [ 'html' => $convertedHtml, 'wikitext' => $wikitext ] );
                try {
                    $om->put( $rev );
                    $this->output( "Updated revision $revIdAlpha\n" );
                } catch ( \Exception $e ) {
                    // Separate the revision ID from the exception text; they used to run together.
                    $this->error( "Failed to update revision $revIdAlpha: {$e->getMessage()}\n" );
                }
            }
        }
    }

    /**
     * Print what a real run would change for one revision (dry-run mode).
     *
     * @param string $revIdAlpha Alphadecimal revision ID, used in the log line
     * @param string|null $storedVersion Parsoid version found in the stored HTML
     * @param string $newVersion Parsoid version the content is converted to
     * @param string $storedHtml HTML currently stored for the revision
     * @param string $convertedHtml HTML produced by the round trip
     * @param bool $rawDiff Diff the raw HTML documents instead of only their <body> contents
     */
    private function reportDryRunChange(
        $revIdAlpha, $storedVersion, $newVersion, $storedHtml, $convertedHtml, $rawDiff
    ) {
        if ( $rawDiff ) {
            $fromDiff = $storedHtml;
            $toDiff = $convertedHtml;
        } else {
            $fromDiff = $this->getBodyContent( $storedHtml );
            $toDiff = $this->getBodyContent( $convertedHtml );
        }

        if ( $fromDiff === $toDiff ) {
            $this->output( "Revision $revIdAlpha version $storedVersion -> $newVersion: no change to body\n" );
            return;
        }

        $diff = new Diff( explode( "\n", $fromDiff ), explode( "\n", $toDiff ) );
        $format = new UnifiedDiffFormatter();
        $output = $format->format( $diff );
        $this->output( "Revision $revIdAlpha version $storedVersion -> $newVersion: diff\n" );
        $this->output( $output . "\n" );
    }
}
168
// Name the maintenance class defined in this file, then include the file
// referenced by RUN_MAINTENANCE_IF_MAIN (presumably the runner that executes
// $maintClass when this script is the entry point - confirm against core).
$maintClass = \Flow\Maintenance\FlowReserializeRevisionContent::class;
require_once RUN_MAINTENANCE_IF_MAIN;