Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
0.00% covered (danger)
0.00%
0 / 85
0.00% covered (danger)
0.00%
0 / 4
CRAP
0.00% covered (danger)
0.00%
0 / 1
FlowReserializeRevisionContent
0.00% covered (danger)
0.00%
0 / 79
0.00% covered (danger)
0.00%
0 / 4
272
0.00% covered (danger)
0.00%
0 / 1
 __construct
0.00% covered (danger)
0.00%
0 / 6
0.00% covered (danger)
0.00%
0 / 1
2
 getBodyContent
0.00% covered (danger)
0.00%
0 / 3
0.00% covered (danger)
0.00%
0 / 1
2
 makeContentUpdatesAllowed
0.00% covered (danger)
0.00%
0 / 15
0.00% covered (danger)
0.00%
0 / 1
2
 execute
0.00% covered (danger)
0.00%
0 / 55
0.00% covered (danger)
0.00%
0 / 1
182
1<?php
2
3namespace Flow\Maintenance;
4
5use BatchRowIterator;
6use Flow\Container;
7use Flow\Conversion\Utils;
8use Flow\Data\ManagerGroup;
9use Flow\Data\ObjectManager;
10use Flow\DbFactory;
11use Flow\Model\AbstractRevision;
12use Flow\Model\UUID;
13use Flow\Parsoid\ContentFixer;
14use Maintenance;
15use MediaWiki\WikiMap\WikiMap;
16use ReflectionClass;
17use ReflectionMethod;
18use Wikimedia\Diff\Diff;
19use Wikimedia\Diff\UnifiedDiffFormatter;
20
// Resolve the MediaWiki installation directory: use the MW_INSTALL_PATH
// environment variable when it is set, otherwise fall back to the directory
// three levels above this script.
$IP = getenv( 'MW_INSTALL_PATH' );
$IP = ( $IP === false ) ? __DIR__ . '/../../..' : $IP;

// Load the Maintenance base class that the script below extends.
require_once $IP . '/maintenance/Maintenance.php';
27
/**
 * Maintenance script that round-trips stored HTML revision content through
 * wikitext and back to HTML, so that it is serialized again by the current
 * Parsoid version (Utils::PARSOID_VERSION).
 *
 * With --dry-run nothing is written; a diff of each changed revision is
 * printed instead.
 *
 * @ingroup Maintenance
 */
class FlowReserializeRevisionContent extends Maintenance {
    /**
     * Reflection handle on AbstractRevision::setContentRaw(), made accessible
     * at the start of execute().
     *
     * @var ReflectionMethod
     */
    private $setContentRawMethod;

    /**
     * Fetched from the Flow container ('db.factory') in execute().
     *
     * @var DbFactory
     */
    private $dbFactory;

    /**
     * Fetched from the Flow container ('storage') in execute().
     *
     * @var ManagerGroup
     */
    private $storage;

    /**
     * Registers the script description, its two options, a batch size of 50
     * and the dependency on the Flow extension.
     */
    public function __construct() {
        parent::__construct();
        $this->addDescription( "Reserializes HTML revision contents to the latest Parsoid version." );
        $this->addOption( 'dry-run', 'Log hypothetical updates but don\'t write them to the database' );
        $this->addOption( 'raw-diff', 'In dry-run mode, show diffs of raw HTML rather than just the <body> (noisy)' );
        $this->setBatchSize( 50 );
        $this->requireExtension( 'Flow' );
    }

    /**
     * Extracts the inner HTML of the first <body> element of an HTML document.
     *
     * @param string $html HTML document to parse
     * @return string Whatever Utils::getInnerHtml() yields for the <body> node
     *  (presumably a string; verify against Utils)
     */
    protected function getBodyContent( $html ) {
        $dom = ContentFixer::createDOM( $html );
        $body = $dom->getElementsByTagName( 'body' )->item( 0 );
        return Utils::getInnerHtml( $body );
    }

    /**
     * Uses reflection to add the content-related columns to the
     * 'allowedUpdateColumns' list of the storage object behind $om, so that a
     * later put() may update them.
     *
     * Safe to call repeatedly for the same manager: the merged list is
     * de-duplicated with array_unique().
     *
     * @param ObjectManager $om Manager whose 'storage' property is patched
     */
    protected function makeContentUpdatesAllowed( ObjectManager $om ) {
        // Do reflection hackery to unblock updates to rev_content
        $omClass = new ReflectionClass( get_class( $om ) );
        $storageProp = $omClass->getProperty( 'storage' );
        $storageProp->setAccessible( true );
        $storageObj = $storageProp->getValue( $om );

        $storageClass = new ReflectionClass( get_class( $storageObj ) );
        $allowedUpdateColumnsProp = $storageClass->getProperty( 'allowedUpdateColumns' );
        $allowedUpdateColumnsProp->setAccessible( true );
        $allowedUpdateColumnsValue = $allowedUpdateColumnsProp->getValue( $storageObj );

        $newAllowedUpdateColumnsValue = array_unique( array_merge( $allowedUpdateColumnsValue, [
            'rev_content',
            'rev_content_length',
            'rev_flags',
            'rev_previous_content_length',
        ] ) );
        $allowedUpdateColumnsProp->setValue( $storageObj, $newAllowedUpdateColumnsValue );
    }

    /**
     * Walks flow_revision in batches and, for every HTML revision of the
     * current wiki whose stored Parsoid version differs from
     * Utils::PARSOID_VERSION, converts the content HTML -> wikitext -> HTML.
     *
     * Revisions whose converted HTML equals the stored HTML are skipped.
     * Otherwise the result is either reported (--dry-run) or written back
     * through the revision's ObjectManager.
     */
    public function execute() {
        // Reflection hackery: make setContentRaw() callable
        $this->setContentRawMethod = new ReflectionMethod( AbstractRevision::class, 'setContentRaw' );
        $this->setContentRawMethod->setAccessible( true );

        $this->dbFactory = Container::get( 'db.factory' );
        $this->storage = Container::get( 'storage' );

        $dbr = $this->dbFactory->getDb( DB_REPLICA );
        $dbw = $this->dbFactory->getDb( DB_PRIMARY );
        $newVersion = Utils::PARSOID_VERSION;

        // The command-line options cannot change while the script runs, so
        // look them up once instead of once per row.
        $dryRun = $this->hasOption( 'dry-run' );
        $rawDiff = $this->hasOption( 'raw-diff' );

        $iterator = new BatchRowIterator( $dbw, 'flow_revision', 'rev_id', $this->getBatchSize() );
        $iterator->addConditions( [
            'rev_user_wiki' => WikiMap::getCurrentWikiId(),
            'rev_flags' . $dbr->buildLike( $dbr->anyString(), 'html', $dbr->anyString() ),
        ] );
        $iterator->setFetchColumns( [ 'rev_id', 'rev_type', 'rev_content', 'rev_flags' ] );
        $iterator->setCaller( __METHOD__ );

        foreach ( $iterator as $batch ) {
            foreach ( $batch as $row ) {
                $revId = UUID::create( $row->rev_id );
                $om = $this->storage->getStorage( $row->rev_type );
                $rev = $om->get( $revId );
                $revIdAlpha = $revId->getAlphadecimal();
                if ( !$rev ) {
                    $this->error( 'Could not load revision: ' . $revIdAlpha );
                    continue;
                }
                if ( $rev->getContentFormat() !== 'html' ) {
                    // Paranoia: we check rev_flags LIKE '%html%', protect against that picking up non-HTML
                    // revisions that have a flag that contains the substring 'html'
                    continue;
                }
                $storedHtml = $rev->getContent();
                $storedVersion = Utils::getParsoidVersion( $storedHtml );
                if ( $storedVersion === $newVersion ) {
                    // Already serialized by the target version
                    continue;
                }
                if ( $storedHtml === '' || $storedHtml === '<html><head></head><body></body></html>' ) {
                    // Empty content: nothing to reserialize
                    continue;
                }

                $title = $rev->getCollection()->getTitle();
                // Convert from HTML to wikitext then back to HTML
                $wikitext = Utils::convert( 'html', 'wikitext', $storedHtml, $title );
                $convertedHtml = Utils::convert( 'wikitext', 'html', $wikitext, $title );
                if ( $convertedHtml === $storedHtml ) {
                    continue;
                }

                if ( $dryRun ) {
                    $this->reportDryRun(
                        $revIdAlpha,
                        $storedVersion,
                        $newVersion,
                        $storedHtml,
                        $convertedHtml,
                        $rawDiff
                    );
                } else {
                    $this->writeReserializedContent( $om, $rev, $revIdAlpha, $convertedHtml, $wikitext );
                }
            }
        }
    }

    /**
     * Prints what a real run would change for one revision: either a note
     * that the compared content is identical, or a unified diff.
     *
     * @param string $revIdAlpha Alphadecimal revision ID, used in the messages
     * @param string|null $storedVersion Parsoid version found in the stored HTML
     * @param string $newVersion Parsoid version being converted to
     * @param string $storedHtml HTML currently stored for the revision
     * @param string $convertedHtml HTML produced by the round trip
     * @param bool $rawDiff Compare the full HTML instead of only the <body> contents
     */
    private function reportDryRun(
        $revIdAlpha, $storedVersion, $newVersion, $storedHtml, $convertedHtml, $rawDiff
    ) {
        if ( $rawDiff ) {
            $fromDiff = $storedHtml;
            $toDiff = $convertedHtml;
        } else {
            $fromDiff = $this->getBodyContent( $storedHtml );
            $toDiff = $this->getBodyContent( $convertedHtml );
        }

        if ( $fromDiff === $toDiff ) {
            $this->output( "Revision $revIdAlpha version $storedVersion -> $newVersion: no change to body\n" );
            return;
        }

        // Format the diff before printing the header so that a failure while
        // formatting does not leave a dangling header line in the output.
        $diff = new Diff( explode( "\n", $fromDiff ), explode( "\n", $toDiff ) );
        $format = new UnifiedDiffFormatter();
        $output = $format->format( $diff );
        $this->output( "Revision $revIdAlpha version $storedVersion -> $newVersion: diff\n" );
        $this->output( $output . "\n" );
    }

    /**
     * Stores the reserialized content on the revision and saves it.
     *
     * An exception thrown by put() is reported through error() and does not
     * abort the run, so the remaining revisions are still processed.
     *
     * @param ObjectManager $om Manager the revision was loaded from
     * @param AbstractRevision $rev Revision to update
     * @param string $revIdAlpha Alphadecimal revision ID, used in the messages
     * @param string $convertedHtml New HTML content
     * @param string $wikitext Wikitext the new HTML was generated from
     */
    private function writeReserializedContent(
        ObjectManager $om, $rev, $revIdAlpha, $convertedHtml, $wikitext
    ) {
        $this->makeContentUpdatesAllowed( $om );
        $this->setContentRawMethod->invoke( $rev, [ 'html' => $convertedHtml, 'wikitext' => $wikitext ] );
        try {
            $om->put( $rev );
            $this->output( "Updated revision $revIdAlpha\n" );
        } catch ( \Exception $e ) {
            // Keep the revision ID and the exception text visibly separated
            $this->error( "Failed to update revision $revIdAlpha: {$e->getMessage()}\n" );
        }
    }
}
166
// Name the class to run, then include the file named by RUN_MAINTENANCE_IF_MAIN
// (presumably the runner that executes $maintClass when this file is the
// entry point -- confirm against Maintenance.php).
$maintClass = \Flow\Maintenance\FlowReserializeRevisionContent::class;
require_once RUN_MAINTENANCE_IF_MAIN;