Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 85 |
|
0.00% |
0 / 4 |
CRAP | |
0.00% |
0 / 1 |
FlowReserializeRevisionContent | |
0.00% |
0 / 79 |
|
0.00% |
0 / 4 |
272 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
2 | |||
getBodyContent | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
makeContentUpdatesAllowed | |
0.00% |
0 / 15 |
|
0.00% |
0 / 1 |
2 | |||
execute | |
0.00% |
0 / 55 |
|
0.00% |
0 / 1 |
182 |
1 | <?php |
2 | |
3 | namespace Flow\Maintenance; |
4 | |
5 | use BatchRowIterator; |
6 | use Flow\Container; |
7 | use Flow\Conversion\Utils; |
8 | use Flow\Data\ManagerGroup; |
9 | use Flow\Data\ObjectManager; |
10 | use Flow\DbFactory; |
11 | use Flow\Model\AbstractRevision; |
12 | use Flow\Model\UUID; |
13 | use Flow\Parsoid\ContentFixer; |
14 | use Maintenance; |
15 | use MediaWiki\WikiMap\WikiMap; |
16 | use ReflectionClass; |
17 | use ReflectionMethod; |
18 | use Wikimedia\Diff\Diff; |
19 | use Wikimedia\Diff\UnifiedDiffFormatter; |
20 | use Wikimedia\Rdbms\IExpression; |
21 | use Wikimedia\Rdbms\LikeValue; |
22 | |
// Resolve the MediaWiki install path: the MW_INSTALL_PATH environment variable wins;
// when it is not set at all, fall back to three directories above this file.
$IP = getenv( 'MW_INSTALL_PATH' );
$IP = ( $IP !== false ) ? $IP : __DIR__ . '/../../..';

// Load the maintenance framework that provides the Maintenance base class.
require_once "$IP/maintenance/Maintenance.php";
29 | |
/**
 * Maintenance script that re-serializes stored HTML revision content.
 *
 * Every HTML revision of the current wiki whose stored Parsoid version differs
 * from Utils::PARSOID_VERSION is converted HTML -> wikitext -> HTML through
 * Utils::convert(). The result is either written back to storage or, with
 * --dry-run, reported as a diff.
 *
 * @ingroup Maintenance
 */
class FlowReserializeRevisionContent extends Maintenance {
	/**
	 * Reflection handle on AbstractRevision::setContentRaw(), made accessible in execute().
	 *
	 * @var ReflectionMethod
	 */
	private $setContentRawMethod;

	/**
	 * @var DbFactory
	 */
	private $dbFactory;

	/**
	 * @var ManagerGroup
	 */
	private $storage;

	/**
	 * Registers the script description, the --dry-run and --raw-diff options,
	 * a default batch size of 50 and the dependency on the Flow extension.
	 */
	public function __construct() {
		parent::__construct();
		$this->addDescription( "Reserializes HTML revision contents to the latest Parsoid version." );
		$this->addOption( 'dry-run', 'Log hypothetical updates but don\'t write them to the database' );
		$this->addOption(
			'raw-diff',
			'In dry-run mode, show diffs of raw HTML rather than just the <body> (noisy)'
		);
		$this->setBatchSize( 50 );
		$this->requireExtension( 'Flow' );
	}

	/**
	 * Extract the inner HTML of the first <body> element of a document.
	 *
	 * @param string $html HTML to parse with ContentFixer::createDOM()
	 * @return string Result of Utils::getInnerHtml() for the first <body> element
	 *  (treated as a string by the dry-run diff code)
	 */
	protected function getBodyContent( $html ) {
		$dom = ContentFixer::createDOM( $html );
		$body = $dom->getElementsByTagName( 'body' )->item( 0 );
		return Utils::getInnerHtml( $body );
	}

	/**
	 * Allow the content-related columns of a revision to be updated through
	 * the given object manager.
	 *
	 * Uses reflection to reach the manager's 'storage' property and extends
	 * that object's 'allowedUpdateColumns' list. Calling this repeatedly for
	 * the same manager is harmless: the merged list is de-duplicated.
	 *
	 * @param ObjectManager $om Object manager whose storage should accept content updates
	 */
	protected function makeContentUpdatesAllowed( ObjectManager $om ) {
		// Do reflection hackery to unblock updates to rev_content
		$omClass = new ReflectionClass( get_class( $om ) );
		$storageProp = $omClass->getProperty( 'storage' );
		$storageProp->setAccessible( true );
		$storageObj = $storageProp->getValue( $om );

		$storageClass = new ReflectionClass( get_class( $storageObj ) );
		$allowedUpdateColumnsProp = $storageClass->getProperty( 'allowedUpdateColumns' );
		$allowedUpdateColumnsProp->setAccessible( true );
		$allowedUpdateColumnsValue = $allowedUpdateColumnsProp->getValue( $storageObj );

		$newAllowedUpdateColumnsValue = array_unique( array_merge( $allowedUpdateColumnsValue, [
			'rev_content',
			'rev_content_length',
			'rev_flags',
			'rev_previous_content_length',
		] ) );
		$allowedUpdateColumnsProp->setValue( $storageObj, $newAllowedUpdateColumnsValue );
	}

	/**
	 * Walk all HTML revisions of the current wiki in batches and re-serialize
	 * those whose stored Parsoid version is not Utils::PARSOID_VERSION.
	 *
	 * Revisions that cannot be loaded are reported via error() and skipped.
	 * A failed write is reported via error() and does not abort the run.
	 */
	public function execute() {
		// Reflection hackery: make setContentRaw() callable
		$this->setContentRawMethod = new ReflectionMethod( AbstractRevision::class, 'setContentRaw' );
		$this->setContentRawMethod->setAccessible( true );

		$this->dbFactory = Container::get( 'db.factory' );
		$this->storage = Container::get( 'storage' );

		$dbr = $this->dbFactory->getDb( DB_REPLICA );
		$dbw = $this->dbFactory->getDb( DB_PRIMARY );
		$newVersion = Utils::PARSOID_VERSION;

		// Command-line options do not change while the script runs; read them once
		// instead of once per revision.
		$dryRun = $this->hasOption( 'dry-run' );
		$rawDiff = $this->hasOption( 'raw-diff' );

		// Note that the rows are iterated on the primary connection; the replica
		// handle is only used to build the LIKE expression.
		$iterator = new BatchRowIterator( $dbw, 'flow_revision', 'rev_id', $this->getBatchSize() );
		$iterator->addConditions( [
			'rev_user_wiki' => WikiMap::getCurrentWikiId(),
			$dbr->expr(
				'rev_flags',
				IExpression::LIKE,
				new LikeValue( $dbr->anyString(), 'html', $dbr->anyString() )
			),
		] );
		$iterator->setFetchColumns( [ 'rev_id', 'rev_type', 'rev_content', 'rev_flags' ] );
		$iterator->setCaller( __METHOD__ );

		foreach ( $iterator as $batch ) {
			foreach ( $batch as $row ) {
				$revId = UUID::create( $row->rev_id );
				$om = $this->storage->getStorage( $row->rev_type );
				$rev = $om->get( $revId );
				$revIdAlpha = $revId->getAlphadecimal();
				if ( !$rev ) {
					$this->error( 'Could not load revision: ' . $revIdAlpha );
					continue;
				}
				if ( $rev->getContentFormat() !== 'html' ) {
					// Paranoia: we check rev_flags LIKE '%html%', protect against that picking up non-HTML
					// revisions that have a flag that contains the substring 'html'
					continue;
				}
				$storedHtml = $rev->getContent();
				$storedVersion = Utils::getParsoidVersion( $storedHtml );
				if ( $storedVersion === $newVersion ) {
					// Already serialized with the current version
					continue;
				}
				if ( $storedHtml === '' || $storedHtml === '<html><head></head><body></body></html>' ) {
					// Nothing to convert in an empty document
					continue;
				}

				$title = $rev->getCollection()->getTitle();
				// Convert from HTML to wikitext then back to HTML
				$wikitext = Utils::convert( 'html', 'wikitext', $storedHtml, $title );
				$convertedHtml = Utils::convert( 'wikitext', 'html', $wikitext, $title );
				if ( $convertedHtml === $storedHtml ) {
					continue;
				}

				if ( $dryRun ) {
					$this->outputDryRunDiff(
						$revIdAlpha,
						$storedVersion,
						$newVersion,
						$storedHtml,
						$convertedHtml,
						$rawDiff
					);
					continue;
				}

				$this->makeContentUpdatesAllowed( $om );
				$this->setContentRawMethod->invoke(
					$rev,
					[ 'html' => $convertedHtml, 'wikitext' => $wikitext ]
				);
				try {
					$om->put( $rev );
					$this->output( "Updated revision $revIdAlpha\n" );
				} catch ( \Exception $e ) {
					// No trailing newline here: error() terminates the line itself,
					// matching the 'Could not load revision' call above.
					$this->error( "Failed to update revision $revIdAlpha: {$e->getMessage()}" );
				}
			}
		}
	}

	/**
	 * Report what a non-dry run would have changed for a single revision.
	 *
	 * @param string $revIdAlpha Alphadecimal revision ID shown in the log line
	 * @param mixed $storedVersion Value of Utils::getParsoidVersion() for the stored HTML;
	 *  only interpolated into the log line
	 * @param mixed $newVersion Target version (Utils::PARSOID_VERSION); only interpolated
	 *  into the log line
	 * @param string $storedHtml HTML currently stored for the revision
	 * @param string $convertedHtml HTML produced by the wikitext round trip
	 * @param bool $rawDiff True to diff the complete documents, false to diff only the
	 *  inner HTML of their <body> elements
	 */
	private function outputDryRunDiff(
		$revIdAlpha,
		$storedVersion,
		$newVersion,
		$storedHtml,
		$convertedHtml,
		$rawDiff
	) {
		if ( $rawDiff ) {
			$fromDiff = $storedHtml;
			$toDiff = $convertedHtml;
		} else {
			$fromDiff = $this->getBodyContent( $storedHtml );
			$toDiff = $this->getBodyContent( $convertedHtml );
		}

		if ( $fromDiff === $toDiff ) {
			$this->output( "Revision $revIdAlpha version $storedVersion -> $newVersion: no change to body\n" );
			return;
		}

		// Line-based unified diff of the two serializations
		$diff = new Diff( explode( "\n", $fromDiff ), explode( "\n", $toDiff ) );
		$format = new UnifiedDiffFormatter();
		$output = $format->format( $diff );
		$this->output( "Revision $revIdAlpha version $storedVersion -> $newVersion: diff\n" );
		$this->output( $output . "\n" );
	}
}
168 | |
// Expose this script's class name in $maintClass, then include the file named by
// RUN_MAINTENANCE_IF_MAIN. That constant is not defined in this file; presumably it
// comes from the Maintenance.php required near the top and runs the class named in
// $maintClass when this file is the entry point - confirm against MediaWiki core.
$maintClass = FlowReserializeRevisionContent::class;
require_once RUN_MAINTENANCE_IF_MAIN;