Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 85 |
|
0.00% |
0 / 4 |
CRAP | |
0.00% |
0 / 1 |
FlowReserializeRevisionContent | |
0.00% |
0 / 79 |
|
0.00% |
0 / 4 |
272 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
2 | |||
getBodyContent | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
makeContentUpdatesAllowed | |
0.00% |
0 / 15 |
|
0.00% |
0 / 1 |
2 | |||
execute | |
0.00% |
0 / 55 |
|
0.00% |
0 / 1 |
182 |
1 | <?php |
2 | |
3 | namespace Flow\Maintenance; |
4 | |
5 | use BatchRowIterator; |
6 | use Flow\Container; |
7 | use Flow\Conversion\Utils; |
8 | use Flow\Data\ManagerGroup; |
9 | use Flow\Data\ObjectManager; |
10 | use Flow\DbFactory; |
11 | use Flow\Model\AbstractRevision; |
12 | use Flow\Model\UUID; |
13 | use Flow\Parsoid\ContentFixer; |
14 | use Maintenance; |
15 | use MediaWiki\WikiMap\WikiMap; |
16 | use ReflectionClass; |
17 | use ReflectionMethod; |
18 | use Wikimedia\Diff\Diff; |
19 | use Wikimedia\Diff\UnifiedDiffFormatter; |
20 | |
// Locate the MediaWiki installation: use MW_INSTALL_PATH when it is set,
// otherwise fall back to the directory three levels above this script.
$IP = getenv( 'MW_INSTALL_PATH' );
$IP = $IP !== false ? $IP : __DIR__ . '/../../..';

require_once $IP . '/maintenance/Maintenance.php';
27 | |
/**
 * Maintenance script that round-trips stored HTML revision content through
 * wikitext (HTML -> wikitext -> HTML) and stores the result, for revisions
 * whose recorded Parsoid version differs from Utils::PARSOID_VERSION.
 *
 * With --dry-run nothing is written; the would-be change is printed as a
 * unified diff instead.
 *
 * @ingroup Maintenance
 */
class FlowReserializeRevisionContent extends Maintenance {
	/**
	 * Handle on AbstractRevision::setContentRaw(), made accessible in execute().
	 *
	 * @var ReflectionMethod
	 */
	private $setContentRawMethod;

	/**
	 * @var DbFactory
	 */
	private $dbFactory;

	/**
	 * @var ManagerGroup
	 */
	private $storage;

	/**
	 * Registers the script description, its command line options, the default
	 * batch size and the dependency on the Flow extension.
	 */
	public function __construct() {
		parent::__construct();
		$this->addDescription( "Reserializes HTML revision contents to the latest Parsoid version." );
		$this->addOption( 'dry-run', 'Log hypothetical updates but don\'t write them to the database' );
		$this->addOption( 'raw-diff', 'In dry-run mode, show diffs of raw HTML rather than just the <body> (noisy)' );
		$this->setBatchSize( 50 );
		$this->requireExtension( 'Flow' );
	}

	/**
	 * Extract the inner HTML of the first <body> element of an HTML document.
	 *
	 * @param string $html Full HTML document
	 * @return string Inner HTML of the body, as returned by Utils::getInnerHtml()
	 */
	protected function getBodyContent( $html ) {
		$dom = ContentFixer::createDOM( $html );
		$body = $dom->getElementsByTagName( 'body' )->item( 0 );
		return Utils::getInnerHtml( $body );
	}

	/**
	 * Add the content-related columns to the 'allowedUpdateColumns' list of the
	 * storage object held by the given object manager, so that changed revision
	 * content can be written back.
	 *
	 * Both properties are reached through reflection. Merging plus array_unique()
	 * makes repeated calls on the same manager harmless.
	 *
	 * @param ObjectManager $om
	 */
	protected function makeContentUpdatesAllowed( ObjectManager $om ) {
		// Do reflection hackery to unblock updates to rev_content
		$omClass = new ReflectionClass( get_class( $om ) );
		$storageProp = $omClass->getProperty( 'storage' );
		$storageProp->setAccessible( true );
		$storageObj = $storageProp->getValue( $om );

		$storageClass = new ReflectionClass( get_class( $storageObj ) );
		$allowedUpdateColumnsProp = $storageClass->getProperty( 'allowedUpdateColumns' );
		$allowedUpdateColumnsProp->setAccessible( true );
		$allowedUpdateColumnsValue = $allowedUpdateColumnsProp->getValue( $storageObj );

		$newAllowedUpdateColumnsValue = array_unique( array_merge( $allowedUpdateColumnsValue, [
			'rev_content',
			'rev_content_length',
			'rev_flags',
			'rev_previous_content_length',
		] ) );
		$allowedUpdateColumnsProp->setValue( $storageObj, $newAllowedUpdateColumnsValue );
	}

	/**
	 * Walk flow_revision in batches and reserialize every matching revision.
	 */
	public function execute() {
		// Reflection hackery: make setContentRaw() callable
		$this->setContentRawMethod = new ReflectionMethod( AbstractRevision::class, 'setContentRaw' );
		$this->setContentRawMethod->setAccessible( true );

		$this->dbFactory = Container::get( 'db.factory' );
		$this->storage = Container::get( 'storage' );

		$dbr = $this->dbFactory->getDb( DB_REPLICA );
		$dbw = $this->dbFactory->getDb( DB_PRIMARY );

		// NOTE(review): rows are read through the primary handle while the LIKE
		// fragment is built with the replica handle; kept as found.
		$iterator = new BatchRowIterator( $dbw, 'flow_revision', 'rev_id', $this->getBatchSize() );
		$iterator->addConditions( [
			'rev_user_wiki' => WikiMap::getCurrentWikiId(),
			'rev_flags' . $dbr->buildLike( $dbr->anyString(), 'html', $dbr->anyString() ),
		] );
		$iterator->setFetchColumns( [ 'rev_id', 'rev_type', 'rev_content', 'rev_flags' ] );
		$iterator->setCaller( __METHOD__ );

		$newVersion = Utils::PARSOID_VERSION;
		// Options cannot change while the script runs, so look them up once
		// rather than once per revision.
		$dryRun = $this->hasOption( 'dry-run' );
		$rawDiff = $this->hasOption( 'raw-diff' );

		foreach ( $iterator as $batch ) {
			foreach ( $batch as $row ) {
				$this->reserializeRow( $row, $newVersion, $dryRun, $rawDiff );
			}
		}
	}

	/**
	 * Reserialize the revision described by one flow_revision row, unless it
	 * can be skipped.
	 *
	 * A row is skipped when the revision cannot be loaded (reported as an
	 * error), is not in HTML format, already carries the target version, is
	 * empty, or comes back from the round trip unchanged.
	 *
	 * @param \stdClass $row Row carrying at least rev_id and rev_type
	 * @param string $newVersion Target Parsoid version
	 * @param bool $dryRun Report the change instead of writing it
	 * @param bool $rawDiff In dry-run mode, diff the full HTML rather than the body
	 */
	private function reserializeRow( $row, $newVersion, $dryRun, $rawDiff ): void {
		$revId = UUID::create( $row->rev_id );
		$om = $this->storage->getStorage( $row->rev_type );
		$rev = $om->get( $revId );
		$revIdAlpha = $revId->getAlphadecimal();
		if ( !$rev ) {
			$this->error( 'Could not load revision: ' . $revIdAlpha );
			return;
		}
		if ( $rev->getContentFormat() !== 'html' ) {
			// Paranoia: we check rev_flags LIKE '%html%', protect against that picking up non-HTML
			// revisions that have a flag that contains the substring 'html'
			return;
		}

		$storedHtml = $rev->getContent();
		$storedVersion = Utils::getParsoidVersion( $storedHtml );
		if ( $storedVersion === $newVersion ) {
			return;
		}
		if ( $storedHtml === '' || $storedHtml === '<html><head></head><body></body></html>' ) {
			return;
		}

		$title = $rev->getCollection()->getTitle();
		// Convert from HTML to wikitext then back to HTML
		$wikitext = Utils::convert( 'html', 'wikitext', $storedHtml, $title );
		$convertedHtml = Utils::convert( 'wikitext', 'html', $wikitext, $title );
		if ( $convertedHtml === $storedHtml ) {
			return;
		}

		if ( $dryRun ) {
			$this->outputDryRunDiff(
				"Revision $revIdAlpha version $storedVersion -> $newVersion",
				$storedHtml,
				$convertedHtml,
				$rawDiff
			);
		} else {
			$this->saveReserializedContent( $om, $rev, $revIdAlpha, $convertedHtml, $wikitext );
		}
	}

	/**
	 * Print what a real run would change for one revision.
	 *
	 * @param string $label Prefix identifying the revision and the version change
	 * @param string $storedHtml HTML currently stored
	 * @param string $convertedHtml HTML produced by the round trip
	 * @param bool $rawDiff Diff the full documents instead of only the body content
	 */
	private function outputDryRunDiff( $label, $storedHtml, $convertedHtml, $rawDiff ): void {
		if ( $rawDiff ) {
			$fromDiff = $storedHtml;
			$toDiff = $convertedHtml;
		} else {
			$fromDiff = $this->getBodyContent( $storedHtml );
			$toDiff = $this->getBodyContent( $convertedHtml );
		}

		if ( $fromDiff === $toDiff ) {
			$this->output( "$label: no change to body\n" );
			return;
		}

		$diff = new Diff( explode( "\n", $fromDiff ), explode( "\n", $toDiff ) );
		$format = new UnifiedDiffFormatter();
		$this->output( "$label: diff\n" );
		$this->output( $format->format( $diff ) . "\n" );
	}

	/**
	 * Store the reserialized content on the revision and write it through its
	 * object manager. A failed write is reported and does not stop the script.
	 *
	 * @param ObjectManager $om Object manager the revision was loaded from
	 * @param AbstractRevision $rev Revision to update
	 * @param string $revIdAlpha Alphadecimal revision ID, used in messages
	 * @param string $convertedHtml New HTML content
	 * @param string $wikitext Wikitext the new HTML was generated from
	 */
	private function saveReserializedContent( ObjectManager $om, $rev, $revIdAlpha, $convertedHtml, $wikitext ): void {
		$this->makeContentUpdatesAllowed( $om );
		$this->setContentRawMethod->invoke( $rev, [ 'html' => $convertedHtml, 'wikitext' => $wikitext ] );
		try {
			$om->put( $rev );
			$this->output( "Updated revision $revIdAlpha\n" );
		} catch ( \Exception $e ) {
			// No trailing newline, matching the other error() call in this class.
			$this->error( "Failed to update revision $revIdAlpha: {$e->getMessage()}" );
		}
	}
}
166 | |
// Name the class defined in this file in $maintClass, then include the
// maintenance runner referenced by RUN_MAINTENANCE_IF_MAIN.
$maintClass = FlowReserializeRevisionContent::class;
require_once RUN_MAINTENANCE_IF_MAIN;