Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
0.00% |
0 / 82 |
|
0.00% |
0 / 4 |
CRAP | |
0.00% |
0 / 1 |
| FlowReserializeRevisionContent | |
0.00% |
0 / 76 |
|
0.00% |
0 / 4 |
272 | |
0.00% |
0 / 1 |
| __construct | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
2 | |||
| getBodyContent | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
| makeContentUpdatesAllowed | |
0.00% |
0 / 13 |
|
0.00% |
0 / 1 |
2 | |||
| execute | |
0.00% |
0 / 54 |
|
0.00% |
0 / 1 |
182 | |||
| 1 | <?php |
| 2 | |
| 3 | namespace Flow\Maintenance; |
| 4 | |
| 5 | use BatchRowIterator; |
| 6 | use Flow\Container; |
| 7 | use Flow\Conversion\Utils; |
| 8 | use Flow\Data\ManagerGroup; |
| 9 | use Flow\Data\ObjectManager; |
| 10 | use Flow\DbFactory; |
| 11 | use Flow\Model\AbstractRevision; |
| 12 | use Flow\Model\UUID; |
| 13 | use Flow\Parsoid\ContentFixer; |
| 14 | use MediaWiki\Maintenance\Maintenance; |
| 15 | use MediaWiki\WikiMap\WikiMap; |
| 16 | use ReflectionClass; |
| 17 | use ReflectionMethod; |
| 18 | use Wikimedia\Diff\Diff; |
| 19 | use Wikimedia\Diff\UnifiedDiffFormatter; |
| 20 | use Wikimedia\Rdbms\IExpression; |
| 21 | use Wikimedia\Rdbms\LikeValue; |
| 22 | |
// Resolve the MediaWiki install path: use the MW_INSTALL_PATH environment
// variable when it is set, otherwise the directory three levels above this file.
$IP = getenv( 'MW_INSTALL_PATH' );
$IP = ( $IP === false ) ? __DIR__ . '/../../..' : $IP;

require_once $IP . '/maintenance/Maintenance.php';
| 29 | |
/**
 * Maintenance script that round-trips stored HTML revision content through
 * wikitext and back to HTML, then either prints the difference (dry-run) or
 * writes the re-serialized content to storage.
 *
 * @ingroup Maintenance
 */
class FlowReserializeRevisionContent extends Maintenance {
	/**
	 * Reflection handle on AbstractRevision::setContentRaw(); set in execute().
	 *
	 * @var ReflectionMethod
	 */
	private $setContentRawMethod;

	/**
	 * Fetched from the container under 'db.factory' in execute().
	 *
	 * @var DbFactory
	 */
	private $dbFactory;

	/**
	 * Fetched from the container under 'storage' in execute().
	 *
	 * @var ManagerGroup
	 */
	private $storage;

	/**
	 * Declares the script description, its two options, the default batch
	 * size (50) and the dependency on the Flow extension.
	 */
	public function __construct() {
		parent::__construct();
		$this->addDescription( "Reserializes HTML revision contents to the latest Parsoid version." );

		$optionHelp = [
			'dry-run' => 'Log hypothetical updates but don\'t write them to the database',
			'raw-diff' => 'In dry-run mode, show diffs of raw HTML rather than just the <body> (noisy)',
		];
		foreach ( $optionHelp as $optionName => $helpText ) {
			$this->addOption( $optionName, $helpText );
		}

		$this->setBatchSize( 50 );
		$this->requireExtension( 'Flow' );
	}

	/**
	 * Returns the inner HTML of the first <body> element of the given document.
	 *
	 * @param string $html HTML to load into a DOM
	 * @return string Whatever Utils::getInnerHtml() yields for the first <body> node
	 */
	protected function getBodyContent( $html ) {
		$bodyNodes = ContentFixer::createDOM( $html )->getElementsByTagName( 'body' );

		return Utils::getInnerHtml( $bodyNodes->item( 0 ) );
	}

	/**
	 * Uses reflection to extend the 'allowedUpdateColumns' list of the object
	 * manager's 'storage' object with the revision content columns, so that a
	 * later put() may write them.
	 *
	 * @param ObjectManager $om Object manager whose storage should accept content updates
	 */
	protected function makeContentUpdatesAllowed( ObjectManager $om ) {
		// Dig the storage object out of the object manager
		$backend = ( new ReflectionClass( $om ) )
			->getProperty( 'storage' )
			->getValue( $om );

		// Append the content-related columns to whatever is already allowed
		$allowedProp = ( new ReflectionClass( $backend ) )->getProperty( 'allowedUpdateColumns' );
		$contentColumns = [
			'rev_content',
			'rev_content_length',
			'rev_flags',
			'rev_previous_content_length',
		];
		$merged = array_merge( $allowedProp->getValue( $backend ), $contentColumns );

		$allowedProp->setValue( $backend, array_unique( $merged ) );
	}

	/**
	 * Walks flow_revision in batches and re-serializes every HTML revision of
	 * the current wiki whose stored Parsoid version differs from
	 * Utils::PARSOID_VERSION and whose round-tripped HTML differs from the
	 * stored HTML. With --dry-run the change is only printed.
	 */
	public function execute() {
		// setContentRaw() is reached through reflection rather than a direct call
		$this->setContentRawMethod = new ReflectionMethod( AbstractRevision::class, 'setContentRaw' );

		$this->dbFactory = Container::get( 'db.factory' );
		$this->storage = Container::get( 'storage' );

		$replicaDb = $this->dbFactory->getDb( DB_REPLICA );
		$primaryDb = $this->dbFactory->getDb( DB_PRIMARY );
		$targetVersion = Utils::PARSOID_VERSION;

		$batches = new BatchRowIterator( $primaryDb, 'flow_revision', 'rev_id', $this->getBatchSize() );
		$htmlFlagPattern = new LikeValue( $replicaDb->anyString(), 'html', $replicaDb->anyString() );
		$batches->addConditions( [
			'rev_user_wiki' => WikiMap::getCurrentWikiId(),
			$replicaDb->expr( 'rev_flags', IExpression::LIKE, $htmlFlagPattern ),
		] );
		$batches->setFetchColumns( [ 'rev_id', 'rev_type', 'rev_content', 'rev_flags' ] );
		$batches->setCaller( __METHOD__ );

		foreach ( $batches as $rows ) {
			foreach ( $rows as $row ) {
				$uuid = UUID::create( $row->rev_id );
				$manager = $this->storage->getStorage( $row->rev_type );
				$revision = $manager->get( $uuid );
				$idText = $uuid->getAlphadecimal();

				if ( !$revision ) {
					$this->error( 'Could not load revision: ' . $idText );
					continue;
				}

				// The LIKE '%html%' filter could also match a non-HTML revision whose
				// flags merely contain the substring 'html'; skip those.
				if ( $revision->getContentFormat() !== 'html' ) {
					continue;
				}

				$oldHtml = $revision->getContent();
				$oldVersion = Utils::getParsoidVersion( $oldHtml );
				$isBlank = $oldHtml === '' || $oldHtml === '<html><head></head><body></body></html>';
				if ( $oldVersion === $targetVersion || $isBlank ) {
					continue;
				}

				// Round trip: HTML -> wikitext -> HTML
				$title = $revision->getCollection()->getTitle();
				$wikitext = Utils::convert( 'html', 'wikitext', $oldHtml, $title );
				$newHtml = Utils::convert( 'wikitext', 'html', $wikitext, $title );
				if ( $newHtml === $oldHtml ) {
					continue;
				}

				if ( $this->hasOption( 'dry-run' ) ) {
					$this->outputDryRunDiff( $idText, $oldVersion, $targetVersion, $oldHtml, $newHtml );
					continue;
				}

				$this->makeContentUpdatesAllowed( $manager );
				$this->setContentRawMethod->invoke( $revision, [ 'html' => $newHtml, 'wikitext' => $wikitext ] );
				try {
					$manager->put( $revision );
					$this->output( "Updated revision $idText\n" );
				} catch ( \Exception $e ) {
					$this->error( "Failed to update revision $idText: {$e->getMessage()}\n" );
				}
			}
		}
	}

	/**
	 * Prints what a real run would change for one revision: either a
	 * "no change to body" line or a unified diff of old against new content.
	 * Compares raw HTML when --raw-diff is set, otherwise only <body> content.
	 *
	 * @param string $idText Alphadecimal revision ID used in the output
	 * @param mixed $oldVersion Parsoid version found in the stored HTML
	 * @param mixed $targetVersion Parsoid version being migrated to
	 * @param string $oldHtml Stored HTML
	 * @param string $newHtml Round-tripped HTML
	 */
	private function outputDryRunDiff( $idText, $oldVersion, $targetVersion, $oldHtml, $newHtml ) {
		$useRaw = $this->hasOption( 'raw-diff' );
		$before = $useRaw ? $oldHtml : $this->getBodyContent( $oldHtml );
		$after = $useRaw ? $newHtml : $this->getBodyContent( $newHtml );
		$heading = "Revision $idText version $oldVersion -> $targetVersion: ";

		if ( $before === $after ) {
			$this->output( $heading . "no change to body\n" );
			return;
		}

		$lineDiff = new Diff( explode( "\n", $before ), explode( "\n", $after ) );
		$formatter = new UnifiedDiffFormatter();
		$diffText = $formatter->format( $lineDiff );
		$this->output( $heading . "diff\n" );
		$this->output( $diffText . "\n" );
	}
}
| 165 | |
// Record this script's class name in $maintClass, then include the file named by
// RUN_MAINTENANCE_IF_MAIN (defined outside this file; presumably the runner that
// reads $maintClass — confirm against MediaWiki core).
$maintClass = FlowReserializeRevisionContent::class;
require_once RUN_MAINTENANCE_IF_MAIN;