Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
0.00% |
0 / 147 |
|
0.00% |
0 / 5 |
CRAP | |
0.00% |
0 / 1 |
| UpdateSecondaryTables | |
0.00% |
0 / 141 |
|
0.00% |
0 / 5 |
506 | |
0.00% |
0 / 1 |
| __construct | |
0.00% |
0 / 47 |
|
0.00% |
0 / 1 |
2 | |||
| execute | |
0.00% |
0 / 74 |
|
0.00% |
0 / 1 |
306 | |||
| reportTableInfo | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
2 | |||
| printTableInfo | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
| getTableInfo | |
0.00% |
0 / 12 |
|
0.00% |
0 / 1 |
6 | |||
| 1 | <?php |
| 2 | |
| 3 | /** |
| 4 | * WikiLambda maintenance script to update WikiLambda secondary tables, |
| 5 | * for ZObjects of a given type. For each such ZObject, a new instance of |
| 6 | * ZObjectSecondaryDataUpdate is created and added to DeferredUpdates. |
| 7 | * |
| 8 | * The --report option prints secondary table sizes before and after the update. |
| 9 | * |
| 10 | * By default, sleeps 5 seconds between the creation of each ZObjectSecondaryDataUpdate. |
| 11 | * |
| 12 | * @file |
| 13 | * @ingroup Extensions |
| 14 | * @copyright 2020– Abstract Wikipedia team; see AUTHORS.txt |
| 15 | * @license MIT |
| 16 | */ |
| 17 | |
| 18 | namespace MediaWiki\Extensions\WikiLambda\Maintenance; |
| 19 | |
| 20 | use GuzzleHttp\Client; |
| 21 | use MediaWiki\Deferred\DeferredUpdates; |
| 22 | use MediaWiki\Extension\WikiLambda\OrchestratorRequest; |
| 23 | use MediaWiki\Extension\WikiLambda\WikiLambdaServices; |
| 24 | use MediaWiki\Extension\WikiLambda\ZObjectContentHandler; |
| 25 | use MediaWiki\Extension\WikiLambda\ZObjectSecondaryDataUpdate; |
| 26 | use MediaWiki\Maintenance\Maintenance; |
| 27 | use MediaWiki\Title\Title; |
| 28 | use Wikimedia\Rdbms\IConnectionProvider; |
| 29 | use Wikimedia\Rdbms\SelectQueryBuilder; |
| 30 | |
// Locate the MediaWiki installation: use MW_INSTALL_PATH when the environment
// variable exists, otherwise fall back to three directory levels above this
// script's directory.
$IP = getenv( 'MW_INSTALL_PATH' );
$IP = ( $IP === false ) ? __DIR__ . '/../../..' : $IP;
require_once $IP . '/maintenance/Maintenance.php';
| 36 | |
class UpdateSecondaryTables extends Maintenance {

	/** Number of ZObjects fetched from the ZObject store per batch. */
	private const BATCH_SIZE = 10;

	/** Seconds to pause after queueing each update, unless --quick is given. */
	private const SLEEP_SECONDS = 5;

	/**
	 * Secondary tables covered by --report, in print order. Each table name maps to
	 * the column whose highest value is reported alongside the row count.
	 */
	private const REPORT_TABLES = [
		'wikilambda_zobject_join' => 'wlzo_id',
		'wikilambda_zlanguages' => 'wlzlangs_id',
		'wikilambda_zobject_function_join' => 'wlzf_id',
		'wikilambda_zobject_label_conflicts' => 'wlzlc_id',
		'wikilambda_zobject_labels' => 'wlzl_id',
		'wikilambda_ztester_results' => 'wlztr_id',
	];

	/** Connection provider; assigned at the start of execute(), read by getTableInfo(). */
	private IConnectionProvider $dbProvider;

	/**
	 * @inheritDoc
	 */
	public function __construct() {
		parent::__construct();
		$this->requireExtension( 'WikiLambda' );
		$this->addDescription( "Updates WikiLambda secondary tables for each ZObject" .
			" of the given zType. By default, sleeps 5 seconds after the creation of each update." );

		$this->addOption(
			'all',
			'Updates all stored ZObjects',
			false,
			false
		);
		$this->addOption(
			'zType',
			'Updates will be triggered for each ZObject of this type (a ZID)',
			false,
			true
		);
		$this->addOption(
			'verbose',
			"Whether to print the ZID of each ZObject for which updating is done (default: false)",
			false,
			false
		);
		$this->addOption(
			'report',
			"Whether to report table info (number of rows, highest autoincrement column value)" .
			"\n\tbefore and after the updates (default: false)",
			false,
			false
		);
		$this->addOption(
			'dryRun',
			'Whether to just dry-run, without actually making changes (default: false)',
			false,
			false
		);
		$this->addOption(
			'cache',
			'Whether to try to stash the ZObject in the function-orchestrator\'s cache (default: false)',
			false,
			false
		);
		$this->addOption(
			'quick',
			'Do not sleep 5 seconds after the creation of each update (default: false)',
			false,
			false
		);
	}

	/**
	 * @inheritDoc
	 *
	 * Validates that exactly one of --all / --zType was given, collects the target ZIDs,
	 * then walks them in batches, queueing one ZObjectSecondaryDataUpdate per ZObject
	 * (nothing is queued with --dryRun). With --report, table info is printed before
	 * the run and again after the deferred updates have been flushed.
	 *
	 * Note there is a function updateSecondaryTables in RepoHooks.php that provides similar
	 * functionality (and with code that duplicates part of this, which could not
	 * easily be avoided).
	 */
	public function execute() {
		$services = $this->getServiceContainer();
		$this->dbProvider = $services->getConnectionProvider();
		// Build ZObjectStore and ZObject caches, because ServiceWiring hasn't run
		$zObjectStore = WikiLambdaServices::buildZObjectStore( $services );
		$zObjectCaches = WikiLambdaServices::buildMemcachedWrapper( $services );
		$config = $services->getMainConfig();
		$handler = new ZObjectContentHandler( CONTENT_MODEL_ZOBJECT, $config, $zObjectStore, $zObjectCaches );

		$all = $this->getOption( 'all' );
		$zType = $this->getOption( 'zType' );
		$verbose = $this->getOption( 'verbose' );
		$report = $this->getOption( 'report' );
		$dryRun = $this->getOption( 'dryRun' );
		$cache = $this->getOption( 'cache' );
		$quick = $this->getOption( 'quick' );

		// Both usage errors end with the same two explanatory lines.
		$usage = 'Use "--all" to update all existing ZObjects.' . "\n"
			. 'Use "--zType <ZID>" to update all ZObjects of the given type.';

		if ( $all && $zType ) {
			$this->fatalError( 'The flags "--all" and "--zType <ZID>" should be mutually exclusive:' . "\n"
				. $usage );
		}

		if ( !$all && !$zType ) {
			$this->fatalError( 'The script must be called with exactly one of the options "--all" or'
				. ' "--zType <ZID>":' . "\n"
				. $usage );
		}

		if ( $report ) {
			$this->output( "[Number of rows, highest autoincrement column value] before updates:\n" );
			$this->reportTableInfo();
			$this->output( "\n" );
		}

		$targets = $all
			? $zObjectStore->fetchAllZids()
			: $zObjectStore->fetchZidsOfType( $zType );

		if ( count( $targets ) === 0 ) {
			$this->output( "No ZObjects for which secondary tables need updating\n" );
			return;
		}

		$this->output(
			( $dryRun ? "Would have updated" : "Updating" )
			. " secondary tables for " . count( $targets ) . " ZObjects\n"
		);

		// By default, we do not update the orchestrator cache, as we're in a maintenance
		// script and might overwhelm it. A null orchestrator is passed on in that case.
		$orchestrator = null;
		if ( $cache ) {
			if ( $config->get( 'WikiLambdaPersistBackendCache' ) ) {
				$this->output( "Sending cache updates to the function-orchestrator.\n" );

				$orchestratorHost = $config->get( 'WikiLambdaOrchestratorLocation' );
				$client = new Client( [ "base_uri" => $orchestratorHost ] );
				$orchestrator = new OrchestratorRequest( $client );
			} else {
				$this->output( "ERROR: Cannot send cache updates to the function-orchestrator as it is disabled.\n" );
			}
		}

		// $targets is non-empty here, so chunking visits exactly the same slices as
		// stepping an offset through the list BATCH_SIZE entries at a time.
		foreach ( array_chunk( $targets, self::BATCH_SIZE ) as $batch ) {
			$contents = $zObjectStore->fetchBatchZObjects( $batch );

			foreach ( $contents as $zid => $persistentObject ) {
				if ( $verbose ) {
					$this->output( "  $zid\n" );
				}
				if ( $dryRun ) {
					continue;
				}

				$title = Title::newFromText( $zid, NS_MAIN );
				if ( $title === null ) {
					// Title::newFromText() yields null for text it cannot turn into a title;
					// report and skip rather than handing null to the update's constructor.
					$this->error( "Skipping $zid: unable to create a Title for it" );
					continue;
				}

				$data = json_encode( $persistentObject->getSerialized() );
				$content = $handler::makeContent( $data, $title );
				$update = new ZObjectSecondaryDataUpdate(
					$title,
					$content,
					$zObjectStore,
					$zObjectCaches,
					$orchestrator
				);
				DeferredUpdates::addUpdate( $update );

				if ( !$quick ) {
					sleep( self::SLEEP_SECONDS );
				}
			}

			if ( $verbose ) {
				// Blank line between batches in verbose output
				$this->output( "\n" );
			}
		}

		if ( $report ) {
			// Make sure the updates have happened before reporting table info
			DeferredUpdates::doUpdates();
			$this->output( "\n[Number of rows, highest autoincrement column value] after updates:\n" );
			$this->reportTableInfo();
		}
	}

	/**
	 * Print [Number of rows, highest autoincrement column value] for each secondary table
	 */
	private function reportTableInfo(): void {
		foreach ( self::REPORT_TABLES as $tableName => $columnName ) {
			$this->printTableInfo( $tableName, $columnName );
		}
	}

	/**
	 * Print one line of the form "  <table>: [<rows>, <highest>]" for the given table.
	 *
	 * @param string $tableName Table to report on
	 * @param string $columnName Column whose highest value is printed
	 */
	private function printTableInfo( string $tableName, string $columnName ): void {
		[ $size, $highest ] = $this->getTableInfo( $tableName, $columnName );
		$this->output( "  $tableName: [$size, $highest]\n" );
	}

	/**
	 * Get the number of rows in a table and the highest value of one of its columns.
	 *
	 * Uses a single aggregate query on a replica connection, so the rows themselves
	 * are never transferred to PHP.
	 *
	 * @param string $tableName Table to inspect
	 * @param string $columnName Column to take the maximum of. It is placed into the SQL
	 *  expression as-is, so it must be a trusted identifier (callers in this class pass
	 *  hard-coded names only).
	 * @return array Two elements: the row count (int), and the highest column value, or
	 *  the string 'none' when the table has no rows
	 */
	private function getTableInfo( string $tableName, string $columnName ): array {
		$dbr = $this->dbProvider->getReplicaDatabase();

		$row = $dbr->newSelectQueryBuilder()
			->select( [
				'row_count' => 'COUNT(*)',
				'highest' => 'MAX(' . $columnName . ')',
			] )
			->from( $tableName )
			->caller( __METHOD__ )
			->fetchRow();

		if ( $row === false ) {
			// An aggregate query without GROUP BY is expected to return one row; be defensive anyway
			return [ 0, 'none' ];
		}

		// MAX() is NULL for an empty table, which maps onto the 'none' placeholder
		return [ (int)$row->row_count, $row->highest ?? 'none' ];
	}
}
| 249 | |
// Name the maintenance class defined in this file, then include the file named by the
// RUN_MAINTENANCE_IF_MAIN constant (defined outside this file; presumably it runs the
// script when this file is the entry point).
$maintClass = UpdateSecondaryTables::class;
require_once RUN_MAINTENANCE_IF_MAIN;