43 parent::__construct();
46 $this->
addDescription(
'Find and mark bad content blobs. Marked blobs will be read as empty. '
47 .
'Use --scan-from to find revisions with bad blobs, use --mark to mark them.' );
48 $this->
addOption(
'scan-from',
'Start scanning revisions at the given date. '
49 .
'Format: Anything supported by MediaWiki, e.g. YYYYMMDDHHMMSS or YYYY-MM-DDTHH:MM:SS',
51 $this->
addOption(
'revisions',
'A list of revision IDs to process, separated by comma or '
52 .
'colon or whitespace. Revisions belonging to deleted pages will work. '
53 .
'If set to "-" IDs are read from stdin, one per line.',
false,
true );
54 $this->
addOption(
'limit',
'Maximum number of revisions for --scan-from to scan. '
55 .
'Default: 1000',
false,
true );
56 $this->
addOption(
'mark',
'Mark the blob as "known bad", to avoid errors when '
57 .
'attempting to read it. The value given is the reason for marking the blob as bad, '
58 .
'typically a ticket ID. Requires --revisions to also be set.',
false,
true );
64 private function getStartTimestamp() {
66 if ( strlen( $tsOpt ) < 14 ) {
68 .
', please provide time and date down to the second.' );
73 $this->
fatalError(
'Bad timestamp: ' . $tsOpt );
82 private function getRevisionIds() {
86 $opt = stream_get_contents( STDIN );
101 $this->revisionStore = $services->getRevisionStore();
102 $this->blobStore = $services->getBlobStore();
107 $this->
fatalError(
'Cannot use --revisions together with --scan-from' );
110 $ids = $this->getRevisionIds();
112 $count = $this->scanRevisionsById( $ids );
113 } elseif ( $this->
hasOption(
'scan-from' ) ) {
115 $this->
fatalError(
'Cannot use --mark with --scan-from, '
116 .
'use --revisions to specify revisions to mark.' );
119 $fromTimestamp = $this->getStartTimestamp();
120 $total = $this->
getOption(
'limit', 1000 );
122 $count = $this->scanRevisionsByTimestamp( $fromTimestamp, $total );
124 $this->
output(
"The range of archive rows scanned is based on the range of revision IDs "
125 .
"scanned in the revision table.\n" );
128 $this->
fatalError(
'The --mark must be used together with --revisions' );
130 $this->
fatalError(
'Must specify one of --revisions or --scan-from' );
135 $this->
output(
"Marked $count bad revisions.\n" );
137 $this->
output(
"Found $count bad revisions.\n" );
140 $this->
output(
"On a unix/linux environment, you can use grep and cut to list of IDs\n" );
141 $this->
output(
"that can then be used with the --revisions option. E.g.\n" );
142 $this->
output(
" grep '! Found bad blob' | cut -s -f 3\n" );
153 private function scanRevisionsByTimestamp( $fromTimestamp, $total ) {
157 $lastTimestamp = $fromTimestamp;
158 $revisionRowsScanned = 0;
159 $archiveRowsScanned = 0;
161 $this->
output(
"Scanning revisions table, "
162 .
"$total rows starting at rev_timestamp $fromTimestamp\n" );
164 while ( $revisionRowsScanned < $total ) {
165 $batchSize = min( $total - $revisionRowsScanned, $this->
getBatchSize() );
166 $revisions = $this->loadRevisionsByTimestamp( $lastRevId, $lastTimestamp, $batchSize );
171 foreach ( $revisions as $rev ) {
173 $firstRevId = $firstRevId ? min( $firstRevId, $rev->getId() ) : $rev->getId();
174 $lastRevId = max( $lastRevId, $rev->getId() );
176 $count += $this->checkRevision( $rev );
179 $lastTimestamp = $rev->getTimestamp();
180 $batchSize = count( $revisions );
181 $revisionRowsScanned += $batchSize;
183 "\t- Scanned a batch of $batchSize revisions, "
184 .
"up to revision $lastRevId ($lastTimestamp)\n"
195 $fromArchived = $this->getNextRevision( $firstRevId,
'<',
'DESC' );
196 $maxArchived = $this->getNextRevision( $lastRevId,
'>',
'ASC' );
197 $maxArchived = $maxArchived ?: PHP_INT_MAX;
199 $this->
output(
"Scanning archive table by ar_rev_id, $fromArchived to $maxArchived\n" );
200 while ( $firstRevId > 0 && $fromArchived < $maxArchived ) {
201 $batchSize = min( $total - $archiveRowsScanned, $this->
getBatchSize() );
202 $revisions = $this->loadArchiveByRevisionId( $fromArchived, $maxArchived, $batchSize );
207 foreach ( $revisions as $rev ) {
208 $count += $this->checkRevision( $rev );
210 $fromArchived = $rev->getId();
211 $batchSize = count( $revisions );
212 $archiveRowsScanned += $batchSize;
214 "\t- Scanned a batch of $batchSize archived revisions, "
215 .
"up to revision $fromArchived ($lastTimestamp)\n"
231 private function loadRevisionsByTimestamp(
int $afterId,
string $fromTimestamp, $batchSize ) {
233 $queryBuilder = $this->revisionStore->newSelectQueryBuilder( $db );
234 $rows = $queryBuilder->joinComment()
235 ->where( $db->buildComparison(
'>', [
236 'rev_timestamp' => $fromTimestamp,
237 'rev_id' => $afterId,
239 ->useIndex( [
'revision' =>
'rev_timestamp' ] )
240 ->orderBy( [
'rev_timestamp',
'rev_id' ] )
241 ->limit( $batchSize )
242 ->caller( __METHOD__ )->fetchResultSet();
243 $result = $this->revisionStore->newRevisionsFromBatch( $rows, [
'slots' =>
true ] );
244 $this->handleStatus( $result );
246 $records = array_filter( $result->value );
248 '@phan-var RevisionStoreRecord[] $records';
259 private function loadArchiveByRevisionId(
int $afterId,
int $uptoId, $batchSize ) {
261 $rows = $this->revisionStore->newArchiveSelectQueryBuilder( $db )
263 ->where( [
"ar_rev_id > $afterId",
"ar_rev_id <= $uptoId" ] )
264 ->orderBy(
'ar_rev_id' )
265 ->limit( $batchSize )
266 ->caller( __METHOD__ )->fetchResultSet();
267 $result = $this->revisionStore->newRevisionsFromBatch(
269 [
'archive' =>
true,
'slots' =>
true ]
271 $this->handleStatus( $result );
273 $records = array_filter( $result->value );
275 '@phan-var RevisionArchiveRecord[] $records';
288 private function getNextRevision(
int $revId,
string $comp,
string $dir ) {
290 $next = $db->newSelectQueryBuilder()
293 ->where(
"rev_id $comp $revId" )
294 ->orderBy( [
"rev_id" ], $dir )
295 ->caller( __METHOD__ )
305 private function scanRevisionsById( array $ids ) {
307 $total = count( $ids );
309 $this->
output(
"Scanning $total ids\n" );
311 foreach ( array_chunk( $ids, $this->
getBatchSize() ) as $batch ) {
312 $revisions = $this->loadRevisionsById( $batch );
319 foreach ( $revisions as $rev ) {
320 $count += $this->checkRevision( $rev );
323 $batchSize = count( $revisions );
324 $this->
output(
"\t- Scanned a batch of $batchSize revisions\n" );
335 private function loadRevisionsById( array $ids ) {
337 $queryBuilder = $this->revisionStore->newSelectQueryBuilder( $db );
339 $rows = $queryBuilder
341 ->where( [
'rev_id' => $ids ] )
342 ->caller( __METHOD__ )->fetchResultSet();
344 $result = $this->revisionStore->newRevisionsFromBatch( $rows, [
'slots' =>
true ] );
346 $this->handleStatus( $result );
348 $revisions = array_filter( $result->value );
349 '@phan-var RevisionArchiveRecord[] $revisions';
352 if ( count( $revisions ) < count( $ids ) ) {
353 $rows = $this->revisionStore->newArchiveSelectQueryBuilder( $db )
355 ->where( [
'ar_rev_id' => array_diff( $ids, array_keys( $revisions ) ) ] )
356 ->caller( __METHOD__ )->fetchResultSet();
358 $archiveResult = $this->revisionStore->newRevisionsFromBatch(
360 [
'slots' =>
true,
'archive' =>
true ]
363 $this->handleStatus( $archiveResult );
366 $revisions += array_filter( $archiveResult->value );
379 foreach ( $rev->
getSlots()->getSlots() as $slot ) {
380 $count += $this->checkSlot( $rev, $slot );
383 if ( $count === 0 && $this->
hasOption(
'mark' ) ) {
384 $this->
output(
"\t# No bad blob found on revision {$rev->getId()}, skipped!\n" );
400 $this->blobStore->getBlob( $address );
403 }
catch ( Exception $ex ) {
404 $error = $ex->getMessage();
405 $type = get_class( $ex );
410 $this->
output(
"\t! Found bad blob on revision {$rev->getId()} "
411 .
"from {$rev->getTimestamp()} ({$slot->getRole()} slot): "
412 .
"content_id={$slot->getContentId()}, address=<{$slot->getAddress()}>, "
413 .
"error='$error', type='$type'. ID:\t{$rev->getId()}\n" );
416 $newAddress = $this->markBlob( $slot, $error );
417 $this->
output(
"\tChanged address to <$newAddress>\n" );
429 private function markBlob(
SlotRecord $slot,
string $error =
null ) {
433 $args[
'reason'] = $this->
getOption(
'mark' );
437 $args[
'error'] = $error;
441 $badAddress =
'bad:' . urlencode( $address );
447 $badAddress = substr( $badAddress, 0, 255 );
450 $dbw->newUpdateQueryBuilder()
451 ->update(
'content' )
452 ->set( [
'content_address' => $badAddress ] )
454 ->caller( __METHOD__ )->execute();
459 private function handleStatus(
StatusValue $status ) {
460 if ( !$status->
isOK() ) {
462 Status::wrap( $status )->getMessage(
false,
false,
'en' )->text()
465 if ( !$status->
isGood() ) {
467 "\t! " . Status::wrap( $status )->getMessage(
false,
false,
'en' )->text()
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
error( $err, $die=0)
Report an error to the user.
output( $out, $channel=null)
Write some output for the user.
hasOption( $name)
Checks to see if a particular option was set.
getServiceContainer()
Returns the main service container.
parseIntList( $text)
Utility function to parse a string (perhaps from a command line option) into a list of integers (perhaps some kind of numeric IDs).
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.