MediaWiki  master
resolveStubs.php
Go to the documentation of this file.
1 <?php
27 
28 require_once __DIR__ . '/../Maintenance.php';
29 
/**
 * Maintenance script that converts history stubs pointing to an external row
 * into direct external pointers.
 *
 * The text table is scanned in old_id ranges of one batch each. Every row that
 * looks like a serialized HistoryBlobStub is handed to resolveStub(), which
 * rewrites the row (through the undo log) so that it no longer needs the
 * intermediate "main" text row.
 */
class ResolveStubs extends Maintenance {
	/**
	 * @var UndoLog Performs the row updates; installed by execute() or by
	 *  a caller through setUndoLog().
	 */
	private $undoLog;

	/**
	 * Declare the default batch size and the command line options.
	 */
	public function __construct() {
		parent::__construct();
		$this->setBatchSize( 1000 );
		$this->addOption( 'dry-run', 'Don\'t update any rows' );
		$this->addOption( 'undo', 'Undo log location', false, true );
	}

	/**
	 * Convert history stubs that point to an external row to direct external pointers.
	 *
	 * Candidate rows are read from the replica in blocks of getBatchSize() ids,
	 * waiting for replication before each block, and a progress percentage is
	 * printed per block.
	 */
	public function execute() {
		$dbw = $this->getDB( DB_PRIMARY );
		$dbr = $this->getDB( DB_REPLICA );
		$maxID = $dbr->newSelectQueryBuilder()
			->select( 'MAX(old_id)' )
			->from( 'text' )
			->caller( __METHOD__ )->fetchField();
		$blockSize = $this->getBatchSize();
		$dryRun = $this->getOption( 'dry-run' );
		$this->setUndoLog( new UndoLog( $this->getOption( 'undo' ), $dbw ) );

		// MAX(old_id) is not an integer when it comes back from the database (and is
		// empty for an empty table), so normalise it before the integer division.
		$numBlocks = intdiv( (int)$maxID, $blockSize ) + 1;
		$numResolved = 0;
		$numTotal = 0;
		$lbFactory = $this->getServiceContainer()->getDBLoadBalancerFactory();

		for ( $b = 0; $b < $numBlocks; $b++ ) {
			$lbFactory->waitForReplication();

			$this->output( sprintf( "%5.2f%%\n", $b / $numBlocks * 100 ) );
			// Inclusive old_id range handled by this block
			$start = $blockSize * $b + 1;
			$end = $blockSize * ( $b + 1 );

			$res = $dbr->newSelectQueryBuilder()
				->select( [ 'old_id', 'old_text', 'old_flags' ] )
				->from( 'text' )
				->where(
					"old_id>=$start AND old_id<=$end " .
					"AND old_flags LIKE '%object%' AND old_flags NOT LIKE '%external%' " .
					// LOWER() doesn't work on binary text, need to convert
					'AND LOWER(CONVERT(LEFT(old_text,22) USING latin1)) = \'o:15:"historyblobstub"\''
				)
				->caller( __METHOD__ )->fetchResultSet();
			foreach ( $res as $row ) {
				if ( $this->resolveStub( $row, $dryRun ) ) {
					$numResolved++;
				}
				$numTotal++;
			}
		}
		$this->output( "100%\n" );
		$this->output( "$numResolved of $numTotal stubs resolved\n" );
	}

	/**
	 * Install the undo log through which resolveStub() performs its updates.
	 *
	 * @param UndoLog $undoLog
	 */
	public function setUndoLog( UndoLog $undoLog ) {
		$this->undoLog = $undoLog;
	}

	/**
	 * Resolve a history stub.
	 *
	 * The stub's target row must be flagged "external" and hold a plain
	 * DB://<cluster>/<id> address. The stub row is then rewritten to that
	 * address with the stub's hash appended, and flagged "external"
	 * (keeping "utf-8" if it was set).
	 *
	 * @param stdClass $row Row of the text table; old_id, old_text and old_flags
	 *  are read, and the whole row is used as the match condition for the update
	 * @param bool $dryRun If true, only print what would be changed
	 * @return bool True if the stub was resolved (or would be, in a dry run),
	 *  false if the row was rejected or the update did not match
	 */
	public function resolveStub( $row, $dryRun ) {
		$id = $row->old_id;
		// NOTE(review): this unserializes data read straight from the text table;
		// that is only safe as long as the table contents are trusted.
		$stub = unserialize( $row->old_text );
		$flags = SqlBlobStore::explodeFlags( $row->old_flags );

		if ( !( $stub instanceof HistoryBlobStub ) ) {
			// unserialize() returns false for corrupt data, and get_class() throws
			// a TypeError for non-objects on PHP 8, which would abort the whole run
			// instead of skipping this one row.
			$found = is_object( $stub )
				? 'object of class ' . get_class( $stub )
				: 'non-object of type ' . gettype( $stub );
			print "Error at old_id $id: found $found, expecting HistoryBlobStub\n";
			return false;
		}

		$mainId = $stub->getLocation();
		if ( !$mainId ) {
			print "Error at old_id $id: falsey location\n";
			return false;
		}

		# Get the main text row
		$dbr = $this->getDB( DB_REPLICA );
		$mainTextRow = $dbr->newSelectQueryBuilder()
			->select( [ 'old_text', 'old_flags' ] )
			->from( 'text' )
			->where( [ 'old_id' => $mainId ] )
			->caller( __METHOD__ )->fetchRow();

		if ( !$mainTextRow ) {
			print "Error at old_id $id: can't find main text row old_id $mainId\n";
			return false;
		}

		$mainFlags = SqlBlobStore::explodeFlags( $mainTextRow->old_flags );
		$mainText = $mainTextRow->old_text;

		if ( !in_array( 'external', $mainFlags, true ) ) {
			print "Error at old_id $id: target $mainId is not external\n";
			return false;
		}
		# Only a bare DB://<cluster>/<id> address can take the stub's hash as a suffix;
		# addresses that already carry a third path component are rejected.
		if ( preg_match( '!^DB://([^/]*)/([^/]*)/[0-9a-f]{32}$!', $mainText ) ) {
			print "Error at old_id $id: target $mainId is a CGZ pointer\n";
			return false;
		}
		if ( preg_match( '!^DB://([^/]*)/([^/]*)/[0-9]{1,6}$!', $mainText ) ) {
			print "Error at old_id $id: target $mainId is a DHB pointer\n";
			return false;
		}
		if ( !preg_match( '!^DB://([^/]*)/([^/]*)$!', $mainText ) ) {
			print "Error at old_id $id: target $mainId has unrecognised text\n";
			return false;
		}

		# Preserve the legacy encoding flag, but switch from object to external
		$newFlags = in_array( 'utf-8', $flags, true ) ? 'utf-8,external' : 'external';
		$newText = $mainText . '/' . $stub->getHash();

		# Update the row
		if ( $dryRun ) {
			$this->output( "Resolve $id => $newFlags $newText\n" );
			return true;
		}

		// The full original row is the condition, so a row that changed since it
		// was read is left alone and reported.
		$updated = $this->undoLog->update(
			'text',
			[
				'old_flags' => $newFlags,
				'old_text' => $newText
			],
			(array)$row,
			__METHOD__
		);
		if ( !$updated ) {
			$this->output( "Updated of old_id $id failed to match\n" );
			return false;
		}
		return true;
	}
}
183 
// Name the class that implements this script, then include the file named by
// RUN_MAINTENANCE_IF_MAIN (presumably runs the script when this file is the
// entry point -- confirm against Maintenance.php).
184 $maintClass = ResolveStubs::class;
185 require_once RUN_MAINTENANCE_IF_MAIN;
Pointer object for an item within a CGZ blob stored in the text table.
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
Definition: Maintenance.php:66
getDB( $db, $groups=[], $dbDomain=false)
Returns a database to be used by current maintenance script.
output( $out, $channel=null)
Throw some output to the user.
getServiceContainer()
Returns the main service container.
getBatchSize()
Returns batch size.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
setBatchSize( $s=0)
Update a database while optionally writing SQL that reverses the update to a file.
Definition: UndoLog.php:11
Service for storing and loading Content objects representing revision data blobs.
setUndoLog(UndoLog $undoLog)
resolveStub( $row, $dryRun)
Resolve a history stub.
__construct()
Default constructor.
execute()
Convert history stubs that point to an external row to direct external pointers.
const DB_REPLICA
Definition: defines.php:26
const DB_PRIMARY
Definition: defines.php:28
$maintClass