MediaWiki REL1_34
syncFileBackend.php
Go to the documentation of this file.
1<?php
24require_once __DIR__ . '/Maintenance.php';
25
/**
 * Declare the script description, its command-line options and the
 * default batch size.
 */
public function __construct() {
	parent::__construct();
	$this->addDescription( 'Sync one file backend with another using the journal' );

	// One row per option, holding the addOption() arguments in order:
	// name, description [, required, withArg [, shortName ] ]
	$optionSpecs = [
		[ 'src', 'Name of backend to sync from', true, true ],
		[ 'dst', 'Name of destination backend to sync', false, true ],
		[ 'start', 'Starting journal ID', false, true ],
		[ 'end', 'Ending journal ID', false, true ],
		[ 'posdir', 'Directory to read/record journal positions', false, true ],
		[ 'posdump', 'Just dump current journal position into the position dir.' ],
		[ 'postime', 'For position dumps, get the ID at this time', false, true ],
		[ 'backoff', 'Stop at entries younger than this age (sec).', false, true ],
		[ 'verbose', 'Verbose mode', false, false, 'v' ],
	];
	foreach ( $optionSpecs as $spec ) {
		$this->addOption( ...$spec );
	}

	$this->setBatchSize( 50 );
}
47
/**
 * Entry point of the script.
 *
 * With --posdump, only the current (or --postime) journal position of the
 * source backend is written to the position file under --posdir.
 *
 * Otherwise the destination backend is synced from the source backend's
 * journal, starting at --start (or the position recorded in --posdir) and
 * ending at --end (or at the position matching the --backoff age). The
 * position file is updated as batches succeed when the start position was
 * taken from it.
 *
 * In quiet mode a single machine-readable journal position is printed.
 */
public function execute() {
	$src = FileBackendGroup::singleton()->get( $this->getOption( 'src' ) );

	$posDir = $this->getOption( 'posdir' );
	if ( $posDir != '' ) {
		$posFile = "$posDir/" . rawurlencode( $src->getDomainId() );
	} else {
		$posFile = false;
	}

	if ( $this->hasOption( 'posdump' ) ) {
		// Just dump the current position into the specified position dir.
		// An empty --posdir value yields no position file, so it is treated
		// the same as a missing option instead of writing to an empty path.
		if ( !$this->hasOption( 'posdir' ) || $posFile === false ) {
			$this->fatalError( "Param posdir required!" );
		}
		if ( $this->hasOption( 'postime' ) ) {
			$id = (int)$src->getJournal()->getPositionAtTime( $this->getOption( 'postime' ) );
			$this->output( "Requested journal position is $id.\n" );
		} else {
			$id = (int)$src->getJournal()->getCurrentPosition();
			$this->output( "Current journal position is $id.\n" );
		}
		if ( file_put_contents( $posFile, $id, LOCK_EX ) !== false ) {
			$this->output( "Saved journal position file.\n" );
		} else {
			// Report through error() so the failure is not hidden in quiet mode
			$this->error( "Could not save journal position file." );
		}
		if ( $this->isQuiet() ) {
			print $id; // give a single machine-readable number
		}

		return;
	}

	if ( !$this->hasOption( 'dst' ) ) {
		$this->fatalError( "Param dst required!" );
	}
	$dst = FileBackendGroup::singleton()->get( $this->getOption( 'dst' ) );

	// Command-line values arrive as strings; normalize to an integer before
	// it is used in arithmetic and comparisons.
	$start = (int)$this->getOption( 'start', 0 );
	if ( !$start && $posFile && is_dir( $posDir ) ) {
		$start = is_file( $posFile )
			? (int)trim( file_get_contents( $posFile ) )
			: 0;
		++$start; // we already did this ID, start with the next one
		$startFromPosFile = true;
	} else {
		$startFromPosFile = false;
	}

	if ( $this->hasOption( 'backoff' ) ) {
		$time = time() - $this->getOption( 'backoff', 0 );
		$end = (int)$src->getJournal()->getPositionAtTime( $time );
	} else {
		// INF means "no ending ID"; an explicit --end is normalized to an integer
		$end = $this->hasOption( 'end' ) ? (int)$this->getOption( 'end' ) : INF;
	}

	$this->output( "Synchronizing backend '{$dst->getName()}' to '{$src->getName()}'...\n" );
	$this->output( "Starting journal position is $start.\n" );
	if ( is_finite( $end ) ) {
		$this->output( "Ending journal position is $end.\n" );
	}

	// Periodically update the position file
	$callback = function ( $pos ) use ( $startFromPosFile, $posFile, $start ) {
		if ( $startFromPosFile && $pos >= $start ) { // successfully advanced
			file_put_contents( $posFile, $pos, LOCK_EX );
		}
	};

	// Actually sync the dest backend with the reference backend
	$lastOKPos = $this->syncBackends( $src, $dst, $start, $end, $callback );

	// Update the sync position file
	if ( $startFromPosFile && $lastOKPos >= $start ) { // successfully advanced
		if ( file_put_contents( $posFile, $lastOKPos, LOCK_EX ) !== false ) {
			$this->output( "Updated journal position file.\n" );
		} else {
			// Report through error() so the failure is not hidden in quiet mode
			$this->error( "Could not update journal position file." );
		}
	}

	if ( $lastOKPos === false ) {
		if ( !$start ) {
			$this->output( "No journal entries found.\n" );
		} else {
			$this->output( "No new journal entries found.\n" );
		}
	} else {
		$this->output( "Stopped synchronization at journal position $lastOKPos.\n" );
	}

	if ( $this->isQuiet() ) {
		print $lastOKPos; // give a single machine-readable number
	}
}
144
/**
 * Sync $dst backend to $src backend based on the $src journal entries
 * from position $start up to and including position $end.
 *
 * Entries are fetched and synced in batches of at most getBatchSize().
 * Processing stops at the first batch that fails, so every position up to
 * the returned one has been synced.
 *
 * @param FileBackend $src
 * @param FileBackend $dst
 * @param int $start Starting journal position
 * @param int|float $end Ending journal position (INF for no ending position)
 * @param Closure $callback Called with the last OK position after each successful batch
 * @return int|bool Last journal position synced OK (0 if the first batch
 *   failed), or false if there were no entries to process
 */
protected function syncBackends(
	FileBackend $src, FileBackend $dst, $start, $end, Closure $callback
) {
	$lastOKPos = 0; // failed
	$first = true; // first batch

	if ( $start > $end ) { // sanity
		$this->fatalError( "Error: given starting ID greater than ending ID." );
	}

	$next = null;
	do {
		$limit = min( $this->getBatchSize(), $end - $start + 1 ); // don't go past ending ID
		$this->output( "Doing id $start to " . ( $start + $limit - 1 ) . "...\n" );

		$entries = $src->getJournal()->getChangeEntries( $start, $limit, $next );
		$start = $next; // start where we left off next time

		$lastPosInBatch = 0;
		$pathsInBatch = []; // changed paths
		$sawEntryInRange = false;
		$pastEnd = false;
		foreach ( $entries as $entry ) {
			// $limit only caps the number of entries, not their IDs. If the
			// journal IDs are not contiguous (NOTE(review): e.g. a journal
			// shared with other backends - confirm), a batch can reach past
			// $end, so such entries are skipped here.
			if ( $entry['id'] > $end ) {
				$pastEnd = true;
				continue;
			}
			$sawEntryInRange = true;
			if ( $entry['op'] !== 'null' ) { // null ops are just for reference
				$pathsInBatch[$entry['path']] = 1; // remove duplicates
			}
			$lastPosInBatch = $entry['id'];
		}

		if ( $first && !$sawEntryInRange ) {
			return false; // nothing to do
		}
		$first = false;

		$status = $this->syncFileBatch( array_keys( $pathsInBatch ), $src, $dst );
		if ( $status->isOK() ) {
			$lastOKPos = max( $lastOKPos, $lastPosInBatch );
			$callback( $lastOKPos ); // update position file
		} else {
			$this->error( print_r( $status->getErrorsArray(), true ) );
			break; // no gaps; everything up to $lastPos must be OK
		}

		if ( !$start ) {
			$this->output( "End of journal entries.\n" );
		}
	} while ( !$pastEnd && $start && $start <= $end );

	return $lastOKPos;
}
203
/**
 * Sync particular files of backend $src to the corresponding $dst backend files.
 *
 * For each path: a file present in the source and differing from the
 * destination is stored to the destination; a file absent from the source
 * is deleted from the destination. The write operations are retried once
 * after a pause if they fail.
 *
 * @param string[] $paths Storage paths (mwstore://...) to sync
 * @param FileBackend $src
 * @param FileBackend $dst
 * @return Status
 */
protected function syncFileBatch( array $paths, FileBackend $src, FileBackend $dst ) {
	$status = Status::newGood();
	if ( $paths === [] ) {
		return $status; // nothing to do
	}

	// The given paths may carry an internal backend name (FileBackendMultiWrite);
	// rewrite them to the public name of each backend
	$srcPaths = $this->replaceNamePaths( $paths, $src );
	$dstPaths = $this->replaceNamePaths( $paths, $dst );

	// Lock the live backend paths from modification.
	// The lock handles are held in locals for the rest of this method
	// (presumably they release the locks when destroyed - confirm).
	$srcLock = $src->getScopedFileLocks( $srcPaths, LockManager::LOCK_UW, $status );
	$dstLock = $dst->getScopedFileLocks( $dstPaths, LockManager::LOCK_EX, $status );
	if ( !$status->isOK() ) {
		return $status;
	}

	$src->preloadFileStat( [ 'srcs' => $srcPaths, 'latest' => 1 ] );
	$dst->preloadFileStat( [ 'srcs' => $dstPaths, 'latest' => 1 ] );

	$operations = [];
	$localRefs = [];
	foreach ( $srcPaths as $idx => $srcPath ) {
		$dPath = $dstPaths[$idx]; // destination
		$existsInSrc = $src->fileExists( [ 'src' => $srcPath, 'latest' => 1 ] );

		if ( $existsInSrc === false ) {
			// Gone from the source: remove it from the destination too
			$operations[] = [ 'op' => 'delete', 'src' => $dPath, 'ignoreMissingSource' => 1 ];
			continue;
		}

		if ( $existsInSrc !== true ) {
			// Neither true nor false: the existence check itself failed
			$this->error( "Unable to sync '$dPath': could not stat file." );
			$status->fatal( 'backend-fail-internal', $src->getName() );

			return $status;
		}

		if ( $this->filesAreSame( $src, $dst, $srcPath, $dPath ) ) {
			continue; // avoid local copies for non-FS backends
		}

		// Note: getLocalReference() is fast for FS backends
		$localRef = $src->getLocalReference( [ 'src' => $srcPath, 'latest' => 1 ] );
		if ( !$localRef ) {
			$this->error( "Unable to sync '$dPath': could not get local copy." );
			$status->fatal( 'backend-fail-internal', $src->getName() );

			return $status;
		}
		$localRefs[] = $localRef; // keep TempFSFile objects alive as needed

		// Note: prepare() is usually fast for key/value backends
		$prepareStatus = $dst->prepare( [
			'dir' => dirname( $dPath ),
			'bypassReadOnly' => 1,
		] );
		$status->merge( $prepareStatus );
		if ( !$status->isOK() ) {
			return $status;
		}

		$operations[] = [
			'op' => 'store',
			'src' => $localRef->getPath(),
			'dst' => $dPath,
			'overwrite' => 1,
		];
	}

	$writeOpts = [ 'bypassReadOnly' => 1 ];
	$startedAt = microtime( true );
	$status = $dst->doQuickOperations( $operations, $writeOpts );
	if ( !$status->isOK() ) {
		sleep( 10 ); // wait and retry copy again
		$status = $dst->doQuickOperations( $operations, $writeOpts );
	}
	// Note: the elapsed time includes the retry pause, if any
	$elapsed_ms = floor( ( microtime( true ) - $startedAt ) * 1000 );

	if ( $status->isOK() && $this->getOption( 'verbose' ) ) {
		$fileList = implode( "\n", $dstPaths );
		$this->output( "Synchronized these file(s) [{$elapsed_ms}ms]:\n" . $fileList . "\n" );
	}

	return $status;
}
283
/**
 * Substitute the backend name of storage paths with that of a given one.
 *
 * Only a leading "mwstore://<name>" prefix is rewritten; the rest of each
 * path is left as is.
 *
 * @param string[]|string $paths One storage path or a list of them
 * @param FileBackend $backend Backend whose name is put into the paths
 * @return string[]|string Same shape as $paths
 */
protected function replaceNamePaths( $paths, FileBackend $backend ) {
	$prefixPattern = '!^mwstore://([^/]+)!';
	// The backend name is escaped so it is taken literally in the replacement
	$newPrefix = StringUtils::escapeRegexReplacement( "mwstore://" . $backend->getName() );

	// preg_replace() accepts either a single subject or an array of subjects
	return preg_replace( $prefixPattern, $newPrefix, $paths );
}
298
/**
 * Check whether a source file and its destination counterpart have the
 * same size and the same SHA-1 hash.
 *
 * A failed size or hash lookup on the source side is never treated as a
 * match. Otherwise two failed lookups would compare equal and the file
 * would be skipped as already synchronized.
 *
 * @param FileBackend $src
 * @param FileBackend $dst
 * @param string $sPath Storage path in the source backend
 * @param string $dPath Storage path in the destination backend
 * @return bool
 */
protected function filesAreSame( FileBackend $src, FileBackend $dst, $sPath, $dPath ) {
	// Cheap check first: differing sizes make the hash lookups unnecessary
	$sSize = $src->getFileSize( [ 'src' => $sPath ] );
	if ( $sSize === false || $sSize === null ) {
		return false; // lookup failed; cannot claim the files match
	}
	if ( $sSize !== $dst->getFileSize( [ 'src' => $dPath ] ) ) {
		return false;
	}

	$sHash = $src->getFileSha1Base36( [ 'src' => $sPath ] );
	if ( $sHash === false || $sHash === null ) {
		return false; // lookup failed; cannot claim the files match
	}

	return $sHash === $dst->getFileSha1Base36( [ 'src' => $dPath ] );
}
308}
309
// Script bootstrap: record this script's class name in $maintClass, then
// include the file named by RUN_MAINTENANCE_IF_MAIN.
// NOTE(review): by its name, that include presumably runs $maintClass only
// when this file is the entry script - confirm against Maintenance.php.
310$maintClass = SyncFileBackend::class;
311require_once RUN_MAINTENANCE_IF_MAIN;
const RUN_MAINTENANCE_IF_MAIN
Base class for all file backend classes (including multi-write backends).
preloadFileStat(array $params)
Preload file stat information (concurrently if possible) into in-process cache.
getFileSha1Base36(array $params)
Get a SHA-1 hash of the content of the file at a storage path in the backend.
fileExists(array $params)
Check if a file exists at a storage path in the backend.
getScopedFileLocks(array $paths, $type, StatusValue $status, $timeout=0)
Lock the files at the given storage paths in the backend.
getFileSize(array $params)
Get the size (bytes) of a file at a storage path in the backend.
prepare(array $params)
Prepare a storage directory for usage.
doQuickOperations(array $ops, array $opts=[])
Perform a set of independent file operations on some files.
getLocalReference(array $params)
Returns a file system file, identical in content to the file at a storage path.
getName()
Get the unique backend name.
getJournal()
Get the file journal object for this backend.
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
error( $err, $die=0)
Throw an error to the user.
output( $out, $channel=null)
Throw some output to the user.
hasOption( $name)
Checks to see if a particular option exists.
getBatchSize()
Returns batch size.
addDescription( $text)
Set the description text.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
setBatchSize( $s=0)
Set the batch size.
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.
static escapeRegexReplacement( $string)
Escape a string to make it suitable for inclusion in a preg_replace() replacement parameter.
Maintenance script that syncs one file backend to another based on the journal of the latter.
syncBackends(FileBackend $src, FileBackend $dst, $start, $end, Closure $callback)
Sync $dst backend to $src backend based on the $src logs given after $start.
__construct()
Default constructor.
syncFileBatch(array $paths, FileBackend $src, FileBackend $dst)
Sync particular files of backend $src to the corresponding $dst backend files.
filesAreSame(FileBackend $src, FileBackend $dst, $sPath, $dPath)
execute()
Do the actual work.
replaceNamePaths( $paths, FileBackend $backend)
Substitute the backend name of storage paths with that of a given one.
while(( $__line=Maintenance::readconsole()) !==false) print
Definition eval.php:64