Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
64.76% |
147 / 227 |
|
46.67% |
7 / 15 |
CRAP | |
0.00% |
0 / 1 |
UploadFromChunks | |
64.76% |
147 / 227 |
|
46.67% |
7 / 15 |
88.62 | |
0.00% |
0 / 1 |
__construct | |
77.78% |
7 / 9 |
|
0.00% |
0 / 1 |
3.10 | |||
tryStashFile | |
50.00% |
2 / 4 |
|
0.00% |
0 / 1 |
2.50 | |||
doStashFile | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
1 | |||
continueChunks | |
91.67% |
11 / 12 |
|
0.00% |
0 / 1 |
2.00 | |||
concatenateChunks | |
56.70% |
55 / 97 |
|
0.00% |
0 / 1 |
13.20 | |||
getVirtualChunkLocation | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
1 | |||
addChunk | |
52.00% |
13 / 25 |
|
0.00% |
0 / 1 |
7.76 | |||
updateChunkStatus | |
100.00% |
18 / 18 |
|
100.00% |
1 / 1 |
1 | |||
getChunkStatus | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
2 | |||
getChunkIndex | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getOffset | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
outputChunk | |
46.15% |
6 / 13 |
|
0.00% |
0 / 1 |
2.62 | |||
getChunkFileKey | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
verifyChunk | |
87.50% |
7 / 8 |
|
0.00% |
0 / 1 |
2.01 | |||
logFileBackendStatus | |
0.00% |
0 / 13 |
|
0.00% |
0 / 1 |
12 |
1 | <?php |
2 | |
3 | use MediaWiki\Deferred\AutoCommitUpdate; |
4 | use MediaWiki\Deferred\DeferredUpdates; |
5 | use MediaWiki\FileRepo\FileRepo; |
6 | use MediaWiki\FileRepo\LocalRepo; |
7 | use MediaWiki\Logger\LoggerFactory; |
8 | use MediaWiki\MediaWikiServices; |
9 | use MediaWiki\Request\WebRequestUpload; |
10 | use MediaWiki\Status\Status; |
11 | use MediaWiki\User\User; |
12 | use Psr\Log\LoggerInterface; |
13 | use Wikimedia\FileBackend\FileBackend; |
14 | |
15 | /** |
16 | * Backend for uploading files from chunks. |
17 | * |
18 | * This program is free software; you can redistribute it and/or modify |
19 | * it under the terms of the GNU General Public License as published by |
20 | * the Free Software Foundation; either version 2 of the License, or |
21 | * (at your option) any later version. |
22 | * |
23 | * This program is distributed in the hope that it will be useful, |
24 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
25 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
26 | * GNU General Public License for more details. |
27 | * |
28 | * You should have received a copy of the GNU General Public License along |
29 | * with this program; if not, write to the Free Software Foundation, Inc., |
30 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
31 | * http://www.gnu.org/copyleft/gpl.html |
32 | * |
33 | * @file |
34 | * @ingroup Upload |
35 | */ |
36 | |
/**
 * Implements uploading from chunks.
 *
 * The first part is stashed like a regular upload and copied to the chunk 0
 * location in the repo's temp zone; every further chunk is stored next to it
 * under the key "<file key>.<chunk index>". The current chunk index and the
 * accumulated size are persisted in the uploadstash table, and
 * concatenateChunks() assembles and stashes the final file.
 *
 * @ingroup Upload
 * @author Michael Dale
 */
class UploadFromChunks extends UploadFromFile {
	/** @var LocalRepo */
	private $repo;
	/** @var UploadStash */
	public $stash;
	/** @var User */
	public $user;

	/** @var int|null Number of bytes stored so far, i.e. the offset of the next chunk */
	protected $mOffset;
	/** @var int|null Index of the most recently stored chunk */
	protected $mChunkIndex;
	/** @var string Stash key of the chunked upload session */
	protected $mFileKey;
	/** @var string|null Path of the first stashed part, as stored in us_path */
	protected $mVirtualTempPath;

	private LoggerInterface $logger;

	/** @noinspection PhpMissingParentConstructorInspection */

	/**
	 * Setup local pointers to stash, repo and user (similar to UploadFromStash)
	 *
	 * @param User $user
	 * @param UploadStash|false $stash Default: false, which creates a new
	 *   UploadStash for the repo and user
	 * @param FileRepo|false $repo Default: false, which uses the local repo
	 */
	public function __construct( User $user, $stash = false, $repo = false ) {
		$this->user = $user;

		if ( $repo ) {
			$this->repo = $repo;
		} else {
			$this->repo = MediaWikiServices::getInstance()->getRepoGroup()->getLocalRepo();
		}

		if ( $stash ) {
			$this->stash = $stash;
		} else {
			wfDebug( __METHOD__ . " creating new UploadFromChunks instance for " . $user->getId() );
			$this->stash = new UploadStash( $this->repo, $this->user );
		}

		$this->logger = LoggerFactory::getInstance( 'upload' );
	}

	/**
	 * @inheritDoc
	 */
	public function tryStashFile( User $user, $isPartial = false ) {
		// Verify the chunk first; a verification failure is reported as a fatal
		// Status instead of being propagated as an exception.
		try {
			$this->verifyChunk();
		} catch ( UploadChunkVerificationException $e ) {
			return Status::newFatal( $e->msg );
		}

		return parent::tryStashFile( $user, $isPartial );
	}

	/**
	 * Calls the parent doStashFile and updates the uploadsession table to handle "chunks"
	 *
	 * @param User|null $user
	 * @return UploadStashFile Stashed file
	 */
	protected function doStashFile( ?User $user = null ) {
		// doStashFile is called when creating a new chunk session:
		$this->mChunkIndex = 0;
		$this->mOffset = 0;

		// Create a local stash target
		$this->mStashFile = parent::doStashFile( $user );
		// Update the initial file offset (based on file size)
		$this->mOffset = $this->mStashFile->getSize();
		$this->mFileKey = $this->mStashFile->getFileKey();
		$this->mVirtualTempPath = $this->mStashFile->getPath();

		// Output a copy of this first to chunk 0 location:
		$this->outputChunk( $this->mStashFile->getPath() );

		// Update db table to reflect initial "chunk" state
		$this->updateChunkStatus();

		return $this->mStashFile;
	}

	/**
	 * Continue chunk uploading
	 *
	 * Loads the chunk state for the given key from the database and
	 * re-initializes the path info from the stash metadata.
	 *
	 * @param string $name Name passed on to initializePathInfo()
	 * @param string $key Stash key of the chunked upload session
	 * @param WebRequestUpload $webRequestUpload
	 * @throws UploadStashBadPathException If the stashed path cannot be resolved
	 */
	public function continueChunks( $name, $key, $webRequestUpload ) {
		$this->mFileKey = $key;
		$this->mUpload = $webRequestUpload;
		// Get the chunk status from the db:
		$this->getChunkStatus();

		$metadata = $this->stash->getMetadata( $key );
		$tempPath = $this->getRealPath( $metadata['us_path'] );
		if ( $tempPath === false ) {
			throw new UploadStashBadPathException( wfMessage( 'uploadstash-bad-path' ) );
		}
		$this->initializePathInfo( $name,
			$tempPath,
			$metadata['us_size'],
			false
		);
	}

	/**
	 * Append the final chunk and ready file for parent::performUpload()
	 *
	 * Concatenates all stored chunks into a temp file, schedules deletion of the
	 * chunks, verifies the combined file and stashes it under a new file key.
	 *
	 * @return Status
	 */
	public function concatenateChunks() {
		$oldFileKey = $this->mFileKey;
		$chunkIndex = $this->getChunkIndex();
		$this->logger->debug(
			__METHOD__ . ' concatenate {totalChunks} chunks: {offset} inx: {curIndex}',
			[
				'offset' => $this->getOffset(),
				'totalChunks' => $this->mChunkIndex,
				'curIndex' => $chunkIndex,
				'filekey' => $oldFileKey
			]
		);

		// Collect the locations of all chunks. doStashFile() writes a copy of the
		// first part to the chunk 0 location, so the list starts at index 0.
		$fileList = [];
		for ( $i = 0; $i <= $chunkIndex; $i++ ) {
			$fileList[] = $this->getVirtualChunkLocation( $i );
		}
		// NOTE: $i now holds the number of collected chunks and is used as such
		// in the log messages below.

		// Get the file extension from the virtual temp path
		$ext = FileBackend::extensionFromPath( $this->mVirtualTempPath );
		// Get a 0-byte temp file to perform the concatenation at
		$tmpFile = MediaWikiServices::getInstance()->getTempFSFileFactory()
			->newTempFSFile( 'chunkedupload_', $ext );
		$tmpPath = false; // fail in concatenate()
		if ( $tmpFile ) {
			// keep alive with $this
			$tmpPath = $tmpFile->bind( $this )->getPath();
		} else {
			$this->logger->warning( "Error getting tmp file", [ 'filekey' => $oldFileKey ] );
		}

		// Concatenate the chunks at the temp file
		$tStart = microtime( true );
		$status = $this->repo->concatenate( $fileList, $tmpPath );
		$tAmount = microtime( true ) - $tStart;
		if ( !$status->isOK() ) {
			// This is a backend error and not user-related, so log is safe
			// Upload verification further on is not safe to log server side
			$this->logFileBackendStatus(
				$status,
				'[{type}] Error on concatenate {chunks} stashed files ({details})',
				[ 'chunks' => $chunkIndex, 'filekey' => $oldFileKey ]
			);
			return $status;
		}

		// Delete old chunks in deferred job. Put in deferred job because deleting
		// lots of chunks can take a long time, sometimes to the point of causing
		// a timeout, and we do not want that to tank the operation. Note that chunks
		// are also automatically deleted after a set time by cleanupUploadStash.php
		// Additionally, using AutoCommitUpdate ensures that we do not delete files
		// if the main transaction is rolled back for some reason.
		DeferredUpdates::addUpdate( new AutoCommitUpdate(
			$this->repo->getPrimaryDB(),
			__METHOD__,
			function () use( $fileList, $oldFileKey ) {
				$status = $this->repo->quickPurgeBatch( $fileList );
				if ( !$status->isOK() ) {
					$this->logger->warning(
						"Could not delete chunks of {filekey} - {status}",
						[
							'status' => (string)$status,
							'filekey' => $oldFileKey,
						]
					);
				}
			}
		) );

		wfDebugLog( 'fileconcatenate', "Combined $i chunks in $tAmount seconds." );

		// File system path of the actual full temp file
		$this->setTempFile( $tmpPath );

		$ret = $this->verifyUpload();
		if ( $ret['status'] !== UploadBase::OK ) {
			$this->logger->info(
				"Verification failed for chunked upload {filekey}",
				[
					'user' => $this->user->getName(),
					'filekey' => $oldFileKey
				]
			);
			// @phan-suppress-next-line PhanTypeMismatchReturnProbablyReal
			return $this->convertVerifyErrorToStatus( $ret );
		}

		// Update the mTempPath and mStashFile
		// (for FileUpload or normal Stash to take over)
		$tStart = microtime( true );
		// This is a re-implementation of UploadBase::tryStashFile(), we can't call it because we
		// override doStashFile() with completely different functionality in this class...
		$error = $this->runUploadStashFileHook( $this->user );
		if ( $error ) {
			$status->fatal( ...$error );
			$this->logger->info( "Aborting stash upload due to hook - {status}",
				[
					'status' => (string)$status,
					'user' => $this->user->getName(),
					'filekey' => $this->mFileKey
				]
			);
			return $status;
		}
		try {
			$this->mStashFile = parent::doStashFile( $this->user );
		} catch ( UploadStashException $e ) {
			$this->logger->warning( "Could not stash file for {user} because {error} {msg}",
				[
					'user' => $this->user->getName(),
					'error' => get_class( $e ),
					'msg' => $e->getMessage(),
					'filekey' => $this->mFileKey
				]
			);
			$status->fatal( 'uploadstash-exception', get_class( $e ), $e->getMessage() );
			return $status;
		}

		$tAmount = microtime( true ) - $tStart;
		// @phan-suppress-next-line PhanTypeMismatchArgumentNullable tmpFile is set when tmpPath is set here
		$this->mStashFile->setLocalReference( $tmpFile ); // reuse (e.g. for getImageInfo())
		$this->logger->info( "Stashed combined ({chunks} chunks) of {oldkey} under new name {filekey}",
			[
				'chunks' => $i,
				'stashTime' => $tAmount,
				'oldpath' => $this->mVirtualTempPath,
				'filekey' => $this->mStashFile->getFileKey(),
				'oldkey' => $oldFileKey,
				'newpath' => $this->mStashFile->getPath(),
				'user' => $this->user->getName()
			]
		);
		wfDebugLog( 'fileconcatenate', "Stashed combined file ($i chunks) in $tAmount seconds." );

		return $status;
	}

	/**
	 * Returns the virtual chunk location:
	 *
	 * Built from the repo's virtual 'temp' URL, the hash path of the chunk's
	 * file key and the chunk's file key itself.
	 *
	 * @param int $index
	 * @return string
	 */
	private function getVirtualChunkLocation( $index ) {
		return $this->repo->getVirtualUrl( 'temp' ) .
			'/' .
			$this->repo->getHashPath(
				$this->getChunkFileKey( $index )
			) .
			$this->getChunkFileKey( $index );
	}

	/**
	 * Add a chunk to the temporary directory
	 *
	 * @param string $chunkPath Path to temporary chunk file
	 * @param int $chunkSize Size of the current chunk
	 * @param int $offset Offset of current chunk ( must match database chunk offset )
	 * @return Status Fatal with 'file-too-large' or 'invalid-chunk-offset', fatal with
	 *   the verification error, or the status of storing the chunk
	 */
	public function addChunk( $chunkPath, $chunkSize, $offset ) {
		// Get the offset before we add the chunk to the file system
		$preAppendOffset = $this->getOffset();

		if ( $preAppendOffset + $chunkSize > $this->getMaxUploadSize() ) {
			return Status::newFatal( 'file-too-large' );
		}

		// Make sure the client is uploading the correct chunk with a matching offset.
		// The comparison is deliberately loose: getChunkStatus() stores the offset
		// exactly as the database returned it, which is not guaranteed to be an int.
		if ( $preAppendOffset != $offset ) {
			return Status::newFatal( 'invalid-chunk-offset' );
		}

		// Update local chunk index for the current chunk
		$this->mChunkIndex++;

		// mTempPath is set to the first part, so point it at the new chunk for the
		// duration of the verification only. The finally block restores it on both
		// the success and the failure path.
		$oldTemp = $this->mTempPath;
		$this->mTempPath = $chunkPath;
		try {
			$this->verifyChunk();
		} catch ( UploadChunkVerificationException $e ) {
			$this->logger->info( "Error verifying upload chunk {msg}",
				[
					'user' => $this->user->getName(),
					'msg' => $e->getMessage(),
					'chunkIndex' => $this->mChunkIndex,
					'filekey' => $this->mFileKey
				]
			);

			return Status::newFatal( $e->msg );
		} finally {
			$this->mTempPath = $oldTemp;
		}

		$status = $this->outputChunk( $chunkPath );
		if ( $status->isGood() ) {
			// Update local offset:
			$this->mOffset = $preAppendOffset + $chunkSize;
			// Update chunk table status db
			$this->updateChunkStatus();
		}

		return $status;
	}

	/**
	 * Update the chunk db table with the current status:
	 *
	 * Writes the current chunk index and offset to the uploadstash row of this
	 * file key and marks it with the status 'chunks'.
	 */
	private function updateChunkStatus() {
		$this->logger->info( "update chunk status for {filekey} offset: {offset} inx: {inx}",
			[
				'offset' => $this->getOffset(),
				'inx' => $this->getChunkIndex(),
				'filekey' => $this->mFileKey,
				'user' => $this->user->getName()
			]
		);

		$dbw = $this->repo->getPrimaryDB();
		$dbw->newUpdateQueryBuilder()
			->update( 'uploadstash' )
			->set( [
				'us_status' => 'chunks',
				'us_chunk_inx' => $this->getChunkIndex(),
				'us_size' => $this->getOffset()
			] )
			->where( [ 'us_key' => $this->mFileKey ] )
			->caller( __METHOD__ )->execute();
	}

	/**
	 * Get the chunk db state and populate the relevant local values
	 *
	 * Leaves the local values untouched when no row exists for the file key.
	 */
	private function getChunkStatus() {
		// get primary db to avoid race conditions.
		// Otherwise, if chunk upload time < replag there will be spurious errors
		$dbw = $this->repo->getPrimaryDB();
		$row = $dbw->newSelectQueryBuilder()
			->select( [ 'us_chunk_inx', 'us_size', 'us_path' ] )
			->from( 'uploadstash' )
			->where( [ 'us_key' => $this->mFileKey ] )
			->caller( __METHOD__ )->fetchRow();
		// Handle result:
		if ( $row ) {
			$this->mChunkIndex = $row->us_chunk_inx;
			$this->mOffset = $row->us_size;
			$this->mVirtualTempPath = $row->us_path;
		}
	}

	/**
	 * Get the current Chunk index
	 * @return int Index of the current chunk, 0 if none has been set yet
	 */
	private function getChunkIndex() {
		return $this->mChunkIndex ?? 0;
	}

	/**
	 * Get the offset at which the next uploaded chunk will be appended to
	 * @return int Current byte offset of the chunk file set, 0 if none has been set yet
	 */
	public function getOffset() {
		return $this->mOffset ?? 0;
	}

	/**
	 * Output the chunk to disk
	 *
	 * Imports the file into the repo's temp zone under the file key of the
	 * current chunk index.
	 *
	 * @param string $chunkPath
	 * @throws UploadChunkFileException If the chunk could not be stored
	 * @return Status
	 */
	private function outputChunk( $chunkPath ) {
		// Key is fileKey + chunk index
		$fileKey = $this->getChunkFileKey();

		// Store the chunk per its indexed fileKey:
		$hashPath = $this->repo->getHashPath( $fileKey );
		$storeStatus = $this->repo->quickImport( $chunkPath,
			$this->repo->getZonePath( 'temp' ) . "/{$hashPath}{$fileKey}" );

		// Check for error in stashing the chunk:
		if ( !$storeStatus->isOK() ) {
			$error = $this->logFileBackendStatus(
				$storeStatus,
				'[{type}] Error storing chunk in "{chunkPath}" for {fileKey} ({details})',
				[ 'chunkPath' => $chunkPath, 'fileKey' => $fileKey ]
			);
			// '{chunkPath}' is a literal placeholder here, not PHP interpolation;
			// presumably the exception fills it from the context array (TODO confirm).
			throw new UploadChunkFileException( "Error storing file in '{chunkPath}': " .
				implode( '; ', $error ), [ 'chunkPath' => $chunkPath ] );
		}

		return $storeStatus;
	}

	/**
	 * Build the file key of a chunk: the upload's file key, a dot and the chunk index.
	 *
	 * @param int|null $index Chunk index; defaults to the current chunk index
	 * @return string
	 */
	private function getChunkFileKey( ?int $index = null ): string {
		return $this->mFileKey . '.' . ( $index ?? $this->getChunkIndex() );
	}

	/**
	 * Verify that the chunk isn't really an evil html file
	 *
	 * @throws UploadChunkVerificationException
	 */
	private function verifyChunk() {
		// Reset mDesiredDestName here so we verify the name as if it were mFileKey
		$oldDesiredDestName = $this->mDesiredDestName;
		$this->mDesiredDestName = $this->mFileKey;
		$this->mTitle = false;
		try {
			$res = $this->verifyPartialFile();
		} finally {
			// Restore the destination name even if the verification throws, so
			// the object is never left pretending its name is the file key.
			$this->mDesiredDestName = $oldDesiredDestName;
			$this->mTitle = false;
		}
		if ( is_array( $res ) ) {
			throw new UploadChunkVerificationException( $res );
		}
	}

	/**
	 * Log a status object from FileBackend functions (via FileRepo functions) to the upload log channel.
	 * Return an array with the first error to build up an exception message
	 *
	 * @param Status $status
	 * @param string $logMessage Log message; '{type}' is replaced by the message key of each entry
	 * @param array $context Extra log context; 'details' and 'user' are set per entry
	 * @return array Message key followed by its parameters: the first error, else the
	 *   first warning, else [ 'unknown', 'no error recorded' ]
	 */
	private function logFileBackendStatus( Status $status, string $logMessage, array $context = [] ): array {
		$logger = $this->logger;
		$errorToThrow = null;
		$warningToThrow = null;

		foreach ( $status->getErrors() as $errorItem ) {
			// The message key stands for distinct error situation from the file backend,
			// each error situation should be shown up in aggregated stats as own point, replace in message
			$logMessageType = str_replace( '{type}', $errorItem['message'], $logMessage );

			// The message arguments often contains the name of the failing datacenter or file names
			// and should not show up in aggregated stats, add to context
			$context['details'] = implode( '; ', $errorItem['params'] );
			$context['user'] = $this->user->getName();

			if ( $errorItem['type'] === 'error' ) {
				// Use the first error of the list for the exception text
				$errorToThrow ??= [ $errorItem['message'], ...$errorItem['params'] ];
				$logger->error( $logMessageType, $context );
			} else {
				// When no error is found, fall back to the first warning
				$warningToThrow ??= [ $errorItem['message'], ...$errorItem['params'] ];
				$logger->warning( $logMessageType, $context );
			}
		}
		return $errorToThrow ?? $warningToThrow ?? [ 'unknown', 'no error recorded' ];
	}
}