Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 224 |
|
0.00% |
0 / 15 |
CRAP | |
0.00% |
0 / 1 |
UploadFromChunks | |
0.00% |
0 / 224 |
|
0.00% |
0 / 15 |
1190 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
12 | |||
tryStashFile | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
doStashFile | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
2 | |||
continueChunks | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
2 | |||
concatenateChunks | |
0.00% |
0 / 98 |
|
0.00% |
0 / 1 |
72 | |||
getVirtualChunkLocation | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
2 | |||
addChunk | |
0.00% |
0 / 25 |
|
0.00% |
0 / 1 |
30 | |||
updateChunkStatus | |
0.00% |
0 / 18 |
|
0.00% |
0 / 1 |
2 | |||
getChunkStatus | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
6 | |||
getChunkIndex | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getOffset | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
outputChunk | |
0.00% |
0 / 13 |
|
0.00% |
0 / 1 |
6 | |||
getChunkFileKey | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
verifyChunk | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
6 | |||
logFileBackendStatus | |
0.00% |
0 / 13 |
|
0.00% |
0 / 1 |
12 |
1 | <?php |
2 | |
3 | use MediaWiki\Deferred\AutoCommitUpdate; |
4 | use MediaWiki\Deferred\DeferredUpdates; |
5 | use MediaWiki\Logger\LoggerFactory; |
6 | use MediaWiki\MediaWikiServices; |
7 | use MediaWiki\Request\WebRequestUpload; |
8 | use MediaWiki\Status\Status; |
9 | use MediaWiki\User\User; |
10 | use Psr\Log\LoggerInterface; |
11 | use Wikimedia\FileBackend\FileBackend; |
12 | |
13 | /** |
14 | * Backend for uploading files from chunks. |
15 | * |
16 | * This program is free software; you can redistribute it and/or modify |
17 | * it under the terms of the GNU General Public License as published by |
18 | * the Free Software Foundation; either version 2 of the License, or |
19 | * (at your option) any later version. |
20 | * |
21 | * This program is distributed in the hope that it will be useful, |
22 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
23 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
24 | * GNU General Public License for more details. |
25 | * |
26 | * You should have received a copy of the GNU General Public License along |
27 | * with this program; if not, write to the Free Software Foundation, Inc., |
28 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
29 | * http://www.gnu.org/copyleft/gpl.html |
30 | * |
31 | * @file |
32 | * @ingroup Upload |
33 | */ |
34 | |
35 | /** |
36 | * Implements uploading from chunks |
37 | * |
38 | * @ingroup Upload |
39 | * @author Michael Dale |
40 | */ |
class UploadFromChunks extends UploadFromFile {
	/** @var LocalRepo Repository whose 'temp' zone receives the chunk files */
	private $repo;
	/** @var UploadStash */
	public $stash;
	/** @var User */
	public $user;

	/** @var int|null Byte offset at which the next chunk is expected to start */
	protected $mOffset;
	/** @var int|null Index of the current chunk (the first chunk is index 0) */
	protected $mChunkIndex;
	/** @var string Stash file key identifying this chunked upload session */
	protected $mFileKey;
	/** @var string|null Value of us_path for this session, as read by getChunkStatus() */
	protected $mVirtualTempPath;

	private LoggerInterface $logger;

	/** @noinspection PhpMissingParentConstructorInspection */

	/**
	 * Setup local pointers to stash, repo and user (similar to UploadFromStash)
	 *
	 * @param User $user
	 * @param UploadStash|false $stash Default: false, meaning a new UploadStash is
	 *  created for $user on the selected repo
	 * @param FileRepo|false $repo Default: false, meaning the local repo is used
	 */
	public function __construct( User $user, $stash = false, $repo = false ) {
		$this->user = $user;

		if ( $repo ) {
			$this->repo = $repo;
		} else {
			$this->repo = MediaWikiServices::getInstance()->getRepoGroup()->getLocalRepo();
		}

		// The stash depends on $this->repo and $this->user, so it is resolved last.
		if ( $stash ) {
			$this->stash = $stash;
		} else {
			wfDebug( __METHOD__ . " creating new UploadFromChunks instance for " . $user->getId() );
			$this->stash = new UploadStash( $this->repo, $this->user );
		}

		$this->logger = LoggerFactory::getInstance( 'upload' );
	}

	/**
	 * Verify the chunk before delegating to the parent implementation.
	 *
	 * A chunk that fails verification is reported as a fatal Status built from
	 * the exception's message instead of being stashed.
	 *
	 * @inheritDoc
	 */
	public function tryStashFile( User $user, $isPartial = false ) {
		try {
			$this->verifyChunk();
		} catch ( UploadChunkVerificationException $e ) {
			return Status::newFatal( $e->msg );
		}

		return parent::tryStashFile( $user, $isPartial );
	}

	/**
	 * Calls the parent doStashFile and updates the uploadstash table to handle "chunks"
	 *
	 * @param User|null $user
	 * @return UploadStashFile Stashed file
	 */
	protected function doStashFile( ?User $user = null ) {
		// Stashing happens when a new chunk session is created, so start from scratch:
		$this->mChunkIndex = 0;
		$this->mOffset = 0;

		// Create a local stash target
		$this->mStashFile = parent::doStashFile( $user );
		// Update the initial file offset (based on file size)
		$this->mOffset = $this->mStashFile->getSize();
		$this->mFileKey = $this->mStashFile->getFileKey();

		// Output a copy of this first chunk to the chunk 0 location.
		// This must run after mFileKey is set, since the chunk key is derived from it.
		$this->outputChunk( $this->mStashFile->getPath() );

		// Update db table to reflect initial "chunk" state
		$this->updateChunkStatus();

		return $this->mStashFile;
	}

	/**
	 * Continue chunk uploading
	 *
	 * Loads the stored chunk index and offset for $key and re-initializes the
	 * path information from the stash metadata.
	 *
	 * @param string $name
	 * @param string $key
	 * @param WebRequestUpload $webRequestUpload
	 */
	public function continueChunks( $name, $key, $webRequestUpload ) {
		$this->mFileKey = $key;
		$this->mUpload = $webRequestUpload;
		// Get the chunk status from the db:
		$this->getChunkStatus();

		$metadata = $this->stash->getMetadata( $key );
		$this->initializePathInfo( $name,
			$this->getRealPath( $metadata['us_path'] ),
			$metadata['us_size'],
			false
		);
	}

	/**
	 * Append the final chunk and ready file for parent::performUpload()
	 *
	 * Concatenates all stored chunks into a temporary file, schedules deletion of
	 * the chunk files, verifies the combined upload and stashes it under a new key.
	 *
	 * @return Status The status returned by the repo concatenation, with a fatal
	 *  error added if verification, the stash hook or the stashing itself failed
	 */
	public function concatenateChunks() {
		$oldFileKey = $this->mFileKey;
		$chunkIndex = $this->getChunkIndex();
		$this->logger->debug(
			__METHOD__ . ' concatenate {totalChunks} chunks: {offset} inx: {curIndex}',
			[
				'offset' => $this->getOffset(),
				'totalChunks' => $this->mChunkIndex,
				'curIndex' => $chunkIndex,
				'filekey' => $oldFileKey
			]
		);

		// Collect the virtual paths of every chunk, from chunk 0 up to and
		// including the current chunk index.
		$fileList = [];
		for ( $i = 0; $i <= $chunkIndex; $i++ ) {
			$fileList[] = $this->getVirtualChunkLocation( $i );
		}
		// Number of chunks being combined; used for the log messages below.
		$chunkCount = count( $fileList );

		// Reuse the file extension of the path stored for this upload session
		$ext = FileBackend::extensionFromPath( $this->mVirtualTempPath );
		// Get a 0-byte temp file to perform the concatenation at
		$tmpFile = MediaWikiServices::getInstance()->getTempFSFileFactory()
			->newTempFSFile( 'chunkedupload_', $ext );
		$tmpPath = false; // fail in concatenate()
		if ( $tmpFile ) {
			// keep alive with $this
			$tmpPath = $tmpFile->bind( $this )->getPath();
		} else {
			$this->logger->warning( "Error getting tmp file", [ 'filekey' => $oldFileKey ] );
		}

		// Concatenate the chunks at the temp file
		$tStart = microtime( true );
		$status = $this->repo->concatenate( $fileList, $tmpPath );
		$tAmount = microtime( true ) - $tStart;
		if ( !$status->isOK() ) {
			// This is a backend error and not user-related, so log is safe
			// Upload verification further on is not safe to log server side
			$this->logFileBackendStatus(
				$status,
				'[{type}] Error on concatenate {chunks} stashed files ({details})',
				[ 'chunks' => $chunkIndex, 'filekey' => $oldFileKey ]
			);
			return $status;
		}

		// Delete old chunks in deferred job. Put in deferred job because deleting
		// lots of chunks can take a long time, sometimes to the point of causing
		// a timeout, and we do not want that to tank the operation. Note that chunks
		// are also automatically deleted after a set time by cleanupUploadStash.php
		// Additionally, using AutoCommitUpdate ensures that we do not delete files
		// if the main transaction is rolled back for some reason.
		DeferredUpdates::addUpdate( new AutoCommitUpdate(
			$this->repo->getPrimaryDB(),
			__METHOD__,
			function () use( $fileList, $oldFileKey ) {
				$status = $this->repo->quickPurgeBatch( $fileList );
				if ( !$status->isOK() ) {
					$this->logger->warning(
						"Could not delete chunks of {filekey} - {status}",
						[
							'status' => (string)$status,
							'filekey' => $oldFileKey,
						]
					);
				}
			}
		) );

		wfDebugLog( 'fileconcatenate', "Combined $chunkCount chunks in $tAmount seconds." );

		// File system path of the actual full temp file
		$this->setTempFile( $tmpPath );

		$ret = $this->verifyUpload();
		if ( $ret['status'] !== UploadBase::OK ) {
			$this->logger->info(
				"Verification failed for chunked upload {filekey}",
				[
					'user' => $this->user->getName(),
					'filekey' => $oldFileKey
				]
			);
			$status->fatal( $this->getVerificationErrorCode( $ret['status'] ) );

			return $status;
		}

		// Update the mTempPath and mStashFile
		// (for FileUpload or normal Stash to take over)
		$tStart = microtime( true );
		// This is a re-implementation of UploadBase::tryStashFile(), we can't call it because we
		// override doStashFile() with completely different functionality in this class...
		$error = $this->runUploadStashFileHook( $this->user );
		if ( $error ) {
			$status->fatal( ...$error );
			$this->logger->info( "Aborting stash upload due to hook - {status}",
				[
					'status' => (string)$status,
					'user' => $this->user->getName(),
					'filekey' => $this->mFileKey
				]
			);
			return $status;
		}
		try {
			$this->mStashFile = parent::doStashFile( $this->user );
		} catch ( UploadStashException $e ) {
			$this->logger->warning( "Could not stash file for {user} because {error} {msg}",
				[
					'user' => $this->user->getName(),
					'error' => get_class( $e ),
					'msg' => $e->getMessage(),
					'filekey' => $this->mFileKey
				]
			);
			$status->fatal( 'uploadstash-exception', get_class( $e ), $e->getMessage() );
			return $status;
		}

		$tAmount = microtime( true ) - $tStart;
		// @phan-suppress-next-line PhanTypeMismatchArgumentNullable tmpFile is set when tmpPath is set here
		$this->mStashFile->setLocalReference( $tmpFile ); // reuse (e.g. for getImageInfo())
		$this->logger->info( "Stashed combined ({chunks} chunks) of {oldkey} under new name {filekey}",
			[
				'chunks' => $chunkCount,
				'stashTime' => $tAmount,
				'oldpath' => $this->mVirtualTempPath,
				'filekey' => $this->mStashFile->getFileKey(),
				'oldkey' => $oldFileKey,
				'newpath' => $this->mStashFile->getPath(),
				'user' => $this->user->getName()
			]
		);
		wfDebugLog( 'fileconcatenate', "Stashed combined file ($chunkCount chunks) in $tAmount seconds." );

		return $status;
	}

	/**
	 * Returns the virtual chunk location:
	 *
	 * The location is the repo's virtual 'temp' URL, followed by the hash path
	 * of the chunk file key and the chunk file key itself.
	 *
	 * @param int $index
	 * @return string
	 */
	private function getVirtualChunkLocation( $index ) {
		return $this->repo->getVirtualUrl( 'temp' ) .
			'/' .
			$this->repo->getHashPath(
				$this->getChunkFileKey( $index )
			) .
			$this->getChunkFileKey( $index );
	}

	/**
	 * Add a chunk to the temporary directory
	 *
	 * The chunk is rejected if it would exceed the maximum upload size, if its
	 * offset does not match the stored offset, or if it fails verification.
	 *
	 * @param string $chunkPath Path to temporary chunk file
	 * @param int $chunkSize Size of the current chunk
	 * @param int $offset Offset of current chunk ( must match database chunk offset )
	 * @throws UploadChunkFileException If the chunk cannot be stored (see outputChunk())
	 * @return Status
	 */
	public function addChunk( $chunkPath, $chunkSize, $offset ) {
		// Get the offset before we add the chunk to the file system
		$preAppendOffset = $this->getOffset();

		if ( $preAppendOffset + $chunkSize > $this->getMaxUploadSize() ) {
			return Status::newFatal( 'file-too-large' );
		}

		// Make sure the client is uploading the correct chunk with a matching offset.
		// The comparison is intentionally loose so numeric strings still match.
		if ( $preAppendOffset != $offset ) {
			return Status::newFatal( 'invalid-chunk-offset' );
		}

		// Update local chunk index for the current chunk
		$this->mChunkIndex++;

		# For some reason mTempPath is set to first part, so point it at the new
		# chunk only for the duration of the verification.
		$oldTemp = $this->mTempPath;
		$this->mTempPath = $chunkPath;
		try {
			$this->verifyChunk();
		} catch ( UploadChunkVerificationException $e ) {
			$this->logger->info( "Error verifying upload chunk {msg}",
				[
					'user' => $this->user->getName(),
					'msg' => $e->getMessage(),
					'chunkIndex' => $this->mChunkIndex,
					'filekey' => $this->mFileKey
				]
			);

			return Status::newFatal( $e->msg );
		} finally {
			// Restore the original path on success AND on failure, so a rejected
			// chunk does not leave this object pointing at the chunk file.
			$this->mTempPath = $oldTemp;
		}

		$status = $this->outputChunk( $chunkPath );
		if ( $status->isGood() ) {
			// Update local offset:
			$this->mOffset = $preAppendOffset + $chunkSize;
			// Update chunk table status db
			$this->updateChunkStatus();
		}

		return $status;
	}

	/**
	 * Update the chunk db table with the current status:
	 *
	 * Writes the 'chunks' status, the current chunk index and the current offset
	 * to the uploadstash row of this file key.
	 */
	private function updateChunkStatus() {
		$this->logger->info( "update chunk status for {filekey} offset: {offset} inx: {inx}",
			[
				'offset' => $this->getOffset(),
				'inx' => $this->getChunkIndex(),
				'filekey' => $this->mFileKey,
				'user' => $this->user->getName()
			]
		);

		$dbw = $this->repo->getPrimaryDB();
		$dbw->newUpdateQueryBuilder()
			->update( 'uploadstash' )
			->set( [
				'us_status' => 'chunks',
				'us_chunk_inx' => $this->getChunkIndex(),
				'us_size' => $this->getOffset()
			] )
			->where( [ 'us_key' => $this->mFileKey ] )
			->caller( __METHOD__ )->execute();
	}

	/**
	 * Get the chunk db state and populate the relevant local values
	 *
	 * If no row exists for the file key, the local values are left untouched.
	 */
	private function getChunkStatus() {
		// get primary db to avoid race conditions.
		// Otherwise, if chunk upload time < replag there will be spurious errors
		$dbw = $this->repo->getPrimaryDB();
		$row = $dbw->newSelectQueryBuilder()
			->select( [ 'us_chunk_inx', 'us_size', 'us_path' ] )
			->from( 'uploadstash' )
			->where( [ 'us_key' => $this->mFileKey ] )
			->caller( __METHOD__ )->fetchRow();
		// Handle result:
		if ( $row ) {
			// Normalize to int so the values match the documented property types
			// and getChunkIndex() / getOffset() really return integers.
			$this->mChunkIndex = (int)$row->us_chunk_inx;
			$this->mOffset = (int)$row->us_size;
			$this->mVirtualTempPath = $row->us_path;
		}
	}

	/**
	 * Get the current Chunk index
	 * @return int Index of the current chunk, 0 if none has been set yet
	 */
	private function getChunkIndex() {
		return $this->mChunkIndex ?? 0;
	}

	/**
	 * Get the offset at which the next uploaded chunk will be appended to
	 * @return int Current byte offset of the chunk file set, 0 if none has been set yet
	 */
	public function getOffset() {
		return $this->mOffset ?? 0;
	}

	/**
	 * Output the chunk to disk
	 *
	 * Imports the chunk into the repo's 'temp' zone under the file key of the
	 * current chunk index.
	 *
	 * @param string $chunkPath
	 * @throws UploadChunkFileException If the repo reports that the import failed
	 * @return Status
	 */
	private function outputChunk( $chunkPath ) {
		// Key is fileKey + chunk index
		$fileKey = $this->getChunkFileKey();

		// Store the chunk per its indexed fileKey:
		$hashPath = $this->repo->getHashPath( $fileKey );
		$storeStatus = $this->repo->quickImport( $chunkPath,
			$this->repo->getZonePath( 'temp' ) . "/{$hashPath}{$fileKey}" );

		// Check for error in stashing the chunk:
		if ( !$storeStatus->isOK() ) {
			$error = $this->logFileBackendStatus(
				$storeStatus,
				'[{type}] Error storing chunk in "{chunkPath}" for {fileKey} ({details})',
				[ 'chunkPath' => $chunkPath, 'fileKey' => $fileKey ]
			);
			// NOTE(review): '{chunkPath}' is a literal placeholder here, presumably filled
			// in from the context array by the exception class - confirm.
			throw new UploadChunkFileException( "Error storing file in '{chunkPath}': " .
				implode( '; ', $error ), [ 'chunkPath' => $chunkPath ] );
		}

		return $storeStatus;
	}

	/**
	 * Build the file key of a single chunk: the session file key, a dot and the chunk index.
	 *
	 * @param int|null $index Chunk index; null means the current chunk index
	 * @return string
	 */
	private function getChunkFileKey( $index = null ) {
		return $this->mFileKey . '.' . ( $index ?? $this->getChunkIndex() );
	}

	/**
	 * Verify that the chunk isn't really an evil html file
	 *
	 * @throws UploadChunkVerificationException If verifyPartialFile() returns an error array
	 */
	private function verifyChunk() {
		// Reset mDesiredDestName here so we verify the name as if it were mFileKey
		$oldDesiredDestName = $this->mDesiredDestName;
		$this->mDesiredDestName = $this->mFileKey;
		$this->mTitle = false;
		try {
			$res = $this->verifyPartialFile();
		} finally {
			// Always undo the temporary name override, even if verification throws.
			$this->mDesiredDestName = $oldDesiredDestName;
			$this->mTitle = false;
		}
		if ( is_array( $res ) ) {
			throw new UploadChunkVerificationException( $res );
		}
	}

	/**
	 * Log a status object from FileBackend functions (via FileRepo functions) to the upload log channel.
	 * Return an array with the first error to build up an exception message
	 *
	 * Items of type 'error' are logged at error level, everything else at warning level.
	 *
	 * @param Status $status
	 * @param string $logMessage Log message; '{type}' is replaced by the message key of each item
	 * @param array $context Log context; 'details' and 'user' are added for each item
	 * @return array Message key followed by its parameters, taken from the first error,
	 *  else from the first warning, else a generic 'unknown' entry
	 */
	private function logFileBackendStatus( Status $status, string $logMessage, array $context = [] ): array {
		$logger = $this->logger;
		$errorToThrow = null;
		$warningToThrow = null;

		foreach ( $status->getErrors() as $errorItem ) {
			// The message key stands for distinct error situation from the file backend,
			// each error situation should be shown up in aggregated stats as own point, replace in message
			$logMessageType = str_replace( '{type}', $errorItem['message'], $logMessage );

			// The message arguments often contains the name of the failing datacenter or file names
			// and should not show up in aggregated stats, add to context
			$context['details'] = implode( '; ', $errorItem['params'] );
			$context['user'] = $this->user->getName();

			if ( $errorItem['type'] === 'error' ) {
				// Use the first error of the list for the exception text
				$errorToThrow ??= [ $errorItem['message'], ...$errorItem['params'] ];
				$logger->error( $logMessageType, $context );
			} else {
				// When no error is found, fall back to the first warning
				$warningToThrow ??= [ $errorItem['message'], ...$errorItem['params'] ];
				$logger->warning( $logMessageType, $context );
			}
		}
		return $errorToThrow ?? $warningToThrow ?? [ 'unknown', 'no error recorded' ];
	}
}