Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 228 |
|
0.00% |
0 / 15 |
CRAP | |
0.00% |
0 / 1 |
UploadFromChunks | |
0.00% |
0 / 228 |
|
0.00% |
0 / 15 |
1332 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
12 | |||
tryStashFile | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
doStashFile | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
2 | |||
continueChunks | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
2 | |||
concatenateChunks | |
0.00% |
0 / 98 |
|
0.00% |
0 / 1 |
72 | |||
getVirtualChunkLocation | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
2 | |||
addChunk | |
0.00% |
0 / 25 |
|
0.00% |
0 / 1 |
30 | |||
updateChunkStatus | |
0.00% |
0 / 18 |
|
0.00% |
0 / 1 |
2 | |||
getChunkStatus | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
6 | |||
getChunkIndex | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
getOffset | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
outputChunk | |
0.00% |
0 / 13 |
|
0.00% |
0 / 1 |
6 | |||
getChunkFileKey | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
verifyChunk | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
6 | |||
logFileBackendStatus | |
0.00% |
0 / 13 |
|
0.00% |
0 / 1 |
12 |
1 | <?php |
2 | |
3 | use MediaWiki\Logger\LoggerFactory; |
4 | use MediaWiki\MediaWikiServices; |
5 | use MediaWiki\Request\WebRequestUpload; |
6 | use MediaWiki\Status\Status; |
7 | use MediaWiki\User\User; |
8 | use Psr\Log\LoggerInterface; |
9 | |
10 | /** |
11 | * Backend for uploading files from chunks. |
12 | * |
13 | * This program is free software; you can redistribute it and/or modify |
14 | * it under the terms of the GNU General Public License as published by |
15 | * the Free Software Foundation; either version 2 of the License, or |
16 | * (at your option) any later version. |
17 | * |
18 | * This program is distributed in the hope that it will be useful, |
19 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
20 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
21 | * GNU General Public License for more details. |
22 | * |
23 | * You should have received a copy of the GNU General Public License along |
24 | * with this program; if not, write to the Free Software Foundation, Inc., |
25 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
26 | * http://www.gnu.org/copyleft/gpl.html |
27 | * |
28 | * @file |
29 | * @ingroup Upload |
30 | */ |
31 | |
/**
 * Implements uploading from chunks
 *
 * Every chunk is stored in the repo's "temp" zone under the key
 * "<fileKey>.<chunkIndex>" (see getChunkFileKey() and outputChunk()). Progress
 * (current chunk index and accumulated size) is persisted in the `uploadstash`
 * table (see updateChunkStatus()). Once the client has sent all chunks,
 * concatenateChunks() joins them into one temp file and stashes the result.
 *
 * @ingroup Upload
 * @author Michael Dale
 */
class UploadFromChunks extends UploadFromFile {
	/** @var LocalRepo */
	private $repo;
	/** @var UploadStash */
	public $stash;
	/** @var User */
	public $user;

	/**
	 * @var int|string|null Accumulated size of all stored chunks; null until known.
	 *  May be a numeric string when loaded from the database row (TODO confirm).
	 */
	protected $mOffset;
	/**
	 * @var int|string|null Index of the most recently stored chunk; null until known.
	 *  May be a numeric string when loaded from the database row (TODO confirm).
	 */
	protected $mChunkIndex;
	/** @var string|null Stash file key identifying the chunk session */
	protected $mFileKey;
	/** @var string|null Value of us_path for the session, as read by getChunkStatus() */
	protected $mVirtualTempPath;

	private LoggerInterface $logger;

	/** @noinspection PhpMissingParentConstructorInspection */

	/**
	 * Setup local pointers to stash, repo and user (similar to UploadFromStash)
	 *
	 * @param User $user
	 * @param UploadStash|false $stash Default: false (a new stash is created for $user)
	 * @param FileRepo|false $repo Default: false (the local repo is used)
	 */
	public function __construct( User $user, $stash = false, $repo = false ) {
		$this->user = $user;
		$this->repo = $repo ?: MediaWikiServices::getInstance()->getRepoGroup()->getLocalRepo();

		if ( $stash ) {
			$this->stash = $stash;
		} else {
			wfDebug( __METHOD__ . " creating new UploadFromChunks instance for " . $user->getId() );
			$this->stash = new UploadStash( $this->repo, $this->user );
		}

		$this->logger = LoggerFactory::getInstance( 'upload' );
	}

	/**
	 * @inheritDoc
	 */
	public function tryStashFile( User $user, $isPartial = false ) {
		// Reject a bad chunk up front and report it as a fatal Status instead of
		// letting the verification exception escape.
		try {
			$this->verifyChunk();
		} catch ( UploadChunkVerificationException $e ) {
			return Status::newFatal( $e->msg );
		}

		return parent::tryStashFile( $user, $isPartial );
	}

	/**
	 * Calls the parent doStashFile and updates the uploadstash table to handle "chunks"
	 *
	 * @param User|null $user
	 * @return UploadStashFile Stashed file
	 */
	protected function doStashFile( ?User $user = null ) {
		// Stashing the file is what starts a new chunk session, so reset the counters:
		$this->mChunkIndex = 0;
		$this->mOffset = 0;

		// Create a local stash target
		$this->mStashFile = parent::doStashFile( $user );
		// Update the initial file offset (based on file size)
		$this->mOffset = $this->mStashFile->getSize();
		$this->mFileKey = $this->mStashFile->getFileKey();

		// Output a copy of this first to chunk 0 location:
		$this->outputChunk( $this->mStashFile->getPath() );

		// Update db table to reflect initial "chunk" state
		$this->updateChunkStatus();

		return $this->mStashFile;
	}

	/**
	 * Continue chunk uploading
	 *
	 * Restores the session state (chunk index, offset, path) for $key from the
	 * database and re-initializes the path information from the stash metadata.
	 *
	 * @param string $name
	 * @param string $key
	 * @param WebRequestUpload $webRequestUpload
	 */
	public function continueChunks( $name, $key, $webRequestUpload ) {
		$this->mFileKey = $key;
		$this->mUpload = $webRequestUpload;
		// Get the chunk status from the db:
		$this->getChunkStatus();

		$metadata = $this->stash->getMetadata( $key );
		$this->initializePathInfo( $name,
			$this->getRealPath( $metadata['us_path'] ),
			$metadata['us_size'],
			false
		);
	}

	/**
	 * Append the final chunk and ready file for parent::performUpload()
	 *
	 * Concatenates all stored chunks into a temp file, schedules deletion of the
	 * chunk files, verifies the combined upload and stashes it under a new key.
	 *
	 * @return Status Not OK if concatenation, verification, the stash hook or the
	 *  stashing itself failed
	 */
	public function concatenateChunks() {
		$oldFileKey = $this->mFileKey;
		$chunkIndex = $this->getChunkIndex();
		$this->logger->debug(
			__METHOD__ . ' concatenate {totalChunks} chunks: {offset} inx: {curIndex}',
			[
				'offset' => $this->getOffset(),
				'totalChunks' => $this->mChunkIndex,
				'curIndex' => $chunkIndex,
				'filekey' => $oldFileKey
			]
		);

		// Collect the virtual location of every stored chunk, from chunk 0 (written
		// by doStashFile()) up to and including the current chunk index.
		$fileList = [];
		for ( $i = 0; $i <= $chunkIndex; $i++ ) {
			$fileList[] = $this->getVirtualChunkLocation( $i );
		}
		$chunkCount = count( $fileList );

		// Get the file extension from the stored virtual temp path
		$ext = FileBackend::extensionFromPath( $this->mVirtualTempPath );
		// Get a 0-byte temp file to perform the concatenation at
		$tmpFile = MediaWikiServices::getInstance()->getTempFSFileFactory()
			->newTempFSFile( 'chunkedupload_', $ext );
		$tmpPath = false; // fail in concatenate()
		if ( $tmpFile ) {
			// keep alive with $this
			$tmpPath = $tmpFile->bind( $this )->getPath();
		} else {
			$this->logger->warning( "Error getting tmp file", [ 'filekey' => $oldFileKey ] );
		}

		// Concatenate the chunks at the temp file
		$tStart = microtime( true );
		$status = $this->repo->concatenate( $fileList, $tmpPath );
		$tAmount = microtime( true ) - $tStart;
		if ( !$status->isOK() ) {
			// This is a backend error and not user-related, so log is safe
			// Upload verification further on is not safe to log server side
			$this->logFileBackendStatus(
				$status,
				'[{type}] Error on concatenate {chunks} stashed files ({details})',
				[ 'chunks' => $chunkIndex, 'filekey' => $oldFileKey ]
			);
			return $status;
		}

		// Delete old chunks in deferred job. Put in deferred job because deleting
		// lots of chunks can take a long time, sometimes to the point of causing
		// a timeout, and we do not want that to tank the operation. Note that chunks
		// are also automatically deleted after a set time by cleanupUploadStash.php
		// Additionally, using AutoCommitUpdate ensures that we do not delete files
		// if the main transaction is rolled back for some reason.
		DeferredUpdates::addUpdate( new AutoCommitUpdate(
			$this->repo->getPrimaryDB(),
			__METHOD__,
			function () use( $fileList, $oldFileKey ) {
				$purgeStatus = $this->repo->quickPurgeBatch( $fileList );
				if ( !$purgeStatus->isOK() ) {
					$this->logger->warning(
						"Could not delete chunks of {filekey} - {status}",
						[
							'status' => (string)$purgeStatus,
							'filekey' => $oldFileKey,
						]
					);
				}
			}
		) );

		wfDebugLog( 'fileconcatenate', "Combined $chunkCount chunks in $tAmount seconds." );

		// File system path of the actual full temp file
		$this->setTempFile( $tmpPath );

		$ret = $this->verifyUpload();
		if ( $ret['status'] !== UploadBase::OK ) {
			$this->logger->info(
				"Verification failed for chunked upload {filekey}",
				[
					'user' => $this->user->getName(),
					'filekey' => $oldFileKey
				]
			);
			$status->fatal( $this->getVerificationErrorCode( $ret['status'] ) );

			return $status;
		}

		// Update the mTempPath and mStashFile
		// (for FileUpload or normal Stash to take over)
		$tStart = microtime( true );
		// This is a re-implementation of UploadBase::tryStashFile(), we can't call it because we
		// override doStashFile() with completely different functionality in this class...
		$error = $this->runUploadStashFileHook( $this->user );
		if ( $error ) {
			$status->fatal( ...$error );
			$this->logger->info( "Aborting stash upload due to hook - {status}",
				[
					'status' => (string)$status,
					'user' => $this->user->getName(),
					'filekey' => $this->mFileKey
				]
			);
			return $status;
		}
		try {
			$this->mStashFile = parent::doStashFile( $this->user );
		} catch ( UploadStashException $e ) {
			$this->logger->warning( "Could not stash file for {user} because {error} {msg}",
				[
					'user' => $this->user->getName(),
					'error' => get_class( $e ),
					'msg' => $e->getMessage(),
					'filekey' => $this->mFileKey
				]
			);
			$status->fatal( 'uploadstash-exception', get_class( $e ), $e->getMessage() );
			return $status;
		}

		$tAmount = microtime( true ) - $tStart;
		// @phan-suppress-next-line PhanTypeMismatchArgumentNullable tmpFile is set when tmpPath is set here
		$this->mStashFile->setLocalReference( $tmpFile ); // reuse (e.g. for getImageInfo())
		$this->logger->info( "Stashed combined ({chunks} chunks) of {oldkey} under new name {filekey}",
			[
				'chunks' => $chunkCount,
				'stashTime' => $tAmount,
				'oldpath' => $this->mVirtualTempPath,
				'filekey' => $this->mStashFile->getFileKey(),
				'oldkey' => $oldFileKey,
				'newpath' => $this->mStashFile->getPath(),
				'user' => $this->user->getName()
			]
		);
		wfDebugLog( 'fileconcatenate', "Stashed combined file ($chunkCount chunks) in $tAmount seconds." );

		return $status;
	}

	/**
	 * Returns the virtual chunk location:
	 *
	 * Built as "<temp zone virtual URL>/<hash path><chunk file key>".
	 *
	 * @param int $index
	 * @return string
	 */
	private function getVirtualChunkLocation( $index ) {
		$chunkKey = $this->getChunkFileKey( $index );

		return $this->repo->getVirtualUrl( 'temp' ) .
			'/' .
			$this->repo->getHashPath( $chunkKey ) .
			$chunkKey;
	}

	/**
	 * Add a chunk to the temporary directory
	 *
	 * @param string $chunkPath Path to temporary chunk file
	 * @param int $chunkSize Size of the current chunk
	 * @param int $offset Offset of current chunk ( must match database chunk offset )
	 * @return Status Fatal with 'file-too-large', 'invalid-chunk-offset' or the chunk
	 *  verification error; otherwise the status returned by storing the chunk
	 */
	public function addChunk( $chunkPath, $chunkSize, $offset ) {
		// Get the offset before we add the chunk to the file system
		$preAppendOffset = $this->getOffset();

		if ( $preAppendOffset + $chunkSize > $this->getMaxUploadSize() ) {
			return Status::newFatal( 'file-too-large' );
		}

		// Make sure the client is uploading the correct chunk with a matching offset.
		// The comparison is deliberately loose: the stored offset is read from a
		// database row and may be a numeric string.
		if ( $preAppendOffset != $offset ) {
			return Status::newFatal( 'invalid-chunk-offset' );
		}

		// Update local chunk index for the current chunk
		$this->mChunkIndex++;

		# For some reason mTempPath is set to first part, so point it at the new
		# chunk only for the duration of the verification.
		$oldTemp = $this->mTempPath;
		$this->mTempPath = $chunkPath;
		try {
			$this->verifyChunk();
		} catch ( UploadChunkVerificationException $e ) {
			$this->logger->info( "Error verifying upload chunk {msg}",
				[
					'user' => $this->user->getName(),
					'msg' => $e->getMessage(),
					'chunkIndex' => $this->mChunkIndex,
					'filekey' => $this->mFileKey
				]
			);

			return Status::newFatal( $e->msg );
		} finally {
			// Restore on both the success and the failure path
			$this->mTempPath = $oldTemp;
		}

		$status = $this->outputChunk( $chunkPath );
		if ( $status->isGood() ) {
			// Update local offset:
			$this->mOffset = $preAppendOffset + $chunkSize;
			// Update chunk table status db
			$this->updateChunkStatus();
		}

		return $status;
	}

	/**
	 * Update the chunk db table with the current status:
	 *
	 * Writes the status 'chunks', the current chunk index and the current offset
	 * to the `uploadstash` row of the current file key.
	 */
	private function updateChunkStatus() {
		$this->logger->info( "update chunk status for {filekey} offset: {offset} inx: {inx}",
			[
				'offset' => $this->getOffset(),
				'inx' => $this->getChunkIndex(),
				'filekey' => $this->mFileKey,
				'user' => $this->user->getName()
			]
		);

		$dbw = $this->repo->getPrimaryDB();
		$dbw->newUpdateQueryBuilder()
			->update( 'uploadstash' )
			->set( [
				'us_status' => 'chunks',
				'us_chunk_inx' => $this->getChunkIndex(),
				'us_size' => $this->getOffset()
			] )
			->where( [ 'us_key' => $this->mFileKey ] )
			->caller( __METHOD__ )->execute();
	}

	/**
	 * Get the chunk db state and populate the relevant local values
	 *
	 * Leaves the local values untouched when no row exists for the file key.
	 */
	private function getChunkStatus() {
		// get primary db to avoid race conditions.
		// Otherwise, if chunk upload time < replag there will be spurious errors
		$dbw = $this->repo->getPrimaryDB();
		$row = $dbw->newSelectQueryBuilder()
			->select( [ 'us_chunk_inx', 'us_size', 'us_path' ] )
			->from( 'uploadstash' )
			->where( [ 'us_key' => $this->mFileKey ] )
			->caller( __METHOD__ )->fetchRow();
		// Handle result:
		if ( $row ) {
			$this->mChunkIndex = $row->us_chunk_inx;
			$this->mOffset = $row->us_size;
			$this->mVirtualTempPath = $row->us_path;
		}
	}

	/**
	 * Get the current Chunk index
	 * @return int Index of the current chunk, 0 if none is set yet
	 */
	private function getChunkIndex() {
		return $this->mChunkIndex ?? 0;
	}

	/**
	 * Get the offset at which the next uploaded chunk will be appended to
	 * @return int Current byte offset of the chunk file set, 0 if none is set yet
	 */
	public function getOffset() {
		return $this->mOffset ?? 0;
	}

	/**
	 * Output the chunk to disk
	 *
	 * @param string $chunkPath
	 * @throws UploadChunkFileException If the chunk could not be stored
	 * @return Status
	 */
	private function outputChunk( $chunkPath ) {
		// Key is fileKey + chunk index
		$fileKey = $this->getChunkFileKey();

		// Store the chunk per its indexed fileKey:
		$hashPath = $this->repo->getHashPath( $fileKey );
		$storeStatus = $this->repo->quickImport( $chunkPath,
			$this->repo->getZonePath( 'temp' ) . "/{$hashPath}{$fileKey}" );

		// Check for error in stashing the chunk:
		if ( !$storeStatus->isOK() ) {
			$error = $this->logFileBackendStatus(
				$storeStatus,
				'[{type}] Error storing chunk in "{chunkPath}" for {fileKey} ({details})',
				[ 'chunkPath' => $chunkPath, 'fileKey' => $fileKey ]
			);
			// '{chunkPath}' is a literal placeholder here (no "$", so PHP does not
			// interpolate it); the value is handed over in the second argument.
			throw new UploadChunkFileException( "Error storing file in '{chunkPath}': " .
				implode( '; ', $error ), [ 'chunkPath' => $chunkPath ] );
		}

		return $storeStatus;
	}

	/**
	 * Build the storage key of a chunk: "<fileKey>.<index>"
	 *
	 * @param int|null $index Chunk index; null means the current chunk index
	 * @return string
	 */
	private function getChunkFileKey( $index = null ) {
		return $this->mFileKey . '.' . ( $index ?? $this->getChunkIndex() );
	}

	/**
	 * Verify that the chunk isn't really an evil html file
	 *
	 * @throws UploadChunkVerificationException
	 */
	private function verifyChunk() {
		// Reset mDesiredDestName here so we verify the name as if it were mFileKey
		$oldDesiredDestName = $this->mDesiredDestName;
		$this->mDesiredDestName = $this->mFileKey;
		$this->mTitle = false;
		try {
			$res = $this->verifyPartialFile();
		} finally {
			// Always put the real destination name back, even if verification blew up
			$this->mDesiredDestName = $oldDesiredDestName;
			$this->mTitle = false;
		}
		if ( is_array( $res ) ) {
			throw new UploadChunkVerificationException( $res );
		}
	}

	/**
	 * Log a status object from FileBackend functions (via FileRepo functions) to the upload log channel.
	 * Return an array with the first error to build up an exception message
	 *
	 * @param Status $status
	 * @param string $logMessage Log message; "{type}" is replaced by each error's message key
	 * @param array $context Extra log context; 'details' and 'user' are added here
	 * @return array Message key followed by its parameters, taken from the first error,
	 *  else the first warning, else [ 'unknown', 'no error recorded' ]
	 */
	private function logFileBackendStatus( Status $status, string $logMessage, array $context = [] ): array {
		$errorToThrow = null;
		$warningToThrow = null;
		$context['user'] = $this->user->getName();

		foreach ( $status->getErrors() as $errorItem ) {
			// The message key stands for distinct error situation from the file backend,
			// each error situation should be shown up in aggregated stats as own point, replace in message
			$logMessageType = str_replace( '{type}', $errorItem['message'], $logMessage );

			// The message arguments often contains the name of the failing datacenter or file names
			// and should not show up in aggregated stats, add to context
			$context['details'] = implode( '; ', $errorItem['params'] );

			if ( $errorItem['type'] === 'error' ) {
				// Use the first error of the list for the exception text
				$errorToThrow ??= [ $errorItem['message'], ...$errorItem['params'] ];
				$this->logger->error( $logMessageType, $context );
			} else {
				// When no error is found, fall back to the first warning
				$warningToThrow ??= [ $errorItem['message'], ...$errorItem['params'] ];
				$this->logger->warning( $logMessageType, $context );
			}
		}
		return $errorToThrow ?? $warningToThrow ?? [ 'unknown', 'no error recorded' ];
	}
}