Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
66.67% |
98 / 147 |
|
47.06% |
8 / 17 |
CRAP | |
0.00% |
0 / 1 |
UploadFromUrl | |
66.67% |
98 / 147 |
|
47.06% |
8 / 17 |
152.15 | |
0.00% |
0 / 1 |
isAllowed | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
2 | |||
isEnabled | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
2 | |||
isAllowedHost | |
94.44% |
17 / 18 |
|
0.00% |
0 / 1 |
9.01 | |||
getCacheKey | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
3 | |||
getCacheKeyFromRequest | |
0.00% |
0 / 12 |
|
0.00% |
0 / 1 |
12 | |||
getAllowedHosts | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
4 | |||
isAllowedUrl | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
2 | |||
getUrl | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
initialize | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
initializeFromRequest | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
6 | |||
isValidRequest | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
6 | |||
getSourceType | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
fetchFile | |
75.00% |
3 / 4 |
|
0.00% |
0 / 1 |
2.06 | |||
canFetchFile | |
57.14% |
4 / 7 |
|
0.00% |
0 / 1 |
5.26 | |||
makeTemporaryFile | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
saveTempFileChunk | |
41.67% |
5 / 12 |
|
0.00% |
0 / 1 |
2.79 | |||
reallyFetchFile | |
77.08% |
37 / 48 |
|
0.00% |
0 / 1 |
12.46 |
1 | <?php |
2 | /** |
3 | * Backend for uploading files from a HTTP resource. |
4 | * |
5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by |
7 | * the Free Software Foundation; either version 2 of the License, or |
8 | * (at your option) any later version. |
9 | * |
10 | * This program is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | * GNU General Public License for more details. |
14 | * |
15 | * You should have received a copy of the GNU General Public License along |
16 | * with this program; if not, write to the Free Software Foundation, Inc., |
17 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
18 | * http://www.gnu.org/copyleft/gpl.html |
19 | * |
20 | * @file |
21 | * @ingroup Upload |
22 | */ |
23 | |
24 | use MediaWiki\Context\RequestContext; |
25 | use MediaWiki\HookContainer\HookRunner; |
26 | use MediaWiki\MainConfigNames; |
27 | use MediaWiki\MediaWikiServices; |
28 | use MediaWiki\Permissions\Authority; |
29 | use MediaWiki\Request\WebRequest; |
30 | use MediaWiki\Status\Status; |
31 | |
/**
 * Implements uploading from a HTTP resource.
 *
 * The remote file is streamed into a temporary file chunk by chunk (see
 * saveTempFileChunk()), after the source URL has been checked against the
 * configured host allowlist and the IsUploadAllowedFromUrl hook.
 *
 * @ingroup Upload
 * @author Bryan Tong Minh
 * @author Michael Dale
 */
class UploadFromUrl extends UploadBase {
	/** @var string|null URL of the remote file, set by initialize() */
	protected $mUrl;

	/** @var string|false|null Path of the temporary file the download is written to */
	protected $mTempPath;

	/**
	 * @var resource|false|null Write handle on the temporary file while a download is
	 *  running; false after a short write aborted the download, null once closed normally
	 */
	protected $mTmpHandle;

	/** @var bool[] Per-URL cache of the IsUploadAllowedFromUrl hook result, keyed by URL */
	protected static $allowedUrls = [];

	/**
	 * Checks if the user is allowed to use the upload-by-URL feature. If the
	 * user is not allowed, return the name of the user right as a string. If
	 * the user is allowed, have the parent do further permissions checking.
	 *
	 * @param Authority $performer
	 *
	 * @return bool|string
	 */
	public static function isAllowed( Authority $performer ) {
		if ( !$performer->isAllowed( 'upload_by_url' ) ) {
			return 'upload_by_url';
		}

		return parent::isAllowed( $performer );
	}

	/**
	 * Checks if the upload from URL feature is enabled
	 * @return bool
	 */
	public static function isEnabled() {
		$allowCopyUploads = MediaWikiServices::getInstance()->getMainConfig()
			->get( MainConfigNames::AllowCopyUploads );

		return $allowCopyUploads && parent::isEnabled();
	}

	/**
	 * Checks whether the URL is for an allowed host
	 * The domains in the allowlist can include wildcard characters (*) in place
	 * of any of the domain levels, e.g. '*.flickr.com' or 'upload.*.gov.uk'.
	 *
	 * An empty allowlist permits every host. A wildcard stands for exactly one
	 * domain level, so an entry only matches hosts with the same number of levels.
	 *
	 * @param string $url
	 * @return bool
	 */
	public static function isAllowedHost( $url ) {
		$domains = self::getAllowedHosts();
		if ( !count( $domains ) ) {
			return true;
		}
		$parsedUrl = wfParseUrl( $url );
		if ( !$parsedUrl ) {
			return false;
		}

		// The host of the URL is the same for every allowlist entry, so split it only once
		$uploadDomainPieces = explode( '.', $parsedUrl['host'] );
		$uploadPieceCount = count( $uploadDomainPieces );

		foreach ( $domains as $domain ) {
			// See if the domain for the upload matches this allowed domain
			$domainPieces = explode( '.', $domain );
			if ( count( $domainPieces ) !== $uploadPieceCount ) {
				continue;
			}

			// See if all the pieces match or not (excluding wildcards)
			$matches = true;
			foreach ( $domainPieces as $index => $piece ) {
				if ( $piece !== '*' && $piece !== $uploadDomainPieces[$index] ) {
					$matches = false;
					break;
				}
			}

			if ( $matches ) {
				// We found a match, so quit comparing against the list
				return true;
			}
		}

		return false;
	}

	/**
	 * Provides a caching key for an upload from url set of parameters
	 * Used to set the status of an async job in UploadFromUrlJob
	 * and retrieve it in frontend clients like ApiUpload. Will return the
	 * empty string if not all parameters are present.
	 *
	 * @param array $params Must contain the keys 'filename' and 'url'
	 * @return string
	 */
	public static function getCacheKey( $params ) {
		if ( !isset( $params['filename'], $params['url'] ) ) {
			return "";
		}

		// We use sha1 here to ensure we have a fixed-length string of printable
		// characters. There is no cryptography involved, so we just need a
		// relatively fast function.
		return sha1( sprintf( "%s|||%s", $params['filename'], $params['url'] ) );
	}

	/**
	 * Get the caching key from a web request
	 *
	 * An explicit 'wpCacheKey' (or, failing that, 'key') request parameter wins;
	 * otherwise the key is derived from the destination file name and source URL.
	 *
	 * @param WebRequest &$request
	 *
	 * @return string
	 */
	public static function getCacheKeyFromRequest( &$request ) {
		$uploadCacheKey = $request->getText( 'wpCacheKey', $request->getText( 'key', '' ) );
		if ( $uploadCacheKey !== '' ) {
			return $uploadCacheKey;
		}
		$desiredDestName = $request->getText( 'wpDestFile' );
		if ( !$desiredDestName ) {
			$desiredDestName = $request->getText( 'wpUploadFileURL' );
		}
		return self::getCacheKey(
			[
				'filename' => $desiredDestName,
				// getVal() yields null for a missing parameter; passing null to trim()
				// is deprecated as of PHP 8.1
				'url' => trim( $request->getVal( 'wpUploadFileURL' ) ?? '' )
			]
		);
	}

	/**
	 * Collects the allowed upload domains from the CopyUploadsDomains setting and,
	 * when CopyUploadAllowOnWikiDomainConfig is enabled, from the
	 * 'copyupload-allowed-domains' interface message (one domain per line,
	 * anything after a '#' is treated as a comment).
	 *
	 * @return string[]
	 */
	private static function getAllowedHosts(): array {
		$config = MediaWikiServices::getInstance()->getMainConfig();
		$domains = $config->get( MainConfigNames::CopyUploadsDomains );

		if ( $config->get( MainConfigNames::CopyUploadAllowOnWikiDomainConfig ) ) {
			$page = wfMessage( 'copyupload-allowed-domains' )->inContentLanguage()->plain();

			foreach ( explode( "\n", $page ) as $line ) {
				// Strip comments
				$line = preg_replace( "/^\\s*([^#]*)\\s*((.*)?)$/", "\\1", $line );
				// Trim whitespace
				$line = trim( $line );

				if ( $line !== '' ) {
					$domains[] = $line;
				}
			}
		}

		return $domains;
	}

	/**
	 * Checks whether uploading from the given URL is allowed.
	 *
	 * The decision is delegated to the IsUploadAllowedFromUrl hook (default:
	 * allowed) and cached per URL for the lifetime of the process.
	 *
	 * @param string $url
	 * @return bool True if the URL may be used as an upload source
	 */
	public static function isAllowedUrl( $url ) {
		if ( !isset( self::$allowedUrls[$url] ) ) {
			$allowed = true;
			( new HookRunner( MediaWikiServices::getInstance()->getHookContainer() ) )
				->onIsUploadAllowedFromUrl( $url, $allowed );
			self::$allowedUrls[$url] = $allowed;
		}

		return self::$allowedUrls[$url];
	}

	/**
	 * Get the URL of the file to be uploaded
	 * @return string
	 */
	public function getUrl() {
		return $this->mUrl;
	}

	/**
	 * Entry point for API upload
	 *
	 * @param string $name
	 * @param string $url
	 */
	public function initialize( $name, $url ) {
		$this->mUrl = $url;

		$tempPath = $this->makeTemporaryFile();
		# File size and removeTempFile will be filled in later
		$this->initializePathInfo( $name, $tempPath, 0, false );
	}

	/**
	 * Entry point for SpecialUpload
	 *
	 * Uses 'wpDestFile' as the destination name, falling back to the source URL
	 * ('wpUploadFileURL') when no destination name was given.
	 *
	 * @param WebRequest &$request
	 */
	public function initializeFromRequest( &$request ) {
		$desiredDestName = $request->getText( 'wpDestFile' );
		if ( !$desiredDestName ) {
			$desiredDestName = $request->getText( 'wpUploadFileURL' );
		}
		$this->initialize(
			$desiredDestName,
			// getVal() yields null for a missing parameter; passing null to trim()
			// is deprecated as of PHP 8.1
			trim( $request->getVal( 'wpUploadFileURL' ) ?? '' )
		);
	}

	/**
	 * Checks that the request names a source URL and that the user of the main
	 * request context holds the 'upload_by_url' right.
	 *
	 * @param WebRequest $request
	 * @return bool
	 */
	public static function isValidRequest( $request ) {
		$user = RequestContext::getMain()->getUser();

		$url = $request->getVal( 'wpUploadFileURL' );

		return $url
			&& MediaWikiServices::getInstance()
				->getPermissionManager()
				->userHasRight( $user, 'upload_by_url' );
	}

	/**
	 * @return string
	 */
	public function getSourceType() {
		return 'url';
	}

	/**
	 * Download the file
	 *
	 * @param array $httpOptions Array of options for MWHttpRequest.
	 *   This could be used to override the timeout on the http request.
	 * @return Status
	 */
	public function fetchFile( $httpOptions = [] ) {
		$status = $this->canFetchFile();
		if ( !$status->isGood() ) {
			return $status;
		}
		return $this->reallyFetchFile( $httpOptions );
	}

	/**
	 * verify we can actually download the file
	 *
	 * The URL must be syntactically valid, point at an allowed host and be
	 * accepted by the IsUploadAllowedFromUrl hook.
	 *
	 * @return Status
	 */
	public function canFetchFile() {
		if ( !MWHttpRequest::isValidURI( $this->mUrl ) ) {
			return Status::newFatal( 'http-invalid-url', $this->mUrl );
		}

		if ( !self::isAllowedHost( $this->mUrl ) ) {
			return Status::newFatal( 'upload-copy-upload-invalid-domain' );
		}
		if ( !self::isAllowedUrl( $this->mUrl ) ) {
			return Status::newFatal( 'upload-copy-upload-invalid-url' );
		}
		return Status::newGood();
	}

	/**
	 * Create a new temporary file in the URL subdirectory of wfTempDir().
	 *
	 * @return string Path to the file
	 */
	protected function makeTemporaryFile() {
		$tmpFile = MediaWikiServices::getInstance()->getTempFSFileFactory()
			->newTempFSFile( 'URL', 'urlupload_' );
		$tmpFile->bind( $this );

		return $tmpFile->getPath();
	}

	/**
	 * Callback: save a chunk of the result of a HTTP request to the temporary file
	 *
	 * On a failed or short write the temporary file handle is closed and set to
	 * false, which reallyFetchFile() reports as a write error.
	 *
	 * @param mixed $req
	 * @param string $buffer
	 * @return int Number of bytes handled
	 */
	public function saveTempFileChunk( $req, $buffer ) {
		$length = strlen( $buffer );
		wfDebugLog( 'fileupload', 'Received chunk of ' . $length . ' bytes' );

		if ( !$this->mTmpHandle ) {
			// An earlier write failure already closed the file; accept nothing more
			return 0;
		}

		$nbytes = fwrite( $this->mTmpHandle, $buffer );

		// Strict comparison: a failed fwrite() (false) must never count as a
		// successful write of an empty buffer
		if ( $nbytes === $length ) {
			$this->mFileSize += $nbytes;
		} else {
			// Well... that's not good!
			wfDebugLog(
				'fileupload',
				'Short write ' . $nbytes . '/' . $length .
					' bytes, aborting with ' . $this->mFileSize . ' uploaded so far'
			);
			fclose( $this->mTmpHandle );
			$this->mTmpHandle = false;
		}

		// fwrite() returns false on failure; the callback contract is an int
		return $nbytes === false ? 0 : $nbytes;
	}

	/**
	 * Download the file, save it to the temporary file and update the file
	 * size and set $mRemoveTempFile to true.
	 *
	 * Redirects are followed manually, at most 'maxRedirects' (default 5) times.
	 *
	 * @param array $httpOptions Array of options for MWHttpRequest
	 * @return Status
	 */
	protected function reallyFetchFile( $httpOptions = [] ) {
		$config = MediaWikiServices::getInstance()->getMainConfig();
		$copyUploadProxy = $config->get( MainConfigNames::CopyUploadProxy );
		$copyUploadTimeout = $config->get( MainConfigNames::CopyUploadTimeout );
		if ( $this->mTempPath === false ) {
			return Status::newFatal( 'tmp-create-error' );
		}

		// Note the temporary file should already be created by makeTemporaryFile()
		$this->mTmpHandle = fopen( $this->mTempPath, 'wb' );
		if ( !$this->mTmpHandle ) {
			return Status::newFatal( 'tmp-create-error' );
		}
		wfDebugLog( 'fileupload', 'Temporary file created "' . $this->mTempPath . '"' );

		$this->mRemoveTempFile = true;
		$this->mFileSize = 0;

		$options = $httpOptions + [ 'followRedirects' => false ];

		if ( $copyUploadProxy !== false ) {
			$options['proxy'] = $copyUploadProxy;
		}

		if ( $copyUploadTimeout && !isset( $options['timeout'] ) ) {
			$options['timeout'] = $copyUploadTimeout;
		}
		wfDebugLog(
			'fileupload',
			'Starting download from "' . $this->mUrl . '" ' .
				'<' . implode( ',', array_keys( array_filter( $options ) ) ) . '>'
		);

		// Manually follow any redirects up to the limit and reset the output file before each new request to prevent
		// capturing the redirect response as part of the file.
		// NOTE(review): redirect targets are not re-checked with isAllowedHost()/isAllowedUrl();
		// confirm whether following a redirect to a host outside the allowlist is intended.
		$attemptsLeft = $options['maxRedirects'] ?? 5;
		$targetUrl = $this->mUrl;
		$requestFactory = MediaWikiServices::getInstance()->getHttpRequestFactory();
		while ( $attemptsLeft > 0 ) {
			$req = $requestFactory->create( $targetUrl, $options, __METHOD__ );
			$req->setCallback( [ $this, 'saveTempFileChunk' ] );
			$status = $req->execute();
			if ( !$req->isRedirect() ) {
				break;
			}
			if ( !$this->mTmpHandle ) {
				// saveTempFileChunk() closed the file after a write error, so there is
				// nothing left to reset and no point in requesting the redirect target
				return Status::newFatal( 'tmp-write-error' );
			}
			$targetUrl = $req->getFinalUrl();
			// Remove redirect response content from file, and from the size counted so far.
			ftruncate( $this->mTmpHandle, 0 );
			rewind( $this->mTmpHandle );
			$this->mFileSize = 0;
			$attemptsLeft--;
		}

		// The loop only ends with attempts left when a non-redirect response was received
		if ( $attemptsLeft <= 0 ) {
			if ( $this->mTmpHandle ) {
				// Do not leak the open handle on the error path
				fclose( $this->mTmpHandle );
				$this->mTmpHandle = null;
			}
			return Status::newFatal( 'upload-too-many-redirects' );
		}

		if ( $this->mTmpHandle ) {
			// File got written ok...
			fclose( $this->mTmpHandle );
			$this->mTmpHandle = null;
		} else {
			// We encountered a write error during the download...
			return Status::newFatal( 'tmp-write-error' );
		}

		// @phan-suppress-next-line PhanPossiblyUndeclaredVariable Always set after loop
		if ( $status->isOK() ) {
			wfDebugLog( 'fileupload', 'Download by URL completed successfully.' );
		} else {
			// @phan-suppress-next-line PhanPossiblyUndeclaredVariable Always set after loop
			wfDebugLog( 'fileupload', $status->getWikiText( false, false, 'en' ) );
			wfDebugLog(
				'fileupload',
				// @phan-suppress-next-line PhanPossiblyUndeclaredVariable Always set after loop
				'Download by URL completed with HTTP status ' . $req->getStatus()
			);
		}

		// @phan-suppress-next-line PhanTypeMismatchReturnNullable,PhanPossiblyUndeclaredVariable Always set after loop
		return $status;
	}
}