Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
66.90% |
97 / 145 |
|
47.06% |
8 / 17 |
CRAP | |
0.00% |
0 / 1 |
UploadFromUrl | |
66.90% |
97 / 145 |
|
47.06% |
8 / 17 |
145.35 | |
0.00% |
0 / 1 |
isAllowed | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
2 | |||
isEnabled | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
2 | |||
isAllowedHost | |
94.44% |
17 / 18 |
|
0.00% |
0 / 1 |
9.01 | |||
getCacheKey | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
3 | |||
getCacheKeyFromRequest | |
0.00% |
0 / 12 |
|
0.00% |
0 / 1 |
12 | |||
getAllowedHosts | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
4 | |||
isAllowedUrl | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
2 | |||
getUrl | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
initialize | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
initializeFromRequest | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
6 | |||
isValidRequest | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
6 | |||
getSourceType | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
fetchFile | |
75.00% |
3 / 4 |
|
0.00% |
0 / 1 |
2.06 | |||
canFetchFile | |
57.14% |
4 / 7 |
|
0.00% |
0 / 1 |
5.26 | |||
makeTemporaryFile | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
saveTempFileChunk | |
41.67% |
5 / 12 |
|
0.00% |
0 / 1 |
2.79 | |||
reallyFetchFile | |
78.26% |
36 / 46 |
|
0.00% |
0 / 1 |
11.03 |
1 | <?php |
2 | /** |
3 | * Backend for uploading files from a HTTP resource. |
4 | * |
5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by |
7 | * the Free Software Foundation; either version 2 of the License, or |
8 | * (at your option) any later version. |
9 | * |
10 | * This program is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | * GNU General Public License for more details. |
14 | * |
15 | * You should have received a copy of the GNU General Public License along |
16 | * with this program; if not, write to the Free Software Foundation, Inc., |
17 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
18 | * http://www.gnu.org/copyleft/gpl.html |
19 | * |
20 | * @file |
21 | * @ingroup Upload |
22 | */ |
23 | |
24 | use MediaWiki\Context\RequestContext; |
25 | use MediaWiki\HookContainer\HookRunner; |
26 | use MediaWiki\MainConfigNames; |
27 | use MediaWiki\MediaWikiServices; |
28 | use MediaWiki\Permissions\Authority; |
29 | use MediaWiki\Request\WebRequest; |
30 | use MediaWiki\Status\Status; |
31 | |
32 | /** |
33 | * Implements uploading from a HTTP resource. |
34 | * |
35 | * @ingroup Upload |
36 | * @author Bryan Tong Minh |
37 | * @author Michael Dale |
38 | */ |
class UploadFromUrl extends UploadBase {
	/** @var string URL of the remote file, as handed to initialize() */
	protected $mUrl;

	/**
	 * Write handle on the temporary file while a download is running.
	 * Set to false by saveTempFileChunk() after a short write, and to null
	 * by reallyFetchFile() once the handle has been closed.
	 *
	 * @var resource|null|false
	 */
	protected $mTmpHandle;

	/** @var array<string,bool> Memoized results of isAllowedUrl(), keyed by URL */
	protected static $allowedUrls = [];

	/**
	 * Checks if the user is allowed to use the upload-by-URL feature. If the
	 * user is not allowed, return the name of the user right as a string. If
	 * the user is allowed, have the parent do further permissions checking.
	 *
	 * @param Authority $performer
	 *
	 * @return bool|string
	 */
	public static function isAllowed( Authority $performer ) {
		if ( !$performer->isAllowed( 'upload_by_url' ) ) {
			return 'upload_by_url';
		}

		return parent::isAllowed( $performer );
	}

	/**
	 * Checks if the upload from URL feature is enabled, i.e. the
	 * AllowCopyUploads setting is on and the parent reports uploads as enabled.
	 *
	 * @return bool
	 */
	public static function isEnabled() {
		$allowCopyUploads = MediaWikiServices::getInstance()->getMainConfig()
			->get( MainConfigNames::AllowCopyUploads );

		return $allowCopyUploads && parent::isEnabled();
	}

	/**
	 * Checks whether the URL is for an allowed host.
	 *
	 * The domains in the allowlist can include wildcard characters (*) in place
	 * of any of the domain levels, e.g. '*.flickr.com' or 'upload.*.gov.uk'.
	 * A wildcard stands for exactly one level: an entry only matches hosts with
	 * the same number of dot-separated pieces. If the allowlist is empty, every
	 * host is accepted.
	 *
	 * @param string $url
	 * @return bool
	 */
	public static function isAllowedHost( $url ) {
		$domains = self::getAllowedHosts();
		if ( !count( $domains ) ) {
			// No restriction configured
			return true;
		}
		$parsedUrl = wfGetUrlUtils()->parse( $url );
		if ( !$parsedUrl ) {
			return false;
		}

		// The host does not change between allowlist entries, so split it once
		$uploadDomainPieces = explode( '.', $parsedUrl['host'] );
		$uploadPieceCount = count( $uploadDomainPieces );

		foreach ( $domains as $domain ) {
			$domainPieces = explode( '.', $domain );
			if ( count( $domainPieces ) !== $uploadPieceCount ) {
				continue;
			}

			// See if all the pieces match or not (excluding wildcards)
			$matches = true;
			foreach ( $domainPieces as $index => $piece ) {
				if ( $piece !== '*' && $piece !== $uploadDomainPieces[$index] ) {
					$matches = false;
					break;
				}
			}
			if ( $matches ) {
				// We found a match, so quit comparing against the list
				return true;
			}
		}

		return false;
	}

	/**
	 * Provides a caching key for an upload from url set of parameters
	 * Used to set the status of an async job in UploadFromUrlJob
	 * and retrieve it in frontend clients like ApiUpload. Will return the
	 * empty string if not all parameters are present.
	 *
	 * @param array $params Must contain 'filename' and 'url'
	 * @return string
	 */
	public static function getCacheKey( $params ) {
		if ( !isset( $params['filename'] ) || !isset( $params['url'] ) ) {
			return "";
		}

		// We use sha1 here to ensure we have a fixed-length string of printable
		// characters. There is no cryptography involved, so we just need a
		// relatively fast function.
		return sha1( sprintf( "%s|||%s", $params['filename'], $params['url'] ) );
	}

	/**
	 * Get the caching key from a web request.
	 *
	 * An explicit 'wpCacheKey' (or, failing that, 'key') request value wins;
	 * otherwise the key is derived from the destination file name and the
	 * upload URL via getCacheKey().
	 *
	 * @param WebRequest &$request
	 *
	 * @return string
	 */
	public static function getCacheKeyFromRequest( &$request ) {
		$uploadCacheKey = $request->getText( 'wpCacheKey', $request->getText( 'key', '' ) );
		if ( $uploadCacheKey !== '' ) {
			return $uploadCacheKey;
		}
		$desiredDestName = $request->getText( 'wpDestFile' );
		if ( !$desiredDestName ) {
			$desiredDestName = $request->getText( 'wpUploadFileURL' );
		}
		return self::getCacheKey(
			[
				'filename' => $desiredDestName,
				// getVal() may yield null when the parameter is absent; passing
				// null to trim() is deprecated as of PHP 8.1
				'url' => trim( $request->getVal( 'wpUploadFileURL' ) ?? '' )
			]
		);
	}

	/**
	 * Builds the list of domains uploads may be copied from.
	 *
	 * Starts from the CopyUploadsDomains setting. If
	 * CopyUploadAllowOnWikiDomainConfig is enabled, the lines of the
	 * 'copyupload-allowed-domains' message are appended, one domain per line;
	 * anything from the first '#' on a line is treated as a comment.
	 *
	 * @return string[]
	 */
	private static function getAllowedHosts(): array {
		$config = MediaWikiServices::getInstance()->getMainConfig();
		$domains = $config->get( MainConfigNames::CopyUploadsDomains );

		if ( $config->get( MainConfigNames::CopyUploadAllowOnWikiDomainConfig ) ) {
			$page = wfMessage( 'copyupload-allowed-domains' )->inContentLanguage()->plain();

			foreach ( explode( "\n", $page ) as $line ) {
				// Strip comments
				$line = preg_replace( "/^\\s*([^#]*)\\s*((.*)?)$/", "\\1", $line );
				// Trim whitespace
				$line = trim( $line );

				if ( $line !== '' ) {
					$domains[] = $line;
				}
			}
		}

		return $domains;
	}

	/**
	 * Checks whether the URL is allowed.
	 *
	 * URLs are allowed by default; the IsUploadAllowedFromUrl hook may veto
	 * them. The outcome is memoized per URL in a static array.
	 *
	 * @param string $url
	 * @return bool True if uploading from this URL is allowed
	 */
	public static function isAllowedUrl( $url ) {
		if ( !isset( self::$allowedUrls[$url] ) ) {
			$allowed = true;
			( new HookRunner( MediaWikiServices::getInstance()->getHookContainer() ) )
				->onIsUploadAllowedFromUrl( $url, $allowed );
			self::$allowedUrls[$url] = $allowed;
		}

		return self::$allowedUrls[$url];
	}

	/**
	 * Get the URL of the file to be uploaded
	 * @return string
	 */
	public function getUrl() {
		return $this->mUrl;
	}

	/**
	 * Entry point for API upload
	 *
	 * @param string $name Desired destination file name
	 * @param string $url URL to download the file from
	 */
	public function initialize( $name, $url ) {
		$this->mUrl = $url;

		$tempPath = $this->makeTemporaryFile();
		# File size and removeTempFile will be filled in later
		$this->initializePathInfo( $name, $tempPath, 0, false );
	}

	/**
	 * Entry point for SpecialUpload
	 *
	 * Uses 'wpDestFile' as the destination name, falling back to the upload
	 * URL itself when no destination name was given.
	 *
	 * @param WebRequest &$request
	 */
	public function initializeFromRequest( &$request ) {
		$desiredDestName = $request->getText( 'wpDestFile' );
		if ( !$desiredDestName ) {
			$desiredDestName = $request->getText( 'wpUploadFileURL' );
		}
		$this->initialize(
			$desiredDestName,
			// getVal() may yield null when the parameter is absent; passing
			// null to trim() is deprecated as of PHP 8.1
			trim( $request->getVal( 'wpUploadFileURL' ) ?? '' )
		);
	}

	/**
	 * Checks that the request carries an upload URL and that the user of the
	 * main request context has the 'upload_by_url' right.
	 *
	 * @param WebRequest $request
	 * @return bool
	 */
	public static function isValidRequest( $request ) {
		$user = RequestContext::getMain()->getUser();

		$url = $request->getVal( 'wpUploadFileURL' );

		return $url
			&& MediaWikiServices::getInstance()
				->getPermissionManager()
				->userHasRight( $user, 'upload_by_url' );
	}

	/**
	 * @return string
	 */
	public function getSourceType() {
		return 'url';
	}

	/**
	 * Download the file
	 *
	 * Runs the canFetchFile() checks first and only starts the transfer if
	 * they all pass.
	 *
	 * @param array $httpOptions Array of options for MWHttpRequest.
	 *   This could be used to override the timeout on the http request.
	 * @return Status
	 */
	public function fetchFile( $httpOptions = [] ) {
		$status = $this->canFetchFile();
		if ( !$status->isGood() ) {
			return $status;
		}
		return $this->reallyFetchFile( $httpOptions );
	}

	/**
	 * verify we can actually download the file
	 *
	 * Checks, in order: that the URL is a valid URI, that its host is on the
	 * allowlist, and that the URL itself is allowed.
	 *
	 * @return Status Fatal with the matching message key on the first failed check
	 */
	public function canFetchFile() {
		if ( !MWHttpRequest::isValidURI( $this->mUrl ) ) {
			return Status::newFatal( 'http-invalid-url', $this->mUrl );
		}

		if ( !self::isAllowedHost( $this->mUrl ) ) {
			return Status::newFatal( 'upload-copy-upload-invalid-domain' );
		}
		if ( !self::isAllowedUrl( $this->mUrl ) ) {
			return Status::newFatal( 'upload-copy-upload-invalid-url' );
		}
		return Status::newGood();
	}

	/**
	 * Create a new temporary file in the URL subdirectory of wfTempDir().
	 *
	 * @return string Path to the file
	 */
	protected function makeTemporaryFile() {
		$tmpFile = MediaWikiServices::getInstance()->getTempFSFileFactory()
			->newTempFSFile( 'URL', 'urlupload_' );
		$tmpFile->bind( $this );

		return $tmpFile->getPath();
	}

	/**
	 * Callback: save a chunk of the result of a HTTP request to the temporary file
	 *
	 * On a short or failed write the temporary file handle is closed and
	 * $mTmpHandle is set to false, which reallyFetchFile() reports as a write
	 * error.
	 *
	 * @param mixed $req
	 * @param string $buffer
	 * @return int Number of bytes handled
	 */
	public function saveTempFileChunk( $req, $buffer ) {
		$length = strlen( $buffer );
		wfDebugLog( 'fileupload', 'Received chunk of ' . $length . ' bytes' );

		if ( !is_resource( $this->mTmpHandle ) ) {
			// The handle was already closed after an earlier short write; fwrite()
			// on it would throw on PHP 8. Report that nothing was handled.
			// NOTE(review): presumably the HTTP client treats a count smaller than
			// the chunk length as a failed write - confirm against the caller.
			return 0;
		}

		$nbytes = fwrite( $this->mTmpHandle, $buffer );

		// Strict comparison: a failed write (false) must never count as success
		if ( $nbytes === $length ) {
			$this->mFileSize += $nbytes;
		} else {
			// Well... that's not good!
			wfDebugLog(
				'fileupload',
				'Short write ' . $nbytes . '/' . $length .
					' bytes, aborting with ' . $this->mFileSize . ' uploaded so far'
			);
			fclose( $this->mTmpHandle );
			$this->mTmpHandle = false;
		}

		return $nbytes;
	}

	/**
	 * Download the file, save it to the temporary file and update the file
	 * size and set $mRemoveTempFile to true.
	 *
	 * Redirects are followed manually, up to 'maxRedirects' requests (default 5),
	 * and the temporary file is emptied before each follow-up request.
	 * NOTE(review): redirect targets are requested without being re-checked
	 * against isAllowedHost()/isAllowedUrl() - confirm that this is intended.
	 *
	 * @param array $httpOptions Array of options for MWHttpRequest
	 * @return Status
	 */
	protected function reallyFetchFile( $httpOptions = [] ) {
		$services = MediaWikiServices::getInstance();
		$config = $services->getMainConfig();
		$copyUploadProxy = $config->get( MainConfigNames::CopyUploadProxy );
		$copyUploadTimeout = $config->get( MainConfigNames::CopyUploadTimeout );

		// Note the temporary file should already be created by makeTemporaryFile()
		$this->mTmpHandle = fopen( $this->mTempPath, 'wb' );
		if ( !$this->mTmpHandle ) {
			return Status::newFatal( 'tmp-create-error' );
		}
		wfDebugLog( 'fileupload', 'Temporary file created "' . $this->mTempPath . '"' );

		$this->mRemoveTempFile = true;
		$this->mFileSize = 0;

		// Caller-supplied options win; redirects are handled by the loop below
		$options = $httpOptions + [ 'followRedirects' => false ];

		if ( $copyUploadProxy !== false ) {
			$options['proxy'] = $copyUploadProxy;
		}

		if ( $copyUploadTimeout && !isset( $options['timeout'] ) ) {
			$options['timeout'] = $copyUploadTimeout;
		}
		wfDebugLog(
			'fileupload',
			'Starting download from "' . $this->mUrl . '" ' .
				'<' . implode( ',', array_keys( array_filter( $options ) ) ) . '>'
		);

		// Manually follow any redirects up to the limit and reset the output file before each new request to prevent
		// capturing the redirect response as part of the file.
		$attemptsLeft = $options['maxRedirects'] ?? 5;
		$targetUrl = $this->mUrl;
		$requestFactory = $services->getHttpRequestFactory();
		while ( $attemptsLeft > 0 ) {
			$req = $requestFactory->create( $targetUrl, $options, __METHOD__ );
			$req->setCallback( [ $this, 'saveTempFileChunk' ] );
			$status = $req->execute();
			if ( !$req->isRedirect() ) {
				break;
			}
			if ( !$this->mTmpHandle ) {
				// saveTempFileChunk() closed the handle after a short write, so there
				// is nothing to truncate; fall through to the write error below.
				break;
			}
			$targetUrl = $req->getFinalUrl();
			// Remove redirect response content from file.
			ftruncate( $this->mTmpHandle, 0 );
			rewind( $this->mTmpHandle );
			$attemptsLeft--;
		}

		// "<=" also catches a non-positive maxRedirects, where no request was made at all
		if ( $attemptsLeft <= 0 ) {
			// Don't leak the open handle on this early exit
			if ( $this->mTmpHandle ) {
				fclose( $this->mTmpHandle );
				$this->mTmpHandle = null;
			}
			return Status::newFatal( 'upload-too-many-redirects' );
		}

		if ( $this->mTmpHandle ) {
			// File got written ok...
			fclose( $this->mTmpHandle );
			$this->mTmpHandle = null;
		} else {
			// We encountered a write error during the download...
			return Status::newFatal( 'tmp-write-error' );
		}

		// @phan-suppress-next-line PhanPossiblyUndeclaredVariable Always set after loop
		if ( $status->isOK() ) {
			wfDebugLog( 'fileupload', 'Download by URL completed successfully.' );
		} else {
			// @phan-suppress-next-line PhanPossiblyUndeclaredVariable Always set after loop
			wfDebugLog( 'fileupload', $status->getWikiText( false, false, 'en' ) );
			wfDebugLog(
				'fileupload',
				// @phan-suppress-next-line PhanPossiblyUndeclaredVariable Always set after loop
				'Download by URL completed with HTTP status ' . $req->getStatus()
			);
		}

		// @phan-suppress-next-line PhanTypeMismatchReturnNullable,PhanPossiblyUndeclaredVariable Always set after loop
		return $status;
	}
}