Merge pull request #410 from vladimir-v-diaz/develop

Address issue #409
This commit is contained in:
Vladimir Diaz 2016-11-30 11:24:43 -05:00 committed by GitHub
commit 8da05bba3a
2 changed files with 388 additions and 386 deletions

File diff suppressed because it is too large Load diff

View file

@ -1,17 +1,17 @@
"""
<Program Name>
download.py
<Started>
February 21, 2012. Based on previous version by Geremy Condra.
<Author>
Konstantin Andrianov
Vladimir Diaz <vladimir.v.diaz@gmail.com>
<Copyright>
See LICENSE for licensing information.
<Purpose>
Download metadata and target files and check their validity. The hash and
length of a downloaded file has to match the hash and length supplied by the
@ -65,17 +65,17 @@ def safe_download(url, required_length):
the length of the downloaded file matches 'required_length' exactly.
tuf.download.unsafe_download() may be called if an upper download limit is
preferred.
'tuf.ssl_crypto.util.TempFile', the file-like object returned, is used
instead of regular tempfile object because of additional functionality
provided, such as handling compressed metadata and automatically closing
files after moving to final destination.
<Arguments>
url:
A URL string that represents the location of the file. The URI scheme
component must be one of 'settings.SUPPORTED_URI_SCHEMES'.
required_length:
An integer value representing the length of the file. This is an exact
limit.
@ -83,21 +83,21 @@ def safe_download(url, required_length):
<Side Effects>
A 'tuf.ssl_crypto.util.TempFile' object is created on disk to store the
contents of 'url'.
<Exceptions>
tuf.ssl_commons.exceptions.DownloadLengthMismatchError, if there was a
mismatch of observed vs expected lengths while downloading the file.
tuf.ssl_commons.exceptions.FormatError, if any of the arguments are
improperly formatted.
Any other unforeseen runtime exception.
<Returns>
A 'tuf.ssl_crypto.util.TempFile' file-like object that points to the
contents of 'url'.
"""
# Do all of the arguments have the appropriate format?
# Raise 'tuf.ssl_commons.exceptions.FormatError' if there is a mismatch.
tuf.ssl_crypto.formats.URL_SCHEMA.check_match(url)
@ -108,7 +108,7 @@ def safe_download(url, required_length):
# supported. If the URI scheme of 'url' is empty or "file", files on the
# local system can be accessed. Unexpected files may be accessed by
# compromised metadata (unlikely to happen if targets.json metadata is signed
# with offline keys).
# with offline keys).
parsed_url = six.moves.urllib.parse.urlparse(url)
if parsed_url.scheme not in settings.SUPPORTED_URI_SCHEMES:
@ -116,7 +116,7 @@ def safe_download(url, required_length):
repr(url) + ' specifies an unsupported URI scheme. Supported ' + \
' URI Schemes: ' + repr(settings.SUPPORTED_URI_SCHEMES)
raise tuf.ssl_commons.exceptions.FormatError(message)
return _download_file(url, required_length, STRICT_REQUIRED_LENGTH=True)
@ -131,17 +131,17 @@ def unsafe_download(url, required_length):
the length of the downloaded file is up to 'required_length', and no larger.
tuf.download.safe_download() may be called if an exact download limit is
preferred.
'tuf.ssl_crypto.util.TempFile', the file-like object returned, is used
instead of regular tempfile object because of additional functionality
provided, such as handling compressed metadata and automatically closing
files after moving to final destination.
<Arguments>
url:
A URL string that represents the location of the file. The URI scheme
component must be one of 'settings.SUPPORTED_URI_SCHEMES'.
required_length:
An integer value representing the length of the file. This is an upper
limit.
@ -149,40 +149,40 @@ def unsafe_download(url, required_length):
<Side Effects>
A 'tuf.ssl_crypto.util.TempFile' object is created on disk to store the
contents of 'url'.
<Exceptions>
tuf.ssl_commons.exceptions.DownloadLengthMismatchError, if there was a
mismatch of observed vs expected lengths while downloading the file.
tuf.ssl_commons.exceptions.FormatError, if any of the arguments are
improperly formatted.
Any other unforeseen runtime exception.
<Returns>
A 'tuf.ssl_crypto.util.TempFile' file-like object that points to the
contents of 'url'.
"""
# Do all of the arguments have the appropriate format?
# Raise 'tuf.ssl_commons.exceptions.FormatError' if there is a mismatch.
tuf.ssl_crypto.formats.URL_SCHEMA.check_match(url)
tuf.ssl_crypto.formats.LENGTH_SCHEMA.check_match(required_length)
# Ensure 'url' specifies one of the URI schemes in
# 'settings.SUPPORTED_URI_SCHEMES'. By default, ['http', 'https'] is
# supported. If the URI scheme of 'url' is empty or "file", files on the
# local system can be accessed. Unexpected files may be accessed by
# compromised metadata (unlikely to happen if targets.json metadata is signed
# with offline keys).
# with offline keys).
parsed_url = six.moves.urllib.parse.urlparse(url)
if parsed_url.scheme not in settings.SUPPORTED_URI_SCHEMES:
message = \
repr(url) + ' specifies an unsupported URI scheme. Supported ' + \
' URI Schemes: ' + repr(settings.SUPPORTED_URI_SCHEMES)
' URI Schemes: ' + repr(settings.SUPPORTED_URI_SCHEMES)
raise tuf.ssl_commons.exceptions.FormatError(message)
return _download_file(url, required_length, STRICT_REQUIRED_LENGTH=False)
@ -192,18 +192,20 @@ def unsafe_download(url, required_length):
def _download_file(url, required_length, STRICT_REQUIRED_LENGTH=True):
"""
<Purpose>
Given the url, hashes and length of the desired file, this function
opens a connection to 'url' and downloads the file while ensuring its
length and hashes match 'required_hashes' and 'required_length'.
Given the url and length of the desired file, this function opens a
connection to 'url' and downloads the file while ensuring its length
matches 'required_length' if 'STRICT_REQUIRED_LENGTH' is True (if False,
the file's length is not checked and a slow retrieval exception is raised
if the download rate falls below the acceptable rate).
tuf.ssl_crypto.util.TempFile is used instead of regular tempfile object
because of additional functionality provided by
'tuf.ssl_crypto.util.TempFile'.
<Arguments>
url:
A URL string that represents the location of the file.
A URL string that represents the location of the file.
required_length:
An integer value representing the length of the file.
@ -216,16 +218,16 @@ def _download_file(url, required_length, STRICT_REQUIRED_LENGTH=True):
<Side Effects>
A 'tuf.ssl_crypto.util.TempFile' object is created on disk to store the
contents of 'url'.
<Exceptions>
tuf.ssl_commons.exceptions.DownloadLengthMismatchError, if there was a
mismatch of observed vs expected lengths while downloading the file.
tuf.ssl_commons.exceptions.FormatError, if any of the arguments are
improperly formatted.
Any other unforeseen runtime exception.
<Returns>
A 'tuf.ssl_crypto.util.TempFile' file-like object that points to the
contents of 'url'.
@ -238,7 +240,7 @@ def _download_file(url, required_length, STRICT_REQUIRED_LENGTH=True):
# 'url.replace()' is for compatibility with Windows-based systems because
# they might put back-slashes in place of forward-slashes. This converts it
# to the common format.
# to the common format.
url = url.replace('\\', '/')
logger.info('Downloading: ' + repr(url))
@ -261,7 +263,7 @@ def _download_file(url, required_length, STRICT_REQUIRED_LENGTH=True):
# temporary file, and get the total number of downloaded bytes.
total_downloaded, average_download_speed = \
_download_fixed_amount_of_data(connection, temp_file, required_length)
# Does the total number of downloaded bytes match the required length?
_check_downloaded_length(total_downloaded, required_length,
STRICT_REQUIRED_LENGTH=STRICT_REQUIRED_LENGTH,
@ -286,7 +288,7 @@ def _download_fixed_amount_of_data(connection, temp_file, required_length):
This is a helper function, where the download really happens. While-block
reads data from connection a fixed chunk of data at a time, or less, until
'required_length' is reached.
<Arguments>
connection:
The object that the _open_connection returns for communicating with the
@ -301,20 +303,20 @@ def _download_fixed_amount_of_data(connection, temp_file, required_length):
always specified by the TUF metadata for the data file in question
(except in the case of timestamp metadata, in which case we would fix a
reasonable upper bound).
<Side Effects>
Data from the server will be written to 'temp_file'.
<Exceptions>
Runtime or network exceptions will be raised without question.
<Returns>
A (total_downloaded, average_download_speed) tuple, where
'total_downloaded' is the total number of bytes downloaded for the desired
file and the 'average_download_speed' calculated for the download
attempt.
"""
# Tolerate servers with a slow start by ignoring their delivery speed for
# 'settings.SLOW_START_GRACE_PERIOD' seconds. Set 'seconds_spent_receiving'
# to negative SLOW_START_GRACE_PERIOD seconds, and begin checking the average
@ -324,7 +326,7 @@ def _download_fixed_amount_of_data(connection, temp_file, required_length):
# Keep track of total bytes downloaded.
number_of_bytes_received = 0
average_download_speed = 0
start_time = timeit.default_timer()
try:
@ -335,60 +337,60 @@ def _download_fixed_amount_of_data(connection, temp_file, required_length):
# round, sleep for a short amount of time so that the CPU is not hogged
# in the while loop.
time.sleep(0.05)
data = b''
data = b''
read_amount = min(settings.CHUNK_SIZE,
required_length - number_of_bytes_received)
try:
try:
data = connection.read(read_amount)
# Python 3.2 returns 'IOError' if the remote file object has timed out.
# Python 3.2 returns 'IOError' if the remote file object has timed out.
except (socket.error, IOError):
pass
number_of_bytes_received = number_of_bytes_received + len(data)
# Data successfully read from the connection. Store it.
# Data successfully read from the connection. Store it.
temp_file.write(data)
if number_of_bytes_received == required_length:
break
break
stop_time = timeit.default_timer()
seconds_spent_receiving = stop_time - start_time
if (seconds_spent_receiving + grace_period) < 0:
continue
continue
# Measure the average download speed.
average_download_speed = number_of_bytes_received / seconds_spent_receiving
if average_download_speed < settings.MIN_AVERAGE_DOWNLOAD_SPEED:
logger.debug('The average download speed dropped below the minimum'
' average download speed set in settings.py.')
' average download speed set in settings.py.')
break
else:
logger.debug('The average download speed has not dipped below the'
' mimimum average download speed set in settings.py.')
# We might have no more data to read. Check number of bytes downloaded.
# We might have no more data to read. Check number of bytes downloaded.
if not data:
logger.debug('Downloaded ' + repr(number_of_bytes_received) + '/' +
repr(required_length) + ' bytes.')
# Finally, we signal that the download is complete.
break
except:
raise
else:
# This else block returns and skips closing the connection in the finally
# block, so close the connection here.
connection.close()
return number_of_bytes_received, average_download_speed
finally:
# Whatever happens, make sure that we always close the connection.
connection.close()
@ -430,7 +432,7 @@ def _get_opener(scheme=None):
for handler in opener.handlers:
if isinstance(handler, six.moves.urllib.request.HTTPHandler):
opener.handlers.remove(handler)
else:
# Otherwise, use the default opener.
opener = six.moves.urllib.request.build_opener()
@ -444,40 +446,40 @@ def _get_opener(scheme=None):
def _open_connection(url):
  """
  <Purpose>
    Open 'url' and hand back the resulting connection.  The opener is chosen
    by _get_opener() according to the URI scheme of 'url', the request object
    is built by _get_request(), and the connection is opened with a timeout
    of 'settings.SOCKET_TIMEOUT'.

    TODO: Determine whether this follows http redirects and decide if we like
    that.  For example, would we not want to allow redirection from ssl to
    non-ssl urls?

  <Arguments>
    url:
      URL string (e.g., 'http://...' or 'ftp://...' or 'file://...')

  <Exceptions>
    None raised explicitly by this function.
    NOTE(review): errors raised while opening the connection presumably
    propagate to the caller -- confirm.

  <Side Effects>
    Opens a connection to the location named by 'url'.

  <Returns>
    File-like object returned by the opener's open() call.
  """

  # Only the scheme component of the parsed URL is needed; it decides which
  # opener _get_opener() hands back.
  url_scheme = six.moves.urllib.parse.urlparse(url).scheme
  url_opener = _get_opener(scheme=url_scheme)

  # A Request object (rather than the bare URL string) gives finer control
  # over the request: headers or data can be attached to it.  For instance,
  # add_header(key, val) can replace the default 'User-Agent' of
  # Python-urllib/x.y, which is useful when a server does not recognize
  # connections that originate from Python-urllib/x.y.
  url_request = _get_request(url)

  # Bound the time spent waiting on the socket.
  connection = url_opener.open(url_request, timeout=settings.SOCKET_TIMEOUT)

  return connection
@ -488,18 +490,18 @@ def _get_content_length(connection):
"""
<Purpose>
A helper function that gets the purported file length from server.
<Arguments>
connection:
The object that the _open_connection function returns for communicating
with the server about the contents of a URL.
<Side Effects>
No known side effects.
<Exceptions>
Runtime exceptions will be suppressed but logged.
<Returns>
reported_length:
The total number of bytes reported by server. If the process fails, we
@ -509,19 +511,19 @@ def _get_content_length(connection):
try:
# What is the length of this document according to the HTTP spec?
reported_length = connection.info().get('Content-Length')
# Try casting it as a decimal number.
reported_length = int(reported_length, 10)
# Make sure that it is a nonnegative integer.
assert reported_length > -1
except:
message = \
'Could not get content length about ' + str(connection) + ' from server.'
logger.exception(message)
reported_length = None
finally:
return reported_length
@ -534,7 +536,7 @@ def _check_content_length(reported_length, required_length, strict_length=True):
<Purpose>
A helper function that checks whether the length reported by server is
equal to the length we expected.
<Arguments>
reported_length:
The total number of bytes reported by the server.
@ -548,30 +550,30 @@ def _check_content_length(reported_length, required_length, strict_length=True):
<Side Effects>
No known side effects.
<Exceptions>
No known exceptions.
<Returns>
None.
"""
logger.debug('The server reported a length of '+repr(reported_length)+' bytes.')
comparison_result = None
if reported_length < required_length:
comparison_result = 'less than'
comparison_result = 'less than'
elif reported_length > required_length:
comparison_result = 'greater than'
comparison_result = 'greater than'
else:
comparison_result = 'equal to'
comparison_result = 'equal to'
if strict_length:
logger.debug('The reported length is ' + comparison_result + ' the'
' required length of '+repr(required_length)+' bytes.')
else:
logger.debug('The reported length is ' + comparison_result + ' the upper'
' limit of ' + repr(required_length) + ' bytes.')
@ -586,8 +588,8 @@ def _check_downloaded_length(total_downloaded, required_length,
"""
<Purpose>
A helper function which checks whether the total number of downloaded bytes
matches our expectation.
matches our expectation.
<Arguments>
total_downloaded:
The total number of bytes supposedly downloaded for the file in question.
@ -606,11 +608,11 @@ def _check_downloaded_length(total_downloaded, required_length,
timestamp metadata, which has no signed required_length.
average_download_speed:
The average download speed for the downloaded file.
The average download speed for the downloaded file.
<Side Effects>
None.
<Exceptions>
tuf.ssl_commons.exceptions.DownloadLengthMismatchError, if
STRICT_REQUIRED_LENGTH is True and total_downloaded is not equal
@ -627,7 +629,7 @@ def _check_downloaded_length(total_downloaded, required_length,
if total_downloaded == required_length:
logger.info('Downloaded ' + str(total_downloaded) + ' bytes out of the'
' expected ' + str(required_length) + ' bytes.')
else:
difference_in_bytes = abs(total_downloaded - required_length)
@ -637,21 +639,21 @@ def _check_downloaded_length(total_downloaded, required_length,
logger.error('Downloaded ' + str(total_downloaded) + ' bytes, but'
' expected ' + str(required_length) + ' bytes. There is a difference'
' of ' + str(difference_in_bytes) + ' bytes.')
# If the average download speed is below a certain threshold, we flag
# this as a possible slow-retrieval attack.
logger.debug('Average download speed: ' + repr(average_download_speed))
logger.debug('Minimum average download speed: ' + repr(settings.MIN_AVERAGE_DOWNLOAD_SPEED))
if average_download_speed < settings.MIN_AVERAGE_DOWNLOAD_SPEED:
raise tuf.ssl_commons.exceptions.SlowRetrievalError(average_download_speed)
else:
logger.debug('Good average download speed: ' +
repr(average_download_speed) + ' bytes per second')
raise tuf.ssl_commons.exceptions.DownloadLengthMismatchError(required_length, total_downloaded)
else:
# We specifically disabled strict checking of required length, but we
# will log a warning anyway. This is useful when we wish to download the
@ -663,7 +665,7 @@ def _check_downloaded_length(total_downloaded, required_length,
else:
logger.debug('Good average download speed: ' +
repr(average_download_speed) + ' bytes per second')
logger.info('Downloaded ' + str(total_downloaded) + ' bytes out of an'
' upper limit of ' + str(required_length) + ' bytes.')