mirror of
https://github.com/theupdateframework/python-tuf
synced 2026-05-24 10:08:28 +00:00
353 lines
12 KiB
Python
Executable file
353 lines
12 KiB
Python
Executable file
"""
|
|
<Program Name>
|
|
download.py
|
|
|
|
<Started>
|
|
February 21, 2012. Based on previous version by Geremy Condra.
|
|
|
|
<Author>
|
|
Konstantin Andrianov
|
|
Vladimir Diaz <vladimir.v.diaz@gmail.com>
|
|
|
|
<Copyright>
|
|
See LICENSE for licensing information.
|
|
|
|
<Purpose>
|
|
Perform any file downloads and check their validity. This means that the
|
|
hash and length of a downloaded file have to match the hash and length
|
|
supplied by the metadata of that file. The downloaded file is technically a
|
|
file-like object that will automatically destroy itself once closed. Note
|
|
that the file-like object, 'tuf.util.TempFile', is returned by the
|
|
'download_url_to_tempfileobj()' function.
|
|
|
|
"""
|
|
|
|
import logging
|
|
import os.path
|
|
import socket
|
|
|
|
import tuf
|
|
import tuf.hash
|
|
import tuf.util
|
|
import tuf.formats
|
|
|
|
from tuf.compatibility import httplib, ssl, urllib2, urlparse
|
|
# Certificate verification is mandatory for this module, so refuse to load
# when the compatibility layer could not provide an 'ssl' module.
if not ssl:
    raise tuf.Error( "No SSL support!" ) # TODO: degrade gracefully

# Only importable when SSL support is present.
from tuf.compatibility import match_hostname


# See 'log.py' to learn how logging is handled in TUF.
logger = logging.getLogger('tuf.download')
|
|
|
|
|
|
class VerifiedHTTPSConnection(httplib.HTTPSConnection):
    """
    <Purpose>
      An HTTPS connection that requires the server to present a certificate
      signed by one of the authorities listed in 'tuf.conf.ssl_certificates',
      and that checks the certificate against the host being contacted.

      Adapted from pip:
      https://github.com/pypa/pip/blob/d0fa66ecc03ab20b7411b35f7c7b423f31f77761/pip/download.py#L72
    """

    def connect(self):
        """
        <Purpose>
          Open the TCP connection, wrap it in SSL with certificate verification
          required, and match the peer certificate against 'self.host'.

        <Arguments>
          None.

        <Exceptions>
          tuf.Error, if 'tuf.conf.ssl_certificates' is not an existing file.

          Any exception raised by 'socket.create_connection()',
          'self._tunnel()', 'ssl.wrap_socket()' or 'match_hostname()' is
          propagated unchanged.

        <Side Effects>
          Sets 'self.connection_kwargs'.  On success 'self.sock' is the verified
          SSL socket.  On failure the socket is closed and 'self.sock' is reset
          to None, so an unverified socket is never left on the connection.

        <Returns>
          None.
        """

        # Set location of certificate authorities.  This is validated with an
        # explicit check rather than 'assert' (which is stripped under -O), and
        # before any socket is opened so that nothing leaks on failure.
        cert_path = tuf.conf.ssl_certificates
        if not os.path.isfile(cert_path):
            raise tuf.Error('SSL certificates file not found: ' + repr(cert_path))

        self.connection_kwargs = {}

        # TODO: refactor compatibility logic into tuf.compatibility?

        # for > py2.5
        if hasattr(self, 'timeout'):
            self.connection_kwargs.update(timeout=self.timeout)

        # for >= py2.7
        if hasattr(self, 'source_address'):
            self.connection_kwargs.update(source_address=self.source_address)

        sock = socket.create_connection((self.host, self.port),
                                        **self.connection_kwargs)

        try:
            # for >= py2.7
            if getattr(self, '_tunnel_host', None):
                # '_tunnel()' talks to the proxy through 'self.sock'.
                self.sock = sock
                self._tunnel()

            # TODO: Disallow SSLv2.
            # http://docs.python.org/dev/library/ssl.html#protocol-versions
            # TODO: Select the right ciphers.
            # http://docs.python.org/dev/library/ssl.html#cipher-selection
            secure_sock = ssl.wrap_socket(sock,
                                          self.key_file,
                                          self.cert_file,
                                          cert_reqs=ssl.CERT_REQUIRED,
                                          ca_certs=cert_path)
        except Exception:
            # Do not leak the raw socket if tunnelling or the handshake failed.
            self.sock = None
            sock.close()
            raise

        try:
            # NOTE(review): when tunnelling through a proxy, 'self.host' may be
            # the proxy rather than the target host -- confirm which name
            # should be verified here.
            match_hostname(secure_sock.getpeercert(), self.host)
        except Exception:
            # The peer failed hostname verification: drop the connection
            # instead of leaving it reachable through 'self.sock'.
            self.sock = None
            secure_sock.close()
            raise

        # Only expose the socket once it has been fully verified.
        self.sock = secure_sock
|
|
|
|
|
|
class VerifiedHTTPSHandler(urllib2.HTTPSHandler):
    """
    <Purpose>
      An HTTPSHandler that opens its connections with a caller-selectable
      connection class, which defaults to VerifiedHTTPSConnection.

      Adapted from pip:
      https://github.com/pypa/pip/blob/d0fa66ecc03ab20b7411b35f7c7b423f31f77761/pip/download.py#L109
    """

    def __init__(self, connection_class=VerifiedHTTPSConnection):
        """
        <Arguments>
          connection_class:
            The connection class handed to 'do_open()' for every https request.
        """
        urllib2.HTTPSHandler.__init__(self)
        self.specialized_conn_class = connection_class

    def https_open(self, req):
        """
        <Purpose>
          Open 'req' using the connection class chosen at construction time.

        <Returns>
          Whatever 'do_open()' returns for the request.
        """
        conn_class = self.specialized_conn_class
        return self.do_open(conn_class, req)
|
|
|
|
|
|
def _get_request(url):
    """
    <Purpose>
      Wrap the URL to retrieve in a urllib2 Request that carries an explicit
      'Accept-encoding: identity' header.  This protects against "creative"
      interpretation of the RFC: http://bugs.python.org/issue8732

      Adapted from pip:
      https://github.com/pypa/pip/blob/d0fa66ecc03ab20b7411b35f7c7b423f31f77761/pip/download.py#L147

    <Arguments>
      url:
        The URL to request.

    <Returns>
      A 'urllib2.Request' for 'url'.
    """

    identity_only = {'Accept-encoding': 'identity'}
    return urllib2.Request(url, headers=identity_only)
|
|
|
|
|
|
def _get_opener(scheme=None):
    """
    <Purpose>
      Build a urllib2 opener based on whether the user now wants SSL.

      Adapted from pip:
      https://github.com/pypa/pip/blob/d0fa66ecc03ab20b7411b35f7c7b423f31f77761/pip/download.py#L178

    <Arguments>
      scheme:
        The URL scheme of the resource to be opened.  Only "https" selects the
        certificate-verifying opener; anything else gets the default opener.

    <Exceptions>
      tuf.Error, if 'scheme' is "https" and 'tuf.conf.ssl_certificates' is not
      an existing file.

    <Returns>
      A urllib2 opener.
    """

    if scheme != "https":
        # Otherwise, use the default opener.
        return urllib2.build_opener()

    # Explicit check instead of 'assert', which is stripped under -O.
    cert_path = tuf.conf.ssl_certificates
    if not os.path.isfile(cert_path):
        raise tuf.Error('SSL certificates file not found: ' + repr(cert_path))

    # If we are going over https, use an opener which will provide SSL
    # certificate verification.
    https_handler = VerifiedHTTPSHandler()
    opener = urllib2.build_opener(https_handler)

    # Strip out HTTPHandler to prevent MITM spoof.  Iterate over a copy:
    # removing from a list while iterating over it skips the element that
    # follows each removal.
    # NOTE(review): this only edits 'opener.handlers'; confirm whether the
    # opener keeps the handler registered elsewhere for dispatching.
    for handler in list(opener.handlers):
        if isinstance(handler, urllib2.HTTPHandler):
            opener.handlers.remove(handler)

    return opener
|
|
|
|
|
|
def _open_connection(url):
    """
    <Purpose>
      Helper function that opens a connection to the url.  urllib2 supports
      http, ftp, and file.  In python (2.6+) where the ssl module is available,
      urllib2 also supports https.

      TODO: Determine whether this follows http redirects and decide if we like
      that.  For example, would we not want to allow redirection from ssl to
      non-ssl urls?

    <Arguments>
      url:
        URL string (e.g., 'http://...' or 'ftp://...' or 'file://...')

    <Exceptions>
      tuf.DownloadError, wrapping any exception raised while building the
      opener or the request, or while opening the connection.

    <Side Effects>
      Opens a connection to a remote server.

    <Returns>
      File-like object.
    """

    try:
        # urllib2.Request produces a Request object that allows for a finer
        # control of the requesting process.  Request object allows to add
        # headers or data to the HTTP request.  For instance, request method
        # add_header(key, val) can be used to change/spoof 'User-Agent' from
        # default Python-urllib/x.y to
        # 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)'; this can be
        # useful if servers do not recognize connections that originate from
        # Python-urllib/x.y.
        parsed_url = urlparse.urlparse(url)
        opener = _get_opener(scheme=parsed_url.scheme)
        request = _get_request(url)
        return opener.open(request)
    # 'except ... as ...' is accepted by Python 2.6+ and Python 3, whereas the
    # comma form is a syntax error on Python 3.
    except Exception as e:
        raise tuf.DownloadError(e)
|
|
|
|
|
|
|
|
|
|
|
|
def _check_hashes(input_file, trusted_hashes):
    """
    <Purpose>
      Helper function that verifies multiple secure hashes of the downloaded
      file.  If any of these fail it raises an exception.  This is to conform
      with the TUF specs, which support clients with different hashing
      algorithms.  The 'hash.py' module is used to compute the hashes of the
      'input_file'.

    <Arguments>
      input_file:
        A file or file-like object.

      trusted_hashes:
        A dictionary with hash-algorithm names as keys and hashes as dict
        values.  The hashes should be in the hexdigest format.

    <Exceptions>
      tuf.BadHashError, if the hashes don't match.

    <Side Effects>
      'input_file' is read once (unless 'trusted_hashes' is empty).
      Hash digest objects are created using the 'tuf.hash' module.

    <Returns>
      None.
    """

    if not trusted_hashes:
        # Nothing to verify; leave 'input_file' untouched.
        return

    # Read the data a single time.  Calling read() once per algorithm would
    # hand every algorithm after the first an exhausted stream (and therefore
    # the digest of an empty string) for ordinary file-like objects.
    file_contents = input_file.read()

    # Verify each trusted hash of 'trusted_hashes'.  Raise exception if
    # any of the hashes are incorrect and return if all are correct.
    for algorithm, trusted_hash in trusted_hashes.items():
        digest_object = tuf.hash.digest(algorithm)
        digest_object.update(file_contents)
        computed_hash = digest_object.hexdigest()

        if trusted_hash != computed_hash:
            msg = 'Hashes do not match. Expected '+trusted_hash+' got '+computed_hash
            raise tuf.BadHashError(msg)

        logger.info('The file\'s '+algorithm+' hash is correct: '+trusted_hash)
|
|
|
|
|
|
|
|
|
|
|
|
def download_url_to_tempfileobj(url, required_hashes=None, required_length=None):
    """
    <Purpose>
      Given the url, hashes and length of the desired file, this function
      opens a connection to 'url' and downloads the file while ensuring its
      length and hashes match 'required_hashes' and 'required_length'.

      tuf.util.TempFile is used instead of regular tempfile object because of
      additional functionality provided by 'tuf.util.TempFile'.

    <Arguments>
      url:
        A url string that represents the location of the file.

      required_hashes:
        A dictionary, where the keys represent the hashing algorithm used to
        hash the file and the dict values the hexdigest.

        For instance, a hash pair might look something like this:
        {'md5': '37544f383be1fc1a32f42801c9c4b4d6'}

      required_length:
        An integer value representing the length of the file.

    <Side Effects>
      'tuf.util.TempFile' object is created.  The connection to 'url' is
      always closed before returning or raising.

    <Exceptions>
      tuf.DownloadError, if there was an error while downloading the file.
      This includes a length mismatch, a negative advertised length, and a
      hash mismatch (the underlying exception is wrapped).

      tuf.FormatError, if any of the arguments are improperly formatted.

    <Returns>
      'tuf.util.TempFile' instance.
    """

    # Do all of the arguments have the appropriate format?
    # Raise 'tuf.FormatError' if there is a mismatch.
    tuf.formats.URL_SCHEMA.check_match(url)
    if required_hashes is not None:
        tuf.formats.HASHDICT_SCHEMA.check_match(required_hashes)
    if required_length is not None:
        tuf.formats.LENGTH_SCHEMA.check_match(required_length)

    # 'url.replace()' is for compatibility with Windows-based systems because
    # they might put back-slashes in place of forward-slashes.  This converts
    # it to the common format.
    url = url.replace('\\','/')
    logger.info('Downloading: '+url)
    connection = _open_connection(url)

    # Read at most this many bytes from the connection per call.
    chunk_size = 8192

    try:
        temp_file = tuf.util.TempFile()

        # Keep track of total bytes downloaded.
        total_downloaded = 0

        try:
            # info().get('Content-Length') gets the length of the url file.
            content_length = connection.info().get('Content-Length')

            if content_length is None:
                # The HTTP server did not specify a Content-Length...
                if required_length is None:
                    # ...and we do not know it either.  Raise this to the user!
                    message = 'Do not know anything about how much to download for "' + url + '"!'
                    raise tuf.DownloadError(message)
                # ...but we know how much we are required to download.
                file_length = required_length
            else:
                if required_length is None:
                    # We do not know the required length.  Avoid falling for an
                    # arbitrary-length data attack (#26).
                    message = 'Do not know how much is required to download for "' + url + '"!'
                    logger.debug(message)
                file_length = int(content_length, 10)

            # A negative length would turn the bounded read() below into a
            # "read everything" call, defeating the length limit.
            if file_length < 0:
                message = 'Invalid negative length for '+url+': '+str(file_length)
                raise tuf.DownloadError(message)

            # Does the url's 'file_length' match 'required_length'?
            if required_length is not None and file_length != required_length:
                message = 'Incorrect length for '+url+'. Expected '+str(required_length)+ \
                          ', got '+str(file_length)+' bytes.'
                raise tuf.DownloadError(message)

            # Read data from the connection 'chunk_size' bytes at a time, or
            # less, until 'file_length' is reached or no more data arrives.
            while True:
                data = connection.read(min(chunk_size, file_length - total_downloaded))
                if not data:
                    break
                # Data successfully read from the connection.  Store it.
                temp_file.write(data)
                total_downloaded = total_downloaded + len(data)

            message = 'Downloaded '+str(total_downloaded)+'/' \
                      +str(file_length)+' bytes.'
            logger.debug(message)

            # Did we download the correct amount indicated by 'Content-Length'?
            if total_downloaded != file_length:
                message = 'Downloaded '+str(total_downloaded)+'. Expected '+ \
                          str(file_length)+' for '+url
                raise tuf.DownloadError(message)

            # Did we download the correct amount indicated by the user?
            # ('total_downloaded' is an integer, so it is formatted directly.)
            if required_length is not None and total_downloaded != required_length:
                message = 'The user-required length of '+str(required_length)+ \
                          ' did not match the '+str(total_downloaded)+' downloaded'
                raise tuf.DownloadError(message)

            # We appear to have downloaded the correct amount.  Check the
            # hashes whenever the caller supplied them, even if no required
            # length was given.
            if required_hashes is not None:
                _check_hashes(temp_file, required_hashes)

        # Exception is a base class for all non-exiting exceptions.
        except Exception as e:
            # Closing 'temp_file'.  The 'temp_file' data is destroyed.
            temp_file.close_temp_file()
            logger.error(str(e))
            raise tuf.DownloadError(e)
    finally:
        # Release the connection on success and on every failure path.
        connection.close()

    return temp_file
|