diff --git a/tuf/__init__.py b/tuf/__init__.py index b8d34217..c3933ca6 100755 --- a/tuf/__init__.py +++ b/tuf/__init__.py @@ -26,6 +26,9 @@ __all__ = ['formats'] + + + class Error(Exception): """Indicate a generic error.""" pass @@ -50,6 +53,14 @@ class FormatError(Error): +class InvalidMetadataJSONError(FormatError): + """Indicate that some metadata file is not valid JSON.""" + pass + + + + + class UnsupportedAlgorithmError(Error): """Indicate an error while trying to identify a user-specified algorithm.""" pass @@ -90,6 +101,22 @@ class RepositoryError(Error): +class ForbiddenTargetError(RepositoryError): + """Indicate that a role signed for a target that it was not delegated to.""" + pass + + + + + +class ReplayError(RepositoryError): + """Indicate that some metadata has been replayed to the client.""" + pass + + + + + class ExpiredMetadataError(Error): """Indicate that a TUF Metadata file has expired.""" pass @@ -114,8 +141,8 @@ class CryptoError(Error): -class UnsupportedLibraryError(Error): - """Indicate that a supported library could not be located or imported.""" +class BadSignatureError(CryptoError): + """Indicate that some metadata file had a bad signature.""" pass @@ -130,6 +157,22 @@ class UnknownMethodError(CryptoError): +class UnsupportedLibraryError(Error): + """Indicate that a supported library could not be located or imported.""" + pass + + + + + +class DecompressionError(Error): + """Indicate that some error happened while decompressing a file.""" + pass + + + + + class DownloadError(Error): """Indicate an error occurred while attempting to download a file.""" pass @@ -138,6 +181,24 @@ class DownloadError(Error): +class DownloadLengthMismatchError(DownloadError): + """Indicate that a mismatch of lengths was seen while downloading a file.""" + pass + + + + + +class SlowRetrievalError(DownloadError): + """Indicate that downloading a file took an unreasonably long time.""" + + def __init__(self, number_of_slow_chunks): + self.number_of_slow_chunks
= number_of_slow_chunks + + + + + class KeyAlreadyExistsError(Error): """Indicate that a key already exists and cannot be added.""" pass @@ -165,3 +226,22 @@ class UnknownRoleError(Error): class InvalidNameError(Error): """Indicate an error while trying to validate any type of named object""" pass + + + + +class UpdateError(Error): + """An updater will throw this exception in case it could not download a + metadata or target file. + + A dictionary of Exception instances indexed by every mirror URL will also be + provided.""" + + def __init__(self, mirror_errors): + # Dictionary of URL strings to Exception instances + self.mirror_errors = mirror_errors + + + + + diff --git a/tuf/client/updater.py b/tuf/client/updater.py index 5f751c5c..d84659b0 100755 --- a/tuf/client/updater.py +++ b/tuf/client/updater.py @@ -354,10 +354,6 @@ def _load_metadata_from_file(self, metadata_set, metadata_role): not end in '.txt'. Examples: 'root', 'targets', 'targets/linux/x86'. - tuf.RepositoryError: - If the metadata could not be loaded or the extracted data is not a - valid metadata object. - tuf.FormatError: If role information belonging to a delegated role of 'metadata_role' is improperly formatted. @@ -392,11 +388,7 @@ def _load_metadata_from_file(self, metadata_set, metadata_role): # 'tuf.formats.SIGNABLE_SCHEMA'. metadata_signable = tuf.util.load_json_file(metadata_filepath) - # Ensure the loaded json object is properly formatted. - try: - tuf.formats.check_signable_object_format(metadata_signable) - except tuf.FormatError, e: - raise tuf.RepositoryError('Invalid format: '+repr(metadata_filepath)+'.') + tuf.formats.check_signable_object_format(metadata_signable) # Extract the 'signed' role object from 'metadata_signable'. metadata_object = metadata_signable['signed'] @@ -552,7 +544,7 @@ def refresh(self): None. - tuf.RepositoryError: + tuf.UpdateError: If the metadata for any of the top-level roles cannot be updated. 
tuf.ExpiredMetadataError: @@ -566,12 +558,23 @@ def refresh(self): None. """ - + + # The timestamp role does not have signed metadata about it; otherwise we + # would need an infinite regress of metadata. Therefore, we use some + # default, sane metadata about it. + DEFAULT_TIMESTAMP_FILEINFO = { + 'hashes':None, + 'length': tuf.conf.DEFAULT_TIMESTAMP_REQUIRED_LENGTH + } + # Update the top-level metadata. The _update_metadata_if_changed() and # _update_metadata() calls below do NOT perform an update if there # is insufficient trusted signatures for the specified metadata. - # Raise 'tuf.RepositoryError' if an update fails. - self._update_metadata('timestamp') + # Raise 'tuf.UpdateError' if an update fails. + + # Use default but sane information for timestamp metadata, and do not + # require strict checks on its required length. + self._update_metadata('timestamp', DEFAULT_TIMESTAMP_FILEINFO) self._update_metadata_if_changed('release', referenced_metadata='timestamp') @@ -589,7 +592,160 @@ def refresh(self): - def _update_metadata(self, metadata_role, fileinfo=None, compression=None): + def __check_hashes(self, input_file, trusted_hashes): + """ + + A helper function that verifies multiple secure hashes of the downloaded + file. If any of these fail it raises an exception. This is to conform + with the TUF specs, which support clients with different hashing + algorithms. The 'hash.py' module is used to compute the hashes of the + 'input_file'. + + + input_file: + A file-like object. + + trusted_hashes: + A dictionary with hash-algorithm names as keys and hashes as dict values. + The hashes should be in the hexdigest format. + + + tuf.BadHashError, if the hashes don't match. + + + Hash digest object is created using the 'tuf.hash' module. + + + None. + + """ + + # Verify each trusted hash of 'trusted_hashes'. Raise exception if + # any of the hashes are incorrect and return if all are correct. 
+ for algorithm, trusted_hash in trusted_hashes.items(): + digest_object = tuf.hash.digest(algorithm) + digest_object.update(input_file.read()) + computed_hash = digest_object.hexdigest() + if trusted_hash != computed_hash: + raise tuf.BadHashError('Hashes do not match! Expected '+ + trusted_hash+' got '+computed_hash) + else: + logger.info('The file\'s '+algorithm+' hash is correct: '+trusted_hash) + + + + + + def get_target_file(self, target_filepath, file_length, file_hashes): + + def verify_target_file(target_file_object): + self.__check_hashes(target_file_object, file_hashes) + + return self.__get_file(target_filepath, verify_target_file, 'target', + file_length, download_safely=True, compression=None) + + + + + + def __verify_metadata_file(self, metadata_file_object, metadata_role): + # Read and load the downloaded file. + try: + metadata_signable = \ + tuf.util.load_json_string(metadata_file_object.read()) + except: + logger.exception('Invalid metadata from '+mirror_url+'.') + raise + else: + # Verify the signature on the downloaded metadata object. 
+ try: + valid = tuf.sig.verify(metadata_signable, metadata_role) + except: + message = 'Unable to verify '+metadata_filename + logger.exception(message) + raise + else: + if not valid: + raise tuf.BadSignatureError() + + + + + + def unsafely_get_metadata_file(self, metadata_role, metadata_filepath, + file_length): + + def unsafely_verify_metadata_file(metadata_file_object): + self.__verify_metadata_file(metadata_file_object, metadata_role) + + return self.__get_file(metadata_filepath, unsafely_verify_metadata_file, + 'meta', file_length, download_safely=False, + compression=None) + + + + + + def safely_get_metadata_file(self, metadata_role, metadata_filepath, + file_length, file_hashes, compression): + + def safely_verify_metadata_file(metadata_file_object): + self.__check_hashes(metadata_file_object, file_hashes) + self.__verify_metadata_file(metadata_file_object, metadata_role) + + return self.__get_file(metadata_filepath, safely_verify_metadata_file, + 'meta', file_length, download_safely=True, + compression=compression) + + + + + + def __get_file(self, filepath, verify_file, reference_metadata, + trusted_length, download_safely, compression): + file_mirrors = tuf.mirrors.get_list_of_mirrors(reference_metadata, + filepath, self.mirrors) + # file_mirror (URL): error (Exception) + file_mirror_errors = {} + file_object = None + + for file_mirror in file_mirrors: + try: + if download_safely: + file_object = tuf.download.safe_download(file_mirror, trusted_length) + else: + file_object = tuf.download.unsafe_download(file_mirror, + trusted_length) + + if compression: + file_object.decompress_temp_file_object(compression) + + except Exception, e: + # Remember the error from this mirror, and "reset" the target file. 
+ logger.exception('Download failed from '+file_mirror+'.') + file_mirror_errors[file_mirror] = e + file_object = None + else: + try: + verify_file(file_object) + except Exception, e: + file_mirror_errors[file_mirror] = e + file_object = None + else: + break + + if file_object: + return file_object + else: + logger.exception('Failed to download {0}: {1}'.format(filepath, + file_mirror_errors)) + raise tuf.UpdateError(file_mirror_errors) + + + + + + def _update_metadata(self, metadata_role, fileinfo, compression=None): """ Download, verify, and 'install' the metadata belonging to 'metadata_role'. @@ -608,6 +764,13 @@ def _update_metadata(self, metadata_role, fileinfo=None, compression=None): Ex: {"hashes": {"sha256": "3a5a6ec1f353...dedce36e0"}, "length": 1340} + STRICT_REQUIRED_LENGTH: + A Boolean indicator used to signal whether we should perform strict + checking of the required length in 'fileinfo'. True by default. + We explicitly set this to False when we know that we want + to turn this off for downloading the timestamp metadata, which has no + signed required_length. + compression: A string designating the compression type of 'metadata_role'. The 'release' metadata file may be optionally downloaded and stored in @@ -615,7 +778,7 @@ def _update_metadata(self, metadata_role, fileinfo=None, compression=None): are considered. Any other string is ignored. - tuf.RepositoryError: + tuf.UpdateError: The metadata could not be updated. This is not specific to a single failure but rather indicates that all possible ways to update the metadata have been tried and failed. @@ -639,20 +802,13 @@ def _update_metadata(self, metadata_role, fileinfo=None, compression=None): if compression == 'gzip': metadata_filename = metadata_filename + '.gz' - # Reference to the 'get_list_of_mirrors' function. - get_mirrors = tuf.mirrors.get_list_of_mirrors - - # Reference to the 'download_url_to_tempfileobj' function.
- download_file = tuf.download.download_url_to_tempfileobj - # Extract file length and file hashes. They will be passed as arguments # to 'download_file' function. - if fileinfo is not None: - file_length=fileinfo['length'] - file_hashes=fileinfo['hashes'] - else: - file_length=None - file_hashes=None + file_length = fileinfo['length'] + file_hashes = fileinfo['hashes'] + + # A dictionary to keep the error from every mirror that we try. + mirror_errors = {} # Attempt a file download from each mirror until the file is downloaded and # verified. If the signature of the downloaded file is valid, proceed, @@ -661,51 +817,18 @@ def _update_metadata(self, metadata_role, fileinfo=None, compression=None): # is the object extracted from 'metadata_file_object'. Metadata saved to # files are regarded as 'signable' objects, conformant to # 'tuf.formats.SIGNABLE_SCHEMA'. - metadata_file_object = None - metadata_signable = None - compressed_file_object = None - for mirror_url in get_mirrors('meta', metadata_filename.encode("utf-8"), self.mirrors): - try: - metadata_file_object = download_file(mirror_url, file_hashes, - file_length) - except tuf.DownloadError, e: - logger.warn('Download failed from '+mirror_url+'.') - continue - if compression: - compressed_file_object = tuf.util.TempFile() - shutil.copyfileobj(metadata_file_object, compressed_file_object) - metadata_file_object.decompress_temp_file_object(compression) + if metadata_role == 'timestamp': + metadata_file_object = \ + self.unsafely_get_metadata_file(metadata_role, metadata_filename, + file_length) + else: + metadata_file_object = \ + self.safely_get_metadata_file(metadata_role, metadata_filename, + file_length, file_hashes, + compression=compression) - # Read and load the downloaded file. - metadata_signable = tuf.util.load_json_string(metadata_file_object.read()) - - # Verify the signature on the downloaded metadata object. 
- try: - valid = tuf.sig.verify(metadata_signable, metadata_role) - except (tuf.UnknownRoleError, tuf.FormatError, tuf.Error), e: - # FIXME: Exception.message is deprecated in 2.6, and gone in 3.0, - # but this is a workaround for Unicode messages. We need a long-term - # solution with #61. - # http://bugs.python.org/issue2517 - message = 'Unable to verify '+metadata_filename+':'+e.message.encode("utf-8") - logger.exception(message) - metadata_signable = None - continue - else: - if valid: - logger.debug('Good signature on '+mirror_url+'.') - break - else: - logger.warn('Bad signature on '+mirror_url+'.') - metadata_signable = None - continue - - # Raise an exception if a valid metadata signable could not be downloaded - # from any of the mirrors. - if metadata_signable is None: - message = 'Unable to update '+repr(metadata_filename)+'.' - logger.error(message) - raise tuf.RepositoryError(message) + # Read and load the downloaded file. + metadata_signable = tuf.util.load_json_string(metadata_file_object.read()) # Ensure the loaded 'metadata_signable' is properly formatted. try: @@ -724,7 +847,7 @@ def _update_metadata(self, metadata_role, fileinfo=None, compression=None): current_version = current_metadata_role['version'] downloaded_version = metadata_signable['signed']['version'] if downloaded_version < current_version: - message = repr(mirror_url)+' is older than the version currently '+\ + message = str(current_metadata_role)+' is older than the version currently '+\ 'installed.\nDownloaded version: '+repr(downloaded_version)+'\n'+\ 'Current version: '+repr(current_version) raise tuf.RepositoryError(message) @@ -839,9 +962,11 @@ def _update_metadata_if_changed(self, metadata_role, referenced_metadata='releas # Ensure the referenced metadata has been loaded. The 'root' role may be # updated without having 'release' available. 
if referenced_metadata not in self.metadata['current']: - message = 'Cannot update '+repr(metadata_role)+' because '+\ - repr(referenced_metadata)+' is missing.' + message = 'Cannot update '+repr(metadata_role)+' because ' \ + +referenced_metadata+' is missing.' raise tuf.RepositoryError(message) + # The referenced metadata has been loaded. Extract the new + # fileinfo for 'metadata_role' from it. else: message = repr(metadata_role)+' referenced in '+\ repr(referenced_metadata)+'. '+repr(metadata_role)+' may be updated.' @@ -888,7 +1013,7 @@ def _update_metadata_if_changed(self, metadata_role, referenced_metadata='releas try: self._update_metadata(metadata_role, fileinfo=new_fileinfo, compression=compression) - except tuf.RepositoryError, e: + except: # The current metadata we have is not current but we couldn't # get new metadata. We shouldn't use the old metadata anymore. # This will get rid of in-memory knowledge of the role and @@ -898,8 +1023,8 @@ def _update_metadata_if_changed(self, metadata_role, referenced_metadata='releas # We shouldn't need to, but we need to check the trust # implications of the current implementation. self._delete_metadata(metadata_role) - message = 'Metadata for '+repr(metadata_role)+' could not be updated: ' - raise tuf.MetadataNotAvailableError(message+str(e)) + logger.error('Metadata for '+str(metadata_role)+' could not be updated') + raise else: # We need to remove delegated roles because the delegated roles # may not be trusted anymore. @@ -1130,11 +1255,6 @@ def _fileinfo_has_changed(self, metadata_filename, new_fileinfo): if self.fileinfo.get(metadata_filename) is None: return True - # 'new_fileinfo' should only be 'None' if updating 'root.txt' - # without having 'release.txt'. 
- if new_fileinfo is None: - return True - current_fileinfo = self.fileinfo[metadata_filename] if current_fileinfo['length'] != new_fileinfo['length']: @@ -2121,7 +2241,7 @@ def download_target(self, target, destination_directory): tuf.FormatError: If 'target' is not properly formatted. - tuf.DownloadError: + tuf.UpdateError: If a target could not be downloaded from any of the mirrors. @@ -2139,33 +2259,16 @@ def download_target(self, target, destination_directory): # Raise 'tuf.FormatError' if the check fail. tuf.formats.TARGETFILE_SCHEMA.check_match(target) tuf.formats.PATH_SCHEMA.check_match(destination_directory) - - # Reference to the 'get_list_of_mirrors' function. - get_mirrors = tuf.mirrors.get_list_of_mirrors - - # Reference to the 'download_url_to_tempfileobj' function. - download_file = tuf.download.download_url_to_tempfileobj # Extract the target file information. target_filepath = target['filepath'] trusted_length = target['fileinfo']['length'] trusted_hashes = target['fileinfo']['hashes'] - target_file_object = None - # Iterate through the repositority mirrors until we successfully - # download a target. - for mirror_url in get_mirrors('target', target_filepath, self.mirrors): - try: - target_file_object = download_file(mirror_url, trusted_hashes, - trusted_length) - break - except (tuf.DownloadError, tuf.FormatError), e: - logger.warn('Download failed from '+mirror_url+'.') - target_file_object = None - continue - # We have gone through all the mirrors. Did we get a target file object? - if target_file_object == None: - raise tuf.DownloadError('No download locations known.') + # get_target_file checks every mirror and returns the first target + # that passes verification. + target_file_object = self.get_target_file(target_filepath, trusted_length, + trusted_hashes) # We acquired a target file object from a mirror. Move the file into # place (i.e., locally to 'destination_directory'). 
diff --git a/tuf/conf.py b/tuf/conf.py index 0b897afb..281409b3 100755 --- a/tuf/conf.py +++ b/tuf/conf.py @@ -29,10 +29,25 @@ # not be deleted. At a minimum, each key in the mirrors dictionary # below should have a directory under 'repository_directory' # which already exists and within that directory should have the file -# 'metadata/current/root.txt'. This must be set! +# 'metadata/current/root.txt'. This MUST be set. repository_directory = None # A PEM (RFC 1422) file where you may find SSL certificate authorities # https://en.wikipedia.org/wiki/Certificate_authority # http://docs.python.org/2/library/ssl.html#certificates ssl_certificates = None + +# Since the timestamp role does not have signed metadata about itself, we set a +# default but sane upper bound for the number of bytes required to download it. +DEFAULT_TIMESTAMP_REQUIRED_LENGTH = 2048 + +# Set a timeout value in seconds (float) for non-blocking socket operations. +SOCKET_TIMEOUT = 1 + +# The maximum chunk of data, in bytes, we would download in every round. +CHUNK_SIZE = 8192 + +# The maximum number of slowly-retrieved chunks that we would tolerate. +MAX_NUM_OF_SLOW_CHUNKS = 5 + + diff --git a/tuf/download.py b/tuf/download.py index 162c0b7e..0aec7e2f 100755 --- a/tuf/download.py +++ b/tuf/download.py @@ -18,122 +18,306 @@ supplied by the metadata of that file. The downloaded file is technically a file-like object that will automatically destroys itself once closed. Note that the file-like object, 'tuf.util.TempFile', is returned by the - 'download_url_to_tempfileobj()' function. + '_download_file()' function. """ +import httplib import logging import os.path import socket import tuf +import tuf.conf import tuf.hash import tuf.util import tuf.formats from tuf.compatibility import httplib, ssl, urllib2, urlparse + if ssl: from tuf.compatibility import match_hostname else: - raise tuf.Error( "No SSL support!" 
) # TODO: degrade gracefully + raise tuf.Error("No SSL support!") # TODO: degrade gracefully + +# We will be overriding socket._fileobject to perform non-blocking socket +# reads. Therefore, we will need these global variables. +# http://hg.python.org/cpython/file/5be3fa83d436/Lib/socket.py#l84 + +try: + from cStringIO import StringIO +except ImportError: + from StringIO import StringIO + +try: + import errno +except ImportError: + errno = None +EINTR = getattr(errno, 'EINTR', 4) # See 'log.py' to learn how logging is handled in TUF. logger = logging.getLogger('tuf.download') -class VerifiedHTTPSConnection( httplib.HTTPSConnection ): + + + +class SaferSocketFileObject(socket._fileobject): + """We override socket._fileobject to produce a file-like object which reads + from a socket more safely than its ancestor. One of the safety properties is + that reading from a socket must be a non-blocking operation.""" + + def __init__(self, sock, mode='rb', bufsize=-1, close=False): + super(SaferSocketFileObject, self).__init__(sock, mode=mode, + bufsize=bufsize, close=close) + + # Count the number of slowly-retrieved chunks. + self.__number_of_slow_chunks = 0 + + + + + + # TODO: Better protection against slow-retrieval attacks. For example, we do + # not take into consideration that a sufficiently large file might take an + # intolerably long time with our present methods. We should be able to better + # protect ourselves with more careful state-keeping (such as measuring time). + def read(self, size): """ - A connection that wraps connections with ssl certificate verification. + + We override the ancestor read (socket._fileobject.read) operation to be a + non-blocking operation. + + Original code is at: + http://hg.python.org/cpython/file/5be3fa83d436/Lib/socket.py#l336 + + + size: + The length of the data chunk that we would like to download.
We assume + that the size of the expected data chunk is accurate; otherwise, we are + liable to miscount the number of truly slowly-retrieved chunks. + + + tuf.SlowRetrievalError, in case we detect a slow-retrieval attack. + + Any other exception thrown by socket._fileobject.read. + + + None. + + + Received data up to 'size' bytes. - https://github.com/pypa/pip/blob/d0fa66ecc03ab20b7411b35f7c7b423f31f77761/pip/download.py#L72 """ - def connect(self): - self.connection_kwargs = {} + # We should never try to specify a negative size. + assert size >= 0 - #TODO: refactor compatibility logic into tuf.compatibility? + # Use max, disallow tiny reads in a loop as they are very inefficient. + # We never leave read() with any leftover data from a new recv() call + # in our internal buffer. + rbufsize = max(self._rbufsize, self.default_bufsize) + # Our use of StringIO rather than lists of string objects returned by + # recv() minimizes memory usage and fragmentation that occurs when + # rbufsize is large compared to the typical return value of recv(). + buf = self._rbuf + buf.seek(0, 2) # seek end - # for > py2.5 - if hasattr(self, 'timeout'): - self.connection_kwargs.update(timeout = self.timeout) + # Read until size bytes or EOF seen, whichever comes first + buf_len = buf.tell() + if buf_len >= size: + # Already have size bytes in our buffer? Extract and return. + buf.seek(0) + rv = buf.read(size) + self._rbuf = StringIO() + self._rbuf.write(buf.read()) + return rv - # for >= py2.7 - if hasattr(self, 'source_address'): - self.connection_kwargs.update(source_address = self.source_address) + self._rbuf = StringIO() # reset _rbuf. we consume it via buf. + while self.__number_of_slow_chunks < tuf.conf.MAX_NUM_OF_SLOW_CHUNKS: + left = size - buf_len + # recv() will malloc the amount of memory given as its + # parameter even though it often returns much less data + # than that. The returned data string is short lived + # as we copy it into a StringIO and free it. 
This avoids + # fragmentation issues on many platforms. + try: + data = self._sock.recv(left) + except socket.timeout: + # Since the socket recv operation timed out, we increment the running + # counter of slow chunks and try again. + self.__number_of_slow_chunks += 1 + logger.warn('slow chunk {0}'.format(self.__number_of_slow_chunks)) + continue + except socket.error, e: + if e.args[0] == EINTR: + continue + raise + if not data: + break + n = len(data) + if n == size and not buf_len: + # Shortcut. Avoid buffer data copies when: + # - We have no data in our buffer. + # AND + # - Our call to recv returned exactly the + # number of bytes we were asked to read. + return data + if n == left: + buf.write(data) + del data # explicit free + break + assert n <= left, "recv(%d) returned %d bytes" % (left, n) + buf.write(data) + buf_len += n + del data # explicit free + #assert buf_len == buf.tell() + # Since n < left with timeout on self._sock.recv, this is a slow chunk. + # We assume that 'size' is accurate w.r.t. the overall file length; + # otherwise, we will miscount the number of truly slow chunks. + self.__number_of_slow_chunks += 1 + logger.warn('slow chunk {0}: {1} <= {2}'.format(self.__number_of_slow_chunks, n, left)) + else: + # Since we saw more than a tolerable number of slow chunks, we flag this + # as a possible slow-retrieval attack. This threshold will determine our + # bias: if it is too low, we will have more false positives; if it is + # too high, we will have more false negatives.
+ logger.warn('slow chunks: {0}'.format(self.__number_of_slow_chunks)) + raise tuf.SlowRetrievalError(self.__number_of_slow_chunks) + return buf.getvalue() - sock = socket.create_connection((self.host, self.port), **self.connection_kwargs) - # for >= py2.7 - if getattr(self, '_tunnel_host', None): - self.sock = sock - self._tunnel() - # set location of certificate authorities - assert os.path.isfile( tuf.conf.ssl_certificates ) - cert_path = tuf.conf.ssl_certificates - # TODO: Disallow SSLv2. - # http://docs.python.org/dev/library/ssl.html#protocol-versions - # TODO: Select the right ciphers. - # http://docs.python.org/dev/library/ssl.html#cipher-selection - self.sock = ssl.wrap_socket(sock, - self.key_file, - self.cert_file, + +class SaferHTTPResponse(httplib.HTTPResponse): + """A safer version of httplib.HTTPResponse, in which we only use safe socket + file-like objects.""" + + def __init__(self, sock, debuglevel=0, strict=0, method=None, + buffering=False): + httplib.HTTPResponse.__init__(self, sock, debuglevel=debuglevel, + strict=strict, method=method, + buffering=buffering) + + # Delete the previous socket file-like object... + del self.fp + # ...and replace it with our safer version. + if buffering: + self.fp = SaferSocketFileObject(sock._sock, 'rb') + else: + self.fp = SaferSocketFileObject(sock._sock, 'rb', 0) + + + + + +class VerifiedHTTPSConnection(httplib.HTTPSConnection): + """ + A connection that wraps connections with ssl certificate verification. + + https://github.com/pypa/pip/blob/d0fa66ecc03ab20b7411b35f7c7b423f31f77761/pip/download.py#L72 + """ + + def connect(self): + + self.connection_kwargs = {} + + #TODO: refactor compatibility logic into tuf.compatibility? 
+ + # for > py2.5 + if hasattr(self, 'timeout'): + self.connection_kwargs.update(timeout = self.timeout) + + # for >= py2.7 + if hasattr(self, 'source_address'): + self.connection_kwargs.update(source_address = self.source_address) + + sock = socket.create_connection((self.host, self.port), **self.connection_kwargs) + + # for >= py2.7 + if getattr(self, '_tunnel_host', None): + self.sock = sock + self._tunnel() + + # set location of certificate authorities + assert os.path.isfile( tuf.conf.ssl_certificates ) + cert_path = tuf.conf.ssl_certificates + + # TODO: Disallow SSLv2. + # http://docs.python.org/dev/library/ssl.html#protocol-versions + # TODO: Select the right ciphers. + # http://docs.python.org/dev/library/ssl.html#cipher-selection + self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, cert_reqs=ssl.CERT_REQUIRED, ca_certs=cert_path) - match_hostname(self.sock.getpeercert(), self.host) + match_hostname(self.sock.getpeercert(), self.host) + + + + + +class VerifiedHTTPSHandler(urllib2.HTTPSHandler): + """ + A HTTPSHandler that uses our own VerifiedHTTPSConnection. + + https://github.com/pypa/pip/blob/d0fa66ecc03ab20b7411b35f7c7b423f31f77761/pip/download.py#L109 + """ + + def __init__(self, connection_class = VerifiedHTTPSConnection): + self.specialized_conn_class = connection_class + urllib2.HTTPSHandler.__init__(self) + + def https_open(self, req): + return self.do_open(self.specialized_conn_class, req) -class VerifiedHTTPSHandler( urllib2.HTTPSHandler ): - """ - A HTTPSHandler that uses our own VerifiedHTTPSConnection. 
- https://github.com/pypa/pip/blob/d0fa66ecc03ab20b7411b35f7c7b423f31f77761/pip/download.py#L109 - """ - def __init__(self, connection_class = VerifiedHTTPSConnection): - self.specialized_conn_class = connection_class - urllib2.HTTPSHandler.__init__(self) - def https_open(self, req): - return self.do_open(self.specialized_conn_class, req) def _get_request(url): - """ - Wraps the URL to retrieve to protects against "creative" - interpretation of the RFC: http://bugs.python.org/issue8732 + """ + Wraps the URL to retrieve to protects against "creative" + interpretation of the RFC: http://bugs.python.org/issue8732 - https://github.com/pypa/pip/blob/d0fa66ecc03ab20b7411b35f7c7b423f31f77761/pip/download.py#L147 - """ + https://github.com/pypa/pip/blob/d0fa66ecc03ab20b7411b35f7c7b423f31f77761/pip/download.py#L147 + """ - return urllib2.Request(url, headers={'Accept-encoding': 'identity'}) + return urllib2.Request(url, headers={'Accept-encoding': 'identity'}) -def _get_opener( scheme = None ): - """ - Build a urllib2 opener based on whether the user now wants SSL. - https://github.com/pypa/pip/blob/d0fa66ecc03ab20b7411b35f7c7b423f31f77761/pip/download.py#L178 - """ - if scheme == "https": - assert os.path.isfile( tuf.conf.ssl_certificates ) - # If we are going over https, use an opener which will provide SSL - # certificate verification. - https_handler = VerifiedHTTPSHandler() - opener = urllib2.build_opener( https_handler ) +def _get_opener(scheme=None): + """ + Build a urllib2 opener based on whether the user now wants SSL. + + https://github.com/pypa/pip/blob/d0fa66ecc03ab20b7411b35f7c7b423f31f77761/pip/download.py#L178 + """ + + if scheme == "https": + assert os.path.isfile(tuf.conf.ssl_certificates) + + # If we are going over https, use an opener which will provide SSL + # certificate verification. 
+ https_handler = VerifiedHTTPSHandler() + opener = urllib2.build_opener(https_handler) + + # strip out HTTPHandler to prevent MITM spoof + for handler in opener.handlers: + if isinstance(handler, urllib2.HTTPHandler): + opener.handlers.remove(handler) + else: + # Otherwise, use the default opener. + opener = urllib2.build_opener() + + return opener + - # strip out HTTPHandler to prevent MITM spoof - for handler in opener.handlers: - if isinstance( handler, urllib2.HTTPHandler ): - opener.handlers.remove( handler ) - else: - # Otherwise, use the default opener. - opener = urllib2.build_opener() - return opener def _open_connection(url): @@ -152,7 +336,7 @@ def _open_connection(url): URL string (e.g., 'http://...' or 'ftp://...' or 'file://...') - tuf.DownloadError + None. Opens a connection to a remote server. @@ -161,78 +345,30 @@ def _open_connection(url): File-like object. """ - - try: - # urllib2.Request produces a Request object that allows for a finer control - # of the requesting process. Request object allows to add headers or data to - # the HTTP request. For instance, request method add_header(key, val) can be - # used to change/spoof 'User-Agent' from default Python-urllib/x.y to - # 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)' this can be useful if - # servers do not recognize connections that originates from - # Python-urllib/x.y. - parsed_url = urlparse.urlparse( url ) - opener = _get_opener( scheme = parsed_url.scheme ) - request = _get_request( url ) - return opener.open( request ) - except Exception, e: - raise tuf.DownloadError(e) + # urllib2.Request produces a Request object that allows for a finer control + # of the requesting process. Request object allows to add headers or data to + # the HTTP request. 
For instance, request method add_header(key, val) can be + # used to change/spoof 'User-Agent' from default Python-urllib/x.y to + # 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)' this can be useful if + # servers do not recognize connections that originates from + # Python-urllib/x.y. + + parsed_url = urlparse.urlparse(url) + opener = _get_opener(scheme=parsed_url.scheme) + request = _get_request(url) + return opener.open(request) -def _check_hashes(input_file, trusted_hashes): - """ - - Helper function that verifies multiple secure hashes of the downloaded file. - If any of these fail it raises an exception. This is to conform with the - TUF specs, which support clients with different hashing algorithms. The - 'hash.py' module is used to compute the hashes of the 'input_file'. - - - input_file: - A file or file-like object. - - trusted_hashes: - A dictionary with hash-algorithm names as keys and hashes as dict values. - The hashes should be in the hexdigest format. - - - tuf.BadHashError, if the hashes don't match. - - - Hash digest object is created using the 'tuf.hash' module. - - - None. - - """ - # Verify each trusted hash of 'trusted_hashes'. Raise exception if - # any of the hashes are incorrect and return if all are correct. - for algorithm, trusted_hash in trusted_hashes.items(): - digest_object = tuf.hash.digest(algorithm) - digest_object.update(input_file.read()) - computed_hash = digest_object.hexdigest() - if trusted_hash != computed_hash: - msg = 'Hashes do not match. Expected '+trusted_hash+' got '+computed_hash - raise tuf.BadHashError(msg) - else: - logger.info('The file\'s '+algorithm+' hash is correct: '+trusted_hash) - - return - - - - - -def _download_fixed_amount_of_data(connection, temp_file, file_length, - required_length): +def _download_fixed_amount_of_data(connection, temp_file, required_length): """ This is a helper function, where the download really happens. 
While-block reads data from connection a fixed chunk of data at a time, or less, until - 'file_length' is reached. + 'required_length' is reached. connection: @@ -243,9 +379,6 @@ def _download_fixed_amount_of_data(connection, temp_file, file_length, A temporary file where the contents at the URL specified by the 'connection' object will be stored. - file_length: - The number of bytes that the server claims is the size of the file. - required_length: The number of bytes that we must download for the file. This is almost always specified by the TUF metadata for the data file in question @@ -265,9 +398,6 @@ def _download_fixed_amount_of_data(connection, temp_file, file_length, """ - # The maximum chunk of data, in bytes, we would download in every round. - BLOCK_SIZE = 8192 - # Keep track of total bytes downloaded. total_downloaded = 0 @@ -276,22 +406,17 @@ def _download_fixed_amount_of_data(connection, temp_file, file_length, # We download a fixed chunk of data in every round. This is so that we # can defend against slow retrieval attacks. Furthermore, we do not wish # to download an extremely large file in one shot. - data = connection.read(min(BLOCK_SIZE, file_length-total_downloaded)) + amount_to_read = min(tuf.conf.CHUNK_SIZE, + required_length-total_downloaded) + logger.debug('Reading next chunk...') + data = connection.read(amount_to_read) # We might have no more data to read. Check number of bytes downloaded. if not data: message = 'Downloaded '+str(total_downloaded)+'/'+ \ - str(file_length)+' bytes.' + str(required_length)+' bytes.' logger.debug(message) - # Did we download the correct amount indicated by 'Content-Length' - # or user? Because file_length is always eaqual to required_length - # we just need check one of them. - if total_downloaded != file_length: - message = 'Downloaded '+str(total_downloaded)+'. Expected '+ \ - str(file_length)+' for '+url - raise tuf.DownloadError(message) - # Finally, we signal that the download is complete. 
break @@ -303,14 +428,169 @@ def _download_fixed_amount_of_data(connection, temp_file, file_length, else: return total_downloaded finally: + # Whatever happens, make sure that we always close the connection. connection.close() -def download_url_to_tempfileobj(url, required_hashes=None, - required_length=None): +def _get_content_length(connection): + """ + + A helper function that gets the purported file length from server. + + + connection: + The object that the _open_connection function returns for communicating + with the server about the contents of a URL. + + + No known side effects. + + + Runtime exceptions will be suppressed but logged. + + + reported_length: + The total number of bytes reported by server. If the process fails, we + return None; otherwise we would return a nonnegative integer. + + """ + + try: + # What is the length of this document according to the HTTP spec? + reported_length = connection.info().get('Content-Length') + # Try casting it as a decimal number. + reported_length = int(reported_length, 10) + # Make sure that it is a nonnegative integer. + assert reported_length > -1 + except: + logger.exception('Could not get content length about '+str(connection)+ + ' from server!') + reported_length = None + finally: + return reported_length + + + + + +def _check_content_length(reported_length, required_length): + """ + + A helper function that checks whether the length reported by server is + equal to the length we expected. + + + reported_length: + The total number of bytes reported by the server. + + required_length: + The total number of bytes obtained from (possibly default) metadata. + + + No known side effects. + + + No known exceptions. + + + None. 
+ + """ + + try: + if reported_length < required_length: + logger.warn('reported_length ('+str(reported_length)+ + ') < required_length ('+str(required_length)+')') + elif reported_length > required_length: + logger.warn('reported_length ('+str(reported_length)+ + ') > required_length ('+str(required_length)+')') + else: + logger.debug('reported_length ('+str(reported_length)+ + ') == required_length ('+str(required_length)+')') + except: + logger.exception('Could not check reported and required lengths!') + + + + + +def _check_downloaded_length(total_downloaded, required_length, + STRICT_REQUIRED_LENGTH=True): + """ + + A helper function which checks whether the total number of downloaded bytes + matches our expectation. + + + total_downloaded: + The total number of bytes supposedly downloaded for the file in question. + + required_length: + The total number of bytes expected of the file as seen from its (possibly + default) metadata. + + STRICT_REQUIRED_LENGTH: + A Boolean indicator used to signal whether we should perform strict + checking of required_length. True by default. We explicitly set this to + False when we know that we want to turn this off for downloading the + timestamp metadata, which has no signed required_length. + + + None. + + + tuf.DownloadLengthMismatchError, if STRICT_REQUIRED_LENGTH is True and + total_downloaded is not equal required_length. + + + None. + + """ + + if total_downloaded == required_length: + logger.debug('total_downloaded == required_length == '+ + str(required_length)) + else: + difference_in_bytes = abs(total_downloaded-required_length) + message = 'Downloaded '+str(total_downloaded)+' bytes, but expected '+\ + str(required_length)+' bytes. There is a difference of '+\ + str(difference_in_bytes)+' bytes!' + + # What we downloaded is not equal to the required length, but did we ask + # for strict checking of required length? + if STRICT_REQUIRED_LENGTH: + # This must be due to a programming error, and must never happen! 
+ logger.error(message) + raise tuf.DownloadLengthMismatchError(message) + else: + # We specifically disabled strict checking of required length, but we + # will log a warning anyway. This is useful when we wish to download the + # timestamp metadata, for which we have no signed metadata; so, we must + # guess a reasonable required_length for it. + logger.warn(message) + + + + + +def safe_download(url, required_length): + return _download_file(url, required_length, STRICT_REQUIRED_LENGTH=True) + + + + + +def unsafe_download(url, required_length): + return _download_file(url, required_length, STRICT_REQUIRED_LENGTH=False) + + + + + +def _download_file(url, required_length, STRICT_REQUIRED_LENGTH=True): """ Given the url, hashes and length of the desired file, this function @@ -322,98 +602,96 @@ def download_url_to_tempfileobj(url, required_hashes=None, url: - A url string that represents the location of the file. - - required_hashes: - A dictionary, where the keys represent the hashing algorithm used to - hash the file and the dict values the hexdigest. - - For instance, a hash pair might look something like this: - {'md5': '37544f383be1fc1a32f42801c9c4b4d6'} + A URL string that represents the location of the file. required_length: An integer value representing the length of the file. - + + STRICT_REQUIRED_LENGTH: + A Boolean indicator used to signal whether we should perform strict + checking of required_length. True by default. We explicitly set this to + False when we know that we want to turn this off for downloading the + timestamp metadata, which has no signed required_length. + - 'tuf.util.TempFile' object is created. + A 'tuf.util.TempFile' object is created on disk to store the contents of + 'url'. - tuf.DownloadError, if there was an error while downloading the file. - - tuf.FormatError, if any of the arguments are improperly formatted. + tuf.DownloadLengthMismatchError, if there was a mismatch of observed vs + expected lengths while downloading the file. 
+ + tuf.FormatError, if any of the arguments are improperly formatted. + + Any other unforeseen runtime exception. - 'tuf.util.TempFile' instance. + A 'tuf.util.TempFile' file-like object which points to the contents of + 'url'. """ # Do all of the arguments have the appropriate format? # Raise 'tuf.FormatError' if there is a mismatch. tuf.formats.URL_SCHEMA.check_match(url) - if required_hashes is not None: - tuf.formats.HASHDICT_SCHEMA.check_match(required_hashes) - if required_length is not None: - tuf.formats.LENGTH_SCHEMA.check_match(required_length) + tuf.formats.LENGTH_SCHEMA.check_match(required_length) - # 'url.replace()' is for compatibility with Windows-based systems because they - # might put back-slashes in place of forward-slashes. This converts it to the - # common format. - url = url.replace('\\','/') - logger.info('Downloading: '+url) - connection = _open_connection(url) + # 'url.replace()' is for compatibility with Windows-based systems because + # they might put back-slashes in place of forward-slashes. This converts it + # to the common format. + url = url.replace('\\', '/') + logger.info('Downloading: '+str(url)) + + # NOTE: Not thread-safe. + # Save current values or functions for restoration later. + previous_socket_timeout = socket.getdefaulttimeout() + previous_http_response_class = httplib.HTTPConnection.response_class + + # This is the temporary file that we will return to contain the contents of + # the downloaded file. temp_file = tuf.util.TempFile() - try: - # info().get('Content-Length') gets the length of the url file. - file_length = connection.info().get('Content-Length') + # NOTE: Not thread-safe. + # Set timeout to induce non-blocking socket operations. + socket.setdefaulttimeout(tuf.conf.SOCKET_TIMEOUT) + # Replace the socket file-like object class with our safer version. + httplib.HTTPConnection.response_class = SaferHTTPResponse - # If the HTTP server did not specify a Content-Length... 
- if file_length is None: - # Do we know what is the required_length for this file? - if required_length is None: - # No, we do not know this. Raise this to the user! - message = 'Do not know anything about how much to download for "' + url + '"!' - raise tuf.DownloadError(message) - else: - # Okay, the HTTP server has not told us the Content-Length, - # but we know how much we are required to download. - file_length = required_length - else: - # Do we know what is the required_length for this file? - if required_length is None: - # No, we do not know this. Avoid falling for an arbitrary-length data attack (#26). - message = 'Do not know how much is required to download for "' + url + '"!' - logger.debug(message) - file_length = int(file_length, 10) - else: - # Okay, we do know this. Go ahead with checks. - file_length = int(file_length, 10) + # Open the connection to the remote file. + connection = _open_connection(url) - # Does the url's 'file_length' match 'required_length'? - if required_length is not None and file_length != required_length: - message = 'Incorrect length for '+url+'. Expected '+str(required_length)+ \ - ', got '+str(file_length)+' bytes.' - raise tuf.DownloadError(message) + # We ask the server about how big it thinks this file should be. + reported_length = _get_content_length(connection) - # For readibility, we perform the download in a separate function, which - # returns the total number of downloaded bytes; this number should be equal - # to required_length. - total_downloaded = _download_fixed_amount_of_data(connection, temp_file, - file_length, + # Then, we check whether the required length matches the reported length. + _check_content_length(reported_length, required_length) + + # Download the contents of the URL, up to the required length, to a + # temporary file, and get the total number of downloaded bytes. 
+ total_downloaded = _download_fixed_amount_of_data(connection, temp_file, required_length) - - # We appear to have downloaded the correct amount. Check the hashes. - if required_length is not None and required_hashes is not None: - _check_hashes(temp_file, required_hashes) - # Exception is a base class for all non-exiting exceptions. - except Exception, e: - # Closing 'temp_file'. The 'temp_file' data is destroyed. + # Does the total number of downloaded bytes match the required length? + _check_downloaded_length(total_downloaded, required_length, + STRICT_REQUIRED_LENGTH=STRICT_REQUIRED_LENGTH) + + except: + # Close 'temp_file'; any written data is lost. temp_file.close_temp_file() - logger.error(str(e)) - raise tuf.DownloadError(e) + logger.exception('Could not download URL: '+str(url)) + raise + + else: + return temp_file + + finally: + # NOTE: Not thread-safe. + # Restore previously saved values or functions. + httplib.HTTPConnection.response_class = previous_http_response_class + socket.setdefaulttimeout(previous_socket_timeout) + + - return temp_file diff --git a/tuf/tests/system_tests/slow_retrieval_server.py b/tuf/tests/system_tests/slow_retrieval_server.py index 161277b6..6ccb3cd2 100755 --- a/tuf/tests/system_tests/slow_retrieval_server.py +++ b/tuf/tests/system_tests/slow_retrieval_server.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python + """ slow_retrieval_server.py @@ -24,7 +26,18 @@ import random from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer -DELAY = 1 + + + + +# Modify the HTTPServer class to pass the test_mode argument to do_GET function. +class HTTPServer_Test(HTTPServer): + def __init__(self, server_address, Handler, test_mode): + HTTPServer.__init__(self, server_address, Handler) + self.test_mode = test_mode + + + # HTTP request handler. @@ -41,37 +54,50 @@ def do_GET(self): self.send_response(200) self.send_header('Content-length', str(len(data))) self.end_headers() - - # Throttle the file by sending a character every few seconds. 
- for i in range(len(data)): + + if self.server.test_mode == "mode_1": + # before sends any data, the server does nothing during a long time. + DELAY = 1000 time.sleep(DELAY) - self.wfile.write(data[i]) + self.wfile.write(data) - return + return + + else: # "mode_2" + DELAY = 1 + # Throttle the file by sending a character every few seconds. + for i in range(len(data)): + self.wfile.write(data[i]) + time.sleep(DELAY) + return except IOError, e: self.send_error(404, 'File Not Found!') + + def get_random_port(): port = random.randint(30000, 45000) return port -def run(port): + + +def run(port, test_mode): server_address = ('localhost', port) - httpd = HTTPServer(server_address, Handler) + httpd = HTTPServer_Test(server_address, Handler, test_mode) print('Slow server is active on port: '+str(port)+' ...') httpd.handle_request() -if __name__ == '__main__': - if len(sys.argv) > 1: - port = int(sys.argv[1]) - else: - port = get_random_port() - run(port) + +if __name__ == '__main__': + port = int(sys.argv[1]) + test_mode = sys.argv[2] + assert test_mode in ("mode_1", "mode_2") + run(port, test_mode) diff --git a/tuf/tests/system_tests/test_endless_data_attack.py b/tuf/tests/system_tests/test_endless_data_attack.py index 4cac5531..e2472425 100755 --- a/tuf/tests/system_tests/test_endless_data_attack.py +++ b/tuf/tests/system_tests/test_endless_data_attack.py @@ -31,8 +31,6 @@ """ -# TODO:... 
- import os import shutil import urllib @@ -41,7 +39,7 @@ import tuf from tuf.interposition import urllib_tuf - +from tuf.log import logger class EndlessDataAttack(Exception): pass @@ -57,7 +55,7 @@ def _download(url, filename, tuf=False): -def test_arbitrary_package_attack(TUF=False): +def test_arbitrary_package_attack(TUF=False, TIMESTAMP=False): """ TUF: @@ -85,13 +83,12 @@ def test_arbitrary_package_attack(TUF=False): file_basename = os.path.basename(filepath) url_to_repo = url+'reg_repo/'+file_basename downloaded_file = os.path.join(downloads, file_basename) - endless_data = 'A'*100 + endless_data = 'A'*100000 if TUF: # Update TUF metadata before attacker modifies anything. util_test_tools.tuf_refresh_repo(root_repo, keyids) - # Modify the url. Remember that the interposition will intercept # urls that have 'localhost:9999' hostname, which was specified in # the json interposition configuration file. Look for 'hostname' @@ -103,6 +100,13 @@ def test_arbitrary_package_attack(TUF=False): target = os.path.join(tuf_targets, file_basename) util_test_tools.modify_file_at_repository(target, endless_data) + # Attacker modifies the timestamp.txt metadata. + if TIMESTAMP: + metadata = os.path.join(tuf_repo, 'metadata') + timestamp = os.path.join(metadata, 'timestamp.txt') + # FIXME: This does not correctly "patch" the timestamp metadata. + util_test_tools.modify_file_at_repository(timestamp, endless_data) + # Attacker modifies the file at the regular repository. util_test_tools.modify_file_at_repository(filepath, endless_data) @@ -113,11 +117,11 @@ def test_arbitrary_package_attack(TUF=False): # Client downloads (tries to download) the file. _download(url=url_to_repo, filename=downloaded_file, tuf=TUF) - except tuf.DownloadError: - # If tuf.DownloadError is raised, this means that TUF has prevented - # the download of an unrecognized file. Enable the logging to see, - # what actually happened. 
- pass + except (tuf.DownloadError, tuf.RepositoryError), e: + # If tuf.DownloadError or tuf.RepositoryError is raised, this means + # that TUF has prevented the download of an unrecognized file. Enable + # logging to see what actually happened. + logger.warn('Download failed: '+repr(e)) else: # Check whether the attack succeeded by inspecting the content of the @@ -136,7 +140,7 @@ def test_arbitrary_package_attack(TUF=False): try: - test_arbitrary_package_attack(TUF=False) + test_arbitrary_package_attack(TUF=False, TIMESTAMP=False) except EndlessDataAttack, error: print('Without TUF: '+str(error)) @@ -144,7 +148,19 @@ def test_arbitrary_package_attack(TUF=False): try: - test_arbitrary_package_attack(TUF=True) + test_arbitrary_package_attack(TUF=True, TIMESTAMP=False) except EndlessDataAttack, error: print('With TUF: '+str(error)) + + + +try: + # FIXME: This test passes, but not yet because we avoided an endless data + # attack with timestamp metadata, but rather because the timestamp metadata + # is invalid. 
+ test_arbitrary_package_attack(TUF=True, TIMESTAMP=True) + raise EndlessDataAttack('Timestamp metadata is not yet immune from the endless data attack!') + +except EndlessDataAttack, error: + print('With TUF: '+str(error)) diff --git a/tuf/tests/system_tests/test_slow_retrieval_attack.py b/tuf/tests/system_tests/test_slow_retrieval_attack.py index 7d25a7d2..025535f7 100755 --- a/tuf/tests/system_tests/test_slow_retrieval_attack.py +++ b/tuf/tests/system_tests/test_slow_retrieval_attack.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python + """ test_slow_retrieval_attack.py @@ -41,6 +43,9 @@ import random import subprocess from multiprocessing import Process +import tuf +import socket + import tuf.tests.system_tests.util_test_tools as util_test_tools from tuf.interposition import urllib_tuf @@ -50,25 +55,29 @@ class SlowRetrievalAttackAlert(Exception): pass -def _download(url, filename, tuf=False): - if tuf: - urllib_tuf.urlretrieve(url, filename) - +def _download(url, filename, TUF=False): + if TUF: + try: + urllib_tuf.urlretrieve(url, filename) + # If timeout or RepositoryError is raised, this means + # that TUF has prevented the slow retrieval attack. Enable + # the logging to see, what actually happened. + except (socket.timeout, tuf.RepositoryError), e: + print "Download exits with " + str(e) + "! Successfully avoid slow retrieval attack!\n\n" else: urllib.urlretrieve(url, filename) -def test_slow_retrieval_attack(TUF=False): +def test_slow_retrieval_attack(TUF=False, mode=None): - WAIT_TIME = 5 # Number of seconds to wait until download completes. - ERROR_MSG = '\tSlow Retrieval Attack was Successful!\n\n' + WAIT_TIME = 10 # Number of seconds to wait until download completes. + ERROR_MSG = mode + '\tSlow Retrieval Attack was Successful!\n\n' # Launch the server. 
port = random.randint(30000, 45000) - command = ['python', 'slow_retrieval_server.py', str(port)] - server_process = subprocess.Popen(command, stdout=subprocess.PIPE, - stderr=subprocess.PIPE) + command = ['python', 'slow_retrieval_server.py', str(port), mode] + server_process = subprocess.Popen(command, stderr=subprocess.PIPE) time.sleep(.1) try: @@ -105,6 +114,7 @@ def test_slow_retrieval_attack(TUF=False): proc = Process(target=_download, args=(url_to_file, downloaded_file, TUF)) proc.start() proc.join(WAIT_TIME) + if proc.exitcode is None: proc.terminate() raise SlowRetrievalAttackAlert(ERROR_MSG) @@ -113,21 +123,36 @@ def test_slow_retrieval_attack(TUF=False): finally: if server_process.returncode is None: server_process.kill() - print 'Slow server terminated.\n' - + print 'Communication with slow server aborted. Terminate the slow server.\n' + util_test_tools.cleanup(root_repo, server_proc) - +# Stimulates two kinds of slow retrieval attacks. +# mode_1: When download begins,the server blocks the download +# for a long time by doing nothing before it sends first byte of data. +# mode_2: During the download process, the server blocks the download +# by sending just several characters every few seconds. 
try: - test_slow_retrieval_attack(TUF=False) + #test_slow_retrieval_attack(TUF=False, mode = "mode_1") + pass except SlowRetrievalAttackAlert, error: print error +try: + #test_slow_retrieval_attack(TUF=False, mode = "mode_2") + pass +except SlowRetrievalAttackAlert, error: + print error try: - test_slow_retrieval_attack(TUF=True) + test_slow_retrieval_attack(TUF=True, mode = "mode_1") +except SlowRetrievalAttackAlert, error: + print error + +try: + test_slow_retrieval_attack(TUF=True, mode = "mode_2") except SlowRetrievalAttackAlert, error: print error diff --git a/tuf/tests/test_download.py b/tuf/tests/test_download.py index d2e0e38d..77d0d5fd 100755 --- a/tuf/tests/test_download.py +++ b/tuf/tests/test_download.py @@ -23,20 +23,20 @@ """ -import os -import sys -import time -import random import hashlib import logging -import unittest +import os +import random import subprocess -import SocketServer -import SimpleHTTPServer +import time +import unittest +import urllib2 + import tuf -import tuf.log +import tuf.conf as conf import tuf.download as download +import tuf.log import tuf.tests.unittest_toolbox as unittest_toolbox logger = logging.getLogger('tuf.test_download') @@ -70,7 +70,7 @@ def setUp(self): # NOTE: Following error is raised if delay is not applied: # - time.sleep(.1) + time.sleep(1) # Computing hash of target file data. m = hashlib.md5() @@ -79,7 +79,6 @@ def setUp(self): self.target_hash = {'md5':digest} - # Stop server process and perform clean up. def tearDown(self): unittest_toolbox.Modified_TestCase.tearDown(self) @@ -89,84 +88,57 @@ def tearDown(self): self.target_fileobj.close() - # Unit Test. + # Test: Normal case. def test_download_url_to_tempfileobj(self): - # Test: Normal cases without supplying hash and/or length arguments. 
- temp_fileobj = download.download_url_to_tempfileobj(self.url) + + download_file = download.safe_download + + temp_fileobj = download_file(self.url, self.target_data_length) self.assertEquals(self.target_data, temp_fileobj.read()) self.assertEquals(self.target_data_length, len(temp_fileobj.read())) temp_fileobj.close_temp_file() - temp_fileobj = download.download_url_to_tempfileobj(self.url, - required_length=self.target_data_length) + + # Test: Incorrect lengths. + def test_download_url_to_tempfileobj_and_lengths(self): + + # NOTE: We catch tuf.BadHashError here because the file, shorter by a byte, + # would not match the expected hashes. We log a warning when we find that + # the server-reported length of the file does not match our + # required_length. We also see that STRICT_REQUIRED_LENGTH does not change + # the outcome of the previous test. + download.safe_download(self.url, self.target_data_length - 1) + download.unsafe_download(self.url, self.target_data_length - 1) + + # NOTE: We catch tuf.DownloadError here because the STRICT_REQUIRED_LENGTH, + # which is True by default, mandates that we must download exactly what is + # required. + exception_message = 'Downloaded '+str(self.target_data_length)+\ + ' bytes, but expected '+\ + str(self.target_data_length+1)+\ + ' bytes. There is a difference of 1 bytes!' + self.assertRaisesRegexp(tuf.DownloadError, exception_message, + download.safe_download, self.url, + self.target_data_length + 1) + + # NOTE: However, we do not catch a tuf.DownloadError here for the same test + # as the previous one because we have disabled STRICT_REQUIRED_LENGTH. 
+ temp_fileobj = download.unsafe_download(self.url, self.target_data_length + 1) self.assertEquals(self.target_data, temp_fileobj.read()) self.assertEquals(self.target_data_length, len(temp_fileobj.read())) temp_fileobj.close_temp_file() - temp_fileobj = download.download_url_to_tempfileobj(self.url, - required_hashes=self.target_hash) - self.assertEquals(self.target_data, temp_fileobj.read()) - self.assertEquals(self.target_data_length, len(temp_fileobj.read())) - temp_fileobj.close_temp_file() - # Test: Normal case. - temp_fileobj = download.download_url_to_tempfileobj(self.url, - required_hashes=self.target_hash, - required_length=self.target_data_length) - self.assertEquals(self.target_data, temp_fileobj.read()) - self.assertEquals(self.target_data_length, len(temp_fileobj.read())) - temp_fileobj.close_temp_file() - - # Test: Incorrect length. - self.assertRaises(tuf.DownloadError, - download.download_url_to_tempfileobj, self.url, - required_hashes=self.target_hash, - required_length=self.target_data_length - 1) - - self.assertRaises(tuf.DownloadError, - download.download_url_to_tempfileobj, self.url, - required_hashes=self.target_hash, - required_length=self.target_data_length + 1) - - # Test: Incorrect hashs. - self.assertRaises(tuf.DownloadError, - download.download_url_to_tempfileobj, self.url, - required_hashes={'md5':self.random_string()}, - required_length=self.target_data_length) - - # Test: Incorrect/Unreachable url. 
- self.assertRaises(tuf.FormatError, - download.download_url_to_tempfileobj, None, - required_hashes=self.target_hash, - required_length=self.target_data_length) - - self.assertRaises(tuf.DownloadError, - download.download_url_to_tempfileobj, - self.random_string(), - required_hashes=self.target_hash, - required_length=self.target_data_length) - - self.assertRaises(tuf.DownloadError, - download.download_url_to_tempfileobj, - 'http://localhost:'+str(self.PORT)+'/'+self.random_string(), - required_hashes=self.target_hash, - required_length=self.target_data_length) - - self.assertRaises(tuf.DownloadError, - download.download_url_to_tempfileobj, - 'http://localhost:'+str(self.PORT+1)+'/'+self.random_string(), - required_hashes=self.target_hash, - required_length=self.target_data_length) + def test_download_url_to_tempfileobj_and_performance(self): """ # Measuring performance of 'auto_flush = False' vs. 'auto_flush = True' - # in download_url_to_tempfileobj() during write. No change was observed. + # in download._download_file() during write. No change was observed. star_cpu = time.clock() star_real = time.time() - temp_fileobj = download.download_url_to_tempfileobj(self.url, - required_hashes=self.target_hash, - required_length=self.target_data_length) + temp_fileobj = download_file(self.url, + self.target_data_length) end_cpu = time.clock() end_real = time.time() @@ -182,6 +154,28 @@ def test_download_url_to_tempfileobj(self): """ + # Test: Incorrect/Unreachable URLs. 
+ def test_download_url_to_tempfileobj_and_urls(self): + + download_file = download.safe_download + + self.assertRaises(tuf.FormatError, + download_file, None, self.target_data_length) + + self.assertRaises(ValueError, + download_file, + self.random_string(), self.target_data_length) + + self.assertRaises(urllib2.HTTPError, + download_file, + 'http://localhost:'+str(self.PORT)+'/'+self.random_string(), + self.target_data_length) + + self.assertRaises(urllib2.URLError, + download_file, + 'http://localhost:'+str(self.PORT+1)+'/'+self.random_string(), + self.target_data_length) + # Run unit test. if __name__ == '__main__': diff --git a/tuf/tests/test_updater.py b/tuf/tests/test_updater.py index ba7e234c..acd536c7 100755 --- a/tuf/tests/test_updater.py +++ b/tuf/tests/test_updater.py @@ -45,18 +45,28 @@ class guarantees the order of unit tests. So that, 'test_something_A' import tuf +import tuf.client.updater as updater +import tuf.conf import tuf.log -import tuf.util import tuf.formats +import tuf.keydb import tuf.repo.keystore as keystore import tuf.repo.signerlib as signerlib -import tuf.client.updater as updater +import tuf.roledb import tuf.tests.repository_setup as setup import tuf.tests.unittest_toolbox as unittest_toolbox +import tuf.util logger = logging.getLogger('tuf.test_updater') +# This is the default metadata that we would create for the timestamp role, +# because it has no signed metadata for itself. 
+DEFAULT_TIMESTAMP_FILEINFO = { + 'hashes': None, + 'length': tuf.conf.DEFAULT_TIMESTAMP_REQUIRED_LENGTH +} + class TestUpdater_init_(unittest_toolbox.Modified_TestCase): @@ -203,7 +213,7 @@ def _mock_download_url_to_tempfileobj(self, output): """ - def _mock_download(url, hashes=None, length=None): + def _mock_download(url, length): if isinstance(output, (str, unicode)): file_path = output elif isinstance(output, list): @@ -213,8 +223,8 @@ def _mock_download(url, hashes=None, length=None): temp_fileobj.write(file_obj.read()) return temp_fileobj - # Patch tuf.download.download_url_to_tempfileobj(). - tuf.download.download_url_to_tempfileobj = _mock_download + # Patch tuf.download.safe_download(). + tuf.download.safe_download = _mock_download @@ -327,7 +337,7 @@ def _get_list_of_target_paths(self, targets_directory, relative=True): def _update_top_level_roles(self): self._mock_download_url_to_tempfileobj(self.timestamp_filepath) - self.Repository._update_metadata('timestamp') + self.Repository._update_metadata('timestamp', DEFAULT_TIMESTAMP_FILEINFO) # Reference self.Repository._update_metadata_if_changed(). update_if_changed = self.Repository._update_metadata_if_changed @@ -480,7 +490,7 @@ def test_3__update_metadata(self): """ # Setup - original_download = tuf.download.download_url_to_tempfileobj + original_download = tuf.download.safe_download # Since client's '.../metadata/current' will need to have separate # gzipped metadata file in order to test compressed file handling, @@ -504,13 +514,15 @@ def test_3__update_metadata(self): # Test: Invalid file downloaded. # Patch 'download.download_url_to_tempfileobj' function. self._mock_download_url_to_tempfileobj(self.release_filepath) - self.assertRaises(tuf.RepositoryError, _update_metadata, 'targets') + # TODO: Set fileinfo to a valid object. + self.assertRaises(tuf.RepositoryError, _update_metadata, 'targets', None) # Test: normal case. # Patch 'download.download_url_to_tempfileobj' function. 
self._mock_download_url_to_tempfileobj(self.targets_filepath) - _update_metadata('targets') + # TODO: Set fileinfo to a valid object. + _update_metadata('targets', None) list_of_targets = self.Repository.metadata['current']['targets']['targets'] # Verify that the added target's path is listed in target's metadata. @@ -527,7 +539,8 @@ def test_3__update_metadata(self): # Re-patch 'download.download_url_to_tempfileobj' function. self._mock_download_url_to_tempfileobj(targets_filepath_compressed) - _update_metadata('targets', compression='gzip') + # TODO: Set fileinfo to a valid object. + _update_metadata('targets', None, compression='gzip') list_of_targets = self.Repository.metadata['current']['targets']['targets'] # Verify that the added target's path is listed in target's metadata. @@ -541,7 +554,7 @@ def test_3__update_metadata(self): self._remove_target_from_targets_dir(added_target_1) # RESTORE - tuf.download.download_url_to_tempfileobj = original_download + tuf.download.safe_download = original_download @@ -603,7 +616,7 @@ def test_3__update_metadata_if_changed(self): """ # Setup - original_download = tuf.download.download_url_to_tempfileobj + original_download = tuf.download.safe_download # To test updater._update_metadata_if_changed, 'targets' metadata file is # going to be modified at the server's repository. @@ -623,7 +636,7 @@ def test_3__update_metadata_if_changed(self): self._mock_download_url_to_tempfileobj(self.timestamp_filepath) # Update timestamp metadata, it will indicate change in release metadata. - self.Repository._update_metadata('timestamp') + self.Repository._update_metadata('timestamp', DEFAULT_TIMESTAMP_FILEINFO) # Save current release metadata before updating. It will be used to # verify the update. @@ -667,7 +680,7 @@ def test_3__update_metadata_if_changed(self): self._mock_download_url_to_tempfileobj(self.timestamp_filepath) # Update timestamp metadata, it will indicate change in release metadata. 
- self.Repository._update_metadata('timestamp') + self.Repository._update_metadata('timestamp', DEFAULT_TIMESTAMP_FILEINFO) # Save current release metadata before updating. It will be used to # verify the update. @@ -694,7 +707,7 @@ def test_3__update_metadata_if_changed(self): self._remove_target_from_targets_dir(added_target_1) # RESTORE - tuf.download.download_url_to_tempfileobj = original_download + tuf.download.safe_download = original_download @@ -753,7 +766,7 @@ def test_2__ensure_not_expired(self): def test_4_refresh(self): # Setup. - original_download = tuf.download.download_url_to_tempfileobj + original_download = tuf.download.safe_download # This unit test is based on adding an extra target file to the # server and rebuilding all server-side metadata. When 'refresh' @@ -786,7 +799,7 @@ def test_4_refresh(self): setup.build_server_repository(self.server_repo_dir, self.targets_dir) # RESTORE - tuf.download.download_url_to_tempfileobj = original_download + tuf.download.safe_download = original_download @@ -794,7 +807,7 @@ def test_4_refresh(self): def test_4__refresh_targets_metadata(self): # Setup - original_download = tuf.download.download_url_to_tempfileobj + original_download = tuf.download.safe_download # To test this method a target file would be added to a delegated role, # and metadata on the server side would be rebuilt. @@ -851,7 +864,7 @@ def test_4__refresh_targets_metadata(self): setup.build_server_repository(self.server_repo_dir, self.targets_dir) # RESTORE - tuf.download.download_url_to_tempfileobj = original_download + tuf.download.safe_download = original_download @@ -881,10 +894,10 @@ def test_3__targets_of_role(self): def test_5_all_targets(self): # Setup - original_download = tuf.download.download_url_to_tempfileobj + original_download = tuf.download.safe_download # As with '_refresh_targets_metadata()', tuf.roledb._roledb_dict - # has to be populated. The 'tuf.download.download_url_to_tempfileobj' method + # has to be populated. 
The 'tuf.download.safe_download' method # should be patched. The 'self.all_role_paths' argument is passed so that # the top-level roles and delegations may be all "downloaded" when # Repository.refresh() is called below. '_mock_download_url_to_tempfileobj' @@ -912,7 +925,7 @@ def test_5_all_targets(self): self.assertTrue(len(all_targets) is 6) # RESTORE - tuf.download.download_url_to_tempfileobj = original_download + tuf.download.safe_download = original_download @@ -941,7 +954,7 @@ def test_5_targets_of_role(self): def test_6_target(self): # Requirements: make sure roledb_dict is populated and - # tuf.download.download_url_to_tempfileobj function is patched. + # tuf.download.safe_download function is patched. # Setup targets_dir_content = os.listdir(self.targets_dir) @@ -972,9 +985,9 @@ def test_6_target(self): def test_6_download_target(self): # Setup: - original_download = tuf.download.download_url_to_tempfileobj + original_download = tuf.download.safe_download - # 'tuf.download.download_url_to_tempfileobj' method should be patched. + # 'tuf.download.safe_download' method should be patched. target_rel_paths_src = self._get_list_of_target_paths(self.targets_dir) # Create temporary directory that will be passed as an argument to the @@ -1019,7 +1032,7 @@ def test_6_download_target(self): mirrors[mirror_name]['confined_target_dirs'] = [''] # RESTORE - tuf.download.download_url_to_tempfileobj = original_download + tuf.download.safe_download = original_download @@ -1027,11 +1040,11 @@ def test_6_download_target(self): def test_7_updated_targets(self): # Setup: - original_download = tuf.download.download_url_to_tempfileobj + original_download = tuf.download.safe_download # In this test, client will have two target files. Server will modify # one of them. As with 'all_targets' function, tuf.roledb._roledb_dict - # has to be populated. 'tuf.download.download_url_to_tempfileobj' method + # has to be populated. 'tuf.download.safe_download' method # should be patched. 
target_rel_paths_src = self._get_list_of_target_paths(self.targets_dir) @@ -1090,7 +1103,7 @@ def test_7_updated_targets(self): self.fail(msg) # RESTORE - tuf.download.download_url_to_tempfileobj = original_download + tuf.download.safe_download = original_download @@ -1098,7 +1111,7 @@ def test_7_updated_targets(self): def test_8_remove_obsolete_targets(self): # Setup: - original_download = tuf.download.download_url_to_tempfileobj + original_download = tuf.download.safe_download # This unit test should be last, because it removes target files from the # server's targets directory. It is done to avoid adding files, rebuilding @@ -1149,7 +1162,7 @@ def test_8_remove_obsolete_targets(self): self.assertTrue(os.listdir(dest_dir), 2) # RESTORE - tuf.download.download_url_to_tempfileobj = original_download + tuf.download.safe_download = original_download def tearDownModule(): diff --git a/tuf/util.py b/tuf/util.py index 6651cb85..cd422db8 100755 --- a/tuf/util.py +++ b/tuf/util.py @@ -249,6 +249,8 @@ def decompress_temp_file_object(self, compression): tuf.Error: If an invalid compression is given. + tuf.DecompressionError: If the decompression failed for any reason. + 'self._orig_file' is used to store the original data of 'temporary_file'. @@ -266,10 +268,17 @@ def decompress_temp_file_object(self, compression): if compression != 'gzip': raise tuf.Error('Only gzip compression is supported.') + self.seek(0) self._compression = compression self._orig_file = self.temporary_file - self.temporary_file = gzip.GzipFile(fileobj=self.temporary_file, mode='rb') + + try: + self.temporary_file = gzip.GzipFile(fileobj=self.temporary_file, mode='rb') + except: + raise tuf.DecompressionError(self.temporary_file) + +