diff --git a/README.md b/README.md new file mode 100644 index 00000000..87f96f42 --- /dev/null +++ b/README.md @@ -0,0 +1,37 @@ +# A Framework for Securing Software Update Systems + +TUF (The Update Framework) helps developers secure their new or existing +software update systems. Software update systems are vulnerable to many known +attacks, including those that can result in clients being compromised or +crashed. TUF helps solve this problem by providing a flexible security +framework that can be added to software updaters. + +# What Is a Software Update System? + +Generally, a software update system is an application (or part of an +application) running on a client system that obtains and installs software. +This can include updates to software that is already installed or even +completely new software. + +Three major classes of software update systems are: + +* Application Updaters - which are used by applications to update +themselves. For example, Firefox updates itself through its own application +updater. + +* Library Package Managers - such as those offered by many programming +languages for installing additional libraries. These are systems such as +Python's pip/easy_install + PyPI, Perl's CPAN, Ruby's Gems, and PHP's PEAR. + +* System Package Managers - used by operating systems to update and install all +of the software on a client system. Debian's APT, Red Hat's YUM, and openSUSE's +YaST are examples of these. + +# Our Approach + +There are literally thousands of different software update systems in common +use today. (In fact the average Windows user has about two dozen different +software updaters on their machine!) + +We are building a library that can be universally (and in most cases +transparently) used to secure software update systems. 
diff --git a/README.txt b/README.txt deleted file mode 100644 index 953720f9..00000000 --- a/README.txt +++ /dev/null @@ -1,40 +0,0 @@ -A Framework for Securing Software Update Systems ------------------------------------------------- - -TUF (The Update Framework) helps developers secure their new or existing -software update systems. Software update systems are vulnerable to many known -attacks, including those that can result in clients being compromised or crashed. -TUF helps solve this problem by providing a flexible security framework that can -be added to software updaters. - - -What Is a Software Update System? ---------------------------------- - -Generally, a software update system is an application (or part of an application) -running on a client system that obtains and installs software. This can include -updates to software that is already installed or even completely new software. - -Three major classes of software update systems are: - -Application Updaters - which are used by applications use to update themselves. -For example, Firefox updates itself through its own application updater. - -Library Package Managers - such as those offered by many programming languages -for installing additional libraries. These are systems such as Python's -pip/easy_install + PyPI, Perl's CPAN, Ruby's Gems, and PHP's PEAR. - -System Package Managers - used by operating systems to update and install all of -the software on a client system. Debian's APT, Red Hat's YUM, and openSUSE's -YaST are examples of these. - - -Our Approach ------------- - -There are literally thousands of different software update systems in common use -today. (In fact the average Windows user has about two dozen different software -updaters on their machine!) - -We are building a library that can be universally (and in most cases transparently) -used to secure software update systems. 
diff --git a/docs/tuf-spec.txt b/docs/tuf-spec.txt index 517d17e3..52eab91c 100644 --- a/docs/tuf-spec.txt +++ b/docs/tuf-spec.txt @@ -564,16 +564,39 @@ "name": ROLE, "keyids" : [ KEYID, ... ] , "threshold" : THRESHOLD, - "paths" : [ PATHPATTERN, ... ] + ("path_hash_prefixes" : [ HEX_DIGEST, ... ] | + "paths" : [ PATHPATTERN, ... ]) }, ... ] } + To describe target paths, a role MUST specify only one of the + "path_hash_prefixes" or "paths" attributes, each of which we discuss next. + + The "path_hash_prefixes" list is used to succinctly describe a set of target + paths. Specifically, each HEX_DIGEST in "path_hash_prefixes" describes a set + of target paths; therefore, "path_hash_prefixes" is the union over each + prefix of its set of target paths. The target paths must meet this + condition: each target path, when hashed with the SHA-256 hash function to + produce a 64-character hexadecimal digest (HEX_DIGEST), must share the same + prefix as one of the prefixes in "path_hash_prefixes". This is useful to + split a large number of targets into separate bins identified by consistent + hashing. + + TODO: Should the TUF spec restrict the repository to one particular + algorithm? Should we allow the repository to specify in the role dictionary + the algorithm used for these generated hashed paths? + The "paths" list describes paths that the role is trusted to provide. Clients MUST check that a target is in one of the trusted paths of all roles in a delegation chain, not just in a trusted path of the role that describes - the target file. The format of a PATHPATTERN may be either a path to a single - file, or a path to a directory to indicate all files and/or subdirectories - under that directory. + the target file. The format of a PATHPATTERN may be either a path to a + single file, or a path to a directory to indicate all files and/or + subdirectories under that directory. 
+ + A path to a directory is used to indicate all possible targets sharing that + directory as a prefix; e.g. if the directory is "targets/A", then targets + which match that directory include "targets/A/B.txt" and + "targets/A/B/C.txt". We are currently investigating a few "priority tag" schemes to resolve conflicts between delegated roles that share responsibility for overlapping @@ -581,11 +604,11 @@ consider metadata in order of appearance of delegations; we treat the order of delegations such that the first delegation is trusted more than the second one, the second delegation is trusted more than the third one, and so - on. The metadata of the first delegation will override that of the second delegation, - the metadata of the second delegation will override that of the third - delegation, and so on. In order to accommodate this scheme, the "roles" key - in the DELEGATIONS object above points to an array, instead of a hash - table, of delegated roles. + on. The metadata of the first delegation will override that of the second + delegation, the metadata of the second delegation will override that of the + third delegation, and so on. In order to accommodate this scheme, the + "roles" key in the DELEGATIONS object above points to an array, instead of a + hash table, of delegated roles. Another priority tag scheme would have the clients prefer the delegated role with the latest metadata for a conflicting target path. 
Similar ideas were diff --git a/setup.py b/setup.py index bbbc3c85..7523827d 100755 --- a/setup.py +++ b/setup.py @@ -62,7 +62,7 @@ setup( name='tuf', - version='0.1', + version='0.7.5', description='A secure updater framework for Python', author='https://www.updateframework.com', author_email='info@updateframework.com', diff --git a/tuf/__init__.py b/tuf/__init__.py index b8d34217..d2031fb4 100755 --- a/tuf/__init__.py +++ b/tuf/__init__.py @@ -26,6 +26,9 @@ __all__ = ['formats'] + + + class Error(Exception): """Indicate a generic error.""" pass @@ -50,6 +53,21 @@ class FormatError(Error): +class InvalidMetadataJSONError(FormatError): + """Indicate that a metadata file is not valid JSON.""" + + def __init__(self, exception): + # Store the original exception. + self.exception = exception + + def __str__(self): + # Show the original exception. + return str(self.exception) + + + + + class UnsupportedAlgorithmError(Error): """Indicate an error while trying to identify a user-specified algorithm.""" pass @@ -90,6 +108,14 @@ class RepositoryError(Error): +class ForbiddenTargetError(RepositoryError): + """Indicate that a role signed for a target that it was not delegated to.""" + pass + + + + + class ExpiredMetadataError(Error): """Indicate that a TUF Metadata file has expired.""" pass @@ -98,9 +124,19 @@ class ExpiredMetadataError(Error): -class MetadataNotAvailableError(Error): - """Indicate an error locating a Metadata file for a specified target/role.""" - pass +class ReplayedMetadataError(RepositoryError): + """Indicate that some metadata has been replayed to the client.""" + + def __init__(self, metadata_role, previous_version, current_version): + self.metadata_role = metadata_role + self.previous_version = previous_version + self.current_version = current_version + + + def __str__(self): + return str(self.metadata_role)+' is older than the version currently'+\ + 'installed.\nDownloaded version: '+repr(self.previous_version)+'\n'+\ + 'Current version: 
'+repr(self.current_version) @@ -114,8 +150,8 @@ class CryptoError(Error): -class UnsupportedLibraryError(Error): - """Indicate that a supported library could not be located or imported.""" +class BadSignatureError(CryptoError): + """Indicate that some metadata file had a bad signature.""" pass @@ -130,6 +166,22 @@ class UnknownMethodError(CryptoError): +class UnsupportedLibraryError(Error): + """Indicate that a supported library could not be located or imported.""" + pass + + + + + +class DecompressionError(Error): + """Indicate that some error happened while decompressing a file.""" + pass + + + + + class DownloadError(Error): """Indicate an error occurred while attempting to download a file.""" pass @@ -138,6 +190,28 @@ class DownloadError(Error): +class DownloadLengthMismatchError(DownloadError): + """Indicate that a mismatch of lengths was seen while downloading a file.""" + pass + + + + + +class SlowRetrievalError(DownloadError): + """"Indicate that downloading a file took an unreasonably long time.""" + + def __init__(self, average_download_speed): + self.__average_download_speed = average_download_speed #bytes/second + + def __str__(self): + return "Average download speed: "+str(self.__average_download_speed)+\ + " bytes/second" + + + + + class KeyAlreadyExistsError(Error): """Indicate that a key already exists and cannot be added.""" pass @@ -162,6 +236,37 @@ class UnknownRoleError(Error): +class UnknownTargetError(Error): + """Indicate an error trying to locate or identify a specified target.""" + pass + + + + + class InvalidNameError(Error): """Indicate an error while trying to validate any type of named object""" pass + + + + + +class NoWorkingMirrorError(Error): + """An updater will throw this exception in case it could not download a + metadata or target file. 
+ + A dictionary of Exception instances indexed by every mirror URL will also be + provided.""" + + def __init__(self, mirror_errors): + # Dictionary of URL strings to Exception instances + self.mirror_errors = mirror_errors + + def __str__(self): + return str(self.mirror_errors) + + + + + diff --git a/tuf/client/updater.py b/tuf/client/updater.py index f58a0416..ddbcf277 100755 --- a/tuf/client/updater.py +++ b/tuf/client/updater.py @@ -110,6 +110,7 @@ import tuf.conf import tuf.download import tuf.formats +import tuf.hash import tuf.keydb import tuf.log import tuf.mirrors @@ -353,10 +354,6 @@ def _load_metadata_from_file(self, metadata_set, metadata_role): not end in '.txt'. Examples: 'root', 'targets', 'targets/linux/x86'. - tuf.RepositoryError: - If the metadata could not be loaded or the extracted data is not a - valid metadata object. - tuf.FormatError: If role information belonging to a delegated role of 'metadata_role' is improperly formatted. @@ -391,11 +388,7 @@ def _load_metadata_from_file(self, metadata_set, metadata_role): # 'tuf.formats.SIGNABLE_SCHEMA'. metadata_signable = tuf.util.load_json_file(metadata_filepath) - # Ensure the loaded json object is properly formatted. - try: - tuf.formats.check_signable_object_format(metadata_signable) - except tuf.FormatError, e: - raise tuf.RepositoryError('Invalid format: '+repr(metadata_filepath)+'.') + tuf.formats.check_signable_object_format(metadata_signable) # Extract the 'signed' role object from 'metadata_signable'. metadata_object = metadata_signable['signed'] @@ -551,7 +544,7 @@ def refresh(self): None. - tuf.RepositoryError: + tuf.NoWorkingMirrorError: If the metadata for any of the top-level roles cannot be updated. tuf.ExpiredMetadataError: @@ -565,12 +558,23 @@ def refresh(self): None. """ - + + # The timestamp role does not have signed metadata about it; otherwise we + # would need an infinite regress of metadata. Therefore, we use some + # default, sane metadata about it. 
+ DEFAULT_TIMESTAMP_FILEINFO = { + 'hashes':None, + 'length': tuf.conf.DEFAULT_TIMESTAMP_REQUIRED_LENGTH + } + # Update the top-level metadata. The _update_metadata_if_changed() and # _update_metadata() calls below do NOT perform an update if there # is insufficient trusted signatures for the specified metadata. - # Raise 'tuf.RepositoryError' if an update fails. - self._update_metadata('timestamp') + # Raise 'tuf.NoWorkingMirrorError' if an update fails. + + # Use default but sane information for timestamp metadata, and do not + # require strict checks on its required length. + self._update_metadata('timestamp', DEFAULT_TIMESTAMP_FILEINFO) self._update_metadata_if_changed('release', referenced_metadata='timestamp') @@ -588,7 +592,349 @@ def refresh(self): - def _update_metadata(self, metadata_role, fileinfo=None, compression=None): + def __check_hashes(self, input_file, trusted_hashes): + """ + + A helper function that verifies multiple secure hashes of the downloaded + file. If any of these fail it raises an exception. This is to conform + with the TUF specs, which support clients with different hashing + algorithms. The 'hash.py' module is used to compute the hashes of the + 'input_file'. + + + input_file: + A file-like object. + + trusted_hashes: + A dictionary with hash-algorithm names as keys and hashes as dict values. + The hashes should be in the hexdigest format. + + + tuf.BadHashError, if the hashes don't match. + + + Hash digest object is created using the 'tuf.hash' module. + + + None. + + """ + + # Verify each trusted hash of 'trusted_hashes'. Raise exception if + # any of the hashes are incorrect and return if all are correct. + for algorithm, trusted_hash in trusted_hashes.items(): + digest_object = tuf.hash.digest(algorithm) + digest_object.update(input_file.read()) + computed_hash = digest_object.hexdigest() + if trusted_hash != computed_hash: + raise tuf.BadHashError('Hashes do not match! 
Expected '+ + trusted_hash+' got '+computed_hash) + else: + logger.info('The file\'s '+algorithm+' hash is correct: '+trusted_hash) + + + + + + def get_target_file(self, target_filepath, file_length, file_hashes): + """ + + Safely download a target file up to a certain length, and check its + hashes thereafter. + + + target_filepath: + The relative target filepath obtained from TUF targets metadata. + + file_length: + The expected length of the target file. + + file_hashes: + The expected hashes of the target file. + + + tuf.NoWorkingMirrorError: + The target could not be fetched. This is raised only when all known + mirrors failed to provide a valid copy of the desired target file. + + + The target file is downloaded from all known repository mirrors in the + worst case. If a valid copy of the target file is found, it is stored in + a temporary file and returned. + + + A tuf.util.TempFile file-like object containing the target. + + """ + + def verify_target_file(target_file_object): + # Every target file must have its hashes inspected. + self.__check_hashes(target_file_object, file_hashes) + + return self.__get_file(target_filepath, verify_target_file, 'target', + file_length, download_safely=True, compression=None) + + + + + + def __verify_metadata_file(self, metadata_file_object, metadata_role): + """ + + A private helpe function to verify a downloaded metadata file. + + + metadata_file_object: + A tuf.util.TempFile instance containing the metadata file. + + metadata_role: + The role name of the metadata. + + + tuf.ForbiddenTargetError: + In case a targets role has signed for a target it was not delegated to. + + tuf.FormatError: + In case the metadata file is valid JSON, but not valid TUF metadata. + + tuf.InvalidMetadataJSONError: + In case the metadata file is not valid JSON. + + tuf.ReplayedMetadataError: + In case the downloaded metadata file is older than the current one. + + tuf.RepositoryError: + In case the repository is somehow inconsistent; e.g. 
a parent has not + delegated to a child (contrary to expectations). + + tuf.SignatureError: + In case the metadata file does not have a valid signature. + + + None. + + + None. + + """ + + metadata = metadata_file_object.read() + try: + metadata_signable = tuf.util.load_json_string(metadata) + except Exception, exception: + raise tuf.InvalidMetadataJSONError(exception) + else: + # Ensure the loaded 'metadata_signable' is properly formatted. + tuf.formats.check_signable_object_format(metadata_signable) + + # Is 'metadata_signable' newer than the currently installed + # version? + current_metadata_role = self.metadata['current'].get(metadata_role) + + # Compare metadata version numbers. Ensure there is a current + # version of the metadata role to be updated. + if current_metadata_role is not None: + current_version = current_metadata_role['version'] + downloaded_version = metadata_signable['signed']['version'] + if downloaded_version < current_version: + raise tuf.ReplayedMetadataError(metadata_role, downloaded_version, + current_version) + + # Reject the metadata if any specified targets are not allowed. + if metadata_signable['signed']['_type'] == 'Targets': + self._ensure_all_targets_allowed(metadata_role, + metadata_signable['signed']) + + # Verify the signature on the downloaded metadata object. + valid = tuf.sig.verify(metadata_signable, metadata_role) + if not valid: + raise tuf.BadSignatureError() + + + + + + def unsafely_get_metadata_file(self, metadata_role, metadata_filepath, + file_length): + """ + + Unsafely download a metadata file up to a certain length. The actual file + length may not be strictly equal to its expected length. File hashes will + not be checked because it is expected to be unknown. + + + metadata_role: + The role name of the metadata. + + metadata_filepath: + The relative metadata filepath. + + file_length: + The expected length of the metadata file. + + + tuf.NoWorkingMirrorError: + The metadata could not be fetched. 
This is raised only when all known + mirrors failed to provide a valid copy of the desired metadata file. + + + The metadata file is downloaded from all known repository mirrors in the + worst case. If a valid copy of the metadata file is found, it is stored + in a temporary file and returned. + + + A tuf.util.TempFile file-like object containing the metadata. + + """ + + def unsafely_verify_metadata_file(metadata_file_object): + self.__verify_metadata_file(metadata_file_object, metadata_role) + + return self.__get_file(metadata_filepath, unsafely_verify_metadata_file, + 'meta', file_length, download_safely=False, + compression=None) + + + + + + def safely_get_metadata_file(self, metadata_role, metadata_filepath, + file_length, file_hashes, compression): + """ + + Safely download a metadata file up to a certain length, and check its + hashes thereafter. + + + metadata_role: + The role name of the metadata. + + metadata_filepath: + The relative metadata filepath. + + file_length: + The expected length of the metadata file. + + file_hashes: + The expected hashes of the metadata file. + + compression: + The name of the compression algorithm used to compress the metadata. + + + tuf.NoWorkingMirrorError: + The metadata could not be fetched. This is raised only when all known + mirrors failed to provide a valid copy of the desired metadata file. + + + The metadata file is downloaded from all known repository mirrors in the + worst case. If a valid copy of the metadata file is found, it is stored + in a temporary file and returned. + + + A tuf.util.TempFile file-like object containing the metadata. 
+ + """ + + def safely_verify_metadata_file(metadata_file_object): + self.__check_hashes(metadata_file_object, file_hashes) + self.__verify_metadata_file(metadata_file_object, metadata_role) + + return self.__get_file(metadata_filepath, safely_verify_metadata_file, + 'meta', file_length, download_safely=True, + compression=compression) + + + + + + # TODO: Instead of the more fragile 'download_safely' switch, unroll the + # function into two separate ones: one for "safe" download, and the other one + # for "unsafe" download? This should induce safer and more readable code. + def __get_file(self, filepath, verify_file, file_type, + file_length, download_safely, compression): + """ + + Try downloading, up to a certain length, a metadata or target file from a + list of known mirrors. As soon as the first valid copy of the file is + found, the rest of the mirrors will be skipped. + + + filepath: + The relative metadata or target filepath. + + verify_file: + A function which expects a file-like object and which will raise an + exception in case the file is not valid for any reason. + + file_type: + Type of data needed for download, must correspond to one of the strings + in the list ['meta', 'target']. 'meta' for metadata file type or + 'target' for target file type. It should correspond to NAME_SCHEMA + format. + + file_length: + The expected length of the metadata or target file. + + download_safely: + A boolean switch to toggle safe or unsafe download of the file. + + compression: + The name of the compression algorithm used to compress the file. + + + tuf.NoWorkingMirrorError: + The metadata could not be fetched. This is raised only when all known + mirrors failed to provide a valid copy of the desired metadata file. + + + The file is downloaded from all known repository mirrors in the worst + case. If a valid copy of the file is found, it is stored in a temporary + file and returned. + + + A tuf.util.TempFile file-like object containing the metadata or target. 
+ + """ + + file_mirrors = tuf.mirrors.get_list_of_mirrors(file_type, filepath, + self.mirrors) + # file_mirror (URL): error (Exception) + file_mirror_errors = {} + file_object = None + + for file_mirror in file_mirrors: + try: + if download_safely: + file_object = tuf.download.safe_download(file_mirror, file_length) + else: + file_object = tuf.download.unsafe_download(file_mirror, file_length) + + if compression: + file_object.decompress_temp_file_object(compression) + + verify_file(file_object) + + except Exception, exception: + # Remember the error from this mirror, and "reset" the target file. + logger.exception('Download failed from '+file_mirror+'.') + file_mirror_errors[file_mirror] = exception + file_object = None + else: + break + + if file_object: + return file_object + else: + logger.exception('Failed to download {0}: {1}'.format(filepath, + file_mirror_errors)) + raise tuf.NoWorkingMirrorError(file_mirror_errors) + + + + + + def _update_metadata(self, metadata_role, fileinfo, compression=None): """ Download, verify, and 'install' the metadata belonging to 'metadata_role'. @@ -607,6 +953,13 @@ def _update_metadata(self, metadata_role, fileinfo=None, compression=None): Ex: {"hashes": {"sha256": "3a5a6ec1f353...dedce36e0"}, "length": 1340} + STRICT_REQUIRED_LENGTH: + A Boolean indicator used to signal whether we should perform strict + checking of the required length in 'fileinfo'. True by default. True + by default. We explicitly set this to False when we know that we want + to turn this off for downloading the timestamp metadata, which has no + signed required_length. + compression: A string designating the compression type of 'metadata_role'. The 'release' metadata file may be optionally downloaded and stored in @@ -614,7 +967,7 @@ def _update_metadata(self, metadata_role, fileinfo=None, compression=None): are considered. Any other string is ignored. - tuf.RepositoryError: + tuf.NoWorkingMirrorError: The metadata could not be updated. 
This is not specific to a single failure but rather indicates that all possible ways to update the metadata have been tried and failed. @@ -628,29 +981,20 @@ def _update_metadata(self, metadata_role, fileinfo=None, compression=None): None. """ - + # Construct the metadata filename as expected by the download/mirror modules. metadata_filename = metadata_role + '.txt' + uncompressed_metadata_filename = metadata_filename # The 'release' or Targets metadata may be compressed. Add the appropriate # extension to 'metadata_filename'. if compression == 'gzip': metadata_filename = metadata_filename + '.gz' - # Reference to the 'get_list_of_mirrors' function. - get_mirrors = tuf.mirrors.get_list_of_mirrors - - # Reference to the 'download_url_to_tempfileobj' function. - download_file = tuf.download.download_url_to_tempfileobj - # Extract file length and file hashes. They will be passed as arguments # to 'download_file' function. - if fileinfo is not None: - file_length=fileinfo['length'] - file_hashes=fileinfo['hashes'] - else: - file_length=None - file_hashes=None + file_length = fileinfo['length'] + file_hashes = fileinfo['hashes'] # Attempt a file download from each mirror until the file is downloaded and # verified. If the signature of the downloaded file is valid, proceed, @@ -659,74 +1003,28 @@ def _update_metadata(self, metadata_role, fileinfo=None, compression=None): # is the object extracted from 'metadata_file_object'. Metadata saved to # files are regarded as 'signable' objects, conformant to # 'tuf.formats.SIGNABLE_SCHEMA'. 
- metadata_file_object = None - metadata_signable = None - for mirror_url in get_mirrors('meta', metadata_filename.encode("utf-8"), self.mirrors): - try: - metadata_file_object = download_file(mirror_url, file_hashes, - file_length) - except tuf.DownloadError, e: - logger.warn('Download failed from '+mirror_url+'.') - continue - if compression: - metadata_file_object.decompress_temp_file_object(compression) + # + # Some metadata (presently timestamp) will be downloaded "unsafely", in the + # sense that we can only estimate its true length and know nothing about + # its hashes. This is because not all metadata will have other metadata + # for it; otherwise we will have an infinite regress of metadata signing + # for each other. In this case, we will download the metadata up to the + # best length we can get for it, not check its hashes, but perform the rest + # of the checks (e.g. signature verification). + # + # Note also that we presently support decompression of only "safe" + # metadata, but this is easily extended to "unsafe" metadata as well as + # "safe" targets. - # Read and load the downloaded file. - metadata_signable = tuf.util.load_json_string(metadata_file_object.read()) - - # Verify the signature on the downloaded metadata object. - try: - valid = tuf.sig.verify(metadata_signable, metadata_role) - except (tuf.UnknownRoleError, tuf.FormatError, tuf.Error), e: - # FIXME: Exception.message is deprecated in 2.6, and gone in 3.0, - # but this is a workaround for Unicode messages. We need a long-term - # solution with #61. 
- # http://bugs.python.org/issue2517 - message = 'Unable to verify '+metadata_filename+':'+e.message.encode("utf-8") - logger.exception(message) - metadata_signable = None - continue - else: - if valid: - logger.debug('Good signature on '+mirror_url+'.') - break - else: - logger.warn('Bad signature on '+mirror_url+'.') - metadata_signable = None - continue - - # Raise an exception if a valid metadata signable could not be downloaded - # from any of the mirrors. - if metadata_signable is None: - message = 'Unable to update '+repr(metadata_filename)+'.' - logger.error(message) - raise tuf.RepositoryError(message) - - # Ensure the loaded 'metadata_signable' is properly formatted. - try: - tuf.formats.check_signable_object_format(metadata_signable) - except tuf.FormatError, e: - message = 'Unable to load '+repr(metadata_filename)+' after update: '+str(e) - raise tuf.RepositoryError(message) - - # Is 'metadata_signable' newer than the currently installed - # version? - current_metadata_role = self.metadata['current'].get(metadata_role) - - # Compare metadata version numbers. Ensure there is a current - # version of the metadata role to be updated. - if current_metadata_role is not None: - current_version = current_metadata_role['version'] - downloaded_version = metadata_signable['signed']['version'] - if downloaded_version < current_version: - message = repr(mirror_url)+' is older than the version currently '+\ - 'installed.\nDownloaded version: '+repr(downloaded_version)+'\n'+\ - 'Current version: '+repr(current_version) - raise tuf.RepositoryError(message) - - # Reject the metadata if any specified targets are not allowed. 
- if metadata_signable['signed']['_type'] == 'Targets': - self._ensure_all_targets_allowed(metadata_role, metadata_signable['signed']) + if metadata_role == 'timestamp': + metadata_file_object = \ + self.unsafely_get_metadata_file(metadata_role, metadata_filename, + file_length) + else: + metadata_file_object = \ + self.safely_get_metadata_file(metadata_role, metadata_filename, + file_length, file_hashes, + compression=compression) # The metadata has been verified. Move the metadata file into place. # First, move the 'current' metadata file to the 'previous' directory @@ -747,8 +1045,15 @@ def _update_metadata(self, metadata_role, fileinfo=None, compression=None): # Next, move the verified updated metadata file to the 'current' directory. # Note that the 'move' method comes from tuf.util's TempFile class. # 'metadata_file_object' is an instance of tuf.util.TempFile. - metadata_file_object.move(current_filepath) - + metadata_signable = tuf.util.load_json_string(metadata_file_object.read()) + if compression == 'gzip': + current_uncompressed_filepath = os.path.join(self.metadata_directory['current'], + uncompressed_metadata_filename) + current_uncompressed_filepath = os.path.abspath(current_uncompressed_filepath) + metadata_file_object.move(current_uncompressed_filepath) + else: + metadata_file_object.move(current_filepath) + # Extract the metadata object so we can store it to the metadata store. # 'current_metadata_object' set to 'None' if there is not an object # stored for 'metadata_role'. @@ -756,7 +1061,7 @@ def _update_metadata(self, metadata_role, fileinfo=None, compression=None): current_metadata_object = self.metadata['current'].get(metadata_role) # Finally, update the metadata and fileinfo stores. 
- logger.debug('Updated '+current_filepath+'.') + logger.debug('Updated '+repr(current_filepath)+'.') self.metadata['previous'][metadata_role] = current_metadata_object self.metadata['current'][metadata_role] = updated_metadata_object self._update_fileinfo(metadata_filename) @@ -764,6 +1069,7 @@ def _update_metadata(self, metadata_role, fileinfo=None, compression=None): + def _update_metadata_if_changed(self, metadata_role, referenced_metadata='release'): """ @@ -802,7 +1108,7 @@ def _update_metadata_if_changed(self, metadata_role, referenced_metadata='releas is 'timestamp'. See refresh(). - tuf.MetadataNotAvailableError: + tuf.NoWorkingMirrorError: If 'metadata_role' could not be downloaded after determining that it had changed. @@ -822,60 +1128,75 @@ def _update_metadata_if_changed(self, metadata_role, referenced_metadata='releas """ - metadata_filename = metadata_role + '.txt' + uncompressed_metadata_filename = metadata_role + '.txt' - # Need to ensure the referenced metadata has been loaded. - # The 'root' role may be updated without having 'release' - # available. + # Ensure the referenced metadata has been loaded. The 'root' role may be + # updated without having 'release' available. if referenced_metadata not in self.metadata['current']: - if metadata_role == 'root': - new_fileinfo = None - else: - message = 'Cannot update '+repr(metadata_role)+' because ' \ - +referenced_metadata+' is missing.' - raise tuf.RepositoryError(message) + message = 'Cannot update '+repr(metadata_role)+' because ' \ + +referenced_metadata+' is missing.' + raise tuf.RepositoryError(message) # The referenced metadata has been loaded. Extract the new # fileinfo for 'metadata_role' from it. else: - new_fileinfo = self.metadata['current'][referenced_metadata] \ - ['meta'][metadata_filename] - - # Simply return if the fileinfo has not changed according to the - # fileinfo provided by the referenced metadata. 
- if not self._fileinfo_has_changed(metadata_filename, new_fileinfo): - return - - logger.info('Metadata '+repr(metadata_filename)+' has changed.') - + message = repr(metadata_role)+' referenced in '+\ + repr(referenced_metadata)+'. '+repr(metadata_role)+' may be updated.' + logger.debug(message) + # There might be a compressed version of 'release.txt' or Targets # metadata available for download. Check the 'meta' field of # 'referenced_metadata' to see if it is listed when 'metadata_role' - # is 'release'. Check the 'meta' field of 'release' when 'metadata_role' - # is Targets metadata. The full rolename for delegated Targets metadata + # is 'release'. The full rolename for delegated Targets metadata # must begin with 'targets/'. The Release role lists all the Targets # metadata available on the repository, including any that may be in # compressed form. compression = None - gzip_path = metadata_filename + '.gz' - if metadata_role == 'release': - if gzip_path in self.metadata['current'][referenced_metadata]['meta']: - compression = 'gzip' - # Check for available compressed versions of 'targets.txt' and delegated - # Targets, which also start with 'targets'. - elif metadata_role.startswith('targets'): - # For 'targets.txt' and delegated metadata, 'referenced_metata' - # should always be 'release'. 'release.txt' specifies all roles - # provided by a repository, including their file sizes and hashes. - if gzip_path in self.metadata['current'][referenced_metadata]['meta']: + + # Extract the fileinfo of the uncompressed version of 'metadata_role'. + uncompressed_fileinfo = self.metadata['current'][referenced_metadata] \ + ['meta'] \ + [uncompressed_metadata_filename] + + # Check for availability of compressed versions of 'release.txt', + # 'targets.txt', and delegated Targets, which also start with 'targets'. + # For 'targets.txt' and delegated metadata, 'referenced_metata' + # should always be 'release'. 
'release.txt' specifies all roles + # provided by a repository, including their file sizes and hashes. + if metadata_role == 'release' or metadata_role.startswith('targets'): + gzip_metadata_filename = uncompressed_metadata_filename + '.gz' + if gzip_metadata_filename in self.metadata['current'] \ + [referenced_metadata]['meta']: compression = 'gzip' + compressed_fileinfo = self.metadata['current'][referenced_metadata] \ + ['meta'][gzip_metadata_filename] + # NOTE: When we download the compressed file, we care about its + # compressed length. However, we check the hash of the decompressed + # file; therefore we use the hashes of the uncompressed file. + fileinfo = {'length': compressed_fileinfo['length'], + 'hashes': uncompressed_fileinfo['hashes']} + logger.debug('Compressed version of '+\ + repr(uncompressed_metadata_filename)+' is available at '+\ + repr(gzip_metadata_filename)+'.') + else: + logger.debug('Compressed version of '+\ + repr(uncompressed_metadata_filename)+' not available.') + fileinfo = uncompressed_fileinfo else: - message = 'Compressed version of '+repr(metadata_filename)+' not available.' - logger.debug(message) + fileinfo = uncompressed_fileinfo + + # Simply return if the file has not changed, according to the metadata + # about the uncompressed file provided by the referenced metadata. + if not self._fileinfo_has_changed(uncompressed_metadata_filename, + uncompressed_fileinfo): + return + + logger.debug('Metadata '+repr(uncompressed_metadata_filename)+\ + ' has changed.') try: - self._update_metadata(metadata_role, fileinfo=new_fileinfo, + self._update_metadata(metadata_role, fileinfo=fileinfo, compression=compression) - except tuf.RepositoryError, e: + except: # The current metadata we have is not current but we couldn't # get new metadata. We shouldn't use the old metadata anymore. 
# This will get rid of in-memory knowledge of the role and @@ -885,8 +1206,8 @@ def _update_metadata_if_changed(self, metadata_role, referenced_metadata='releas # We shouldn't need to, but we need to check the trust # implications of the current implementation. self._delete_metadata(metadata_role) - message = 'Metadata for '+repr(metadata_role)+' could not be updated: ' - raise tuf.MetadataNotAvailableError(message+str(e)) + logger.error('Metadata for '+str(metadata_role)+' could not be updated') + raise else: # We need to remove delegated roles because the delegated roles # may not be trusted anymore. @@ -917,6 +1238,10 @@ def _ensure_all_targets_allowed(self, metadata_role, metadata_object): also validated (i.e., its calculated path hash prefix must be delegated by the parent role. + TODO: Should the TUF spec restrict the repository to one particular + algorithm? Should we allow the repository to specify in the role + dictionary the algorithm used for these generated hashed paths? + metadata_role: The name of the metadata. This is a role name and should not end @@ -929,11 +1254,11 @@ def _ensure_all_targets_allowed(self, metadata_role, metadata_object): 'signable' object). - tuf.RepositoryError: + tuf.ForbiddenTargetError: If the targets of 'metadata_role' are not allowed according to - the parent's metadata file. The 'paths' and 'path_hash_prefix' fields - are verified. - + the parent's metadata file. The 'paths' and 'path_hash_prefixes' + attributes are verified. + None. @@ -941,13 +1266,6 @@ def _ensure_all_targets_allowed(self, metadata_role, metadata_object): None. """ - - # The algorithm used by the repository to generate the hashes of the - # target filepaths. The repository may optionally organize - # targets into hashed bins to ease target delegations and role metadata - # management. The use of consistent hashing allows for a uniform - # distribution of targets into bins. - HASH_PATH_ALGORITHM = 'sha256' # Return if 'metadata_role' is 'targets'. 
'targets' is not # a delegated role. @@ -965,33 +1283,25 @@ def _ensure_all_targets_allowed(self, metadata_role, metadata_object): roles = self.metadata['current'][parent_role]['delegations']['roles'] role_index = tuf.repo.signerlib.find_delegated_role(roles, metadata_role) - # Ensure the delegated role exists prior to extracting trusted paths - # from the parent's 'paths', or trusted path hash prefixes from the parent's - # 'path_hash_prefix'. + # Ensure the delegated role exists prior to extracting trusted paths from + # the parent's 'paths', or trusted path hash prefixes from the parent's + # 'path_hash_prefixes'. if role_index is not None: role = roles[role_index] allowed_child_paths = role.get('paths') - allowed_child_path_hash_prefix = role.get('path_hash_prefix') + allowed_child_path_hash_prefixes = role.get('path_hash_prefixes') actual_child_targets = metadata_object['targets'].keys() - - if allowed_child_path_hash_prefix is not None: - for child_target in actual_child_targets: - # Calculate the hash of 'child_target' to determine if it has been - # placed in the correct bin. The client currently assumes the - # repository uses 'HASH_PATH_ALGORITHM' to generate hashes. - # TODO: Should the TUF spec restrict the repository to one particular - # algorithm? Should we allow the repository to specify in the role - # dictionary the algorithm used for these generated hashed paths? - digest_object = tuf.hash.digest(HASH_PATH_ALGORITHM) - digest_object.update(child_target) - child_target_path_hash = digest_object.hexdigest() - if not child_target_path_hash.startswith(allowed_child_path_hash_prefix): - message = 'Role '+repr(metadata_role)+' specifies target '+\ - repr(child_target)+ ' which does not have a path hash prefix '+\ - 'matching the prefix listed by the parent role '+\ - repr(parent_role)+'.' 
- raise tuf.RepositoryError(message) + if allowed_child_path_hash_prefixes is not None: + consistent = self._paths_are_consistent_with_hash_prefixes + if not consistent(actual_child_targets, + allowed_child_path_hash_prefixes): + raise tuf.ForbiddenTargetError('Role '+repr(metadata_role)+\ + ' specifies target which does not'+\ + ' have a path hash prefix matching'+\ + ' the prefix listed by the parent'+\ + ' role '+repr(parent_role)+'.') + elif allowed_child_paths is not None: # Check that each delegated target is either explicitly listed or a parent @@ -1007,26 +1317,77 @@ def _ensure_all_targets_allowed(self, metadata_role, metadata_object): if prefix == allowed_child_path: break else: - message = 'Role '+repr(metadata_role)+' specifies target '+\ - repr(child_target)+' which is not an allowed path according '+\ - 'to the delegations set by '+repr(parent_role)+'.' - raise tuf.RepositoryError(message) + raise tuf.ForbiddenTargetError('Role '+repr(metadata_role)+\ + ' specifies target '+\ + repr(child_target)+' which is not'+\ + ' an allowed path according to'+\ + ' the delegations set by '+\ + repr(parent_role)+'.') + else: - + # 'role' should have been validated when it was downloaded. - # The 'paths' or 'path_hash_prefix' fields should not be missing, - # so log a warning if this else clause is reached. - message = repr(role)+' unexpectedly did not contain one of '+\ - 'the required fields ("paths" or "path_hash_prefix").' - logger.warn(message) + # The 'paths' or 'path_hash_prefixes' attributes should not be missing, + # so raise an error in case this clause is reached. + raise tuf.FormatError(repr(role)+' did not contain one of '+\ + 'the required fields ("paths" or '+\ + '"path_hash_prefixes").') # Raise an exception if the parent has not delegated to the specified # 'metadata_role' child role. else: - message = repr(parent_role)+' has not delegated to '+\ - repr(metadata_role)+'.' 
- raise tuf.RepositoryError(message) - + raise tuf.RepositoryError(repr(parent_role)+' has not delegated to '+\ + repr(metadata_role)+'.') + + + + + + def _paths_are_consistent_with_hash_prefixes(self, paths, + path_hash_prefixes): + """ + + Determine whether a list of paths are consistent with theirs alleged + path hash prefixes. By default, the SHA256 hash function will be used. + + + paths: + A list of paths for which their hashes will be checked. + + path_hash_prefixes: + The list of path hash prefixes with which to check the list of paths. + + + No known exceptions. + + + No known side effects. + + + A Boolean indicating whether or not the paths are consistent with the + hash prefix. + """ + + # Assume that 'paths' and 'path_hash_prefixes' are inconsistent until + # proven otherwise. + consistent = False + + if len(paths) > 0 and len(path_hash_prefixes) > 0: + for path in paths: + path_hash = self._get_target_hash(path) + # Assume that every path is inconsistent until proven otherwise. + consistent = False + + for path_hash_prefix in path_hash_prefixes: + if path_hash.startswith(path_hash_prefix): + consistent = True + break + + # This path has no matching path_hash_prefix. Stop looking further. + if not consistent: break + + return consistent + @@ -1055,7 +1416,7 @@ def _fileinfo_has_changed(self, metadata_filename, new_fileinfo): dict conforms to 'tuf.formats.FILEINFO_SCHEMA' and has the form: {'length': 23423 - 'hashes': {'sha256': /dfbc32343..}} + 'hashes': {'sha256': adfbc32343..}} None. @@ -1080,11 +1441,6 @@ def _fileinfo_has_changed(self, metadata_filename, new_fileinfo): if self.fileinfo.get(metadata_filename) is None: return True - # 'new_fileinfo' should only be 'None' if updating 'root.txt' - # without having 'release.txt'. 
- if new_fileinfo is None: - return True - current_fileinfo = self.fileinfo[metadata_filename] if current_fileinfo['length'] != new_fileinfo['length']: @@ -1113,7 +1469,7 @@ def _update_fileinfo(self, metadata_filename): Update the 'self.fileinfo' entry for the metadata belonging to 'metadata_filename'. If the 'current' metadata for 'metadata_filename' - cannot be loaded, set the its fileinfo' to 'None' to signal that + cannot be loaded, set its fileinfo' to 'None' to signal that it is not in the 'self.fileinfo' AND it also doesn't exist locally. @@ -1126,7 +1482,7 @@ def _update_fileinfo(self, metadata_filename): The file details of 'metadata_filename' is calculated and - stored to the 'self.fileinfo' store. + stored in 'self.fileinfo'. None. @@ -1184,11 +1540,11 @@ def _move_current_to_previous(self, metadata_role): metadata_filepath) current_filepath = os.path.join(self.metadata_directory['current'], metadata_filepath) - + # Remove the previous path if it exists. if os.path.exists(previous_filepath): os.remove(previous_filepath) - + # Move the current path to the previous path. if os.path.exists(current_filepath): tuf.util.ensure_parent_dir(previous_filepath) @@ -1277,7 +1633,7 @@ def _ensure_not_expired(self, metadata_role): # convert it to seconds since the epoch, which is the time format # returned by time.time() (i.e., current time), before comparing. if tuf.formats.parse_time(expires) < time.time(): - message = 'Metadata '+repr(rolepath)+' expired on '+expires+' UTC.' + message = 'Metadata '+repr(rolepath)+' expired on '+repr(expires)+'.' raise tuf.ExpiredMetadataError(message) @@ -1416,6 +1772,82 @@ def _refresh_targets_metadata(self, rolename='targets', include_delegations=Fals + def refresh_targets_metadata_chain(self, rolename): + """ + Proof-of-concept. + + """ + + # List of parent roles to update. + parent_roles = [] + + parts = rolename.split('/') + + # Append the first role to the list. 
+ parent_roles.append(parts[0]) + + # The 'roles_added' string contains the roles already added. If 'a' and 'a/b' + # have been added to 'parent_roles', 'roles_added' would contain 'a/b' + roles_added = parts[0] + + # Add each subsequent role to the previous string (with a '/' separator). + # This only goes to -1 because we only want to return the parents (so we + # ignore the last element). + for next_role in parts[1:-1]: + parent_roles.append(roles_added+'/'+next_role) + roles_added = roles_added+'/'+next_role + + message = 'Minimum metadata to download to set chain of trust: '+\ + repr(parent_roles)+'.' + logger.info(message) + + # See if this role provides metadata. All the available roles + # on the repository are specified in the 'release.txt' metadata. + targets_metadata_allowed = self.metadata['current']['release']['meta'].keys() + for parent_role in parent_roles: + parent_role = parent_role + '.txt' + + if parent_role not in targets_metadata_allowed: + message = '"release.txt" does not provide all the parent roles'+\ + 'of '+repr(rolename)+'.' + raise tuf.Repository(message) + + # Remove the 'targets' role because it gets updated when the targets.txt + # file is updated in _update_metadata_if_changed('targets'). + if rolename == 'targets': + try: + parent_roles.remove('targets') + except ValueError: + message = 'The Release metadata file is missing the "targets.txt" entry.' + raise tuf.RepositoryError(message) + + # If there is nothing to refresh, we are done. + if not parent_roles: + return + + # Sort the roles so that parent roles always come first. + parent_roles.sort() + logger.debug('Roles to update: '+repr(parent_roles)+'.') + + # Iterate over 'roles_to_update', load its metadata + # file, and update it if it has changed. + for rolename in parent_roles: + self._load_metadata_from_file('previous', rolename) + self._load_metadata_from_file('current', rolename) + + self._update_metadata_if_changed(rolename) + + # Remove the role if it has expired. 
+ try: + self._ensure_not_expired(rolename) + except tuf.ExpiredMetadataError: + tuf.roledb.remove_role(rolename) + + + + + + def _targets_of_role(self, rolename, targets=None, skip_refresh=False): """ @@ -1538,10 +1970,7 @@ def targets_of_role(self, rolename='targets'): def target(self, target_filepath): """ - Return the target file information for 'target_filepath'. We interrogate - the tree of target delegations in order of appearance (which implicitly - order trustworthiness), and return the matching target found in the most - trusted role. + Return the target file information for 'target_filepath'. target_filepath: @@ -1553,14 +1982,58 @@ def target(self, target_filepath): tuf.FormatError: If 'target_filepath' is improperly formatted. - tuf.RepositoryError: + tuf.UnknownTargetError: If 'target_filepath' was not found. - Exception: - In case of an unforeseen runtime error. + Any other unforeseen runtime exception. + + + The metadata for updated delegated roles are downloaded and stored. + + + The target information for 'target_filepath', conformant to + 'tuf.formats.TARGETFILE_SCHEMA'. - TODO: Update these exceptions once the final 'path_hash_prefix' - changes have been implemented. + """ + + # Does 'target_filepath' have the correct format? + # Raise 'tuf.FormatError' if there is a mismatch. + tuf.formats.RELPATH_SCHEMA.check_match(target_filepath) + + # Get target by looking at roles in order of priority tags. + target = self._preorder_depth_first_walk(target_filepath) + + # Raise an exception if the target information could not be retrieved. + if target is None: + message = target_filepath+' not found.' + logger.error(message) + raise tuf.UnknownTargetError(message) + # Otherwise, return the found target. 
+ else: + return target + + + + + + def _preorder_depth_first_walk(self, target_filepath): + """ + + Interrogate the tree of target delegations in order of appearance (which + implicitly order trustworthiness), and return the matching target + found in the most trusted role. + + + target_filepath: + The path to the target file on the repository. This will be relative to + the 'targets' (or equivalent) directory on a given mirror. + + + tuf.FormatError: + If 'target_filepath' is improperly formatted. + + tuf.RepositoryError: + If 'target_filepath' was not found. The metadata for updated delegated roles are downloaded and stored. @@ -1571,123 +2044,239 @@ def target(self, target_filepath): """ - # Does 'target_filepath' have the correct format? - # Raise 'tuf.FormatError' if there is a mismatch. - tuf.formats.RELPATH_SCHEMA.check_match(target_filepath) + target = None + current_metadata = self.metadata['current'] + role_names = ['targets'] - # The algorithm used by the repository to generate the hashes of the - # target filepaths. The repository may optionally organize - # targets into hashed bins to ease target delegations and role metadata - # management. The use of consistent hashing allows for a uniform - # distribution of targets into bins. - HASH_PATH_ALGORITHM = 'sha256' - # Ensure the client has the most up-to-date version of 'targets.txt'. - # Raise 'tuf.MetadataNotAvailableError' if the changed metadata - # cannot be successfully downloaded and 'tuf.RepositoryError' if the - # referenced metadata is missing. Target methods such as this one - # are called after the top-level metadata have been refreshed (i.e., - # updater.refresh()). + # Raise 'tuf.NoWorkingMirrorError' if the changed metadata cannot be successfully + # downloaded and 'tuf.RepositoryError' if the referenced metadata is + # missing. Target methods such as this one are called after the top-level + # metadata have been refreshed (i.e., updater.refresh()). 
self._update_metadata_if_changed('targets') - # The target is assumed to be missing until proven otherwise. + # Preorder depth-first traversal of the tree of target delegations. + while len(role_names) > 0 and target is None: + + # Pop the role name from the top of the stack. + role_name = role_names.pop(-1) + + # The metadata for 'role_name' must be downloaded/updated before + # its targets, delegations, and child roles can be inspected. + # self.metadata['current'][role_name] is currently missing. + # _refresh_targets_metadata() does not refresh 'targets.txt', it + # expects _update_metadata_if_changed() to have already refreshed it, + # which this function has checked above. + self._refresh_targets_metadata(role_name, include_delegations=False) + + role_metadata = current_metadata[role_name] + targets = role_metadata['targets'] + delegations = role_metadata.get('delegations', {}) + child_roles = delegations.get('roles', []) + target = self._get_target_from_targets_role(role_name, targets, + target_filepath) + + if target is None: + + # Push children in reverse order of appearance onto the stack. + # NOTE: This may be a slow operation if there are many delegated roles. + for child_role in reversed(child_roles): + child_role_name = self._visit_child_role(child_role, target_filepath) + if child_role_name is None: + logger.debug('Skipping child role '+repr(child_role_name)) + else: + logger.debug('Adding child role '+repr(child_role_name)) + role_names.append(child_role_name) + + else: + logger.debug('Found target in current role '+repr(role_name)) + + return target + + + + + + def _get_target_from_targets_role(self, role_name, targets, target_filepath): + """ + + Determine whether the targets role with the given 'role_name' has the + target with the name 'target_filepath'. + + + role_name: + The name of the targets role that we are inspecting. + + targets: + The targets of the Targets role with the name 'role_name'. 
+ + target_filepath: + The path to the target file on the repository. This will be relative to + the 'targets' (or equivalent) directory on a given mirror. + + + None. + + + None. + + + The target information for 'target_filepath', conformant to + 'tuf.formats.TARGETFILE_SCHEMA'. + + """ + target = None + # Does the current role name have our target? + logger.debug('Asking role '+repr(role_name)+' about target '+\ + repr(target_filepath)) + for filepath, fileinfo in targets.iteritems(): + if filepath == target_filepath: + logger.debug('Found target '+target_filepath+' in role '+role_name) + target = {'filepath': filepath, 'fileinfo': fileinfo} + break + else: + logger.debug('No target '+target_filepath+' in role '+role_name) + + return target + + + + + + + def _visit_child_role(self, child_role, target_filepath): + """ + + Determine whether the given 'child_role' has been delegated the target + with the name 'target_filepath'. + + Ensure that we explore only delegated roles trusted with the target. We + assume conservation of delegated paths in the complete tree of + delegations. Note that the call to _ensure_all_targets_allowed in + __verify_metadata_file should already ensure that all targets metadata is + valid; i.e. that the targets signed by a delegatee is a proper subset of + the targets delegated to it by the delegator. Nevertheless, we check it + again here for performance and safety reasons. + + TODO: Should the TUF spec restrict the repository to one particular + algorithm? Should we allow the repository to specify in the role + dictionary the algorithm used for these generated hashed paths? + + + child_role: + The delegation targets role object of 'child_role', containing its + paths, path_hash_prefixes, keys and so on. + + target_filepath: + The path to the target file on the repository. This will be relative to + the 'targets' (or equivalent) directory on a given mirror. + + + None. + + + None. 
+ + + If 'child_role' has been delegated the target with the name + 'target_filepath', then we return the role name of 'child_role'. + + Otherwise, we return None. + + """ + + child_role_name = child_role['name'] + child_role_paths = child_role.get('paths') + child_role_path_hash_prefixes = child_role.get('path_hash_prefixes') + # A boolean indicator that tell us whether 'child_role' has been delegated + # the target with the name 'target_filepath'. + child_role_is_relevant = False + + if child_role_path_hash_prefixes is not None: + target_filepath_hash = self._get_target_hash(target_filepath) + for child_role_path_hash_prefix in child_role_path_hash_prefixes: + if target_filepath_hash.startswith(child_role_path_hash_prefix): + child_role_is_relevant = True + + elif child_role_paths is not None: + for child_role_path in child_role_paths: + # A child role path may be a filepath or directory. The child + # role 'child_role_name' is added if 'target_filepath' is located + # under 'child_role_path'. Explicit filepaths are also added. + prefix = os.path.commonprefix([target_filepath, child_role_path]) + if prefix == child_role_path: + child_role_is_relevant = True + + else: + # 'role_name' should have been validated when it was downloaded. + # The 'paths' or 'path_hash_prefixes' fields should not be missing, + # so we raise a format error here in case they are both missing. + raise tuf.FormatError(repr(child_role_name)+' has neither ' \ + '"paths" nor "path_hash_prefixes"!') + + if child_role_is_relevant: + logger.debug('Child role '+repr(child_role_name)+' has target '+ + repr(target_filepath)) + return child_role_name + else: + logger.debug('Child role '+repr(child_role_name)+ + ' does not have target '+repr(target_filepath)) + return None + + + + + + def _get_target_hash(self, target_filepath, hash_function='sha256'): + """ + + Compute the hash of 'target_filepath'. 
This is useful in conjunction with + the "path_hash_prefixes" attribute in a delegated targets role, which + tells us which paths it is implicitly responsible for. + + + target_filepath: + The path to the target file on the repository. This will be relative to + the 'targets' (or equivalent) directory on a given mirror. + + hash_function: + The algorithm used by the repository to generate the hashes of the + target filepaths. The repository may optionally organize targets into + hashed bins to ease target delegations and role metadata management. + The use of consistent hashing allows for a uniform distribution of + targets into bins. + + + None. + + + None. + + + The hash of 'target_filepath'. + + """ + # Calculate the hash of the filepath to determine which bin to find the # target. The client currently assumes the repository uses - # 'HASH_PATH_ALGORITHM' to generate hashes. - # TODO: Should the TUF spec restrict the repository to one particular - # algorithm? Should we allow the repository to specify in the role - # dictionary the algorithm used for these generated hashed paths? - digest_object = tuf.hash.digest(HASH_PATH_ALGORITHM) - digest_object.update(target_filepath) - target_file_path_hash = digest_object.hexdigest() + # 'hash_function' to generate hashes. + + digest_object = tuf.hash.digest(hash_function) try: - current_metadata = self.metadata['current'] - role_names = ['targets'] + digest_object.update(target_filepath) + except UnicodeEncodeError: + # Sometimes, there are Unicode characters in target paths. We assume a + # UTF-8 encoding and try to hash that. + digest_object = tuf.hash.digest(hash_function) + encoded_target_filepath = target_filepath.encode('utf-8') + digest_object.update(encoded_target_filepath) - # Preorder depth-first traversal of the tree of target delegations. - while len(role_names) > 0 and target is None: - - # Pop the role name from the top of the stack. 
- role_name = role_names.pop(-1) - - # The metadata for 'role_name' must be downloaded/updated before - # its targets, delegations, and child roles can be inspected. - # self.metadata['current'][role_name] is currently missing. - # _refresh_targets_metadata() does not refresh 'targets.txt', it - # expects _update_metadata_if_changed() to have already refreshed it, - # which this function has checked above. - self._refresh_targets_metadata(role_name, include_delegations=False) - role_metadata = current_metadata[role_name] - targets = role_metadata['targets'] - delegations = role_metadata.get('delegations', {}) - child_roles = delegations.get('roles', []) + target_filepath_hash = digest_object.hexdigest() - # Does the current role name have our target? - logger.info('Asking role '+role_name+' about target '+target_filepath) - for filepath, fileinfo in targets.iteritems(): - if filepath == target_filepath: - logger.info('Found target '+target_filepath+' in role '+role_name) - target = {'filepath': filepath, 'fileinfo': fileinfo} - break - - # Push children in reverse order of appearance onto the stack. - # NOTE: This may be a slow operation if there are many delegated roles - # or bins. - for child_role in reversed(child_roles): - child_role_name = child_role['name'] - child_role_paths = child_role.get('paths') - child_role_path_hash_prefix = child_role.get('path_hash_prefix') - - if child_role_path_hash_prefix is not None: - if target_file_path_hash.startswith(child_role_path_hash_prefix): - - # Found a matching path hash prefix. The metadata for - # 'child_role_name' will be retrieved on the next iteration - # of the while-loop. - role_names.append(child_role_name) - elif child_role_paths is not None: - - # Ensure that we explore only delegated roles trusted with the target. - # We assume conservation of delegated paths in the complete tree of - # delegations. 
Note that the call to _ensure_all_targets_allowed in - # _update_metadata should already ensure that all targets metadata is - # valid; i.e. that the targets signed by a delegatee is a proper - # subset of the targets delegated to it by the delegator. - # Nevertheless, we check it again here for performance and safety - # reasons. - for child_role_path in child_role_paths: - - # A child role path may be a filepath or directory. The child - # role 'child_role_name' is added if 'target_filepath' is located - # under 'child_role_path'. Explicit filepaths are also added. - prefix = os.path.commonprefix([target_filepath, child_role_path]) - if prefix == child_role_path: - - # The metadata for 'child_role_name' will be retrieved on the next - # iteration of the while-loop. - role_names.append(child_role_name) - else: - - # 'role_name' should have been validated when it was downloaded. - # The 'paths' or 'path_hash_prefix' fields should not be missing, - # so log a warning if this else clause is reached. - message = repr(child_role)+' unexpectedly did not contain one of '+\ - 'the required fields ("paths" or "path_hash_prefix").' - logger.warn(message) - except: - raise - finally: - # Raise an exception if the target information could not be retrieved. - if target is None: - message = target_filepath+' not found.' - logger.error(message) - raise tuf.RepositoryError(message) - # Otherwise, return the found target. - else: - return target + return target_filepath_hash @@ -1837,7 +2426,7 @@ def download_target(self, target, destination_directory): tuf.FormatError: If 'target' is not properly formatted. - tuf.DownloadError: + tuf.NoWorkingMirrorError: If a target could not be downloaded from any of the mirrors. @@ -1855,33 +2444,16 @@ def download_target(self, target, destination_directory): # Raise 'tuf.FormatError' if the check fail. 
tuf.formats.TARGETFILE_SCHEMA.check_match(target) tuf.formats.PATH_SCHEMA.check_match(destination_directory) - - # Reference to the 'get_list_of_mirrors' function. - get_mirrors = tuf.mirrors.get_list_of_mirrors - - # Reference to the 'download_url_to_tempfileobj' function. - download_file = tuf.download.download_url_to_tempfileobj # Extract the target file information. target_filepath = target['filepath'] trusted_length = target['fileinfo']['length'] trusted_hashes = target['fileinfo']['hashes'] - target_file_object = None - # Iterate through the repositority mirrors until we successfully - # download a target. - for mirror_url in get_mirrors('target', target_filepath, self.mirrors): - try: - target_file_object = download_file(mirror_url, trusted_hashes, - trusted_length) - break - except (tuf.DownloadError, tuf.FormatError), e: - logger.warn('Download failed from '+mirror_url+'.') - target_file_object = None - continue - # We have gone through all the mirrors. Did we get a target file object? - if target_file_object == None: - raise tuf.DownloadError('No download locations known.') + # get_target_file checks every mirror and returns the first target + # that passes verification. + target_file_object = self.get_target_file(target_filepath, trusted_length, + trusted_hashes) # We acquired a target file object from a mirror. Move the file into # place (i.e., locally to 'destination_directory'). @@ -1892,9 +2464,14 @@ def download_target(self, target, destination_directory): try: os.makedirs(target_dirpath) except OSError, e: - if e.errno == errno.EEXIST: - pass - else: - raise - + if e.errno == errno.EEXIST: pass + else: raise + else: + logger.warn(str(target_dirpath)+' does not exist.') + target_file_object.move(destination) + + + + + diff --git a/tuf/conf.py b/tuf/conf.py index 0b897afb..de9ad7f7 100755 --- a/tuf/conf.py +++ b/tuf/conf.py @@ -29,10 +29,29 @@ # not be deleted. 
At a minimum, each key in the mirrors dictionary # below should have a directory under 'repository_directory' # which already exists and within that directory should have the file -# 'metadata/current/root.txt'. This must be set! +# 'metadata/current/root.txt'. This MUST be set. repository_directory = None # A PEM (RFC 1422) file where you may find SSL certificate authorities # https://en.wikipedia.org/wiki/Certificate_authority # http://docs.python.org/2/library/ssl.html#certificates ssl_certificates = None + +# Since the timestamp role does not have signed metadata about itself, we set a +# default but sane upper bound for the number of bytes required to download it. +DEFAULT_TIMESTAMP_REQUIRED_LENGTH = 2048 #bytes + +# Set a timeout value in seconds (float) for non-blocking socket operations. +SOCKET_TIMEOUT = 1 #seconds + +# The maximum chunk of data, in bytes, we would download in every round. +CHUNK_SIZE = 8192 #bytes + +# The minimum average of download speed (bytes/second) that must be met to +# avoid being considered as a slow retrieval attack. +MIN_AVERAGE_DOWNLOAD_SPEED = CHUNK_SIZE #bytes/second + +# The time (in seconds) we ignore a server with a slow initial retrieval speed. +SLOW_START_GRACE_PERIOD = 30 #seconds + + diff --git a/tuf/download.py b/tuf/download.py index 162c0b7e..5f1f2f30 100755 --- a/tuf/download.py +++ b/tuf/download.py @@ -18,122 +18,384 @@ supplied by the metadata of that file. The downloaded file is technically a file-like object that will automatically destroys itself once closed. Note that the file-like object, 'tuf.util.TempFile', is returned by the - 'download_url_to_tempfileobj()' function. + '_download_file()' function. """ +# Induce "true division" (http://www.python.org/dev/peps/pep-0238/). 
+from __future__ import division + +import httplib import logging import os.path import socket +import time import tuf +import tuf.conf import tuf.hash import tuf.util import tuf.formats from tuf.compatibility import httplib, ssl, urllib2, urlparse + if ssl: from tuf.compatibility import match_hostname else: - raise tuf.Error( "No SSL support!" ) # TODO: degrade gracefully + raise tuf.Error("No SSL support!") # TODO: degrade gracefully +# We will be overriding socket._fileobject to perform non-blocking socket +# reads. Therefore, we will need these global variables. +# http://hg.python.org/cpython/file/5be3fa83d436/Lib/socket.py#l84 + +try: + from cStringIO import StringIO +except ImportError: + from StringIO import StringIO + +try: + import errno +except ImportError: + errno = None +EINTR = getattr(errno, 'EINTR', 4) # See 'log.py' to learn how logging is handled in TUF. logger = logging.getLogger('tuf.download') -class VerifiedHTTPSConnection( httplib.HTTPSConnection ): + + + +class SaferSocketFileObject(socket._fileobject): + """We override socket._fileobject to produce a file-like object which reads + from a socket more safely than its ancestor. One the safety properties is + that reading from a socket must be a non-blocking operation.""" + + def __init__(self, sock, mode='rb', bufsize=-1, close=False): + super(SaferSocketFileObject, self).__init__(sock, mode=mode, + bufsize=bufsize, close=close) + + # Count the number of bytes received with this socket. + self.__number_of_bytes_received = 0 + # Count the seconds spent receiving with this socket. + self.__seconds_spent_receiving = 0 + # Remember the time a clock was started. + self.__start_time = None + + + + + + def __start_clock(self): """ - A connection that wraps connections with ssl certificate verification. + + Start the clock to measure time difference later. + + + None. + + + AssertionError: + When any internal condition is not true. + + + Start time is kept inside this object. + + + None. 
- https://github.com/pypa/pip/blob/d0fa66ecc03ab20b7411b35f7c7b423f31f77761/pip/download.py#L72 """ - def connect(self): - self.connection_kwargs = {} + # We must have reset the clock before this. + assert self.__start_time is None + # We are using wall time, so it will be imprecise sometimes. + self.__start_time = time.time() - #TODO: refactor compatibility logic into tuf.compatibility? - # for > py2.5 - if hasattr(self, 'timeout'): - self.connection_kwargs.update(timeout = self.timeout) - # for >= py2.7 - if hasattr(self, 'source_address'): - self.connection_kwargs.update(source_address = self.source_address) - sock = socket.create_connection((self.host, self.port), **self.connection_kwargs) - # for >= py2.7 - if getattr(self, '_tunnel_host', None): - self.sock = sock - self._tunnel() + def __stop_clock_and_check_speed(self, data_length): + """ + + Stop the clock and try to detect slow retrieval. - # set location of certificate authorities - assert os.path.isfile( tuf.conf.ssl_certificates ) - cert_path = tuf.conf.ssl_certificates + + data_length: + A nonnegative integer indicating the size of data retrieved in bytes. - # TODO: Disallow SSLv2. - # http://docs.python.org/dev/library/ssl.html#protocol-versions - # TODO: Select the right ciphers. - # http://docs.python.org/dev/library/ssl.html#cipher-selection - self.sock = ssl.wrap_socket(sock, - self.key_file, - self.cert_file, + + tuf.SlowRetrievalError: + When slow retrieval is detected. + + AssertionError: + When any internal condition is not true. + + + Start time is cleared inside this object. + + + None. + + """ + + # We are using wall time, so it will be imprecise sometimes. + stop_time = time.time() + # We must have already started the clock. + assert self.__start_time > 0 + time_delta = stop_time-self.__start_time + # Reset the clock. + self.__start_time = None + + # Measure the average download speed. 
+ self.__number_of_bytes_received += data_length + self.__seconds_spent_receiving += time_delta + average_download_speed = \ + self.__number_of_bytes_received/self.__seconds_spent_receiving + + # If the average download speed is below a certain threshold, we flag this + # as a possible slow-retrieval attack. This threshold will determine our + # bias: if it is too low, we will have more false positives; if it is too + # high, we will have more false negatives. + if average_download_speed < tuf.conf.MIN_AVERAGE_DOWNLOAD_SPEED: + if self.__seconds_spent_receiving <= tuf.conf.SLOW_START_GRACE_PERIOD: + logger.debug('Slow average download speed: '+\ + str(average_download_speed)+' bytes/second') + else: + raise tuf.SlowRetrievalError(average_download_speed) + else: + logger.debug('Good average download speed: '+\ + str(average_download_speed)+' bytes/second') + + + + + + def read(self, size): + """ + + We override the ancestor read (socket._fileobject.read) operation to be a + non-blocking operation. + + Original code is at: + http://hg.python.org/cpython/file/5be3fa83d436/Lib/socket.py#l336 + + + size: + The length of the data chunk that we would like to download. We assume + that the size of the expected data chunk is accurate; otherwise, we are + liable to miscount the number of truly slowly-retrieved chunks. + + + tuf.SlowRetrievalError, in case we detect a slow-retrieval attack. + + Any other exception thrown by socket._fileobject.read. + + + None. + + + Received data up to 'size' bytes. + + """ + + # We should never try to specify a negative size. + assert size >= 0 + + # Use max, disallow tiny reads in a loop as they are very inefficient. + # We never leave read() with any leftover data from a new recv() call + # in our internal buffer. 
+ rbufsize = max(self._rbufsize, self.default_bufsize) + # Our use of StringIO rather than lists of string objects returned by + # recv() minimizes memory usage and fragmentation that occurs when + # rbufsize is large compared to the typical return value of recv(). + buf = self._rbuf + buf.seek(0, 2) # seek end + + # Read until size bytes or EOF seen, whichever comes first + buf_len = buf.tell() + if buf_len >= size: + # Already have size bytes in our buffer? Extract and return. + buf.seek(0) + rv = buf.read(size) + self._rbuf = StringIO() + self._rbuf.write(buf.read()) + return rv + + self._rbuf = StringIO() # reset _rbuf. we consume it via buf. + # Since we try to detect slow retrieval, this should not be an infinite loop. + while True: + left = size - buf_len + # recv() will malloc the amount of memory given as its + # parameter even though it often returns much less data + # than that. The returned data string is short lived + # as we copy it into a StringIO and free it. This avoids + # fragmentation issues on many platforms. + try: + self.__start_clock() + data = self._sock.recv(left) + except socket.timeout: + self.__stop_clock_and_check_speed(0) + continue + except socket.error, e: + if e.args[0] == EINTR: + self.__stop_clock_and_check_speed(0) + continue + raise + else: + self.__stop_clock_and_check_speed(len(data)) + if not data: + break + n = len(data) + if n == size and not buf_len: + # Shortcut. Avoid buffer data copies when: + # - We have no data in our buffer. + # AND + # - Our call to recv returned exactly the + # number of bytes we were asked to read. 
+ return data + if n == left: + buf.write(data) + del data # explicit free + break + assert n <= left, "recv(%d) returned %d bytes" % (left, n) + buf.write(data) + buf_len += n + del data # explicit free + #assert buf_len == buf.tell() + return buf.getvalue() + + + + + +class SaferHTTPResponse(httplib.HTTPResponse): + """A safer version of httplib.HTTPResponse, in which we only use safe socket + file-like objects.""" + + def __init__(self, sock, debuglevel=0, strict=0, method=None, + buffering=False): + httplib.HTTPResponse.__init__(self, sock, debuglevel=debuglevel, + strict=strict, method=method, + buffering=buffering) + + # Delete the previous socket file-like object... + del self.fp + # ...and replace it with our safer version. + if buffering: + self.fp = SaferSocketFileObject(sock._sock, 'rb') + else: + self.fp = SaferSocketFileObject(sock._sock, 'rb', 0) + + + + + +class VerifiedHTTPSConnection(httplib.HTTPSConnection): + """ + A connection that wraps connections with ssl certificate verification. + + https://github.com/pypa/pip/blob/d0fa66ecc03ab20b7411b35f7c7b423f31f77761/pip/download.py#L72 + """ + + def connect(self): + + self.connection_kwargs = {} + + #TODO: refactor compatibility logic into tuf.compatibility? + + # for > py2.5 + if hasattr(self, 'timeout'): + self.connection_kwargs.update(timeout = self.timeout) + + # for >= py2.7 + if hasattr(self, 'source_address'): + self.connection_kwargs.update(source_address = self.source_address) + + sock = socket.create_connection((self.host, self.port), **self.connection_kwargs) + + # for >= py2.7 + if getattr(self, '_tunnel_host', None): + self.sock = sock + self._tunnel() + + # set location of certificate authorities + assert os.path.isfile( tuf.conf.ssl_certificates ) + cert_path = tuf.conf.ssl_certificates + + # TODO: Disallow SSLv2. + # http://docs.python.org/dev/library/ssl.html#protocol-versions + # TODO: Select the right ciphers. 
+ # http://docs.python.org/dev/library/ssl.html#cipher-selection + self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, cert_reqs=ssl.CERT_REQUIRED, ca_certs=cert_path) - match_hostname(self.sock.getpeercert(), self.host) + match_hostname(self.sock.getpeercert(), self.host) + + + + + +class VerifiedHTTPSHandler(urllib2.HTTPSHandler): + """ + A HTTPSHandler that uses our own VerifiedHTTPSConnection. + + https://github.com/pypa/pip/blob/d0fa66ecc03ab20b7411b35f7c7b423f31f77761/pip/download.py#L109 + """ + + def __init__(self, connection_class = VerifiedHTTPSConnection): + self.specialized_conn_class = connection_class + urllib2.HTTPSHandler.__init__(self) + + def https_open(self, req): + return self.do_open(self.specialized_conn_class, req) -class VerifiedHTTPSHandler( urllib2.HTTPSHandler ): - """ - A HTTPSHandler that uses our own VerifiedHTTPSConnection. - https://github.com/pypa/pip/blob/d0fa66ecc03ab20b7411b35f7c7b423f31f77761/pip/download.py#L109 - """ - def __init__(self, connection_class = VerifiedHTTPSConnection): - self.specialized_conn_class = connection_class - urllib2.HTTPSHandler.__init__(self) - def https_open(self, req): - return self.do_open(self.specialized_conn_class, req) def _get_request(url): - """ - Wraps the URL to retrieve to protects against "creative" - interpretation of the RFC: http://bugs.python.org/issue8732 + """ + Wraps the URL to retrieve to protects against "creative" + interpretation of the RFC: http://bugs.python.org/issue8732 - https://github.com/pypa/pip/blob/d0fa66ecc03ab20b7411b35f7c7b423f31f77761/pip/download.py#L147 - """ + https://github.com/pypa/pip/blob/d0fa66ecc03ab20b7411b35f7c7b423f31f77761/pip/download.py#L147 + """ - return urllib2.Request(url, headers={'Accept-encoding': 'identity'}) + return urllib2.Request(url, headers={'Accept-encoding': 'identity'}) -def _get_opener( scheme = None ): - """ - Build a urllib2 opener based on whether the user now wants SSL. 
- https://github.com/pypa/pip/blob/d0fa66ecc03ab20b7411b35f7c7b423f31f77761/pip/download.py#L178 - """ - if scheme == "https": - assert os.path.isfile( tuf.conf.ssl_certificates ) - # If we are going over https, use an opener which will provide SSL - # certificate verification. - https_handler = VerifiedHTTPSHandler() - opener = urllib2.build_opener( https_handler ) +def _get_opener(scheme=None): + """ + Build a urllib2 opener based on whether the user now wants SSL. + + https://github.com/pypa/pip/blob/d0fa66ecc03ab20b7411b35f7c7b423f31f77761/pip/download.py#L178 + """ + + if scheme == "https": + assert os.path.isfile(tuf.conf.ssl_certificates) + + # If we are going over https, use an opener which will provide SSL + # certificate verification. + https_handler = VerifiedHTTPSHandler() + opener = urllib2.build_opener(https_handler) + + # strip out HTTPHandler to prevent MITM spoof + for handler in opener.handlers: + if isinstance(handler, urllib2.HTTPHandler): + opener.handlers.remove(handler) + else: + # Otherwise, use the default opener. + opener = urllib2.build_opener() + + return opener + - # strip out HTTPHandler to prevent MITM spoof - for handler in opener.handlers: - if isinstance( handler, urllib2.HTTPHandler ): - opener.handlers.remove( handler ) - else: - # Otherwise, use the default opener. - opener = urllib2.build_opener() - return opener def _open_connection(url): @@ -152,7 +414,7 @@ def _open_connection(url): URL string (e.g., 'http://...' or 'ftp://...' or 'file://...') - tuf.DownloadError + None. Opens a connection to a remote server. @@ -161,78 +423,30 @@ def _open_connection(url): File-like object. """ - - try: - # urllib2.Request produces a Request object that allows for a finer control - # of the requesting process. Request object allows to add headers or data to - # the HTTP request. 
For instance, request method add_header(key, val) can be - # used to change/spoof 'User-Agent' from default Python-urllib/x.y to - # 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)' this can be useful if - # servers do not recognize connections that originates from - # Python-urllib/x.y. - parsed_url = urlparse.urlparse( url ) - opener = _get_opener( scheme = parsed_url.scheme ) - request = _get_request( url ) - return opener.open( request ) - except Exception, e: - raise tuf.DownloadError(e) + # urllib2.Request produces a Request object that allows for a finer control + # of the requesting process. Request object allows to add headers or data to + # the HTTP request. For instance, request method add_header(key, val) can be + # used to change/spoof 'User-Agent' from default Python-urllib/x.y to + # 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)' this can be useful if + # servers do not recognize connections that originates from + # Python-urllib/x.y. + + parsed_url = urlparse.urlparse(url) + opener = _get_opener(scheme=parsed_url.scheme) + request = _get_request(url) + return opener.open(request) -def _check_hashes(input_file, trusted_hashes): - """ - - Helper function that verifies multiple secure hashes of the downloaded file. - If any of these fail it raises an exception. This is to conform with the - TUF specs, which support clients with different hashing algorithms. The - 'hash.py' module is used to compute the hashes of the 'input_file'. - - - input_file: - A file or file-like object. - - trusted_hashes: - A dictionary with hash-algorithm names as keys and hashes as dict values. - The hashes should be in the hexdigest format. - - - tuf.BadHashError, if the hashes don't match. - - - Hash digest object is created using the 'tuf.hash' module. - - - None. - - """ - # Verify each trusted hash of 'trusted_hashes'. Raise exception if - # any of the hashes are incorrect and return if all are correct. 
- for algorithm, trusted_hash in trusted_hashes.items(): - digest_object = tuf.hash.digest(algorithm) - digest_object.update(input_file.read()) - computed_hash = digest_object.hexdigest() - if trusted_hash != computed_hash: - msg = 'Hashes do not match. Expected '+trusted_hash+' got '+computed_hash - raise tuf.BadHashError(msg) - else: - logger.info('The file\'s '+algorithm+' hash is correct: '+trusted_hash) - - return - - - - - -def _download_fixed_amount_of_data(connection, temp_file, file_length, - required_length): +def _download_fixed_amount_of_data(connection, temp_file, required_length): """ This is a helper function, where the download really happens. While-block reads data from connection a fixed chunk of data at a time, or less, until - 'file_length' is reached. + 'required_length' is reached. connection: @@ -243,9 +457,6 @@ def _download_fixed_amount_of_data(connection, temp_file, file_length, A temporary file where the contents at the URL specified by the 'connection' object will be stored. - file_length: - The number of bytes that the server claims is the size of the file. - required_length: The number of bytes that we must download for the file. This is almost always specified by the TUF metadata for the data file in question @@ -265,9 +476,6 @@ def _download_fixed_amount_of_data(connection, temp_file, file_length, """ - # The maximum chunk of data, in bytes, we would download in every round. - BLOCK_SIZE = 8192 - # Keep track of total bytes downloaded. total_downloaded = 0 @@ -276,22 +484,17 @@ def _download_fixed_amount_of_data(connection, temp_file, file_length, # We download a fixed chunk of data in every round. This is so that we # can defend against slow retrieval attacks. Furthermore, we do not wish # to download an extremely large file in one shot. 
- data = connection.read(min(BLOCK_SIZE, file_length-total_downloaded)) + amount_to_read = min(tuf.conf.CHUNK_SIZE, + required_length-total_downloaded) + logger.debug('Reading next chunk...') + data = connection.read(amount_to_read) # We might have no more data to read. Check number of bytes downloaded. if not data: message = 'Downloaded '+str(total_downloaded)+'/'+ \ - str(file_length)+' bytes.' + str(required_length)+' bytes.' logger.debug(message) - # Did we download the correct amount indicated by 'Content-Length' - # or user? Because file_length is always eaqual to required_length - # we just need check one of them. - if total_downloaded != file_length: - message = 'Downloaded '+str(total_downloaded)+'. Expected '+ \ - str(file_length)+' for '+url - raise tuf.DownloadError(message) - # Finally, we signal that the download is complete. break @@ -303,14 +506,169 @@ def _download_fixed_amount_of_data(connection, temp_file, file_length, else: return total_downloaded finally: + # Whatever happens, make sure that we always close the connection. connection.close() -def download_url_to_tempfileobj(url, required_hashes=None, - required_length=None): +def _get_content_length(connection): + """ + + A helper function that gets the purported file length from server. + + + connection: + The object that the _open_connection function returns for communicating + with the server about the contents of a URL. + + + No known side effects. + + + Runtime exceptions will be suppressed but logged. + + + reported_length: + The total number of bytes reported by server. If the process fails, we + return None; otherwise we would return a nonnegative integer. + + """ + + try: + # What is the length of this document according to the HTTP spec? + reported_length = connection.info().get('Content-Length') + # Try casting it as a decimal number. + reported_length = int(reported_length, 10) + # Make sure that it is a nonnegative integer. 
+ assert reported_length > -1 + except: + logger.exception('Could not get content length about '+str(connection)+ + ' from server!') + reported_length = None + finally: + return reported_length + + + + + +def _check_content_length(reported_length, required_length): + """ + + A helper function that checks whether the length reported by server is + equal to the length we expected. + + + reported_length: + The total number of bytes reported by the server. + + required_length: + The total number of bytes obtained from (possibly default) metadata. + + + No known side effects. + + + No known exceptions. + + + None. + + """ + + try: + if reported_length < required_length: + logger.warn('reported_length ('+str(reported_length)+ + ') < required_length ('+str(required_length)+')') + elif reported_length > required_length: + logger.warn('reported_length ('+str(reported_length)+ + ') > required_length ('+str(required_length)+')') + else: + logger.debug('reported_length ('+str(reported_length)+ + ') == required_length ('+str(required_length)+')') + except: + logger.exception('Could not check reported and required lengths!') + + + + + +def _check_downloaded_length(total_downloaded, required_length, + STRICT_REQUIRED_LENGTH=True): + """ + + A helper function which checks whether the total number of downloaded bytes + matches our expectation. + + + total_downloaded: + The total number of bytes supposedly downloaded for the file in question. + + required_length: + The total number of bytes expected of the file as seen from its (possibly + default) metadata. + + STRICT_REQUIRED_LENGTH: + A Boolean indicator used to signal whether we should perform strict + checking of required_length. True by default. We explicitly set this to + False when we know that we want to turn this off for downloading the + timestamp metadata, which has no signed required_length. + + + None. 
+ + + tuf.DownloadLengthMismatchError, if STRICT_REQUIRED_LENGTH is True and + total_downloaded is not equal required_length. + + + None. + + """ + + if total_downloaded == required_length: + logger.debug('total_downloaded == required_length == '+ + str(required_length)) + else: + difference_in_bytes = abs(total_downloaded-required_length) + message = 'Downloaded '+str(total_downloaded)+' bytes, but expected '+\ + str(required_length)+' bytes. There is a difference of '+\ + str(difference_in_bytes)+' bytes!' + + # What we downloaded is not equal to the required length, but did we ask + # for strict checking of required length? + if STRICT_REQUIRED_LENGTH: + # This must be due to a programming error, and must never happen! + logger.error(message) + raise tuf.DownloadLengthMismatchError(message) + else: + # We specifically disabled strict checking of required length, but we + # will log a warning anyway. This is useful when we wish to download the + # timestamp metadata, for which we have no signed metadata; so, we must + # guess a reasonable required_length for it. + logger.warn(message) + + + + + +def safe_download(url, required_length): + return _download_file(url, required_length, STRICT_REQUIRED_LENGTH=True) + + + + + +def unsafe_download(url, required_length): + return _download_file(url, required_length, STRICT_REQUIRED_LENGTH=False) + + + + + +def _download_file(url, required_length, STRICT_REQUIRED_LENGTH=True): """ Given the url, hashes and length of the desired file, this function @@ -322,98 +680,96 @@ def download_url_to_tempfileobj(url, required_hashes=None, url: - A url string that represents the location of the file. - - required_hashes: - A dictionary, where the keys represent the hashing algorithm used to - hash the file and the dict values the hexdigest. - - For instance, a hash pair might look something like this: - {'md5': '37544f383be1fc1a32f42801c9c4b4d6'} + A URL string that represents the location of the file. 
required_length: An integer value representing the length of the file. - + + STRICT_REQUIRED_LENGTH: + A Boolean indicator used to signal whether we should perform strict + checking of required_length. True by default. We explicitly set this to + False when we know that we want to turn this off for downloading the + timestamp metadata, which has no signed required_length. + - 'tuf.util.TempFile' object is created. + A 'tuf.util.TempFile' object is created on disk to store the contents of + 'url'. - tuf.DownloadError, if there was an error while downloading the file. - - tuf.FormatError, if any of the arguments are improperly formatted. + tuf.DownloadLengthMismatchError, if there was a mismatch of observed vs + expected lengths while downloading the file. + + tuf.FormatError, if any of the arguments are improperly formatted. + + Any other unforeseen runtime exception. - 'tuf.util.TempFile' instance. + A 'tuf.util.TempFile' file-like object which points to the contents of + 'url'. """ # Do all of the arguments have the appropriate format? # Raise 'tuf.FormatError' if there is a mismatch. tuf.formats.URL_SCHEMA.check_match(url) - if required_hashes is not None: - tuf.formats.HASHDICT_SCHEMA.check_match(required_hashes) - if required_length is not None: - tuf.formats.LENGTH_SCHEMA.check_match(required_length) + tuf.formats.LENGTH_SCHEMA.check_match(required_length) - # 'url.replace()' is for compatibility with Windows-based systems because they - # might put back-slashes in place of forward-slashes. This converts it to the - # common format. - url = url.replace('\\','/') - logger.info('Downloading: '+url) - connection = _open_connection(url) + # 'url.replace()' is for compatibility with Windows-based systems because + # they might put back-slashes in place of forward-slashes. This converts it + # to the common format. + url = url.replace('\\', '/') + logger.info('Downloading: '+str(url)) + + # NOTE: Not thread-safe. 
+ # Save current values or functions for restoration later. + previous_socket_timeout = socket.getdefaulttimeout() + previous_http_response_class = httplib.HTTPConnection.response_class + + # This is the temporary file that we will return to contain the contents of + # the downloaded file. temp_file = tuf.util.TempFile() - try: - # info().get('Content-Length') gets the length of the url file. - file_length = connection.info().get('Content-Length') + # NOTE: Not thread-safe. + # Set timeout to induce non-blocking socket operations. + socket.setdefaulttimeout(tuf.conf.SOCKET_TIMEOUT) + # Replace the socket file-like object class with our safer version. + httplib.HTTPConnection.response_class = SaferHTTPResponse - # If the HTTP server did not specify a Content-Length... - if file_length is None: - # Do we know what is the required_length for this file? - if required_length is None: - # No, we do not know this. Raise this to the user! - message = 'Do not know anything about how much to download for "' + url + '"!' - raise tuf.DownloadError(message) - else: - # Okay, the HTTP server has not told us the Content-Length, - # but we know how much we are required to download. - file_length = required_length - else: - # Do we know what is the required_length for this file? - if required_length is None: - # No, we do not know this. Avoid falling for an arbitrary-length data attack (#26). - message = 'Do not know how much is required to download for "' + url + '"!' - logger.debug(message) - file_length = int(file_length, 10) - else: - # Okay, we do know this. Go ahead with checks. - file_length = int(file_length, 10) + # Open the connection to the remote file. + connection = _open_connection(url) - # Does the url's 'file_length' match 'required_length'? - if required_length is not None and file_length != required_length: - message = 'Incorrect length for '+url+'. Expected '+str(required_length)+ \ - ', got '+str(file_length)+' bytes.' 
- raise tuf.DownloadError(message) + # We ask the server about how big it thinks this file should be. + reported_length = _get_content_length(connection) - # For readibility, we perform the download in a separate function, which - # returns the total number of downloaded bytes; this number should be equal - # to required_length. - total_downloaded = _download_fixed_amount_of_data(connection, temp_file, - file_length, + # Then, we check whether the required length matches the reported length. + _check_content_length(reported_length, required_length) + + # Download the contents of the URL, up to the required length, to a + # temporary file, and get the total number of downloaded bytes. + total_downloaded = _download_fixed_amount_of_data(connection, temp_file, required_length) - - # We appear to have downloaded the correct amount. Check the hashes. - if required_length is not None and required_hashes is not None: - _check_hashes(temp_file, required_hashes) - # Exception is a base class for all non-exiting exceptions. - except Exception, e: - # Closing 'temp_file'. The 'temp_file' data is destroyed. + # Does the total number of downloaded bytes match the required length? + _check_downloaded_length(total_downloaded, required_length, + STRICT_REQUIRED_LENGTH=STRICT_REQUIRED_LENGTH) + + except: + # Close 'temp_file'; any written data is lost. temp_file.close_temp_file() - logger.error(str(e)) - raise tuf.DownloadError(e) + logger.exception('Could not download URL: '+str(url)) + raise + + else: + return temp_file + + finally: + # NOTE: Not thread-safe. + # Restore previously saved values or functions. 
+ httplib.HTTPConnection.response_class = previous_http_response_class + socket.setdefaulttimeout(previous_socket_timeout) + + - return temp_file diff --git a/tuf/examples/example_client.py b/tuf/examples/example_client.py index a1735412..78b1103f 100755 --- a/tuf/examples/example_client.py +++ b/tuf/examples/example_client.py @@ -27,12 +27,10 @@ import logging -import tuf -import tuf.log import tuf.client.updater -logger = logging.getLogger('tuf.cient.basic_client') - +# Uncomment the line below to enable printing of debugging information. +#tuf.log.set_log_level(logging.DEBUG) # Set the local repository directory containing the metadata files. tuf.conf.repository_directory = '.' diff --git a/tuf/formats.py b/tuf/formats.py index 60692efd..7142469b 100755 --- a/tuf/formats.py +++ b/tuf/formats.py @@ -273,6 +273,11 @@ targets_directory=PATH_SCHEMA, backup_directory=PATH_SCHEMA)) +# A path hash prefix is a hexadecimal string. +PATH_HASH_PREFIX_SCHEMA = HEX_SCHEMA +# A list of path hash prefixes. +PATH_HASH_PREFIXES_SCHEMA = SCHEMA.ListOf(PATH_HASH_PREFIX_SCHEMA) + # Role object in {'keyids': [keydids..], 'name': 'ABC', 'threshold': 1, # 'paths':[filepaths..]} # format. ROLE_SCHEMA = SCHEMA.Object( @@ -280,7 +285,8 @@ keyids=SCHEMA.ListOf(KEYID_SCHEMA), name=SCHEMA.Optional(ROLENAME_SCHEMA), threshold=THRESHOLD_SCHEMA, - paths=SCHEMA.Optional(RELPATHS_SCHEMA)) + paths=SCHEMA.Optional(RELPATHS_SCHEMA), + path_hash_prefixes=SCHEMA.Optional(PATH_HASH_PREFIXES_SCHEMA)) # A dict of roles where the dict keys are role names and the dict values holding # the role data/information. 
@@ -831,7 +837,8 @@ def make_fileinfo(length, hashes, custom=None): -def make_role_metadata(keyids, threshold, name=None, paths=None): +def make_role_metadata(keyids, threshold, name=None, paths=None, + path_hash_prefixes=None): """ Create a dictionary conforming to 'tuf.formats.ROLE_SCHEMA', @@ -853,7 +860,12 @@ def make_role_metadata(keyids, threshold, name=None, paths=None): The 'Target' role stores the paths of target files in its metadata file. 'paths' is a list of file paths. - + + path_hash_prefixes: + The 'Target' role stores the paths of target files in its metadata file. + 'path_hash_prefixes' is a succinct way to describe a set of paths to + target files. + tuf.FormatError, if the returned role meta is formatted incorrectly. @@ -876,7 +888,18 @@ def make_role_metadata(keyids, threshold, name=None, paths=None): if name is not None: role_meta['name'] = name - if paths is not None: + # According to the specification, the 'paths' and 'path_hash_prefixes' must + # be mutually exclusive. However, at the time of writing we do not always + # ensure that this is the case with the schema checks (see #83). Therefore, + # we must do it for ourselves. + + if paths is not None and path_hash_prefixes is not None: + raise \ + tuf.FormatError('Both "paths" and "path_hash_prefixes" are specified!') + + if path_hash_prefixes is not None: + role_meta['path_hash_prefixes'] = path_hash_prefixes + elif paths is not None: role_meta['paths'] = paths # Does 'role_meta' have the correct type? diff --git a/tuf/interposition/__init__.py b/tuf/interposition/__init__.py index db2262a9..00c53e7c 100644 --- a/tuf/interposition/__init__.py +++ b/tuf/interposition/__init__.py @@ -172,14 +172,21 @@ def __read_configuration(configuration_handler, parent_repository_directory=None, parent_ssl_certificates_directory=None): """ - A generic function to read a TUF interposition configuration off the disk, - and handle it.
configuration_handler must be a function which accepts a - tuf.interposition.Configuration instance.""" + A generic function to read TUF interposition configurations off a file, and + then handle those configurations with a given function. configuration_handler + must be a function which accepts a tuf.interposition.Configuration + instance. + + Returns the parsed configurations as a dictionary of configurations indexed + by hostnames.""" INVALID_TUF_CONFIGURATION = "Invalid configuration for {network_location}!" INVALID_TUF_INTERPOSITION_JSON = "Invalid configuration in {filename}!" NO_CONFIGURATIONS = "No configurations found in configuration in {filename}!" + # Configurations indexed by hostnames. + parsed_configurations = {} + try: with open(filename) as tuf_interposition_json: tuf_interpositions = json.load(tuf_interposition_json) @@ -197,6 +204,7 @@ def __read_configuration(configuration_handler, configuration = configuration_parser.parse() configuration_handler(configuration) + parsed_configurations[configuration.hostname] = configuration except: Logger.exception(INVALID_TUF_CONFIGURATION.format(network_location=network_location)) @@ -206,6 +214,10 @@ def __read_configuration(configuration_handler, Logger.exception(INVALID_TUF_INTERPOSITION_JSON.format(filename=filename)) raise + else: + return parsed_configurations + + @@ -218,8 +230,7 @@ def configure(filename="tuf.interposition.json", parent_repository_directory=None, parent_ssl_certificates_directory=None): - """ - The optional parent_repository_directory parameter is used to specify the + """The optional parent_repository_directory parameter is used to specify the containing parent directory of the "repository_directory" specified in a configuration for *all* network locations, because sometimes the absolute location of the "repository_directory" is only known at runtime. 
If you @@ -259,20 +270,26 @@ def configure(filename="tuf.interposition.json", Unless any "url_prefix" begins with "https://", "ssl_certificates" is optional; it must specify certificates bundled as PEM (RFC 1422). - """ - __read_configuration(__updater_controller.add, filename=filename, - parent_repository_directory=parent_repository_directory, - parent_ssl_certificates_directory=parent_ssl_certificates_directory) + Returns the parsed configurations as a dictionary of configurations indexed + by hostnames.""" + + configurations = \ + __read_configuration(__updater_controller.add, filename=filename, + parent_repository_directory=parent_repository_directory, + parent_ssl_certificates_directory=parent_ssl_certificates_directory) + + return configurations -def deconfigure(filename="tuf.interposition.json"): - """Remove TUF interposition for a previously read configuration.""" +def deconfigure(configurations): + """Remove TUF interposition for previously read configurations.""" - __read_configuration(__updater_controller.remove, filename=filename) + for configuration in configurations.itervalues(): + __updater_controller.remove(configuration) @@ -328,3 +345,8 @@ def wrapper(self, *args, **kwargs): # Build and monkey patch public copies of the urllib and urllib2 modules. 
__monkey_patch() + + + + + diff --git a/tuf/interposition/configuration.py b/tuf/interposition/configuration.py index 5a66e4af..295ccac6 100644 --- a/tuf/interposition/configuration.py +++ b/tuf/interposition/configuration.py @@ -1,5 +1,4 @@ import os.path -import tempfile import types import urlparse @@ -43,7 +42,6 @@ def __init__(self, hostname, port, repository_directory, repository_mirrors, self.repository_mirrors = repository_mirrors self.target_paths = target_paths self.ssl_certificates = ssl_certificates - self.tempdir = tempfile.mkdtemp() def __repr__(self): diff --git a/tuf/interposition/updater.py b/tuf/interposition/updater.py index 8be219f5..6c83e964 100644 --- a/tuf/interposition/updater.py +++ b/tuf/interposition/updater.py @@ -2,6 +2,7 @@ import os.path import re import shutil +import tempfile import urllib import urlparse @@ -37,7 +38,12 @@ class Updater(object): def __init__(self, configuration): + CREATED_TEMPDIR_MESSAGE = "Created temporary directory at {tempdir}" + self.configuration = configuration + # A temporary directory used for this updater over runtime. 
+ self.tempdir = tempfile.mkdtemp() + Logger.debug(CREATED_TEMPDIR_MESSAGE.format(tempdir=self.tempdir)) # must switch context before instantiating updater # because updater depends on some module (tuf.conf) variables @@ -46,11 +52,19 @@ def __init__(self, configuration): self.configuration.repository_mirrors) + def cleanup(self): + """Clean up after certain side effects, such as temporary directories.""" + + DELETED_TEMPDIR_MESSAGE = "Deleted temporary directory at {tempdir}" + shutil.rmtree(self.tempdir) + Logger.debug(DELETED_TEMPDIR_MESSAGE.format(tempdir=self.tempdir)) + + def download_target(self, target_filepath): """Downloads target with TUF as a side effect.""" # download file into a temporary directory shared over runtime - destination_directory = self.configuration.tempdir + destination_directory = self.tempdir filename = os.path.join(destination_directory, target_filepath) self.switch_context() # switch TUF context @@ -132,12 +146,25 @@ def open(self, url, data=None): def retrieve(self, url, filename=None, reporthook=None, data=None): INTERPOSITION_MESSAGE = "Interposing for {url}" - # TODO: set valid headers - content_type, content_encoding = mimetypes.guess_type(url) - headers = {"content-type": content_type} - Logger.info(INTERPOSITION_MESSAGE.format(url=url)) + + # What is the actual target to download given the URL? Sometimes we would + # like to transform the given URL to the intended target; e.g. "/simple/" + # => "/simple/index.html". target_filepath = self.get_target_filepath(url) + + # TODO: Set valid headers fetched from the actual download. + # NOTE: Important to guess the mime type from the target_filepath, not the + # unmodified URL. + content_type, content_encoding = mimetypes.guess_type(target_filepath) + headers = { + # NOTE: pip refers to this same header in at least these two duplicate + # ways. + "content-type": content_type, + "Content-Type": content_type, + } + + # Download the target filepath determined by the original URL. 
temporary_directory, temporary_filename = self.download_target(target_filepath) if filename is None: @@ -301,9 +328,18 @@ def remove(self, configuration): assert configuration.hostname in self.__updaters assert repository_mirror_hostnames.issubset(self.__repository_mirror_hostnames) + # Get the updater. + updater = self.__updaters.get(configuration.hostname) + # If all is well, remove the stored Updater as well as its associated # repository mirror hostnames. + updater.cleanup() del self.__updaters[configuration.hostname] self.__repository_mirror_hostnames.difference_update(repository_mirror_hostnames) Logger.info(UPDATER_REMOVED_MESSAGE.format(configuration=configuration)) + + + + + diff --git a/tuf/interposition/utility.py b/tuf/interposition/utility.py index c68ad777..b70fd702 100644 --- a/tuf/interposition/utility.py +++ b/tuf/interposition/utility.py @@ -20,9 +20,15 @@ class Logger(object): """A static logging object for tuf.interposition.""" + tuf.log.add_console_handler() __logger = logging.getLogger("tuf.interposition") + @staticmethod + def debug(message): + Logger.__logger.debug(message) + + @staticmethod def exception(message): Logger.__logger.exception(message) diff --git a/tuf/log.py b/tuf/log.py index 51397d1f..0667e4ad 100755 --- a/tuf/log.py +++ b/tuf/log.py @@ -229,6 +229,7 @@ def add_console_handler(log_level=_DEFAULT_CONSOLE_LOG_LEVEL): # Set the console handler for the logger. The built-in console handler will # log messages to 'sys.stderr' and capture 'log_level' messages. + global console_handler console_handler = logging.StreamHandler() console_handler.setLevel(log_level) console_handler.setFormatter(formatter) diff --git a/tuf/repo/signercli.py b/tuf/repo/signercli.py index 5787b420..5c36377b 100755 --- a/tuf/repo/signercli.py +++ b/tuf/repo/signercli.py @@ -1141,7 +1141,8 @@ def make_delegation(keystore_directory): # Update the parent role's metadata file. 
The parent role's delegation # field must be updated with the newly created delegated role. _update_parent_metadata(metadata_directory, delegated_role, delegated_keyids, - delegated_paths, parent_role, parent_keyids) + parent_role, parent_keyids, + delegated_paths=delegated_paths) @@ -1327,8 +1328,9 @@ def _make_delegated_metadata(metadata_directory, delegated_targets, -def _update_parent_metadata(metadata_directory, delegated_role, delegated_keyids, - delegated_paths, parent_role, parent_keyids): +def _update_parent_metadata(metadata_directory, delegated_role, + delegated_keyids, parent_role, parent_keyids, + delegated_paths=None, path_hash_prefixes=None): """ Update the parent role's metadata file. The delegations field of the metadata file is updated with the key and role information belonging @@ -1337,6 +1339,28 @@ def _update_parent_metadata(metadata_directory, delegated_role, delegated_keyids """ + # According to the specification, the 'paths' and 'path_hash_prefixes' + # attributes must be mutually exclusive. However, at the time of writing we + # do not always ensure that this is the case with the schema checks (see + # #83). Therefore, we must do it for ourselves. + + if delegated_paths is not None and path_hash_prefixes is not None: + raise \ + tuf.FormatError('Both "paths" and "path_hash_prefixes" are specified!') + + if delegated_paths is None and path_hash_prefixes is None: + raise \ + tuf.FormatError('Neither "paths" nor`"path_hash_prefixes" is specified!') + + # The 'delegated_paths' are relative to 'repository'. + # The 'relative_paths' are relative to 'repository/targets'. + if delegated_paths is None: + relative_paths = None + else: + relative_paths = [] + for path in delegated_paths: + relative_paths.append(os.path.sep.join(path.split(os.path.sep)[1:])) + # Extract the metadata from the parent role's file. 
parent_filename = os.path.join(metadata_directory, parent_role) parent_filename = parent_filename+'.txt' @@ -1366,12 +1390,14 @@ def _update_parent_metadata(metadata_directory, delegated_role, delegated_keyids roles = delegations.get('roles', []) threshold = len(delegated_keyids) delegated_role = parent_role+'/'+delegated_role - relative_paths = [] - for path in delegated_paths: - relative_paths.append(os.path.sep.join(path.split(os.path.sep)[1:])) - role_metadata = tuf.formats.make_role_metadata(delegated_keyids, threshold, - name=delegated_role, - paths=relative_paths) + + # Write either the "paths" or the "path_hash_prefixes" attribute. + role_metadata = \ + tuf.formats.make_role_metadata(delegated_keyids, threshold, + name=delegated_role, paths=relative_paths, + path_hash_prefixes=path_hash_prefixes) + + # Find the appropriate role to create or update. role_index = tuf.repo.signerlib.find_delegated_role(roles, delegated_role) if role_index is None: diff --git a/tuf/repo/signerlib.py b/tuf/repo/signerlib.py index 3dfd6226..2698518f 100755 --- a/tuf/repo/signerlib.py +++ b/tuf/repo/signerlib.py @@ -19,12 +19,14 @@ """ +import gzip import os import ConfigParser import logging import tuf import tuf.formats +import tuf.hash import tuf.rsa_key import tuf.repo.keystore import tuf.sig @@ -493,9 +495,9 @@ def generate_timestamp_metadata(release_filename, version, Conformant to 'tuf.formats.TIME_SCHEMA'. compressions: - Compression extensions (e.g., 'gz' and 'tgz'). If 'release.txt' is also - saved in compressed form, these compression extensions should be stored - in 'compressions' so the compressed timestamp files can be added to the + Compression extensions (e.g., 'gz'). If 'release.txt' is also saved in + compressed form, these compression extensions should be stored in + 'compressions' so the compressed timestamp files can be added to the timestamp metadata object. 
@@ -524,8 +526,13 @@ def generate_timestamp_metadata(release_filename, version, # Save the file info of the compressed versions of 'timestamp.txt'. for file_extension in compressions: compressed_filename = release_filename + '.' + file_extension - compressed_fileinfo = get_metadata_file_info(compressed_filename) - fileinfo['release.txt.' + file_extension] = compressed_fileinfo + try: + compressed_fileinfo = get_metadata_file_info(compressed_filename) + except: + logger.warn('Could not get fileinfo about '+str(compressed_filename)) + else: + logger.info('Including fileinfo about '+str(compressed_filename)) + fileinfo['release.txt.' + file_extension] = compressed_fileinfo # Generate the timestamp metadata object. timestamp_metadata = tuf.formats.TimestampFile.make_metadata(version, @@ -538,7 +545,7 @@ def generate_timestamp_metadata(release_filename, version, -def write_metadata_file(metadata, filename): +def write_metadata_file(metadata, filename, compression=None): """ Create the file containing the metadata. @@ -551,11 +558,17 @@ def write_metadata_file(metadata, filename): The filename (absolute path) of the metadata to be written (e.g., 'root.txt'). + compression: + Specify an algorithm as a string to compress the file; otherwise, the + file will be left uncompressed. Available options are 'gz' (gzip). + tuf.FormatError, if the arguments are improperly formatted. tuf.Error, if 'filename' doesn't exist. + Any other runtime (e.g. IO) exception. + The 'filename' file is created or overwritten if it exists. @@ -569,20 +582,44 @@ def write_metadata_file(metadata, filename): tuf.formats.SIGNABLE_SCHEMA.check_match(metadata) tuf.formats.PATH_SCHEMA.check_match(filename) - # Split 'filename' into head and tail. Verify that head exists. - check_directory(os.path.split(filename)[0]) + # Verify 'filename' directory. 
+ check_directory(os.path.dirname(filename)) - logger.info('Writing to '+repr(filename)) - file_object = open(filename, 'w') + # We choose a file-like object that depends on the compression algorithm. + file_object = None + # We may modify the filename, depending on the compression algorithm, so we + # store it separately. + filename_with_compression = filename - # The metadata object is saved to 'file_object'. The keys - # of the objects are sorted and indentation is used. - json.dump(metadata, file_object, indent=1, sort_keys=True) + # Take care of compression. + if compression is None: + logger.info('No compression for '+str(filename)) + file_object = open(filename_with_compression, 'w') + elif compression == 'gz': + logger.info('gzip compression for '+str(filename)) + filename_with_compression += '.gz' + file_object = gzip.open(filename_with_compression, 'w') + else: + raise tuf.FormatError('Unknown compression algorithm: '+str(compression)) - file_object.write('\n') - file_object.close() + try: + tuf.formats.PATH_SCHEMA.check_match(filename_with_compression) + logger.info('Writing to '+str(filename_with_compression)) - return filename + # The metadata object is saved to 'file_object'. The keys + # of the objects are sorted and indentation is used. + json.dump(metadata, file_object, indent=1, sort_keys=True) + + file_object.write('\n') + except: + # Raise any runtime exception. + raise + else: + # Otherwise, return the written filename. + return filename_with_compression + finally: + # Always close the file. + file_object.close() @@ -1131,7 +1168,7 @@ def build_targets_file(target_paths, targets_keyids, metadata_directory, def build_release_file(release_keyids, metadata_directory, - version, expiration_date): + version, expiration_date, compress=False): """ Build the release metadata file using the signing keys in 'release_keyids'. 
@@ -1152,6 +1189,10 @@ def build_release_file(release_keyids, metadata_directory, The expiration date, in UTC, of the metadata file. Conformant to 'tuf.formats.TIME_SCHEMA'. + compress: + Should we *include* a compressed version of the release file? By default, + the answer is no. + tuf.FormatError, if any of the arguments are improperly formatted. @@ -1182,14 +1223,27 @@ def build_release_file(release_keyids, metadata_directory, version, expiration_date) signable = sign_metadata(release_metadata, release_keyids, release_filepath) - return write_metadata_file(signable, release_filepath) + # Should we also include a compressed version of release.txt? + if compress: + # If so, write a gzip version of release.txt. + compressed_written_filepath = \ + write_metadata_file(signable, release_filepath, compression='gz') + logger.info('Wrote '+str(compressed_written_filepath)) + else: + logger.debug('No compressed version of release metadata will be included.') + + written_filepath = write_metadata_file(signable, release_filepath) + logger.info('Wrote '+str(written_filepath)) + + return written_filepath def build_timestamp_file(timestamp_keyids, metadata_directory, - version, expiration_date): + version, expiration_date, + include_compressed_release=True): """ Build the timestamp metadata file using the signing keys in 'timestamp_keyids'. @@ -1209,6 +1263,10 @@ def build_timestamp_file(timestamp_keyids, metadata_directory, expiration_date: The expiration date, in UTC, of the metadata file. Conformant to 'tuf.formats.TIME_SCHEMA'. + + include_compressed_release: + Should the timestamp role *include* compression versions of the release + metadata, if any? We do this by default. tuf.FormatError, if any of the arguments are improperly formatted. 
@@ -1236,11 +1294,24 @@ def build_timestamp_file(timestamp_keyids, metadata_directory, release_filepath = os.path.join(metadata_directory, RELEASE_FILENAME) timestamp_filepath = os.path.join(metadata_directory, TIMESTAMP_FILENAME) + # Should we include compressed versions of release in timestamp? + compressions = () + if include_compressed_release: + # Presently, we include only gzip versions by default. + compressions = ('gz',) + logger.info('Including '+str(compressions)+' versions of release in '\ + 'timestamp.') + else: + logger.warn('No compressed versions of release will be included in '\ + 'timestamp.') + # Generate and sign the timestamp metadata. timestamp_metadata = generate_timestamp_metadata(release_filepath, version, - expiration_date) - signable = sign_metadata(timestamp_metadata, timestamp_keyids, timestamp_filepath) + expiration_date, + compressions=compressions) + signable = sign_metadata(timestamp_metadata, timestamp_keyids, + timestamp_filepath) return write_metadata_file(signable, timestamp_filepath) diff --git a/tuf/tests/system_tests/slow_retrieval_server.py b/tuf/tests/system_tests/slow_retrieval_server.py index 161277b6..dfddab79 100755 --- a/tuf/tests/system_tests/slow_retrieval_server.py +++ b/tuf/tests/system_tests/slow_retrieval_server.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python + """ slow_retrieval_server.py @@ -24,7 +26,18 @@ import random from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer -DELAY = 1 + + + + +# Modify the HTTPServer class to pass the test_mode argument to do_GET function. +class HTTPServer_Test(HTTPServer): + def __init__(self, server_address, Handler, test_mode): + HTTPServer.__init__(self, server_address, Handler) + self.test_mode = test_mode + + + # HTTP request handler. @@ -41,37 +54,49 @@ def do_GET(self): self.send_response(200) self.send_header('Content-length', str(len(data))) self.end_headers() - - # Throttle the file by sending a character every few seconds. 
- for i in range(len(data)): + + if self.server.test_mode == "mode_1": + # before sends any data, the server does nothing during a long time. + DELAY = 1000 time.sleep(DELAY) - self.wfile.write(data[i]) + self.wfile.write(data) - return + return + + else: # "mode_2" + DELAY = 1 + # Throttle the file by sending a character every few seconds. + for i in range(len(data)): + self.wfile.write(data[i]) + time.sleep(DELAY) + return except IOError, e: self.send_error(404, 'File Not Found!') + + def get_random_port(): port = random.randint(30000, 45000) return port -def run(port): + + +def run(port, test_mode): server_address = ('localhost', port) - httpd = HTTPServer(server_address, Handler) - print('Slow server is active on port: '+str(port)+' ...') + httpd = HTTPServer_Test(server_address, Handler, test_mode) httpd.handle_request() -if __name__ == '__main__': - if len(sys.argv) > 1: - port = int(sys.argv[1]) - else: - port = get_random_port() - run(port) + +if __name__ == '__main__': + port = int(sys.argv[1]) + test_mode = sys.argv[2] + assert test_mode in ("mode_1", "mode_2") + run(port, test_mode) diff --git a/tuf/tests/system_tests/test_endless_data_attack.py b/tuf/tests/system_tests/test_endless_data_attack.py index 4cac5531..0fa49e1b 100755 --- a/tuf/tests/system_tests/test_endless_data_attack.py +++ b/tuf/tests/system_tests/test_endless_data_attack.py @@ -31,7 +31,7 @@ """ -# TODO:... 
+from __future__ import print_function import os import shutil @@ -41,23 +41,22 @@ import tuf from tuf.interposition import urllib_tuf - +from tuf.log import logger class EndlessDataAttack(Exception): pass -def _download(url, filename, tuf=False): - if tuf: +def _download(url, filename, TUF=False): + if TUF: urllib_tuf.urlretrieve(url, filename) - else: urllib.urlretrieve(url, filename) -def test_arbitrary_package_attack(TUF=False): +def test_arbitrary_package_attack(TUF=False, TIMESTAMP=False): """ TUF: @@ -85,13 +84,12 @@ def test_arbitrary_package_attack(TUF=False): file_basename = os.path.basename(filepath) url_to_repo = url+'reg_repo/'+file_basename downloaded_file = os.path.join(downloads, file_basename) - endless_data = 'A'*100 + endless_data = 'A'*100000 if TUF: # Update TUF metadata before attacker modifies anything. util_test_tools.tuf_refresh_repo(root_repo, keyids) - # Modify the url. Remember that the interposition will intercept # urls that have 'localhost:9999' hostname, which was specified in # the json interposition configuration file. Look for 'hostname' @@ -103,6 +101,13 @@ def test_arbitrary_package_attack(TUF=False): target = os.path.join(tuf_targets, file_basename) util_test_tools.modify_file_at_repository(target, endless_data) + # Attacker modifies the timestamp.txt metadata. + if TIMESTAMP: + metadata = os.path.join(tuf_repo, 'metadata') + timestamp = os.path.join(metadata, 'timestamp.txt') + # FIXME: This does not correctly "patch" the timestamp metadata. + util_test_tools.modify_file_at_repository(timestamp, endless_data) + # Attacker modifies the file at the regular repository. util_test_tools.modify_file_at_repository(filepath, endless_data) @@ -111,13 +116,28 @@ def test_arbitrary_package_attack(TUF=False): try: # Client downloads (tries to download) the file. 
- _download(url=url_to_repo, filename=downloaded_file, tuf=TUF) + _download(url=url_to_repo, filename=downloaded_file, TUF=TUF) - except tuf.DownloadError: - # If tuf.DownloadError is raised, this means that TUF has prevented - # the download of an unrecognized file. Enable the logging to see, - # what actually happened. - pass + except tuf.NoWorkingMirrorError, exception: + endless_data_attack = False + + for mirror_url, mirror_error in exception.mirror_errors.iteritems(): + # We would get a bad hash error if the file was actually larger than + # the metadata said it was. + if isinstance(mirror_error, tuf.BadHashError): + endless_data_attack = True + break + # We would get invalid metadata JSON if the server deliberately sent + # malformed JSON as part of an endless data attack. + elif isinstance(mirror_error, tuf.InvalidMetadataJSONError): + endless_data_attack = True + break + + # In case we did not detect what was likely an endless data attack, we + # reraise the exception to indicate that endless data attack detection + # failed. + if not endless_data_attack: + raise else: # Check whether the attack succeeded by inspecting the content of the @@ -136,7 +156,7 @@ def test_arbitrary_package_attack(TUF=False): try: - test_arbitrary_package_attack(TUF=False) + test_arbitrary_package_attack(TUF=False, TIMESTAMP=False) except EndlessDataAttack, error: print('Without TUF: '+str(error)) @@ -144,7 +164,20 @@ def test_arbitrary_package_attack(TUF=False): try: - test_arbitrary_package_attack(TUF=True) + test_arbitrary_package_attack(TUF=True, TIMESTAMP=False) except EndlessDataAttack, error: print('With TUF: '+str(error)) + + + +try: + # This test fails because the timestamp metadata has been extended with + # random data from its true length, thereby resulting in invalid JSON. 
+ test_arbitrary_package_attack(TUF=True, TIMESTAMP=True) + +except EndlessDataAttack, error: + print('With TUF: '+str(error)) + + + diff --git a/tuf/tests/system_tests/test_slow_retrieval_attack.py b/tuf/tests/system_tests/test_slow_retrieval_attack.py index 7d25a7d2..92b46da7 100755 --- a/tuf/tests/system_tests/test_slow_retrieval_attack.py +++ b/tuf/tests/system_tests/test_slow_retrieval_attack.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python + """ test_slow_retrieval_attack.py @@ -35,12 +37,17 @@ """ +from __future__ import print_function + +from multiprocessing import Process import os -import time -import urllib import random import subprocess -from multiprocessing import Process +import sys +import time +import tuf +import urllib + import tuf.tests.system_tests.util_test_tools as util_test_tools from tuf.interposition import urllib_tuf @@ -50,26 +57,42 @@ class SlowRetrievalAttackAlert(Exception): pass -def _download(url, filename, tuf=False): - if tuf: - urllib_tuf.urlretrieve(url, filename) - +def _download(url, filename, TUF=False): + if TUF: + try: + urllib_tuf.urlretrieve(url, filename) + except tuf.NoWorkingMirrorError, exception: + slow_retrieval = False + for mirror_url, mirror_error in exception.mirror_errors.iteritems(): + if isinstance(mirror_error, tuf.SlowRetrievalError): + slow_retrieval = True + break + + # We must fail due to a slow retrieval error; otherwise we will exit with + # a "successful termination" exit status to indicate that slow retrieval + # detection failed. + if slow_retrieval: + print('TUF stopped the update because it detected slow retrieval.') + sys.exit(-1) + else: + print('TUF stopped the update due to something other than slow retrieval.') + sys.exit(0) else: urllib.urlretrieve(url, filename) -def test_slow_retrieval_attack(TUF=False): +def test_slow_retrieval_attack(TUF=False, mode=None): - WAIT_TIME = 5 # Number of seconds to wait until download completes. 
- ERROR_MSG = '\tSlow Retrieval Attack was Successful!\n\n' + WAIT_TIME = 60 # Number of seconds to wait until download completes. + ERROR_MSG = 'Slow retrieval attack succeeded (TUF: '+str(TUF)+', mode: '+\ + str(mode)+').' # Launch the server. port = random.randint(30000, 45000) - command = ['python', 'slow_retrieval_server.py', str(port)] - server_process = subprocess.Popen(command, stdout=subprocess.PIPE, - stderr=subprocess.PIPE) - time.sleep(.1) + command = ['python', 'slow_retrieval_server.py', str(port), mode] + server_process = subprocess.Popen(command, stderr=subprocess.PIPE) + time.sleep(1) try: # Setup. @@ -79,14 +102,13 @@ def test_slow_retrieval_attack(TUF=False): downloads = os.path.join(root_repo, 'downloads') # Add file to 'repo' directory: {root_repo} - filepath = util_test_tools.add_file_to_repository(reg_repo, 'A'*10) + filepath = util_test_tools.add_file_to_repository(reg_repo, 'A'*30) file_basename = os.path.basename(filepath) url_to_file = url+'reg_repo/'+file_basename downloaded_file = os.path.join(downloads, file_basename) if TUF: - print 'TUF ...' tuf_repo = os.path.join(root_repo, 'tuf_repo') # Update TUF metadata before attacker modifies anything. @@ -105,29 +127,50 @@ def test_slow_retrieval_attack(TUF=False): proc = Process(target=_download, args=(url_to_file, downloaded_file, TUF)) proc.start() proc.join(WAIT_TIME) - if proc.exitcode is None: + + # In case the process did not exit or successfully exited, we failed. + if not proc.exitcode: proc.terminate() raise SlowRetrievalAttackAlert(ERROR_MSG) - finally: - if server_process.returncode is None: - server_process.kill() - print 'Slow server terminated.\n' - + server_process.kill() util_test_tools.cleanup(root_repo, server_proc) +# Simulates two kinds of slow retrieval attacks. +# mode_1: When download begins, the server blocks the download +# for a long time by doing nothing before it sends the first byte of data.
+# mode_2: During the download process, the server blocks the download +# by sending just several characters every few seconds. try: - test_slow_retrieval_attack(TUF=False) + test_slow_retrieval_attack(TUF=False, mode = "mode_1") except SlowRetrievalAttackAlert, error: - print error - + print(error) + print() try: - test_slow_retrieval_attack(TUF=True) + test_slow_retrieval_attack(TUF=False, mode = "mode_2") except SlowRetrievalAttackAlert, error: - print error + print(error) + print() + +try: + test_slow_retrieval_attack(TUF=True, mode = "mode_1") +except SlowRetrievalAttackAlert, error: + print(error) + print() + +try: + test_slow_retrieval_attack(TUF=True, mode = "mode_2") +except SlowRetrievalAttackAlert, error: + print(error) + print() + + + + + diff --git a/tuf/tests/system_tests/util_test_tools.py b/tuf/tests/system_tests/util_test_tools.py index 40024a42..d79cfeed 100755 --- a/tuf/tests/system_tests/util_test_tools.py +++ b/tuf/tests/system_tests/util_test_tools.py @@ -137,18 +137,25 @@ import subprocess import tuf +import tuf.client.updater import tuf.formats import tuf.interposition -import tuf.util -import tuf.client.updater +import tuf.log import tuf.repo.signercli as signercli import tuf.repo.signerlib as signerlib import tuf.repo.keystore as keystore +import tuf.util logger = logging.getLogger('tuf.tests.system_tests.util_test_tools') PASSWD = 'test' version = 1 +# Where we keep TUF configurations, if any, between every iteration. 
+tuf_configurations = None + + +def disable_console_logging(): + tuf.log.logger.removeHandler(tuf.log.console_handler) def init_repo(tuf=False, port=None): @@ -182,6 +189,7 @@ def init_repo(tuf=False, port=None): keyids = None if tuf: + disable_console_logging() keyids = init_tuf(root_repo) create_interposition_config(root_repo, url) @@ -192,6 +200,8 @@ def init_repo(tuf=False, port=None): def cleanup(root_repo, server_process=None): + global tuf_configurations + if server_process is not None: if server_process.returncode is None: server_process.kill() @@ -202,9 +212,9 @@ def cleanup(root_repo, server_process=None): keystore.clear_keystore() # Deconfigure interposition. - interpose_json = os.path.join(root_repo, 'tuf.interposition.json') - if os.path.exists(interpose_json): - tuf.interposition.deconfigure(filename=interpose_json) + if tuf_configurations is not None: + tuf.interposition.deconfigure(tuf_configurations) + tuf_configurations = None # Removing repository directory. try: @@ -361,7 +371,9 @@ def create_interposition_config(root_repo, url): (urllib_tuf replaces urllib module) urllib_tuf.urlretrieve(url, filename) - """ + """ + + global tuf_configurations tuf_repo = os.path.join(root_repo, 'tuf_repo') tuf_client = os.path.join(root_repo, 'tuf_client') @@ -392,7 +404,8 @@ def create_interposition_config(root_repo, url): with open(interpose_json, 'wb') as fileobj: tuf.util.json.dump(interposition_dict, fileobj) - tuf.interposition.configure(filename=interpose_json) + assert tuf_configurations is None + tuf_configurations = tuf.interposition.configure(filename=interpose_json) diff --git a/tuf/tests/test_download.py b/tuf/tests/test_download.py index d2e0e38d..77d0d5fd 100755 --- a/tuf/tests/test_download.py +++ b/tuf/tests/test_download.py @@ -23,20 +23,20 @@ """ -import os -import sys -import time -import random import hashlib import logging -import unittest +import os +import random import subprocess -import SocketServer -import SimpleHTTPServer +import time 
+import unittest +import urllib2 + import tuf -import tuf.log +import tuf.conf as conf import tuf.download as download +import tuf.log import tuf.tests.unittest_toolbox as unittest_toolbox logger = logging.getLogger('tuf.test_download') @@ -70,7 +70,7 @@ def setUp(self): # NOTE: Following error is raised if delay is not applied: # - time.sleep(.1) + time.sleep(1) # Computing hash of target file data. m = hashlib.md5() @@ -79,7 +79,6 @@ def setUp(self): self.target_hash = {'md5':digest} - # Stop server process and perform clean up. def tearDown(self): unittest_toolbox.Modified_TestCase.tearDown(self) @@ -89,84 +88,57 @@ def tearDown(self): self.target_fileobj.close() - # Unit Test. + # Test: Normal case. def test_download_url_to_tempfileobj(self): - # Test: Normal cases without supplying hash and/or length arguments. - temp_fileobj = download.download_url_to_tempfileobj(self.url) + + download_file = download.safe_download + + temp_fileobj = download_file(self.url, self.target_data_length) self.assertEquals(self.target_data, temp_fileobj.read()) self.assertEquals(self.target_data_length, len(temp_fileobj.read())) temp_fileobj.close_temp_file() - temp_fileobj = download.download_url_to_tempfileobj(self.url, - required_length=self.target_data_length) + + # Test: Incorrect lengths. + def test_download_url_to_tempfileobj_and_lengths(self): + + # NOTE: We catch tuf.BadHashError here because the file, shorter by a byte, + # would not match the expected hashes. We log a warning when we find that + # the server-reported length of the file does not match our + # required_length. We also see that STRICT_REQUIRED_LENGTH does not change + # the outcome of the previous test. + download.safe_download(self.url, self.target_data_length - 1) + download.unsafe_download(self.url, self.target_data_length - 1) + + # NOTE: We catch tuf.DownloadError here because the STRICT_REQUIRED_LENGTH, + # which is True by default, mandates that we must download exactly what is + # required. 
+ exception_message = 'Downloaded '+str(self.target_data_length)+\ + ' bytes, but expected '+\ + str(self.target_data_length+1)+\ + ' bytes. There is a difference of 1 bytes!' + self.assertRaisesRegexp(tuf.DownloadError, exception_message, + download.safe_download, self.url, + self.target_data_length + 1) + + # NOTE: However, we do not catch a tuf.DownloadError here for the same test + # as the previous one because we have disabled STRICT_REQUIRED_LENGTH. + temp_fileobj = download.unsafe_download(self.url, self.target_data_length + 1) self.assertEquals(self.target_data, temp_fileobj.read()) self.assertEquals(self.target_data_length, len(temp_fileobj.read())) temp_fileobj.close_temp_file() - temp_fileobj = download.download_url_to_tempfileobj(self.url, - required_hashes=self.target_hash) - self.assertEquals(self.target_data, temp_fileobj.read()) - self.assertEquals(self.target_data_length, len(temp_fileobj.read())) - temp_fileobj.close_temp_file() - # Test: Normal case. - temp_fileobj = download.download_url_to_tempfileobj(self.url, - required_hashes=self.target_hash, - required_length=self.target_data_length) - self.assertEquals(self.target_data, temp_fileobj.read()) - self.assertEquals(self.target_data_length, len(temp_fileobj.read())) - temp_fileobj.close_temp_file() - - # Test: Incorrect length. - self.assertRaises(tuf.DownloadError, - download.download_url_to_tempfileobj, self.url, - required_hashes=self.target_hash, - required_length=self.target_data_length - 1) - - self.assertRaises(tuf.DownloadError, - download.download_url_to_tempfileobj, self.url, - required_hashes=self.target_hash, - required_length=self.target_data_length + 1) - - # Test: Incorrect hashs. - self.assertRaises(tuf.DownloadError, - download.download_url_to_tempfileobj, self.url, - required_hashes={'md5':self.random_string()}, - required_length=self.target_data_length) - - # Test: Incorrect/Unreachable url. 
- self.assertRaises(tuf.FormatError, - download.download_url_to_tempfileobj, None, - required_hashes=self.target_hash, - required_length=self.target_data_length) - - self.assertRaises(tuf.DownloadError, - download.download_url_to_tempfileobj, - self.random_string(), - required_hashes=self.target_hash, - required_length=self.target_data_length) - - self.assertRaises(tuf.DownloadError, - download.download_url_to_tempfileobj, - 'http://localhost:'+str(self.PORT)+'/'+self.random_string(), - required_hashes=self.target_hash, - required_length=self.target_data_length) - - self.assertRaises(tuf.DownloadError, - download.download_url_to_tempfileobj, - 'http://localhost:'+str(self.PORT+1)+'/'+self.random_string(), - required_hashes=self.target_hash, - required_length=self.target_data_length) + def test_download_url_to_tempfileobj_and_performance(self): """ # Measuring performance of 'auto_flush = False' vs. 'auto_flush = True' - # in download_url_to_tempfileobj() during write. No change was observed. + # in download._download_file() during write. No change was observed. star_cpu = time.clock() star_real = time.time() - temp_fileobj = download.download_url_to_tempfileobj(self.url, - required_hashes=self.target_hash, - required_length=self.target_data_length) + temp_fileobj = download_file(self.url, + self.target_data_length) end_cpu = time.clock() end_real = time.time() @@ -182,6 +154,28 @@ def test_download_url_to_tempfileobj(self): """ + # Test: Incorrect/Unreachable URLs. 
+ def test_download_url_to_tempfileobj_and_urls(self): + + download_file = download.safe_download + + self.assertRaises(tuf.FormatError, + download_file, None, self.target_data_length) + + self.assertRaises(ValueError, + download_file, + self.random_string(), self.target_data_length) + + self.assertRaises(urllib2.HTTPError, + download_file, + 'http://localhost:'+str(self.PORT)+'/'+self.random_string(), + self.target_data_length) + + self.assertRaises(urllib2.URLError, + download_file, + 'http://localhost:'+str(self.PORT+1)+'/'+self.random_string(), + self.target_data_length) + # Run unit test. if __name__ == '__main__': diff --git a/tuf/tests/test_updater.py b/tuf/tests/test_updater.py index ba7e234c..5dd7ed58 100755 --- a/tuf/tests/test_updater.py +++ b/tuf/tests/test_updater.py @@ -45,18 +45,28 @@ class guarantees the order of unit tests. So that, 'test_something_A' import tuf +import tuf.client.updater as updater +import tuf.conf import tuf.log -import tuf.util import tuf.formats +import tuf.keydb import tuf.repo.keystore as keystore import tuf.repo.signerlib as signerlib -import tuf.client.updater as updater +import tuf.roledb import tuf.tests.repository_setup as setup import tuf.tests.unittest_toolbox as unittest_toolbox +import tuf.util logger = logging.getLogger('tuf.test_updater') +# This is the default metadata that we would create for the timestamp role, +# because it has no signed metadata for itself. 
+DEFAULT_TIMESTAMP_FILEINFO = { + 'hashes': None, + 'length': tuf.conf.DEFAULT_TIMESTAMP_REQUIRED_LENGTH +} + class TestUpdater_init_(unittest_toolbox.Modified_TestCase): @@ -203,7 +213,7 @@ def _mock_download_url_to_tempfileobj(self, output): """ - def _mock_download(url, hashes=None, length=None): + def _mock_download(url, length): if isinstance(output, (str, unicode)): file_path = output elif isinstance(output, list): @@ -213,8 +223,9 @@ def _mock_download(url, hashes=None, length=None): temp_fileobj.write(file_obj.read()) return temp_fileobj - # Patch tuf.download.download_url_to_tempfileobj(). - tuf.download.download_url_to_tempfileobj = _mock_download + # Patch tuf.download functions. + tuf.download.unsafe_download = _mock_download + tuf.download.safe_download = _mock_download @@ -327,7 +338,7 @@ def _get_list_of_target_paths(self, targets_directory, relative=True): def _update_top_level_roles(self): self._mock_download_url_to_tempfileobj(self.timestamp_filepath) - self.Repository._update_metadata('timestamp') + self.Repository._update_metadata('timestamp', DEFAULT_TIMESTAMP_FILEINFO) # Reference self.Repository._update_metadata_if_changed(). update_if_changed = self.Repository._update_metadata_if_changed @@ -478,9 +489,6 @@ def test_3__update_metadata(self): """ This unit test verifies the method's proper behaviour on the expected input. """ - - # Setup - original_download = tuf.download.download_url_to_tempfileobj # Since client's '.../metadata/current' will need to have separate # gzipped metadata file in order to test compressed file handling, @@ -504,13 +512,16 @@ def test_3__update_metadata(self): # Test: Invalid file downloaded. # Patch 'download.download_url_to_tempfileobj' function. self._mock_download_url_to_tempfileobj(self.release_filepath) - self.assertRaises(tuf.RepositoryError, _update_metadata, 'targets') + + # TODO: Is this the original intent of this test? + self.assertRaises(TypeError, _update_metadata, 'targets', None) # Test: normal case. 
# Patch 'download.download_url_to_tempfileobj' function. self._mock_download_url_to_tempfileobj(self.targets_filepath) - _update_metadata('targets') + _update_metadata('targets', + signerlib.get_metadata_file_info(self.targets_filepath)) list_of_targets = self.Repository.metadata['current']['targets']['targets'] # Verify that the added target's path is listed in target's metadata. @@ -527,7 +538,12 @@ def test_3__update_metadata(self): # Re-patch 'download.download_url_to_tempfileobj' function. self._mock_download_url_to_tempfileobj(targets_filepath_compressed) - _update_metadata('targets', compression='gzip') + # TODO: Not convinced this is actually being tested correctly. + # See how we get fileinfo in tuf.client.updater._update_metadata_if_changed + _update_metadata('targets', + #signerlib.get_metadata_file_info(self.targets_filepath), + None, + compression='gzip') list_of_targets = self.Repository.metadata['current']['targets']['targets'] # Verify that the added target's path is listed in target's metadata. @@ -537,12 +553,9 @@ def test_3__update_metadata(self): # Restoring server's repository to the initial state. os.remove(targets_filepath_compressed) - os.remove(os.path.join(self.client_current_dir,'targets.txt.gz')) + os.remove(os.path.join(self.client_current_dir,'targets.txt')) self._remove_target_from_targets_dir(added_target_1) - # RESTORE - tuf.download.download_url_to_tempfileobj = original_download - def test_1__update_fileinfo(self): @@ -601,9 +614,6 @@ def test_3__update_metadata_if_changed(self): """ This unit test verifies the method's proper behaviour on expected input. """ - - # Setup - original_download = tuf.download.download_url_to_tempfileobj # To test updater._update_metadata_if_changed, 'targets' metadata file is # going to be modified at the server's repository. 
@@ -623,7 +633,7 @@ def test_3__update_metadata_if_changed(self): self._mock_download_url_to_tempfileobj(self.timestamp_filepath) # Update timestamp metadata, it will indicate change in release metadata. - self.Repository._update_metadata('timestamp') + self.Repository._update_metadata('timestamp', DEFAULT_TIMESTAMP_FILEINFO) # Save current release metadata before updating. It will be used to # verify the update. @@ -667,7 +677,7 @@ def test_3__update_metadata_if_changed(self): self._mock_download_url_to_tempfileobj(self.timestamp_filepath) # Update timestamp metadata, it will indicate change in release metadata. - self.Repository._update_metadata('timestamp') + self.Repository._update_metadata('timestamp', DEFAULT_TIMESTAMP_FILEINFO) # Save current release metadata before updating. It will be used to # verify the update. @@ -685,17 +695,19 @@ def test_3__update_metadata_if_changed(self): # Test: Invalid targets metadata file downloaded. # Patch 'download.download_url_to_tempfileobj' and update targets. self._mock_download_url_to_tempfileobj(self.root_filepath) - self.assertRaises(tuf.MetadataNotAvailableError, update_if_changed, - 'targets') + + # TODO: Is this the original intent of this test? + try: + update_if_changed('targets') + except tuf.NoWorkingMirrorError, exception: + for mirror_url, mirror_error in exception.mirror_errors.iteritems(): + assert isinstance(mirror_error, tuf.BadHashError) # Restoring repositories to the initial state. os.remove(release_filepath_compressed) os.remove(os.path.join(self.client_current_dir, 'release.txt.gz')) self._remove_target_from_targets_dir(added_target_1) - # RESTORE - tuf.download.download_url_to_tempfileobj = original_download - @@ -752,8 +764,6 @@ def test_2__ensure_not_expired(self): def test_4_refresh(self): - # Setup. - original_download = tuf.download.download_url_to_tempfileobj # This unit test is based on adding an extra target file to the # server and rebuilding all server-side metadata. 
When 'refresh' @@ -785,16 +795,10 @@ def test_4_refresh(self): self._mock_download_url_to_tempfileobj(self.all_role_paths) setup.build_server_repository(self.server_repo_dir, self.targets_dir) - # RESTORE - tuf.download.download_url_to_tempfileobj = original_download - def test_4__refresh_targets_metadata(self): - - # Setup - original_download = tuf.download.download_url_to_tempfileobj # To test this method a target file would be added to a delegated role, # and metadata on the server side would be rebuilt. @@ -850,9 +854,6 @@ def test_4__refresh_targets_metadata(self): shutil.rmtree(os.path.join(self.server_repo_dir, 'keystore')) setup.build_server_repository(self.server_repo_dir, self.targets_dir) - # RESTORE - tuf.download.download_url_to_tempfileobj = original_download - @@ -879,12 +880,9 @@ def test_3__targets_of_role(self): def test_5_all_targets(self): - - # Setup - original_download = tuf.download.download_url_to_tempfileobj # As with '_refresh_targets_metadata()', tuf.roledb._roledb_dict - # has to be populated. The 'tuf.download.download_url_to_tempfileobj' method + # has to be populated. The 'tuf.download.safe_download' method # should be patched. The 'self.all_role_paths' argument is passed so that # the top-level roles and delegations may be all "downloaded" when # Repository.refresh() is called below. '_mock_download_url_to_tempfileobj' @@ -911,9 +909,6 @@ def test_5_all_targets(self): # targets in 'all_targets' should then be 6. self.assertTrue(len(all_targets) is 6) - # RESTORE - tuf.download.download_url_to_tempfileobj = original_download - @@ -941,7 +936,7 @@ def test_5_targets_of_role(self): def test_6_target(self): # Requirements: make sure roledb_dict is populated and - # tuf.download.download_url_to_tempfileobj function is patched. + # tuf.download.safe_download function is patched. # Setup targets_dir_content = os.listdir(self.targets_dir) @@ -962,7 +957,7 @@ def test_6_target(self): # Test: invalid target path. 
- self.assertRaises(tuf.RepositoryError, target, self.random_path()) + self.assertRaises(tuf.UnknownTargetError, target, self.random_path()) @@ -970,11 +965,8 @@ def test_6_target(self): def test_6_download_target(self): - - # Setup: - original_download = tuf.download.download_url_to_tempfileobj - # 'tuf.download.download_url_to_tempfileobj' method should be patched. + # 'tuf.download.safe_download' method should be patched. target_rel_paths_src = self._get_list_of_target_paths(self.targets_dir) # Create temporary directory that will be passed as an argument to the @@ -1011,27 +1003,25 @@ def test_6_download_target(self): # Patch 'download.download_url_to_tempfileobj' and verify that an # exception is raised. self._mock_download_url_to_tempfileobj(os.path.join(self.targets_dir, file_path)) - self.assertRaises(tuf.DownloadError, self.Repository.download_target, - target_info, - dest_dir) + + try: + self.Repository.download_target(target_info, dest_dir) + except tuf.NoWorkingMirrorError, exception: + # Ensure that no mirrors were found due to mismatch in confined target + # directories. + assert len(exception.mirror_errors) == 0 for mirror_name, mirror_info in mirrors.items(): mirrors[mirror_name]['confined_target_dirs'] = [''] - # RESTORE - tuf.download.download_url_to_tempfileobj = original_download - def test_7_updated_targets(self): - # Setup: - original_download = tuf.download.download_url_to_tempfileobj - # In this test, client will have two target files. Server will modify # one of them. As with 'all_targets' function, tuf.roledb._roledb_dict - # has to be populated. 'tuf.download.download_url_to_tempfileobj' method + # has to be populated. 'tuf.download.safe_download' method # should be patched. target_rel_paths_src = self._get_list_of_target_paths(self.targets_dir) @@ -1089,17 +1079,11 @@ def test_7_updated_targets(self): msg = 'A file that need not to be updated is indicated as updated.' 
 self.fail(msg) - # RESTORE - tuf.download.download_url_to_tempfileobj = original_download - def test_8_remove_obsolete_targets(self): - # Setup: - original_download = tuf.download.download_url_to_tempfileobj - # This unit test should be last, because it removes target files from the # server's targets directory. It is done to avoid adding files, rebuilding # and updating metadata. @@ -1148,9 +1132,6 @@ def test_8_remove_obsolete_targets(self): self.Repository.remove_obsolete_targets(dest_dir) self.assertTrue(os.listdir(dest_dir), 2) - # RESTORE - tuf.download.download_url_to_tempfileobj = original_download - def tearDownModule(): # tearDownModule() is called after all the tests have run. diff --git a/tuf/tests/test_util.py b/tuf/tests/test_util.py index 03da2b93..561e1cc7 100755 --- a/tuf/tests/test_util.py +++ b/tuf/tests/test_util.py @@ -293,7 +293,7 @@ def test_B6_load_json_file(self): util.json.dump(data, fileobj) fileobj.close() self.assertEquals(data, util.load_json_file(filepath)) - Errors = (tuf.FormatError, tuf.Error) + Errors = (tuf.FormatError, IOError) for bogus_arg in ['a', 1, ['a'], {'a':'b'}]: self.assertRaises(Errors, util.load_json_file, bogus_arg) diff --git a/tuf/util.py b/tuf/util.py index 3c72023a..cd422db8 100755 --- a/tuf/util.py +++ b/tuf/util.py @@ -249,6 +249,8 @@ def decompress_temp_file_object(self, compression): tuf.Error: If an invalid compression is given. + tuf.DecompressionError: If the decompression failed for any reason. + 'self._orig_file' is used to store the original data of 'temporary_file'. 
@@ -266,10 +268,17 @@ def decompress_temp_file_object(self, compression): if compression != 'gzip': raise tuf.Error('Only gzip compression is supported.') + self.seek(0) self._compression = compression self._orig_file = self.temporary_file - self.temporary_file = gzip.GzipFile(fileobj=self.temporary_file, mode='rb') + + try: + self.temporary_file = gzip.GzipFile(fileobj=self.temporary_file, mode='rb') + except: + raise tuf.DecompressionError(self.temporary_file) + + @@ -519,7 +528,7 @@ def load_json_file(filepath): tuf.FormatError: If 'filepath' is improperly formatted. - tuf.Error: If 'filepath' could not be opened. + IOError in case of runtime IO exceptions. None. @@ -532,13 +541,18 @@ def load_json_file(filepath): # Making sure that the format of 'filepath' is a path string. # tuf.FormatError is raised on incorrect format. tuf.formats.PATH_SCHEMA.check_match(filepath) - - try: + + # The file is most likely gzipped. + if filepath.endswith('.gz'): + logger.debug('gzip.open('+str(filepath)+')') + fileobject = gzip.open(filepath) + else: + logger.debug('open('+str(filepath)+')') fileobject = open(filepath) - except IOError, err: - raise tuf.Error(err) try: return json.load(fileobject) finally: fileobject.close() + +