diff --git a/docs/tuf-spec.txt b/docs/tuf-spec.txt index 06c83001..abd17cae 100644 --- a/docs/tuf-spec.txt +++ b/docs/tuf-spec.txt @@ -32,7 +32,10 @@ in all popular Linux package managers. More information and current versions of this document can be found at https://www.updateframework.com/ - The development of TUF is supported by GENI (http://www.geni.net/). + The Global Environment for Network Innovations (GENI) and the National + Science Foundation (NSF) have provided support for the development of TUF. + (http://www.geni.net/) + (http://www.nsf.gov/) TUF's Python implementation is based heavily on Thandy, the application updater for Tor (http://www.torproject.org/). Its design and this spec are @@ -409,26 +412,24 @@ 4.2. File formats: general principles All signed files are of the format: - { "signed" : X, + { "signed" : ROLE, "signatures" : [ - { "keyid" : K, - "method" : M, - "sig" : S } + { "keyid" : KEYID, + "method" : METHOD, + "sig" : SIGNATURE } , ... ] } - where: X is a list whose first element describes the signed object. - K is the identifier of a key signing the document - M is the method to be used to make the signature - S is a signature of the canonical encoding of X using the - identified key. + where: ROLE is a dictionary whose "_type" field describes the role type. + KEYID is the identifier of the key signing the ROLE dictionary. + METHOD is the key signing method used to generate the signature. + SIGNATURE is a signature of the canonical encoding of ROLE using the + signing key belonging to KEYID. We define one signing method at present: - sha256-pkcs1 : A base64 encoded signature of the SHA256 hash of the - canonical encoding of X, using PKCS-1 padding. + "evp" : An interface to OpenSSL's EVP functions. - All times are given as strings of the format "YYYY-MM-DD HH:MM:SS", - in UTC. + All times are given as strings of the format "YYYY-MM-DD HH:MM:SS UTC". 
All keys are of the format: { "keytype" : KEYTYPE, @@ -443,13 +444,12 @@ We define one keytype at present: 'rsa'. Its format is: { "keytype" : "rsa", - "keyval" : { "e" : E, - "n" : N } + "keyval" : { "public" : PUBLIC, + "private" : PRIVATE } } - where E and N are the binary representations of the exponent and - modulus, encoded as big-endian numbers in base64. All RSA keys must - be at least 2048 bits long. + where PUBLIC and PRIVATE are in PEM format and are strings. All RSA keys + must be at least 2048 bits long. 4.3. File formats: root.txt @@ -462,7 +462,7 @@ The format of root.txt is as follows: { "_type" : "Root", - "ts" : TIME, + "version" : VERSION, "expires" : EXPIRES, "keys" : { KEYID : KEY @@ -474,12 +474,11 @@ , ... } } - The "ts" line describes when this file was updated. Clients - MUST NOT replace a file with an older one, and SHOULD NOT accept a - file too far in the future. + VERSION is an integer that is greater than 0. Clients MUST NOT replace a + metadata file with a version number less than the one currently trusted. - The "expires" line states when the metadata should be considered expired - and no longer trusted by clients. Clients MUST NOT trust an expired file. + EXPIRES determines when metadata should be considered expired and no longer + trusted by clients. Clients MUST NOT trust an expired file. A ROLE is one of "root", "release", "targets", "timestamp", or "mirrors". A role for each of "root", "release", "timestamp", and "targets" MUST be @@ -505,7 +504,7 @@ The format of release.txt is as follows: { "_type" : "Release", - "ts" : TIME, + "version" : VERSION, "expires" : EXPIRES, "meta" : METAFILES } @@ -527,7 +526,7 @@ The format of targets.txt is as follows: { "_type" : "Targets", - "ts" : TIME, + "version" : VERSION, "expires" : EXPIRES, "targets" : TARGETS, ("delegations" : DELEGATIONS) @@ -575,11 +574,14 @@ The "paths" list describes paths that the role is trusted to provide. 
Clients MUST check that a target is in one of the trusted paths of all roles in a delegation chain, not just in a trusted path of the role that describes - the target file. The format of a PATHPATTERN may be either a path to a - single file or a path to a directory. A path to a directory is used to - indicate all possible targets sharing that directory as a prefix; e.g. if - the directory is "targets/A", then targets which match that directory - include "targets/A/B.txt" and "targets/A/B/C.txt". + the target file. The format of a PATHPATTERN may be either a path to a single + file, or a path to a directory to indicate all files and/or subdirectories + under that directory. + + A path to a directory is used to indicate all possible targets sharing that + directory as a prefix; e.g. if the directory is "targets/A", then targets + which match that directory include "targets/A/B.txt" and + "targets/A/B/C.txt". The "path_hash_prefix" is used to succinctly describe a set of target paths. The target paths must meet this condition: each target path, when hashed @@ -588,6 +590,10 @@ split a large number of targets into separate bins identified by consistent hashing. + TODO: Should the TUF spec restrict the repository to one particular + algorithm? Should we allow the repository to specify in the role dictionary + the algorithm used for these generated hashed paths? + We are currently investigating a few "priority tag" schemes to resolve conflicts between delegated roles that share responsibility for overlapping target paths. 
One of the simplest of such schemes is for the client to @@ -621,7 +627,7 @@ The format of the timestamp file is as follows: { "_type" : "Timestamp", - "ts" : TIME, + "version" : VERSION, "expires" : EXPIRES, "meta" : METAFILES } @@ -639,7 +645,7 @@ The format of mirrors.txt is as follows: { "_type" : "Mirrorlist", - "ts" : TIME, + "version" : VERSION, "expires" : EXPIRES, "mirrors" : [ { "urlbase" : URLBASE, diff --git a/tuf/client/updater.py b/tuf/client/updater.py index 30880664..835d7701 100755 --- a/tuf/client/updater.py +++ b/tuf/client/updater.py @@ -726,8 +726,7 @@ def _update_metadata(self, metadata_role, fileinfo=None, compression=None): # Reject the metadata if any specified targets are not allowed. if metadata_signable['signed']['_type'] == 'Targets': - #self._ensure_all_targets_allowed(metadata_role, metadata_signable['signed']) - pass + self._ensure_all_targets_allowed(metadata_role, metadata_signable['signed']) # The metadata has been verified. Move the metadata file into place. # First, move the 'current' metadata file to the 'previous' directory @@ -916,7 +915,13 @@ def _ensure_all_targets_allowed(self, metadata_role, metadata_object): under 'paths'. A parent role may delegate trust to all files under a particular directory, including files in subdirectories, by simply listing the directory (e.g., 'packages/source/Django/', the equivalent - of 'packages/source/Django/*'). + of 'packages/source/Django/*'). Targets listed in hashed bins are + also validated (i.e., each target's calculated path hash prefix must be + delegated by the parent role). + + TODO: Should the TUF spec restrict the repository to one particular + algorithm? Should we allow the repository to specify in the role + dictionary the algorithm used for these generated hashed paths? 
metadata_role: @@ -932,7 +937,8 @@ def _ensure_all_targets_allowed(self, metadata_role, metadata_object): tuf.RepositoryError: If the targets of 'metadata_role' are not allowed according to - the parent's metadata file. + the parent's metadata file. The 'paths' and 'path_hash_prefix' fields + are verified. None. @@ -941,7 +947,7 @@ def _ensure_all_targets_allowed(self, metadata_role, metadata_object): None. """ - + # Return if 'metadata_role' is 'targets'. 'targets' is not # a delegated role. if metadata_role == 'targets': @@ -959,29 +965,54 @@ def _ensure_all_targets_allowed(self, metadata_role, metadata_object): role_index = tuf.repo.signerlib.find_delegated_role(roles, metadata_role) # Ensure the delegated role exists prior to extracting trusted paths - # from the parent's 'paths'. + # from the parent's 'paths', or trusted path hash prefixes from the parent's + # 'path_hash_prefix'. if role_index is not None: role = roles[role_index] - allowed_child_paths = role['paths'] + allowed_child_paths = role.get('paths') + allowed_child_path_hash_prefix = role.get('path_hash_prefix') actual_child_targets = metadata_object['targets'].keys() - - # Check that each delegated target is either explicitly listed or a - # parent directory is found under role['paths'], otherwise raise an - # exception. If the parent role explicitly lists target file paths in - # 'paths', this loop will run in O(n^2). The repository maintainer will - # likely delegate entire directories, and opt for explicit file paths if - # the targets in a directory are delegated to different roles/developers. - for child_target in actual_child_targets: - for allowed_child_path in allowed_child_paths: - prefix = os.path.commonprefix([child_target, allowed_child_path]) - if prefix == allowed_child_path: - break - else: - message = 'Role '+repr(metadata_role)+' specifies target '+\ - repr(child_target)+' which is not an allowed path according '+\ - 'to the delegations set by '+repr(parent_role)+'.' 
- raise tuf.RepositoryError(message) + if allowed_child_path_hash_prefix is not None: + for child_target in actual_child_targets: + # Calculate the hash of 'child_target' to determine if it has been + # placed in the correct bin. + child_target_path_hash = self._get_target_hash(child_target) + + if not child_target_path_hash.startswith(allowed_child_path_hash_prefix): + message = 'Role '+repr(metadata_role)+' specifies target '+\ + repr(child_target)+ ' which does not have a path hash prefix '+\ + 'matching the prefix listed by the parent role '+\ + repr(parent_role)+'.' + raise tuf.RepositoryError(message) + elif allowed_child_paths is not None: + + # Check that each delegated target is either explicitly listed or a parent + # directory is found under role['paths'], otherwise raise an exception. + # If the parent role explicitly lists target file paths in 'paths', + # this loop will run in O(n^2), the worst-case. The repository + # maintainer will likely delegate entire directories, and opt for + # explicit file paths if the targets in a directory are delegated to + # different roles/developers. + for child_target in actual_child_targets: + for allowed_child_path in allowed_child_paths: + prefix = os.path.commonprefix([child_target, allowed_child_path]) + if prefix == allowed_child_path: + break + else: + message = 'Role '+repr(metadata_role)+' specifies target '+\ + repr(child_target)+' which is not an allowed path according '+\ + 'to the delegations set by '+repr(parent_role)+'.' + raise tuf.RepositoryError(message) + else: + + # 'role' should have been validated when it was downloaded. + # The 'paths' or 'path_hash_prefix' fields should not be missing, + # so log a warning if this else clause is reached. + message = repr(role)+' unexpectedly did not contain one of '+\ + 'the required fields ("paths" or "path_hash_prefix").' + logger.warn(message) + # Raise an exception if the parent has not delegated to the specified # 'metadata_role' child role. 
else: @@ -1017,7 +1048,7 @@ def _fileinfo_has_changed(self, metadata_filename, new_fileinfo): dict conforms to 'tuf.formats.FILEINFO_SCHEMA' and has the form: {'length': 23423 - 'hashes': {'sha256': adfbc32343..}} + 'hashes': {'sha256': adfbc32343..}} None. @@ -1500,10 +1531,7 @@ def targets_of_role(self, rolename='targets'): def target(self, target_filepath): """ - Return the target file information for 'target_filepath'. We interrogate - the tree of target delegations in order of appearance (which implicitly - order trustworthiness), and return the matching target found in the most - trusted role. + Return the target file information for 'target_filepath'. target_filepath: @@ -1533,6 +1561,54 @@ def target(self, target_filepath): # Raise 'tuf.FormatError' if there is a mismatch. tuf.formats.RELPATH_SCHEMA.check_match(target_filepath) + # Get target by looking at roles in order of priority tags. + target = self._preorder_depth_first_walk(target_filepath) + + # Raise an exception if the target information could not be retrieved. + if target is None: + message = target_filepath+' not found.' + logger.error(message) + raise tuf.RepositoryError(message) + # Otherwise, return the found target. + else: + return target + + + + + + def _preorder_depth_first_walk(self, target_filepath): + """ + + Interrogate the tree of target delegations in order of appearance (which + implicitly orders trustworthiness), and return the matching target + found in the most trusted role. + + + target_filepath: + The path to the target file on the repository. This will be relative to + the 'targets' (or equivalent) directory on a given mirror. + + + tuf.FormatError: + If 'target_filepath' is improperly formatted. + + tuf.RepositoryError: + If 'target_filepath' was not found. + + + The metadata for updated delegated roles are downloaded and stored. + + + The target information for 'target_filepath', conformant to + 'tuf.formats.TARGETFILE_SCHEMA'. 
+ + """ + + target = None + current_metadata = self.metadata['current'] + role_names = ['targets'] + # Ensure the client has the most up-to-date version of 'targets.txt'. # Raise 'tuf.MetadataNotAvailableError' if the changed metadata # cannot be successfully downloaded and 'tuf.RepositoryError' if the @@ -1541,21 +1617,9 @@ def target(self, target_filepath): # updater.refresh()). self._update_metadata_if_changed('targets') - # The target is assumed to be missing until proven otherwise. - target = None - - # According to the specification, the target_filepath must be hashed with - # the SHA256 hash function in order to be compared with the - # "path_hash_prefix" attribute. - target_filepath_digest = tuf.hash.digest(algorithm='sha256') - target_filepath_digest.update(target_filepath) - target_filepath_hash = target_filepath_digest.hexdigest() - - current_metadata = self.metadata['current'] - role_names = ['targets'] - # Preorder depth-first traversal of the tree of target delegations. while len(role_names) > 0 and target is None: + # Pop the role name from the top of the stack. role_name = role_names.pop(-1) @@ -1566,58 +1630,215 @@ def target(self, target_filepath): # expects _update_metadata_if_changed() to have already refreshed it, # which this function has checked above. self._refresh_targets_metadata(role_name, include_delegations=False) + role_metadata = current_metadata[role_name] targets = role_metadata['targets'] delegations = role_metadata.get('delegations', {}) child_roles = delegations.get('roles', []) + target = self._get_target_from_targets_role(role_name, targets, + target_filepath) - # Does the current role name have our target? 
- logger.info('Asking role '+role_name+' about target '+target_filepath) - for filepath, fileinfo in targets.iteritems(): - if filepath == target_filepath: - logger.info('Found target '+target_filepath+' in role '+role_name) - target = {'filepath': filepath, 'fileinfo': fileinfo} - break + if target is None: - # Push children in reverse order of appearance onto the stack. - for child_role in reversed(child_roles): - child_role_name = child_role['name'] - child_role_paths = child_role.get('paths') - child_role_path_hash_prefix = child_role.get('path_hash_prefix') - - # Ensure that we explore only delegated roles trusted with the target. - # We assume conservation of delegated paths in the complete tree of - # delegations. Note that the call to _ensure_all_targets_allowed in - # _update_metadata should already ensure that all targets metadata is - # valid; i.e. that the targets signed by a delegatee is a proper - # subset of the targets delegated to it by the delegator. - # Nevertheless, we check it again here for performance and safety - # reasons. - - if child_role_path_hash_prefix is not None: - if target_filepath_hash.startswith(child_role_path_hash_prefix): + # Push children in reverse order of appearance onto the stack. + # NOTE: This may be a slow operation if there are many delegated roles. + for child_role in reversed(child_roles): + child_role_name = self._visit_child_role(child_role, target_filepath) + if child_role_name is None: + logger.debug('Skipping child role '+repr(child_role['name'])) + else: + logger.info('Adding child role '+repr(child_role_name)) role_names.append(child_role_name) - elif child_role_paths is not None: - # TODO: is child_role_paths directories or paths? 
- for child_role_path in child_role_paths: - if child_role_path.endswith('/'): - if target_filepath.startswith(child_role_path): - role_names.append(child_role_name) - else: - if target_filepath == child_role_path: - role_names.append(child_role_name) - else: - raise tuf.RepositoryError(str(child_role_name)+' has neither ' \ - '"paths" nor "path_hash_prefix"!') - # Raise an exception if the target information could not be retrieved. - if target is None: - message = target_filepath+' not found.' - logger.error(message) - raise tuf.RepositoryError(message) - # Otherwise, return the found target. + else: + logger.info('Found target in current role '+repr(role_name)) + + return target + + + + + + def _get_target_from_targets_role(self, role_name, targets, target_filepath): + """ + + Determine whether the targets role with the given 'role_name' has the + target with the name 'target_filepath'. + + + role_name: + The name of the targets role that we are inspecting. + + targets: + The targets of the Targets role with the name 'role_name'. + + target_filepath: + The path to the target file on the repository. This will be relative to + the 'targets' (or equivalent) directory on a given mirror. + + + None. + + + None. + + + The target information for 'target_filepath', conformant to + 'tuf.formats.TARGETFILE_SCHEMA'. + + """ + + target = None + + # Does the current role name have our target? + logger.info('Asking role '+role_name+' about target '+target_filepath) + for filepath, fileinfo in targets.iteritems(): + if filepath == target_filepath: + logger.info('Found target '+target_filepath+' in role '+role_name) + target = {'filepath': filepath, 'fileinfo': fileinfo} + break + else: + logger.debug('No target '+target_filepath+' in role '+role_name) + + return target + + + + + + + def _visit_child_role(self, child_role, target_filepath): + """ + + Determine whether the given 'child_role' has been delegated the target + with the name 'target_filepath'. 
+ + Ensure that we explore only delegated roles trusted with the target. We + assume conservation of delegated paths in the complete tree of + delegations. Note that the call to _ensure_all_targets_allowed in + _update_metadata should already ensure that all targets metadata is + valid; i.e. that the targets signed by a delegatee are a subset of + the targets delegated to it by the delegator. Nevertheless, we check it + again here for performance and safety reasons. + + TODO: Should the TUF spec restrict the repository to one particular + algorithm? Should we allow the repository to specify in the role + dictionary the algorithm used for these generated hashed paths? + + + child_role: + The delegation targets role object of 'child_role', containing its + paths, path_hash_prefix, keys and so on. + + target_filepath: + The path to the target file on the repository. This will be relative to + the 'targets' (or equivalent) directory on a given mirror. + + + tuf.FormatError, if 'child_role' lacks both "paths" and "path_hash_prefix". + + + None. + + + If 'child_role' has been delegated the target with the name + 'target_filepath', then we return the role name of 'child_role'. + + Otherwise, we return None. + + """ + + child_role_name = child_role['name'] + child_role_paths = child_role.get('paths') + child_role_path_hash_prefix = child_role.get('path_hash_prefix') + # A boolean indicator that tells us whether 'child_role' has been delegated + # the target with the name 'target_filepath'. 
+ child_role_is_relevant = False + + if child_role_path_hash_prefix is not None: + target_filepath_hash = self._get_target_hash(target_filepath) + + if target_filepath_hash.startswith(child_role_path_hash_prefix): + logger.info('Child role '+repr(child_role_name)+' has target '+ + repr(target_filepath)) + child_role_is_relevant = True + else: + logger.debug('Child role '+repr(child_role_name)+ + ' does not have target '+repr(target_filepath)) + + elif child_role_paths is not None: + + for child_role_path in child_role_paths: + + # A child role path may be a filepath or directory. The child + # role 'child_role_name' is added if 'target_filepath' is located + # under 'child_role_path'. Explicit filepaths are also added. + prefix = os.path.commonprefix([target_filepath, child_role_path]) + + if prefix == child_role_path: + logger.info('Child role '+repr(child_role_name)+' has target '+ + repr(target_filepath)) + child_role_is_relevant = True + else: + logger.debug('Child role '+repr(child_role_name)+ + ' does not have target '+repr(target_filepath)) + + else: - return target + + # 'child_role' should have been validated when it was downloaded. + # The 'paths' or 'path_hash_prefix' fields should not be missing, + # so raise an exception if this else clause is reached. + raise tuf.FormatError(repr(child_role_name)+' has neither ' \ + '"paths" nor "path_hash_prefix"!') + + if child_role_is_relevant: + return child_role_name + else: + return None + + + + + + def _get_target_hash(self, target_filepath, hash_function='sha256'): + """ + + Compute the hash of 'target_filepath'. This is useful in conjunction with + the "path_hash_prefix" attribute in a delegated targets role, which tells + us which paths it is implicitly responsible for. + + + target_filepath: + The path to the target file on the repository. This will be relative to + the 'targets' (or equivalent) directory on a given mirror. 
+ + hash_function: + The algorithm used by the repository to generate the hashes of the + target filepaths. The repository may optionally organize targets into + hashed bins to ease target delegations and role metadata management. + The use of consistent hashing allows for a uniform distribution of + targets into bins. + + + None. + + + None. + + + The hash of 'target_filepath'. + + """ + + # Calculate the hash of the filepath to determine which bin to find the + # target. The client currently assumes the repository uses + # 'hash_function' to generate hashes. + + digest_object = tuf.hash.digest(hash_function) + digest_object.update(target_filepath) + target_filepath_hash = digest_object.hexdigest() + + return target_filepath_hash