From 45203d25d162f0a54ecab9130b86f9866c0e3d1f Mon Sep 17 00:00:00 2001 From: vladdd Date: Fri, 9 Aug 2013 08:29:57 -0400 Subject: [PATCH 1/3] Update tuf-spec.txt and implement "lazy bin walk" tuf-spec.txt was updated to include the latest metadata changes, such as version numbers, and the "lazy bin walk" scheme was implemented in updater.py. --- docs/tuf-spec.txt | 64 ++++++++++++++++++++-------------------- tuf/client/updater.py | 68 ++++++++++++++++++++++++++++++++++++------- 2 files changed, 88 insertions(+), 44 deletions(-) diff --git a/docs/tuf-spec.txt b/docs/tuf-spec.txt index 0c88c437..517d17e3 100644 --- a/docs/tuf-spec.txt +++ b/docs/tuf-spec.txt @@ -32,7 +32,10 @@ in all popular Linux package managers. More information and current versions of this document can be found at https://www.updateframework.com/ - The development of TUF is supported by GENI (http://www.geni.net/). + The Global Environment for Network Innovations (GENI) and the National + Science Foundation (NSF) have provided support for the development of TUF. + (http://www.geni.net/) + (http://www.nsf.gov/) TUF's Python implementation is based heavily on Thandy, the application updater for Tor (http://www.torproject.org/). Its design and this spec are @@ -409,26 +412,24 @@ 4.2. File formats: general principles All signed files are of the format: - { "signed" : X, + { "signed" : ROLE, "signatures" : [ - { "keyid" : K, - "method" : M, - "sig" : S } + { "keyid" : KEYID, + "method" : METHOD, + "sig" : SIGNATURE } , ... ] } - where: X is a list whose first element describes the signed object. - K is the identifier of a key signing the document - M is the method to be used to make the signature - S is a signature of the canonical encoding of X using the - identified key. + where: ROLE is a dictionary whose "_type" field describes the role type. + KEYID is the identifier of the key signing the ROLE dictionary. + METHOD is the key signing method used to generate the signature. 
+ SIGNATURE is a signature of the canonical encoding of ROLE using the + signing key belonging to KEYID. We define one signing method at present: - sha256-pkcs1 : A base64 encoded signature of the SHA256 hash of the - canonical encoding of X, using PKCS-1 padding. + "evp" : An interface to OpenSSL's EVP functions. - All times are given as strings of the format "YYYY-MM-DD HH:MM:SS", - in UTC. + All times are given as strings of the format "YYYY-MM-DD HH:MM:SS UTC". All keys are of the format: { "keytype" : KEYTYPE, @@ -443,13 +444,12 @@ We define one keytype at present: 'rsa'. Its format is: { "keytype" : "rsa", - "keyval" : { "e" : E, - "n" : N } + "keyval" : { "public" : PUBLIC, + "private" : PRIVATE } } - where E and N are the binary representations of the exponent and - modulus, encoded as big-endian numbers in base64. All RSA keys must - be at least 2048 bits long. + where PUBLIC and PRIVATE are in PEM format and are strings. All RSA keys + must be at least 2048 bits long. 4.3. File formats: root.txt @@ -462,7 +462,7 @@ The format of root.txt is as follows: { "_type" : "Root", - "ts" : TIME, + "version" : VERSION, "expires" : EXPIRES, "keys" : { KEYID : KEY @@ -474,12 +474,11 @@ , ... } } - The "ts" line describes when this file was updated. Clients - MUST NOT replace a file with an older one, and SHOULD NOT accept a - file too far in the future. + VERSION is an integer that is greater than 0. Clients MUST NOT replace a + metadata file with a version number less than the one currently trusted. - The "expires" line states when the metadata should be considered expired - and no longer trusted by clients. Clients MUST NOT trust an expired file. + EXPIRES determines when metadata should be considered expired and no longer + trusted by clients. Clients MUST NOT trust an expired file. A ROLE is one of "root", "release", "targets", "timestamp", or "mirrors". 
A role for each of "root", "release", "timestamp", and "targets" MUST be @@ -505,7 +504,7 @@ The format of release.txt is as follows: { "_type" : "Release", - "ts" : TIME, + "version" : VERSION, "expires" : EXPIRES, "meta" : METAFILES } @@ -527,7 +526,7 @@ The format of targets.txt is as follows: { "_type" : "Targets", - "ts" : TIME, + "version" : VERSION, "expires" : EXPIRES, "targets" : TARGETS, ("delegations" : DELEGATIONS) @@ -572,10 +571,9 @@ The "paths" list describes paths that the role is trusted to provide. Clients MUST check that a target is in one of the trusted paths of all roles in a delegation chain, not just in a trusted path of the role that describes - the target file. The format of a PATHPATTERN may be either a path to a - single file or a path to a directory and end with "/**" to indicate all - files under that directory. The value of "/**" by itself therefore means - all files. + the target file. The format of a PATHPATTERN may be either a path to a single + file, or a path to a directory to indicate all files and/or subdirectories + under that directory. We are currently investigating a few "priority tag" schemes to resolve conflicts between delegated roles that share responsibility for overlapping @@ -610,7 +608,7 @@ The format of the timestamp file is as follows: { "_type" : "Timestamp", - "ts" : TIME, + "version" : VERSION, "expires" : EXPIRES, "meta" : METAFILES } @@ -628,7 +626,7 @@ The format of mirrors.txt is as follows: { "_type" : "Mirrorlist", - "ts" : TIME, + "version" : VERSION, "expires" : EXPIRES, "mirrors" : [ { "urlbase" : URLBASE, diff --git a/tuf/client/updater.py b/tuf/client/updater.py index dce742b5..acc6daf5 100755 --- a/tuf/client/updater.py +++ b/tuf/client/updater.py @@ -1534,6 +1534,13 @@ def target(self, target_filepath): # Raise 'tuf.FormatError' if there is a mismatch. tuf.formats.RELPATH_SCHEMA.check_match(target_filepath) + # The algorithm used by the repository to generate the hashes of the + # target filepaths. 
The repository may optionally organize + # targets into hashed bins to ease target delegations and role metadata + # management. The use of consistent hashing allows for a uniform + # distribution of targets into bins. + HASH_PATH_ALGORITHM = 'sha256' + # Ensure the client has the most up-to-date version of 'targets.txt'. # Raise 'tuf.MetadataNotAvailableError' if the changed metadata # cannot be successfully downloaded and 'tuf.RepositoryError' if the @@ -1545,12 +1552,23 @@ def target(self, target_filepath): # The target is assumed to be missing until proven otherwise. target = None + # Calculate the hash of the filepath to determine which bin to find the + # target. The client currently assumes the repository uses + # 'HASH_PATH_ALGORITHM' to generate hashes. + # TODO: Should the TUF spec restrict the repository to one particular + # algorithm? Should we allow the repository to specify in the role + # dictionary the algorithm used for these generated hashed paths? + digest_object = tuf.hash.digest(HASH_PATH_ALGORITHM) + digest_object.update(target_filepath) + target_file_path_hash = digest_object.hexdigest() + try: current_metadata = self.metadata['current'] role_names = ['targets'] # Preorder depth-first traversal of the tree of target delegations. while len(role_names) > 0 and target is None: + # Pop the role name from the top of the stack. role_name = role_names.pop(-1) @@ -1575,20 +1593,48 @@ def target(self, target_filepath): break # Push children in reverse order of appearance onto the stack. + # NOTE: This may be a slow operation if there are many delegated roles + # or bins. for child_role in reversed(child_roles): child_role_name = child_role['name'] - child_role_paths = child_role['paths'] + child_role_paths = child_role.get('paths') + child_role_path_hash_prefix = child_role.get('path_hash_prefix') - # Ensure that we explore only delegated roles trusted with the target. 
- # We assume conservation of delegated paths in the complete tree of - # delegations. Note that the call to _ensure_all_targets_allowed in - # _update_metadata should already ensure that all targets metadata is - # valid; i.e. that the targets signed by a delegatee is a proper - # subset of the targets delegated to it by the delegator. - # Nevertheless, we check it again here for performance and safety - # reasons. - if target_filepath in child_role_paths: - role_names.append(child_role_name) + if child_role_path_hash_prefix is not None: + if target_file_path_hash.startswith(child_role_path_hash_prefix): + + # Found a matching path hash prefix. The metadata for + # 'child_role_name' will be retrieved on the next iteration + # of the while-loop. + role_names.append(child_role_name) + elif child_role_paths is not None: + + # Ensure that we explore only delegated roles trusted with the target. + # We assume conservation of delegated paths in the complete tree of + # delegations. Note that the call to _ensure_all_targets_allowed in + # _update_metadata should already ensure that all targets metadata is + # valid; i.e. that the targets signed by a delegatee is a proper + # subset of the targets delegated to it by the delegator. + # Nevertheless, we check it again here for performance and safety + # reasons. + for child_role_path in child_role_paths: + + # A child role path may be a filepath or directory. Explore + # directories which may contain 'target_filepath'. + prefix = os.path.commonprefix([target_filepath, child_role_path]) + if target_filepath in child_role_paths: + + # The metadata for 'child_role_name' will be retrieved on the next + # iteration of the while-loop. + role_names.append(child_role_name) + else: + + # 'role_name' should have been validated when it was downloaded. + # The 'paths' or 'path_hash_prefix' fields should not be missing, + # but log a warning if this else clause is reached. 
+ message = repr(child_role)+' unexpectedly did not contain one of '+\ + 'the required fields ("paths" or "path_hash_prefix").' + logger.warn(message) except: raise finally: From f214d9019e1938c586596b112bddbf28f13d3f15 Mon Sep 17 00:00:00 2001 From: vladdd Date: Fri, 9 Aug 2013 10:43:26 -0400 Subject: [PATCH 2/3] Expand comment and add missing prefix comparison in updater.target() --- tuf/client/updater.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tuf/client/updater.py b/tuf/client/updater.py index acc6daf5..5fe9d0a9 100755 --- a/tuf/client/updater.py +++ b/tuf/client/updater.py @@ -1619,10 +1619,11 @@ def target(self, target_filepath): # reasons. for child_role_path in child_role_paths: - # A child role path may be a filepath or directory. Explore - # directories which may contain 'target_filepath'. + # A child role path may be a filepath or directory. The child + # role 'child_role_name' is added if 'target_filepath' is located + # under 'child_role_path'. Explicit filepaths are also added. prefix = os.path.commonprefix([target_filepath, child_role_path]) - if target_filepath in child_role_paths: + if prefix == child_role_path: # The metadata for 'child_role_name' will be retrieved on the next # iteration of the while-loop. From e5731749bfa1f3f715a7e8426273b287652d39cc Mon Sep 17 00:00:00 2001 From: vladdd Date: Fri, 9 Aug 2013 12:13:01 -0400 Subject: [PATCH 3/3] Modify _ensure_all_targets_allowed() to also work with path_hash_prefix --- tuf/client/updater.py | 88 +++++++++++++++++++++++++++++++------------ 1 file changed, 64 insertions(+), 24 deletions(-) diff --git a/tuf/client/updater.py b/tuf/client/updater.py index 5fe9d0a9..83f5f878 100755 --- a/tuf/client/updater.py +++ b/tuf/client/updater.py @@ -912,7 +912,9 @@ def _ensure_all_targets_allowed(self, metadata_role, metadata_object): under 'paths'. 
A parent role may delegate trust to all files under a particular directory, including files in subdirectories, by simply listing the directory (e.g., 'packages/source/Django/', the equivalent - of 'packages/source/Django/*'). + of 'packages/source/Django/*'). Targets listed in hashed bins are + also validated (i.e., their calculated path hash prefix must be delegated + by the parent role). metadata_role: @@ -928,7 +930,8 @@ def _ensure_all_targets_allowed(self, metadata_role, metadata_object): tuf.RepositoryError: If the targets of 'metadata_role' are not allowed according to - the parent's metadata file. + the parent's metadata file. The 'paths' and 'path_hash_prefix' fields + are verified. None. @@ -938,6 +941,13 @@ def _ensure_all_targets_allowed(self, metadata_role, metadata_object): """ + # The algorithm used by the repository to generate the hashes of the + # target filepaths. The repository may optionally organize + # targets into hashed bins to ease target delegations and role metadata + # management. The use of consistent hashing allows for a uniform + # distribution of targets into bins. + HASH_PATH_ALGORITHM = 'sha256' + # Return if 'metadata_role' is 'targets'. 'targets' is not # a delegated role. if metadata_role == 'targets': @@ -955,30 +965,60 @@ def _ensure_all_targets_allowed(self, metadata_role, metadata_object): role_index = tuf.repo.signerlib.find_delegated_role(roles, metadata_role) # Ensure the delegated role exists prior to extracting trusted paths - # from the parent's 'paths'. + # from the parent's 'paths', or trusted path hash prefixes from the parent's + # 'path_hash_prefix'. 
if role_index is not None: role = roles[role_index] - allowed_child_paths = role['paths'] + allowed_child_paths = role.get('paths') + allowed_child_path_hash_prefix = role.get('path_hash_prefix') actual_child_targets = metadata_object['targets'].keys() - - # Check that each delegated target is either explicitly listed or a parent - # directory is found under role['paths'], otherwise raise an exception. - # If the parent role explicitly lists target file paths in 'paths', - # this loop will run in O(n^2), the worst-case. The repository - # maintainer will likely delegate entire directories, and opt for - # explicit file paths if the targets in a directory are delegated to - # different roles/developers. - for child_target in actual_child_targets: - for allowed_child_path in allowed_child_paths: - prefix = os.path.commonprefix([child_target, allowed_child_path]) - if prefix == allowed_child_path: - break - else: - message = 'Role '+repr(metadata_role)+' specifies target '+\ - repr(child_target)+' which is not an allowed path according '+\ - 'to the delegations set by '+repr(parent_role)+'.' - raise tuf.RepositoryError(message) + if allowed_child_path_hash_prefix is not None: + for child_target in actual_child_targets: + # Calculate the hash of 'child_target' to determine if it has been + # placed in the correct bin. The client currently assumes the + # repository uses 'HASH_PATH_ALGORITHM' to generate hashes. + # TODO: Should the TUF spec restrict the repository to one particular + # algorithm? Should we allow the repository to specify in the role + # dictionary the algorithm used for these generated hashed paths? 
+ digest_object = tuf.hash.digest(HASH_PATH_ALGORITHM) + digest_object.update(child_target) + child_target_path_hash = digest_object.hexdigest() + + if not child_target_path_hash.startswith(allowed_child_path_hash_prefix): + message = 'Role '+repr(metadata_role)+' specifies target '+\ + repr(child_target)+ ' which does not have a path hash prefix '+\ + 'matching the prefix listed by the parent role '+\ + repr(parent_role)+'.' + raise tuf.RepositoryError(message) + elif allowed_child_paths is not None: + + # Check that each delegated target is either explicitly listed or a parent + # directory is found under role['paths'], otherwise raise an exception. + # If the parent role explicitly lists target file paths in 'paths', + # this loop will run in O(n^2), the worst-case. The repository + # maintainer will likely delegate entire directories, and opt for + # explicit file paths if the targets in a directory are delegated to + # different roles/developers. + for child_target in actual_child_targets: + for allowed_child_path in allowed_child_paths: + prefix = os.path.commonprefix([child_target, allowed_child_path]) + if prefix == allowed_child_path: + break + else: + message = 'Role '+repr(metadata_role)+' specifies target '+\ + repr(child_target)+' which is not an allowed path according '+\ + 'to the delegations set by '+repr(parent_role)+'.' + raise tuf.RepositoryError(message) + else: + + # 'role' should have been validated when it was downloaded. + # The 'paths' or 'path_hash_prefix' fields should not be missing, + # so log a warning if this else clause is reached. + message = repr(role)+' unexpectedly did not contain one of '+\ + 'the required fields ("paths" or "path_hash_prefix").' + logger.warn(message) + # Raise an exception if the parent has not delegated to the specified # 'metadata_role' child role. 
else: @@ -1014,7 +1054,7 @@ def _fileinfo_has_changed(self, metadata_filename, new_fileinfo): dict conforms to 'tuf.formats.FILEINFO_SCHEMA' and has the form: {'length': 23423 - 'hashes': {'sha256': adfbc32343..}} + 'hashes': {'sha256': adfbc32343..}} None. @@ -1632,7 +1672,7 @@ def target(self, target_filepath): # 'role_name' should have been validated when it was downloaded. # The 'paths' or 'path_hash_prefix' fields should not be missing, - # but log a warning if this else clause is reached. + # so log a warning if this else clause is reached. message = repr(child_role)+' unexpectedly did not contain one of '+\ 'the required fields ("paths" or "path_hash_prefix").' logger.warn(message)