From 609bbe084e70fa08f54902fbfe501dcba48c8182 Mon Sep 17 00:00:00 2001 From: vladdd Date: Fri, 9 Aug 2013 08:29:57 -0400 Subject: [PATCH] Update tuf-spec.txt and implement "lazy bin walk" tuf-spec.txt was updated to include the latest metadata changes, such as version numbers, and the "lazy bin walk" scheme was implemented in updater.py. --- docs/tuf-spec.txt | 64 ++++++++++++++++++++-------------------- tuf/client/updater.py | 68 ++++++++++++++++++++++++++++++++++++------- 2 files changed, 88 insertions(+), 44 deletions(-) diff --git a/docs/tuf-spec.txt b/docs/tuf-spec.txt index 0c88c437..517d17e3 100644 --- a/docs/tuf-spec.txt +++ b/docs/tuf-spec.txt @@ -32,7 +32,10 @@ in all popular Linux package managers. More information and current versions of this document can be found at https://www.updateframework.com/ - The development of TUF is supported by GENI (http://www.geni.net/). + The Global Environment for Network Innovations (GENI) and the National + Science Foundation (NSF) have provided support for the development of TUF. + (http://www.geni.net/) + (http://www.nsf.gov/) TUF's Python implementation is based heavily on Thandy, the application updater for Tor (http://www.torproject.org/). Its design and this spec are @@ -409,26 +412,24 @@ 4.2. File formats: general principles All signed files are of the format: - { "signed" : X, + { "signed" : ROLE, "signatures" : [ - { "keyid" : K, - "method" : M, - "sig" : S } + { "keyid" : KEYID, + "method" : METHOD, + "sig" : SIGNATURE } , ... ] } - where: X is a list whose first element describes the signed object. - K is the identifier of a key signing the document - M is the method to be used to make the signature - S is a signature of the canonical encoding of X using the - identified key. + where: ROLE is a dictionary whose "_type" field describes the role type. + KEYID is the identifier of the key signing the ROLE dictionary. + METHOD is the key signing method used to generate the signature. 
+ SIGNATURE is a signature of the canonical encoding of ROLE using the + signing key belonging to KEYID. We define one signing method at present: - sha256-pkcs1 : A base64 encoded signature of the SHA256 hash of the - canonical encoding of X, using PKCS-1 padding. + "evp" : An interface to OpenSSL's EVP functions. - All times are given as strings of the format "YYYY-MM-DD HH:MM:SS", - in UTC. + All times are given as strings of the format "YYYY-MM-DD HH:MM:SS UTC". All keys are of the format: { "keytype" : KEYTYPE, @@ -443,13 +444,12 @@ We define one keytype at present: 'rsa'. Its format is: { "keytype" : "rsa", - "keyval" : { "e" : E, - "n" : N } + "keyval" : { "public" : PUBLIC, + "private" : PRIVATE } } - where E and N are the binary representations of the exponent and - modulus, encoded as big-endian numbers in base64. All RSA keys must - be at least 2048 bits long. + where PUBLIC and PRIVATE are in PEM format and are strings. All RSA keys + must be at least 2048 bits long. 4.3. File formats: root.txt @@ -462,7 +462,7 @@ The format of root.txt is as follows: { "_type" : "Root", - "ts" : TIME, + "version" : VERSION, "expires" : EXPIRES, "keys" : { KEYID : KEY @@ -474,12 +474,11 @@ , ... } } - The "ts" line describes when this file was updated. Clients - MUST NOT replace a file with an older one, and SHOULD NOT accept a - file too far in the future. + VERSION is an integer that is greater than 0. Clients MUST NOT replace a + metadata file with a version number less than the one currently trusted. - The "expires" line states when the metadata should be considered expired - and no longer trusted by clients. Clients MUST NOT trust an expired file. + EXPIRES determines when metadata should be considered expired and no longer + trusted by clients. Clients MUST NOT trust an expired file. A ROLE is one of "root", "release", "targets", "timestamp", or "mirrors". 
A role for each of "root", "release", "timestamp", and "targets" MUST be @@ -505,7 +504,7 @@ The format of release.txt is as follows: { "_type" : "Release", - "ts" : TIME, + "version" : VERSION, "expires" : EXPIRES, "meta" : METAFILES } @@ -527,7 +526,7 @@ The format of targets.txt is as follows: { "_type" : "Targets", - "ts" : TIME, + "version" : VERSION, "expires" : EXPIRES, "targets" : TARGETS, ("delegations" : DELEGATIONS) @@ -572,10 +571,9 @@ The "paths" list describes paths that the role is trusted to provide. Clients MUST check that a target is in one of the trusted paths of all roles in a delegation chain, not just in a trusted path of the role that describes - the target file. The format of a PATHPATTERN may be either a path to a - single file or a path to a directory and end with "/**" to indicate all - files under that directory. The value of "/**" by itself therefore means - all files. + the target file. The format of a PATHPATTERN may be either a path to a single + file, or a path to a directory to indicate all files and/or subdirectories + under that directory. We are currently investigating a few "priority tag" schemes to resolve conflicts between delegated roles that share responsibility for overlapping @@ -610,7 +608,7 @@ The format of the timestamp file is as follows: { "_type" : "Timestamp", - "ts" : TIME, + "version" : VERSION, "expires" : EXPIRES, "meta" : METAFILES } @@ -628,7 +626,7 @@ The format of mirrors.txt is as follows: { "_type" : "Mirrorlist", - "ts" : TIME, + "version" : VERSION, "expires" : EXPIRES, "mirrors" : [ { "urlbase" : URLBASE, diff --git a/tuf/client/updater.py b/tuf/client/updater.py index dce742b5..acc6daf5 100755 --- a/tuf/client/updater.py +++ b/tuf/client/updater.py @@ -1534,6 +1534,13 @@ def target(self, target_filepath): # Raise 'tuf.FormatError' if there is a mismatch. tuf.formats.RELPATH_SCHEMA.check_match(target_filepath) + # The algorithm used by the repository to generate the hashes of the + # target filepaths. 
The repository may optionally organize + # targets into hashed bins to ease target delegations and role metadata + # management. The use of consistent hashing allows for a uniform + # distribution of targets into bins. + HASH_PATH_ALGORITHM = 'sha256' + # Ensure the client has the most up-to-date version of 'targets.txt'. # Raise 'tuf.MetadataNotAvailableError' if the changed metadata # cannot be successfully downloaded and 'tuf.RepositoryError' if the @@ -1545,12 +1552,23 @@ def target(self, target_filepath): # The target is assumed to be missing until proven otherwise. target = None + # Calculate the hash of the filepath to determine which bin to find the + # target. The client currently assumes the repository uses + # 'HASH_PATH_ALGORITHM' to generate hashes. + # TODO: Should the TUF spec restrict the repository to one particular + # algorithm? Should we allow the repository to specify in the role + # dictionary the algorithm used for these generated hashed paths? + digest_object = tuf.hash.digest(HASH_PATH_ALGORITHM) + digest_object.update(target_filepath) + target_file_path_hash = digest_object.hexdigest() + try: current_metadata = self.metadata['current'] role_names = ['targets'] # Preorder depth-first traversal of the tree of target delegations. while len(role_names) > 0 and target is None: + # Pop the role name from the top of the stack. role_name = role_names.pop(-1) @@ -1575,20 +1593,48 @@ def target(self, target_filepath): break # Push children in reverse order of appearance onto the stack. + # NOTE: This may be a slow operation if there are many delegated roles + # or bins. for child_role in reversed(child_roles): child_role_name = child_role['name'] - child_role_paths = child_role['paths'] + child_role_paths = child_role.get('paths') + child_role_path_hash_prefix = child_role.get('path_hash_prefix') - # Ensure that we explore only delegated roles trusted with the target. 
- # We assume conservation of delegated paths in the complete tree of - # delegations. Note that the call to _ensure_all_targets_allowed in - # _update_metadata should already ensure that all targets metadata is - # valid; i.e. that the targets signed by a delegatee is a proper - # subset of the targets delegated to it by the delegator. - # Nevertheless, we check it again here for performance and safety - # reasons. - if target_filepath in child_role_paths: - role_names.append(child_role_name) + if child_role_path_hash_prefix is not None: + if target_file_path_hash.startswith(child_role_path_hash_prefix): + + # Found a matching path hash prefix. The metadata for + # 'child_role_name' will be retrieved on the next iteration + # of the while-loop. + role_names.append(child_role_name) + elif child_role_paths is not None: + + # Ensure that we explore only delegated roles trusted with the target. + # We assume conservation of delegated paths in the complete tree of + # delegations. Note that the call to _ensure_all_targets_allowed in + # _update_metadata should already ensure that all targets metadata is + # valid; i.e. that the targets signed by a delegatee are a proper + # subset of the targets delegated to it by the delegator. + # Nevertheless, we check it again here for performance and safety + # reasons. + for child_role_path in child_role_paths: + + # A child role path may be a filepath or directory. Explore + # directories which may contain 'target_filepath'. + prefix = os.path.commonprefix([target_filepath, child_role_path]) + if target_filepath in child_role_paths: + + # The metadata for 'child_role_name' will be retrieved on the next + # iteration of the while-loop. + role_names.append(child_role_name) + else: + + # 'role_name' should have been validated when it was downloaded. + # The 'paths' or 'path_hash_prefix' fields should not be missing, + # but log a warning if this else clause is reached. 
+ message = repr(child_role)+' unexpectedly did not contain one of '+\ + 'the required fields ("paths" or "path_hash_prefix").' + logger.warn(message) except: raise finally: