#!/usr/bin/env python

"""
Program Name:
  formats.py

Authors:
  Geremy Condra
  Vladimir Diaz

Started:
  Refactored April 30, 2012. -vladimir.v.diaz

Copyright:
  See LICENSE for licensing information.

Purpose:
  A central location for all format-related checking of TUF objects.
  Note: 'formats.py' depends heavily on 'schema.py', so the 'schema.py'
  module should be read and understood before tackling this module.

  'formats.py' can be broken down into three sections.  (1) Schemas and object
  matching.  (2) Classes that represent Role Metadata and help produce
  correctly formatted files.  (3) Functions that help produce or verify TUF
  objects.

  The first section deals with schemas and object matching based on format.
  There are two ways of checking the format of objects.  The first method
  raises a 'tuf.FormatError' exception if the match fails and the other
  returns a Boolean result.

  tuf.formats.SOME_SCHEMA.check_match(object)
  tuf.formats.SOME_SCHEMA.matches(object)

  Example:

  rsa_key = {'keytype': 'rsa',
             'keyid': '34892fc465ac76bc3232fab',
             'keyval': {'public': 'public_key',
                        'private': 'private_key'}}

  tuf.formats.RSAKEY_SCHEMA.check_match(rsa_key)
  tuf.formats.RSAKEY_SCHEMA.matches(rsa_key)

  In this example, if a dict key or dict value is missing or incorrect,
  the match fails.  There are numerous variations of object checking
  provided by 'formats.py' and 'schema.py'.

  The second section deals with the role metadata classes.  There are
  multiple top-level roles, each with differing metadata formats.

  Example:

  root_object = tuf.formats.RootFile.from_metadata(root_metadata_file)
  targets_metadata = tuf.formats.TargetsFile.make_metadata(...)

  The input and output of these classes are checked against their respective
  schema to ensure correctly formatted metadata.

  The last section contains miscellaneous functions related to the format of
  TUF objects.

  Example:

  signable_object = make_signable(unsigned_object)
"""

# Help with Python 3 compatibility, where the print statement is a function, an
# implicit relative import is invalid, and the '/' operator performs true
# division.  Example:  print 'hello world' raises a 'SyntaxError' exception.
from __future__ import print_function
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals

import binascii
import calendar
import re
import string
import datetime
import time

import tuf
import tuf.schema as SCHEMA

import six

# Note that in the schema definitions below, the 'SCHEMA.Object' types allow
# additional keys which are not defined.  Thus, any additions to them will be
# easily backwards compatible with clients that are already deployed.

# A datetime in 'YYYY-MM-DDTHH:MM:SSZ' ISO 8601 format.  The "Z" zone designator
# for the zero UTC offset is always used (i.e., a numerical offset is not
# supported.)  Example: '2015-10-21T13:20:00Z'.  Note:  This is a simple format
# check, and an ISO8601 string should be fully verified when it is parsed.
ISO8601_DATETIME_SCHEMA = SCHEMA.RegularExpression(r'\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z')

# A Unix/POSIX time format.  An integer representing the number of seconds
# since the epoch (January 1, 1970.)  Set 'hi' to the upper timestamp limit
# (year 2038), the max value of a signed 32-bit int.
# NOTE(review): this comment used to say that metadata uses this format for
# the 'expires' field, but the role schemas in this module declare 'expires'
# as ISO8601_DATETIME_SCHEMA -- confirm whether any metadata still stores a
# Unix timestamp there.
UNIX_TIMESTAMP_SCHEMA = SCHEMA.Integer(lo=0, hi=2147483647)

# A hexadecimal value in '23432df87ab..' format.
HASH_SCHEMA = SCHEMA.RegularExpression(r'[a-fA-F0-9]+')

# A dict in {'sha256': '23432df87ab..', 'sha512': '34324abc34df..', ...} format.
HASHDICT_SCHEMA = SCHEMA.DictOf(
    key_schema = SCHEMA.AnyString(),
    value_schema = HASH_SCHEMA)

# A hexadecimal value in '23432df87ab..' format.  Same pattern as HASH_SCHEMA,
# but a separate schema object.
HEX_SCHEMA = SCHEMA.RegularExpression(r'[a-fA-F0-9]+')

# A key identifier (e.g., a hexadecimal value identifying an RSA key).
# This is an alias of HASH_SCHEMA (the very same object).
KEYID_SCHEMA = HASH_SCHEMA

# A list of KEYID_SCHEMA.
KEYIDS_SCHEMA = SCHEMA.ListOf(KEYID_SCHEMA)

# The method used for a generated signature (e.g., 'RSASSA-PSS').
SIG_METHOD_SCHEMA = SCHEMA.AnyString()

# A relative file path (e.g., 'metadata/root/').
RELPATH_SCHEMA = SCHEMA.AnyString()
# A list of RELPATH_SCHEMA.
RELPATHS_SCHEMA = SCHEMA.ListOf(RELPATH_SCHEMA)

# An absolute path.
PATH_SCHEMA = SCHEMA.AnyString()
# A list of PATH_SCHEMA.
PATHS_SCHEMA = SCHEMA.ListOf(PATH_SCHEMA)

# Uniform Resource Locator identifier (e.g., 'https://www.updateframework.com/').
URL_SCHEMA = SCHEMA.AnyString()

# A dictionary holding version information.
VERSION_SCHEMA = SCHEMA.Object(
    object_name = 'VERSION_SCHEMA',
    major = SCHEMA.Integer(lo=0),
    minor = SCHEMA.Integer(lo=0),
    fix = SCHEMA.Integer(lo=0))

# An integer representing the numbered version of a metadata file.
# The schema accepts 0, or greater (lo=0).
# NOTE(review): this comment previously read "Must be 1, or greater", which
# does not agree with the 'lo=0' bound below -- confirm which one is intended.
METADATAVERSION_SCHEMA = SCHEMA.Integer(lo=0)

# An integer representing length.  Must be 0, or greater.
LENGTH_SCHEMA = SCHEMA.Integer(lo=0)

# An integer representing logger levels, such as logging.CRITICAL (=50).
# Must be between 0 and 50.
LOGLEVEL_SCHEMA = SCHEMA.Integer(lo=0, hi=50)

# A string representing a named object.
NAME_SCHEMA = SCHEMA.AnyString()
# A list of NAME_SCHEMA.
NAMES_SCHEMA = SCHEMA.ListOf(NAME_SCHEMA)

# A byte string representing data.
DATA_SCHEMA = SCHEMA.AnyBytes()

# Supported hash algorithms.
HASHALGORITHMS_SCHEMA = SCHEMA.ListOf(SCHEMA.OneOf(
    [SCHEMA.String('md5'), SCHEMA.String('sha1'),
     SCHEMA.String('sha224'), SCHEMA.String('sha256'),
     SCHEMA.String('sha384'), SCHEMA.String('sha512')]))

# The contents of an encrypted TUF key.  Encrypted TUF keys are saved to files
# in this format.
ENCRYPTEDKEY_SCHEMA = SCHEMA.AnyBytes()

# A value that is either True or False, on or off, etc.
BOOLEAN_SCHEMA = SCHEMA.Boolean()

# A role's threshold value (i.e., the minimum number
# of signatures required to sign a metadata file).
# Must be 1 and greater.
THRESHOLD_SCHEMA = SCHEMA.Integer(lo=1)

# A string representing a role's name.
ROLENAME_SCHEMA = SCHEMA.AnyString()

# The minimum number of bits for an RSA key.  Must be 2048 bits, or greater
# (recommended by TUF).  Crypto modules like 'pycrypto_keys.py' may set further
# restrictions on keys (e.g., the number of bits must be a multiple of 256).
# Recommended RSA key sizes: # http://www.emc.com/emc-plus/rsa-labs/historical/twirl-and-rsa-key-size.htm#table1 RSAKEYBITS_SCHEMA = SCHEMA.Integer(lo=2048) # The number of hashed bins, or the number of delegated roles. See # delegate_hashed_bins() in 'repository_tool.py' for an example. Note: # Tools may require further restrictions on the number of bins, such # as requiring them to be a power of 2. NUMBINS_SCHEMA = SCHEMA.Integer(lo=1) # A PyCrypto signature. PYCRYPTOSIGNATURE_SCHEMA = SCHEMA.AnyBytes() # A pyca-cryptography signature. PYCACRYPTOSIGNATURE_SCHEMA = SCHEMA.AnyBytes() # An RSA key in PEM format. PEMRSA_SCHEMA = SCHEMA.AnyString() # A string representing a password. PASSWORD_SCHEMA = SCHEMA.AnyString() # A list of passwords. PASSWORDS_SCHEMA = SCHEMA.ListOf(PASSWORD_SCHEMA) # The actual values of a key, as opposed to meta data such as a key type and # key identifier ('rsa', 233df889cb). For RSA keys, the key value is a pair of # public and private keys in PEM Format stored as strings. KEYVAL_SCHEMA = SCHEMA.Object( object_name = 'KEYVAL_SCHEMA', public = SCHEMA.AnyString(), private = SCHEMA.Optional(SCHEMA.AnyString())) # Supported TUF key types. KEYTYPE_SCHEMA = SCHEMA.OneOf( [SCHEMA.String('rsa'), SCHEMA.String('ed25519')]) # A generic TUF key. All TUF keys should be saved to metadata files in this # format. KEY_SCHEMA = SCHEMA.Object( object_name = 'KEY_SCHEMA', keytype = SCHEMA.AnyString(), keyval = KEYVAL_SCHEMA) # A TUF key object. This schema simplifies validation of keys that may be # one of the supported key types. # Supported key types: 'rsa', 'ed25519'. ANYKEY_SCHEMA = SCHEMA.Object( object_name = 'ANYKEY_SCHEMA', keytype = KEYTYPE_SCHEMA, keyid = KEYID_SCHEMA, keyval = KEYVAL_SCHEMA) # A list of TUF key objects. ANYKEYLIST_SCHEMA = SCHEMA.ListOf(ANYKEY_SCHEMA) # An RSA TUF key. 
RSAKEY_SCHEMA = SCHEMA.Object( object_name = 'RSAKEY_SCHEMA', keytype = SCHEMA.String('rsa'), keyid = KEYID_SCHEMA, keyval = KEYVAL_SCHEMA) # An ED25519 raw public key, which must be 32 bytes. ED25519PUBLIC_SCHEMA = SCHEMA.LengthBytes(32) # An ED25519 raw seed key, which must be 32 bytes. ED25519SEED_SCHEMA = SCHEMA.LengthBytes(32) # An ED25519 raw signature, which must be 64 bytes. ED25519SIGNATURE_SCHEMA = SCHEMA.LengthBytes(64) # Required installation libraries expected by the repository tools and other # cryptography modules. REQUIRED_LIBRARIES_SCHEMA = SCHEMA.ListOf(SCHEMA.OneOf( [SCHEMA.String('general'), SCHEMA.String('ed25519'), SCHEMA.String('rsa')])) # An ed25519 TUF key. ED25519KEY_SCHEMA = SCHEMA.Object( object_name = 'ED25519KEY_SCHEMA', keytype = SCHEMA.String('ed25519'), keyid = KEYID_SCHEMA, keyval = KEYVAL_SCHEMA) # Information about target files, like file length and file hash(es). This # schema allows the storage of multiple hashes for the same file (e.g., sha256 # and sha512 may be computed for the same file and stored). FILEINFO_SCHEMA = SCHEMA.Object( object_name = 'FILEINFO_SCHEMA', length = LENGTH_SCHEMA, hashes = HASHDICT_SCHEMA, custom = SCHEMA.Optional(SCHEMA.Object())) # Version information specified in "snapshot.json" for each role available on # the TUF repository. The 'FILEINFO_SCHEMA' object was previously listed in # the snapshot role, but was switched to this object format to reduce the # amount of metadata that needs to be downloaded. Listing version numbers in # "snapshot.json" also prevents rollback attacks for roles that clients have # not downloaded. VERSIONINFO_SCHEMA = SCHEMA.Object( object_name = 'VERSIONINFO_SCHEMA', version = METADATAVERSION_SCHEMA) # A dict holding the version information for a particular metadata role. The # dict keys hold the relative file paths, and the dict values the corresponding # version numbers. 
VERSIONDICT_SCHEMA = SCHEMA.DictOf( key_schema = RELPATH_SCHEMA, value_schema = VERSIONINFO_SCHEMA) # A dict holding the information for a particular target / file. The dict keys # hold the relative file paths, and the dict values the corresponding file # information. FILEDICT_SCHEMA = SCHEMA.DictOf( key_schema = RELPATH_SCHEMA, value_schema = FILEINFO_SCHEMA) # A dict holding a target file. TARGETFILE_SCHEMA = SCHEMA.Object( object_name = 'TARGETFILE_SCHEMA', filepath = RELPATH_SCHEMA, fileinfo = FILEINFO_SCHEMA) # A list of TARGETFILE_SCHEMA. TARGETFILES_SCHEMA = SCHEMA.ListOf(TARGETFILE_SCHEMA) # A single signature of an object. Indicates the signature, the ID of the # signing key, and the signing method. # I debated making the signature schema not contain the key ID and instead have # the signatures of a file be a dictionary with the key being the keyid and the # value being the signature schema without the keyid. That would be under # the argument that a key should only be able to sign a file once. However, # one can imagine that maybe a key wants to sign multiple times with different # signature methods. SIGNATURE_SCHEMA = SCHEMA.Object( object_name = 'SIGNATURE_SCHEMA', keyid = KEYID_SCHEMA, method = SIG_METHOD_SCHEMA, sig = HEX_SCHEMA) # List of SIGNATURE_SCHEMA. SIGNATURES_SCHEMA = SCHEMA.ListOf(SIGNATURE_SCHEMA) # A schema holding the result of checking the signatures of a particular # 'SIGNABLE_SCHEMA' role. # For example, how many of the signatures for the 'Target' role are # valid? This SCHEMA holds this information. See 'sig.py' for # more information. SIGNATURESTATUS_SCHEMA = SCHEMA.Object( object_name = 'SIGNATURESTATUS_SCHEMA', threshold = SCHEMA.Integer(), good_sigs = KEYIDS_SCHEMA, bad_sigs = KEYIDS_SCHEMA, unknown_sigs = KEYIDS_SCHEMA, untrusted_sigs = KEYIDS_SCHEMA, unknown_method_sigs = KEYIDS_SCHEMA) # A signable object. Holds the signing role and its associated signatures. 
SIGNABLE_SCHEMA = SCHEMA.Object( object_name = 'SIGNABLE_SCHEMA', signed = SCHEMA.Any(), signatures = SCHEMA.ListOf(SIGNATURE_SCHEMA)) # A dict where the dict keys hold a keyid and the dict values a key object. KEYDICT_SCHEMA = SCHEMA.DictOf( key_schema = KEYID_SCHEMA, value_schema = KEY_SCHEMA) # The format used by the key database to store keys. The dict keys hold a key # identifier and the dict values any object. The key database should store # key objects in the values (e.g., 'RSAKEY_SCHEMA', 'DSAKEY_SCHEMA'). KEYDB_SCHEMA = SCHEMA.DictOf( key_schema = KEYID_SCHEMA, value_schema = SCHEMA.Any()) # The format of the resulting "scp config dict" after extraction from the # push configuration file (i.e., push.cfg). In the case of a config file # utilizing the scp transfer module, it must contain the 'general' and 'scp' # sections, where 'general' must contain a 'transfer_module' and # 'metadata_path' entry, and 'scp' the 'host', 'user', 'identity_file', and # 'remote_directory' entries. See 'tuf/pushtools/pushtoolslib.py' and # 'tuf/pushtools/push.py'. SCPCONFIG_SCHEMA = SCHEMA.Object( object_name = 'SCPCONFIG_SCHEMA', general = SCHEMA.Object( object_name = '[general]', transfer_module = SCHEMA.String('scp'), metadata_path = PATH_SCHEMA, targets_directory = PATH_SCHEMA), scp=SCHEMA.Object( object_name = '[scp]', host = URL_SCHEMA, user = NAME_SCHEMA, identity_file = PATH_SCHEMA, remote_directory = PATH_SCHEMA)) # The format of the resulting "receive config dict" after extraction from the # receive configuration file (i.e., receive.cfg). The receive config file # must contain a 'general' section, and this section the 'pushroots', # 'repository_directory', 'metadata_directory', 'targets_directory', and # 'backup_directory' entries. 
# see 'tuf/pushtools/pushtoolslib.py' and 'tuf/pushtools/receive/receive.py' RECEIVECONFIG_SCHEMA = SCHEMA.Object( object_name = 'RECEIVECONFIG_SCHEMA', general=SCHEMA.Object( object_name = '[general]', pushroots = SCHEMA.ListOf(PATH_SCHEMA), repository_directory = PATH_SCHEMA, metadata_directory = PATH_SCHEMA, targets_directory = PATH_SCHEMA, backup_directory = PATH_SCHEMA)) # A path hash prefix is a hexadecimal string. PATH_HASH_PREFIX_SCHEMA = HEX_SCHEMA # A list of path hash prefixes. PATH_HASH_PREFIXES_SCHEMA = SCHEMA.ListOf(PATH_HASH_PREFIX_SCHEMA) # Role object in {'keyids': [keydids..], 'name': 'ABC', 'threshold': 1, # 'paths':[filepaths..]} format. ROLE_SCHEMA = SCHEMA.Object( object_name = 'ROLE_SCHEMA', name = SCHEMA.Optional(ROLENAME_SCHEMA), keyids = KEYIDS_SCHEMA, threshold = THRESHOLD_SCHEMA, backtrack = SCHEMA.Optional(BOOLEAN_SCHEMA), paths = SCHEMA.Optional(RELPATHS_SCHEMA), path_hash_prefixes = SCHEMA.Optional(PATH_HASH_PREFIXES_SCHEMA)) # A dict of roles where the dict keys are role names and the dict values holding # the role data/information. ROLEDICT_SCHEMA = SCHEMA.DictOf( key_schema = ROLENAME_SCHEMA, value_schema = ROLE_SCHEMA) # Like ROLEDICT_SCHEMA, except that ROLE_SCHEMA instances are stored in order. ROLELIST_SCHEMA = SCHEMA.ListOf(ROLE_SCHEMA) # The delegated roles of a Targets role (a parent). DELEGATIONS_SCHEMA = SCHEMA.Object( keys = KEYDICT_SCHEMA, roles = ROLELIST_SCHEMA) # Supported compression extension (e.g., 'gz'). COMPRESSION_SCHEMA = SCHEMA.OneOf([SCHEMA.String(''), SCHEMA.String('gz')]) # List of supported compression extensions. COMPRESSIONS_SCHEMA = SCHEMA.ListOf( SCHEMA.OneOf([SCHEMA.String(''), SCHEMA.String('gz')])) # The fileinfo format of targets specified in the repository and # developer tools. The second element of this list holds custom data about the # target, such as file permissions, author(s), last modified, etc. 
CUSTOM_SCHEMA = SCHEMA.Object()

# A dict whose keys are relative file paths and whose values are
# CUSTOM_SCHEMA objects.
PATH_FILEINFO_SCHEMA = SCHEMA.DictOf(
    key_schema = RELPATH_SCHEMA,
    value_schema = CUSTOM_SCHEMA)

# tuf.roledb
# Only 'keyids' and 'threshold' are required; every other entry is optional.
# 'paths' may be either a list of relative paths or a PATH_FILEINFO_SCHEMA
# dict.
ROLEDB_SCHEMA = SCHEMA.Object(
    object_name = 'ROLEDB_SCHEMA',
    keyids = KEYIDS_SCHEMA,
    signing_keyids = SCHEMA.Optional(KEYIDS_SCHEMA),
    threshold = THRESHOLD_SCHEMA,
    version = SCHEMA.Optional(METADATAVERSION_SCHEMA),
    expires = SCHEMA.Optional(ISO8601_DATETIME_SCHEMA),
    signatures = SCHEMA.Optional(SIGNATURES_SCHEMA),
    compressions = SCHEMA.Optional(COMPRESSIONS_SCHEMA),
    paths = SCHEMA.Optional(SCHEMA.OneOf([RELPATHS_SCHEMA, PATH_FILEINFO_SCHEMA])),
    path_hash_prefixes = SCHEMA.Optional(PATH_HASH_PREFIXES_SCHEMA),
    delegations = SCHEMA.Optional(DELEGATIONS_SCHEMA),
    partial_loaded = SCHEMA.Optional(BOOLEAN_SCHEMA))

# Root role: indicates root keys and top-level roles.
ROOT_SCHEMA = SCHEMA.Object(
    object_name = 'ROOT_SCHEMA',
    _type = SCHEMA.String('Root'),
    version = METADATAVERSION_SCHEMA,
    consistent_snapshot = BOOLEAN_SCHEMA,
    compression_algorithms = COMPRESSIONS_SCHEMA,
    expires = ISO8601_DATETIME_SCHEMA,
    keys = KEYDICT_SCHEMA,
    roles = ROLEDICT_SCHEMA)

# Targets role: Indicates targets and delegates target paths to other roles.
# The 'delegations' entry is optional.
TARGETS_SCHEMA = SCHEMA.Object(
    object_name = 'TARGETS_SCHEMA',
    _type = SCHEMA.String('Targets'),
    version = METADATAVERSION_SCHEMA,
    expires = ISO8601_DATETIME_SCHEMA,
    targets = FILEDICT_SCHEMA,
    delegations = SCHEMA.Optional(DELEGATIONS_SCHEMA))

# Snapshot role: indicates the latest versions of all metadata (except timestamp).
SNAPSHOT_SCHEMA = SCHEMA.Object(
    object_name = 'SNAPSHOT_SCHEMA',
    _type = SCHEMA.String('Snapshot'),
    version = METADATAVERSION_SCHEMA,
    expires = ISO8601_DATETIME_SCHEMA,
    meta = VERSIONDICT_SCHEMA)

# Timestamp role: indicates the latest version of the snapshot file.
TIMESTAMP_SCHEMA = SCHEMA.Object(
    object_name = 'TIMESTAMP_SCHEMA',
    _type = SCHEMA.String('Timestamp'),
    version = METADATAVERSION_SCHEMA,
    expires = ISO8601_DATETIME_SCHEMA,
    meta = VERSIONDICT_SCHEMA)

# project.cfg file: stores information about the project in a json dictionary
PROJECT_CFG_SCHEMA = SCHEMA.Object(
    object_name = 'PROJECT_CFG_SCHEMA',
    project_name = SCHEMA.AnyString(),
    layout_type = SCHEMA.OneOf([SCHEMA.String('repo-like'), SCHEMA.String('flat')]),
    targets_location = PATH_SCHEMA,
    metadata_location = PATH_SCHEMA,
    prefix = PATH_SCHEMA,
    public_keys = KEYDICT_SCHEMA,
    threshold = SCHEMA.Integer(lo = 0, hi = 2)
    )

# A schema containing information a repository mirror may require,
# such as a url, the path of the directory metadata files, etc.
MIRROR_SCHEMA = SCHEMA.Object(
    object_name = 'MIRROR_SCHEMA',
    url_prefix = URL_SCHEMA,
    metadata_path = RELPATH_SCHEMA,
    targets_path = RELPATH_SCHEMA,
    confined_target_dirs = RELPATHS_SCHEMA,
    custom = SCHEMA.Optional(SCHEMA.Object()))

# A dictionary of mirrors where the dict keys hold the mirror's name and
# and the dict values the mirror's data (i.e., 'MIRROR_SCHEMA').
# The repository class of 'updater.py' accepts dictionaries
# of this type provided by the TUF client.
MIRRORDICT_SCHEMA = SCHEMA.DictOf(
    key_schema = SCHEMA.AnyString(),
    value_schema = MIRROR_SCHEMA)

# A Mirrorlist: indicates all the live mirrors, and what documents they
# serve.
MIRRORLIST_SCHEMA = SCHEMA.Object(
    object_name = 'MIRRORLIST_SCHEMA',
    _type = SCHEMA.String('Mirrors'),
    version = METADATAVERSION_SCHEMA,
    expires = ISO8601_DATETIME_SCHEMA,
    mirrors = SCHEMA.ListOf(MIRROR_SCHEMA))

# Any of the role schemas (e.g., TIMESTAMP_SCHEMA, SNAPSHOT_SCHEMA, etc.)
# NOTE(review): the last alternative is MIRROR_SCHEMA (a single mirror entry),
# whereas SCHEMAS_BY_TYPE below maps the 'Mirrors' role to MIRRORLIST_SCHEMA.
# Left unchanged to preserve existing behavior -- confirm which is intended.
ANYROLE_SCHEMA = SCHEMA.OneOf([ROOT_SCHEMA, TARGETS_SCHEMA,
                               SNAPSHOT_SCHEMA, TIMESTAMP_SCHEMA,
                               MIRROR_SCHEMA])


class MetaFile(object):
    """
    Purpose:
      Base class for all metadata file classes.
      Classes representing metadata files such as RootFile
      and SnapshotFile all inherit from MetaFile.  The
      __eq__, __ne__, perform 'equal' and 'not equal' comparisons
      between Metadata File objects.

      Subclasses store their fields in the 'info' dict; __getattr__ exposes
      the entries of that dict as attributes.
    """

    # Replaced by a per-instance dict in each subclass' __init__().  It stays
    # None on instances that have not been initialized.
    info = None

    def __eq__(self, other):
        """Return True if 'other' is a MetaFile whose 'info' equals ours."""
        return isinstance(other, MetaFile) and self.info == other.info

    # Instances compare by their mutable 'info' dict, so they are unhashable.
    __hash__ = None

    def __ne__(self, other):
        """Return the negation of __eq__()."""
        return not self.__eq__(other)

    def __getattr__(self, name):
        """
        Allow all metafile objects to have their interesting attributes
        referred to directly without the info dict.  The info dict is just
        to be able to do the __eq__ comparison generically.

        Exceptions:
          AttributeError, if 'name' is not a key of the info dict, or if the
          info dict has not been set on this instance.
        """

        # __getattr__ is only consulted after normal attribute lookup fails.
        # 'info' may still be the class-level None (e.g., on an instance that
        # copy/pickle is in the middle of rebuilding and probing for
        # '__setstate__').  'name in None' would raise TypeError, which
        # hasattr() and getattr(obj, name, default) do not swallow, so report
        # a missing attribute the standard way instead.
        info = self.info
        if info is not None and name in info:
            return info[name]

        raise AttributeError(name)


class TimestampFile(MetaFile):
    """Timestamp role metadata: 'version', 'expires', and 'meta' entries."""

    def __init__(self, version, expires, versiondict):
        self.info = {
            'version': version,
            'expires': expires,
            'meta': versiondict,
        }

    @staticmethod
    def from_metadata(object):
        """
        Build a TimestampFile from a dict conformant to 'TIMESTAMP_SCHEMA'.

        Exceptions:
          tuf.FormatError, if 'object' does not match 'TIMESTAMP_SCHEMA'.
        """

        # Is 'object' a Timestamp metadata file?
        # Raise tuf.FormatError if not.
        TIMESTAMP_SCHEMA.check_match(object)

        return TimestampFile(object['version'], object['expires'],
                             object['meta'])

    @staticmethod
    def make_metadata(version, expiration_date, versiondict):
        """
        Return a Timestamp metadata dict built from the arguments.

        Exceptions:
          tuf.FormatError, if the resulting dict does not match
          'TIMESTAMP_SCHEMA'.
        """

        result = {
            '_type': 'Timestamp',
            'version': version,
            'expires': expiration_date,
            'meta': versiondict,
        }

        # Is 'result' a Timestamp metadata file?
        # Raise 'tuf.FormatError' if not.
        TIMESTAMP_SCHEMA.check_match(result)

        return result


class RootFile(MetaFile):
    """
    Root role metadata: 'version', 'expires', 'keys', 'roles',
    'consistent_snapshot', and 'compression_algorithms' entries.
    """

    def __init__(self, version, expires, keys, roles, consistent_snapshot,
                 compression_algorithms):
        self.info = {
            'version': version,
            'expires': expires,
            'keys': keys,
            'roles': roles,
            'consistent_snapshot': consistent_snapshot,
            'compression_algorithms': compression_algorithms,
        }

    @staticmethod
    def from_metadata(object):
        """
        Build a RootFile from a dict conformant to 'ROOT_SCHEMA'.

        Exceptions:
          tuf.FormatError, if 'object' does not match 'ROOT_SCHEMA'.
        """

        # Is 'object' a Root metadata file?
        # Raise 'tuf.FormatError' if not.
        ROOT_SCHEMA.check_match(object)

        return RootFile(object['version'], object['expires'], object['keys'],
                        object['roles'], object['consistent_snapshot'],
                        object['compression_algorithms'])

    @staticmethod
    def make_metadata(version, expiration_date, keydict, roledict,
                      consistent_snapshot, compression_algorithms):
        """
        Return a Root metadata dict built from the arguments.

        Exceptions:
          tuf.FormatError, if the resulting dict does not match
          'ROOT_SCHEMA'.
        """

        result = {
            '_type': 'Root',
            'version': version,
            'expires': expiration_date,
            'keys': keydict,
            'roles': roledict,
            'consistent_snapshot': consistent_snapshot,
            'compression_algorithms': compression_algorithms,
        }

        # Is 'result' a Root metadata file?
        # Raise 'tuf.FormatError' if not.
        ROOT_SCHEMA.check_match(result)

        return result


class SnapshotFile(MetaFile):
    """Snapshot role metadata: 'version', 'expires', and 'meta' entries."""

    def __init__(self, version, expires, versiondict):
        self.info = {
            'version': version,
            'expires': expires,
            'meta': versiondict,
        }

    @staticmethod
    def from_metadata(object):
        """
        Build a SnapshotFile from a dict conformant to 'SNAPSHOT_SCHEMA'.

        Exceptions:
          tuf.FormatError, if 'object' does not match 'SNAPSHOT_SCHEMA'.
        """

        # Is 'object' a Snapshot metadata file?
        # Raise 'tuf.FormatError' if not.
        SNAPSHOT_SCHEMA.check_match(object)

        return SnapshotFile(object['version'], object['expires'],
                            object['meta'])

    @staticmethod
    def make_metadata(version, expiration_date, versiondict):
        """
        Return a Snapshot metadata dict built from the arguments.

        Exceptions:
          tuf.FormatError, if the resulting dict does not match
          'SNAPSHOT_SCHEMA'.
        """

        result = {
            '_type': 'Snapshot',
            'version': version,
            'expires': expiration_date,
            'meta': versiondict,
        }

        # Is 'result' a Snapshot metadata file?
        # Raise 'tuf.FormatError' if not.
        SNAPSHOT_SCHEMA.check_match(result)

        return result


class TargetsFile(MetaFile):
    """
    Targets role metadata: 'version', 'expires', 'targets', and
    'delegations' entries.
    """

    def __init__(self, version, expires, filedict=None, delegations=None):
        # Fresh dicts per instance; None stands in for "not given".
        if filedict is None:
            filedict = {}

        if delegations is None:
            delegations = {}

        self.info = {
            'version': version,
            'expires': expires,
            'targets': filedict,
            'delegations': delegations,
        }

    @staticmethod
    def from_metadata(object):
        """
        Build a TargetsFile from a dict conformant to 'TARGETS_SCHEMA'.
        A missing 'delegations' entry becomes an empty dict on the instance.

        Exceptions:
          tuf.FormatError, if 'object' does not match 'TARGETS_SCHEMA'.
        """

        # Is 'object' a Targets metadata file?
        # Raise tuf.FormatError if not.
        TARGETS_SCHEMA.check_match(object)

        return TargetsFile(object['version'], object['expires'],
                           object.get('targets'), object.get('delegations'))

    @staticmethod
    def make_metadata(version, expiration_date, filedict=None, delegations=None):
        """
        Return a Targets metadata dict built from the arguments.  'targets'
        defaults to an empty dict; 'delegations' is only included when given.

        Exceptions:
          tuf.Error, if both 'filedict' and 'delegations' are None.

          tuf.FormatError, if the resulting dict does not match
          'TARGETS_SCHEMA'.
        """

        if filedict is None and delegations is None:
            raise tuf.Error('We don\'t allow completely empty targets metadata.')

        result = {
            '_type': 'Targets',
            'version': version,
            'expires': expiration_date,
            'targets': {} if filedict is None else filedict,
        }

        if delegations is not None:
            result['delegations'] = delegations

        # Is 'result' a Targets metadata file?
        # Raise 'tuf.FormatError' if not.
        TARGETS_SCHEMA.check_match(result)

        return result


class MirrorsFile(MetaFile):
    """
    Mirrors role metadata: 'version' and 'expires' entries.  Conversion to
    and from metadata is not implemented.
    """

    def __init__(self, version, expires):
        self.info = {
            'version': version,
            'expires': expires,
        }

    @staticmethod
    def from_metadata(object):
        raise NotImplementedError

    @staticmethod
    def make_metadata():
        raise NotImplementedError


# A dict holding the recognized schemas for the top-level roles.
SCHEMAS_BY_TYPE = {
    'Root' : ROOT_SCHEMA,
    'Targets' : TARGETS_SCHEMA,
    'Snapshot' : SNAPSHOT_SCHEMA,
    'Timestamp' : TIMESTAMP_SCHEMA,
    'Mirrors' : MIRRORLIST_SCHEMA}

# A dict holding the recognized class names for the top-level roles.
# That is, the role classes listed in this module (e.g., class TargetsFile()).
ROLE_CLASSES_BY_TYPE = { 'Root' : RootFile, 'Targets' : TargetsFile, 'Snapshot' : SnapshotFile, 'Timestamp' : TimestampFile, 'Mirrors' : MirrorsFile} def datetime_to_unix_timestamp(datetime_object): """ Convert 'datetime_object' (in datetime.datetime()) format) to a Unix/POSIX timestamp. For example, Python's time.time() returns a Unix timestamp, and includes the number of microseconds. 'datetime_object' is converted to UTC. >>> datetime_object = datetime.datetime(1985, 10, 26, 1, 22) >>> timestamp = datetime_to_unix_timestamp(datetime_object) >>> timestamp 499137720 datetime_object: The datetime.datetime() object to convert to a Unix timestamp. tuf.FormatError, if 'datetime_object' is not a datetime.datetime() object. None. A unix (posix) timestamp (e.g., 499137660). """ # Is 'datetime_object' a datetime.datetime() object? # Raise 'tuf.FormatError' if not. if not isinstance(datetime_object, datetime.datetime): message = repr(datetime_object) + ' is not a datetime.datetime() object.' raise tuf.FormatError(message) unix_timestamp = calendar.timegm(datetime_object.timetuple()) return unix_timestamp def unix_timestamp_to_datetime(unix_timestamp): """ Convert 'unix_timestamp' (i.e., POSIX time, in UNIX_TIMESTAMP_SCHEMA format) to a datetime.datetime() object. 'unix_timestamp' is the number of seconds since the epoch (January 1, 1970.) >>> datetime_object = unix_timestamp_to_datetime(1445455680) >>> datetime_object datetime.datetime(2015, 10, 21, 19, 28) unix_timestamp: An integer representing the time (e.g., 1445455680). Conformant to 'tuf.formats.UNIX_TIMESTAMP_SCHEMA'. tuf.FormatError, if 'unix_timestamp' is improperly formatted. None. A datetime.datetime() object corresponding to 'unix_timestamp'. """ # Is 'unix_timestamp' properly formatted? # Raise 'tuf.FormatError' if there is a mismatch. UNIX_TIMESTAMP_SCHEMA.check_match(unix_timestamp) # Convert 'unix_timestamp' to a 'time.struct_time', in UTC. The Daylight # Savings Time (DST) flag is set to zero. 
datetime.fromtimestamp() is not # used because it returns a local datetime. struct_time = time.gmtime(unix_timestamp) # Extract the (year, month, day, hour, minutes, seconds) arguments for the # datetime object to be returned. datetime_object = datetime.datetime(*struct_time[:6]) return datetime_object def format_base64(data): """ Return the base64 encoding of 'data' with whitespace and '=' signs omitted. data: Binary or buffer of data to convert. tuf.FormatError, if the base64 encoding fails or the argument is invalid. None. A base64-encoded string. """ try: return binascii.b2a_base64(data).decode('utf-8').rstrip('=\n ') except (TypeError, binascii.Error) as e: raise tuf.FormatError('Invalid base64 encoding: ' + str(e)) def parse_base64(base64_string): """ Parse a base64 encoding with whitespace and '=' signs omitted. base64_string: A string holding a base64 value. tuf.FormatError, if 'base64_string' cannot be parsed due to an invalid base64 encoding. None. A byte string representing the parsed based64 encoding of 'base64_string'. """ if not isinstance(base64_string, six.string_types): message = 'Invalid argument: '+repr(base64_string) raise tuf.FormatError(message) extra = len(base64_string) % 4 if extra: padding = '=' * (4 - extra) base64_string = base64_string + padding try: return binascii.a2b_base64(base64_string.encode('utf-8')) except (TypeError, binascii.Error) as e: raise tuf.FormatError('Invalid base64 encoding: ' + str(e)) def make_signable(object): """ Return the role metadata 'object' in 'SIGNABLE_SCHEMA' format. 'object' is added to the 'signed' key, and an empty list initialized to the 'signatures' key. The caller adds signatures to this second field. Note: check_signable_object_format() should be called after make_signable() and signatures added to ensure the final signable object has a valid format (i.e., a signable containing a supported role metadata). object: A role schema dict (e.g., 'ROOT_SCHEMA', 'SNAPSHOT_SCHEMA'). None. None. 
A dict in 'SIGNABLE_SCHEMA' format. """ if not isinstance(object, dict) or 'signed' not in object: return { 'signed' : object, 'signatures' : [] } else: return object def make_fileinfo(length, hashes, custom=None): """ Create a dictionary conformant to 'FILEINFO_SCHEMA'. This dict describes both metadata and target files. length: An integer representing the size of the file. hashes: A dict of hashes in 'HASHDICT_SCHEMA' format, which has the form: {'sha256': 123df8a9b12, 'sha512': 324324dfc121, ...} custom: An optional object providing additional information about the file. tuf.FormatError, if the 'FILEINFO_SCHEMA' to be returned does not have the correct format. If any of the arguments are incorrectly formatted, the dict returned will be checked for formatting errors, and if found, will raise a 'tuf.FormatError' exception. A dictionary conformant to 'FILEINFO_SCHEMA', representing the file information of a metadata or target file. """ fileinfo = {'length' : length, 'hashes' : hashes} if custom is not None: fileinfo['custom'] = custom # Raise 'tuf.FormatError' if the check fails. FILEINFO_SCHEMA.check_match(fileinfo) return fileinfo def make_versioninfo(version_number): """ Create a dictionary conformant to 'VERSIONINFO_SCHEMA'. This dict describes both metadata and target files. version_number: An integer representing the version of a particular metadata role. The dictionary returned by this function is expected to be included in Snapshot metadata. tuf.FormatError, if the dict to be returned does not have the correct format (i.e., VERSIONINFO_SCHEMA). None. A dictionary conformant to 'VERSIONINFO_SCHEMA', containing the version information of a metadata role. """ versioninfo = {'version' : version_number} # Raise 'tuf.FormatError' if 'versioninfo' is improperly formatted. 
try: VERSIONINFO_SCHEMA.check_match(versioninfo) except: raise else: return versioninfo def make_role_metadata(keyids, threshold, name=None, paths=None, path_hash_prefixes=None): """ Create a dictionary conforming to 'tuf.formats.ROLE_SCHEMA', representing the role with 'keyids', 'threshold', and 'paths' as field values. 'paths' is optional (i.e., used only by the 'Target' role). keyids: a list of key ids. threshold: An integer denoting the number of required keys for the signing role. name: A string that is the name of this role. paths: The 'Target' role stores the paths of target files in its metadata file. 'paths' is a list of file paths. path_hash_prefixes: The 'Target' role stores the paths of target files in its metadata file. 'path_hash_prefixes' is a succint way to describe a set of paths to target files. tuf.FormatError, if the returned role meta is formatted incorrectly. If any of the arguments do not have a proper format, a tuf.formats exception is raised when the 'ROLE_SCHEMA' dict is created. A properly formatted role meta dict, conforming to 'ROLE_SCHEMA'. """ role_meta = {} role_meta['keyids'] = keyids role_meta['threshold'] = threshold if name is not None: role_meta['name'] = name # According to the specification, the 'paths' and 'path_hash_prefixes' must # be mutually exclusive. However, at the time of writing we do not always # ensure that this is the case with the schema checks (see #83). Therefore, # we must do it for ourselves. if paths is not None and path_hash_prefixes is not None: raise \ tuf.FormatError('Both "paths" and "path_hash_prefixes" are specified.') if path_hash_prefixes is not None: role_meta['path_hash_prefixes'] = path_hash_prefixes elif paths is not None: role_meta['paths'] = paths # Does 'role_meta' have the correct type? # This check ensures 'role_meta' conforms to # tuf.formats.ROLE_SCHEMA. 
ROLE_SCHEMA.check_match(role_meta) return role_meta def get_role_class(expected_rolename): """ Return the role class corresponding to 'expected_rolename'. The role name returned by expected_meta_rolename() should be the name passed as an argument to this function. If 'expected_rolename' is 'Root', the class RootFile is returned. expected_rolename: The role name used to determine which role class to return. tuf.FormatError, if 'expected_rolename' is not a supported role. None. The class corresponding to 'expected_rolename'. E.g., 'Snapshot' as an argument to this function causes SnapshotFile' to be returned. """ # Does 'expected_rolename' have the correct type? # This check ensures 'expected_rolename' conforms to # 'tuf.formats.NAME_SCHEMA'. # Raise 'tuf.FormatError' if there is a mismatch. NAME_SCHEMA.check_match(expected_rolename) try: role_class = ROLE_CLASSES_BY_TYPE[expected_rolename] except KeyError: raise tuf.FormatError(repr(expected_rolename) + ' not supported.') else: return role_class def expected_meta_rolename(meta_rolename): """ Ensure 'meta_rolename' is properly formatted. 'targets' is returned as 'Targets'. 'targets role1' is returned as 'Targets Role1'. The words in the string (i.e., separated by whitespace) are capitalized. meta_rolename: A string representing the rolename. E.g., 'root', 'targets'. tuf.FormatError, if 'meta_rolename' is improperly formatted. None. A string (e.g., 'Root', 'Targets'). """ # Does 'meta_rolename' have the correct type? # This check ensures 'meta_rolename' conforms to # 'tuf.formats.NAME_SCHEMA'. # Raise 'tuf.FormatError' if there is a mismatch. NAME_SCHEMA.check_match(meta_rolename) return string.capwords(meta_rolename) def check_signable_object_format(object): """ Ensure 'object' is properly formatted, conformant to 'tuf.formats.SIGNABLE_SCHEMA'. Return the signing role on success. Note: The 'signed' field of a 'SIGNABLE_SCHEMA' is checked against tuf.schema.Any(). 
The 'signed' field, however, should actually hold one of the supported role schemas (e.g., 'ROOT_SCHEMA', 'TARGETS_SCHEMA'). The role schemas all differ in their format, so this function determines exactly which schema is listed in the 'signed' field. object: The object compare against 'SIGNABLE.SCHEMA'. tuf.FormatError, if 'object' does not have the correct format. None. A string representing the signing role (e.g., 'root', 'targets'). The role string is returned with characters all lower case. """ # Does 'object' have the correct type? # This check ensures 'object' conforms to # 'tuf.formats.SIGNABLE_SCHEMA'. SIGNABLE_SCHEMA.check_match(object) try: role_type = object['signed']['_type'] except (KeyError, TypeError): raise tuf.FormatError('Untyped object') try: schema = SCHEMAS_BY_TYPE[role_type] except KeyError: raise tuf.FormatError('Unrecognized type ' + repr(role_type)) # 'tuf.FormatError' raised if 'object' does not have a properly # formatted role schema. schema.check_match(object['signed']) return role_type.lower() def _canonical_string_encoder(string): """ Encode 'string' to canonical string format. string: The string to encode. None. None. A string with the canonical-encoded 'string' embedded. """ string = '"%s"' % re.sub(r'(["\\])', r'\\\1', string) return string def _encode_canonical(object, output_function): # Helper for encode_canonical. Older versions of json.encoder don't # even let us replace the separators. 
if isinstance(object, six.string_types): output_function(_canonical_string_encoder(object)) elif object is True: output_function("true") elif object is False: output_function("false") elif object is None: output_function("null") elif isinstance(object, six.integer_types): output_function(str(object)) elif isinstance(object, (tuple, list)): output_function("[") if len(object): for item in object[:-1]: _encode_canonical(item, output_function) output_function(",") _encode_canonical(object[-1], output_function) output_function("]") elif isinstance(object, dict): output_function("{") if len(object): items = sorted(six.iteritems(object)) for key, value in items[:-1]: output_function(_canonical_string_encoder(key)) output_function(":") _encode_canonical(value, output_function) output_function(",") key, value = items[-1] output_function(_canonical_string_encoder(key)) output_function(":") _encode_canonical(value, output_function) output_function("}") else: raise tuf.FormatError('I cannot encode '+repr(object)) def encode_canonical(object, output_function=None): """ Encode 'object' in canonical JSON form, as specified at http://wiki.laptop.org/go/Canonical_JSON . It's a restricted dialect of JSON in which keys are always lexically sorted, there is no whitespace, floats aren't allowed, and only quote and backslash get escaped. The result is encoded in UTF-8, and the resulting bits are passed to output_function (if provided), or joined into a string and returned. Note: This function should be called prior to computing the hash or signature of a JSON object in TUF. For example, generating a signature of a signing role object such as 'ROOT_SCHEMA' is required to ensure repeatable hashes are generated across different json module versions and platforms. Code elsewhere is free to dump JSON objects in any format they wish (e.g., utilizing indentation and single quotes around object keys). 
These objects are only required to be in "canonical JSON" format when their hashes or signatures are needed. >>> encode_canonical("") '""' >>> encode_canonical([1, 2, 3]) '[1,2,3]' >>> encode_canonical([]) '[]' >>> encode_canonical({"A": [99]}) '{"A":[99]}' >>> encode_canonical({"x" : 3, "y" : 2}) '{"x":3,"y":2}' object: The object to be encoded. output_function: The result will be passed as arguments to 'output_function' (e.g., output_function('result')). tuf.FormatError, if 'object' cannot be encoded or 'output_function' is not callable. The results are fed to 'output_function()' if 'output_function' is set. A string representing the 'object' encoded in canonical JSON form. """ result = None # If 'output_function' is unset, treat it as # appending to a list. if output_function is None: result = [] output_function = result.append try: _encode_canonical(object, output_function) except (TypeError, tuf.FormatError) as e: message = 'Could not encode ' + repr(object) + ': ' + str(e) raise tuf.FormatError(message) # Return the encoded 'object' as a string. # Note: Implies 'output_function' is None, # otherwise results are sent to 'output_function'. if result is not None: return ''.join(result) if __name__ == '__main__': # The interactive sessions of the documentation strings can # be tested by running formats.py as a standalone module. # python -B formats.py import doctest doctest.testmod()