From e86520a6677058d409cd39034694324bb53b2270 Mon Sep 17 00:00:00 2001 From: Jussi Kukkonen Date: Sun, 22 Nov 2020 12:01:44 +0200 Subject: [PATCH 1/2] Updater: Avoid reading whole target file in memory We don't want to read the whole file into memory as it can be huge. Use digest_fileobject() instead: this way Securesystemslib will read the file in chunks. Securesystemslib already takes care of seeking to the beginning of the file. Fixes #1215 Signed-off-by: Jussi Kukkonen --- tuf/client/updater.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/tuf/client/updater.py b/tuf/client/updater.py index ccab7530..494ae6b9 100755 --- a/tuf/client/updater.py +++ b/tuf/client/updater.py @@ -1196,12 +1196,8 @@ def _check_hashes(self, file_object, trusted_hashes): # Verify each trusted hash of 'trusted_hashes'. If all are valid, simply # return. for algorithm, trusted_hash in six.iteritems(trusted_hashes): - digest_object = securesystemslib.hash.digest(algorithm) - # Ensure we read from the beginning of the file object - # TODO: should we store file position (before the loop) and reset after we - # seek about? - file_object.seek(0) - digest_object.update(file_object.read()) + digest_object = securesystemslib.hash.digest_fileobject(file_object, + algorithm) computed_hash = digest_object.hexdigest() # Raise an exception if any of the hashes are incorrect. 
From fcdae97b8a13e5528270ded811c8522fd7671218 Mon Sep 17 00:00:00 2001 From: Jussi Kukkonen Date: Sun, 22 Nov 2020 12:16:45 +0200 Subject: [PATCH 2/2] Updater: clean up _check_hashes() comments Remove duplicate/obvious comments, and tighten the remaining comments and a log line. Signed-off-by: Jussi Kukkonen --- tuf/client/updater.py | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/tuf/client/updater.py b/tuf/client/updater.py index 494ae6b9..b79ed4ba 100755 --- a/tuf/client/updater.py +++ b/tuf/client/updater.py @@ -1167,11 +1167,7 @@ def neither_403_nor_404(mirror_error): def _check_hashes(self, file_object, trusted_hashes): """ - Non-public method that verifies multiple secure hashes of the downloaded - file 'file_object'. If any of these fail it raises an exception. This is - to conform with the TUF spec, which support clients with different hashing - algorithms. The 'hash.py' module is used to compute the hashes of - 'file_object'. + Non-public method that verifies multiple secure hashes of 'file_object'. file_object: @@ -1193,21 +1189,18 @@ def _check_hashes(self, file_object, trusted_hashes): None. """ - # Verify each trusted hash of 'trusted_hashes'. If all are valid, simply - # return. + # Verify each hash, raise an exception if any hash fails to verify for algorithm, trusted_hash in six.iteritems(trusted_hashes): digest_object = securesystemslib.hash.digest_fileobject(file_object, algorithm) computed_hash = digest_object.hexdigest() - # Raise an exception if any of the hashes are incorrect. if trusted_hash != computed_hash: raise securesystemslib.exceptions.BadHashError(trusted_hash, computed_hash) else: - logger.info('The file\'s ' + algorithm + ' hash is' - ' correct: ' + trusted_hash) + logger.info('Verified ' + algorithm + ' hash: ' + trusted_hash)