From b7dc3fccea9b110ab5b884bf1fa4f62272796946 Mon Sep 17 00:00:00 2001
From: "syrttgump@gmail.com" <tiantian@tiantian-ThinkPad-T400.(none)>
Date: Fri, 26 Jul 2013 15:46:43 -0400
Subject: [PATCH 01/19] Endless attack test fix

---
 tuf/client/updater.py                              | 1 +
 tuf/download.py                                    | 8 +++++++-
 tuf/tests/system_tests/test_endless_data_attack.py | 3 ++-
 3 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/tuf/client/updater.py b/tuf/client/updater.py
index ed697e70..dca94cff 100755
--- a/tuf/client/updater.py
+++ b/tuf/client/updater.py
@@ -1727,6 +1727,7 @@ def download_target(self, target, destination_directory):
                                            trusted_length)
         break
       except (tuf.DownloadError, tuf.FormatError), e:
+	raise
         logger.warn('Download failed from '+mirror_url+'.')
         target_file_object = None
         continue
diff --git a/tuf/download.py b/tuf/download.py
index 162c0b7e..b9f52261 100755
--- a/tuf/download.py
+++ b/tuf/download.py
@@ -394,7 +394,7 @@ def download_url_to_tempfileobj(url, required_hashes=None,
     if required_length is not None and file_length != required_length:
       message = 'Incorrect length for '+url+'. Expected '+str(required_length)+ \
                 ', got '+str(file_length)+' bytes.'
-      raise tuf.DownloadError(message)
+      logger.warn(message)
 
     # For readibility, we perform the download in a separate function, which
     # returns the total number of downloaded bytes; this number should be equal
@@ -402,6 +402,12 @@ def download_url_to_tempfileobj(url, required_hashes=None,
     total_downloaded = _download_fixed_amount_of_data(connection, temp_file,
                                                       file_length,
                                                       required_length)
+  
+    # Does 'total_downloaded' match 'required_length'?
+    if total_downloaded != required_length:
+      message = 'Total downloded length '+str(total_downloaded)+ \
+		' bytes doesn\'t match required length '+str(required_length)+' bytes.'
+      raise tuf.DownloadError(message)
  
     # We appear to have downloaded the correct amount.  Check the hashes.
     if required_length is not None and required_hashes is not None: 
diff --git a/tuf/tests/system_tests/test_endless_data_attack.py b/tuf/tests/system_tests/test_endless_data_attack.py
index 9366accf..f58d9ecf 100755
--- a/tuf/tests/system_tests/test_endless_data_attack.py
+++ b/tuf/tests/system_tests/test_endless_data_attack.py
@@ -123,7 +123,8 @@ def test_arbitrary_package_attack(TUF=False):
       # If tuf.DownloadError is raised, this means that TUF has prevented
       # the download of an unrecognized file.  Enable the logging to see,
       # what actually happened.
-      pass
+      #pass
+      raise
 
     else:
       # Check whether the attack succeeded by inspecting the content of the

From d24567814817b93ee968757b91927c174e6f83a0 Mon Sep 17 00:00:00 2001
From: "syrttgump@gmail.com" <tiantian@tiantian-ThinkPad-T400.(none)>
Date: Fri, 26 Jul 2013 17:00:40 -0400
Subject: [PATCH 02/19] Endless attack test fix

---
 tuf/download.py                                    | 2 +-
 tuf/tests/system_tests/test_endless_data_attack.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tuf/download.py b/tuf/download.py
index b9f52261..8741f79c 100755
--- a/tuf/download.py
+++ b/tuf/download.py
@@ -404,7 +404,7 @@ def download_url_to_tempfileobj(url, required_hashes=None,
                                                       required_length)
   
     # Does 'total_downloaded' match 'required_length'?
-    if total_downloaded != required_length:
+    if required_length is not None and total_downloaded != required_length:
       message = 'Total downloded length '+str(total_downloaded)+ \
 		' bytes doesn\'t match required length '+str(required_length)+' bytes.'
       raise tuf.DownloadError(message)
diff --git a/tuf/tests/system_tests/test_endless_data_attack.py b/tuf/tests/system_tests/test_endless_data_attack.py
index f58d9ecf..0e820e78 100755
--- a/tuf/tests/system_tests/test_endless_data_attack.py
+++ b/tuf/tests/system_tests/test_endless_data_attack.py
@@ -97,7 +97,7 @@ def test_arbitrary_package_attack(TUF=False):
     if TUF:
       # Update TUF metadata before attacker modifies anything.
       util_test_tools.tuf_refresh_repo(root_repo, keyids)
-
+      print "refresh finished"
       # Modify the url.  Remember that the interposition will intercept 
       # urls that have 'localhost:9999' hostname, which was specified in
       # the json interposition configuration file.  Look for 'hostname'

From a2a8ba0217d87f9add064bf31c09ffc0b5049ebd Mon Sep 17 00:00:00 2001
From: ttgump <syrttgump@gmail.com>
Date: Tue, 30 Jul 2013 14:31:28 -0400
Subject: [PATCH 03/19] modified update.py

---
 tuf/client/updater.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/tuf/client/updater.py b/tuf/client/updater.py
index dca94cff..34a686fe 100755
--- a/tuf/client/updater.py
+++ b/tuf/client/updater.py
@@ -1719,6 +1719,12 @@ def download_target(self, target, destination_directory):
     trusted_hashes = target['fileinfo']['hashes']
 
     target_file_object = None
+    
+    # Store mirrors which doesn't work well
+    mirror_errors = {}
+    
+    logger.info('Trying to download: '+repr(target_filepath))
+
     # Iterate through the repositority mirrors until we successfully
     # download a target.
     for mirror_url in get_mirrors('target', target_filepath, self.mirrors):
@@ -1727,13 +1733,13 @@ def download_target(self, target, destination_directory):
                                            trusted_length)
         break
       except (tuf.DownloadError, tuf.FormatError), e:
-	raise
+	mirror_errors[mirror_url] = e
         logger.warn('Download failed from '+mirror_url+'.')
         target_file_object = None
         continue
     # We have gone through all the mirrors.  Did we get a target file object?
     if target_file_object == None: 
-      raise tuf.DownloadError('No download locations known.')
+      raise tuf.DownloadError('No download locations known: '+repr(mirror_errors))
    
     # We acquired a target file object from a mirror.  Move the file into
     # place (i.e., locally to 'destination_directory').

From c5be2cd69eecfa172f251c6fae21510f6c3d44a8 Mon Sep 17 00:00:00 2001
From: ttgump <syrttgump@gmail.com>
Date: Wed, 31 Jul 2013 12:00:36 -0400
Subject: [PATCH 04/19] endless_attack_test_fix

---
 tuf/client/updater.py                              | 2 +-
 tuf/download.py                                    | 4 ++--
 tuf/tests/system_tests/test_endless_data_attack.py | 8 ++++----
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/tuf/client/updater.py b/tuf/client/updater.py
index 34a686fe..8cf0a2b7 100755
--- a/tuf/client/updater.py
+++ b/tuf/client/updater.py
@@ -1733,7 +1733,7 @@ def download_target(self, target, destination_directory):
                                            trusted_length)
         break
       except (tuf.DownloadError, tuf.FormatError), e:
-	mirror_errors[mirror_url] = e
+        mirror_errors[mirror_url] = e
         logger.warn('Download failed from '+mirror_url+'.')
         target_file_object = None
         continue
diff --git a/tuf/download.py b/tuf/download.py
index 8741f79c..561cbae0 100755
--- a/tuf/download.py
+++ b/tuf/download.py
@@ -404,8 +404,8 @@ def download_url_to_tempfileobj(url, required_hashes=None,
                                                       required_length)
   
     # Does 'total_downloaded' match 'required_length'?
-    if required_length is not None and total_downloaded != required_length:
-      message = 'Total downloded length '+str(total_downloaded)+ \
+    if total_downloaded != required_length:
+      message = 'Total downloaded length '+str(total_downloaded)+ \
 		' bytes doesn\'t match required length '+str(required_length)+' bytes.'
       raise tuf.DownloadError(message)
  
diff --git a/tuf/tests/system_tests/test_endless_data_attack.py b/tuf/tests/system_tests/test_endless_data_attack.py
index 0e820e78..18d999e4 100755
--- a/tuf/tests/system_tests/test_endless_data_attack.py
+++ b/tuf/tests/system_tests/test_endless_data_attack.py
@@ -97,13 +97,14 @@ def test_arbitrary_package_attack(TUF=False):
     if TUF:
       # Update TUF metadata before attacker modifies anything.
       util_test_tools.tuf_refresh_repo(root_repo, keyids)
-      print "refresh finished"
       # Modify the url.  Remember that the interposition will intercept 
       # urls that have 'localhost:9999' hostname, which was specified in
       # the json interposition configuration file.  Look for 'hostname'
       # in 'util_test_tools.py'. Further, the 'file_basename' is the target
       # path relative to 'targets_dir'. 
+      print url_to_repo
       url_to_repo = 'http://localhost:9999/'+file_basename
+      print url_to_repo
 
       # Attacker modifies the file at the targets repository.
       target = os.path.join(tuf_targets, file_basename)
@@ -119,12 +120,11 @@ def test_arbitrary_package_attack(TUF=False):
       # Client downloads (tries to download) the file.
       _download(url=url_to_repo, filename=downloaded_file, tuf=TUF)
 
-    except tuf.DownloadError:
+    except tuf.DownloadError,e:
       # If tuf.DownloadError is raised, this means that TUF has prevented
       # the download of an unrecognized file.  Enable the logging to see,
       # what actually happened.
-      #pass
-      raise
+      logger.warn('Download failed: '+repr(e))
 
     else:
       # Check whether the attack succeeded by inspecting the content of the

From 1f5a1e53eaf8e155215673b574f0fd8508c48abb Mon Sep 17 00:00:00 2001
From: ttgump <syrttgump@gmail.com>
Date: Wed, 31 Jul 2013 12:02:37 -0400
Subject: [PATCH 05/19] endless_attack_test_fix

---
 tuf/tests/system_tests/test_endless_data_attack.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tuf/tests/system_tests/test_endless_data_attack.py b/tuf/tests/system_tests/test_endless_data_attack.py
index 18d999e4..0530f128 100755
--- a/tuf/tests/system_tests/test_endless_data_attack.py
+++ b/tuf/tests/system_tests/test_endless_data_attack.py
@@ -102,9 +102,7 @@ def test_arbitrary_package_attack(TUF=False):
       # the json interposition configuration file.  Look for 'hostname'
       # in 'util_test_tools.py'. Further, the 'file_basename' is the target
       # path relative to 'targets_dir'. 
-      print url_to_repo
       url_to_repo = 'http://localhost:9999/'+file_basename
-      print url_to_repo
 
       # Attacker modifies the file at the targets repository.
       target = os.path.join(tuf_targets, file_basename)

From 0c83799c855416c214050047a94fa48103a0058f Mon Sep 17 00:00:00 2001
From: zhengyuyu <nuistzyy@gmail.com>
Date: Tue, 23 Jul 2013 03:18:11 -0400
Subject: [PATCH 06/19] Fix the endless data attack issue

modification of updater.py for download.py

modification of conf.py for fix

modification of test_download.py for download.py

modification of test_updater.py for download.py

add a new test of endless data attack to metadata timestamp.txt

more readable and fix the endless data attack issue.
---
 tuf/client/updater.py                         |  24 +-
 tuf/conf.py                                   |   7 +
 tuf/download.py                               | 210 +++++++++++++-----
 .../system_tests/test_endless_data_attack.py  |  29 ++-
 tuf/tests/test_download.py                    |  22 +-
 tuf/tests/test_updater.py                     |  17 +-
 6 files changed, 222 insertions(+), 87 deletions(-)
 mode change 100755 => 100644 tuf/tests/system_tests/test_endless_data_attack.py

diff --git a/tuf/client/updater.py b/tuf/client/updater.py
index ed697e70..57c001f1 100755
--- a/tuf/client/updater.py
+++ b/tuf/client/updater.py
@@ -564,12 +564,17 @@ def refresh(self):
       None.
     
     """
-        
+    
+    DEFAULT_TIMESTAMP_FILEINFO = {'length': tuf.conf.DEFAULT_TIMESTAMP_LENGTH, 'hashes':None}  
+
     # Update the top-level metadata.  The _update_metadata_if_changed() and
     # _update_metadata() calls below do NOT perform an update if there
     # is insufficient trusted signatures for the specified metadata.
     # Raise 'tuf.RepositoryError' if an update fails.
-    self._update_metadata('timestamp')
+
+    # Set a default length for timestamp metadata.
+    self._update_metadata('timestamp', DEFAULT_TIMESTAMP_FILEINFO, 
+                         HARD_LIMIT_REQUIRED_LENGTH=False)
 
     self._update_metadata_if_changed('release', referenced_metadata='timestamp')
 
@@ -587,7 +592,8 @@ def refresh(self):
 
 
 
-  def _update_metadata(self, metadata_role, fileinfo=None, compression=None):
+  def _update_metadata(self, metadata_role, fileinfo, compression=None,
+                       HARD_LIMIT_REQUIRED_LENGTH=True):
     """
     <Purpose>
       Download, verify, and 'install' the metadata belonging to 'metadata_role'.
@@ -606,6 +612,10 @@ def _update_metadata(self, metadata_role, fileinfo=None, compression=None):
         Ex: {"hashes": {"sha256": "3a5a6ec1f353...dedce36e0"}, 
              "length": 1340}
 
+      HARD_LIMIT_REQUIRED_LENGTH:
+        A boolean value which indicates if the required_length passed into this 
+        function is a default length.
+
       compression:
         A string designating the compression type of 'metadata_role'.
         The 'release' metadata file may be optionally downloaded and stored in
@@ -662,8 +672,8 @@ def _update_metadata(self, metadata_role, fileinfo=None, compression=None):
     metadata_signable = None
     for mirror_url in get_mirrors('meta', metadata_filename.encode("utf-8"), self.mirrors):
       try:
-        metadata_file_object = download_file(mirror_url, file_hashes, 
-                                             file_length)
+        metadata_file_object = download_file(mirror_url, file_length, file_hashes,
+                                             HARD_LIMIT_REQUIRED_LENGTH)
       except tuf.DownloadError, e:
         logger.warn('Download failed from '+mirror_url+'.')
         continue
@@ -1723,8 +1733,8 @@ def download_target(self, target, destination_directory):
     # download a target.
     for mirror_url in get_mirrors('target', target_filepath, self.mirrors):
       try: 
-        target_file_object = download_file(mirror_url, trusted_hashes,
-                                           trusted_length)
+        target_file_object = download_file(mirror_url, trusted_length,
+                                           trusted_hashes)
         break
       except (tuf.DownloadError, tuf.FormatError), e:
         logger.warn('Download failed from '+mirror_url+'.')
diff --git a/tuf/conf.py b/tuf/conf.py
index 0b897afb..8b86484a 100755
--- a/tuf/conf.py
+++ b/tuf/conf.py
@@ -36,3 +36,10 @@
 # https://en.wikipedia.org/wiki/Certificate_authority
 # http://docs.python.org/2/library/ssl.html#certificates
 ssl_certificates = None
+
+# A default value used in the tuf.download.download_url_to_tempfileobj
+# function. When metadata does not tell what the length of target file
+# is(for example, the timestamp.txt), set it with this default value 
+# to avoid endless data attack. the default length is set based on the 
+# timestamp.txt with two signature.
+DEFAULT_TIMESTAMP_LENGTH = 1995
diff --git a/tuf/download.py b/tuf/download.py
index 162c0b7e..86a5398d 100755
--- a/tuf/download.py
+++ b/tuf/download.py
@@ -226,13 +226,12 @@ def _check_hashes(input_file, trusted_hashes):
 
 
 
-def _download_fixed_amount_of_data(connection, temp_file, file_length,
-                                   required_length):
+def _download_fixed_amount_of_data(connection, temp_file, required_length):
   """
   <Purpose>
     This is a helper function, where the download really happens. While-block
     reads data from connection a fixed chunk of data at a time, or less, until
-    'file_length' is reached.
+    'required_length' is reached.
   
   <Arguments>
     connection:
@@ -243,9 +242,6 @@ def _download_fixed_amount_of_data(connection, temp_file, file_length,
       A temporary file where the contents at the URL specified by the
       'connection' object will be stored.
 
-    file_length:
-      The number of bytes that the server claims is the size of the file.
-
     required_length:
       The number of bytes that we must download for the file.  This is almost
       always specified by the TUF metadata for the data file in question
@@ -276,28 +272,24 @@ def _download_fixed_amount_of_data(connection, temp_file, file_length,
       # We download a fixed chunk of data in every round. This is so that we
       # can defend against slow retrieval attacks. Furthermore, we do not wish
       # to download an extremely large file in one shot.
-      data = connection.read(min(BLOCK_SIZE, file_length-total_downloaded))
+      data = connection.read(min(BLOCK_SIZE, required_length-total_downloaded))
 
       # We might have no more data to read. Check number of bytes downloaded. 
       if not data:
         message = 'Downloaded '+str(total_downloaded)+'/'+ \
-          str(file_length)+' bytes.'
+          str(required_length)+' bytes.'
         logger.debug(message)
 
-        # Did we download the correct amount indicated by 'Content-Length'
-        # or user? Because file_length is always eaqual to required_length
-        # we just need check one of them. 
-        if total_downloaded != file_length:
-          message = 'Downloaded '+str(total_downloaded)+'.  Expected '+ \
-            str(file_length)+' for '+url
-          raise tuf.DownloadError(message)
-
         # Finally, we signal that the download is complete.
         break
 
       # Data successfully read from the connection.  Store it. 
       temp_file.write(data)
       total_downloaded = total_downloaded + len(data)
+
+      # This is to make sure we did not make a mistake!
+      #if total_downloaded > required_length:
+      #  logger.error('This should NEVER happen!')
   except:
     raise
   else:
@@ -309,8 +301,141 @@ def _download_fixed_amount_of_data(connection, temp_file, file_length,
 
 
 
-def download_url_to_tempfileobj(url, required_hashes=None,
-                                required_length=None):
+def _get_content_length(connection):
+  """
+  <Purpose>
+    Helper function thst get the file length from server, if any of these fail,
+    the length reported by server will be simply set to None.
+  
+  <Arguments>
+    connection:
+      The object that the _open_connection returns for communicating with the
+      server about the contents of a URL.
+  
+  <Side Effects>
+    Length from server will be written to 'reported_length'.
+ 
+  <Exceptions>
+    Runtime or network exceptions will be raised without question.
+ 
+  <Returns>
+    reported_length:
+      The total number of bytes reported by server.
+
+  """
+
+  try:
+  # info().get('Content-Length') gets the length of the url file.
+    reported_length = connection.info().get('Content-Length')
+    reported_length = int(reported_length, 10)
+  except:
+    reported_length = None
+   
+  return reported_length
+
+
+
+
+
+def _check_content_length(reported_length, required_length):
+  """
+  <Purpose>
+    Helper function that checks whether the length reported by server is equal 
+    to the length we expected. If the reported length is larger than we expected,
+    it will rise tuf.DownloadError exception to avoid the endless data attack.
+  
+  <Arguments>
+    reported_length:
+      The total number of bytes reported by server.
+
+    required_length:
+      The total number of bytes obtained from metadata or default value.
+
+  <Side Effects>
+    None.
+ 
+  <Exceptions>
+    tuf.DownloadError, if reported_length is more than required_length.
+ 
+  <Returns>
+    None.
+
+  """
+
+  # The length of downloading file obtained from server is larger than which 
+  # obtained from metadata or default length. So it could be a endless data 
+  # attack.
+  if reported_length is not None:
+    if reported_length != required_length:
+      if reported_length > required_length:
+        message = 'Incorrect length for '+url+'. The length reported by server is'+ \
+                  ' larger than expected. Expected '+str(required_length)+', got '+ \
+                  str(reported_length)+' bytes. It could be an endless data attack!'
+        raise tuf.DownloadError(message)
+      else:
+        message = 'The length reported by server is smaller than expected!'
+        logger.warn(message)
+    else:
+      logger.info('Everything is OK. Download will start!')
+  else:
+     logger.warn('Server is being crappy, DownloadError will start!')
+
+
+
+
+
+  
+def _check_downloaded_length(total_downloaded, required_length, HARD_LIMIT_REQUIRED_LENGTH):
+  """
+  <Purpose>
+    This is a helper function, which checks if the length of downloaded is equal to the length
+    we expected. 
+  
+  <Arguments>
+    reported_length:
+      The total number of bytes reported by server.
+
+    required_length:
+      The total number of bytes obtained from metadata or default value.
+      
+    HARD_LIMIT_REQUIRED_LENGTH:
+      A boolean value which indicates if the required_length passed into this 
+      function is a default length.
+  
+  <Side Effects>
+    None.
+ 
+  <Exceptions>
+    tuf.DownloadError, if HARD_LIMIT_REQUIRED_LENGTH is set to True and total_downloaded 
+    is not equal required_length.
+ 
+  <Returns>
+    None.
+
+  """
+
+  # If the required_length is not the default value, we will check whether
+  # the total_downloaded is equal to required_length.
+  if HARD_LIMIT_REQUIRED_LENGTH:  
+    if total_downloaded != required_length:
+      message = 'Downloaded '+str(total_downloaded)+'. Expected '+str(required_length)+\
+                ' for '+url+'. There are still '+str(required_length-total_downloaded)+\
+                'bytes expected to be downloaded!'
+      logger.error(message)          
+      raise tuf.DownloadError(message)
+    else:
+      logger.info('Successful download!')
+  
+  else:  
+    message = 'Required_length is default value, skip the safety check of total downloaded.'
+    logger.warn(message)
+
+
+
+
+def download_url_to_tempfileobj(url, required_length,
+                                required_hashes=None,
+                                HARD_LIMIT_REQUIRED_LENGTH=True):
   """
   <Purpose>
     Given the url, hashes and length of the desired file, this function 
@@ -333,6 +458,10 @@ def download_url_to_tempfileobj(url, required_hashes=None,
   
     required_length:
       An integer value representing the length of the file.
+
+    HARD_LIMIT_REQUIRED_LENGTH:
+      A boolean value which indicates if the required_length passed into this 
+      function is a default length.
   
   <Side Effects>
     'tuf.util.TempFile' object is created.
@@ -363,48 +492,23 @@ def download_url_to_tempfileobj(url, required_hashes=None,
   connection = _open_connection(url)
   temp_file = tuf.util.TempFile()
 
-
   try:
-    # info().get('Content-Length') gets the length of the url file.
-    file_length = connection.info().get('Content-Length')
-
-    # If the HTTP server did not specify a Content-Length...
-    if file_length is None:
-        # Do we know what is the required_length for this file?
-        if required_length is None:
-            # No, we do not know this. Raise this to the user!
-            message = 'Do not know anything about how much to download for "' + url + '"!'
-            raise tuf.DownloadError(message)
-        else:
-            # Okay, the HTTP server has not told us the Content-Length,
-            # but we know how much we are required to download.
-            file_length = required_length
-    else:
-        # Do we know what is the required_length for this file?
-        if required_length is None:
-            # No, we do not know this. Avoid falling for an arbitrary-length data attack (#26).
-            message = 'Do not know how much is required to download for "' + url + '"!'
-            logger.debug(message)
-            file_length = int(file_length, 10)
-        else:
-            # Okay, we do know this. Go ahead with checks.
-            file_length = int(file_length, 10)
-
-    # Does the url's 'file_length' match 'required_length'?
-    if required_length is not None and file_length != required_length:
-      message = 'Incorrect length for '+url+'. Expected '+str(required_length)+ \
-                ', got '+str(file_length)+' bytes.'
-      raise tuf.DownloadError(message)
+    reported_length = _get_content_length(connection)
+    # call the function to check whether the length reported by server is equal 
+    # to expected.
+    _check_content_length(reported_length, required_length)
 
     # For readibility, we perform the download in a separate function, which
     # returns the total number of downloaded bytes; this number should be equal
     # to required_length. 
-    total_downloaded = _download_fixed_amount_of_data(connection, temp_file,
-                                                      file_length,
+    total_downloaded = _download_fixed_amount_of_data(connection, temp_file, 
                                                       required_length)
- 
+    # call the function to check whether the length of total_downloaded is equal to 
+    # expected.
+    _check_downloaded_length(total_downloaded, required_length, HARD_LIMIT_REQUIRED_LENGTH)
+      
     # We appear to have downloaded the correct amount.  Check the hashes.
-    if required_length is not None and required_hashes is not None: 
+    if required_hashes is not None: 
       _check_hashes(temp_file, required_hashes)
 
   # Exception is a base class for all non-exiting exceptions.
diff --git a/tuf/tests/system_tests/test_endless_data_attack.py b/tuf/tests/system_tests/test_endless_data_attack.py
old mode 100755
new mode 100644
index 9366accf..796863d9
--- a/tuf/tests/system_tests/test_endless_data_attack.py
+++ b/tuf/tests/system_tests/test_endless_data_attack.py
@@ -63,7 +63,7 @@ def _download(url, filename, tuf=False):
 
 
 
-def test_arbitrary_package_attack(TUF=False):
+def test_arbitrary_package_attack(TUF=False, TIMESTAMP=False):
   """
   <Arguments>
     TUF:
@@ -91,7 +91,7 @@ def test_arbitrary_package_attack(TUF=False):
     file_basename = os.path.basename(filepath)
     url_to_repo = url+'reg_repo/'+file_basename
     downloaded_file = os.path.join(downloads, file_basename)
-    endless_data = 'A'*100
+    endless_data = 'A'*100000
 
 
     if TUF:
@@ -108,6 +108,11 @@ def test_arbitrary_package_attack(TUF=False):
       # Attacker modifies the file at the targets repository.
       target = os.path.join(tuf_targets, file_basename)
       util_test_tools.modify_file_at_repository(target, endless_data)
+      # Attacker modifies the timestamp.txt metadata.
+      if TIMESTAMP:
+        metadata = os.path.join(tuf_repo, 'metadata')
+        timestamp = os.path.join(metadata, 'timestamp.txt')
+        util_test_tools.modify_file_at_repository(timestamp, endless_data)  
 
     # Attacker modifies the file at the regular repository.
     util_test_tools.modify_file_at_repository(filepath, endless_data)
@@ -119,10 +124,10 @@ def test_arbitrary_package_attack(TUF=False):
       # Client downloads (tries to download) the file.
       _download(url=url_to_repo, filename=downloaded_file, tuf=TUF)
 
-    except tuf.DownloadError:
-      # If tuf.DownloadError is raised, this means that TUF has prevented
-      # the download of an unrecognized file.  Enable the logging to see,
-      # what actually happened.
+    except (tuf.DownloadError,tuf.RepositoryError), e:
+      # If tuf.DownloadError or tuf.RepositoryError is raised, this means
+      # that TUF has prevented the download of an unrecognized file. Enable
+      # the logging to see, what actually happened.
       pass
 
     else:
@@ -142,7 +147,7 @@ def test_arbitrary_package_attack(TUF=False):
 
 
 try:
-  test_arbitrary_package_attack(TUF=False)
+  test_arbitrary_package_attack(TUF=False, TIMESTAMP=False)
 
 except EndlessDataAttack, error:
   print('Without TUF: '+str(error))
@@ -150,7 +155,15 @@ def test_arbitrary_package_attack(TUF=False):
 
 
 try:
-  test_arbitrary_package_attack(TUF=True)
+  test_arbitrary_package_attack(TUF=True, TIMESTAMP=False)
 
 except EndlessDataAttack, error:
   print('With TUF: '+str(error))
+
+
+
+try:
+  test_arbitrary_package_attack(TUF=True, TIMESTAMP=True)
+
+except EndlessDataAttack, error:
+  print('With TUF: '+str(error))  
\ No newline at end of file
diff --git a/tuf/tests/test_download.py b/tuf/tests/test_download.py
index cb773e78..4e1273ae 100755
--- a/tuf/tests/test_download.py
+++ b/tuf/tests/test_download.py
@@ -24,6 +24,7 @@
 import tuf
 import tuf.download as download
 import tuf.tests.unittest_toolbox as unittest_toolbox
+import tuf.conf
 
 import os
 import sys
@@ -90,11 +91,7 @@ def tearDown(self):
 
   # Unit Test.
   def test_download_url_to_tempfileobj(self):
-    # Test: Normal cases without supplying hash and/or length arguments.
-    temp_fileobj = download.download_url_to_tempfileobj(self.url) 
-    self.assertEquals(self.target_data, temp_fileobj.read())
-    self.assertEquals(self.target_data_length, len(temp_fileobj.read()))
-    temp_fileobj.close_temp_file()
+    # Test: Normal cases without supplying hash arguments.
 
     temp_fileobj = download.download_url_to_tempfileobj(self.url,
                       required_length=self.target_data_length)
@@ -102,12 +99,6 @@ def test_download_url_to_tempfileobj(self):
     self.assertEquals(self.target_data_length, len(temp_fileobj.read()))
     temp_fileobj.close_temp_file()
 
-    temp_fileobj = download.download_url_to_tempfileobj(self.url,
-                      required_hashes=self.target_hash) 
-    self.assertEquals(self.target_data, temp_fileobj.read())
-    self.assertEquals(self.target_data_length, len(temp_fileobj.read()))
-    temp_fileobj.close_temp_file()
-
     # Test: Normal case.
     temp_fileobj = download.download_url_to_tempfileobj(self.url,
                       required_hashes=self.target_hash, 
@@ -157,6 +148,15 @@ def test_download_url_to_tempfileobj(self):
                       required_hashes=self.target_hash, 
                       required_length=self.target_data_length)
 
+    # Test: Set the required_length to default value.
+
+    temp_fileobj = download.download_url_to_tempfileobj(self.url,
+                                                        required_length=tuf.conf.DEFAULT_TIMESTAMP_LENGTH,
+                                                        HARD_LIMIT_REQUIRED_LENGTH=False)
+    self.assertEquals(self.target_data, temp_fileobj.read())
+    self.assertEquals(self.target_data_length, len(temp_fileobj.read()))
+    temp_fileobj.close_temp_file();
+
     """
     # Measuring performance of 'auto_flush = False' vs. 'auto_flush = True'
     # in download_url_to_tempfileobj() during write. No change was observed.
diff --git a/tuf/tests/test_updater.py b/tuf/tests/test_updater.py
index 75c7dc77..e2506aaf 100755
--- a/tuf/tests/test_updater.py
+++ b/tuf/tests/test_updater.py
@@ -42,6 +42,7 @@ class guarantees the order of unit tests.  So that, 'test_something_A'
 import tempfile
 import logging
 
+import tuf.conf
 import tuf.util
 import tuf.formats
 import tuf.repo.keystore as keystore
@@ -60,7 +61,7 @@ class guarantees the order of unit tests.  So that, 'test_something_A'
 roledb = tuf.roledb
 keydb = tuf.keydb
 
-
+DEFAULT_TIMESTAMP_FILEINFO = {'length': tuf.conf.DEFAULT_TIMESTAMP_LENGTH, 'hashes':None}  
 
 class TestUpdater_init_(unittest_toolbox.Modified_TestCase):
 
@@ -200,7 +201,7 @@ def _mock_download_url_to_tempfileobj(self, output):
 
     """
 
-    def _mock_download(url, hashes=None, length=None):
+    def _mock_download(url, length, hashes=None, HARD_LIMIT_REQUIRED_LENGTH=True):
       if isinstance(output, (str, unicode)):
         file_path = output
       elif isinstance(output, list):
@@ -324,7 +325,7 @@ def _get_list_of_target_paths(self, targets_directory, relative=True):
 
   def _update_top_level_roles(self):
     self._mock_download_url_to_tempfileobj(self.timestamp_filepath)
-    self.Repository._update_metadata('timestamp')
+    self.Repository._update_metadata('timestamp', DEFAULT_TIMESTAMP_FILEINFO)
 
     # Reference self.Repository._update_metadata_if_changed().
     update_if_changed = self.Repository._update_metadata_if_changed    
@@ -501,13 +502,13 @@ def test_3__update_metadata(self):
     # Test: Invalid file downloaded.
     #  Patch 'download.download_url_to_tempfileobj' function.
     self._mock_download_url_to_tempfileobj(self.release_filepath)
-    self.assertRaises(tuf.RepositoryError, _update_metadata, 'targets')
+    self.assertRaises(tuf.RepositoryError, _update_metadata, 'targets', None)
 
 
     # Test: normal case.
     #  Patch 'download.download_url_to_tempfileobj' function.
     self._mock_download_url_to_tempfileobj(self.targets_filepath)
-    _update_metadata('targets')
+    _update_metadata('targets', None)
     list_of_targets = self.Repository.metadata['current']['targets']['targets']
 
     #  Verify that the added target's path is listed in target's metadata.
@@ -524,7 +525,7 @@ def test_3__update_metadata(self):
 
     #  Re-patch 'download.download_url_to_tempfileobj' function.
     self._mock_download_url_to_tempfileobj(targets_filepath_compressed)
-    _update_metadata('targets', compression='gzip')
+    _update_metadata('targets', None, compression='gzip')
     list_of_targets = self.Repository.metadata['current']['targets']['targets']
 
     #  Verify that the added target's path is listed in target's metadata.
@@ -620,7 +621,7 @@ def test_3__update_metadata_if_changed(self):
     self._mock_download_url_to_tempfileobj(self.timestamp_filepath)
 
     #  Update timestamp metadata, it will indicate change in release metadata.
-    self.Repository._update_metadata('timestamp')
+    self.Repository._update_metadata('timestamp', DEFAULT_TIMESTAMP_FILEINFO)
 
     #  Save current release metadata before updating.  It will be used to
     #  verify the update.
@@ -664,7 +665,7 @@ def test_3__update_metadata_if_changed(self):
     self._mock_download_url_to_tempfileobj(self.timestamp_filepath)
 
     #  Update timestamp metadata, it will indicate change in release metadata.
-    self.Repository._update_metadata('timestamp')
+    self.Repository._update_metadata('timestamp', DEFAULT_TIMESTAMP_FILEINFO)
 
     #  Save current release metadata before updating.  It will be used to
     #  verify the update.

From 3c18b58b71f518a498cc55b62f30cb9ee676bdc0 Mon Sep 17 00:00:00 2001
From: dachshund <trishankkarthik@gmail.com>
Date: Tue, 6 Aug 2013 13:40:24 -0400
Subject: [PATCH 07/19] Adapt Zheng Yuyu's changes.

---
 tuf/client/updater.py      |  38 ++++--
 tuf/conf.py                |   9 +-
 tuf/download.py            | 271 ++++++++++++++++++++-----------------
 tuf/tests/test_download.py | 161 +++++++++++++---------
 tuf/tests/test_updater.py  |   5 +-
 5 files changed, 274 insertions(+), 210 deletions(-)

diff --git a/tuf/client/updater.py b/tuf/client/updater.py
index 57c001f1..dac17a6c 100755
--- a/tuf/client/updater.py
+++ b/tuf/client/updater.py
@@ -564,17 +564,24 @@ def refresh(self):
       None.
     
     """
-    
-    DEFAULT_TIMESTAMP_FILEINFO = {'length': tuf.conf.DEFAULT_TIMESTAMP_LENGTH, 'hashes':None}  
+
+    # The timestamp role does not have signed metadata about it; otherwise we
+    # would need an infinite regress of metadata. Therefore, we use some
+    # default, sane metadata about it.
+    DEFAULT_TIMESTAMP_FILEINFO = {
+      'hashes':None,
+      'length': tuf.conf.DEFAULT_TIMESTAMP_REQUIRED_LENGTH
+    }
 
     # Update the top-level metadata.  The _update_metadata_if_changed() and
     # _update_metadata() calls below do NOT perform an update if there
     # is insufficient trusted signatures for the specified metadata.
     # Raise 'tuf.RepositoryError' if an update fails.
 
-    # Set a default length for timestamp metadata.
+    # Use default but sane information for timestamp metadata, and do not
+    # require strict checks on its required length.
     self._update_metadata('timestamp', DEFAULT_TIMESTAMP_FILEINFO, 
-                         HARD_LIMIT_REQUIRED_LENGTH=False)
+                          STRICT_REQUIRED_LENGTH=False)
 
     self._update_metadata_if_changed('release', referenced_metadata='timestamp')
 
@@ -593,7 +600,7 @@ def refresh(self):
 
 
   def _update_metadata(self, metadata_role, fileinfo, compression=None,
-                       HARD_LIMIT_REQUIRED_LENGTH=True):
+                       STRICT_REQUIRED_LENGTH=True):
     """
     <Purpose>
       Download, verify, and 'install' the metadata belonging to 'metadata_role'.
@@ -612,9 +619,12 @@ def _update_metadata(self, metadata_role, fileinfo, compression=None,
         Ex: {"hashes": {"sha256": "3a5a6ec1f353...dedce36e0"}, 
              "length": 1340}
 
-      HARD_LIMIT_REQUIRED_LENGTH:
-        A boolean value which indicates if the required_length passed into this 
-        function is a default length.
+      STRICT_REQUIRED_LENGTH:
+        A Boolean indicator used to signal whether we should perform strict
+        checking of the required length in 'fileinfo'. True by default.  True
+        by default. We explicitly set this to False when we know that we want
+        to turn this off for downloading the timestamp metadata, which has no
+        signed required_length.
 
       compression:
         A string designating the compression type of 'metadata_role'.
@@ -670,10 +680,13 @@ def _update_metadata(self, metadata_role, fileinfo, compression=None,
     # 'tuf.formats.SIGNABLE_SCHEMA'.
     metadata_file_object = None
     metadata_signable = None
-    for mirror_url in get_mirrors('meta', metadata_filename.encode("utf-8"), self.mirrors):
+    for mirror_url in get_mirrors('meta',
+                                  metadata_filename.encode("utf-8"),
+                                  self.mirrors):
       try:
-        metadata_file_object = download_file(mirror_url, file_length, file_hashes,
-                                             HARD_LIMIT_REQUIRED_LENGTH)
+        metadata_file_object = \
+          download_file(mirror_url, file_length, file_hashes,
+                        STRICT_REQUIRED_LENGTH=STRICT_REQUIRED_LENGTH)
       except tuf.DownloadError, e:
         logger.warn('Download failed from '+mirror_url+'.')
         continue
@@ -691,7 +704,8 @@ def _update_metadata(self, metadata_role, fileinfo, compression=None,
         # but this is a workaround for Unicode messages. We need a long-term
         # solution with #61.
         # http://bugs.python.org/issue2517
-        message = 'Unable to verify '+metadata_filename+':'+e.message.encode("utf-8")
+        message = 'Unable to verify '+metadata_filename+':'+\
+                  e.message.encode("utf-8")
         logger.exception(message)
         metadata_signable = None
         continue
diff --git a/tuf/conf.py b/tuf/conf.py
index 8b86484a..e601726d 100755
--- a/tuf/conf.py
+++ b/tuf/conf.py
@@ -37,9 +37,6 @@
 # http://docs.python.org/2/library/ssl.html#certificates
 ssl_certificates = None
 
-# A default value used in the tuf.download.download_url_to_tempfileobj
-# function. When metadata does not tell what the length of target file
-# is(for example, the timestamp.txt), set it with this default value 
-# to avoid endless data attack. the default length is set based on the 
-# timestamp.txt with two signature.
-DEFAULT_TIMESTAMP_LENGTH = 1995
+# Since the timestamp role does not have signed metadata about itself, we set a
+# default but sane upper bound for the number of bytes required to download it.
+DEFAULT_TIMESTAMP_REQUIRED_LENGTH = 2048
diff --git a/tuf/download.py b/tuf/download.py
index 86a5398d..cfe8ac22 100755
--- a/tuf/download.py
+++ b/tuf/download.py
@@ -182,17 +182,18 @@ def _open_connection(url):
 
 
 
-def _check_hashes(input_file, trusted_hashes):
+def _check_hashes(input_file, trusted_hashes=None):
   """
   <Purpose>
-    Helper function that verifies multiple secure hashes of the downloaded file.
-    If any of these fail it raises an exception.  This is to conform with the 
-    TUF specs, which support clients with different hashing algorithms. The
-    'hash.py' module is used to compute the hashes of the 'input_file'. 
+    A helper function that verifies multiple secure hashes of the downloaded
+    file.  If any of these fail it raises an exception.  This is to conform
+    with the TUF specs, which support clients with different hashing
+    algorithms. The 'hash.py' module is used to compute the hashes of the
+    'input_file'. 
 
   <Arguments>
     input_file:
-      A file or file-like object.
+      A file-like object.
     
     trusted_hashes: 
       A dictionary with hash-algorithm names as keys and hashes as dict values.
@@ -206,21 +207,24 @@ def _check_hashes(input_file, trusted_hashes):
     
   <Returns>
     None.
-    
+
   """
-  # Verify each trusted hash of 'trusted_hashes'.  Raise exception if
-  # any of the hashes are incorrect and return if all are correct.
-  for algorithm, trusted_hash in trusted_hashes.items():
-    digest_object = tuf.hash.digest(algorithm)
-    digest_object.update(input_file.read())
-    computed_hash = digest_object.hexdigest()
-    if trusted_hash != computed_hash:
-      msg = 'Hashes do not match. Expected '+trusted_hash+' got '+computed_hash
-      raise tuf.BadHashError(msg)
-    else:
-      logger.info('The file\'s '+algorithm+' hash is correct: '+trusted_hash)
-  
-  return
+
+  if trusted_hashes:
+    # Verify each trusted hash of 'trusted_hashes'.  Raise exception if
+    # any of the hashes are incorrect and return if all are correct.
+    for algorithm, trusted_hash in trusted_hashes.items():
+      digest_object = tuf.hash.digest(algorithm)
+      digest_object.update(input_file.read())
+      computed_hash = digest_object.hexdigest()
+      if trusted_hash != computed_hash:
+        raise tuf.BadHashError('Hashes do not match! Expected '+
+                               trusted_hash+' got '+computed_hash)
+      else:
+        logger.info('The file\'s '+algorithm+' hash is correct: '+trusted_hash)
+  else:
+    logger.warn('No trusted hashes supplied to verify file at: '+
+                str(input_file))
 
 
 
@@ -286,15 +290,12 @@ def _download_fixed_amount_of_data(connection, temp_file, required_length):
       # Data successfully read from the connection.  Store it. 
       temp_file.write(data)
       total_downloaded = total_downloaded + len(data)
-
-      # This is to make sure we did not make a mistake!
-      #if total_downloaded > required_length:
-      #  logger.error('This should NEVER happen!')
   except:
     raise
   else:
     return total_downloaded
   finally:
+    # Whatever happens, make sure that we always close the connection.
     connection.close()
 
 
@@ -304,34 +305,39 @@ def _download_fixed_amount_of_data(connection, temp_file, required_length):
 def _get_content_length(connection):
   """
   <Purpose>
-    Helper function thst get the file length from server, if any of these fail,
-    the length reported by server will be simply set to None.
+    A helper function that gets the purported file length from server.
   
   <Arguments>
     connection:
-      The object that the _open_connection returns for communicating with the
-      server about the contents of a URL.
+      The object that the _open_connection function returns for communicating
+      with the server about the contents of a URL.
   
   <Side Effects>
-    Length from server will be written to 'reported_length'.
+    No known side effects.
  
   <Exceptions>
-    Runtime or network exceptions will be raised without question.
+    Runtime exceptions will be suppressed but logged.
  
   <Returns>
     reported_length:
-      The total number of bytes reported by server.
+      The total number of bytes reported by server. If the process fails, we
+      return None; otherwise we would return a nonnegative integer.
 
   """
 
   try:
-  # info().get('Content-Length') gets the length of the url file.
+    # What is the length of this document according to the HTTP spec?
     reported_length = connection.info().get('Content-Length')
+    # Try casting it as a decimal number.
     reported_length = int(reported_length, 10)
+    # Make sure that it is a nonnegative integer.
+    assert reported_length > -1
   except:
+    logger.exception('Could not get content length about '+str(connection)+
+                     ' from server!')
     reported_length = None
-   
-  return reported_length
+  finally:
+    return reported_length
 
 
 
@@ -340,73 +346,70 @@ def _get_content_length(connection):
 def _check_content_length(reported_length, required_length):
   """
   <Purpose>
-    Helper function that checks whether the length reported by server is equal 
-    to the length we expected. If the reported length is larger than we expected,
-    it will rise tuf.DownloadError exception to avoid the endless data attack.
+    A helper function that checks whether the length reported by server is
+    equal to the length we expected.
   
   <Arguments>
     reported_length:
-      The total number of bytes reported by server.
+      The total number of bytes reported by the server.
 
     required_length:
-      The total number of bytes obtained from metadata or default value.
+      The total number of bytes obtained from (possibly default) metadata.
 
   <Side Effects>
-    None.
+    No known side effects.
  
   <Exceptions>
-    tuf.DownloadError, if reported_length is more than required_length.
+    No known exceptions.
  
   <Returns>
     None.
 
   """
 
-  # The length of downloading file obtained from server is larger than which 
-  # obtained from metadata or default length. So it could be a endless data 
-  # attack.
-  if reported_length is not None:
-    if reported_length != required_length:
-      if reported_length > required_length:
-        message = 'Incorrect length for '+url+'. The length reported by server is'+ \
-                  ' larger than expected. Expected '+str(required_length)+', got '+ \
-                  str(reported_length)+' bytes. It could be an endless data attack!'
-        raise tuf.DownloadError(message)
-      else:
-        message = 'The length reported by server is smaller than expected!'
-        logger.warn(message)
+  try:
+    if reported_length < required_length:
+      logger.warn('reported_length ('+str(reported_length)+
+                  ') < required_length ('+str(required_length)+')')
+    elif reported_length > required_length:
+      logger.warn('reported_length ('+str(reported_length)+
+                  ') > required_length ('+str(required_length)+')')
     else:
-      logger.info('Everything is OK. Download will start!')
-  else:
-     logger.warn('Server is being crappy, DownloadError will start!')
-
+      logger.debug('reported_length ('+str(reported_length)+
+                   ') == required_length ('+str(required_length)+')')
+  except:
+    logger.exception('Could not check reported and required lengths!')
 
 
 
 
   
-def _check_downloaded_length(total_downloaded, required_length, HARD_LIMIT_REQUIRED_LENGTH):
+def _check_downloaded_length(total_downloaded, required_length,
+                             STRICT_REQUIRED_LENGTH=True):
   """
   <Purpose>
-    This is a helper function, which checks if the length of downloaded is equal to the length
-    we expected. 
-  
+    A helper function which checks whether the total number of downloaded bytes
+    matches our expectation. 
+ 
   <Arguments>
-    reported_length:
-      The total number of bytes reported by server.
+    total_downloaded:
+      The total number of bytes supposedly downloaded for the file in question.
 
     required_length:
-      The total number of bytes obtained from metadata or default value.
-      
-    HARD_LIMIT_REQUIRED_LENGTH:
-      A boolean value which indicates if the required_length passed into this 
-      function is a default length.
+      The total number of bytes expected of the file as seen from its (possibly
+      default) metadata.
+
+    STRICT_REQUIRED_LENGTH:
+      A Boolean indicator used to signal whether we should perform strict
+      checking of required_length. True by default. We explicitly set this to
+      False when we know that we want to turn this off for downloading the
+      timestamp metadata, which has no signed required_length.
   
   <Side Effects>
     None.
  
   <Exceptions>
-    tuf.DownloadError, if HARD_LIMIT_REQUIRED_LENGTH is set to True and total_downloaded 
+    tuf.DownloadError, if STRICT_REQUIRED_LENGTH is True and total_downloaded
     is not equal required_length.
  
   <Returns>
@@ -414,28 +417,34 @@ def _check_downloaded_length(total_downloaded, required_length, HARD_LIMIT_REQUI
 
   """
 
-  # If the required_length is not the default value, we will check whether
-  # the total_downloaded is equal to required_length.
-  if HARD_LIMIT_REQUIRED_LENGTH:  
-    if total_downloaded != required_length:
-      message = 'Downloaded '+str(total_downloaded)+'. Expected '+str(required_length)+\
-                ' for '+url+'. There are still '+str(required_length-total_downloaded)+\
-                'bytes expected to be downloaded!'
+  if total_downloaded == required_length:
+    logger.debug('total_downloaded == required_length == '+
+                 str(required_length))
+  else:
+    difference_in_bytes = abs(total_downloaded-required_length)
+    message = 'Downloaded '+str(total_downloaded)+' bytes, but expected '+\
+              str(required_length)+' bytes. There is a difference of '+\
+              str(difference_in_bytes)+' bytes!'
+
+    # What we downloaded is not equal to the required length, but did we ask
+    # for strict checking of required length?
+    if STRICT_REQUIRED_LENGTH:  
+      # This must be due to a programming error, and must never happen!
       logger.error(message)          
       raise tuf.DownloadError(message)
     else:
-      logger.info('Successful download!')
-  
-  else:  
-    message = 'Required_length is default value, skip the safety check of total downloaded.'
-    logger.warn(message)
+      # We specifically disabled strict checking of required length, but we
+      # will log a warning anyway. This is useful when we wish to download the
+      # timestamp metadata, for which we have no signed metadata; so, we must
+      # guess a reasonable required_length for it.
+      logger.warn(message)
 
 
 
 
-def download_url_to_tempfileobj(url, required_length,
-                                required_hashes=None,
-                                HARD_LIMIT_REQUIRED_LENGTH=True):
+
+def download_url_to_tempfileobj(url, required_length, required_hashes=None,
+                                STRICT_REQUIRED_LENGTH=True):
   """
   <Purpose>
     Given the url, hashes and length of the desired file, this function 
@@ -447,7 +456,10 @@ def download_url_to_tempfileobj(url, required_length,
   
   <Arguments>
     url:
-      A url string that represents the location of the file. 
+      A URL string that represents the location of the file. 
+  
+    required_length:
+      An integer value representing the length of the file.
   
     required_hashes:
       A dictionary, where the keys represent the hashing algorithm used to 
@@ -455,69 +467,80 @@ def download_url_to_tempfileobj(url, required_length,
   
       For instance, a hash pair might look something like this:
       {'md5': '37544f383be1fc1a32f42801c9c4b4d6'}
-  
-    required_length:
-      An integer value representing the length of the file.
 
-    HARD_LIMIT_REQUIRED_LENGTH:
-      A boolean value which indicates if the required_length passed into this 
-      function is a default length.
-  
+    STRICT_REQUIRED_LENGTH:
+      A Boolean indicator used to signal whether we should perform strict
+      checking of required_length. True by default. We explicitly set this to
+      False when we know that we want to turn this off for downloading the
+      timestamp metadata, which has no signed required_length.
+
   <Side Effects>
-    'tuf.util.TempFile' object is created.
+    A 'tuf.util.TempFile' object is created on disk to store the contents of
+    'url'.
  
   <Exceptions>
     tuf.DownloadError, if there was an error while downloading the file.
-    
-    tuf.FormatError, if any of the arguments are improperly formatted. 
+ 
+    tuf.FormatError, if any of the arguments are improperly formatted.
+
+    tuf.BadHashError, if the hashes don't match.
+
+    Any other unforeseen runtime exception.
  
   <Returns>
-    'tuf.util.TempFile' instance.
+    A 'tuf.util.TempFile' file-like object which points to the contents of
+    'url'.
 
   """
 
   # Do all of the arguments have the appropriate format?
   # Raise 'tuf.FormatError' if there is a mismatch.
   tuf.formats.URL_SCHEMA.check_match(url)
-  if required_hashes is not None:
-    tuf.formats.HASHDICT_SCHEMA.check_match(required_hashes)
-  if required_length is not None:
-    tuf.formats.LENGTH_SCHEMA.check_match(required_length)
+  tuf.formats.LENGTH_SCHEMA.check_match(required_length)
 
-  # 'url.replace()' is for compatibility with Windows-based systems because they 
-  # might put back-slashes in place of forward-slashes.  This converts it to the
-  # common format. 
-  url = url.replace('\\','/')
-  logger.info('Downloading: '+url)
+  if required_hashes:
+    tuf.formats.HASHDICT_SCHEMA.check_match(required_hashes)
+  else:
+    logger.warn('Missing hashes for: '+str(url))
+
+  # 'url.replace()' is for compatibility with Windows-based systems because
+  # they might put back-slashes in place of forward-slashes.  This converts it
+  # to the common format. 
+  url = url.replace('\\', '/')
+  logger.info('Downloading: '+str(url))
   connection = _open_connection(url)
   temp_file = tuf.util.TempFile()
 
   try:
+    # We ask the server about how big it thinks this file should be.
     reported_length = _get_content_length(connection)
-    # call the function to check whether the length reported by server is equal 
-    # to expected.
+
+    # Then, we check whether the required length matches the reported length.
     _check_content_length(reported_length, required_length)
 
-    # For readibility, we perform the download in a separate function, which
-    # returns the total number of downloaded bytes; this number should be equal
-    # to required_length. 
+    # Download the contents of the URL, up to the required length, to a
+    # temporary file, and get the total number of downloaded bytes.
     total_downloaded = _download_fixed_amount_of_data(connection, temp_file, 
                                                       required_length)
-    # call the function to check whether the length of total_downloaded is equal to 
-    # expected.
-    _check_downloaded_length(total_downloaded, required_length, HARD_LIMIT_REQUIRED_LENGTH)
-      
-    # We appear to have downloaded the correct amount.  Check the hashes.
-    if required_hashes is not None: 
-      _check_hashes(temp_file, required_hashes)
 
-  # Exception is a base class for all non-exiting exceptions.
-  except Exception, e:
-    # Closing 'temp_file'.  The 'temp_file' data is destroyed.
+    # Does the total number of downloaded bytes match the required length?
+    _check_downloaded_length(total_downloaded, required_length,
+                             STRICT_REQUIRED_LENGTH=STRICT_REQUIRED_LENGTH)
+
+    # Finally, check the hashes expected of the file.
+    _check_hashes(temp_file, trusted_hashes=required_hashes)
+
+  except:
+    # Something unfortunately went wrong, so we will close 'temp_file'; that
+    # means any data written to it will be lost.
     temp_file.close_temp_file()
-    logger.error(str(e))
-    raise tuf.DownloadError(e)
+    logger.exception('Could not download URL: '+str(url))
+    raise
+
+  else:
+    return temp_file
+
+
 
-  return temp_file
 
 
diff --git a/tuf/tests/test_download.py b/tuf/tests/test_download.py
index 4e1273ae..b5bc535d 100755
--- a/tuf/tests/test_download.py
+++ b/tuf/tests/test_download.py
@@ -22,20 +22,18 @@
 """
 
 import tuf
+import tuf.conf as conf
 import tuf.download as download
 import tuf.tests.unittest_toolbox as unittest_toolbox
-import tuf.conf
 
-import os
-import sys
-import time
-import random
 import hashlib
 import logging
-import unittest
+import os
+import random
 import subprocess
-import SocketServer
-import SimpleHTTPServer
+import time
+import unittest
+
 
 # Disable/Enable logging.  Comment-out to Enable logging.
 logging.getLogger('tuf')
@@ -79,7 +77,6 @@ def setUp(self):
     self.target_hash = {'md5':digest}  
 
 
-
   # Stop server process and perform clean up.
   def tearDown(self):
     unittest_toolbox.Modified_TestCase.tearDown(self)
@@ -89,73 +86,75 @@ def tearDown(self):
     self.target_fileobj.close()
 
 
-  # Unit Test.
+  # Test: Normal case.
   def test_download_url_to_tempfileobj(self):
+
+    download_file = download.download_url_to_tempfileobj
+
+    temp_fileobj = download_file(self.url, self.target_data_length,
+                                 required_hashes=self.target_hash)
+    self.assertEquals(self.target_data, temp_fileobj.read())
+    self.assertEquals(self.target_data_length, len(temp_fileobj.read()))
+    temp_fileobj.close_temp_file()
+
+
+  # Test: Incorrect hashes.
+  def test_download_url_to_tempfileobj_and_hashes(self):
+
+    download_file = download.download_url_to_tempfileobj
+
     # Test: Normal cases without supplying hash arguments.
-
-    temp_fileobj = download.download_url_to_tempfileobj(self.url,
-                      required_length=self.target_data_length)
+    temp_fileobj = download_file(self.url, self.target_data_length)
     self.assertEquals(self.target_data, temp_fileobj.read())
     self.assertEquals(self.target_data_length, len(temp_fileobj.read()))
     temp_fileobj.close_temp_file()
 
-    # Test: Normal case.
-    temp_fileobj = download.download_url_to_tempfileobj(self.url,
-                      required_hashes=self.target_hash, 
-                      required_length=self.target_data_length)
+    # What happens when we pass bad hashes to check the downloaded file?
+    self.assertRaises(tuf.BadHashError,
+                      download_file, self.url, self.target_data_length,
+                      required_hashes={'md5':self.random_string()})
+
+
+  # Test: Incorrect lengths.
+  def test_download_url_to_tempfileobj_and_lengths(self):
+
+    download_file = download.download_url_to_tempfileobj
+
+    # NOTE: We catch tuf.BadHashError here because the file, shorter by a byte,
+    # would not match the expected hashes. We log a warning when we find that
+    # the server-reported length of the file does not match our
+    # required_length. We also see that STRICT_REQUIRED_LENGTH does not change
+    # the outcome of the previous test.
+    for strict_required_length in (True, False):
+      self.assertRaises(tuf.BadHashError, 
+                        download_file, self.url, self.target_data_length - 1,
+                        required_hashes=self.target_hash,
+                        STRICT_REQUIRED_LENGTH=strict_required_length)
+
+    # NOTE: We catch tuf.DownloadError here because the STRICT_REQUIRED_LENGTH,
+    # which is True by default, mandates that we must download exactly what is
+    # required.
+    exception_message = 'Downloaded '+str(self.target_data_length)+\
+                        ' bytes, but expected '+\
+                        str(self.target_data_length+1)+\
+                        ' bytes. There is a difference of 1 bytes!'
+    self.assertRaisesRegexp(tuf.DownloadError, exception_message,
+                      download_file, self.url, self.target_data_length + 1,
+                      required_hashes=self.target_hash)
+
+    # NOTE: However, we do not catch a tuf.DownloadError here for the same test
+    # as the previous one because we have disabled STRICT_REQUIRED_LENGTH.
+    temp_fileobj = download_file(self.url, self.target_data_length + 1,
+                                 required_hashes=self.target_hash,
+                                 STRICT_REQUIRED_LENGTH=False)
     self.assertEquals(self.target_data, temp_fileobj.read())
     self.assertEquals(self.target_data_length, len(temp_fileobj.read()))
     temp_fileobj.close_temp_file()
 
-    # Test: Incorrect length.
-    self.assertRaises(tuf.DownloadError, 
-                      download.download_url_to_tempfileobj, self.url,
-                      required_hashes=self.target_hash, 
-                      required_length=self.target_data_length - 1)
 
-    self.assertRaises(tuf.DownloadError, 
-                      download.download_url_to_tempfileobj, self.url,
-                      required_hashes=self.target_hash, 
-                      required_length=self.target_data_length + 1)
+  def test_download_url_to_tempfileobj_and_performance(self):
 
-    # Test: Incorrect hashs.
-    self.assertRaises(tuf.DownloadError, 
-                      download.download_url_to_tempfileobj, self.url,
-                      required_hashes={'md5':self.random_string()},
-                      required_length=self.target_data_length)
-
-    # Test: Incorrect/Unreachable url.
-    self.assertRaises(tuf.FormatError,
-                      download.download_url_to_tempfileobj, None,
-                      required_hashes=self.target_hash, 
-                      required_length=self.target_data_length)
-
-    self.assertRaises(tuf.DownloadError,
-                      download.download_url_to_tempfileobj,
-                      self.random_string(),
-                      required_hashes=self.target_hash, 
-                      required_length=self.target_data_length)
-
-    self.assertRaises(tuf.DownloadError,
-                      download.download_url_to_tempfileobj,
-                      'http://localhost:'+str(self.PORT)+'/'+self.random_string(),
-                      required_hashes=self.target_hash, 
-                      required_length=self.target_data_length)
-
-    self.assertRaises(tuf.DownloadError,
-                      download.download_url_to_tempfileobj,
-                      'http://localhost:'+str(self.PORT+1)+'/'+self.random_string(),
-                      required_hashes=self.target_hash, 
-                      required_length=self.target_data_length)
-
-    # Test: Set the required_length to default value.
-
-    temp_fileobj = download.download_url_to_tempfileobj(self.url,
-                                                        required_length=tuf.conf.DEFAULT_TIMESTAMP_LENGTH,
-                                                        HARD_LIMIT_REQUIRED_LENGTH=False)
-    self.assertEquals(self.target_data, temp_fileobj.read())
-    self.assertEquals(self.target_data_length, len(temp_fileobj.read()))
-    temp_fileobj.close_temp_file();
+    download_file = download.download_url_to_tempfileobj
 
     """
     # Measuring performance of 'auto_flush = False' vs. 'auto_flush = True'
@@ -163,9 +162,9 @@ def test_download_url_to_tempfileobj(self):
     star_cpu = time.clock()
     star_real = time.time()
 
-    temp_fileobj = download.download_url_to_tempfileobj(self.url,
-                      required_hashes=self.target_hash, 
-                      required_length=self.target_data_length)
+    temp_fileobj = download_file(self.url, 
+                                 self.target_data_length,
+                                 required_hashes=self.target_hash)
 
     end_cpu = time.clock()
     end_real = time.time()  
@@ -181,7 +180,35 @@ def test_download_url_to_tempfileobj(self):
     """
 
 
+  # Test: Incorrect/Unreachable URLs.
+  def test_download_url_to_tempfileobj_and_urls(self):
+
+    download_file = download.download_url_to_tempfileobj
+
+    self.assertRaises(tuf.FormatError,
+                      download_file, None, self.target_data_length,
+                      required_hashes=self.target_hash)
+
+    self.assertRaises(tuf.DownloadError,
+                      download_file,
+                      self.random_string(), self.target_data_length,
+                      required_hashes=self.target_hash)
+
+    self.assertRaises(tuf.DownloadError,
+                      download_file,
+                      'http://localhost:'+str(self.PORT)+'/'+self.random_string(), 
+                      self.target_data_length,
+                      required_hashes=self.target_hash)
+
+    self.assertRaises(tuf.DownloadError,
+                      download_file,
+                      'http://localhost:'+str(self.PORT+1)+'/'+self.random_string(), 
+                      self.target_data_length,
+                      required_hashes=self.target_hash)
+
 
 # Run unit test.
 suite = unittest.TestLoader().loadTestsFromTestCase(TestDownload)
 unittest.TextTestRunner(verbosity=2).run(suite)
+
+
diff --git a/tuf/tests/test_updater.py b/tuf/tests/test_updater.py
index e2506aaf..ae4e7bbd 100755
--- a/tuf/tests/test_updater.py
+++ b/tuf/tests/test_updater.py
@@ -61,7 +61,10 @@ class guarantees the order of unit tests.  So that, 'test_something_A'
 roledb = tuf.roledb
 keydb = tuf.keydb
 
-DEFAULT_TIMESTAMP_FILEINFO = {'length': tuf.conf.DEFAULT_TIMESTAMP_LENGTH, 'hashes':None}  
+DEFAULT_TIMESTAMP_FILEINFO = {
+  'length': tuf.conf.DEFAULT_TIMESTAMP_REQUIRED_LENGTH,
+  'hashes':None
+}
 
 class TestUpdater_init_(unittest_toolbox.Modified_TestCase):
 

From 8edf2fc3f50c77ffb6e9d3ea0ba6cbcbbf79b952 Mon Sep 17 00:00:00 2001
From: dachshund <trishankkarthik@gmail.com>
Date: Tue, 6 Aug 2013 14:31:21 -0400
Subject: [PATCH 08/19] Removed an unsafe edge case, but updater unit tests
 need to be fixed.

Specifically, we do not intentionally set any file metadata to be None
and then download the file unsafely.

Some of the tuf.client.updater unit tests fail because it was previously
possible to unsafely download metadata for any role. We need to fix
this.
---
 tuf/client/updater.py     | 24 ++++++------------------
 tuf/tests/test_updater.py | 12 +++++++++---
 2 files changed, 15 insertions(+), 21 deletions(-)

diff --git a/tuf/client/updater.py b/tuf/client/updater.py
index dac17a6c..427d271f 100755
--- a/tuf/client/updater.py
+++ b/tuf/client/updater.py
@@ -117,7 +117,7 @@
 import tuf.sig
 import tuf.util
 
-logger = logging.getLogger('tuf')
+from tuf.log import logger
 
 
 class Updater(object):
@@ -664,12 +664,8 @@ def _update_metadata(self, metadata_role, fileinfo, compression=None,
 
     # Extract file length and file hashes.  They will be passed as arguments
     # to 'download_file' function.
-    if fileinfo is not None:
-      file_length=fileinfo['length']
-      file_hashes=fileinfo['hashes']
-    else:
-      file_length=None
-      file_hashes=None
+    file_length=fileinfo['length']
+    file_hashes=fileinfo['hashes']
 
     # Attempt a file download from each mirror until the file is downloaded and
     # verified.  If the signature of the downloaded file is valid, proceed,
@@ -836,12 +832,9 @@ def _update_metadata_if_changed(self, metadata_role, referenced_metadata='releas
     # The 'root' role may be updated without having 'release'
     # available.  
     if referenced_metadata not in self.metadata['current']:
-      if metadata_role == 'root':
-        new_fileinfo = None
-      else:
-        message = 'Cannot update '+repr(metadata_role)+' because ' \
-                  +referenced_metadata+' is missing.'
-        raise tuf.RepositoryError(message)
+      message = 'Cannot update '+repr(metadata_role)+' because ' \
+                +referenced_metadata+' is missing.'
+      raise tuf.RepositoryError(message)
     # The referenced metadata has been loaded.  Extract the new
     # fileinfo for 'metadata_role' from it. 
     else:
@@ -1019,11 +1012,6 @@ def _fileinfo_has_changed(self, metadata_filename, new_fileinfo):
     if self.fileinfo.get(metadata_filename) is None:
       return True
 
-    # 'new_fileinfo' should only be 'None' if updating 'root.txt'
-    # without having 'release.txt'.
-    if new_fileinfo is None:
-      return True
-
     current_fileinfo = self.fileinfo[metadata_filename]
 
     if current_fileinfo['length'] != new_fileinfo['length']:
diff --git a/tuf/tests/test_updater.py b/tuf/tests/test_updater.py
index ae4e7bbd..70376a02 100755
--- a/tuf/tests/test_updater.py
+++ b/tuf/tests/test_updater.py
@@ -61,11 +61,14 @@ class guarantees the order of unit tests.  So that, 'test_something_A'
 roledb = tuf.roledb
 keydb = tuf.keydb
 
+# This is the default metadata that we would create for the timestamp role,
+# because it has no signed metadata for itself.
 DEFAULT_TIMESTAMP_FILEINFO = {
-  'length': tuf.conf.DEFAULT_TIMESTAMP_REQUIRED_LENGTH,
-  'hashes':None
+  'hashes': None,
+  'length': tuf.conf.DEFAULT_TIMESTAMP_REQUIRED_LENGTH
 }
 
+
 class TestUpdater_init_(unittest_toolbox.Modified_TestCase):
 
   def test__init__exceptions(self):
@@ -204,7 +207,7 @@ def _mock_download_url_to_tempfileobj(self, output):
 
     """
 
-    def _mock_download(url, length, hashes=None, HARD_LIMIT_REQUIRED_LENGTH=True):
+    def _mock_download(url, length, hashes=None, STRICT_REQUIRED_LENGTH=True):
       if isinstance(output, (str, unicode)):
         file_path = output
       elif isinstance(output, list):
@@ -505,12 +508,14 @@ def test_3__update_metadata(self):
     # Test: Invalid file downloaded.
     #  Patch 'download.download_url_to_tempfileobj' function.
     self._mock_download_url_to_tempfileobj(self.release_filepath)
+    # TODO: Set fileinfo to a valid object.
     self.assertRaises(tuf.RepositoryError, _update_metadata, 'targets', None)
 
 
     # Test: normal case.
     #  Patch 'download.download_url_to_tempfileobj' function.
     self._mock_download_url_to_tempfileobj(self.targets_filepath)
+    # TODO: Set fileinfo to a valid object.
     _update_metadata('targets', None)
     list_of_targets = self.Repository.metadata['current']['targets']['targets']
 
@@ -528,6 +533,7 @@ def test_3__update_metadata(self):
 
     #  Re-patch 'download.download_url_to_tempfileobj' function.
     self._mock_download_url_to_tempfileobj(targets_filepath_compressed)
+    # TODO: Set fileinfo to a valid object.
     _update_metadata('targets', None, compression='gzip')
     list_of_targets = self.Repository.metadata['current']['targets']['targets']
 

From eccc6b3201fb7705066eafe24ded433e0813ea29 Mon Sep 17 00:00:00 2001
From: ttgump <syrttgump@gmail.com>
Date: Thu, 8 Aug 2013 16:24:03 -0400
Subject: [PATCH 09/19] unicode fix in python2

---
 tuf/repo/signercli.py                              | 4 ++++
 tuf/tests/system_tests/test_endless_data_attack.py | 3 +++
 2 files changed, 7 insertions(+)

diff --git a/tuf/repo/signercli.py b/tuf/repo/signercli.py
index d3d7b1d9..a4c76f86 100755
--- a/tuf/repo/signercli.py
+++ b/tuf/repo/signercli.py
@@ -116,6 +116,10 @@ def _prompt(message, result_type=str):
     caller.
 
   """
+  # we need to use unicode in python 2
+  python_version = sys.version_info[0]
+  if python_version==2:
+    str = unicode
 
   return result_type(raw_input(message))
 
diff --git a/tuf/tests/system_tests/test_endless_data_attack.py b/tuf/tests/system_tests/test_endless_data_attack.py
index 0530f128..99dbd6dd 100755
--- a/tuf/tests/system_tests/test_endless_data_attack.py
+++ b/tuf/tests/system_tests/test_endless_data_attack.py
@@ -42,11 +42,14 @@
 import tuf
 from tuf.interposition import urllib_tuf
 
+import logging
+
 
 
 # Disable logging.
 util_test_tools.disable_logging()
 
+logger = logging.getLogger('tuf')
 
 
 class EndlessDataAttack(Exception):

From 1bf884ab683864dfaa550673b888d293faa3ccf3 Mon Sep 17 00:00:00 2001
From: ttgump <syrttgump@gmail.com>
Date: Tue, 13 Aug 2013 14:57:03 -0400
Subject: [PATCH 10/19] Pull from upstream

---
 docs/tuf-spec.txt                             |  64 +++---
 tuf/client/updater.py                         | 155 +++++++++++----
 tuf/log.py                                    | 187 +++++++++++++++---
 tuf/repo/keystore.py                          |  12 +-
 .../test_extraneous_dependencies_attack.py    |   1 +
 tuf/tests/test_keystore.py                    |   7 +
 tuf/tests/test_push.py                        |   0
 tuf/tests/test_pushtoolslib.py                |   0
 8 files changed, 323 insertions(+), 103 deletions(-)
 mode change 100644 => 100755 tuf/tests/test_push.py
 mode change 100644 => 100755 tuf/tests/test_pushtoolslib.py

diff --git a/docs/tuf-spec.txt b/docs/tuf-spec.txt
index 0c88c437..517d17e3 100644
--- a/docs/tuf-spec.txt
+++ b/docs/tuf-spec.txt
@@ -32,7 +32,10 @@
    in all popular Linux package managers.  More information and current
    versions of this document can be found at https://www.updateframework.com/
 
-   The development of TUF is supported by GENI (http://www.geni.net/).
+   The Global Environment for Network Innovations (GENI) and the National
+   Science Foundation (NSF) have provided support for the development of TUF.
+   (http://www.geni.net/)
+   (http://www.nsf.gov/)
 
    TUF's Python implementation is based heavily on Thandy, the application
    updater for Tor (http://www.torproject.org/). Its design and this spec are
@@ -409,26 +412,24 @@
 4.2. File formats: general principles
 
    All signed files are of the format:
-       { "signed" : X,
+       { "signed" : ROLE,
          "signatures" : [
-            { "keyid" : K,
-              "method" : M,
-              "sig" : S }
+            { "keyid" : KEYID,
+              "method" : METHOD,
+              "sig" : SIGNATURE }
             , ... ]
        }
 
-   where: X is a list whose first element describes the signed object.
-          K is the identifier of a key signing the document
-          M is the method to be used to make the signature
-          S is a signature of the canonical encoding of X using the
-          identified key.
+   where: ROLE is a dictionary whose "_type" field describes the role type.
+          KEYID is the identifier of the key signing the ROLE dictionary.
+          METHOD is the key signing method used to generate the signature.
+          SIGNATURE is a signature of the canonical encoding of ROLE using the
+          signing key belonging to KEYID.
 
    We define one signing method at present:
-       sha256-pkcs1 : A base64 encoded signature of the SHA256 hash of the
-         canonical encoding of X, using PKCS-1 padding.
+       "evp" : An interface to OpenSSL's EVP functions.
 
-   All times are given as strings of the format "YYYY-MM-DD HH:MM:SS",
-   in UTC.
+   All times are given as strings of the format "YYYY-MM-DD HH:MM:SS UTC".
 
    All keys are of the format:
       { "keytype" : KEYTYPE,
@@ -443,13 +444,12 @@
 
    We define one keytype at present: 'rsa'.  Its format is:
       { "keytype" : "rsa",
-        "keyval" : { "e" : E,
-                     "n" : N }
+        "keyval" : { "public" : PUBLIC,
+                     "private" : PRIVATE }
       }
 
-   where E and N are the binary representations of the exponent and
-   modulus, encoded as big-endian numbers in base64.  All RSA keys must
-   be at least 2048 bits long.
+   where PUBLIC and PRIVATE are in PEM format and are strings.  All RSA keys
+   must be at least 2048 bits long.
 
 4.3. File formats: root.txt
 
@@ -462,7 +462,7 @@
    The format of root.txt is as follows:
 
      { "_type" : "Root",
-       "ts" : TIME,
+       "version" : VERSION,
        "expires" : EXPIRES,
        "keys" : {
            KEYID : KEY
@@ -474,12 +474,11 @@
            , ... }
      }
 
-   The "ts" line describes when this file was updated.  Clients
-   MUST NOT replace a file with an older one, and SHOULD NOT accept a
-   file too far in the future.
+   VERSION is an integer that is greater than 0.  Clients MUST NOT replace a
+   metadata file with a version number less than the one currently trusted.
 
-   The "expires" line states when the metadata should be considered expired
-   and no longer trusted by clients.  Clients MUST NOT trust an expired file.
+   EXPIRES determines when metadata should be considered expired and no longer
+   trusted by clients.  Clients MUST NOT trust an expired file.
 
    A ROLE is one of "root", "release", "targets", "timestamp", or "mirrors".
    A role for each of "root", "release", "timestamp", and "targets" MUST be
@@ -505,7 +504,7 @@
    The format of release.txt is as follows:
 
      { "_type" : "Release",
-       "ts" : TIME,
+       "version" : VERSION,
        "expires" : EXPIRES,
        "meta" : METAFILES
      }
@@ -527,7 +526,7 @@
    The format of targets.txt is as follows:
 
      { "_type" : "Targets",
-       "ts" : TIME,
+       "version" : VERSION,
        "expires" : EXPIRES,
        "targets" : TARGETS,
        ("delegations" : DELEGATIONS)
@@ -572,10 +571,9 @@
    The "paths" list describes paths that the role is trusted to provide.
    Clients MUST check that a target is in one of the trusted paths of all roles
    in a delegation chain, not just in a trusted path of the role that describes
-   the target file.  The format of a PATHPATTERN may be either a path to a
-   single file or a path to a directory and end with "/**" to indicate all
-   files under that directory.  The value of "/**" by itself therefore means
-   all files.
+   the target file.  The format of a PATHPATTERN may be either a path to a single
+   file, or a path to a directory to indicate all files and/or subdirectories
+   under that directory.
 
    We are currently investigating a few "priority tag" schemes to resolve
    conflicts between delegated roles that share responsibility for overlapping
@@ -610,7 +608,7 @@
    The format of the timestamp file is as follows:
 
      { "_type" : "Timestamp",
-       "ts" : TIME,
+       "version" : VERSION,
        "expires" : EXPIRES,
        "meta" : METAFILES
      }
@@ -628,7 +626,7 @@
    The format of mirrors.txt is as follows:
 
      { "_type" : "Mirrorlist",
-       "ts" : TIME,
+       "version" : VERSION,
        "expires" : EXPIRES,
        "mirrors" : [
           { "urlbase" : URLBASE,
diff --git a/tuf/client/updater.py b/tuf/client/updater.py
index b976ecf5..c0710e73 100755
--- a/tuf/client/updater.py
+++ b/tuf/client/updater.py
@@ -912,7 +912,9 @@ def _ensure_all_targets_allowed(self, metadata_role, metadata_object):
       under 'paths'.  A parent role may delegate trust to all files under a 
       particular directory, including files in subdirectories, by simply
       listing the directory (e.g., 'packages/source/Django/', the equivalent
-      of 'packages/source/Django/*').
+      of 'packages/source/Django/*').  Targets listed in hashed bins are
+      also validated (i.e., its calculated path hash prefix must be delegated
+      by the parent role.
 
     <Arguments>
       metadata_role:
@@ -928,7 +930,8 @@ def _ensure_all_targets_allowed(self, metadata_role, metadata_object):
     <Exceptions>
       tuf.RepositoryError:
         If the targets of 'metadata_role' are not allowed according to
-        the parent's metadata file.
+        the parent's metadata file.  The 'paths' and 'path_hash_prefix' fields
+        are verified.
     
     <Side Effects>
       None.
@@ -938,6 +941,13 @@ def _ensure_all_targets_allowed(self, metadata_role, metadata_object):
     
     """
 
+    # The algorithm used by the repository to generate the hashes of the
+    # target filepaths.  The repository may optionally organize
+    # targets into hashed bins to ease target delegations and role metadata
+    # management.  The use of consistent hashing allows for a uniform
+    # distribution of targets into bins. 
+    HASH_PATH_ALGORITHM = 'sha256'
+    
     # Return if 'metadata_role' is 'targets'.  'targets' is not
     # a delegated role.
     if metadata_role == 'targets':
@@ -955,30 +965,60 @@ def _ensure_all_targets_allowed(self, metadata_role, metadata_object):
     role_index = tuf.repo.signerlib.find_delegated_role(roles, metadata_role)
 
     # Ensure the delegated role exists prior to extracting trusted paths
-    # from the parent's 'paths'.
+    # from the parent's 'paths', or trusted path hash prefixes from the parent's
+    # 'path_hash_prefix'.
     if role_index is not None:
       role = roles[role_index] 
-      allowed_child_paths = role['paths']
+      allowed_child_paths = role.get('paths')
+      allowed_child_path_hash_prefix = role.get('path_hash_prefix') 
       actual_child_targets = metadata_object['targets'].keys()
-      
-      # Check that each delegated target is either explicitly listed or a parent
-      # directory is found under role['paths'], otherwise raise an exception.
-      # If the parent role explicitly lists target file paths in 'paths',
-      # this loop will run in O(n^2), the worst-case.  The repository
-      # maintainer will likely delegate entire directories, and opt for
-      # explicit file paths if the targets in a directory are delegated to 
-      # different roles/developers.
-      for child_target in actual_child_targets:
-        for allowed_child_path in allowed_child_paths:
-          prefix = os.path.commonprefix([child_target, allowed_child_path])
-          if prefix == allowed_child_path:
-            break
-        else: 
-          message = 'Role '+repr(metadata_role)+' specifies target '+\
-            repr(child_target)+' which is not an allowed path according '+\
-            'to the delegations set by '+repr(parent_role)+'.'
-          raise tuf.RepositoryError(message)
     
+      if allowed_child_path_hash_prefix is not None:
+        for child_target in actual_child_targets:
+          # Calculate the hash of 'child_target' to determine if it has been
+          # placed in the correct bin.  The client currently assumes the
+          # repository uses 'HASH_PATH_ALGORITHM' to generate hashes.
+          # TODO: Should the TUF spec restrict the repository to one particular
+          # algorithm?  Should we allow the repository to specify in the role
+          # dictionary the algorithm used for these generated hashed paths?
+          digest_object = tuf.hash.digest(HASH_PATH_ALGORITHM)
+          digest_object.update(child_target)
+          child_target_path_hash = digest_object.hexdigest()
+
+          if not child_target_path_hash.startswith(allowed_child_path_hash_prefix):
+            message = 'Role '+repr(metadata_role)+' specifies target '+\
+              repr(child_target)+ ' which does not have a path hash prefix '+\
+              'matching the prefix listed by the parent role '+\
+              repr(parent_role)+'.'
+            raise tuf.RepositoryError(message)
+      elif allowed_child_paths is not None: 
+
+        # Check that each delegated target is either explicitly listed or a parent
+        # directory is found under role['paths'], otherwise raise an exception.
+        # If the parent role explicitly lists target file paths in 'paths',
+        # this loop will run in O(n^2), the worst-case.  The repository
+        # maintainer will likely delegate entire directories, and opt for
+        # explicit file paths if the targets in a directory are delegated to 
+        # different roles/developers.
+        for child_target in actual_child_targets:
+          for allowed_child_path in allowed_child_paths:
+            prefix = os.path.commonprefix([child_target, allowed_child_path])
+            if prefix == allowed_child_path:
+              break
+          else: 
+            message = 'Role '+repr(metadata_role)+' specifies target '+\
+              repr(child_target)+' which is not an allowed path according '+\
+              'to the delegations set by '+repr(parent_role)+'.'
+            raise tuf.RepositoryError(message)
+      else:
+        
+        # 'role' should have been validated when it was downloaded.
+        # The 'paths' or 'path_hash_prefix' fields should not be missing,
+        # so log a warning if this else clause is reached. 
+        message = repr(role)+' unexpectedly did not contain one of '+\
+          'the required fields ("paths" or "path_hash_prefix").'
+        logger.warn(message)
+
     # Raise an exception if the parent has not delegated to the specified
     # 'metadata_role' child role.
     else:
@@ -1014,7 +1054,7 @@ def _fileinfo_has_changed(self, metadata_filename, new_fileinfo):
         dict conforms to 'tuf.formats.FILEINFO_SCHEMA' and has
         the form:
         {'length': 23423
-         'hashes': {'sha256': adfbc32343..}}
+         'hashes': {'sha256': /dfbc32343..}}
         
     <Exceptions>
       None.
@@ -1534,6 +1574,13 @@ def target(self, target_filepath):
     # Raise 'tuf.FormatError' if there is a mismatch.
     tuf.formats.RELPATH_SCHEMA.check_match(target_filepath)
 
+    # The algorithm used by the repository to generate the hashes of the
+    # target filepaths.  The repository may optionally organize
+    # targets into hashed bins to ease target delegations and role metadata
+    # management.  The use of consistent hashing allows for a uniform
+    # distribution of targets into bins. 
+    HASH_PATH_ALGORITHM = 'sha256'
+    
     # Ensure the client has the most up-to-date version of 'targets.txt'.
     # Raise 'tuf.MetadataNotAvailableError' if the changed metadata
     # cannot be successfully downloaded and 'tuf.RepositoryError' if the
@@ -1545,12 +1592,23 @@ def target(self, target_filepath):
     # The target is assumed to be missing until proven otherwise.
     target = None
 
+    # Calculate the hash of the filepath to determine which bin to find the 
+    # target.  The client currently assumes the repository uses
+    # 'HASH_PATH_ALGORITHM' to generate hashes.
+    # TODO: Should the TUF spec restrict the repository to one particular
+    # algorithm?  Should we allow the repository to specify in the role
+    # dictionary the algorithm used for these generated hashed paths?
+    digest_object = tuf.hash.digest(HASH_PATH_ALGORITHM)
+    digest_object.update(target_filepath)
+    target_file_path_hash = digest_object.hexdigest() 
+
     try:
       current_metadata = self.metadata['current']
       role_names = ['targets']
 
       # Preorder depth-first traversal of the tree of target delegations.
       while len(role_names) > 0 and target is None:
+        
         # Pop the role name from the top of the stack.
         role_name = role_names.pop(-1)
         
@@ -1575,20 +1633,49 @@ def target(self, target_filepath):
             break
 
         # Push children in reverse order of appearance onto the stack.
+        # NOTE: This may be a slow operation if there are many delegated roles
+        # or bins.
         for child_role in reversed(child_roles):
           child_role_name = child_role['name']
-          child_role_paths = child_role['paths']
+          child_role_paths = child_role.get('paths')
+          child_role_path_hash_prefix = child_role.get('path_hash_prefix')
 
-          # Ensure that we explore only delegated roles trusted with the target.
-          # We assume conservation of delegated paths in the complete tree of
-          # delegations. Note that the call to _ensure_all_targets_allowed in
-          # _update_metadata should already ensure that all targets metadata is
-          # valid; i.e. that the targets signed by a delegatee is a proper
-          # subset of the targets delegated to it by the delegator.
-          # Nevertheless, we check it again here for performance and safety
-          # reasons.
-          if target_filepath in child_role_paths:
-            role_names.append(child_role_name)
+          if child_role_path_hash_prefix is not None:
+            if target_file_path_hash.startswith(child_role_path_hash_prefix):
+              
+              # Found a matching path hash prefix.  The metadata for
+              # 'child_role_name' will be retrieved on the next iteration
+              # of the while-loop.
+              role_names.append(child_role_name)
+          elif child_role_paths is not None:
+            
+            # Ensure that we explore only delegated roles trusted with the target.
+            # We assume conservation of delegated paths in the complete tree of
+            # delegations. Note that the call to _ensure_all_targets_allowed in
+            # _update_metadata should already ensure that all targets metadata is
+            # valid; i.e. that the targets signed by a delegatee is a proper
+            # subset of the targets delegated to it by the delegator.
+            # Nevertheless, we check it again here for performance and safety
+            # reasons.
+            for child_role_path in child_role_paths:
+              
+              # A child role path may be a filepath or directory.  The child
+              # role 'child_role_name' is added if 'target_filepath' is located
+              # under 'child_role_path'.  Explicit filepaths are also added.
+              prefix = os.path.commonprefix([target_filepath, child_role_path])
+              if prefix == child_role_path:
+                
+                # The metadata for 'child_role_name' will be retrieved on the next
+                # iteration of the while-loop.
+                role_names.append(child_role_name)
+          else:
+            
+            # 'role_name' should have been validated when it was downloaded.
+            # The 'paths' or 'path_hash_prefix' fields should not be missing,
+            # so log a warning if this else clause is reached. 
+            message = repr(child_role)+' unexpectedly did not contain one of '+\
+              'the required fields ("paths" or "path_hash_prefix").'
+            logger.warn(message)
     except:
       raise
     finally:
diff --git a/tuf/log.py b/tuf/log.py
index cd61ecf7..f3c018bc 100755
--- a/tuf/log.py
+++ b/tuf/log.py
@@ -12,10 +12,9 @@
   See LICENSE for licensing information.
 
 <Purpose>
-  A central location for all logging-related configuration.
-  This module should be imported once by the main program.
-  If other modules wish to incorporate 'tuf' logging, they
-  should do the following:
+  A central location for all logging-related configuration.  This module should
+  be imported once by the main program.  If other modules wish to incorporate
+  'tuf' logging, they should do the following:
 
   import logging
   logger = logging.getLogger('tuf')
@@ -26,18 +25,28 @@
   instance.  In this 'log.py' module, we perform the initial setup for the name
   'tuf'.  The 'log.py' module should only be imported once by the main program.
   When any other module does a logging.getLogger('tuf'), it is referring to the
-  same 'tuf' instance and its associated settings we set up here in 'log.py'.
-  See http://docs.python.org/library/logging.html#logger-objects
-  for more information.
+  same 'tuf' instance, and its associated settings, set here in 'log.py'.
+  See http://docs.python.org/library/logging.html#logger-objects for more
+  information.
 
   We use multiple handlers to process log messages in various ways and to
   configure each one independently.  Instead of using one single manner of
   processing log messages, we can use two built-in handlers that have already
   been configured for us.  For example, the built-in FileHandler will catch
-  log message and dump them to a file.  If we wanted, we could set this file
-  handler to only catch CRITICAL (and greater)  messages and save them to a
-  file.  The other stream handler would still handle DEBUG-level (and greater)
-  messages.
+  log messages and dump them to a file.  If we wanted, we could set this file
+  handler to only catch CRITICAL (and greater) messages and save them to a
+  file.  Other handlers (e.g., StreamHandler) could handle INFO-level
+  (and greater) messages.
+
+  Logging Levels:
+
+    --Level--         --Value--
+  logging.CRITICAL        50
+  logging.ERROR           40
+  logging.WARNING         30
+  logging.INFO            20
+  logging.DEBUG           10
+  logging.NOTSET           0
 
 """
 
@@ -45,35 +54,46 @@
 import logging
 import time
 
+import tuf
 
-_DEFAULT_LOG_LEVEL = logging.INFO
+# Setting a handler's log level filters only logging messages of that level
+# (and above).  For example, setting the built-in StreamHandler's log level to
+# 'logging.WARNING' will cause the stream handler to only process messages
+# of levels: WARNING, ERROR, and CRITICAL. 
 _DEFAULT_LOG_FILENAME = 'tuf.log'
+_DEFAULT_LOG_LEVEL = logging.DEBUG
+_DEFAULT_CONSOLE_LOG_LEVEL = logging.INFO
+_DEFAULT_FILE_LOG_LEVEL = logging.DEBUG
 
 # Set the format for logging messages.
+# Example format for '_FORMAT_STRING': 
+# [2013-08-13 15:21:18,068 UTC] [tuf] [INFO][_update_metadata:851@updater.py]
 _FORMAT_STRING = '[%(asctime)s UTC] [%(name)s] [%(levelname)s]'+\
   '[%(funcName)s:%(lineno)s@%(filename)s] %(message)s'
+
+
 logging.Formatter.converter = time.gmtime
 formatter = logging.Formatter(_FORMAT_STRING)
 
-# Set the handlers for the logger.
-# The built-in stream handler will log
-# messages to 'sys.stderr' and capture
-# '_DEFAULT_LOG_LEVEL' messages.
-stream_handler = logging.StreamHandler()
-stream_handler.setLevel(_DEFAULT_LOG_LEVEL)
-stream_handler.setFormatter(formatter)
+# Set the handlers for the logger.  The console handler is unset by default.  A
+# module importing 'log.py' should explicitly set the console handler if
+# outputting log messages to the screen is needed.  Adding a console handler
+# can be done with tuf.log.add_console_handler().  Logging messages to a file
+# *is* set by default. 
+console_handler = None
 
-# Set the built-in file handler.  Messages
-# will be logged to '_DEFAULT_LOG_FILENAME'
-# and use the logger's default log level.
-# The file will be opened in append mode.
+# Set the built-in file handler.  Messages will be logged to
+# '_DEFAULT_LOG_FILENAME', and only those messages with a log level of
+# '_DEFAULT_LOG_LEVEL'.  The log level of messages handled by 'file_handler'
+# may be modified with 'set_filehandler_log_level()'.  '_DEFAULT_LOG_FILENAME'
+# will be opened in append mode.
 file_handler = logging.FileHandler(_DEFAULT_LOG_FILENAME)
+file_handler.setLevel(_DEFAULT_LOG_LEVEL)
 file_handler.setFormatter(formatter)
 
 # Set the logger and its settings.
 logger = logging.getLogger('tuf')
 logger.setLevel(_DEFAULT_LOG_LEVEL)
-logger.addHandler(stream_handler)
 logger.addHandler(file_handler)
 
 # Silently ignore logger exceptions.
@@ -83,27 +103,132 @@
 
 
 
-def set_log_level(log_level):
+def set_log_level(log_level=_DEFAULT_LOG_LEVEL):
   """
   <Purpose>
     Allow the default log level to be overridden.
 
   <Arguments>
     log_level:
-      The log level to set for the logger and handler(s).
-      E.g., logging.INFO; logging.CRITICAL.
+      The log level to set for the 'log.py' file handler.
+      'log_level' examples: logging.INFO; logging.CRITICAL.
       
   <Exceptions>
     None.
 
   <Side Effects>
-    Overrides the logging level for the internal 
-    'logger' and 'handler'.
+    Overrides the logging level for the 'log.py' file handler.
 
   <Returns>
     None.
 
   """
-
+  
+  # Does 'log_level' have the correct format?
+  # Raise 'tuf.FormatError' if there is a mismatch.
+  tuf.formats.LENGTH_SCHEMA.check_match(log_level)
+  
   logger.setLevel(log_level)
-  stream_handler.setLevel(log_level)
+
+
+
+
+
+def set_filehandler_log_level(log_level=_DEFAULT_FILE_LOG_LEVEL):
+  """
+  <Purpose>
+    Allow the default file handler log level to be overridden.
+
+  <Arguments>
+    log_level:
+      The log level to set for the 'log.py' file handler.
+      'log_level' examples: logging.INFO; logging.CRITICAL.
+      
+  <Exceptions>
+    None.
+
+  <Side Effects>
+    Overrides the logging level for the 'log.py' file handler.
+
+  <Returns>
+    None.
+
+  """
+  
+  # Does 'log_level' have the correct format?
+  # Raise 'tuf.FormatError' if there is a mismatch.
+  tuf.formats.LENGTH_SCHEMA.check_match(log_level)
+  
+  file_handler.setLevel(log_level)
+
+
+
+
+
+def set_console_log_level(log_level=_DEFAULT_CONSOLE_LOG_LEVEL):
+  """
+  <Purpose>
+    Allow the default log level for console messages to be overridden.
+
+  <Arguments>
+    log_level:
+      The log level to set for the console handler.
+      'log_level' examples: logging.INFO; logging.CRITICAL.
+      
+  <Exceptions>
+    tuf.Error, if the 'log.py' console handler has not been set yet with
+    add_console_handler().
+
+  <Side Effects>
+    Overrides the logging level for the console handler.
+
+  <Returns>
+    None.
+
+  """
+  
+  # Does 'log_level' have the correct format?
+  # Raise 'tuf.FormatError' if there is a mismatch.
+  tuf.formats.LENGTH_SCHEMA.check_match(log_level)
+  
+  if console_handler is not None:
+    console_handler.setLevel(log_level)
+  else:
+    message = 'The console handler has not been set with add_console_handler().'
+    raise tuf.Error(message)
+
+
+
+
+def add_console_handler(log_level=_DEFAULT_CONSOLE_LOG_LEVEL):
+  """
+  <Purpose>
+    Add a console handler and set its log level to 'log_level'.
+
+  <Arguments>
+    log_level:
+      The log level to set for the console handler.
+      'log_level' examples: logging.INFO; logging.CRITICAL.
+      
+  <Exceptions>
+    None.
+
+  <Side Effects>
+    Adds a console handler to the 'log.py' logger and sets its logging level to
+    'log_level'.
+
+  <Returns>
+    None.
+
+  """
+  
+  # Does 'log_level' have the correct format?
+  # Raise 'tuf.FormatError' if there is a mismatch.
+  tuf.formats.LENGTH_SCHEMA.check_match(log_level)
+
+  # Set the console handler for the logger.  The built-in console handler will
+  # log messages to 'sys.stderr' and capture 'log_level' messages.
+  console_handler = logging.StreamHandler()
+  console_handler.setLevel(log_level)
+  console_handler.setFormatter(formatter)
+  logger.addHandler(console_handler)
diff --git a/tuf/repo/keystore.py b/tuf/repo/keystore.py
index c2ca98c0..c8fe5afd 100755
--- a/tuf/repo/keystore.py
+++ b/tuf/repo/keystore.py
@@ -147,7 +147,7 @@ def load_keystore_from_keyfiles(directory_name, keyids, passwords):
   <Arguments>
     directory_name:
       The name of the directory containing the key files ('<keyid>.key'),
-      conformant to tuf.formats.RELPATH_SCHEMA.
+      conformant to 'tuf.formats.RELPATH_SCHEMA'.
 
     keyids:
       A list containing the keyids of the signing keys to load.
@@ -188,10 +188,8 @@ def load_keystore_from_keyfiles(directory_name, keyids, passwords):
   
   logger.info('Loading private key(s) from '+repr(directory_name))
 
-  # Make sure the directory exists.
-  if not os.path.exists(directory_name):
-    logger.warn('...no such directory.  Keystore cannot be loaded.')
-  else:
+  # Load the private key(s) if 'directory_name' exists, otherwise log a warning.
+  if os.path.exists(directory_name):
     # Decrypt the keys we can from those stored in 'keyids'.
     for keyid in keyids:
       try:
@@ -243,7 +241,11 @@ def load_keystore_from_keyfiles(directory_name, keyids, passwords):
             logger.warn(repr(full_filepath)+' contains an invalid key type.')
             continue
 
+  else:
+    logger.warn('...no such directory.  Keystore cannot be loaded.')
+
   logger.info('Done.')
+
   return loaded_keys
 
 
diff --git a/tuf/tests/system_tests/test_extraneous_dependencies_attack.py b/tuf/tests/system_tests/test_extraneous_dependencies_attack.py
index 72937d65..25a258f0 100755
--- a/tuf/tests/system_tests/test_extraneous_dependencies_attack.py
+++ b/tuf/tests/system_tests/test_extraneous_dependencies_attack.py
@@ -187,4 +187,5 @@ def _write_rogue_metadata():
 try:
   test_extraneous_dependencies_attack()
 except ExtraneousDependenciesAttackAlert, error:
+  raise
   print 'error'
diff --git a/tuf/tests/test_keystore.py b/tuf/tests/test_keystore.py
index 196bb7d4..816f400a 100755
--- a/tuf/tests/test_keystore.py
+++ b/tuf/tests/test_keystore.py
@@ -19,12 +19,19 @@
 import unittest
 import shutil
 import os
+import logging
 
 import tuf.repo.keystore
 import tuf.rsa_key
 import tuf.formats
 import tuf.util
 
+logger = logging.getLogger('tuf')
+
+# Disable all logging calls of level CRITICAL and below.
+# Comment the line below to enable logging.
+logging.disable(logging.CRITICAL)
+
 # We'll need json module for testing '_encrypt()' and '_decrypt()'
 # internal function.
 json = tuf.util.import_json()
diff --git a/tuf/tests/test_push.py b/tuf/tests/test_push.py
old mode 100644
new mode 100755
diff --git a/tuf/tests/test_pushtoolslib.py b/tuf/tests/test_pushtoolslib.py
old mode 100644
new mode 100755

From 74c1d29a37b762bca7c7b9014a9149dc639affce Mon Sep 17 00:00:00 2001
From: ttgump <syrttgump@gmail.com>
Date: Tue, 27 Aug 2013 17:02:42 -0400
Subject: [PATCH 11/19] Update fix for endless-data-attack test.

---
 tuf/download.py                                    | 6 +++---
 tuf/tests/system_tests/test_endless_data_attack.py | 1 +
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/tuf/download.py b/tuf/download.py
index 561cbae0..861e2966 100755
--- a/tuf/download.py
+++ b/tuf/download.py
@@ -403,10 +403,10 @@ def download_url_to_tempfileobj(url, required_hashes=None,
                                                       file_length,
                                                       required_length)
   
-    # Does 'total_downloaded' match 'required_length'?
-    if total_downloaded != required_length:
+    # Does 'total_downloaded' matches 'required_length'?
+    if required_length is not None and total_downloaded != required_length:
       message = 'Total downloaded length '+str(total_downloaded)+ \
-		' bytes doesn\'t match required length '+str(required_length)+' bytes.'
+                ' bytes doesn\'t match required length '+str(required_length)+' bytes.'
       raise tuf.DownloadError(message)
  
     # We appear to have downloaded the correct amount.  Check the hashes.
diff --git a/tuf/tests/system_tests/test_endless_data_attack.py b/tuf/tests/system_tests/test_endless_data_attack.py
index 196e4af9..476a134b 100755
--- a/tuf/tests/system_tests/test_endless_data_attack.py
+++ b/tuf/tests/system_tests/test_endless_data_attack.py
@@ -44,6 +44,7 @@
 
 import logging
 
+logger = logging.getLogger('tuf.test_endless_data_attack')
 
 class EndlessDataAttack(Exception):
   pass

From 6638089b99d03e4b598d47551f1d189280e22dff Mon Sep 17 00:00:00 2001
From: zhengyuyu <nuistzyy@gmail.com>
Date: Tue, 20 Aug 2013 03:48:29 -0400
Subject: [PATCH 12/19] Fix the slow retrieval attack issue

download.py:Add a timeout and rewrite the _fileobject.read()

test_slow_retrieval_attack.py:Add a new kind of slow retrieval attack

slow_retrieval_server.py:Modification for new kind of slow retrieval attack
---
 tuf/__init__.py                               |   4 +
 tuf/conf.py                                   |   7 +
 tuf/download.py                               | 139 +++++++++++++++++-
 .../system_tests/slow_retrieval_server.py     |  37 +++--
 .../test_slow_retrieval_attack.py             |  48 ++++--
 5 files changed, 210 insertions(+), 25 deletions(-)

diff --git a/tuf/__init__.py b/tuf/__init__.py
index b8d34217..b2a1ed12 100755
--- a/tuf/__init__.py
+++ b/tuf/__init__.py
@@ -136,6 +136,10 @@ class DownloadError(Error):
 
 
 
+class SlowRetrievalError(DownloadError):
+  """"Indicate that a downloading a file took longer than we would like it to."""
+  pass
+
 
 
 class KeyAlreadyExistsError(Error):
diff --git a/tuf/conf.py b/tuf/conf.py
index e601726d..65b8cb6d 100755
--- a/tuf/conf.py
+++ b/tuf/conf.py
@@ -40,3 +40,10 @@
 # Since the timestamp role does not have signed metadata about itself, we set a
 # default but sane upper bound for the number of bytes required to download it.
 DEFAULT_TIMESTAMP_REQUIRED_LENGTH = 2048
+
+# set the maximum waiting time for the socket.recv() before receives anything.
+recv_timeout = 2
+
+# the maximum tolorated number of times that receive data with shorter length than required
+# when download a file. 
+maximum_abnormal_length_count = 5
\ No newline at end of file
diff --git a/tuf/download.py b/tuf/download.py
index cfe8ac22..07d6ce9c 100755
--- a/tuf/download.py
+++ b/tuf/download.py
@@ -25,11 +25,14 @@
 import logging
 import os.path
 import socket
+import time
+import signal
 
 import tuf
 import tuf.hash
 import tuf.util
 import tuf.formats
+import tuf.conf
 
 from tuf.compatibility import httplib, ssl, urllib2, urlparse
 if ssl:
@@ -37,6 +40,16 @@
 else:
     raise tuf.Error( "No SSL support!" )    # TODO: degrade gracefully
 
+try:
+  from cStringIO import StringIO
+except ImportError:
+  from StringIO import StringIO
+
+try:
+  import errno
+except ImportError:
+  errno = None
+EINTR = getattr(errno, 'EINTR', 4)
 
 # See 'log.py' to learn how logging is handled in TUF.
 logger = logging.getLogger('tuf.download')
@@ -230,6 +243,112 @@ def _check_hashes(input_file, trusted_hashes=None):
 
 
 
+class safe_fileobject(socket._fileobject):
+  """
+  A socket fileobject that uses our own safe read function to avoid slow
+  retrieval attack.
+  """   
+  # Keep track of the number of times receiving data with shorter length than
+  # required. Each time a new download begins, that is, a new instance of this
+  # class is created, it will be reset to 0. 
+  abnormal_length_count = 0
+  def read(self,size):
+    """
+    <Purpose>
+      This is a safer version of the socket._fileobject.read(). Prevent client
+      from the slow retrieval attack by checking the received data lenth.
+
+    <Arguments>
+      size:
+        The length of data that expected to download.
+    
+    <Exceptions>
+      tuf.SlowRetrievalError, if the abnormal_length_count is bigger than we
+      expected.
+      socket.error, if errors happend in the lower layer socket.
+    
+    <Side Effects>
+      None.
+    
+    <Returns>
+      "size" length of received data or empty string.
+
+  """
+    # Use max, disallow tiny reads in a loop as they are very inefficient.
+    # We never leave read() with any leftover data from a new recv() call
+    # in our internal buffer.
+    rbufsize = max(self._rbufsize, self.default_bufsize)
+    # Our use of StringIO rather than lists of string objects returned by
+    # recv() minimizes memory usage and fragmentation that occurs when
+    # rbufsize is large compared to the typical return value of recv().
+    buf = self._rbuf
+    buf.seek(0, 2)  # seek end
+
+    max_count = tuf.conf.maximum_abnormal_length_count
+
+    # Read until size bytes or EOF seen, whichever comes first
+    buf_len = buf.tell()
+    if buf_len >= size:
+      # Already have size bytes in our buffer?  Extract and return.
+      buf.seek(0)
+      rv = buf.read(size)
+      self._rbuf = StringIO()
+      self._rbuf.write(buf.read())
+      return rv
+    
+    else:
+      self._rbuf = StringIO()  # reset _rbuf.  we consume it via buf.
+      # When received  more shorter length data than we can tolorate,
+      # exit current download process and raise a slow retrieval 
+      # attack exception.
+      while self.abnormal_length_count < max_count:
+        left = size - buf_len
+        # recv() will malloc the amount of memory given as its
+        # parameter even though it often returns much less data
+        # than that.  The returned data string is short lived
+        # as we copy it into a StringIO and free it.  This avoids
+        # fragmentation issues on many platforms.
+        try:
+          data = self._sock.recv(left)
+        except socket.error, e:
+          if e.args[0] != EINTR:
+            raise
+        if not data:
+          break
+        
+        else:
+          n = len(data)
+          if n == size and not buf_len:
+            # Shortcut.  Avoid buffer data copies when:
+            # - We have no data in our buffer.
+            # AND
+            # - Our call to recv returned exactly the
+            #   number of bytes we were asked to read.
+            return data
+
+          elif n == left:
+            buf.write(data)
+            del data  # explicit free
+            break
+
+          else:
+            self.abnormal_length_count += 1
+            assert n <= left, "recv(%d) returned %d bytes" % (left, n)
+            buf.write(data)
+            buf_len += n
+            del data  # explicit free
+      else:
+        message = "received " + str(self.abnormal_length_count + 1) + \
+                  "times of abnormal length data. it could be " + \
+                  "slow retrieve attack!"
+        logger.error(message) 
+        raise tuf.SlowRetrievalError(message)
+      return buf.getvalue()
+
+      
+
+
+
 def _download_fixed_amount_of_data(connection, temp_file, required_length):
   """
   <Purpose>
@@ -276,7 +395,8 @@ def _download_fixed_amount_of_data(connection, temp_file, required_length):
       # We download a fixed chunk of data in every round. This is so that we
       # can defend against slow retrieval attacks. Furthermore, we do not wish
       # to download an extremely large file in one shot.
-      data = connection.read(min(BLOCK_SIZE, required_length-total_downloaded))
+      length_to_read = min(BLOCK_SIZE, required_length-total_downloaded)
+      data = connection.read(length_to_read)
 
       # We might have no more data to read. Check number of bytes downloaded. 
       if not data:
@@ -508,6 +628,17 @@ def download_url_to_tempfileobj(url, required_length, required_hashes=None,
   # to the common format. 
   url = url.replace('\\', '/')
   logger.info('Downloading: '+str(url))
+  
+  # Set the timeout for the recv() function inside the connection.read().
+  recv_timeout = tuf.conf.recv_timeout
+  socket.setdefaulttimeout(recv_timeout)
+
+  # Save the original _fileobject class for later restore.
+  original_fileobject = socket._fileobject
+  # Change the original _fileobject class into our own _fileobject which has a
+  # safe read() function to avoid slow retrieval attack.
+  socket._fileobject = safe_fileobject
+
   connection = _open_connection(url)
   temp_file = tuf.util.TempFile()
 
@@ -540,6 +671,12 @@ def download_url_to_tempfileobj(url, required_length, required_hashes=None,
   else:
     return temp_file
 
+  finally:
+    #After download is done or a exception is raised, restore the socket timeout 
+    # and socket._fileobject to default.
+    socket.setdefaulttimeout(None)
+    socket._fileobject = original_fileobject
+    
 
 
 
diff --git a/tuf/tests/system_tests/slow_retrieval_server.py b/tuf/tests/system_tests/slow_retrieval_server.py
index 0887d119..ba50e426 100755
--- a/tuf/tests/system_tests/slow_retrieval_server.py
+++ b/tuf/tests/system_tests/slow_retrieval_server.py
@@ -25,9 +25,13 @@
 from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer
 
 
-DELAY = 1
 
 
+# Modify the HTTPServer class to pass the test_mode argument to do_GET function.
+class HTTPServer_Test(HTTPServer):
+  def __init__(self, server_address, Handler, test_mode):
+    HTTPServer.__init__(self, server_address, Handler)
+    self.test_mode = test_mode
 
 # HTTP request handler.
 class Handler(BaseHTTPRequestHandler):
@@ -43,13 +47,22 @@ def do_GET(self):
       self.send_response(200)
       self.send_header('Content-length', str(len(data)))
       self.end_headers()
-
-      # Throttle the file by sending a character every few seconds.
-      for i in range(len(data)):
+      
+      if self.server.test_mode == "mode_1":
+      # before sends any data, the server does nothing during a long time.
+        DELAY = 1000
         time.sleep(DELAY)
-        self.wfile.write(data[i])
+        self.wfile.write(data)
 
-      return
+        return
+
+      elif self.server.test_mode == "mode_2":
+        DELAY = 1
+        # Throttle the file by sending a character every few seconds.
+        for i in range(len(data)):
+          self.wfile.write(data[i])
+          time.sleep(DELAY)
+        return
 
     except IOError, e:
       self.send_error(404, 'File Not Found!')
@@ -62,18 +75,20 @@ def get_random_port():
 
 
 
-def run(port):
+def run(port, test_mode):
   server_address = ('localhost', port)
-  httpd = HTTPServer(server_address, Handler)
+  httpd = HTTPServer_Test(server_address, Handler, test_mode)
   print('Slow server is active on port: '+str(port)+' ...')
   httpd.handle_request()
 
 
 
 if __name__ == '__main__':
-  if len(sys.argv) > 1:
+  if len(sys.argv) > 2:
     port = int(sys.argv[1])
+    test_mode = sys.argv[2]
   else:
     port = get_random_port()
-
-  run(port)
\ No newline at end of file
+    test_mode = None
+    
+  run(port, test_mode)
\ No newline at end of file
diff --git a/tuf/tests/system_tests/test_slow_retrieval_attack.py b/tuf/tests/system_tests/test_slow_retrieval_attack.py
index d2f9f4d9..71b9ac6e 100755
--- a/tuf/tests/system_tests/test_slow_retrieval_attack.py
+++ b/tuf/tests/system_tests/test_slow_retrieval_attack.py
@@ -41,6 +41,9 @@
 import random
 import subprocess
 from multiprocessing import Process
+import tuf
+import socket
+
 
 import util_test_tools
 from tuf.interposition import urllib_tuf
@@ -55,23 +58,28 @@ class SlowRetrievalAttackAlert(Exception):
   pass
 
 
-def _download(url, filename, tuf=False):
-  if tuf:
-    urllib_tuf.urlretrieve(url, filename)
-    
+def _download(url, filename, TUF=False):
+  if TUF:
+    try:
+      urllib_tuf.urlretrieve(url, filename)
+    # If timeout or RepositoryError is raised, this means
+    # that TUF has prevented the slow retrieval attack. Enable
+    # the logging to see, what actually happened.
+    except (socket.timeout, tuf.RepositoryError), e:
+      print "Download exits with " + str(e) + "! Successfully avoid slow retrieval attack!\n\n"
   else:
     urllib.urlretrieve(url, filename)
 
 
 
-def test_slow_retrieval_attack(TUF=False):
+def test_slow_retrieval_attack(TUF=False, mode=None):
 
-  WAIT_TIME = 5  # Number of seconds to wait until download completes.
-  ERROR_MSG = '\tSlow Retrieval Attack was Successful!\n\n'
+  WAIT_TIME = 10  # Number of seconds to wait until download completes.
+  ERROR_MSG = mode + '\tSlow Retrieval Attack was Successful!\n\n'
 
   # Launch the server.
   port = random.randint(30000, 45000)
-  command = ['python', 'slow_retrieval_server.py', str(port)]
+  command = ['python', 'slow_retrieval_server.py', str(port), mode]
   server_process = subprocess.Popen(command, stderr=subprocess.PIPE)
   time.sleep(.1)
 
@@ -109,6 +117,7 @@ def test_slow_retrieval_attack(TUF=False):
     proc = Process(target=_download, args=(url_to_file, downloaded_file, TUF))
     proc.start()
     proc.join(WAIT_TIME)
+
     if proc.exitcode is None:
       proc.terminate()
       raise SlowRetrievalAttackAlert(ERROR_MSG)
@@ -117,21 +126,34 @@ def test_slow_retrieval_attack(TUF=False):
   finally:
     if server_process.returncode is None:
       server_process.kill()
-      print 'Slow server terminated.\n'
-
+      print 'Communication with slow server aborted. Terminate the slow server.\n'
+    
     util_test_tools.cleanup(root_repo, server_proc)
 
 
 
 
-
+# Stimulates two kinds of slow retrieval attacks.
+# mode_1: When download begins,the server blocks the download
+# for a long time by doing nothing before it sends first byte of data.
+# mode_2: During the download process, the server blocks the download 
+# by sending just several characters every few seconds.
 try:
-  test_slow_retrieval_attack(TUF=False)
+  test_slow_retrieval_attack(TUF=False, mode = "mode_1")
 except SlowRetrievalAttackAlert, error:
   print error
 
+try:
+  test_slow_retrieval_attack(TUF=False, mode = "mode_2")
+except SlowRetrievalAttackAlert, error:
+  print error  
 
 try:
-  test_slow_retrieval_attack(TUF=True)
+  test_slow_retrieval_attack(TUF=True, mode = "mode_1")
+except SlowRetrievalAttackAlert, error:
+  print error
+
+try:
+  test_slow_retrieval_attack(TUF=True, mode = "mode_2")
 except SlowRetrievalAttackAlert, error:
   print error
\ No newline at end of file

From dcae72c19da8771262ce030d88ed451cc07d8c1b Mon Sep 17 00:00:00 2001
From: zhengyuyu <nuistzyy@gmail.com>
Date: Wed, 28 Aug 2013 05:53:00 -0400
Subject: [PATCH 13/19] delete signal import

---
 tuf/download.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tuf/download.py b/tuf/download.py
index 07d6ce9c..ad5188e1 100755
--- a/tuf/download.py
+++ b/tuf/download.py
@@ -26,7 +26,6 @@
 import os.path
 import socket
 import time
-import signal
 
 import tuf
 import tuf.hash

From d5272cfc7c3cf5761084de28c0296fb00f806f68 Mon Sep 17 00:00:00 2001
From: dachshund <trishankkarthik@gmail.com>
Date: Wed, 4 Sep 2013 16:48:38 -0400
Subject: [PATCH 14/19] Add some exceptions for more fine-grained exception
 handling.

---
 tuf/__init__.py | 66 +++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 64 insertions(+), 2 deletions(-)

diff --git a/tuf/__init__.py b/tuf/__init__.py
index deac6f50..c3933ca6 100755
--- a/tuf/__init__.py
+++ b/tuf/__init__.py
@@ -53,6 +53,14 @@ class FormatError(Error):
 
 
 
+class InvalidMetadataJSONError(FormatError):
+  """Indicate that some metadata file is not valid JSON."""
+  pass
+
+
+
+
+
 class UnsupportedAlgorithmError(Error):
   """Indicate an error while trying to identify a user-specified algorithm."""
   pass
@@ -93,6 +101,22 @@ class RepositoryError(Error):
 
 
 
+class ForbiddenTargetError(RepositoryError):
+  """Indicate that a role signed for a target that it was not delegated to."""
+  pass
+
+
+
+
+
+class ReplayError(RepositoryError):
+  """Indicate that some metadata has been replayed to the client."""
+  pass
+
+
+
+
+
 class ExpiredMetadataError(Error):
   """Indicate that a TUF Metadata file has expired."""
   pass
@@ -117,8 +141,8 @@ class CryptoError(Error):
 
 
 
-class UnsupportedLibraryError(Error):
-  """Indicate that a supported library could not be located or imported."""
+class BadSignatureError(CryptoError):
+  """Indicate that some metadata file had a bad signature."""
   pass
 
 
@@ -133,6 +157,22 @@ class UnknownMethodError(CryptoError):
 
 
 
+class UnsupportedLibraryError(Error):
+  """Indicate that a supported library could not be located or imported."""
+  pass
+
+
+
+
+
+class DecompressionError(Error):
+  """Indicate that some error happened while decompressing a file."""
+  pass
+
+
+
+
+
 class DownloadError(Error):
   """Indicate an error occurred while attempting to download a file."""
   pass
@@ -141,6 +181,14 @@ class DownloadError(Error):
 
 
 
+class DownloadLengthMismatchError(DownloadError):
+  """Indicate that a mismatch of lengths was seen while downloading a file."""
+  pass
+
+
+
+
+
 class SlowRetrievalError(DownloadError):
   """"Indicate that downloading a file took an unreasonably long time."""
 
@@ -182,4 +230,18 @@ class InvalidNameError(Error):
 
 
 
+class UpdateError(Error):
+  """An updater will throw this exception in case it could not download a
+  metadata or target file.
+
+  A dictionary of Exception instances indexed by every mirror URL will also be
+  provided."""
+
+  def __init__(self, mirror_errors):
+    # Dictionary of URL strings to Exception instances
+    self.mirror_errors = mirror_errors
+
+
+
+
 

From 3bbe4672d7ad9af5dce1c71fb56c3bed2a42b9ee Mon Sep 17 00:00:00 2001
From: dachshund <trishankkarthik@gmail.com>
Date: Wed, 4 Sep 2013 17:29:06 -0400
Subject: [PATCH 15/19] Replace generic exception with a specific one.

---
 tuf/download.py | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/tuf/download.py b/tuf/download.py
index ed80c370..0321a72d 100755
--- a/tuf/download.py
+++ b/tuf/download.py
@@ -336,7 +336,7 @@ def _open_connection(url):
       URL string (e.g., 'http://...' or 'ftp://...' or 'file://...') 
     
   <Exceptions>
-    tuf.DownloadError
+    None.
     
   <Side Effects>
     Opens a connection to a remote server.
@@ -590,8 +590,8 @@ def _check_downloaded_length(total_downloaded, required_length,
     None.
  
   <Exceptions>
-    tuf.DownloadError, if STRICT_REQUIRED_LENGTH is True and total_downloaded
-    is not equal required_length.
+    tuf.DownloadLengthMismatchError, if STRICT_REQUIRED_LENGTH is True and
+    total_downloaded is not equal required_length.
  
   <Returns>
     None.
@@ -612,7 +612,7 @@ def _check_downloaded_length(total_downloaded, required_length,
     if STRICT_REQUIRED_LENGTH:  
       # This must be due to a programming error, and must never happen!
       logger.error(message)          
-      raise tuf.DownloadError(message)
+      raise tuf.DownloadLengthMismatchError(message)
     else:
       # We specifically disabled strict checking of required length, but we
       # will log a warning anyway. This is useful when we wish to download the
@@ -660,12 +660,11 @@ def download_url_to_tempfileobj(url, required_length, required_hashes=None,
     'url'.
  
   <Exceptions>
-    tuf.DownloadError, if there was an error while downloading the file.
+    tuf.DownloadLengthMismatchError, if there was a mismatch of observed vs
+    expected lengths while downloading the file.
  
     tuf.FormatError, if any of the arguments are improperly formatted.
 
-    tuf.BadHashError, if the hashes don't match.
-
     Any other unforeseen runtime exception.
  
   <Returns>

From 5970ad8c253f79a14b86b0321e03f40b2a8c209f Mon Sep 17 00:00:00 2001
From: zanefisher <zanefisher@gmail.com>
Date: Wed, 4 Sep 2013 20:12:51 -0400
Subject: [PATCH 16/19] Refactor client.updater, removing verification from
 download.py.

---
 tuf/client/updater.py | 262 ++++++++++++++++++++++++++----------------
 tuf/download.py       |  75 ++----------
 2 files changed, 174 insertions(+), 163 deletions(-)

diff --git a/tuf/client/updater.py b/tuf/client/updater.py
index 1ce51548..3b583e23 100755
--- a/tuf/client/updater.py
+++ b/tuf/client/updater.py
@@ -582,8 +582,7 @@ def refresh(self):
 
     # Use default but sane information for timestamp metadata, and do not
     # require strict checks on its required length.
-    self._update_metadata('timestamp', DEFAULT_TIMESTAMP_FILEINFO, 
-                          STRICT_REQUIRED_LENGTH=False)
+    self._update_metadata('timestamp', DEFAULT_TIMESTAMP_FILEINFO)
 
     self._update_metadata_if_changed('release', referenced_metadata='timestamp')
 
@@ -601,8 +600,158 @@ def refresh(self):
 
 
 
-  def _update_metadata(self, metadata_role, fileinfo, compression=None,
-                       STRICT_REQUIRED_LENGTH=True):
+def _check_hashes(input_file, trusted_hashes=None):
+  """
+  <Purpose>
+    A helper function that verifies multiple secure hashes of the downloaded
+    file.  If any of these fail it raises an exception.  This is to conform
+    with the TUF specs, which support clients with different hashing
+    algorithms. The 'hash.py' module is used to compute the hashes of the
+    'input_file'. 
+
+  <Arguments>
+    input_file:
+      A file-like object.
+    
+    trusted_hashes: 
+      A dictionary with hash-algorithm names as keys and hashes as dict values.
+      The hashes should be in the hexdigest format.
+    
+  <Exceptions>
+    tuf.BadHashError, if the hashes don't match.
+    
+  <Side Effects>
+    Hash digest object is created using the 'tuf.hash' module.
+    
+  <Returns>
+    None.
+
+  """
+
+  if trusted_hashes:
+    # Verify each trusted hash of 'trusted_hashes'.  Raise exception if
+    # any of the hashes are incorrect and return if all are correct.
+    for algorithm, trusted_hash in trusted_hashes.items():
+      digest_object = tuf.hash.digest(algorithm)
+      digest_object.update(input_file.read())
+      computed_hash = digest_object.hexdigest()
+      if trusted_hash != computed_hash:
+        raise tuf.BadHashError('Hashes do not match! Expected '+
+                               trusted_hash+' got '+computed_hash)
+      else:
+        logger.info('The file\'s '+algorithm+' hash is correct: '+trusted_hash)
+  else:
+    logger.warn('No trusted hashes supplied to verify file at: '+
+                str(input_file))
+
+
+
+
+
+  def get_target_file(self, target_filepath, file_length, file_hashes):
+
+    def verify_target_file(self, target_file_object):
+      self._check_hashes(target_file_object, file_hashes)
+    
+    return self.__get_file(target_filepath, verify_target_file, 'target',
+              file_length, download_safely=True)
+
+
+
+
+
+  def __verify_metadata_file(self, metadata_file_object, metadata_role, file_hashes):
+    # FIXME: Decompression should be handled elsewhere.
+    #if compression:
+    #  metadata_file_object.decompress_temp_file_object(compression)
+
+    self._check_hashes(metadata_file_object, file_hashes)
+
+    # Read and load the downloaded file.
+    try:
+      metadata_signable = \
+        tuf.util.load_json_string(metadata_file_object.read())
+    except:
+      logger.exception('Invalid metadata from '+mirror_url+'.')
+      raise
+    else:
+      # Verify the signature on the downloaded metadata object.
+      try:
+        valid = tuf.sig.verify(metadata_signable, metadata_role)
+      except:
+        message = 'Unable to verify '+metadata_filename
+        logger.exception(message)
+        raise
+      else:
+        if valid:
+          logger.debug('Good signature on '+mirror_url+'.')
+        else:
+          raise tuf.BadSignatureError('Bad signature on '+mirror_url+'.')
+
+
+
+
+
+  def unsafely_get_metadata_file(self, metadata_role, metadata_filepath, file_length):
+
+    def unsafely_verify_metadata_file(metadata_file_object):
+      self.__verify_metadata_file(metadata_file_object, metadata_role, None)
+
+    return self.__get_file(metadata_filepath, unsafely_verify_metadata_file,
+                            'meta', file_length, download_safely=False)
+
+
+  def safely_get_metadata_file(self, metadata_role, metadata_filepath, file_length, file_hashes):
+
+    def safely_verify_metadata_file(metadata_file_object):
+      self.__verify_metadata_file(metadata_file_object, metadata_role, file_hashes)
+  
+    return self.__get_file(metadata_filepath, _verify_metadata_file,
+                            'meta', file_length, download_safely=True)
+
+
+
+
+
+  def __get_file(self, filepath, verify_file, reference_metadata, trusted_length, download_safely):
+    file_mirrors = tuf.mirrors.get_list_of_mirrors(reference_metadata, filepath,
+                                                   self.mirrors)
+    # file_mirror (URL): error (Exception)
+    file_mirror_errors = {}
+    target_file_object = None
+
+    for file_mirror in file_mirrors:
+      try:
+        if (download_safely):
+          target_file_object = tuf.download.safe_download(file_mirror, trusted_length)
+        else:
+          target_file_object = tuf.download.unsafe_download(file_mirror, trusted_length)
+
+      except Exception, e:
+        # Remember the error from this mirror, and "reset" the target file.
+        logger.exception('Download failed from '+file_mirror+'.')
+        file_mirror_errors[file_mirror] = e
+        target_file_object = None
+      else:
+        try:
+          verify_file(file_object)
+        except Exception, e:
+          file_mirror_errors[file_mirror] = e
+          target_file_object = None
+        else:
+          break
+
+    if target_file_object:
+      return target_file_object
+    else:
+      # TODO: wrap file_mirror_errors in an Exception
+      raise tuf.UpdateError(file_mirror_errors)
+
+
+
+
+
+  def _update_metadata(self, metadata_role, fileinfo, compression=None):
     """
     <Purpose>
       Download, verify, and 'install' the metadata belonging to 'metadata_role'.
@@ -659,15 +808,15 @@ def _update_metadata(self, metadata_role, fileinfo, compression=None,
       metadata_filename = metadata_filename + '.gz'
 
     # Reference to the 'get_list_of_mirrors' function.
-    get_mirrors = tuf.mirrors.get_list_of_mirrors
+    # get_mirrors = tuf.mirrors.get_list_of_mirrors
 
     # Reference to the 'download_url_to_tempfileobj' function.
     download_file = tuf.download.download_url_to_tempfileobj
 
     # Extract file length and file hashes.  They will be passed as arguments
     # to 'download_file' function.
-    file_length=fileinfo['length']
-    file_hashes=fileinfo['hashes']
+    file_length = fileinfo['length']
+    file_hashes = fileinfo['hashes']
 
     # A dictionary to keep the error from every mirror that we try.
     mirror_errors = {}
@@ -681,63 +830,15 @@ def _update_metadata(self, metadata_role, fileinfo, compression=None,
     # 'tuf.formats.SIGNABLE_SCHEMA'.
     metadata_file_object = None
     metadata_signable = None
-
-    for mirror_url in get_mirrors('meta',
-                                  metadata_filename.encode("utf-8"),
-                                  self.mirrors):
-      try:
+    if metadata_role == 'timestamp':
         metadata_file_object = \
-          download_file(mirror_url, file_length, file_hashes,
-                        STRICT_REQUIRED_LENGTH=STRICT_REQUIRED_LENGTH)
-      except:
-        logger.exception('Download failed from '+mirror_url+'.')
-        mirror_errors[mirror_url] = traceback.format_exc(1)
-      else:
-        # FIXME: mirror_errors for a mirror_url must not be overwritten!
+          self.unsafely_get_metadata_file(metadata_role, metadata_filename, file_length)
+    else:
+        metadata_file_object = \
+          self.safely_get_metadata_file(metadata_role, metadata_filename, file_length, file_hashes)
 
-        # FIXME: Another point of failure which we should handle.
-        if compression:
-          metadata_file_object.decompress_temp_file_object(compression)
-
-        # Read and load the downloaded file.
-        try:
-          metadata_signable = \
-            tuf.util.load_json_string(metadata_file_object.read())
-        except:
-          logger.exception('Invalid metadata from '+mirror_url+'.')
-          mirror_errors[mirror_url] = traceback.format_exc(1)
-          metadata_signable = None
-        else:
-          # Verify the signature on the downloaded metadata object.
-          try:
-            valid = tuf.sig.verify(metadata_signable, metadata_role)
-          except (tuf.UnknownRoleError, tuf.FormatError, tuf.Error), e:
-            # FIXME: Exception.message is deprecated in 2.6, and gone in 3.0,
-            # but this is a workaround for Unicode messages. We need a
-            # long-term solution with #61.
-            # http://bugs.python.org/issue2517
-            message = 'Unable to verify '+metadata_filename+':'+\
-                      e.message.encode("utf-8")
-            logger.exception(message)
-            mirror_errors[mirror_url] = message
-            metadata_signable = None
-          else:
-            if valid:
-              logger.debug('Good signature on '+mirror_url+'.')
-              break
-            else:
-              message = 'Bad signature on '+mirror_url+'.'
-              logger.warn(message)
-              mirror_errors[mirror_url] = message
-              metadata_signable = None
-
-    # Raise an exception if a valid metadata signable could not be downloaded
-    # from any of the mirrors.
-    if metadata_signable is None:
-      message = 'Unable to update '+str(metadata_filename)+\
-                ' from all known mirrors: '+str(mirror_errors)
-      logger.error(message)
-      raise tuf.RepositoryError(message)
+    # Read and load the downloaded file.
+    metadata_signable = tuf.util.load_json_string(metadata_file_object.read())
 
     # Ensure the loaded 'metadata_signable' is properly formatted.
     try:
@@ -1885,45 +1986,14 @@ def download_target(self, target, destination_directory):
     tuf.formats.TARGETFILE_SCHEMA.check_match(target)
     tuf.formats.PATH_SCHEMA.check_match(destination_directory)
 
-    # Reference to the 'get_list_of_mirrors' function.
-    get_mirrors = tuf.mirrors.get_list_of_mirrors
-
-    # Reference to the 'download_url_to_tempfileobj' function.
-    download_file = tuf.download.download_url_to_tempfileobj
-
     # Extract the target file information.
     target_filepath = target['filepath']
     trusted_length = target['fileinfo']['length']
     trusted_hashes = target['fileinfo']['hashes']
 
-    # A dictionary to keep the error from every mirror that we try.
-    mirror_errors = {}
-
-    # Wherein the downloaded target file will be stored.
-    target_file_object = None
-
-    logger.info('Trying to download: '+str(target_filepath))
-
-    # Iterate through the repositority mirrors until we successfully
-    # download a target.
-    for mirror_url in get_mirrors('target', target_filepath, self.mirrors):
-      try: 
-        target_file_object = download_file(mirror_url, trusted_length,
-                                           trusted_hashes)
-        break
-      except:
-        # Remember the error from this mirror, and "reset" the target file.
-        logger.exception('Download failed from '+mirror_url+'.')
-        mirror_errors[mirror_url] = traceback.format_exc(1)
-        target_file_object = None
-        continue
-
-    # We have gone through all the mirrors.  Did we get a target file object?
-    if target_file_object == None: 
-      raise tuf.DownloadError('Failed to download target '+\
-                              str(target_filepath)+\
-                              ' from all known mirrors: '+\
-                              str(mirror_errors))
+    # get_target_file checks every mirror and returns the first target
+    # that passes verification.
+    target_file_object = get_target_file(target_filepath, trusted_length, trusted_hashes)
    
     # We acquired a target file object from a mirror.  Move the file into
     # place (i.e., locally to 'destination_directory').
diff --git a/tuf/download.py b/tuf/download.py
index 0321a72d..5fe0bc3e 100755
--- a/tuf/download.py
+++ b/tuf/download.py
@@ -363,54 +363,6 @@ def _open_connection(url):
 
 
 
-def _check_hashes(input_file, trusted_hashes=None):
-  """
-  <Purpose>
-    A helper function that verifies multiple secure hashes of the downloaded
-    file.  If any of these fail it raises an exception.  This is to conform
-    with the TUF specs, which support clients with different hashing
-    algorithms. The 'hash.py' module is used to compute the hashes of the
-    'input_file'. 
-
-  <Arguments>
-    input_file:
-      A file-like object.
-    
-    trusted_hashes: 
-      A dictionary with hash-algorithm names as keys and hashes as dict values.
-      The hashes should be in the hexdigest format.
-    
-  <Exceptions>
-    tuf.BadHashError, if the hashes don't match.
-    
-  <Side Effects>
-    Hash digest object is created using the 'tuf.hash' module.
-    
-  <Returns>
-    None.
-
-  """
-
-  if trusted_hashes:
-    # Verify each trusted hash of 'trusted_hashes'.  Raise exception if
-    # any of the hashes are incorrect and return if all are correct.
-    for algorithm, trusted_hash in trusted_hashes.items():
-      digest_object = tuf.hash.digest(algorithm)
-      digest_object.update(input_file.read())
-      computed_hash = digest_object.hexdigest()
-      if trusted_hash != computed_hash:
-        raise tuf.BadHashError('Hashes do not match! Expected '+
-                               trusted_hash+' got '+computed_hash)
-      else:
-        logger.info('The file\'s '+algorithm+' hash is correct: '+trusted_hash)
-  else:
-    logger.warn('No trusted hashes supplied to verify file at: '+
-                str(input_file))
-
-      
-
-
-
 def _download_fixed_amount_of_data(connection, temp_file, required_length):
   """
   <Purpose>
@@ -624,8 +576,7 @@ def _check_downloaded_length(total_downloaded, required_length,
 
 
 
-def download_url_to_tempfileobj(url, required_length, required_hashes=None,
-                                STRICT_REQUIRED_LENGTH=True):
+def _download_file(url, required_length, STRICT_REQUIRED_LENGTH=True):
   """
   <Purpose>
     Given the url, hashes and length of the desired file, this function 
@@ -641,13 +592,6 @@ def download_url_to_tempfileobj(url, required_length, required_hashes=None,
   
     required_length:
       An integer value representing the length of the file.
-  
-    required_hashes:
-      A dictionary, where the keys represent the hashing algorithm used to 
-      hash the file and the dict values the hexdigest.
-  
-      For instance, a hash pair might look something like this:
-      {'md5': '37544f383be1fc1a32f42801c9c4b4d6'}
 
     STRICT_REQUIRED_LENGTH:
       A Boolean indicator used to signal whether we should perform strict
@@ -678,13 +622,6 @@ def download_url_to_tempfileobj(url, required_length, required_hashes=None,
   tuf.formats.URL_SCHEMA.check_match(url)
   tuf.formats.LENGTH_SCHEMA.check_match(required_length)
 
-  # FIXME: This function should only download files up to an expected length,
-  # and not check hashes; that is the job of the updater.
-  if required_hashes:
-    tuf.formats.HASHDICT_SCHEMA.check_match(required_hashes)
-  else:
-    logger.warn('Missing hashes for: '+str(url))
-
   # 'url.replace()' is for compatibility with Windows-based systems because
   # they might put back-slashes in place of forward-slashes.  This converts it
   # to the common format. 
@@ -725,9 +662,6 @@ def download_url_to_tempfileobj(url, required_length, required_hashes=None,
     _check_downloaded_length(total_downloaded, required_length,
                              STRICT_REQUIRED_LENGTH=STRICT_REQUIRED_LENGTH)
 
-    # Finally, check the hashes expected of the file.
-    _check_hashes(temp_file, trusted_hashes=required_hashes)
-
   except:
     # Close 'temp_file'; any written data is lost.
     temp_file.close_temp_file()
@@ -744,6 +678,13 @@ def download_url_to_tempfileobj(url, required_length, required_hashes=None,
     socket.setdefaulttimeout(previous_socket_timeout)
     
 
+def safe_download(url, required_length):
+  return _download_file(url, required_length, STRICT_REQUIRED_LENGTH=True)
+
+
+def unsafe_download(url, required_length):
+  return _download_file(url, required_length, STRICT_REQUIRED_LENGTH=False)
+
 
 
 

From 74e5764a533e737c729355a2ed4a932d13a7c123 Mon Sep 17 00:00:00 2001
From: dachshund <trishankkarthik@gmail.com>
Date: Wed, 4 Sep 2013 23:45:08 -0400
Subject: [PATCH 17/19] Merge with updater/download refactoring from
 @zanefisher.

Update download unit test to work after refactoring, but it is a little
incomplete (in particular, the unsafe_download function needs more testing).
---
 tuf/client/updater.py      | 131 +++++++++++++++++++------------------
 tuf/download.py            |  25 ++++---
 tuf/tests/test_download.py |  67 +++++--------------
 tuf/util.py                |  11 +++-
 4 files changed, 111 insertions(+), 123 deletions(-)

diff --git a/tuf/client/updater.py b/tuf/client/updater.py
index 3b583e23..7091ccea 100755
--- a/tuf/client/updater.py
+++ b/tuf/client/updater.py
@@ -105,7 +105,6 @@
 import os
 import shutil
 import time
-import traceback
 
 import tuf
 import tuf.conf
@@ -600,35 +599,34 @@ def refresh(self):
 
 
 
-def _check_hashes(input_file, trusted_hashes=None):
-  """
-  <Purpose>
-    A helper function that verifies multiple secure hashes of the downloaded
-    file.  If any of these fail it raises an exception.  This is to conform
-    with the TUF specs, which support clients with different hashing
-    algorithms. The 'hash.py' module is used to compute the hashes of the
-    'input_file'. 
+  def __check_hashes(self, input_file, trusted_hashes):
+    """
+    <Purpose>
+      A helper function that verifies multiple secure hashes of the downloaded
+      file.  If any of these fail it raises an exception.  This is to conform
+      with the TUF specs, which support clients with different hashing
+      algorithms. The 'hash.py' module is used to compute the hashes of the
+      'input_file'.
 
-  <Arguments>
-    input_file:
-      A file-like object.
-    
-    trusted_hashes: 
-      A dictionary with hash-algorithm names as keys and hashes as dict values.
-      The hashes should be in the hexdigest format.
-    
-  <Exceptions>
-    tuf.BadHashError, if the hashes don't match.
-    
-  <Side Effects>
-    Hash digest object is created using the 'tuf.hash' module.
-    
-  <Returns>
-    None.
+    <Arguments>
+      input_file:
+        A file-like object.
 
-  """
+      trusted_hashes:
+        A dictionary with hash-algorithm names as keys and hashes as dict values.
+        The hashes should be in the hexdigest format.
+
+    <Exceptions>
+      tuf.BadHashError, if the hashes don't match.
+
+    <Side Effects>
+      Hash digest object is created using the 'tuf.hash' module.
+
+    <Returns>
+      None.
+
+    """
 
-  if trusted_hashes:
     # Verify each trusted hash of 'trusted_hashes'.  Raise exception if
     # any of the hashes are incorrect and return if all are correct.
     for algorithm, trusted_hash in trusted_hashes.items():
@@ -640,9 +638,6 @@ def _check_hashes(input_file, trusted_hashes=None):
                                trusted_hash+' got '+computed_hash)
       else:
         logger.info('The file\'s '+algorithm+' hash is correct: '+trusted_hash)
-  else:
-    logger.warn('No trusted hashes supplied to verify file at: '+
-                str(input_file))
 
 
 
@@ -651,21 +646,19 @@ def _check_hashes(input_file, trusted_hashes=None):
   def get_target_file(self, target_filepath, file_length, file_hashes):
 
     def verify_target_file(self, target_file_object):
-      self._check_hashes(target_file_object, file_hashes)
+      self.__check_hashes(target_file_object, file_hashes)
     
     return self.__get_file(target_filepath, verify_target_file, 'target',
-              file_length, download_safely=True)
+              file_length, download_safely=True, compression=None)
 
 
 
 
 
-  def __verify_metadata_file(self, metadata_file_object, metadata_role, file_hashes):
-    # FIXME: Decompression should be handled elsewhere.
-    #if compression:
-    #  metadata_file_object.decompress_temp_file_object(compression)
+  def __verify_metadata_file(self, metadata_file_object, metadata_role,
+                             file_hashes):
 
-    self._check_hashes(metadata_file_object, file_hashes)
+    self.__check_hashes(metadata_file_object, file_hashes)
 
     # Read and load the downloaded file.
     try:
@@ -692,59 +685,72 @@ def __verify_metadata_file(self, metadata_file_object, metadata_role, file_hashe
 
 
 
-  def unsafely_get_metadata_file(self, metadata_role, metadata_filepath, file_length):
+  def unsafely_get_metadata_file(self, metadata_role, metadata_filepath,
+                                 file_length):
 
     def unsafely_verify_metadata_file(metadata_file_object):
-      self.__verify_metadata_file(metadata_file_object, metadata_role, None)
+      self.__verify_metadata_file(metadata_file_object, metadata_role,
+                                  file_hashes=None)
 
     return self.__get_file(metadata_filepath, unsafely_verify_metadata_file,
-                            'meta', file_length, download_safely=False)
+                           'meta', file_length, download_safely=False,
+                           compression=None)
 
 
-  def safely_get_metadata_file(self, metadata_role, metadata_filepath, file_length, file_hashes):
+
+
+
+  def safely_get_metadata_file(self, metadata_role, metadata_filepath,
+                                 file_length, file_hashes, compression):
 
     def safely_verify_metadata_file(metadata_file_object):
-      self.__verify_metadata_file(metadata_file_object, metadata_role, file_hashes)
-  
+      self.__verify_metadata_file(metadata_file_object, metadata_role,
+                                  file_hashes=file_hashes)
+
     return self.__get_file(metadata_filepath, _verify_metadata_file,
-                            'meta', file_length, download_safely=True)
+                           'meta', file_length, download_safely=True,
+                           compression=compression)
 
 
 
 
 
-  def __get_file(self, filepath, verify_file, reference_metadata, trusted_length, download_safely):
-    file_mirrors = tuf.mirrors.get_list_of_mirrors(reference_metadata, filepath,
-                                                   self.mirrors)
+  def __get_file(self, filepath, verify_file, reference_metadata,
+                 trusted_length, download_safely, compression):
+    file_mirrors = tuf.mirrors.get_list_of_mirrors(reference_metadata,
+                                                   filepath, self.mirrors)
     # file_mirror (URL): error (Exception)
     file_mirror_errors = {}
-    target_file_object = None
+    file_object = None
 
     for file_mirror in file_mirrors:
       try:
-        if (download_safely):
-          target_file_object = tuf.download.safe_download(file_mirror, trusted_length)
+        if download_safely:
+          file_object = tuf.download.safe_download(file_mirror, trusted_length)
         else:
-          target_file_object = tuf.download.unsafe_download(file_mirror, trusted_length)
+          file_object = tuf.download.unsafe_download(file_mirror,
+                                                     trusted_length)
+
+        if compression:
+          file_object.decompress_temp_file_object(compression)
 
       except Exception, e:
         # Remember the error from this mirror, and "reset" the target file.
         logger.exception('Download failed from '+file_mirror+'.')
         file_mirror_errors[file_mirror] = e
-        target_file_object = None
+        file_object = None
       else:
         try:
           verify_file(file_object)
         except Exception, e:
           file_mirror_errors[file_mirror] = e
-          target_file_object = None
+          file_object = None
         else:
           break
 
-    if target_file_object:
-      return target_file_object
+    if file_object:
+      return file_object
     else:
-      # TODO: wrap file_mirror_errors in an Exception
       raise tuf.UpdateError(file_mirror_errors)
 
 
@@ -807,12 +813,6 @@ def _update_metadata(self, metadata_role, fileinfo, compression=None):
     if compression == 'gzip':
       metadata_filename = metadata_filename + '.gz'
 
-    # Reference to the 'get_list_of_mirrors' function.
-    # get_mirrors = tuf.mirrors.get_list_of_mirrors
-
-    # Reference to the 'download_url_to_tempfileobj' function.
-    download_file = tuf.download.download_url_to_tempfileobj
-
     # Extract file length and file hashes.  They will be passed as arguments
     # to 'download_file' function.
     file_length = fileinfo['length']
@@ -832,10 +832,13 @@ def _update_metadata(self, metadata_role, fileinfo, compression=None):
     metadata_signable = None
     if metadata_role == 'timestamp':
         metadata_file_object = \
-          self.unsafely_get_metadata_file(metadata_role, metadata_filename, file_length)
+          self.unsafely_get_metadata_file(metadata_role, metadata_filename,
+                                          file_length)
     else:
         metadata_file_object = \
-          self.safely_get_metadata_file(metadata_role, metadata_filename, file_length, file_hashes)
+          self.safely_get_metadata_file(metadata_role, metadata_filename,
+                                        file_length, file_hashes,
+                                        compression=compression)
 
     # Read and load the downloaded file.
     metadata_signable = tuf.util.load_json_string(metadata_file_object.read())
diff --git a/tuf/download.py b/tuf/download.py
index 5fe0bc3e..2771b460 100755
--- a/tuf/download.py
+++ b/tuf/download.py
@@ -18,7 +18,7 @@
   supplied by the metadata of that file.  The downloaded file is technically a 
   file-like object that will automatically destroys itself once closed.  Note
   that the file-like object, 'tuf.util.TempFile', is returned by the
-  'download_url_to_tempfileobj()' function.
+  '_download_file()' function.
   
 """
 
@@ -576,6 +576,20 @@ def _check_downloaded_length(total_downloaded, required_length,
 
 
 
+def safe_download(url, required_length):
+  return _download_file(url, required_length, STRICT_REQUIRED_LENGTH=True)
+
+
+
+
+
+def unsafe_download(url, required_length):
+  return _download_file(url, required_length, STRICT_REQUIRED_LENGTH=False)
+
+
+
+
+
 def _download_file(url, required_length, STRICT_REQUIRED_LENGTH=True):
   """
   <Purpose>
@@ -676,14 +690,7 @@ def _download_file(url, required_length, STRICT_REQUIRED_LENGTH=True):
     # Restore previously saved values or functions.
     httplib.HTTPConnection.response_class = previous_http_response_class
     socket.setdefaulttimeout(previous_socket_timeout)
-    
-
-def safe_download(url, required_length):
-  return _download_file(url, required_length, STRICT_REQUIRED_LENGTH=True)
-
-
-def unsafe_download(url, required_length):
-  return _download_file(url, required_length, STRICT_REQUIRED_LENGTH=False)
+
 
 
 
diff --git a/tuf/tests/test_download.py b/tuf/tests/test_download.py
index c5a3e903..77d0d5fd 100755
--- a/tuf/tests/test_download.py
+++ b/tuf/tests/test_download.py
@@ -30,6 +30,7 @@
 import subprocess
 import time
 import unittest
+import urllib2
 
 
 import tuf
@@ -69,7 +70,7 @@ def setUp(self):
 
     # NOTE: Following error is raised if delay is not applied:
     #    <urlopen error [Errno 111] Connection refused>
-    time.sleep(.1)
+    time.sleep(1)
 
     # Computing hash of target file data.
     m = hashlib.md5()
@@ -90,47 +91,24 @@ def tearDown(self):
   # Test: Normal case.
   def test_download_url_to_tempfileobj(self):
 
-    download_file = download.download_url_to_tempfileobj
+    download_file = download.safe_download
 
-    temp_fileobj = download_file(self.url, self.target_data_length,
-                                 required_hashes=self.target_hash)
-    self.assertEquals(self.target_data, temp_fileobj.read())
-    self.assertEquals(self.target_data_length, len(temp_fileobj.read()))
-    temp_fileobj.close_temp_file()
-
-
-  # Test: Incorrect hashes.
-  def test_download_url_to_tempfileobj_and_hashes(self):
-
-    download_file = download.download_url_to_tempfileobj
-
-    # Test: Normal cases without supplying hash arguments.
     temp_fileobj = download_file(self.url, self.target_data_length)
     self.assertEquals(self.target_data, temp_fileobj.read())
     self.assertEquals(self.target_data_length, len(temp_fileobj.read()))
     temp_fileobj.close_temp_file()
 
-    # What happens when we pass bad hashes to check the downloaded file?
-    self.assertRaises(tuf.BadHashError,
-                      download_file, self.url, self.target_data_length,
-                      required_hashes={'md5':self.random_string()})
-
 
   # Test: Incorrect lengths.
   def test_download_url_to_tempfileobj_and_lengths(self):
 
-    download_file = download.download_url_to_tempfileobj
-
     # NOTE: We catch tuf.BadHashError here because the file, shorter by a byte,
     # would not match the expected hashes. We log a warning when we find that
     # the server-reported length of the file does not match our
     # required_length. We also see that STRICT_REQUIRED_LENGTH does not change
     # the outcome of the previous test.
-    for strict_required_length in (True, False):
-      self.assertRaises(tuf.BadHashError, 
-                        download_file, self.url, self.target_data_length - 1,
-                        required_hashes=self.target_hash,
-                        STRICT_REQUIRED_LENGTH=strict_required_length)
+    download.safe_download(self.url, self.target_data_length - 1)
+    download.unsafe_download(self.url, self.target_data_length - 1)
 
     # NOTE: We catch tuf.DownloadError here because the STRICT_REQUIRED_LENGTH,
     # which is True by default, mandates that we must download exactly what is
@@ -140,14 +118,12 @@ def test_download_url_to_tempfileobj_and_lengths(self):
                         str(self.target_data_length+1)+\
                         ' bytes. There is a difference of 1 bytes!'
     self.assertRaisesRegexp(tuf.DownloadError, exception_message,
-                      download_file, self.url, self.target_data_length + 1,
-                      required_hashes=self.target_hash)
+                            download.safe_download, self.url,
+                            self.target_data_length + 1)
 
     # NOTE: However, we do not catch a tuf.DownloadError here for the same test
     # as the previous one because we have disabled STRICT_REQUIRED_LENGTH.
-    temp_fileobj = download_file(self.url, self.target_data_length + 1,
-                                 required_hashes=self.target_hash,
-                                 STRICT_REQUIRED_LENGTH=False)
+    temp_fileobj = download.unsafe_download(self.url, self.target_data_length + 1)
     self.assertEquals(self.target_data, temp_fileobj.read())
     self.assertEquals(self.target_data_length, len(temp_fileobj.read()))
     temp_fileobj.close_temp_file()
@@ -155,17 +131,14 @@ def test_download_url_to_tempfileobj_and_lengths(self):
 
   def test_download_url_to_tempfileobj_and_performance(self):
 
-    download_file = download.download_url_to_tempfileobj
-
     """
     # Measuring performance of 'auto_flush = False' vs. 'auto_flush = True'
-    # in download_url_to_tempfileobj() during write. No change was observed.
+    # in download._download_file() during write. No change was observed.
     star_cpu = time.clock()
     star_real = time.time()
 
     temp_fileobj = download_file(self.url, 
-                                 self.target_data_length,
-                                 required_hashes=self.target_hash)
+                                 self.target_data_length)
 
     end_cpu = time.clock()
     end_real = time.time()  
@@ -184,28 +157,24 @@ def test_download_url_to_tempfileobj_and_performance(self):
   # Test: Incorrect/Unreachable URLs.
   def test_download_url_to_tempfileobj_and_urls(self):
 
-    download_file = download.download_url_to_tempfileobj
+    download_file = download.safe_download
 
     self.assertRaises(tuf.FormatError,
-                      download_file, None, self.target_data_length,
-                      required_hashes=self.target_hash)
+                      download_file, None, self.target_data_length)
 
-    self.assertRaises(tuf.DownloadError,
+    self.assertRaises(ValueError,
                       download_file,
-                      self.random_string(), self.target_data_length,
-                      required_hashes=self.target_hash)
+                      self.random_string(), self.target_data_length)
 
-    self.assertRaises(tuf.DownloadError,
+    self.assertRaises(urllib2.HTTPError,
                       download_file,
                       'http://localhost:'+str(self.PORT)+'/'+self.random_string(), 
-                      self.target_data_length,
-                      required_hashes=self.target_hash)
+                      self.target_data_length)
 
-    self.assertRaises(tuf.DownloadError,
+    self.assertRaises(urllib2.URLError,
                       download_file,
                       'http://localhost:'+str(self.PORT+1)+'/'+self.random_string(), 
-                      self.target_data_length,
-                      required_hashes=self.target_hash)
+                      self.target_data_length)
 
 
 # Run unit test.
diff --git a/tuf/util.py b/tuf/util.py
index 3c72023a..aea3b4f6 100755
--- a/tuf/util.py
+++ b/tuf/util.py
@@ -249,6 +249,8 @@ def decompress_temp_file_object(self, compression):
 
       tuf.Error: If an invalid compression is given.
 
+      tuf.DecompressionError: If the compression failed for any reason.
+
     <Side Effects>
       'self._orig_file' is used to store the original data of 'temporary_file'.
 
@@ -266,10 +268,17 @@ def decompress_temp_file_object(self, compression):
 
     if compression != 'gzip':
       raise tuf.Error('Only gzip compression is supported.')
+
     self.seek(0)
     self._compression = compression
     self._orig_file = self.temporary_file
-    self.temporary_file = gzip.GzipFile(fileobj=self.temporary_file, mode='rb')
+
+    try:
+      self.temporary_file = gzip.GzipFile(fileobj=self.temporary_file, mode='rb')
+    except:
+      raise tuf.DecompressionError(self.temporary_file)
+
+
 
 
 

From 36a11f7f0ad1e8e25dc9df1913db38e699bf3b9a Mon Sep 17 00:00:00 2001
From: dachshund <trishankkarthik@gmail.com>
Date: Thu, 5 Sep 2013 00:23:23 -0400
Subject: [PATCH 18/19] Adapt the updater unit test against latest changes.

---
 tuf/client/updater.py     |  3 ++-
 tuf/download.py           |  4 ++--
 tuf/tests/test_updater.py | 46 +++++++++++++++++++--------------------
 3 files changed, 27 insertions(+), 26 deletions(-)

diff --git a/tuf/client/updater.py b/tuf/client/updater.py
index 7091ccea..ac648720 100755
--- a/tuf/client/updater.py
+++ b/tuf/client/updater.py
@@ -1996,7 +1996,8 @@ def download_target(self, target, destination_directory):
 
     # get_target_file checks every mirror and returns the first target
     # that passes verification.
-    target_file_object = get_target_file(target_filepath, trusted_length, trusted_hashes)
+    target_file_object = self.get_target_file(target_filepath, trusted_length,
+                                              trusted_hashes)
    
     # We acquired a target file object from a mirror.  Move the file into
     # place (i.e., locally to 'destination_directory').
diff --git a/tuf/download.py b/tuf/download.py
index 2771b460..d3cbb1ee 100755
--- a/tuf/download.py
+++ b/tuf/download.py
@@ -111,8 +111,8 @@ def read(self, size):
 
     """
 
-    # We should never try to specify a nonpositive size.
-    assert size > 0
+    # We should never try to specify a negative size.
+    assert size >= 0
 
     # Use max, disallow tiny reads in a loop as they are very inefficient.
     # We never leave read() with any leftover data from a new recv() call
diff --git a/tuf/tests/test_updater.py b/tuf/tests/test_updater.py
index 917d2aee..acd536c7 100755
--- a/tuf/tests/test_updater.py
+++ b/tuf/tests/test_updater.py
@@ -213,7 +213,7 @@ def _mock_download_url_to_tempfileobj(self, output):
 
     """
 
-    def _mock_download(url, length, hashes=None, STRICT_REQUIRED_LENGTH=True):
+    def _mock_download(url, length):
       if isinstance(output, (str, unicode)):
         file_path = output
       elif isinstance(output, list):
@@ -223,8 +223,8 @@ def _mock_download(url, length, hashes=None, STRICT_REQUIRED_LENGTH=True):
       temp_fileobj.write(file_obj.read())
       return temp_fileobj
 
-    # Patch tuf.download.download_url_to_tempfileobj().
-    tuf.download.download_url_to_tempfileobj = _mock_download
+    # Patch tuf.download.safe_download().
+    tuf.download.safe_download = _mock_download
 
 
 
@@ -490,7 +490,7 @@ def test_3__update_metadata(self):
     """
 
     # Setup
-    original_download = tuf.download.download_url_to_tempfileobj
+    original_download = tuf.download.safe_download
     
     #  Since client's '.../metadata/current' will need to have separate
     #  gzipped metadata file in order to test compressed file handling,
@@ -554,7 +554,7 @@ def test_3__update_metadata(self):
     self._remove_target_from_targets_dir(added_target_1)
 
     # RESTORE
-    tuf.download.download_url_to_tempfileobj = original_download
+    tuf.download.safe_download = original_download
 
 
 
@@ -616,7 +616,7 @@ def test_3__update_metadata_if_changed(self):
     """
 
     # Setup
-    original_download = tuf.download.download_url_to_tempfileobj
+    original_download = tuf.download.safe_download
     
     #  To test updater._update_metadata_if_changed, 'targets' metadata file is
     #  going to be modified at the server's repository.
@@ -707,7 +707,7 @@ def test_3__update_metadata_if_changed(self):
     self._remove_target_from_targets_dir(added_target_1)
 
     # RESTORE
-    tuf.download.download_url_to_tempfileobj = original_download
+    tuf.download.safe_download = original_download
 
 
 
@@ -766,7 +766,7 @@ def test_2__ensure_not_expired(self):
 
   def test_4_refresh(self):
     # Setup.
-    original_download = tuf.download.download_url_to_tempfileobj
+    original_download = tuf.download.safe_download
     
     #  This unit test is based on adding an extra target file to the
     #  server and rebuilding all server-side metadata.  When 'refresh'
@@ -799,7 +799,7 @@ def test_4_refresh(self):
     setup.build_server_repository(self.server_repo_dir, self.targets_dir)
 
     # RESTORE
-    tuf.download.download_url_to_tempfileobj = original_download
+    tuf.download.safe_download = original_download
 
 
 
@@ -807,7 +807,7 @@ def test_4_refresh(self):
   def test_4__refresh_targets_metadata(self):
 
     # Setup
-    original_download = tuf.download.download_url_to_tempfileobj
+    original_download = tuf.download.safe_download
     
     # To test this method a target file would be added to a delegated role,
     # and metadata on the server side would be rebuilt.
@@ -864,7 +864,7 @@ def test_4__refresh_targets_metadata(self):
     setup.build_server_repository(self.server_repo_dir, self.targets_dir)
 
     # RESTORE
-    tuf.download.download_url_to_tempfileobj = original_download
+    tuf.download.safe_download = original_download
 
 
 
@@ -894,10 +894,10 @@ def test_3__targets_of_role(self):
   def test_5_all_targets(self):
 
    # Setup
-   original_download = tuf.download.download_url_to_tempfileobj
+   original_download = tuf.download.safe_download
    
    # As with '_refresh_targets_metadata()', tuf.roledb._roledb_dict
-   # has to be populated.  The 'tuf.download.download_url_to_tempfileobj' method
+   # has to be populated.  The 'tuf.download.safe_download' method
    # should be patched.  The 'self.all_role_paths' argument is passed so that
    # the top-level roles and delegations may be all "downloaded" when
    # Repository.refresh() is called below.  '_mock_download_url_to_tempfileobj'
@@ -925,7 +925,7 @@ def test_5_all_targets(self):
    self.assertTrue(len(all_targets) is 6)   
 
    # RESTORE
-   tuf.download.download_url_to_tempfileobj = original_download
+   tuf.download.safe_download = original_download
 
 
 
@@ -954,7 +954,7 @@ def test_5_targets_of_role(self):
 
   def test_6_target(self):
     # Requirements: make sure roledb_dict is populated and
-    # tuf.download.download_url_to_tempfileobj function is patched.
+    # tuf.download.safe_download function is patched.
 
     # Setup
     targets_dir_content = os.listdir(self.targets_dir)
@@ -985,9 +985,9 @@ def test_6_target(self):
   def test_6_download_target(self):
 
     # Setup:
-    original_download = tuf.download.download_url_to_tempfileobj
+    original_download = tuf.download.safe_download
     
-    # 'tuf.download.download_url_to_tempfileobj' method should be patched.
+    # 'tuf.download.safe_download' method should be patched.
     target_rel_paths_src = self._get_list_of_target_paths(self.targets_dir)
 
     #  Create temporary directory that will be passed as an argument to the
@@ -1032,7 +1032,7 @@ def test_6_download_target(self):
       mirrors[mirror_name]['confined_target_dirs'] = ['']
 
     # RESTORE
-    tuf.download.download_url_to_tempfileobj = original_download
+    tuf.download.safe_download = original_download
 
 
 
@@ -1040,11 +1040,11 @@ def test_6_download_target(self):
   def test_7_updated_targets(self):
     
     # Setup:
-    original_download = tuf.download.download_url_to_tempfileobj
+    original_download = tuf.download.safe_download
     
     # In this test, client will have two target files.  Server will modify 
     # one of them.  As with 'all_targets' function, tuf.roledb._roledb_dict
-    # has to be populated.  'tuf.download.download_url_to_tempfileobj' method
+    # has to be populated.  'tuf.download.safe_download' method
     # should be patched.
     target_rel_paths_src = self._get_list_of_target_paths(self.targets_dir)
 
@@ -1103,7 +1103,7 @@ def test_7_updated_targets(self):
         self.fail(msg)
 
     # RESTORE
-    tuf.download.download_url_to_tempfileobj = original_download
+    tuf.download.safe_download = original_download
 
     
 
@@ -1111,7 +1111,7 @@ def test_7_updated_targets(self):
   def test_8_remove_obsolete_targets(self):
     
     # Setup:
-    original_download = tuf.download.download_url_to_tempfileobj
+    original_download = tuf.download.safe_download
     
     # This unit test should be last, because it removes target files from the
     # server's targets directory. It is done to avoid adding files, rebuilding 
@@ -1162,7 +1162,7 @@ def test_8_remove_obsolete_targets(self):
     self.assertTrue(os.listdir(dest_dir), 2)    
 
     # RESTORE
-    tuf.download.download_url_to_tempfileobj = original_download
+    tuf.download.safe_download = original_download
 
 
 def tearDownModule():

From c4557c2a0c2394f1a56bcb19a89ff3c06ab3fe0d Mon Sep 17 00:00:00 2001
From: dachshund <trishankkarthik@gmail.com>
Date: Thu, 5 Sep 2013 15:44:29 -0400
Subject: [PATCH 19/19] WIP on refactoring the updater and downloader.

---
 tuf/client/updater.py | 51 ++++++++++++++++---------------------------
 tuf/download.py       |  2 +-
 2 files changed, 20 insertions(+), 33 deletions(-)

diff --git a/tuf/client/updater.py b/tuf/client/updater.py
index ac648720..fc3b1a0e 100755
--- a/tuf/client/updater.py
+++ b/tuf/client/updater.py
@@ -353,10 +353,6 @@ def _load_metadata_from_file(self, metadata_set, metadata_role):
         not end in '.txt'.  Examples: 'root', 'targets', 'targets/linux/x86'.
 
     <Exceptions>
-      tuf.RepositoryError:
-        If the metadata could not be loaded or the extracted data is not a 
-        valid metadata object.
-
       tuf.FormatError:
         If role information belonging to a delegated role of 'metadata_role'
         is improperly formatted.
@@ -391,11 +387,7 @@ def _load_metadata_from_file(self, metadata_set, metadata_role):
       # 'tuf.formats.SIGNABLE_SCHEMA'.
       metadata_signable = tuf.util.load_json_file(metadata_filepath)
 
-      # Ensure the loaded json object is properly formatted.
-      try: 
-        tuf.formats.check_signable_object_format(metadata_signable)
-      except tuf.FormatError, e:
-        raise tuf.RepositoryError('Invalid format: '+repr(metadata_filepath)+'.')
+      tuf.formats.check_signable_object_format(metadata_signable)
 
       # Extract the 'signed' role object from 'metadata_signable'.
       metadata_object = metadata_signable['signed']
@@ -551,7 +543,7 @@ def refresh(self):
       None.
 
     <Exceptions>
-      tuf.RepositoryError:
+      tuf.UpdateError:
         If the metadata for any of the top-level roles cannot be updated.
 
       tuf.ExpiredMetadataError:
@@ -577,7 +569,7 @@ def refresh(self):
     # Update the top-level metadata.  The _update_metadata_if_changed() and
     # _update_metadata() calls below do NOT perform an update if there
     # is insufficient trusted signatures for the specified metadata.
-    # Raise 'tuf.RepositoryError' if an update fails.
+    # Raise 'tuf.UpdateError' if an update fails.
 
     # Use default but sane information for timestamp metadata, and do not
     # require strict checks on its required length.
@@ -645,7 +637,7 @@ def __check_hashes(self, input_file, trusted_hashes):
 
   def get_target_file(self, target_filepath, file_length, file_hashes):
 
-    def verify_target_file(self, target_file_object):
+    def verify_target_file(target_file_object):
       self.__check_hashes(target_file_object, file_hashes)
     
     return self.__get_file(target_filepath, verify_target_file, 'target',
@@ -655,11 +647,7 @@ def verify_target_file(self, target_file_object):
 
 
 
-  def __verify_metadata_file(self, metadata_file_object, metadata_role,
-                             file_hashes):
-
-    self.__check_hashes(metadata_file_object, file_hashes)
-
+  def __verify_metadata_file(self, metadata_file_object, metadata_role):
     # Read and load the downloaded file.
     try:
       metadata_signable = \
@@ -676,10 +664,8 @@ def __verify_metadata_file(self, metadata_file_object, metadata_role,
         logger.exception(message)
         raise
       else:
-        if valid:
-          logger.debug('Good signature on '+mirror_url+'.')
-        else:
-          raise tuf.BadSignatureError('Bad signature on '+mirror_url+'.')
+        if not valid:
+          raise tuf.BadSignatureError()
 
 
 
@@ -689,8 +675,7 @@ def unsafely_get_metadata_file(self, metadata_role, metadata_filepath,
                                  file_length):
 
     def unsafely_verify_metadata_file(metadata_file_object):
-      self.__verify_metadata_file(metadata_file_object, metadata_role,
-                                  file_hashes=None)
+      self.__verify_metadata_file(metadata_file_object, metadata_role)
 
     return self.__get_file(metadata_filepath, unsafely_verify_metadata_file,
                            'meta', file_length, download_safely=False,
@@ -704,10 +689,10 @@ def safely_get_metadata_file(self, metadata_role, metadata_filepath,
                                  file_length, file_hashes, compression):
 
     def safely_verify_metadata_file(metadata_file_object):
-      self.__verify_metadata_file(metadata_file_object, metadata_role,
-                                  file_hashes=file_hashes)
+      self.__check_hashes(metadata_file_object, file_hashes)
+      self.__verify_metadata_file(metadata_file_object, metadata_role)
 
-    return self.__get_file(metadata_filepath, _verify_metadata_file,
+    return self.__get_file(metadata_filepath, safely_verify_metadata_file,
                            'meta', file_length, download_safely=True,
                            compression=compression)
 
@@ -751,6 +736,8 @@ def __get_file(self, filepath, verify_file, reference_metadata,
     if file_object:
       return file_object
     else:
+      logger.exception('Failed to download {0}: {1}'.format(filepath,
+                       file_mirror_errors))
       raise tuf.UpdateError(file_mirror_errors)
 
 
@@ -790,7 +777,7 @@ def _update_metadata(self, metadata_role, fileinfo, compression=None):
         are considered.  Any other string is ignored.
 
     <Exceptions>
-      tuf.RepositoryError:
+      tuf.UpdateError:
         The metadata could not be updated. This is not specific to a single
         failure but rather indicates that all possible ways to update the
         metadata have been tried and failed.
@@ -860,7 +847,7 @@ def _update_metadata(self, metadata_role, fileinfo, compression=None):
       current_version = current_metadata_role['version'] 
       downloaded_version = metadata_signable['signed']['version']
       if downloaded_version < current_version:
-        message = repr(mirror_url)+' is older than the version currently '+\
+        message = str(current_metadata_role)+' is older than the version currently '+\
           'installed.\nDownloaded version: '+repr(downloaded_version)+'\n'+\
           'Current version: '+repr(current_version)
         raise tuf.RepositoryError(message)
@@ -1013,7 +1000,7 @@ def _update_metadata_if_changed(self, metadata_role, referenced_metadata='releas
     try:
       self._update_metadata(metadata_role, fileinfo=new_fileinfo,
                             compression=compression)
-    except tuf.RepositoryError, e:
+    except:
       # The current metadata we have is not current but we couldn't
       # get new metadata. We shouldn't use the old metadata anymore.
       # This will get rid of in-memory knowledge of the role and
@@ -1023,8 +1010,8 @@ def _update_metadata_if_changed(self, metadata_role, referenced_metadata='releas
       # We shouldn't need to, but we need to check the trust
       # implications of the current implementation.
       self._delete_metadata(metadata_role)
-      message = 'Metadata for '+repr(metadata_role)+' could not be updated: '
-      raise tuf.MetadataNotAvailableError(message+str(e))
+      logger.error('Metadata for '+str(metadata_role)+' could not be updated')
+      raise
     else:
       # We need to remove delegated roles because the delegated roles
       # may not be trusted anymore.
@@ -1970,7 +1957,7 @@ def download_target(self, target, destination_directory):
       tuf.FormatError:
         If 'target' is not properly formatted.
 
-      tuf.DownloadError:
+      tuf.UpdateError:
         If a target could not be downloaded from any of the mirrors.
 
     <Side Effects>
diff --git a/tuf/download.py b/tuf/download.py
index d3cbb1ee..0aec7e2f 100755
--- a/tuf/download.py
+++ b/tuf/download.py
@@ -177,7 +177,7 @@ def read(self, size):
       # We assume that 'size' is accurate w.r.t. to the overall file length;
       # otherwise, we will miscount the number of truly slow chunks.
       self.__number_of_slow_chunks += 1
-      logger.warn('slow chunk {0}'.format(self.__number_of_slow_chunks))
+      logger.warn('slow chunk {0}: {1} <= {2}'.format(self.__number_of_slow_chunks, n, left))
     else:
       # Since we saw more than a tolerable number of slow chunks, we flag this
       # as a possible slow-retrieval attack. This threshold will determine our