chore: Refactor tar file extraction in Job class

This commit refactors the tar file extraction logic of the `Job` class in `jobs.py`. Instead of calling `extractall` once, it now iterates over the members of the tar archive and extracts each one individually with `extract`. A failure on a single member is logged together with that member's name and the remaining members are still extracted; any other error raised while processing the archive is logged as well.
This commit is contained in:
Théophile Diot 2024-05-24 11:11:17 +01:00
parent 2587e9ee6e
commit 1e54e6adcd
No known key found for this signature in database
GPG key ID: 248FEA4BAE400D06

View file

@@ -9,7 +9,7 @@ from os.path import sep
from pathlib import Path
from shutil import rmtree
from sys import argv
from tarfile import open as tar_open
from tarfile import TarFile, open as tar_open
from threading import Lock
from traceback import format_exc
from typing import Any, Dict, Literal, Optional, Tuple, Union
@@ -80,15 +80,22 @@ class Job:
rmtree(extract_path, ignore_errors=True)
extract_path.mkdir(parents=True, exist_ok=True)
with tar_open(fileobj=BytesIO(job_cache_file["data"]), mode="r:gz") as tar:
assert isinstance(tar, TarFile)
try:
tar.extractall(extract_path, filter="fully_trusted")
except TypeError:
tar.extractall(extract_path)
for member in tar.getmembers():
try:
tar.extract(member, path=extract_path)
except Exception as e:
self.logger.error(f"Error extracting {member.name}: {e}")
except Exception as e:
self.logger.error(f"Error extracting tar file: {e}")
self.logger.debug(f"Restored cache directory {extract_path}")
continue
elif job_cache_file["job_name"] != job_name:
continue
cache_path.parent.mkdir(parents=True, exist_ok=True)
cache_path.write_bytes(job_cache_file["data"])
self.logger.debug(f"Restored cache file {job_cache_file['file_name']}")
except BaseException as e:
self.logger.error(f"Exception while restoring cache file {job_cache_file['file_name']} :\n{e}")
ret = False
@@ -207,7 +214,7 @@ class Job:
tgz.add(dir_path, arcname=".")
content.seek(0, 0)
return self.cache_file(file_name, content.read(), job_name=job_name, service_id=service_id)
return self.cache_file(file_name, content.getvalue(), job_name=job_name, service_id=service_id)
def del_cache(self, name: str, *, job_name: str = "", service_id: str = "") -> Tuple[bool, str]:
"""Delete cache file from database and local cache file."""