Refactor file and bytes hashing functions

This commit is contained in:
Théophile Diot 2024-03-12 15:55:20 +00:00
parent 6f16da357a
commit 8e53a8fc73
No known key found for this signature in database
GPG key ID: 248FEA4BAE400D06
3 changed files with 19 additions and 71 deletions

View file

@ -2,7 +2,6 @@
from datetime import date, datetime, timedelta
from gzip import decompress
from hashlib import sha1
from io import BytesIO
from os import getenv, sep
from os.path import join
@ -10,16 +9,9 @@ from pathlib import Path
from sys import exit as sys_exit, path as sys_path
from threading import Lock
from traceback import format_exc
from typing import Optional, Union
from typing import Optional
for deps_path in [
join(sep, "usr", "share", "bunkerweb", *paths)
for paths in (
("deps", "python"),
("utils",),
("db",),
)
]:
for deps_path in [join(sep, "usr", "share", "bunkerweb", *paths) for paths in (("deps", "python"), ("utils",), ("db",))]:
if deps_path not in sys_path:
sys_path.append(deps_path)
@ -27,7 +19,7 @@ from maxminddb import MODE_FD, open_database
from requests import RequestException, Response, get
from logger import setup_logger # type: ignore
from common_utils import bytes_hash # type: ignore
from common_utils import bytes_hash, file_hash # type: ignore
from jobs import Job # type: ignore
LOGGER = setup_logger("JOBS.mmdb-asn", getenv("LOG_LEVEL", "INFO"))
@ -44,24 +36,6 @@ def request_mmdb() -> Optional[Response]:
return None
def bytes_sha1(bio: Union[Path, bytes, BytesIO]) -> str:
    """Return the hexadecimal SHA-1 digest of a file, a bytes object or a stream.

    Args:
        bio: The data to hash. A ``Path`` is read from disk in full, a
            ``bytes`` object is hashed directly, and a ``BytesIO`` is consumed
            from its current position to the end.

    Returns:
        The SHA-1 digest as a lowercase hexadecimal string.

    Raises:
        TypeError: If ``bio`` is not a ``Path``, ``bytes`` or ``BytesIO``.
    """
    if isinstance(bio, Path):
        bio = bio.read_bytes()

    if isinstance(bio, bytes):
        # The data is already fully in memory; no need to wrap it in a stream.
        return sha1(bio).hexdigest()

    # Explicit check instead of `assert`, which is stripped when Python runs
    # with -O and would let an unsupported type fail later with an unrelated
    # AttributeError.
    if not isinstance(bio, BytesIO):
        raise TypeError(f"bytes_sha1() expects Path, bytes or BytesIO, got {type(bio).__name__}")

    digest = sha1()
    # Hash in 1 KiB chunks, starting from the stream's current position.
    for chunk in iter(lambda: bio.read(1024), b""):
        digest.update(chunk)
    # Rewind so the caller can read the stream again from the beginning.
    bio.seek(0)
    return digest.hexdigest()
try:
dl_mmdb = True
tmp_path = Path(sep, "var", "tmp", "bunkerweb", "asn.mmdb")
@ -73,7 +47,7 @@ try:
response = request_mmdb()
if response and response.status_code == 200:
if response.content.find(bytes_sha1(tmp_path).encode()) != -1:
if response.content.find(file_hash(tmp_path, algorithm="sha1").encode()) != -1:
LOGGER.info("asn.mmdb is already the latest version, skipping download...")
dl_mmdb = False
else:
@ -89,7 +63,7 @@ try:
response = request_mmdb()
if response and response.status_code == 200:
skip_dl = response.content.find(bytes_sha1(job_cache["data"]).encode()) != -1
skip_dl = response.content.find(bytes_hash(job_cache["data"], algorithm="sha1").encode()) != -1
elif job_cache["last_update"] < (datetime.now() - timedelta(weeks=1)).timestamp():
LOGGER.warning("Unable to check if the cache file is the latest version from db-ip.com and file is older than 1 week, checking anyway...")
skip_dl = False

View file

@ -2,7 +2,6 @@
from datetime import date, datetime, timedelta
from gzip import decompress
from hashlib import sha1
from io import BytesIO
from os import getenv, sep
from os.path import join
@ -10,16 +9,9 @@ from pathlib import Path
from sys import exit as sys_exit, path as sys_path
from threading import Lock
from traceback import format_exc
from typing import Optional, Union
from typing import Optional
for deps_path in [
join(sep, "usr", "share", "bunkerweb", *paths)
for paths in (
("deps", "python"),
("utils",),
("db",),
)
]:
for deps_path in [join(sep, "usr", "share", "bunkerweb", *paths) for paths in (("deps", "python"), ("utils",), ("db",))]:
if deps_path not in sys_path:
sys_path.append(deps_path)
@ -27,7 +19,7 @@ from maxminddb import MODE_FD, open_database
from requests import RequestException, Response, get
from logger import setup_logger # type: ignore
from common_utils import bytes_hash # type: ignore
from common_utils import bytes_hash, file_hash # type: ignore
from jobs import Job # type: ignore
LOGGER = setup_logger("JOBS.mmdb-country", getenv("LOG_LEVEL", "INFO"))
@ -44,24 +36,6 @@ def request_mmdb() -> Optional[Response]:
return None
def bytes_sha1(bio: Union[Path, bytes, BytesIO]) -> str:
    """Return the hexadecimal SHA-1 digest of a file, a bytes object or a stream.

    Args:
        bio: The data to hash. A ``Path`` is read from disk in full, a
            ``bytes`` object is hashed directly, and a ``BytesIO`` is consumed
            from its current position to the end.

    Returns:
        The SHA-1 digest as a lowercase hexadecimal string.

    Raises:
        TypeError: If ``bio`` is not a ``Path``, ``bytes`` or ``BytesIO``.
    """
    if isinstance(bio, Path):
        bio = bio.read_bytes()

    if isinstance(bio, bytes):
        # The data is already fully in memory; no need to wrap it in a stream.
        return sha1(bio).hexdigest()

    # Explicit check instead of `assert`, which is stripped when Python runs
    # with -O and would let an unsupported type fail later with an unrelated
    # AttributeError.
    if not isinstance(bio, BytesIO):
        raise TypeError(f"bytes_sha1() expects Path, bytes or BytesIO, got {type(bio).__name__}")

    digest = sha1()
    # Hash in 1 KiB chunks, starting from the stream's current position.
    for chunk in iter(lambda: bio.read(1024), b""):
        digest.update(chunk)
    # Rewind so the caller can read the stream again from the beginning.
    bio.seek(0)
    return digest.hexdigest()
try:
dl_mmdb = True
tmp_path = Path(sep, "var", "tmp", "bunkerweb", "country.mmdb")
@ -73,7 +47,7 @@ try:
response = request_mmdb()
if response and response.status_code == 200:
if response.content.find(bytes_sha1(tmp_path).encode()) != -1:
if response.content.find(file_hash(tmp_path, algorithm="sha1").encode()) != -1:
LOGGER.info("country.mmdb is already the latest version, skipping download...")
dl_mmdb = False
else:
@ -89,7 +63,7 @@ try:
response = request_mmdb()
if response and response.status_code == 200:
skip_dl = response.content.find(bytes_sha1(job_cache["data"]).encode()) != -1
skip_dl = response.content.find(bytes_hash(job_cache["data"], algorithm="sha1").encode()) != -1
elif job_cache["last_update"] < (datetime.now() - timedelta(weeks=1)).timestamp():
LOGGER.warning("Unable to check if the cache file is the latest version from db-ip.com and file is older than 1 week, checking anyway...")
skip_dl = False

View file

@ -1,4 +1,4 @@
from hashlib import sha512
from hashlib import new as new_hash
from io import BytesIO
from os import getenv, sep
from pathlib import Path
@ -50,8 +50,8 @@ def get_os_info() -> Dict[str, str]:
return os_data
def file_hash(file: Union[str, Path]) -> str:
_sha512 = sha512()
def file_hash(file: Union[str, Path], *, algorithm: str = "sha512") -> str:
_hash = new_hash(algorithm)
if not isinstance(file, Path):
file = Path(file)
@ -60,11 +60,11 @@ def file_hash(file: Union[str, Path]) -> str:
data = f.read(1024)
if not data:
break
_sha512.update(data)
return _sha512.hexdigest()
_hash.update(data)
return _hash.hexdigest()
def bytes_hash(bio: Union[str, bytes, BytesIO]) -> str:
def bytes_hash(bio: Union[str, bytes, BytesIO], *, algorithm: str = "sha512") -> str:
if isinstance(bio, str):
bio = BytesIO(bio.encode("utf-8"))
elif isinstance(bio, bytes):
@ -72,11 +72,11 @@ def bytes_hash(bio: Union[str, bytes, BytesIO]) -> str:
assert isinstance(bio, BytesIO)
_sha512 = sha512()
_hash = new_hash(algorithm)
while True:
data = bio.read(1024)
if not data:
break
_sha512.update(data)
_hash.update(data)
bio.seek(0)
return _sha512.hexdigest()
return _hash.hexdigest()