mirror of
https://github.com/bunkerity/bunkerweb
synced 2026-05-24 09:28:37 +00:00
Handle no-longer-used URLs in list download jobs
This commit is contained in:
parent
1c3fea7f09
commit
41540fcda2
4 changed files with 52 additions and 12 deletions
|
|
@ -8,7 +8,6 @@ from os.path import join, normpath
|
|||
from pathlib import Path
|
||||
from re import compile as re_compile
|
||||
from sys import exit as sys_exit, path as sys_path
|
||||
from traceback import format_exc
|
||||
from typing import Tuple
|
||||
|
||||
for deps_path in [join(sep, "usr", "share", "bunkerweb", *paths) for paths in (("deps", "python"), ("utils",), ("db",))]:
|
||||
|
|
@ -110,6 +109,7 @@ try:
|
|||
LOGGER.warning(f"Couldn't delete blacklist URLs from cache : {err}")
|
||||
sys_exit(0)
|
||||
|
||||
urls = set()
|
||||
failed_urls = set()
|
||||
|
||||
# Loop on kinds
|
||||
|
|
@ -127,6 +127,7 @@ try:
|
|||
content = b""
|
||||
for url in urls_list:
|
||||
url_file = f"{bytes_hash(url, algorithm='sha1')}.list"
|
||||
urls.add(url_file)
|
||||
cached_url = JOB.get_cache(url_file, with_info=True, with_data=True)
|
||||
try:
|
||||
# Check if the URL has already been downloaded
|
||||
|
|
@ -192,10 +193,19 @@ try:
|
|||
continue
|
||||
|
||||
status = 1
|
||||
|
||||
# Remove old files
|
||||
for url_file in JOB.job_path.glob("*.list"):
|
||||
LOGGER.debug(f"Checking if {url_file} is still in use ...")
|
||||
if url_file.name not in urls:
|
||||
LOGGER.warning(f"Removing no longer used url file {url_file} ...")
|
||||
deleted, err = JOB.del_cache(url_file)
|
||||
if not deleted:
|
||||
LOGGER.warning(f"Couldn't delete url file {url_file} from cache : {err}")
|
||||
except SystemExit as e:
|
||||
status = e.code
|
||||
except:
|
||||
except BaseException as e:
|
||||
status = 2
|
||||
LOGGER.error(f"Exception while running blacklist-download.py :\n{format_exc()}")
|
||||
LOGGER.error(f"Exception while running blacklist-download.py :\n{e}")
|
||||
|
||||
sys_exit(status)
|
||||
|
|
|
|||
|
|
@ -8,7 +8,6 @@ from os.path import join, normpath
|
|||
from pathlib import Path
|
||||
from re import compile as re_compile
|
||||
from sys import exit as sys_exit, path as sys_path
|
||||
from traceback import format_exc
|
||||
from typing import Tuple
|
||||
|
||||
for deps_path in [join(sep, "usr", "share", "bunkerweb", *paths) for paths in (("deps", "python"), ("utils",), ("db",))]:
|
||||
|
|
@ -110,6 +109,7 @@ try:
|
|||
LOGGER.warning(f"Couldn't delete greylist URLs from cache : {err}")
|
||||
sys_exit(0)
|
||||
|
||||
urls = set()
|
||||
failed_urls = set()
|
||||
|
||||
# Loop on kinds
|
||||
|
|
@ -127,6 +127,7 @@ try:
|
|||
content = b""
|
||||
for url in urls_list:
|
||||
url_file = f"{bytes_hash(url, algorithm='sha1')}.list"
|
||||
urls.add(url_file)
|
||||
cached_url = JOB.get_cache(url_file, with_info=True, with_data=True)
|
||||
try:
|
||||
# Check if the URL has already been downloaded
|
||||
|
|
@ -192,10 +193,19 @@ try:
|
|||
continue
|
||||
|
||||
status = 1
|
||||
|
||||
# Remove old files
|
||||
for url_file in JOB.job_path.glob("*.list"):
|
||||
LOGGER.debug(f"Checking if {url_file} is still in use ...")
|
||||
if url_file.name not in urls:
|
||||
LOGGER.warning(f"Removing no longer used url file {url_file} ...")
|
||||
deleted, err = JOB.del_cache(url_file)
|
||||
if not deleted:
|
||||
LOGGER.warning(f"Couldn't delete url file {url_file} from cache : {err}")
|
||||
except SystemExit as e:
|
||||
status = e.code
|
||||
except:
|
||||
except BaseException as e:
|
||||
status = 2
|
||||
LOGGER.error(f"Exception while running greylist-download.py :\n{format_exc()}")
|
||||
LOGGER.error(f"Exception while running greylist-download.py :\n{e}")
|
||||
|
||||
sys_exit(status)
|
||||
|
|
|
|||
|
|
@ -7,7 +7,6 @@ from os import getenv, sep
|
|||
from os.path import join, normpath
|
||||
from pathlib import Path
|
||||
from sys import exit as sys_exit, path as sys_path
|
||||
from traceback import format_exc
|
||||
|
||||
for deps_path in [join(sep, "usr", "share", "bunkerweb", *paths) for paths in (("deps", "python"), ("utils",), ("db",))]:
|
||||
if deps_path not in sys_path:
|
||||
|
|
@ -87,6 +86,7 @@ try:
|
|||
LOGGER.warning(f"Couldn't delete realip URLs from cache : {err}")
|
||||
sys_exit(0)
|
||||
|
||||
urls = set()
|
||||
failed_urls = set()
|
||||
|
||||
for service, urls in services_realip_urls.items():
|
||||
|
|
@ -102,6 +102,7 @@ try:
|
|||
content = b""
|
||||
for url in urls:
|
||||
url_file = f"{bytes_hash(url, algorithm='sha1')}.list"
|
||||
urls.add(url_file)
|
||||
cached_url = JOB.get_cache(url_file, with_info=True, with_data=True)
|
||||
try:
|
||||
# Check if the URL has already been downloaded
|
||||
|
|
@ -165,10 +166,19 @@ try:
|
|||
continue
|
||||
|
||||
status = 1
|
||||
|
||||
# Remove old files
|
||||
for url_file in JOB.job_path.glob("*.list"):
|
||||
LOGGER.debug(f"Checking if {url_file} is still in use ...")
|
||||
if url_file.name not in urls:
|
||||
LOGGER.warning(f"Removing no longer used url file {url_file} ...")
|
||||
deleted, err = JOB.del_cache(url_file)
|
||||
if not deleted:
|
||||
LOGGER.warning(f"Couldn't delete url file {url_file} from cache : {err}")
|
||||
except SystemExit as e:
|
||||
status = e.code
|
||||
except:
|
||||
except BaseException as e:
|
||||
status = 2
|
||||
LOGGER.error(f"Exception while running realip-download.py :\n{format_exc()}")
|
||||
LOGGER.error(f"Exception while running realip-download.py :\n{e}")
|
||||
|
||||
sys_exit(status)
|
||||
|
|
|
|||
|
|
@ -8,7 +8,6 @@ from os.path import join, normpath
|
|||
from pathlib import Path
|
||||
from re import compile as re_compile
|
||||
from sys import exit as sys_exit, path as sys_path
|
||||
from traceback import format_exc
|
||||
from typing import Tuple
|
||||
|
||||
for deps_path in [join(sep, "usr", "share", "bunkerweb", *paths) for paths in (("deps", "python"), ("utils",), ("db",))]:
|
||||
|
|
@ -110,6 +109,7 @@ try:
|
|||
LOGGER.warning(f"Couldn't delete whitelist URLs from cache : {err}")
|
||||
sys_exit(0)
|
||||
|
||||
urls = set()
|
||||
failed_urls = set()
|
||||
|
||||
# Loop on kinds
|
||||
|
|
@ -127,6 +127,7 @@ try:
|
|||
content = b""
|
||||
for url in urls_list:
|
||||
url_file = f"{bytes_hash(url, algorithm='sha1')}.list"
|
||||
urls.add(url_file)
|
||||
cached_url = JOB.get_cache(url_file, with_info=True, with_data=True)
|
||||
try:
|
||||
# Check if the URL has already been downloaded
|
||||
|
|
@ -192,10 +193,19 @@ try:
|
|||
continue
|
||||
|
||||
status = 1
|
||||
|
||||
# Remove old files
|
||||
for url_file in JOB.job_path.glob("*.list"):
|
||||
LOGGER.debug(f"Checking if {url_file} is still in use ...")
|
||||
if url_file.name not in urls:
|
||||
LOGGER.warning(f"Removing no longer used url file {url_file} ...")
|
||||
deleted, err = JOB.del_cache(url_file)
|
||||
if not deleted:
|
||||
LOGGER.warning(f"Couldn't delete url file {url_file} from cache : {err}")
|
||||
except SystemExit as e:
|
||||
status = e.code
|
||||
except:
|
||||
except BaseException as e:
|
||||
status = 2
|
||||
LOGGER.error(f"Exception while running whitelist-download.py :\n{format_exc()}")
|
||||
LOGGER.error(f"Exception while running whitelist-download.py :\n{e}")
|
||||
|
||||
sys_exit(status)
|
||||
|
|
|
|||
Loading…
Reference in a new issue