Remove old cached list files when their URL settings are empty

This commit is contained in:
Théophile Diot 2023-09-22 18:38:05 +01:00
parent 61c8ef73b0
commit f3d6f860e0
No known key found for this signature in database
GPG key ID: 248FEA4BAE400D06
4 changed files with 59 additions and 45 deletions

View file

@ -21,7 +21,7 @@ from requests import get
from Database import Database # type: ignore
from logger import setup_logger # type: ignore
from jobs import cache_file, cache_hash, is_cached_file, file_hash
from jobs import cache_file, cache_hash, del_file_in_db, is_cached_file, file_hash
rdns_rx = re_compile(rb"^[^ ]+$", IGNORECASE)
asn_rx = re_compile(rb"^\d+$")
@ -85,8 +85,23 @@ try:
tmp_blacklist_path = Path(sep, "var", "tmp", "bunkerweb", "blacklist")
tmp_blacklist_path.mkdir(parents=True, exist_ok=True)
# Our urls data
urls = {"IP": [], "RDNS": [], "ASN": [], "USER_AGENT": [], "URI": []}
# Get URLs
urls = {
"IP": [],
"RDNS": [],
"ASN": [],
"USER_AGENT": [],
"URI": [],
"IGNORE_IP": [],
"IGNORE_RDNS": [],
"IGNORE_ASN": [],
"IGNORE_USER_AGENT": [],
"IGNORE_URI": [],
}
for kind in urls:
for url in getenv(f"BLACKLIST_{kind}_URLS", "").split(" "):
if url and url not in urls[kind]:
urls[kind].append(url)
# Don't go further if the cache is fresh
kinds_fresh = {
@ -113,27 +128,15 @@ try:
logger.info(
f"Blacklist for {kind} is already in cache, skipping downloads...",
)
if not urls[kind]:
blacklist_path.joinpath(f"{kind}.list").unlink(missing_ok=True)
deleted, err = del_file_in_db(f"{kind}.list", db)
if not deleted:
logger.warning(f"Coudn't delete {kind}.list from cache : {err}")
if all_fresh:
_exit(0)
# Get URLs
urls = {
"IP": [],
"RDNS": [],
"ASN": [],
"USER_AGENT": [],
"URI": [],
"IGNORE_IP": [],
"IGNORE_RDNS": [],
"IGNORE_ASN": [],
"IGNORE_USER_AGENT": [],
"IGNORE_URI": [],
}
for kind in urls:
for url in getenv(f"BLACKLIST_{kind}_URLS", "").split(" "):
if url and url not in urls[kind]:
urls[kind].append(url)
# Loop on kinds
for kind, urls_list in urls.items():
if kinds_fresh[kind]:

View file

@ -21,7 +21,7 @@ from requests import get
from Database import Database # type: ignore
from logger import setup_logger # type: ignore
from jobs import cache_file, cache_hash, is_cached_file, file_hash
from jobs import cache_file, cache_hash, del_file_in_db, is_cached_file, file_hash
rdns_rx = re_compile(rb"^[^ ]+$", IGNORECASE)
asn_rx = re_compile(rb"^\d+$")
@ -85,8 +85,12 @@ try:
tmp_greylist_path = Path(sep, "var", "tmp", "bunkerweb", "greylist")
tmp_greylist_path.mkdir(parents=True, exist_ok=True)
# Our urls data
# Get URLs
urls = {"IP": [], "RDNS": [], "ASN": [], "USER_AGENT": [], "URI": []}
for kind in urls:
for url in getenv(f"GREYLIST_{kind}_URLS", "").split(" "):
if url and url not in urls[kind]:
urls[kind].append(url)
# Don't go further if the cache is fresh
kinds_fresh = {
@ -108,16 +112,15 @@ try:
logger.info(
f"Greylist for {kind} is already in cache, skipping downloads...",
)
if not urls[kind]:
greylist_path.joinpath(f"{kind}.list").unlink(missing_ok=True)
deleted, err = del_file_in_db(f"{kind}.list", db)
if not deleted:
logger.warning(f"Coudn't delete {kind}.list from cache : {err}")
if all_fresh:
_exit(0)
# Get URLs
urls = {"IP": [], "RDNS": [], "ASN": [], "USER_AGENT": [], "URI": []}
for kind in urls:
for url in getenv(f"GREYLIST_{kind}_URLS", "").split(" "):
if url and url not in urls[kind]:
urls[kind].append(url)
# Loop on kinds
for kind, urls_list in urls.items():
if kinds_fresh[kind]:

View file

@ -23,7 +23,7 @@ from requests import get
from Database import Database # type: ignore
from logger import setup_logger # type: ignore
from jobs import cache_file, cache_hash, file_hash, is_cached_file
from jobs import cache_file, cache_hash, del_file_in_db, file_hash, is_cached_file
def check_line(line):
@ -75,14 +75,19 @@ try:
db = Database(logger, sqlalchemy_string=getenv("DATABASE_URI", None), pool=False)
# Don't go further if the cache is fresh
if is_cached_file(realip_path.joinpath("combined.list"), "hour", db):
logger.info("RealIP list is already in cache, skipping download...")
_exit(0)
# Get URLs
urls = [url for url in getenv("REAL_IP_FROM_URLS", "").split(" ") if url]
# Don't go further if the cache is fresh
if is_cached_file(realip_path.joinpath("combined.list"), "hour", db):
if not urls:
tmp_realip_path.joinpath("combined.list").unlink(missing_ok=True)
deleted, err = del_file_in_db("combined.list", db)
if not deleted:
logger.warning(f"Coudn't delete combined.list from cache : {err}")
logger.info("RealIP list is already in cache, skipping download...")
_exit(0)
# Download and write data to temp file
i = 0
content = b""

View file

@ -21,7 +21,7 @@ from requests import get
from Database import Database # type: ignore
from logger import setup_logger # type: ignore
from jobs import cache_file, cache_hash, is_cached_file, file_hash
from jobs import cache_file, cache_hash, del_file_in_db, is_cached_file, file_hash
rdns_rx = re_compile(rb"^[^ ]+$", IGNORECASE)
asn_rx = re_compile(rb"^\d+$")
@ -85,8 +85,12 @@ try:
tmp_whitelist_path = Path(sep, "var", "tmp", "bunkerweb", "whitelist")
tmp_whitelist_path.mkdir(parents=True, exist_ok=True)
# Our urls data
# Get URLs
urls = {"IP": [], "RDNS": [], "ASN": [], "USER_AGENT": [], "URI": []}
for kind in urls:
for url in getenv(f"WHITELIST_{kind}_URLS", "").split(" "):
if url and url not in urls[kind]:
urls[kind].append(url)
# Don't go further if the cache is fresh
kinds_fresh = {
@ -108,16 +112,15 @@ try:
logger.info(
f"Whitelist for {kind} is already in cache, skipping downloads...",
)
if not urls[kind]:
whitelist_path.joinpath(f"{kind}.list").unlink(missing_ok=True)
deleted, err = del_file_in_db(f"{kind}.list", db)
if not deleted:
logger.warning(f"Coudn't delete {kind}.list from cache : {err}")
if all_fresh:
_exit(0)
# Get URLs
urls = {"IP": [], "RDNS": [], "ASN": [], "USER_AGENT": [], "URI": []}
for kind in urls:
for url in getenv(f"WHITELIST_{kind}_URLS", "").split(" "):
if url and url not in urls[kind]:
urls[kind].append(url)
# Loop on kinds
for kind, urls_list in urls.items():
if kinds_fresh[kind]: