mirror of
https://github.com/bunkerity/bunkerweb
synced 2026-05-24 09:28:37 +00:00
Remove old cached files if URLs are empty
This commit is contained in:
parent
61c8ef73b0
commit
f3d6f860e0
4 changed files with 59 additions and 45 deletions
|
|
@ -21,7 +21,7 @@ from requests import get
|
|||
|
||||
from Database import Database # type: ignore
|
||||
from logger import setup_logger # type: ignore
|
||||
from jobs import cache_file, cache_hash, is_cached_file, file_hash
|
||||
from jobs import cache_file, cache_hash, del_file_in_db, is_cached_file, file_hash
|
||||
|
||||
rdns_rx = re_compile(rb"^[^ ]+$", IGNORECASE)
|
||||
asn_rx = re_compile(rb"^\d+$")
|
||||
|
|
@ -85,8 +85,23 @@ try:
|
|||
tmp_blacklist_path = Path(sep, "var", "tmp", "bunkerweb", "blacklist")
|
||||
tmp_blacklist_path.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Our urls data
|
||||
urls = {"IP": [], "RDNS": [], "ASN": [], "USER_AGENT": [], "URI": []}
|
||||
# Get URLs
|
||||
urls = {
|
||||
"IP": [],
|
||||
"RDNS": [],
|
||||
"ASN": [],
|
||||
"USER_AGENT": [],
|
||||
"URI": [],
|
||||
"IGNORE_IP": [],
|
||||
"IGNORE_RDNS": [],
|
||||
"IGNORE_ASN": [],
|
||||
"IGNORE_USER_AGENT": [],
|
||||
"IGNORE_URI": [],
|
||||
}
|
||||
for kind in urls:
|
||||
for url in getenv(f"BLACKLIST_{kind}_URLS", "").split(" "):
|
||||
if url and url not in urls[kind]:
|
||||
urls[kind].append(url)
|
||||
|
||||
# Don't go further if the cache is fresh
|
||||
kinds_fresh = {
|
||||
|
|
@ -113,27 +128,15 @@ try:
|
|||
logger.info(
|
||||
f"Blacklist for {kind} is already in cache, skipping downloads...",
|
||||
)
|
||||
|
||||
if not urls[kind]:
|
||||
blacklist_path.joinpath(f"{kind}.list").unlink(missing_ok=True)
|
||||
deleted, err = del_file_in_db(f"{kind}.list", db)
|
||||
if not deleted:
|
||||
logger.warning(f"Coudn't delete {kind}.list from cache : {err}")
|
||||
if all_fresh:
|
||||
_exit(0)
|
||||
|
||||
# Get URLs
|
||||
urls = {
|
||||
"IP": [],
|
||||
"RDNS": [],
|
||||
"ASN": [],
|
||||
"USER_AGENT": [],
|
||||
"URI": [],
|
||||
"IGNORE_IP": [],
|
||||
"IGNORE_RDNS": [],
|
||||
"IGNORE_ASN": [],
|
||||
"IGNORE_USER_AGENT": [],
|
||||
"IGNORE_URI": [],
|
||||
}
|
||||
for kind in urls:
|
||||
for url in getenv(f"BLACKLIST_{kind}_URLS", "").split(" "):
|
||||
if url and url not in urls[kind]:
|
||||
urls[kind].append(url)
|
||||
|
||||
# Loop on kinds
|
||||
for kind, urls_list in urls.items():
|
||||
if kinds_fresh[kind]:
|
||||
|
|
|
|||
|
|
@ -21,7 +21,7 @@ from requests import get
|
|||
|
||||
from Database import Database # type: ignore
|
||||
from logger import setup_logger # type: ignore
|
||||
from jobs import cache_file, cache_hash, is_cached_file, file_hash
|
||||
from jobs import cache_file, cache_hash, del_file_in_db, is_cached_file, file_hash
|
||||
|
||||
rdns_rx = re_compile(rb"^[^ ]+$", IGNORECASE)
|
||||
asn_rx = re_compile(rb"^\d+$")
|
||||
|
|
@ -85,8 +85,12 @@ try:
|
|||
tmp_greylist_path = Path(sep, "var", "tmp", "bunkerweb", "greylist")
|
||||
tmp_greylist_path.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Our urls data
|
||||
# Get URLs
|
||||
urls = {"IP": [], "RDNS": [], "ASN": [], "USER_AGENT": [], "URI": []}
|
||||
for kind in urls:
|
||||
for url in getenv(f"GREYLIST_{kind}_URLS", "").split(" "):
|
||||
if url and url not in urls[kind]:
|
||||
urls[kind].append(url)
|
||||
|
||||
# Don't go further if the cache is fresh
|
||||
kinds_fresh = {
|
||||
|
|
@ -108,16 +112,15 @@ try:
|
|||
logger.info(
|
||||
f"Greylist for {kind} is already in cache, skipping downloads...",
|
||||
)
|
||||
|
||||
if not urls[kind]:
|
||||
greylist_path.joinpath(f"{kind}.list").unlink(missing_ok=True)
|
||||
deleted, err = del_file_in_db(f"{kind}.list", db)
|
||||
if not deleted:
|
||||
logger.warning(f"Coudn't delete {kind}.list from cache : {err}")
|
||||
if all_fresh:
|
||||
_exit(0)
|
||||
|
||||
# Get URLs
|
||||
urls = {"IP": [], "RDNS": [], "ASN": [], "USER_AGENT": [], "URI": []}
|
||||
for kind in urls:
|
||||
for url in getenv(f"GREYLIST_{kind}_URLS", "").split(" "):
|
||||
if url and url not in urls[kind]:
|
||||
urls[kind].append(url)
|
||||
|
||||
# Loop on kinds
|
||||
for kind, urls_list in urls.items():
|
||||
if kinds_fresh[kind]:
|
||||
|
|
|
|||
|
|
@ -23,7 +23,7 @@ from requests import get
|
|||
|
||||
from Database import Database # type: ignore
|
||||
from logger import setup_logger # type: ignore
|
||||
from jobs import cache_file, cache_hash, file_hash, is_cached_file
|
||||
from jobs import cache_file, cache_hash, del_file_in_db, file_hash, is_cached_file
|
||||
|
||||
|
||||
def check_line(line):
|
||||
|
|
@ -75,14 +75,19 @@ try:
|
|||
|
||||
db = Database(logger, sqlalchemy_string=getenv("DATABASE_URI", None), pool=False)
|
||||
|
||||
# Don't go further if the cache is fresh
|
||||
if is_cached_file(realip_path.joinpath("combined.list"), "hour", db):
|
||||
logger.info("RealIP list is already in cache, skipping download...")
|
||||
_exit(0)
|
||||
|
||||
# Get URLs
|
||||
urls = [url for url in getenv("REAL_IP_FROM_URLS", "").split(" ") if url]
|
||||
|
||||
# Don't go further if the cache is fresh
|
||||
if is_cached_file(realip_path.joinpath("combined.list"), "hour", db):
|
||||
if not urls:
|
||||
tmp_realip_path.joinpath("combined.list").unlink(missing_ok=True)
|
||||
deleted, err = del_file_in_db("combined.list", db)
|
||||
if not deleted:
|
||||
logger.warning(f"Coudn't delete combined.list from cache : {err}")
|
||||
logger.info("RealIP list is already in cache, skipping download...")
|
||||
_exit(0)
|
||||
|
||||
# Download and write data to temp file
|
||||
i = 0
|
||||
content = b""
|
||||
|
|
|
|||
|
|
@ -21,7 +21,7 @@ from requests import get
|
|||
|
||||
from Database import Database # type: ignore
|
||||
from logger import setup_logger # type: ignore
|
||||
from jobs import cache_file, cache_hash, is_cached_file, file_hash
|
||||
from jobs import cache_file, cache_hash, del_file_in_db, is_cached_file, file_hash
|
||||
|
||||
rdns_rx = re_compile(rb"^[^ ]+$", IGNORECASE)
|
||||
asn_rx = re_compile(rb"^\d+$")
|
||||
|
|
@ -85,8 +85,12 @@ try:
|
|||
tmp_whitelist_path = Path(sep, "var", "tmp", "bunkerweb", "whitelist")
|
||||
tmp_whitelist_path.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Our urls data
|
||||
# Get URLs
|
||||
urls = {"IP": [], "RDNS": [], "ASN": [], "USER_AGENT": [], "URI": []}
|
||||
for kind in urls:
|
||||
for url in getenv(f"WHITELIST_{kind}_URLS", "").split(" "):
|
||||
if url and url not in urls[kind]:
|
||||
urls[kind].append(url)
|
||||
|
||||
# Don't go further if the cache is fresh
|
||||
kinds_fresh = {
|
||||
|
|
@ -108,16 +112,15 @@ try:
|
|||
logger.info(
|
||||
f"Whitelist for {kind} is already in cache, skipping downloads...",
|
||||
)
|
||||
|
||||
if not urls[kind]:
|
||||
whitelist_path.joinpath(f"{kind}.list").unlink(missing_ok=True)
|
||||
deleted, err = del_file_in_db(f"{kind}.list", db)
|
||||
if not deleted:
|
||||
logger.warning(f"Coudn't delete {kind}.list from cache : {err}")
|
||||
if all_fresh:
|
||||
_exit(0)
|
||||
|
||||
# Get URLs
|
||||
urls = {"IP": [], "RDNS": [], "ASN": [], "USER_AGENT": [], "URI": []}
|
||||
for kind in urls:
|
||||
for url in getenv(f"WHITELIST_{kind}_URLS", "").split(" "):
|
||||
if url and url not in urls[kind]:
|
||||
urls[kind].append(url)
|
||||
|
||||
# Loop on kinds
|
||||
for kind, urls_list in urls.items():
|
||||
if kinds_fresh[kind]:
|
||||
|
|
|
|||
Loading…
Reference in a new issue