Make it so that _URLS settings are now multisite

This commit is contained in:
Théophile Diot 2024-10-25 17:24:21 +02:00
parent 6d342abce9
commit 7032edd9f5
No known key found for this signature in database
GPG key ID: FA995104A0BA376A
12 changed files with 694 additions and 408 deletions

View file

@ -156,16 +156,16 @@ Deny access based on internal and external IP/network/rDNS/ASN blacklists.
| `BLACKLIST_IGNORE_ASN` | | multisite | no | List of ASN numbers, separated with spaces, to ignore in the blacklist. |
| `BLACKLIST_IGNORE_USER_AGENT` | | multisite | no | List of User-Agent (PCRE regex), separated with spaces, to ignore in the blacklist. |
| `BLACKLIST_IGNORE_URI` | | multisite | no | List of URI (PCRE regex), separated with spaces, to ignore in the blacklist. |
| `BLACKLIST_IP_URLS` | `https://www.dan.me.uk/torlist/?exit` | global | no | List of URLs, separated with spaces, containing bad IP/network to block. Also supports file:// URLs and and auth basic using http://user:pass@url scheme. |
| `BLACKLIST_RDNS_URLS` | | global | no | List of URLs, separated with spaces, containing reverse DNS suffixes to block. Also supports file:// URLs and and auth basic using http://user:pass@url scheme. |
| `BLACKLIST_ASN_URLS` | | global | no | List of URLs, separated with spaces, containing ASN to block. Also supports file:// URLs and and auth basic using http://user:pass@url scheme. |
| `BLACKLIST_USER_AGENT_URLS` | `https://raw.githubusercontent.com/mitchellkrogza/nginx-ultimate-bad-bot-blocker/master/_generator_lists/bad-user-agents.list` | global | no | List of URLs, separated with spaces, containing bad User-Agent to block. Also supports file:// URLs and and auth basic using http://user:pass@url scheme. |
| `BLACKLIST_URI_URLS` | | global | no | List of URLs, separated with spaces, containing bad URI to block. Also supports file:// URLs and and auth basic using http://user:pass@url scheme. |
| `BLACKLIST_IGNORE_IP_URLS` | | global | no | List of URLs, separated with spaces, containing IP/network to ignore in the blacklist. Also supports file:// URLs and and auth basic using http://user:pass@url scheme. |
| `BLACKLIST_IGNORE_RDNS_URLS` | | global | no | List of URLs, separated with spaces, containing reverse DNS suffixes to ignore in the blacklist. Also supports file:// URLs and and auth basic using http://user:pass@url scheme. |
| `BLACKLIST_IGNORE_ASN_URLS` | | global | no | List of URLs, separated with spaces, containing ASN to ignore in the blacklist. Also supports file:// URLs and and auth basic using http://user:pass@url scheme. |
| `BLACKLIST_IGNORE_USER_AGENT_URLS` | | global | no | List of URLs, separated with spaces, containing User-Agent to ignore in the blacklist. Also supports file:// URLs and and auth basic using http://user:pass@url scheme. |
| `BLACKLIST_IGNORE_URI_URLS` | | global | no | List of URLs, separated with spaces, containing URI to ignore in the blacklist. Also supports file:// URLs and and auth basic using http://user:pass@url scheme. |
| `BLACKLIST_IP_URLS` | `https://www.dan.me.uk/torlist/?exit` | multisite | no | List of URLs, separated with spaces, containing bad IP/network to block. Also supports file:// URLs and auth basic using http://user:pass@url scheme. |
| `BLACKLIST_RDNS_URLS` | | multisite | no | List of URLs, separated with spaces, containing reverse DNS suffixes to block. Also supports file:// URLs and auth basic using http://user:pass@url scheme. |
| `BLACKLIST_ASN_URLS` | | multisite | no | List of URLs, separated with spaces, containing ASN to block. Also supports file:// URLs and auth basic using http://user:pass@url scheme. |
| `BLACKLIST_USER_AGENT_URLS` | `https://raw.githubusercontent.com/mitchellkrogza/nginx-ultimate-bad-bot-blocker/master/_generator_lists/bad-user-agents.list` | multisite | no | List of URLs, separated with spaces, containing bad User-Agent to block. Also supports file:// URLs and auth basic using http://user:pass@url scheme. |
| `BLACKLIST_URI_URLS` | | multisite | no | List of URLs, separated with spaces, containing bad URI to block. Also supports file:// URLs and auth basic using http://user:pass@url scheme. |
| `BLACKLIST_IGNORE_IP_URLS` | | multisite | no | List of URLs, separated with spaces, containing IP/network to ignore in the blacklist. Also supports file:// URLs and auth basic using http://user:pass@url scheme. |
| `BLACKLIST_IGNORE_RDNS_URLS` | | multisite | no | List of URLs, separated with spaces, containing reverse DNS suffixes to ignore in the blacklist. Also supports file:// URLs and auth basic using http://user:pass@url scheme. |
| `BLACKLIST_IGNORE_ASN_URLS` | | multisite | no | List of URLs, separated with spaces, containing ASN to ignore in the blacklist. Also supports file:// URLs and auth basic using http://user:pass@url scheme. |
| `BLACKLIST_IGNORE_USER_AGENT_URLS` | | multisite | no | List of URLs, separated with spaces, containing User-Agent to ignore in the blacklist. Also supports file:// URLs and auth basic using http://user:pass@url scheme. |
| `BLACKLIST_IGNORE_URI_URLS` | | multisite | no | List of URLs, separated with spaces, containing URI to ignore in the blacklist. Also supports file:// URLs and auth basic using http://user:pass@url scheme. |
## Brotli
@ -299,11 +299,11 @@ Allow access while keeping security features based on internal and external IP/n
| `GREYLIST_ASN` | | multisite | no | List of ASN numbers, separated with spaces, to put into the greylist. |
| `GREYLIST_USER_AGENT` | | multisite | no | List of User-Agent (PCRE regex), separated with spaces, to put into the greylist. |
| `GREYLIST_URI` | | multisite | no | List of URI (PCRE regex), separated with spaces, to put into the greylist. |
| `GREYLIST_IP_URLS` | | global | no | List of URLs, separated with spaces, containing good IP/network to put into the greylist. Also supports file:// URLs and and auth basic using http://user:pass@url scheme. |
| `GREYLIST_RDNS_URLS` | | global | no | List of URLs, separated with spaces, containing reverse DNS suffixes to put into the greylist. Also supports file:// URLs and and auth basic using http://user:pass@url scheme. |
| `GREYLIST_ASN_URLS` | | global | no | List of URLs, separated with spaces, containing ASN to put into the greylist. Also supports file:// URLs and and auth basic using http://user:pass@url scheme. |
| `GREYLIST_USER_AGENT_URLS` | | global | no | List of URLs, separated with spaces, containing good User-Agent to put into the greylist. Also supports file:// URLs and and auth basic using http://user:pass@url scheme. |
| `GREYLIST_URI_URLS` | | global | no | List of URLs, separated with spaces, containing bad URI to put into the greylist. Also supports file:// URLs and and auth basic using http://user:pass@url scheme. |
| `GREYLIST_IP_URLS` | | multisite | no | List of URLs, separated with spaces, containing good IP/network to put into the greylist. Also supports file:// URLs and auth basic using http://user:pass@url scheme. |
| `GREYLIST_RDNS_URLS` | | multisite | no | List of URLs, separated with spaces, containing reverse DNS suffixes to put into the greylist. Also supports file:// URLs and auth basic using http://user:pass@url scheme. |
| `GREYLIST_ASN_URLS` | | multisite | no | List of URLs, separated with spaces, containing ASN to put into the greylist. Also supports file:// URLs and auth basic using http://user:pass@url scheme. |
| `GREYLIST_USER_AGENT_URLS` | | multisite | no | List of URLs, separated with spaces, containing good User-Agent to put into the greylist. Also supports file:// URLs and auth basic using http://user:pass@url scheme. |
| `GREYLIST_URI_URLS` | | multisite | no | List of URLs, separated with spaces, containing bad URI to put into the greylist. Also supports file:// URLs and auth basic using http://user:pass@url scheme. |
## Gzip
@ -530,7 +530,7 @@ Get real IP of clients when BunkerWeb is behind a reverse proxy / load balancer.
| `REAL_IP_FROM` | `192.168.0.0/16 172.16.0.0/12 10.0.0.0/8` | multisite | no | List of trusted IPs / networks, separated with spaces, where proxied requests come from. |
| `REAL_IP_HEADER` | `X-Forwarded-For` | multisite | no | HTTP header containing the real IP or special value proxy_protocol for PROXY protocol. |
| `REAL_IP_RECURSIVE` | `yes` | multisite | no | Perform a recursive search in the header containing the IP address. |
| `REAL_IP_FROM_URLS` | | global | no | List of URLs containing trusted IPs / networks, separated with spaces, where proxied requests come from. Also supports file:// URLs and and auth basic using http://user:pass@url scheme. |
| `REAL_IP_FROM_URLS` | | multisite | no | List of URLs containing trusted IPs / networks, separated with spaces, where proxied requests come from. Also supports file:// URLs and auth basic using http://user:pass@url scheme. |
## Redirect
@ -715,8 +715,8 @@ Allow access based on internal and external IP/network/rDNS/ASN whitelists.
| `WHITELIST_ASN` | `32934` | multisite | no | List of ASN numbers, separated with spaces, to whitelist. |
| `WHITELIST_USER_AGENT` | | multisite | no | List of User-Agent (PCRE regex), separated with spaces, to whitelist. |
| `WHITELIST_URI` | | multisite | no | List of URI (PCRE regex), separated with spaces, to whitelist. |
| `WHITELIST_IP_URLS` | | global | no | List of URLs, separated with spaces, containing good IP/network to whitelist. Also supports file:// URLs and and auth basic using http://user:pass@url scheme. |
| `WHITELIST_RDNS_URLS` | | global | no | List of URLs, separated with spaces, containing reverse DNS suffixes to whitelist. Also supports file:// URLs and and auth basic using http://user:pass@url scheme. |
| `WHITELIST_ASN_URLS` | | global | no | List of URLs, separated with spaces, containing ASN to whitelist. Also supports file:// URLs and and auth basic using http://user:pass@url scheme. |
| `WHITELIST_USER_AGENT_URLS` | | global | no | List of URLs, separated with spaces, containing good User-Agent to whitelist. Also supports file:// URLs and and auth basic using http://user:pass@url scheme. |
| `WHITELIST_URI_URLS` | | global | no | List of URLs, separated with spaces, containing bad URI to whitelist. Also supports file:// URLs and and auth basic using http://user:pass@url scheme. |
| `WHITELIST_IP_URLS` | | multisite | no | List of URLs, separated with spaces, containing good IP/network to whitelist. Also supports file:// URLs and auth basic using http://user:pass@url scheme. |
| `WHITELIST_RDNS_URLS` | | multisite | no | List of URLs, separated with spaces, containing reverse DNS suffixes to whitelist. Also supports file:// URLs and auth basic using http://user:pass@url scheme. |
| `WHITELIST_ASN_URLS` | | multisite | no | List of URLs, separated with spaces, containing ASN to whitelist. Also supports file:// URLs and auth basic using http://user:pass@url scheme. |
| `WHITELIST_USER_AGENT_URLS` | | multisite | no | List of URLs, separated with spaces, containing good User-Agent to whitelist. Also supports file:// URLs and auth basic using http://user:pass@url scheme. |
| `WHITELIST_URI_URLS` | | multisite | no | List of URLs, separated with spaces, containing bad URI to whitelist. Also supports file:// URLs and auth basic using http://user:pass@url scheme. |

View file

@ -13,6 +13,7 @@ local get_deny_status = utils.get_deny_status
local get_rdns = utils.get_rdns
local get_asn = utils.get_asn
local regex_match = utils.regex_match
local get_variable = utils.get_variable
local ipmatcher_new = ipmatcher.new
local tostring = tostring
local open = io.open
@ -22,12 +23,12 @@ function blacklist:initialize(ctx)
plugin.initialize(self, "blacklist", ctx)
-- Decode lists
if get_phase() ~= "init" and self:is_needed() then
local lists, err = self.datastore:get("plugin_blacklist_lists", true)
if not lists then
local datastore_lists, err = self.datastore:get("plugin_blacklist_lists_" .. self.ctx.bw.server_name, true)
if not datastore_lists then
self.logger:log(ERR, err)
self.lists = {}
else
self.lists = lists
self.lists = datastore_lists
end
local kinds = {
["IP"] = {},
@ -42,10 +43,10 @@ function blacklist:initialize(ctx)
["IGNORE_URI"] = {},
}
for kind, _ in pairs(kinds) do
if not self.lists[kind] then
self.lists[kind] = {}
end
for data in self.variables["BLACKLIST_" .. kind]:gmatch("%S+") do
if not self.lists[kind] then
self.lists[kind] = {}
end
table.insert(self.lists[kind], data)
end
end
@ -70,7 +71,7 @@ function blacklist:is_needed()
end
function blacklist:init()
-- Check if init needed
-- Check if init is needed
if not self:is_needed() then
return self:ret(true, "init not needed")
end
@ -88,21 +89,46 @@ function blacklist:init()
["IGNORE_USER_AGENT"] = {},
["IGNORE_URI"] = {},
}
local i = 0
for kind, _ in pairs(blacklists) do
local f, _ = open("/var/cache/bunkerweb/blacklist/" .. kind .. ".list", "r")
if f then
for line in f:lines() do
table.insert(blacklists[kind], line)
i = i + 1
end
f:close()
end
local server_name, err = get_variable("SERVER_NAME", false)
if not server_name then
return self:ret(false, "can't get SERVER_NAME variable : " .. err)
end
-- Load them into datastore
local ok, err = self.datastore:set("plugin_blacklist_lists", blacklists, nil, true)
if not ok then
return self:ret(false, "can't store blacklist list into datastore : " .. err)
-- Iterate over each kind and server
local i = 0
for key in server_name:gmatch("%S+") do
for kind, _ in pairs(blacklists) do
local file_path = "/var/cache/bunkerweb/blacklist/" .. key .. "/" .. kind .. ".list"
local f = open(file_path, "r")
if f then
for line in f:lines() do
table.insert(blacklists[kind], line)
i = i + 1
end
f:close()
end
end
-- Load service specific ones into datastore
local ok
ok, err = self.datastore:set("plugin_blacklist_lists_" .. key, blacklists, nil, true)
if not ok then
return self:ret(false, "can't store blacklist list into datastore : " .. err)
end
blacklists = {
["IP"] = {},
["RDNS"] = {},
["ASN"] = {},
["USER_AGENT"] = {},
["URI"] = {},
["IGNORE_IP"] = {},
["IGNORE_RDNS"] = {},
["IGNORE_ASN"] = {},
["IGNORE_USER_AGENT"] = {},
["IGNORE_URI"] = {},
}
end
return self:ret(true, "successfully loaded " .. tostring(i) .. " IP/network/rDNS/ASN/User-Agent/URI")
end

View file

@ -1,10 +1,14 @@
#!/usr/bin/env python3
from contextlib import suppress
from datetime import datetime, timedelta
from ipaddress import ip_address, ip_network
from json import dumps, loads
from os import getenv, sep
from os.path import join, normpath
from pathlib import Path
from re import compile as re_compile
from shutil import rmtree
from sys import exit as sys_exit, path as sys_path
from traceback import format_exc
from typing import Tuple
@ -53,130 +57,162 @@ def check_line(kind: str, line: bytes) -> Tuple[bool, bytes]:
LOGGER = setup_logger("BLACKLIST", getenv("LOG_LEVEL", "INFO"))
status = 0
KINDS = ("IP", "RDNS", "ASN", "USER_AGENT", "URI", "IGNORE_IP", "IGNORE_RDNS", "IGNORE_ASN", "IGNORE_USER_AGENT", "IGNORE_URI")
try:
# Check if at least a server has Blacklist activated
blacklist_activated = False
services = getenv("SERVER_NAME", "").strip()
if not services:
LOGGER.warning("No services found, exiting...")
sys_exit(0)
services = services.split(" ")
services_blacklist_urls = {}
# Multisite case
if getenv("MULTISITE", "no") == "yes":
for first_server in getenv("SERVER_NAME", "").split(" "):
for first_server in services:
if getenv(f"{first_server}_USE_BLACKLIST", getenv("USE_BLACKLIST", "yes")) == "yes":
blacklist_activated = True
break
# Get URLs
services_blacklist_urls[first_server] = {}
for kind in KINDS:
services_blacklist_urls[first_server][kind] = set()
for url in getenv(f"{first_server}_BLACKLIST_{kind}_URLS", getenv(f"BLACKLIST_{kind}_URLS", "")).strip().split(" "):
if url:
services_blacklist_urls[first_server][kind].add(url)
# Singlesite case
elif getenv("USE_BLACKLIST", "yes") == "yes":
blacklist_activated = True
# Get URLs
services_blacklist_urls[services[0]] = {}
for kind in KINDS:
services_blacklist_urls[services[0]][kind] = set()
for url in getenv(f"BLACKLIST_{kind}_URLS", "").strip().split(" "):
if url:
services_blacklist_urls[services[0]][kind].add(url)
if not blacklist_activated:
LOGGER.info("Blacklist is not activated, skipping downloads...")
sys_exit(0)
JOB = Job(LOGGER)
# Get URLs
urls = {
"IP": [],
"RDNS": [],
"ASN": [],
"USER_AGENT": [],
"URI": [],
"IGNORE_IP": [],
"IGNORE_RDNS": [],
"IGNORE_ASN": [],
"IGNORE_USER_AGENT": [],
"IGNORE_URI": [],
}
for kind in urls:
for url in getenv(f"BLACKLIST_{kind}_URLS", "").split(" "):
if url and url not in urls[kind]:
urls[kind].append(url)
# Don't go further if the cache is fresh
kinds_fresh = {
"IP": True,
"RDNS": True,
"ASN": True,
"USER_AGENT": True,
"URI": True,
"IGNORE_IP": True,
"IGNORE_RDNS": True,
"IGNORE_ASN": True,
"IGNORE_USER_AGENT": True,
"IGNORE_URI": True,
}
for kind in kinds_fresh:
if not JOB.is_cached_file(f"{kind}.list", "hour"):
if urls[kind]:
kinds_fresh[kind] = False
LOGGER.info(f"Blacklist for {kind} is not cached, processing downloads..")
continue
LOGGER.info(f"Blacklist for {kind} is already in cache, skipping downloads...")
if not urls[kind]:
LOGGER.warning(f"Blacklist for {kind} is cached but no URL is configured, removing from cache...")
deleted, err = JOB.del_cache(f"{kind}.list")
if not any(url for urls in services_blacklist_urls.values() for url in urls.values()):
LOGGER.warning("No blacklist URL is configured, nothing to do...")
if Path(JOB.job_path.joinpath("urls.json")).exists():
LOGGER.warning("Blacklist URLs are cached but no URL is configured, removing from cache...")
deleted, err = JOB.del_cache("urls.json")
if not deleted:
LOGGER.warning(f"Couldn't delete {kind}.list from cache : {err}")
if all(kinds_fresh.values()):
if not any(urls.values()):
LOGGER.info("No blacklist URL is configured, nothing to do...")
LOGGER.warning(f"Couldn't delete blacklist URLs from cache : {err}")
sys_exit(0)
cached_urls = loads(JOB.get_cache("urls.json") or "{}")
tmp_downloads = Path(sep, "var", "tmp", "bunkerweb", "blacklist")
tmp_downloads.mkdir(parents=True, exist_ok=True)
downloaded_urls = {}
failed_urls = set()
current_timestamp = datetime.now().astimezone().timestamp()
# Loop on kinds
for kind, urls_list in urls.items():
if kinds_fresh[kind]:
continue
for service, kinds in services_blacklist_urls.items():
for kind, urls_list in kinds.items():
if not urls_list:
if Path(JOB.job_path.joinpath(service, f"{kind}.list")).exists():
LOGGER.warning(f"{service} blacklist for {kind} is cached but no URL is configured, removing from cache...")
deleted, err = JOB.del_cache(f"{kind}.list", service_id=service)
if not deleted:
LOGGER.warning(f"Couldn't delete {service} {kind}.list from cache : {err}")
continue
# Write combined data of the kind in memory and check if it has changed
for url in urls_list:
try:
LOGGER.info(f"Downloading blacklist data from {url} ...")
if url.startswith("file://"):
with open(normpath(url[7:]), "rb") as f:
iterable = f.readlines()
else:
resp = get(url, stream=True, timeout=10)
if resp.status_code != 200:
LOGGER.warning(f"Got status code {resp.status_code}, skipping...")
# Write combined data of the kind in memory and check if it has changed
content = b""
for url in urls_list:
try:
cached_url = cached_urls.get(url, {"time": 0, "tmp_path": ""})
# Check if the URL's last download timestamp is younger than 1 hour
if current_timestamp - cached_url["time"] < timedelta(hours=1).total_seconds():
downloaded_urls[url] = {
"time": cached_url["time"],
"tmp_path": tmp_downloads.joinpath(f"{bytes_hash(url, algorithm='sha1')}.list").as_posix(),
}
LOGGER.info(f"URL {url} has been downloaded less than 1 hour ago, skipping it...")
failed_urls.add(url)
status = 1 if status == 1 else 0
continue
iterable = resp.iter_lines()
i = 0
content = b""
for line in iterable:
line = line.strip()
if not line or line.startswith((b"#", b";")):
# Check if the URL has already been downloaded
if url in failed_urls:
continue
elif kind != "USER_AGENT":
line = line.split(b" ")[0]
ok, data = check_line(kind, line)
if ok:
content += data + b"\n"
i += 1
LOGGER.info(f"Downloaded {i} bad {kind}")
# Check if file has changed
new_hash = bytes_hash(content)
old_hash = JOB.cache_hash(f"{kind}.list")
if new_hash == old_hash:
LOGGER.info(f"New file {kind}.list is identical to cache file, reload is not needed")
else:
LOGGER.info(f"New file {kind}.list is different than cache file, reload is needed")
# Put file in cache
cached, err = JOB.cache_file(f"{kind}.list", content, checksum=new_hash)
if not cached:
LOGGER.error(f"Error while caching blacklist : {err}")
status = 2
elif url in downloaded_urls:
LOGGER.info(f"URL {url} has already been downloaded, skipping it...")
content += Path(downloaded_urls[url]["tmp_path"]).read_bytes()
else:
status = 1
except:
LOGGER.info(f"Downloading blacklist data from {url} ...")
if url.startswith("file://"):
with open(normpath(url[7:]), "rb") as f:
iterable = f.readlines()
else:
resp = get(url, stream=True, timeout=10)
if resp.status_code != 200:
LOGGER.warning(f"Got status code {resp.status_code}, skipping...")
continue
iterable = resp.iter_lines()
i = 0
for line in iterable:
line = line.strip()
if not line or line.startswith((b"#", b";")):
continue
elif kind != "USER_AGENT":
line = line.split(b" ")[0]
ok, data = check_line(kind, line)
if ok:
content += data + b"\n"
i += 1
LOGGER.info(f"Downloaded {i} bad {kind}")
downloaded_urls[url] = {
"time": current_timestamp,
"tmp_path": tmp_downloads.joinpath(f"{bytes_hash(url, algorithm='sha1')}.list").as_posix(),
}
except BaseException as e:
status = 2
LOGGER.error(f"Exception while getting {service} blacklist from {url} :\n{e}")
failed_urls.add(url)
# Check if file has changed
new_hash = bytes_hash(content)
old_hash = JOB.cache_hash(f"{kind}.list", service_id=service)
if new_hash == old_hash:
LOGGER.info(f"New {service} file {kind}.list is identical to cache file, reload is not needed")
continue
LOGGER.info(f"New {service} file {kind}.list is different than cache file, reload is needed")
# Put file in cache
cached, err = JOB.cache_file(f"{kind}.list", content, service_id=service, checksum=new_hash)
if not cached:
LOGGER.error(f"Error while caching blacklist : {err}")
status = 2
LOGGER.error(f"Exception while getting blacklist from {url} :\n{format_exc()}")
continue
status = 1
cached, err = JOB.cache_file("urls.json", dumps(downloaded_urls, indent=2).encode("utf-8"))
if not cached:
LOGGER.error(f"Error while caching blacklist URLs : {err}")
rmtree(tmp_downloads, ignore_errors=True)
except SystemExit as e:
status = e.code
except:

View file

@ -114,7 +114,7 @@
"type": "text"
},
"BLACKLIST_IP_URLS": {
"context": "global",
"context": "multisite",
"default": "https://www.dan.me.uk/torlist/?exit",
"help": "List of URLs, separated with spaces, containing bad IP/network to block. Also supports file:// URLs and and auth basic using http://user:pass@url scheme.",
"id": "blacklist-ip-urls",
@ -123,7 +123,7 @@
"type": "text"
},
"BLACKLIST_RDNS_URLS": {
"context": "global",
"context": "multisite",
"default": "",
"help": "List of URLs, separated with spaces, containing reverse DNS suffixes to block. Also supports file:// URLs and and auth basic using http://user:pass@url scheme.",
"id": "blacklist-rdns-urls",
@ -132,7 +132,7 @@
"type": "text"
},
"BLACKLIST_ASN_URLS": {
"context": "global",
"context": "multisite",
"default": "",
"help": "List of URLs, separated with spaces, containing ASN to block. Also supports file:// URLs and and auth basic using http://user:pass@url scheme.",
"id": "blacklist-asn-urls",
@ -141,7 +141,7 @@
"type": "text"
},
"BLACKLIST_USER_AGENT_URLS": {
"context": "global",
"context": "multisite",
"default": "https://raw.githubusercontent.com/mitchellkrogza/nginx-ultimate-bad-bot-blocker/master/_generator_lists/bad-user-agents.list",
"help": "List of URLs, separated with spaces, containing bad User-Agent to block. Also supports file:// URLs and and auth basic using http://user:pass@url scheme.",
"id": "blacklist-user-agent-urls",
@ -150,7 +150,7 @@
"type": "text"
},
"BLACKLIST_URI_URLS": {
"context": "global",
"context": "multisite",
"default": "",
"help": "List of URLs, separated with spaces, containing bad URI to block. Also supports file:// URLs and and auth basic using http://user:pass@url scheme.",
"id": "blacklist-uri-urls",
@ -159,7 +159,7 @@
"type": "text"
},
"BLACKLIST_IGNORE_IP_URLS": {
"context": "global",
"context": "multisite",
"default": "",
"help": "List of URLs, separated with spaces, containing IP/network to ignore in the blacklist. Also supports file:// URLs and and auth basic using http://user:pass@url scheme.",
"id": "blacklist-ignore-ip-urls",
@ -168,7 +168,7 @@
"type": "text"
},
"BLACKLIST_IGNORE_RDNS_URLS": {
"context": "global",
"context": "multisite",
"default": "",
"help": "List of URLs, separated with spaces, containing reverse DNS suffixes to ignore in the blacklist. Also supports file:// URLs and and auth basic using http://user:pass@url scheme.",
"id": "blacklist-ignore-rdns-urls",
@ -177,7 +177,7 @@
"type": "text"
},
"BLACKLIST_IGNORE_ASN_URLS": {
"context": "global",
"context": "multisite",
"default": "",
"help": "List of URLs, separated with spaces, containing ASN to ignore in the blacklist. Also supports file:// URLs and and auth basic using http://user:pass@url scheme.",
"id": "blacklist-ignore-asn-urls",
@ -186,7 +186,7 @@
"type": "text"
},
"BLACKLIST_IGNORE_USER_AGENT_URLS": {
"context": "global",
"context": "multisite",
"default": "",
"help": "List of URLs, separated with spaces, containing User-Agent to ignore in the blacklist. Also supports file:// URLs and and auth basic using http://user:pass@url scheme.",
"id": "blacklist-ignore-user-agent-urls",
@ -195,7 +195,7 @@
"type": "text"
},
"BLACKLIST_IGNORE_URI_URLS": {
"context": "global",
"context": "multisite",
"default": "",
"help": "List of URLs, separated with spaces, containing URI to ignore in the blacklist. Also supports file:// URLs and and auth basic using http://user:pass@url scheme.",
"id": "blacklist-ignore-uri-urls",

View file

@ -13,6 +13,7 @@ local get_deny_status = utils.get_deny_status
local get_rdns = utils.get_rdns
local get_asn = utils.get_asn
local regex_match = utils.regex_match
local get_variable = utils.get_variable
local ipmatcher_new = ipmatcher.new
local tostring = tostring
local open = io.open
@ -22,12 +23,12 @@ function greylist:initialize(ctx)
plugin.initialize(self, "greylist", ctx)
-- Decode lists
if get_phase() ~= "init" and self:is_needed() then
local lists, err = self.datastore:get("plugin_greylist_lists", true)
if not lists then
local datastore_lists, err = self.datastore:get("plugin_greylist_lists_" .. self.ctx.bw.server_name, true)
if not datastore_lists then
self.logger:log(ERR, err)
self.lists = {}
else
self.lists = lists
self.lists = datastore_lists
end
local kinds = {
["IP"] = {},
@ -37,10 +38,10 @@ function greylist:initialize(ctx)
["URI"] = {},
}
for kind, _ in pairs(kinds) do
if not self.lists[kind] then
self.lists[kind] = {}
end
for data in self.variables["GREYLIST_" .. kind]:gmatch("%S+") do
if not self.lists[kind] then
self.lists[kind] = {}
end
table.insert(self.lists[kind], data)
end
end
@ -65,10 +66,11 @@ function greylist:is_needed()
end
function greylist:init()
-- Check if init needed
-- Check if init is needed
if not self:is_needed() then
return self:ret(true, "init not needed")
end
-- Read greylists
local greylists = {
["IP"] = {},
@ -77,23 +79,43 @@ function greylist:init()
["USER_AGENT"] = {},
["URI"] = {},
}
local server_name, err = get_variable("SERVER_NAME", false)
if not server_name then
return self:ret(false, "can't get SERVER_NAME variable : " .. err)
end
-- Iterate over each kind and server
local i = 0
for kind, _ in pairs(greylists) do
local f, _ = open("/var/cache/bunkerweb/greylist/" .. kind .. ".list", "r")
if f then
for line in f:lines() do
table.insert(greylists[kind], line)
i = i + 1
for key in server_name:gmatch("%S+") do
for kind, _ in pairs(greylists) do
local file_path = "/var/cache/bunkerweb/greylist/" .. key .. "/" .. kind .. ".list"
local f = open(file_path, "r")
if f then
for line in f:lines() do
table.insert(greylists[kind], line)
i = i + 1
end
f:close()
end
f:close()
end
-- Load service specific ones into datastore
local ok
ok, err = self.datastore:set("plugin_greylist_lists_" .. key, greylists, nil, true)
if not ok then
return self:ret(false, "can't store greylist list into datastore : " .. err)
end
greylists = {
["IP"] = {},
["RDNS"] = {},
["ASN"] = {},
["USER_AGENT"] = {},
["URI"] = {},
}
end
-- Load them into datastore
local ok, err = self.datastore:set("plugin_greylist_lists", greylists, nil, true)
if not ok then
return self:ret(false, "can't store greylist list into datastore : " .. err)
end
return self:ret(true, "successfully loaded " .. tostring(i) .. " bad IP/network/rDNS/ASN/User-Agent/URI")
return self:ret(true, "successfully loaded " .. tostring(i) .. " IP/network/rDNS/ASN/User-Agent/URI")
end
function greylist:access()

View file

@ -1,10 +1,14 @@
#!/usr/bin/env python3
from contextlib import suppress
from datetime import datetime, timedelta
from ipaddress import ip_address, ip_network
from json import dumps, loads
from os import getenv, sep
from os.path import join, normpath
from pathlib import Path
from re import compile as re_compile
from shutil import rmtree
from sys import exit as sys_exit, path as sys_path
from traceback import format_exc
from typing import Tuple
@ -53,108 +57,162 @@ def check_line(kind: str, line: bytes) -> Tuple[bool, bytes]:
LOGGER = setup_logger("GREYLIST", getenv("LOG_LEVEL", "INFO"))
status = 0
KINDS = ("IP", "RDNS", "ASN", "USER_AGENT", "URI")
try:
# Check if at least a server has Greylist activated
greylist_activated = False
services = getenv("SERVER_NAME", "").strip()
if not services:
LOGGER.warning("No services found, exiting...")
sys_exit(0)
services = services.split(" ")
services_greylist_urls = {}
# Multisite case
if getenv("MULTISITE", "no") == "yes":
for first_server in getenv("SERVER_NAME", "").split(" "):
for first_server in services:
if getenv(f"{first_server}_USE_GREYLIST", getenv("USE_GREYLIST", "no")) == "yes":
greylist_activated = True
break
# Get URLs
services_greylist_urls[first_server] = {}
for kind in KINDS:
services_greylist_urls[first_server][kind] = set()
for url in getenv(f"{first_server}_GREYLIST_{kind}_URLS", getenv(f"GREYLIST_{kind}_URLS", "")).strip().split(" "):
if url:
services_greylist_urls[first_server][kind].add(url)
# Singlesite case
elif getenv("USE_GREYLIST", "no") == "yes":
greylist_activated = True
# Get URLs
services_greylist_urls[services[0]] = {}
for kind in KINDS:
services_greylist_urls[services[0]][kind] = set()
for url in getenv(f"GREYLIST_{kind}_URLS", "").strip().split(" "):
if url:
services_greylist_urls[services[0]][kind].add(url)
if not greylist_activated:
LOGGER.info("Greylist is not activated, skipping downloads...")
sys_exit(0)
JOB = Job(LOGGER)
# Get URLs
urls = {"IP": [], "RDNS": [], "ASN": [], "USER_AGENT": [], "URI": []}
for kind in urls:
for url in getenv(f"GREYLIST_{kind}_URLS", "").split(" "):
if url and url not in urls[kind]:
urls[kind].append(url)
# Don't go further if the cache is fresh
kinds_fresh = {"IP": True, "RDNS": True, "ASN": True, "USER_AGENT": True, "URI": True}
for kind in kinds_fresh:
if not JOB.is_cached_file(f"{kind}.list", "hour"):
if urls[kind]:
kinds_fresh[kind] = False
LOGGER.info(f"Greylist for {kind} is not cached, processing downloads..")
continue
LOGGER.info(f"Greylist for {kind} is already in cache, skipping downloads...")
if not urls[kind]:
LOGGER.warning(f"Greylist for {kind} is cached but no URL is configured, removing from cache...")
deleted, err = JOB.del_cache(f"{kind}.list")
if not any(url for urls in services_greylist_urls.values() for url in urls.values()):
LOGGER.warning("No greylist URL is configured, nothing to do...")
if Path(JOB.job_path.joinpath("urls.json")).exists():
LOGGER.warning("Greylist URLs are cached but no URL is configured, removing from cache...")
deleted, err = JOB.del_cache("urls.json")
if not deleted:
LOGGER.warning(f"Couldn't delete {kind}.list from cache : {err}")
if all(kinds_fresh.values()):
if not any(urls.values()):
LOGGER.info("No greylist URL is configured, nothing to do...")
LOGGER.warning(f"Couldn't delete greylist URLs from cache : {err}")
sys_exit(0)
cached_urls = loads(JOB.get_cache("urls.json") or "{}")
tmp_downloads = Path(sep, "var", "tmp", "bunkerweb", "greylist")
tmp_downloads.mkdir(parents=True, exist_ok=True)
downloaded_urls = {}
failed_urls = set()
current_timestamp = datetime.now().astimezone().timestamp()
# Loop on kinds
for kind, urls_list in urls.items():
if kinds_fresh[kind]:
continue
for service, kinds in services_greylist_urls.items():
for kind, urls_list in kinds.items():
if not urls_list:
if Path(JOB.job_path.joinpath(service, f"{kind}.list")).exists():
LOGGER.warning(f"{service} greylist for {kind} is cached but no URL is configured, removing from cache...")
deleted, err = JOB.del_cache(f"{kind}.list", service_id=service)
if not deleted:
LOGGER.warning(f"Couldn't delete {service} {kind}.list from cache : {err}")
continue
# Write combined data of the kind in memory and check if it has changed
for url in urls_list:
try:
LOGGER.info(f"Downloading greylist data from {url} ...")
if url.startswith("file://"):
with open(normpath(url[7:]), "rb") as f:
iterable = f.readlines()
else:
resp = get(url, stream=True, timeout=10)
if resp.status_code != 200:
LOGGER.warning(f"Got status code {resp.status_code}, skipping...")
# Write combined data of the kind in memory and check if it has changed
content = b""
for url in urls_list:
try:
cached_url = cached_urls.get(url, {"time": 0, "tmp_path": ""})
# Check if the URL's last download timestamp is younger than 1 hour
if current_timestamp - cached_url["time"] < timedelta(hours=1).total_seconds():
downloaded_urls[url] = {
"time": cached_url["time"],
"tmp_path": tmp_downloads.joinpath(f"{bytes_hash(url, algorithm='sha1')}.list").as_posix(),
}
LOGGER.info(f"URL {url} has been downloaded less than 1 hour ago, skipping it...")
failed_urls.add(url)
status = 1 if status == 1 else 0
continue
iterable = resp.iter_lines()
i = 0
content = b""
for line in iterable:
line = line.strip()
if not line or line.startswith((b"#", b";")):
# Check if the URL has already been downloaded
if url in failed_urls:
continue
elif kind != "USER_AGENT":
line = line.split(b" ")[0]
ok, data = check_line(kind, line)
if ok:
content += data + b"\n"
i += 1
LOGGER.info(f"Downloaded {i} bad {kind}")
# Check if file has changed
new_hash = bytes_hash(content)
old_hash = JOB.cache_hash(f"{kind}.list")
if new_hash == old_hash:
LOGGER.info(f"New file {kind}.list is identical to cache file, reload is not needed")
else:
LOGGER.info(f"New file {kind}.list is different than cache file, reload is needed")
# Put file in cache
cached, err = JOB.cache_file(f"{kind}.list", content, checksum=new_hash)
if not cached:
LOGGER.error(f"Error while caching greylist : {err}")
status = 2
elif url in downloaded_urls:
LOGGER.info(f"URL {url} has already been downloaded, skipping it...")
content += Path(downloaded_urls[url]["tmp_path"]).read_bytes()
else:
status = 1
except:
LOGGER.info(f"Downloading greylist data from {url} ...")
if url.startswith("file://"):
with open(normpath(url[7:]), "rb") as f:
iterable = f.readlines()
else:
resp = get(url, stream=True, timeout=10)
if resp.status_code != 200:
LOGGER.warning(f"Got status code {resp.status_code}, skipping...")
continue
iterable = resp.iter_lines()
i = 0
for line in iterable:
line = line.strip()
if not line or line.startswith((b"#", b";")):
continue
elif kind != "USER_AGENT":
line = line.split(b" ")[0]
ok, data = check_line(kind, line)
if ok:
content += data + b"\n"
i += 1
LOGGER.info(f"Downloaded {i} bad {kind}")
downloaded_urls[url] = {
"time": current_timestamp,
"tmp_path": tmp_downloads.joinpath(f"{bytes_hash(url, algorithm='sha1')}.list").as_posix(),
}
except BaseException as e:
status = 2
LOGGER.error(f"Exception while getting {service} greylist from {url} :\n{e}")
failed_urls.add(url)
# Check if file has changed
new_hash = bytes_hash(content)
old_hash = JOB.cache_hash(f"{kind}.list", service_id=service)
if new_hash == old_hash:
LOGGER.info(f"New {service} file {kind}.list is identical to cache file, reload is not needed")
continue
LOGGER.info(f"New {service} file {kind}.list is different than cache file, reload is needed")
# Put file in cache
cached, err = JOB.cache_file(f"{kind}.list", content, service_id=service, checksum=new_hash)
if not cached:
LOGGER.error(f"Error while caching greylist : {err}")
status = 2
LOGGER.error(f"Exception while getting greylist from {url} :\n{format_exc()}")
continue
status = 1
cached, err = JOB.cache_file("urls.json", dumps(downloaded_urls, indent=2).encode("utf-8"))
if not cached:
LOGGER.error(f"Error while caching greylist URLs : {err}")
rmtree(tmp_downloads, ignore_errors=True)
except SystemExit as e:
status = e.code
except:

View file

@ -69,7 +69,7 @@
"type": "text"
},
"GREYLIST_IP_URLS": {
"context": "global",
"context": "multisite",
"default": "",
"help": "List of URLs, separated with spaces, containing good IP/network to put into the greylist. Also supports file:// URLs and and auth basic using http://user:pass@url scheme.",
"id": "greylist-ip-urls",
@ -78,7 +78,7 @@
"type": "text"
},
"GREYLIST_RDNS_URLS": {
"context": "global",
"context": "multisite",
"default": "",
"help": "List of URLs, separated with spaces, containing reverse DNS suffixes to put into the greylist. Also supports file:// URLs and and auth basic using http://user:pass@url scheme.",
"id": "greylist-rdns-urls",
@ -87,7 +87,7 @@
"type": "text"
},
"GREYLIST_ASN_URLS": {
"context": "global",
"context": "multisite",
"default": "",
"help": "List of URLs, separated with spaces, containing ASN to put into the greylist. Also supports file:// URLs and and auth basic using http://user:pass@url scheme.",
"id": "greylist-asn-urls",
@ -96,7 +96,7 @@
"type": "text"
},
"GREYLIST_USER_AGENT_URLS": {
"context": "global",
"context": "multisite",
"default": "",
"help": "List of URLs, separated with spaces, containing good User-Agent to put into the greylist. Also supports file:// URLs and and auth basic using http://user:pass@url scheme.",
"id": "greylist-user-agent-urls",
@ -105,7 +105,7 @@
"type": "text"
},
"GREYLIST_URI_URLS": {
"context": "global",
"context": "multisite",
"default": "",
"help": "List of URLs, separated with spaces, containing bad URI to put into the greylist. Also supports file:// URLs and and auth basic using http://user:pass@url scheme.",
"id": "greylist-uri-urls",

View file

@ -1,9 +1,13 @@
#!/usr/bin/env python3
from contextlib import suppress
from datetime import datetime, timedelta
from ipaddress import ip_address, ip_network
from json import dumps, loads
from os import getenv, sep
from os.path import join, normpath
from pathlib import Path
from shutil import rmtree
from sys import exit as sys_exit, path as sys_path
from traceback import format_exc
@ -36,93 +40,153 @@ status = 0
try:
# Check if at least a server has Realip activated
realip_activated = False
# Check if at least a server has Greylist activated
greylist_activated = False
services = getenv("SERVER_NAME", "").strip()
if not services:
LOGGER.warning("No services found, exiting...")
sys_exit(0)
services = services.split(" ")
services_realip_urls = {}
# Multisite case
if getenv("MULTISITE", "no") == "yes":
servers = getenv("SERVER_NAME", [])
if isinstance(servers, str):
servers = servers.split(" ")
for first_server in servers:
for first_server in services:
if getenv(f"{first_server}_USE_REAL_IP", getenv("USE_REAL_IP", "no")) == "yes":
realip_activated = True
break
# Get URLs
services_realip_urls[first_server] = set()
for url in getenv(f"{first_server}_REAL_IP_FROM_URLS", getenv("REAL_IP_FROM_URLS", "")).strip().split(" "):
if url:
services_realip_urls[first_server].add(url)
# Singlesite case
elif getenv("USE_REAL_IP", "no") == "yes":
realip_activated = True
# Get URLs
services_realip_urls[services[0]] = set()
for url in getenv("REAL_IP_FROM_URLS", "").strip().split(" "):
if url:
services_realip_urls[services[0]].add(url)
if not realip_activated:
LOGGER.info("RealIP is not activated, skipping download...")
sys_exit(0)
JOB = Job(LOGGER)
# Get URLs
urls = [url for url in getenv("REAL_IP_FROM_URLS", "").split(" ") if url]
# Don't go further if the cache is fresh
if JOB.is_cached_file("combined.list", "hour"):
LOGGER.info("RealIP list is already in cache, skipping download...")
if not urls:
LOGGER.warning("No URL found, deleting combined.list from cache...")
deleted, err = JOB.del_cache("combined.list")
if not any(services_realip_urls.values()):
LOGGER.warning("No URL configured, nothing to do...")
if Path(JOB.job_path.joinpath("urls.json")).exists():
LOGGER.warning("RealIP URLs are cached but no URL is configured, removing from cache...")
deleted, err = JOB.del_cache("urls.json")
if not deleted:
LOGGER.warning(f"Couldn't delete combined.list from cache : {err}")
LOGGER.warning(f"Couldn't delete realip URLs from cache : {err}")
sys_exit(0)
if not urls:
LOGGER.info("No URL found, skipping download...")
sys_exit(0)
cached_urls = loads(JOB.get_cache("urls.json") or "{}")
# Download and write data to temp file
i = 0
content = b""
for url in urls:
try:
LOGGER.info(f"Downloading RealIP list from {url} ...")
if url.startswith("file://"):
with open(normpath(url[7:]), "rb") as f:
iterable = f.readlines()
else:
resp = get(url, stream=True, timeout=10)
tmp_downloads = Path(sep, "var", "tmp", "bunkerweb", "realip")
tmp_downloads.mkdir(parents=True, exist_ok=True)
downloaded_urls = {}
failed_urls = set()
current_timestamp = datetime.now().astimezone().timestamp()
if resp.status_code != 200:
LOGGER.warning(f"Got status code {resp.status_code}, skipping...")
for service, urls in services_realip_urls.items():
if not urls:
if Path(JOB.job_path.joinpath(service, "combined.list")).exists():
LOGGER.warning(f"{service} realip combined.list is cached but no URL is configured, removing from cache...")
deleted, err = JOB.del_cache("combined.list", service_id=service)
if not deleted:
LOGGER.warning(f"Couldn't delete {service} combined.list from cache : {err}")
continue
# Write combined data of the kind in memory and check if it has changed
content = b""
for url in urls:
try:
cached_url = cached_urls.get(url, {"time": 0, "tmp_path": ""})
# Check if the URL's last download timestamp is younger than 1 hour
if current_timestamp - cached_url["time"] < timedelta(hours=1).total_seconds():
downloaded_urls[url] = {
"time": cached_url["time"],
"tmp_path": tmp_downloads.joinpath(f"{bytes_hash(url, algorithm='sha1')}.list").as_posix(),
}
LOGGER.info(f"URL {url} has been downloaded less than 1 hour ago, skipping it...")
failed_urls.add(url)
status = 1 if status == 1 else 0
continue
iterable = resp.iter_lines()
for line in iterable:
line = line.strip().split(b" ")[0]
if not line or line.startswith((b"#", b";")):
# Check if the URL has already been downloaded
if url in failed_urls:
continue
elif url in downloaded_urls:
LOGGER.info(f"URL {url} has already been downloaded, skipping it...")
content += Path(downloaded_urls[url]["tmp_path"]).read_bytes()
else:
LOGGER.info(f"Downloading realip data from {url} ...")
if url.startswith("file://"):
with open(normpath(url[7:]), "rb") as f:
iterable = f.readlines()
else:
resp = get(url, stream=True, timeout=10)
ok, data = check_line(line)
if ok:
content += data + b"\n"
i += 1
except:
if resp.status_code != 200:
LOGGER.warning(f"Got status code {resp.status_code}, skipping...")
continue
iterable = resp.iter_lines()
i = 0
for line in iterable:
line = line.strip().split(b" ")[0]
if not line or line.startswith((b"#", b";")):
continue
ok, data = check_line(line)
if ok:
content += data + b"\n"
i += 1
LOGGER.info(f"Downloaded {i} realip from {url}")
tmp_downloads.joinpath(f"{bytes_hash(url, algorithm='sha1')}.list").write_bytes(content)
downloaded_urls[url] = {
"time": current_timestamp,
"tmp_path": tmp_downloads.joinpath(f"{bytes_hash(url, algorithm='sha1')}.list").as_posix(),
}
except BaseException as e:
status = 2
LOGGER.error(f"Exception while getting {service} realip from {url} :\n{e}")
failed_urls.add(url)
# Check if file has changed
new_hash = bytes_hash(content)
old_hash = JOB.cache_hash("combined.list", service_id=service)
if new_hash == old_hash:
LOGGER.info(f"New {service} file combined.list is identical to cache file, reload is not needed")
continue
LOGGER.info(f"New {service} file combined.list is different than cache file, reload is needed")
# Put file in cache
cached, err = JOB.cache_file("combined.list", content, service_id=service, checksum=new_hash)
if not cached:
LOGGER.error(f"Error while caching realip : {err}")
status = 2
LOGGER.error(f"Exception while getting RealIP list from {url} :\n{format_exc()}")
continue
# Check if file has changed
new_hash = bytes_hash(content)
old_hash = JOB.cache_hash("combined.list")
if new_hash == old_hash:
LOGGER.info("New file is identical to cache file, reload is not needed")
sys_exit(0)
status = 1
# Put file in cache
cached, err = JOB.cache_file("combined.list", content, checksum=new_hash)
cached, err = JOB.cache_file("urls.json", dumps(downloaded_urls, indent=2).encode("utf-8"))
if not cached:
LOGGER.error(f"Error while caching list : {err}")
sys_exit(2)
LOGGER.error(f"Error while caching whitelist URLs : {err}")
LOGGER.info(f"Downloaded {i} trusted IP/net")
status = 1
rmtree(tmp_downloads, ignore_errors=True)
except SystemExit as e:
status = e.code
except:

View file

@ -51,7 +51,7 @@
"type": "check"
},
"REAL_IP_FROM_URLS": {
"context": "global",
"context": "multisite",
"default": "",
"help": "List of URLs containing trusted IPs / networks, separated with spaces, where proxied requests come from. Also supports file:// URLs and and auth basic using http://user:pass@url scheme.",
"id": "real-ip-from-urls",

View file

@ -1,10 +1,14 @@
#!/usr/bin/env python3
from contextlib import suppress
from datetime import datetime, timedelta
from ipaddress import ip_address, ip_network
from json import dumps, loads
from os import getenv, sep
from os.path import join, normpath
from pathlib import Path
from re import compile as re_compile
from shutil import rmtree
from sys import exit as sys_exit, path as sys_path
from traceback import format_exc
from typing import Tuple
@ -53,108 +57,162 @@ def check_line(kind: str, line: bytes) -> Tuple[bool, bytes]:
LOGGER = setup_logger("WHITELIST", getenv("LOG_LEVEL", "INFO"))
status = 0
KINDS = ("IP", "RDNS", "ASN", "USER_AGENT", "URI")
try:
# Check if at least a server has Whitelist activated
whitelist_activated = False
services = getenv("SERVER_NAME", "").strip()
if not services:
LOGGER.warning("No services found, exiting...")
sys_exit(0)
services = services.split(" ")
services_whitelist_urls = {}
# Multisite case
if getenv("MULTISITE", "no") == "yes":
for first_server in getenv("SERVER_NAME", "").split(" "):
for first_server in services:
if getenv(f"{first_server}_USE_WHITELIST", getenv("USE_WHITELIST", "yes")) == "yes":
whitelist_activated = True
break
# Get URLs
services_whitelist_urls[first_server] = {}
for kind in KINDS:
services_whitelist_urls[first_server][kind] = set()
for url in getenv(f"{first_server}_WHITELIST_{kind}_URLS", getenv(f"WHITELIST_{kind}_URLS", "")).strip().split(" "):
if url:
services_whitelist_urls[first_server][kind].add(url)
# Singlesite case
elif getenv("USE_WHITELIST", "yes") == "yes":
whitelist_activated = True
# Get URLs
services_whitelist_urls[services[0]] = {}
for kind in KINDS:
services_whitelist_urls[services[0]][kind] = set()
for url in getenv(f"WHITELIST_{kind}_URLS", "").strip().split(" "):
if url:
services_whitelist_urls[services[0]][kind].add(url)
if not whitelist_activated:
LOGGER.info("Whitelist is not activated, skipping downloads...")
sys_exit(0)
JOB = Job(LOGGER)
# Get URLs
urls = {"IP": [], "RDNS": [], "ASN": [], "USER_AGENT": [], "URI": []}
for kind in urls:
for url in getenv(f"WHITELIST_{kind}_URLS", "").split(" "):
if url and url not in urls[kind]:
urls[kind].append(url)
# Don't go further if the cache is fresh
kinds_fresh = {"IP": True, "RDNS": True, "ASN": True, "USER_AGENT": True, "URI": True}
for kind in kinds_fresh:
if not JOB.is_cached_file(f"{kind}.list", "hour"):
if urls[kind]:
kinds_fresh[kind] = False
LOGGER.info(f"Whitelist for {kind} is not cached, processing downloads..")
continue
LOGGER.info(f"Whitelist for {kind} is already in cache, skipping downloads...")
if not urls[kind]:
LOGGER.warning(f"Whitelist for {kind} is cached but no URL is configured, removing from cache...")
deleted, err = JOB.del_cache(f"{kind}.list")
if not any(url for urls in services_whitelist_urls.values() for url in urls.values()):
LOGGER.warning("No whitelist URL is configured, nothing to do...")
if Path(JOB.job_path.joinpath("urls.json")).exists():
LOGGER.warning("Whitelist URLs are cached but no URL is configured, removing from cache...")
deleted, err = JOB.del_cache("urls.json")
if not deleted:
LOGGER.warning(f"Couldn't delete {kind}.list from cache : {err}")
if all(kinds_fresh.values()):
if not any(urls.values()):
LOGGER.info("No whitelist URL is configured, nothing to do...")
LOGGER.warning(f"Couldn't delete whitelist URLs from cache : {err}")
sys_exit(0)
cached_urls = loads(JOB.get_cache("urls.json") or "{}")
tmp_downloads = Path(sep, "var", "tmp", "bunkerweb", "blacklist")
tmp_downloads.mkdir(parents=True, exist_ok=True)
downloaded_urls = {}
failed_urls = set()
current_timestamp = datetime.now().astimezone().timestamp()
# Loop on kinds
for kind, urls_list in urls.items():
if kinds_fresh[kind]:
continue
for service, kinds in services_whitelist_urls.items():
for kind, urls_list in kinds.items():
if not urls_list:
if Path(JOB.job_path.joinpath(service, f"{kind}.list")).exists():
LOGGER.warning(f"{service} whitelist for {kind} is cached but no URL is configured, removing from cache...")
deleted, err = JOB.del_cache(f"{kind}.list", service_id=service)
if not deleted:
LOGGER.warning(f"Couldn't delete {service} {kind}.list from cache : {err}")
continue
# Write combined data of the kind in memory and check if it has changed
for url in urls_list:
try:
LOGGER.info(f"Downloading whitelist data from {url} ...")
if url.startswith("file://"):
with open(normpath(url[7:]), "rb") as f:
iterable = f.readlines()
else:
resp = get(url, stream=True, timeout=10)
if resp.status_code != 200:
LOGGER.warning(f"Got status code {resp.status_code}, skipping...")
# Write combined data of the kind in memory and check if it has changed
content = b""
for url in urls_list:
try:
cached_url = cached_urls.get(url, {"time": 0, "tmp_path": ""})
# Check if the URL's last download timestamp is younger than 1 hour
if current_timestamp - cached_url["time"] < timedelta(hours=1).total_seconds():
downloaded_urls[url] = {
"time": cached_url["time"],
"tmp_path": tmp_downloads.joinpath(f"{bytes_hash(url, algorithm='sha1')}.list").as_posix(),
}
LOGGER.info(f"URL {url} has been downloaded less than 1 hour ago, skipping it...")
failed_urls.add(url)
status = 1 if status == 1 else 0
continue
iterable = resp.iter_lines()
i = 0
content = b""
for line in iterable:
line = line.strip()
if not line or line.startswith((b"#", b";")):
# Check if the URL has already been downloaded
if url in failed_urls:
continue
elif kind != "USER_AGENT":
line = line.split(b" ")[0]
ok, data = check_line(kind, line)
if ok:
content += data + b"\n"
i += 1
LOGGER.info(f"Downloaded {i} bad {kind}")
# Check if file has changed
new_hash = bytes_hash(content)
old_hash = JOB.cache_hash(f"{kind}.list")
if new_hash == old_hash:
LOGGER.info(f"New file {kind}.list is identical to cache file, reload is not needed")
else:
LOGGER.info(f"New file {kind}.list is different than cache file, reload is needed")
# Put file in cache
cached, err = JOB.cache_file(f"{kind}.list", content, checksum=new_hash)
if not cached:
LOGGER.error(f"Error while caching whitelist : {err}")
status = 2
elif url in downloaded_urls:
LOGGER.info(f"URL {url} has already been downloaded, skipping it...")
content += Path(downloaded_urls[url]["tmp_path"]).read_bytes()
else:
status = 1
except:
LOGGER.info(f"Downloading whitelist data from {url} ...")
if url.startswith("file://"):
with open(normpath(url[7:]), "rb") as f:
iterable = f.readlines()
else:
resp = get(url, stream=True, timeout=10)
if resp.status_code != 200:
LOGGER.warning(f"Got status code {resp.status_code}, skipping...")
continue
iterable = resp.iter_lines()
i = 0
for line in iterable:
line = line.strip()
if not line or line.startswith((b"#", b";")):
continue
elif kind != "USER_AGENT":
line = line.split(b" ")[0]
ok, data = check_line(kind, line)
if ok:
content += data + b"\n"
i += 1
LOGGER.info(f"Downloaded {i} bad {kind}")
downloaded_urls[url] = {
"time": current_timestamp,
"tmp_path": tmp_downloads.joinpath(f"{bytes_hash(url, algorithm='sha1')}.list").as_posix(),
}
except BaseException as e:
status = 2
LOGGER.error(f"Exception while getting {service} whitelist from {url} :\n{e}")
failed_urls.add(url)
# Check if file has changed
new_hash = bytes_hash(content)
old_hash = JOB.cache_hash(f"{kind}.list", service_id=service)
if new_hash == old_hash:
LOGGER.info(f"New {service} file {kind}.list is identical to cache file, reload is not needed")
continue
LOGGER.info(f"New {service} file {kind}.list is different than cache file, reload is needed")
# Put file in cache
cached, err = JOB.cache_file(f"{kind}.list", content, service_id=service, checksum=new_hash)
if not cached:
LOGGER.error(f"Error while caching whitelist : {err}")
status = 2
LOGGER.error(f"Exception while getting whitelist from {url} :\n{format_exc()}")
continue
status = 1
cached, err = JOB.cache_file("urls.json", dumps(downloaded_urls, indent=2).encode("utf-8"))
if not cached:
LOGGER.error(f"Error while caching whitelist URLs : {err}")
rmtree(tmp_downloads, ignore_errors=True)
except SystemExit as e:
status = e.code
except:

View file

@ -69,7 +69,7 @@
"type": "text"
},
"WHITELIST_IP_URLS": {
"context": "global",
"context": "multisite",
"default": "",
"help": "List of URLs, separated with spaces, containing good IP/network to whitelist. Also supports file:// URLs and and auth basic using http://user:pass@url scheme.",
"id": "whitelist-ip-urls",
@ -78,7 +78,7 @@
"type": "text"
},
"WHITELIST_RDNS_URLS": {
"context": "global",
"context": "multisite",
"default": "",
"help": "List of URLs, separated with spaces, containing reverse DNS suffixes to whitelist. Also supports file:// URLs and and auth basic using http://user:pass@url scheme.",
"id": "whitelist-rdns-urls",
@ -87,7 +87,7 @@
"type": "text"
},
"WHITELIST_ASN_URLS": {
"context": "global",
"context": "multisite",
"default": "",
"help": "List of URLs, separated with spaces, containing ASN to whitelist. Also supports file:// URLs and and auth basic using http://user:pass@url scheme.",
"id": "whitelist-asn-urls",
@ -96,7 +96,7 @@
"type": "text"
},
"WHITELIST_USER_AGENT_URLS": {
"context": "global",
"context": "multisite",
"default": "",
"help": "List of URLs, separated with spaces, containing good User-Agent to whitelist. Also supports file:// URLs and and auth basic using http://user:pass@url scheme.",
"id": "whitelist-user-agent-urls",
@ -105,7 +105,7 @@
"type": "text"
},
"WHITELIST_URI_URLS": {
"context": "global",
"context": "multisite",
"default": "",
"help": "List of URLs, separated with spaces, containing bad URI to whitelist. Also supports file:// URLs and and auth basic using http://user:pass@url scheme.",
"id": "whitelist-uri-urls",

View file

@ -16,6 +16,7 @@ local get_ips = utils.get_ips
local get_rdns = utils.get_rdns
local get_asn = utils.get_asn
local regex_match = utils.regex_match
local get_variable = utils.get_variable
local ipmatcher_new = ipmatcher.new
local tostring = tostring
local open = io.open
@ -26,12 +27,12 @@ function whitelist:initialize(ctx)
plugin.initialize(self, "whitelist", ctx)
-- Decode lists
if get_phase() ~= "init" and self:is_needed() then
local lists, err = self.datastore:get("plugin_whitelist_lists", true)
if not lists then
local datastore_lists, err = self.datastore:get("plugin_whitelist_lists_" .. self.ctx.bw.server_name, true)
if not datastore_lists then
self.logger:log(ERR, err)
self.lists = {}
else
self.lists = lists
self.lists = datastore_lists
end
local kinds = {
["IP"] = {},
@ -41,10 +42,10 @@ function whitelist:initialize(ctx)
["URI"] = {},
}
for kind, _ in pairs(kinds) do
if not self.lists[kind] then
self.lists[kind] = {}
end
for data in self.variables["WHITELIST_" .. kind]:gmatch("%S+") do
if not self.lists[kind] then
self.lists[kind] = {}
end
table.insert(self.lists[kind], data)
end
end
@ -73,6 +74,7 @@ function whitelist:init()
if not self:is_needed() then
return self:ret(true, "init not needed")
end
-- Read whitelists
local whitelists = {
["IP"] = {},
@ -81,21 +83,41 @@ function whitelist:init()
["USER_AGENT"] = {},
["URI"] = {},
}
local i = 0
for kind, _ in pairs(whitelists) do
local f, _ = open("/var/cache/bunkerweb/whitelist/" .. kind .. ".list", "r")
if f then
for line in f:lines() do
table.insert(whitelists[kind], line)
i = i + 1
end
f:close()
end
local server_name, err = get_variable("SERVER_NAME", false)
if not server_name then
return self:ret(false, "can't get SERVER_NAME variable : " .. err)
end
-- Load them into datastore
local ok, err = self.datastore:set("plugin_whitelist_lists", whitelists, nil, true)
if not ok then
return self:ret(false, "can't store whitelist list into datastore : " .. err)
-- Iterate over each kind and server
local i = 0
for key in server_name:gmatch("%S+") do
for kind, _ in pairs(whitelists) do
local file_path = "/var/cache/bunkerweb/whitelist/" .. key .. "/" .. kind .. ".list"
local f = open(file_path, "r")
if f then
for line in f:lines() do
table.insert(whitelists[kind], line)
i = i + 1
end
f:close()
end
end
-- Load service specific ones into datastore
local ok
ok, err = self.datastore:set("plugin_whitelist_lists_" .. key, whitelists, nil, true)
if not ok then
return self:ret(false, "can't store whitelist list into datastore : " .. err)
end
whitelists = {
["IP"] = {},
["RDNS"] = {},
["ASN"] = {},
["USER_AGENT"] = {},
["URI"] = {},
}
end
return self:ret(true, "successfully loaded " .. tostring(i) .. " IP/network/rDNS/ASN/User-Agent/URI")
end