diff --git a/docs/settings.md b/docs/settings.md index 9a518f170..cf693c5ba 100644 --- a/docs/settings.md +++ b/docs/settings.md @@ -156,16 +156,16 @@ Deny access based on internal and external IP/network/rDNS/ASN blacklists. | `BLACKLIST_IGNORE_ASN` | | multisite | no | List of ASN numbers, separated with spaces, to ignore in the blacklist. | | `BLACKLIST_IGNORE_USER_AGENT` | | multisite | no | List of User-Agent (PCRE regex), separated with spaces, to ignore in the blacklist. | | `BLACKLIST_IGNORE_URI` | | multisite | no | List of URI (PCRE regex), separated with spaces, to ignore in the blacklist. | -| `BLACKLIST_IP_URLS` | `https://www.dan.me.uk/torlist/?exit` | global | no | List of URLs, separated with spaces, containing bad IP/network to block. Also supports file:// URLs and and auth basic using http://user:pass@url scheme. | -| `BLACKLIST_RDNS_URLS` | | global | no | List of URLs, separated with spaces, containing reverse DNS suffixes to block. Also supports file:// URLs and and auth basic using http://user:pass@url scheme. | -| `BLACKLIST_ASN_URLS` | | global | no | List of URLs, separated with spaces, containing ASN to block. Also supports file:// URLs and and auth basic using http://user:pass@url scheme. | -| `BLACKLIST_USER_AGENT_URLS` | `https://raw.githubusercontent.com/mitchellkrogza/nginx-ultimate-bad-bot-blocker/master/_generator_lists/bad-user-agents.list` | global | no | List of URLs, separated with spaces, containing bad User-Agent to block. Also supports file:// URLs and and auth basic using http://user:pass@url scheme. | -| `BLACKLIST_URI_URLS` | | global | no | List of URLs, separated with spaces, containing bad URI to block. Also supports file:// URLs and and auth basic using http://user:pass@url scheme. | -| `BLACKLIST_IGNORE_IP_URLS` | | global | no | List of URLs, separated with spaces, containing IP/network to ignore in the blacklist. Also supports file:// URLs and and auth basic using http://user:pass@url scheme. 
| -| `BLACKLIST_IGNORE_RDNS_URLS` | | global | no | List of URLs, separated with spaces, containing reverse DNS suffixes to ignore in the blacklist. Also supports file:// URLs and and auth basic using http://user:pass@url scheme. | -| `BLACKLIST_IGNORE_ASN_URLS` | | global | no | List of URLs, separated with spaces, containing ASN to ignore in the blacklist. Also supports file:// URLs and and auth basic using http://user:pass@url scheme. | -| `BLACKLIST_IGNORE_USER_AGENT_URLS` | | global | no | List of URLs, separated with spaces, containing User-Agent to ignore in the blacklist. Also supports file:// URLs and and auth basic using http://user:pass@url scheme. | -| `BLACKLIST_IGNORE_URI_URLS` | | global | no | List of URLs, separated with spaces, containing URI to ignore in the blacklist. Also supports file:// URLs and and auth basic using http://user:pass@url scheme. | +| `BLACKLIST_IP_URLS` | `https://www.dan.me.uk/torlist/?exit` | multisite | no | List of URLs, separated with spaces, containing bad IP/network to block. Also supports file:// URLs and auth basic using http://user:pass@url scheme. | +| `BLACKLIST_RDNS_URLS` | | multisite | no | List of URLs, separated with spaces, containing reverse DNS suffixes to block. Also supports file:// URLs and auth basic using http://user:pass@url scheme. | +| `BLACKLIST_ASN_URLS` | | multisite | no | List of URLs, separated with spaces, containing ASN to block. Also supports file:// URLs and auth basic using http://user:pass@url scheme. | +| `BLACKLIST_USER_AGENT_URLS` | `https://raw.githubusercontent.com/mitchellkrogza/nginx-ultimate-bad-bot-blocker/master/_generator_lists/bad-user-agents.list` | multisite | no | List of URLs, separated with spaces, containing bad User-Agent to block. Also supports file:// URLs and auth basic using http://user:pass@url scheme. | +| `BLACKLIST_URI_URLS` | | multisite | no | List of URLs, separated with spaces, containing bad URI to block. 
Also supports file:// URLs and auth basic using http://user:pass@url scheme. | +| `BLACKLIST_IGNORE_IP_URLS` | | multisite | no | List of URLs, separated with spaces, containing IP/network to ignore in the blacklist. Also supports file:// URLs and auth basic using http://user:pass@url scheme. | +| `BLACKLIST_IGNORE_RDNS_URLS` | | multisite | no | List of URLs, separated with spaces, containing reverse DNS suffixes to ignore in the blacklist. Also supports file:// URLs and auth basic using http://user:pass@url scheme. | +| `BLACKLIST_IGNORE_ASN_URLS` | | multisite | no | List of URLs, separated with spaces, containing ASN to ignore in the blacklist. Also supports file:// URLs and auth basic using http://user:pass@url scheme. | +| `BLACKLIST_IGNORE_USER_AGENT_URLS` | | multisite | no | List of URLs, separated with spaces, containing User-Agent to ignore in the blacklist. Also supports file:// URLs and auth basic using http://user:pass@url scheme. | +| `BLACKLIST_IGNORE_URI_URLS` | | multisite | no | List of URLs, separated with spaces, containing URI to ignore in the blacklist. Also supports file:// URLs and auth basic using http://user:pass@url scheme. | ## Brotli @@ -299,11 +299,11 @@ Allow access while keeping security features based on internal and external IP/n | `GREYLIST_ASN` | | multisite | no | List of ASN numbers, separated with spaces, to put into the greylist. | | `GREYLIST_USER_AGENT` | | multisite | no | List of User-Agent (PCRE regex), separated with spaces, to put into the greylist. | | `GREYLIST_URI` | | multisite | no | List of URI (PCRE regex), separated with spaces, to put into the greylist. | -| `GREYLIST_IP_URLS` | | global | no | List of URLs, separated with spaces, containing good IP/network to put into the greylist. Also supports file:// URLs and and auth basic using http://user:pass@url scheme. 
| -| `GREYLIST_RDNS_URLS` | | global | no | List of URLs, separated with spaces, containing reverse DNS suffixes to put into the greylist. Also supports file:// URLs and and auth basic using http://user:pass@url scheme. | -| `GREYLIST_ASN_URLS` | | global | no | List of URLs, separated with spaces, containing ASN to put into the greylist. Also supports file:// URLs and and auth basic using http://user:pass@url scheme. | -| `GREYLIST_USER_AGENT_URLS` | | global | no | List of URLs, separated with spaces, containing good User-Agent to put into the greylist. Also supports file:// URLs and and auth basic using http://user:pass@url scheme. | -| `GREYLIST_URI_URLS` | | global | no | List of URLs, separated with spaces, containing bad URI to put into the greylist. Also supports file:// URLs and and auth basic using http://user:pass@url scheme. | +| `GREYLIST_IP_URLS` | | multisite | no | List of URLs, separated with spaces, containing good IP/network to put into the greylist. Also supports file:// URLs and auth basic using http://user:pass@url scheme. | +| `GREYLIST_RDNS_URLS` | | multisite | no | List of URLs, separated with spaces, containing reverse DNS suffixes to put into the greylist. Also supports file:// URLs and auth basic using http://user:pass@url scheme. | +| `GREYLIST_ASN_URLS` | | multisite | no | List of URLs, separated with spaces, containing ASN to put into the greylist. Also supports file:// URLs and auth basic using http://user:pass@url scheme. | +| `GREYLIST_USER_AGENT_URLS` | | multisite | no | List of URLs, separated with spaces, containing good User-Agent to put into the greylist. Also supports file:// URLs and auth basic using http://user:pass@url scheme. | +| `GREYLIST_URI_URLS` | | multisite | no | List of URLs, separated with spaces, containing bad URI to put into the greylist. Also supports file:// URLs and auth basic using http://user:pass@url scheme. 
| ## Gzip @@ -530,7 +530,7 @@ Get real IP of clients when BunkerWeb is behind a reverse proxy / load balancer. | `REAL_IP_FROM` | `192.168.0.0/16 172.16.0.0/12 10.0.0.0/8` | multisite | no | List of trusted IPs / networks, separated with spaces, where proxied requests come from. | | `REAL_IP_HEADER` | `X-Forwarded-For` | multisite | no | HTTP header containing the real IP or special value proxy_protocol for PROXY protocol. | | `REAL_IP_RECURSIVE` | `yes` | multisite | no | Perform a recursive search in the header container IP address. | -| `REAL_IP_FROM_URLS` | | global | no | List of URLs containing trusted IPs / networks, separated with spaces, where proxied requests come from. Also supports file:// URLs and and auth basic using http://user:pass@url scheme. | +| `REAL_IP_FROM_URLS` | | multisite | no | List of URLs containing trusted IPs / networks, separated with spaces, where proxied requests come from. Also supports file:// URLs and auth basic using http://user:pass@url scheme. | ## Redirect @@ -715,8 +715,8 @@ Allow access based on internal and external IP/network/rDNS/ASN whitelists. | `WHITELIST_ASN` | `32934` | multisite | no | List of ASN numbers, separated with spaces, to whitelist. | | `WHITELIST_USER_AGENT` | | multisite | no | List of User-Agent (PCRE regex), separated with spaces, to whitelist. | | `WHITELIST_URI` | | multisite | no | List of URI (PCRE regex), separated with spaces, to whitelist. | -| `WHITELIST_IP_URLS` | | global | no | List of URLs, separated with spaces, containing good IP/network to whitelist. Also supports file:// URLs and and auth basic using http://user:pass@url scheme. | -| `WHITELIST_RDNS_URLS` | | global | no | List of URLs, separated with spaces, containing reverse DNS suffixes to whitelist. Also supports file:// URLs and and auth basic using http://user:pass@url scheme. | -| `WHITELIST_ASN_URLS` | | global | no | List of URLs, separated with spaces, containing ASN to whitelist. 
Also supports file:// URLs and and auth basic using http://user:pass@url scheme. | -| `WHITELIST_USER_AGENT_URLS` | | global | no | List of URLs, separated with spaces, containing good User-Agent to whitelist. Also supports file:// URLs and and auth basic using http://user:pass@url scheme. | -| `WHITELIST_URI_URLS` | | global | no | List of URLs, separated with spaces, containing bad URI to whitelist. Also supports file:// URLs and and auth basic using http://user:pass@url scheme. | +| `WHITELIST_IP_URLS` | | multisite | no | List of URLs, separated with spaces, containing good IP/network to whitelist. Also supports file:// URLs and auth basic using http://user:pass@url scheme. | +| `WHITELIST_RDNS_URLS` | | multisite | no | List of URLs, separated with spaces, containing reverse DNS suffixes to whitelist. Also supports file:// URLs and auth basic using http://user:pass@url scheme. | +| `WHITELIST_ASN_URLS` | | multisite | no | List of URLs, separated with spaces, containing ASN to whitelist. Also supports file:// URLs and auth basic using http://user:pass@url scheme. | +| `WHITELIST_USER_AGENT_URLS` | | multisite | no | List of URLs, separated with spaces, containing good User-Agent to whitelist. Also supports file:// URLs and auth basic using http://user:pass@url scheme. | +| `WHITELIST_URI_URLS` | | multisite | no | List of URLs, separated with spaces, containing bad URI to whitelist. Also supports file:// URLs and auth basic using http://user:pass@url scheme. 
| diff --git a/src/common/core/blacklist/blacklist.lua b/src/common/core/blacklist/blacklist.lua index 4eae36a50..f531125c6 100644 --- a/src/common/core/blacklist/blacklist.lua +++ b/src/common/core/blacklist/blacklist.lua @@ -13,6 +13,7 @@ local get_deny_status = utils.get_deny_status local get_rdns = utils.get_rdns local get_asn = utils.get_asn local regex_match = utils.regex_match +local get_variable = utils.get_variable local ipmatcher_new = ipmatcher.new local tostring = tostring local open = io.open @@ -22,12 +23,12 @@ function blacklist:initialize(ctx) plugin.initialize(self, "blacklist", ctx) -- Decode lists if get_phase() ~= "init" and self:is_needed() then - local lists, err = self.datastore:get("plugin_blacklist_lists", true) - if not lists then + local datastore_lists, err = self.datastore:get("plugin_blacklist_lists_" .. self.ctx.bw.server_name, true) + if not datastore_lists then self.logger:log(ERR, err) self.lists = {} else - self.lists = lists + self.lists = datastore_lists end local kinds = { ["IP"] = {}, @@ -42,10 +43,10 @@ function blacklist:initialize(ctx) ["IGNORE_URI"] = {}, } for kind, _ in pairs(kinds) do + if not self.lists[kind] then + self.lists[kind] = {} + end for data in self.variables["BLACKLIST_" .. kind]:gmatch("%S+") do - if not self.lists[kind] then - self.lists[kind] = {} - end table.insert(self.lists[kind], data) end end @@ -70,7 +71,7 @@ function blacklist:is_needed() end function blacklist:init() - -- Check if init needed + -- Check if init is needed if not self:is_needed() then return self:ret(true, "init not needed") end @@ -88,21 +89,46 @@ function blacklist:init() ["IGNORE_USER_AGENT"] = {}, ["IGNORE_URI"] = {}, } - local i = 0 - for kind, _ in pairs(blacklists) do - local f, _ = open("/var/cache/bunkerweb/blacklist/" .. kind .. 
".list", "r") - if f then - for line in f:lines() do - table.insert(blacklists[kind], line) - i = i + 1 - end - f:close() - end + + local server_name, err = get_variable("SERVER_NAME", false) + if not server_name then + return self:ret(false, "can't get SERVER_NAME variable : " .. err) end - -- Load them into datastore - local ok, err = self.datastore:set("plugin_blacklist_lists", blacklists, nil, true) - if not ok then - return self:ret(false, "can't store blacklist list into datastore : " .. err) + + -- Iterate over each kind and server + local i = 0 + for key in server_name:gmatch("%S+") do + for kind, _ in pairs(blacklists) do + local file_path = "/var/cache/bunkerweb/blacklist/" .. key .. "/" .. kind .. ".list" + local f = open(file_path, "r") + if f then + for line in f:lines() do + table.insert(blacklists[kind], line) + i = i + 1 + end + f:close() + end + end + + -- Load service specific ones into datastore + local ok + ok, err = self.datastore:set("plugin_blacklist_lists_" .. key, blacklists, nil, true) + if not ok then + return self:ret(false, "can't store blacklist list into datastore : " .. err) + end + + blacklists = { + ["IP"] = {}, + ["RDNS"] = {}, + ["ASN"] = {}, + ["USER_AGENT"] = {}, + ["URI"] = {}, + ["IGNORE_IP"] = {}, + ["IGNORE_RDNS"] = {}, + ["IGNORE_ASN"] = {}, + ["IGNORE_USER_AGENT"] = {}, + ["IGNORE_URI"] = {}, + } end return self:ret(true, "successfully loaded " .. tostring(i) .. 
" IP/network/rDNS/ASN/User-Agent/URI") end diff --git a/src/common/core/blacklist/jobs/blacklist-download.py b/src/common/core/blacklist/jobs/blacklist-download.py index 11ab47513..21dc085c7 100644 --- a/src/common/core/blacklist/jobs/blacklist-download.py +++ b/src/common/core/blacklist/jobs/blacklist-download.py @@ -1,10 +1,14 @@ #!/usr/bin/env python3 from contextlib import suppress +from datetime import datetime, timedelta from ipaddress import ip_address, ip_network +from json import dumps, loads from os import getenv, sep from os.path import join, normpath +from pathlib import Path from re import compile as re_compile +from shutil import rmtree from sys import exit as sys_exit, path as sys_path from traceback import format_exc from typing import Tuple @@ -53,130 +57,162 @@ def check_line(kind: str, line: bytes) -> Tuple[bool, bytes]: LOGGER = setup_logger("BLACKLIST", getenv("LOG_LEVEL", "INFO")) status = 0 +KINDS = ("IP", "RDNS", "ASN", "USER_AGENT", "URI", "IGNORE_IP", "IGNORE_RDNS", "IGNORE_ASN", "IGNORE_USER_AGENT", "IGNORE_URI") + try: # Check if at least a server has Blacklist activated blacklist_activated = False + + services = getenv("SERVER_NAME", "").strip() + + if not services: + LOGGER.warning("No services found, exiting...") + sys_exit(0) + + services = services.split(" ") + services_blacklist_urls = {} + # Multisite case if getenv("MULTISITE", "no") == "yes": - for first_server in getenv("SERVER_NAME", "").split(" "): + for first_server in services: if getenv(f"{first_server}_USE_BLACKLIST", getenv("USE_BLACKLIST", "yes")) == "yes": blacklist_activated = True - break + + # Get URLs + services_blacklist_urls[first_server] = {} + for kind in KINDS: + services_blacklist_urls[first_server][kind] = set() + for url in getenv(f"{first_server}_BLACKLIST_{kind}_URLS", getenv(f"BLACKLIST_{kind}_URLS", "")).strip().split(" "): + if url: + services_blacklist_urls[first_server][kind].add(url) # Singlesite case elif getenv("USE_BLACKLIST", "yes") == "yes": 
blacklist_activated = True + # Get URLs + services_blacklist_urls[services[0]] = {} + for kind in KINDS: + services_blacklist_urls[services[0]][kind] = set() + for url in getenv(f"BLACKLIST_{kind}_URLS", "").strip().split(" "): + if url: + services_blacklist_urls[services[0]][kind].add(url) + if not blacklist_activated: LOGGER.info("Blacklist is not activated, skipping downloads...") sys_exit(0) JOB = Job(LOGGER) - # Get URLs - urls = { - "IP": [], - "RDNS": [], - "ASN": [], - "USER_AGENT": [], - "URI": [], - "IGNORE_IP": [], - "IGNORE_RDNS": [], - "IGNORE_ASN": [], - "IGNORE_USER_AGENT": [], - "IGNORE_URI": [], - } - for kind in urls: - for url in getenv(f"BLACKLIST_{kind}_URLS", "").split(" "): - if url and url not in urls[kind]: - urls[kind].append(url) - - # Don't go further if the cache is fresh - kinds_fresh = { - "IP": True, - "RDNS": True, - "ASN": True, - "USER_AGENT": True, - "URI": True, - "IGNORE_IP": True, - "IGNORE_RDNS": True, - "IGNORE_ASN": True, - "IGNORE_USER_AGENT": True, - "IGNORE_URI": True, - } - for kind in kinds_fresh: - if not JOB.is_cached_file(f"{kind}.list", "hour"): - if urls[kind]: - kinds_fresh[kind] = False - LOGGER.info(f"Blacklist for {kind} is not cached, processing downloads..") - continue - - LOGGER.info(f"Blacklist for {kind} is already in cache, skipping downloads...") - - if not urls[kind]: - LOGGER.warning(f"Blacklist for {kind} is cached but no URL is configured, removing from cache...") - deleted, err = JOB.del_cache(f"{kind}.list") + if not any(url for urls in services_blacklist_urls.values() for url in urls.values()): + LOGGER.warning("No blacklist URL is configured, nothing to do...") + if Path(JOB.job_path.joinpath("urls.json")).exists(): + LOGGER.warning("Blacklist URLs are cached but no URL is configured, removing from cache...") + deleted, err = JOB.del_cache("urls.json") if not deleted: - LOGGER.warning(f"Couldn't delete {kind}.list from cache : {err}") - - if all(kinds_fresh.values()): - if not 
any(urls.values()): - LOGGER.info("No blacklist URL is configured, nothing to do...") + LOGGER.warning(f"Couldn't delete blacklist URLs from cache : {err}") sys_exit(0) + cached_urls = loads(JOB.get_cache("urls.json") or "{}") + + tmp_downloads = Path(sep, "var", "tmp", "bunkerweb", "blacklist") + tmp_downloads.mkdir(parents=True, exist_ok=True) + downloaded_urls = {} + failed_urls = set() + current_timestamp = datetime.now().astimezone().timestamp() + # Loop on kinds - for kind, urls_list in urls.items(): - if kinds_fresh[kind]: - continue + for service, kinds in services_blacklist_urls.items(): + for kind, urls_list in kinds.items(): + if not urls_list: + if Path(JOB.job_path.joinpath(service, f"{kind}.list")).exists(): + LOGGER.warning(f"{service} blacklist for {kind} is cached but no URL is configured, removing from cache...") + deleted, err = JOB.del_cache(f"{kind}.list", service_id=service) + if not deleted: + LOGGER.warning(f"Couldn't delete {service} {kind}.list from cache : {err}") + continue - # Write combined data of the kind in memory and check if it has changed - for url in urls_list: - try: - LOGGER.info(f"Downloading blacklist data from {url} ...") - if url.startswith("file://"): - with open(normpath(url[7:]), "rb") as f: - iterable = f.readlines() - else: - resp = get(url, stream=True, timeout=10) - - if resp.status_code != 200: - LOGGER.warning(f"Got status code {resp.status_code}, skipping...") + # Write combined data of the kind in memory and check if it has changed + content = b"" + for url in urls_list: + try: + cached_url = cached_urls.get(url, {"time": 0, "tmp_path": ""}) + # Check if the URL's last download timestamp is younger than 1 hour + if current_timestamp - cached_url["time"] < timedelta(hours=1).total_seconds(): + downloaded_urls[url] = { + "time": cached_url["time"], + "tmp_path": tmp_downloads.joinpath(f"{bytes_hash(url, algorithm='sha1')}.list").as_posix(), + } + LOGGER.info(f"URL {url} has been downloaded less than 1 hour ago, 
skipping it...") + failed_urls.add(url) + status = 1 if status == 1 else 0 continue - iterable = resp.iter_lines() - - i = 0 - content = b"" - for line in iterable: - line = line.strip() - - if not line or line.startswith((b"#", b";")): + # Check if the URL has already been downloaded + if url in failed_urls: continue - elif kind != "USER_AGENT": - line = line.split(b" ")[0] - - ok, data = check_line(kind, line) - if ok: - content += data + b"\n" - i += 1 - - LOGGER.info(f"Downloaded {i} bad {kind}") - # Check if file has changed - new_hash = bytes_hash(content) - old_hash = JOB.cache_hash(f"{kind}.list") - if new_hash == old_hash: - LOGGER.info(f"New file {kind}.list is identical to cache file, reload is not needed") - else: - LOGGER.info(f"New file {kind}.list is different than cache file, reload is needed") - # Put file in cache - cached, err = JOB.cache_file(f"{kind}.list", content, checksum=new_hash) - if not cached: - LOGGER.error(f"Error while caching blacklist : {err}") - status = 2 + elif url in downloaded_urls: + LOGGER.info(f"URL {url} has already been downloaded, skipping it...") + content += Path(downloaded_urls[url]["tmp_path"]).read_bytes() else: - status = 1 - except: + LOGGER.info(f"Downloading blacklist data from {url} ...") + if url.startswith("file://"): + with open(normpath(url[7:]), "rb") as f: + iterable = f.readlines() + else: + resp = get(url, stream=True, timeout=10) + + if resp.status_code != 200: + LOGGER.warning(f"Got status code {resp.status_code}, skipping...") + continue + + iterable = resp.iter_lines() + + i = 0 + for line in iterable: + line = line.strip() + + if not line or line.startswith((b"#", b";")): + continue + elif kind != "USER_AGENT": + line = line.split(b" ")[0] + + ok, data = check_line(kind, line) + if ok: + content += data + b"\n" + i += 1 + + LOGGER.info(f"Downloaded {i} bad {kind}") + downloaded_urls[url] = { + "time": current_timestamp, + "tmp_path": tmp_downloads.joinpath(f"{bytes_hash(url, 
algorithm='sha1')}.list").as_posix(), + } + except BaseException as e: + status = 2 + LOGGER.error(f"Exception while getting {service} blacklist from {url} :\n{e}") + failed_urls.add(url) + + # Check if file has changed + new_hash = bytes_hash(content) + old_hash = JOB.cache_hash(f"{kind}.list", service_id=service) + if new_hash == old_hash: + LOGGER.info(f"New {service} file {kind}.list is identical to cache file, reload is not needed") + continue + + LOGGER.info(f"New {service} file {kind}.list is different than cache file, reload is needed") + # Put file in cache + cached, err = JOB.cache_file(f"{kind}.list", content, service_id=service, checksum=new_hash) + if not cached: + LOGGER.error(f"Error while caching blacklist : {err}") status = 2 - LOGGER.error(f"Exception while getting blacklist from {url} :\n{format_exc()}") + continue + + status = 1 + + cached, err = JOB.cache_file("urls.json", dumps(downloaded_urls, indent=2).encode("utf-8")) + if not cached: + LOGGER.error(f"Error while caching blacklist URLs : {err}") + + rmtree(tmp_downloads, ignore_errors=True) except SystemExit as e: status = e.code except: diff --git a/src/common/core/blacklist/plugin.json b/src/common/core/blacklist/plugin.json index 82a863114..2fc7e12df 100644 --- a/src/common/core/blacklist/plugin.json +++ b/src/common/core/blacklist/plugin.json @@ -114,7 +114,7 @@ "type": "text" }, "BLACKLIST_IP_URLS": { - "context": "global", + "context": "multisite", "default": "https://www.dan.me.uk/torlist/?exit", "help": "List of URLs, separated with spaces, containing bad IP/network to block. Also supports file:// URLs and and auth basic using http://user:pass@url scheme.", "id": "blacklist-ip-urls", @@ -123,7 +123,7 @@ "type": "text" }, "BLACKLIST_RDNS_URLS": { - "context": "global", + "context": "multisite", "default": "", "help": "List of URLs, separated with spaces, containing reverse DNS suffixes to block. 
Also supports file:// URLs and and auth basic using http://user:pass@url scheme.", "id": "blacklist-rdns-urls", @@ -132,7 +132,7 @@ "type": "text" }, "BLACKLIST_ASN_URLS": { - "context": "global", + "context": "multisite", "default": "", "help": "List of URLs, separated with spaces, containing ASN to block. Also supports file:// URLs and and auth basic using http://user:pass@url scheme.", "id": "blacklist-asn-urls", @@ -141,7 +141,7 @@ "type": "text" }, "BLACKLIST_USER_AGENT_URLS": { - "context": "global", + "context": "multisite", "default": "https://raw.githubusercontent.com/mitchellkrogza/nginx-ultimate-bad-bot-blocker/master/_generator_lists/bad-user-agents.list", "help": "List of URLs, separated with spaces, containing bad User-Agent to block. Also supports file:// URLs and and auth basic using http://user:pass@url scheme.", "id": "blacklist-user-agent-urls", @@ -150,7 +150,7 @@ "type": "text" }, "BLACKLIST_URI_URLS": { - "context": "global", + "context": "multisite", "default": "", "help": "List of URLs, separated with spaces, containing bad URI to block. Also supports file:// URLs and and auth basic using http://user:pass@url scheme.", "id": "blacklist-uri-urls", @@ -159,7 +159,7 @@ "type": "text" }, "BLACKLIST_IGNORE_IP_URLS": { - "context": "global", + "context": "multisite", "default": "", "help": "List of URLs, separated with spaces, containing IP/network to ignore in the blacklist. Also supports file:// URLs and and auth basic using http://user:pass@url scheme.", "id": "blacklist-ignore-ip-urls", @@ -168,7 +168,7 @@ "type": "text" }, "BLACKLIST_IGNORE_RDNS_URLS": { - "context": "global", + "context": "multisite", "default": "", "help": "List of URLs, separated with spaces, containing reverse DNS suffixes to ignore in the blacklist. 
Also supports file:// URLs and and auth basic using http://user:pass@url scheme.", "id": "blacklist-ignore-rdns-urls", @@ -177,7 +177,7 @@ "type": "text" }, "BLACKLIST_IGNORE_ASN_URLS": { - "context": "global", + "context": "multisite", "default": "", "help": "List of URLs, separated with spaces, containing ASN to ignore in the blacklist. Also supports file:// URLs and and auth basic using http://user:pass@url scheme.", "id": "blacklist-ignore-asn-urls", @@ -186,7 +186,7 @@ "type": "text" }, "BLACKLIST_IGNORE_USER_AGENT_URLS": { - "context": "global", + "context": "multisite", "default": "", "help": "List of URLs, separated with spaces, containing User-Agent to ignore in the blacklist. Also supports file:// URLs and and auth basic using http://user:pass@url scheme.", "id": "blacklist-ignore-user-agent-urls", @@ -195,7 +195,7 @@ "type": "text" }, "BLACKLIST_IGNORE_URI_URLS": { - "context": "global", + "context": "multisite", "default": "", "help": "List of URLs, separated with spaces, containing URI to ignore in the blacklist. Also supports file:// URLs and and auth basic using http://user:pass@url scheme.", "id": "blacklist-ignore-uri-urls", diff --git a/src/common/core/greylist/greylist.lua b/src/common/core/greylist/greylist.lua index 5b0e46491..d6d55b417 100644 --- a/src/common/core/greylist/greylist.lua +++ b/src/common/core/greylist/greylist.lua @@ -13,6 +13,7 @@ local get_deny_status = utils.get_deny_status local get_rdns = utils.get_rdns local get_asn = utils.get_asn local regex_match = utils.regex_match +local get_variable = utils.get_variable local ipmatcher_new = ipmatcher.new local tostring = tostring local open = io.open @@ -22,12 +23,12 @@ function greylist:initialize(ctx) plugin.initialize(self, "greylist", ctx) -- Decode lists if get_phase() ~= "init" and self:is_needed() then - local lists, err = self.datastore:get("plugin_greylist_lists", true) - if not lists then + local datastore_lists, err = self.datastore:get("plugin_greylist_lists_" .. 
self.ctx.bw.server_name, true) + if not datastore_lists then self.logger:log(ERR, err) self.lists = {} else - self.lists = lists + self.lists = datastore_lists end local kinds = { ["IP"] = {}, @@ -37,10 +38,10 @@ function greylist:initialize(ctx) ["URI"] = {}, } for kind, _ in pairs(kinds) do + if not self.lists[kind] then + self.lists[kind] = {} + end for data in self.variables["GREYLIST_" .. kind]:gmatch("%S+") do - if not self.lists[kind] then - self.lists[kind] = {} - end table.insert(self.lists[kind], data) end end @@ -65,10 +66,11 @@ function greylist:is_needed() end function greylist:init() - -- Check if init needed + -- Check if init is needed if not self:is_needed() then return self:ret(true, "init not needed") end + -- Read greylists local greylists = { ["IP"] = {}, @@ -77,23 +79,43 @@ function greylist:init() ["USER_AGENT"] = {}, ["URI"] = {}, } + + local server_name, err = get_variable("SERVER_NAME", false) + if not server_name then + return self:ret(false, "can't get SERVER_NAME variable : " .. err) + end + + -- Iterate over each kind and server local i = 0 - for kind, _ in pairs(greylists) do - local f, _ = open("/var/cache/bunkerweb/greylist/" .. kind .. ".list", "r") - if f then - for line in f:lines() do - table.insert(greylists[kind], line) - i = i + 1 + for key in server_name:gmatch("%S+") do + for kind, _ in pairs(greylists) do + local file_path = "/var/cache/bunkerweb/greylist/" .. key .. "/" .. kind .. ".list" + local f = open(file_path, "r") + if f then + for line in f:lines() do + table.insert(greylists[kind], line) + i = i + 1 + end + f:close() end - f:close() end + + -- Load service specific ones into datastore + local ok + ok, err = self.datastore:set("plugin_greylist_lists_" .. key, greylists, nil, true) + if not ok then + return self:ret(false, "can't store greylist list into datastore : " .. 
err) + end + + greylists = { + ["IP"] = {}, + ["RDNS"] = {}, + ["ASN"] = {}, + ["USER_AGENT"] = {}, + ["URI"] = {}, + } end - -- Load them into datastore - local ok, err = self.datastore:set("plugin_greylist_lists", greylists, nil, true) - if not ok then - return self:ret(false, "can't store greylist list into datastore : " .. err) - end - return self:ret(true, "successfully loaded " .. tostring(i) .. " bad IP/network/rDNS/ASN/User-Agent/URI") + return self:ret(true, "successfully loaded " .. tostring(i) .. " IP/network/rDNS/ASN/User-Agent/URI") end function greylist:access() diff --git a/src/common/core/greylist/jobs/greylist-download.py b/src/common/core/greylist/jobs/greylist-download.py index f4f4fb016..49ce559cf 100644 --- a/src/common/core/greylist/jobs/greylist-download.py +++ b/src/common/core/greylist/jobs/greylist-download.py @@ -1,10 +1,14 @@ #!/usr/bin/env python3 from contextlib import suppress +from datetime import datetime, timedelta from ipaddress import ip_address, ip_network +from json import dumps, loads from os import getenv, sep from os.path import join, normpath +from pathlib import Path from re import compile as re_compile +from shutil import rmtree from sys import exit as sys_exit, path as sys_path from traceback import format_exc from typing import Tuple @@ -53,108 +57,162 @@ def check_line(kind: str, line: bytes) -> Tuple[bool, bytes]: LOGGER = setup_logger("GREYLIST", getenv("LOG_LEVEL", "INFO")) status = 0 +KINDS = ("IP", "RDNS", "ASN", "USER_AGENT", "URI") + try: # Check if at least a server has Greylist activated greylist_activated = False + + services = getenv("SERVER_NAME", "").strip() + + if not services: + LOGGER.warning("No services found, exiting...") + sys_exit(0) + + services = services.split(" ") + services_greylist_urls = {} + # Multisite case if getenv("MULTISITE", "no") == "yes": - for first_server in getenv("SERVER_NAME", "").split(" "): + for first_server in services: if getenv(f"{first_server}_USE_GREYLIST", 
getenv("USE_GREYLIST", "no")) == "yes": greylist_activated = True - break + + # Get URLs + services_greylist_urls[first_server] = {} + for kind in KINDS: + services_greylist_urls[first_server][kind] = set() + for url in getenv(f"{first_server}_GREYLIST_{kind}_URLS", getenv(f"GREYLIST_{kind}_URLS", "")).strip().split(" "): + if url: + services_greylist_urls[first_server][kind].add(url) # Singlesite case elif getenv("USE_GREYLIST", "no") == "yes": greylist_activated = True + # Get URLs + services_greylist_urls[services[0]] = {} + for kind in KINDS: + services_greylist_urls[services[0]][kind] = set() + for url in getenv(f"GREYLIST_{kind}_URLS", "").strip().split(" "): + if url: + services_greylist_urls[services[0]][kind].add(url) + if not greylist_activated: LOGGER.info("Greylist is not activated, skipping downloads...") sys_exit(0) JOB = Job(LOGGER) - # Get URLs - urls = {"IP": [], "RDNS": [], "ASN": [], "USER_AGENT": [], "URI": []} - for kind in urls: - for url in getenv(f"GREYLIST_{kind}_URLS", "").split(" "): - if url and url not in urls[kind]: - urls[kind].append(url) - - # Don't go further if the cache is fresh - kinds_fresh = {"IP": True, "RDNS": True, "ASN": True, "USER_AGENT": True, "URI": True} - for kind in kinds_fresh: - if not JOB.is_cached_file(f"{kind}.list", "hour"): - if urls[kind]: - kinds_fresh[kind] = False - LOGGER.info(f"Greylist for {kind} is not cached, processing downloads..") - continue - - LOGGER.info(f"Greylist for {kind} is already in cache, skipping downloads...") - - if not urls[kind]: - LOGGER.warning(f"Greylist for {kind} is cached but no URL is configured, removing from cache...") - deleted, err = JOB.del_cache(f"{kind}.list") + if not any(url for urls in services_greylist_urls.values() for url in urls.values()): + LOGGER.warning("No greylist URL is configured, nothing to do...") + if Path(JOB.job_path.joinpath("urls.json")).exists(): + LOGGER.warning("Greylist URLs are cached but no URL is configured, removing from cache...") + 
deleted, err = JOB.del_cache("urls.json") if not deleted: - LOGGER.warning(f"Couldn't delete {kind}.list from cache : {err}") - - if all(kinds_fresh.values()): - if not any(urls.values()): - LOGGER.info("No greylist URL is configured, nothing to do...") + LOGGER.warning(f"Couldn't delete greylist URLs from cache : {err}") sys_exit(0) + cached_urls = loads(JOB.get_cache("urls.json") or "{}") + + tmp_downloads = Path(sep, "var", "tmp", "bunkerweb", "greylist") + tmp_downloads.mkdir(parents=True, exist_ok=True) + downloaded_urls = {} + failed_urls = set() + current_timestamp = datetime.now().astimezone().timestamp() + # Loop on kinds - for kind, urls_list in urls.items(): - if kinds_fresh[kind]: - continue + for service, kinds in services_greylist_urls.items(): + for kind, urls_list in kinds.items(): + if not urls_list: + if Path(JOB.job_path.joinpath(service, f"{kind}.list")).exists(): + LOGGER.warning(f"{service} greylist for {kind} is cached but no URL is configured, removing from cache...") + deleted, err = JOB.del_cache(f"{kind}.list", service_id=service) + if not deleted: + LOGGER.warning(f"Couldn't delete {service} {kind}.list from cache : {err}") + continue - # Write combined data of the kind in memory and check if it has changed - for url in urls_list: - try: - LOGGER.info(f"Downloading greylist data from {url} ...") - if url.startswith("file://"): - with open(normpath(url[7:]), "rb") as f: - iterable = f.readlines() - else: - resp = get(url, stream=True, timeout=10) - - if resp.status_code != 200: - LOGGER.warning(f"Got status code {resp.status_code}, skipping...") + # Write combined data of the kind in memory and check if it has changed + content = b"" + for url in urls_list: + try: + cached_url = cached_urls.get(url, {"time": 0, "tmp_path": ""}) + # Check if the URL's last download timestamp is younger than 1 hour + if current_timestamp - cached_url["time"] < timedelta(hours=1).total_seconds(): + downloaded_urls[url] = { + "time": cached_url["time"], + 
"tmp_path": tmp_downloads.joinpath(f"{bytes_hash(url, algorithm='sha1')}.list").as_posix(), + } + LOGGER.info(f"URL {url} has been downloaded less than 1 hour ago, skipping it...") + failed_urls.add(url) + status = 1 if status == 1 else 0 continue - iterable = resp.iter_lines() - - i = 0 - content = b"" - for line in iterable: - line = line.strip() - - if not line or line.startswith((b"#", b";")): + # Check if the URL has already been downloaded + if url in failed_urls: continue - elif kind != "USER_AGENT": - line = line.split(b" ")[0] - - ok, data = check_line(kind, line) - if ok: - content += data + b"\n" - i += 1 - - LOGGER.info(f"Downloaded {i} bad {kind}") - # Check if file has changed - new_hash = bytes_hash(content) - old_hash = JOB.cache_hash(f"{kind}.list") - if new_hash == old_hash: - LOGGER.info(f"New file {kind}.list is identical to cache file, reload is not needed") - else: - LOGGER.info(f"New file {kind}.list is different than cache file, reload is needed") - # Put file in cache - cached, err = JOB.cache_file(f"{kind}.list", content, checksum=new_hash) - if not cached: - LOGGER.error(f"Error while caching greylist : {err}") - status = 2 + elif url in downloaded_urls: + LOGGER.info(f"URL {url} has already been downloaded, skipping it...") + content += Path(downloaded_urls[url]["tmp_path"]).read_bytes() else: - status = 1 - except: + LOGGER.info(f"Downloading greylist data from {url} ...") + if url.startswith("file://"): + with open(normpath(url[7:]), "rb") as f: + iterable = f.readlines() + else: + resp = get(url, stream=True, timeout=10) + + if resp.status_code != 200: + LOGGER.warning(f"Got status code {resp.status_code}, skipping...") + continue + + iterable = resp.iter_lines() + + i = 0 + for line in iterable: + line = line.strip() + + if not line or line.startswith((b"#", b";")): + continue + elif kind != "USER_AGENT": + line = line.split(b" ")[0] + + ok, data = check_line(kind, line) + if ok: + content += data + b"\n" + i += 1 + + 
LOGGER.info(f"Downloaded {i} bad {kind}") + downloaded_urls[url] = { + "time": current_timestamp, + "tmp_path": tmp_downloads.joinpath(f"{bytes_hash(url, algorithm='sha1')}.list").as_posix(), + } + except BaseException as e: + status = 2 + LOGGER.error(f"Exception while getting {service} greylist from {url} :\n{e}") + failed_urls.add(url) + + # Check if file has changed + new_hash = bytes_hash(content) + old_hash = JOB.cache_hash(f"{kind}.list", service_id=service) + if new_hash == old_hash: + LOGGER.info(f"New {service} file {kind}.list is identical to cache file, reload is not needed") + continue + + LOGGER.info(f"New {service} file {kind}.list is different than cache file, reload is needed") + # Put file in cache + cached, err = JOB.cache_file(f"{kind}.list", content, service_id=service, checksum=new_hash) + if not cached: + LOGGER.error(f"Error while caching greylist : {err}") status = 2 - LOGGER.error(f"Exception while getting greylist from {url} :\n{format_exc()}") + continue + + status = 1 + + cached, err = JOB.cache_file("urls.json", dumps(downloaded_urls, indent=2).encode("utf-8")) + if not cached: + LOGGER.error(f"Error while caching greylist URLs : {err}") + + rmtree(tmp_downloads, ignore_errors=True) except SystemExit as e: status = e.code except: diff --git a/src/common/core/greylist/plugin.json b/src/common/core/greylist/plugin.json index bd17d8689..f818daddb 100644 --- a/src/common/core/greylist/plugin.json +++ b/src/common/core/greylist/plugin.json @@ -69,7 +69,7 @@ "type": "text" }, "GREYLIST_IP_URLS": { - "context": "global", + "context": "multisite", "default": "", "help": "List of URLs, separated with spaces, containing good IP/network to put into the greylist. 
Also supports file:// URLs and and auth basic using http://user:pass@url scheme.", "id": "greylist-ip-urls", @@ -78,7 +78,7 @@ "type": "text" }, "GREYLIST_RDNS_URLS": { - "context": "global", + "context": "multisite", "default": "", "help": "List of URLs, separated with spaces, containing reverse DNS suffixes to put into the greylist. Also supports file:// URLs and and auth basic using http://user:pass@url scheme.", "id": "greylist-rdns-urls", @@ -87,7 +87,7 @@ "type": "text" }, "GREYLIST_ASN_URLS": { - "context": "global", + "context": "multisite", "default": "", "help": "List of URLs, separated with spaces, containing ASN to put into the greylist. Also supports file:// URLs and and auth basic using http://user:pass@url scheme.", "id": "greylist-asn-urls", @@ -96,7 +96,7 @@ "type": "text" }, "GREYLIST_USER_AGENT_URLS": { - "context": "global", + "context": "multisite", "default": "", "help": "List of URLs, separated with spaces, containing good User-Agent to put into the greylist. Also supports file:// URLs and and auth basic using http://user:pass@url scheme.", "id": "greylist-user-agent-urls", @@ -105,7 +105,7 @@ "type": "text" }, "GREYLIST_URI_URLS": { - "context": "global", + "context": "multisite", "default": "", "help": "List of URLs, separated with spaces, containing bad URI to put into the greylist. 
Also supports file:// URLs and and auth basic using http://user:pass@url scheme.", "id": "greylist-uri-urls", diff --git a/src/common/core/realip/jobs/realip-download.py b/src/common/core/realip/jobs/realip-download.py index 3de6dad6f..c50772042 100644 --- a/src/common/core/realip/jobs/realip-download.py +++ b/src/common/core/realip/jobs/realip-download.py @@ -1,9 +1,13 @@ #!/usr/bin/env python3 from contextlib import suppress +from datetime import datetime, timedelta from ipaddress import ip_address, ip_network +from json import dumps, loads from os import getenv, sep from os.path import join, normpath +from pathlib import Path +from shutil import rmtree from sys import exit as sys_exit, path as sys_path from traceback import format_exc @@ -36,93 +40,153 @@ status = 0 try: # Check if at least a server has Realip activated realip_activated = False + + # Check if at least a server has Greylist activated + greylist_activated = False + + services = getenv("SERVER_NAME", "").strip() + + if not services: + LOGGER.warning("No services found, exiting...") + sys_exit(0) + + services = services.split(" ") + services_realip_urls = {} + # Multisite case if getenv("MULTISITE", "no") == "yes": - servers = getenv("SERVER_NAME", []) - - if isinstance(servers, str): - servers = servers.split(" ") - - for first_server in servers: + for first_server in services: if getenv(f"{first_server}_USE_REAL_IP", getenv("USE_REAL_IP", "no")) == "yes": realip_activated = True - break + # Get URLs + services_realip_urls[first_server] = set() + for url in getenv(f"{first_server}_REAL_IP_FROM_URLS", getenv("REAL_IP_FROM_URLS", "")).strip().split(" "): + if url: + services_realip_urls[first_server].add(url) # Singlesite case elif getenv("USE_REAL_IP", "no") == "yes": realip_activated = True + # Get URLs + services_realip_urls[services[0]] = set() + for url in getenv("REAL_IP_FROM_URLS", "").strip().split(" "): + if url: + services_realip_urls[services[0]].add(url) + if not realip_activated: 
LOGGER.info("RealIP is not activated, skipping download...") sys_exit(0) JOB = Job(LOGGER) - # Get URLs - urls = [url for url in getenv("REAL_IP_FROM_URLS", "").split(" ") if url] - - # Don't go further if the cache is fresh - if JOB.is_cached_file("combined.list", "hour"): - LOGGER.info("RealIP list is already in cache, skipping download...") - if not urls: - LOGGER.warning("No URL found, deleting combined.list from cache...") - deleted, err = JOB.del_cache("combined.list") + if not any(services_realip_urls.values()): + LOGGER.warning("No URL configured, nothing to do...") + if Path(JOB.job_path.joinpath("urls.json")).exists(): + LOGGER.warning("RealIP URLs are cached but no URL is configured, removing from cache...") + deleted, err = JOB.del_cache("urls.json") if not deleted: - LOGGER.warning(f"Couldn't delete combined.list from cache : {err}") + LOGGER.warning(f"Couldn't delete realip URLs from cache : {err}") sys_exit(0) - if not urls: - LOGGER.info("No URL found, skipping download...") - sys_exit(0) + cached_urls = loads(JOB.get_cache("urls.json") or "{}") - # Download and write data to temp file - i = 0 - content = b"" - for url in urls: - try: - LOGGER.info(f"Downloading RealIP list from {url} ...") - if url.startswith("file://"): - with open(normpath(url[7:]), "rb") as f: - iterable = f.readlines() - else: - resp = get(url, stream=True, timeout=10) + tmp_downloads = Path(sep, "var", "tmp", "bunkerweb", "realip") + tmp_downloads.mkdir(parents=True, exist_ok=True) + downloaded_urls = {} + failed_urls = set() + current_timestamp = datetime.now().astimezone().timestamp() - if resp.status_code != 200: - LOGGER.warning(f"Got status code {resp.status_code}, skipping...") + for service, urls in services_realip_urls.items(): + if not urls: + if Path(JOB.job_path.joinpath(service, "combined.list")).exists(): + LOGGER.warning(f"{service} realip combined.list is cached but no URL is configured, removing from cache...") + deleted, err = JOB.del_cache("combined.list", 
service_id=service) + if not deleted: + LOGGER.warning(f"Couldn't delete {service} combined.list from cache : {err}") + continue + + # Write combined data of the kind in memory and check if it has changed + content = b"" + for url in urls: + try: + cached_url = cached_urls.get(url, {"time": 0, "tmp_path": ""}) + # Check if the URL's last download timestamp is younger than 1 hour + if current_timestamp - cached_url["time"] < timedelta(hours=1).total_seconds(): + downloaded_urls[url] = { + "time": cached_url["time"], + "tmp_path": tmp_downloads.joinpath(f"{bytes_hash(url, algorithm='sha1')}.list").as_posix(), + } + LOGGER.info(f"URL {url} has been downloaded less than 1 hour ago, skipping it...") + failed_urls.add(url) + status = 1 if status == 1 else 0 continue - iterable = resp.iter_lines() - - for line in iterable: - line = line.strip().split(b" ")[0] - - if not line or line.startswith((b"#", b";")): + # Check if the URL has already been downloaded + if url in failed_urls: continue + elif url in downloaded_urls: + LOGGER.info(f"URL {url} has already been downloaded, skipping it...") + content += Path(downloaded_urls[url]["tmp_path"]).read_bytes() + else: + LOGGER.info(f"Downloading realip data from {url} ...") + if url.startswith("file://"): + with open(normpath(url[7:]), "rb") as f: + iterable = f.readlines() + else: + resp = get(url, stream=True, timeout=10) - ok, data = check_line(line) - if ok: - content += data + b"\n" - i += 1 - except: + if resp.status_code != 200: + LOGGER.warning(f"Got status code {resp.status_code}, skipping...") + continue + + iterable = resp.iter_lines() + + i = 0 + for line in iterable: + line = line.strip().split(b" ")[0] + + if not line or line.startswith((b"#", b";")): + continue + + ok, data = check_line(line) + if ok: + content += data + b"\n" + i += 1 + + LOGGER.info(f"Downloaded {i} realip from {url}") + tmp_downloads.joinpath(f"{bytes_hash(url, algorithm='sha1')}.list").write_bytes(content) + downloaded_urls[url] = { + "time": 
current_timestamp, + "tmp_path": tmp_downloads.joinpath(f"{bytes_hash(url, algorithm='sha1')}.list").as_posix(), + } + except BaseException as e: + status = 2 + LOGGER.error(f"Exception while getting {service} realip from {url} :\n{e}") + failed_urls.add(url) + + # Check if file has changed + new_hash = bytes_hash(content) + old_hash = JOB.cache_hash("combined.list", service_id=service) + if new_hash == old_hash: + LOGGER.info(f"New {service} file combined.list is identical to cache file, reload is not needed") + continue + + LOGGER.info(f"New {service} file combined.list is different than cache file, reload is needed") + # Put file in cache + cached, err = JOB.cache_file("combined.list", content, service_id=service, checksum=new_hash) + if not cached: + LOGGER.error(f"Error while caching realip : {err}") status = 2 - LOGGER.error(f"Exception while getting RealIP list from {url} :\n{format_exc()}") + continue - # Check if file has changed - new_hash = bytes_hash(content) - old_hash = JOB.cache_hash("combined.list") - if new_hash == old_hash: - LOGGER.info("New file is identical to cache file, reload is not needed") - sys_exit(0) + status = 1 - # Put file in cache - cached, err = JOB.cache_file("combined.list", content, checksum=new_hash) + cached, err = JOB.cache_file("urls.json", dumps(downloaded_urls, indent=2).encode("utf-8")) if not cached: - LOGGER.error(f"Error while caching list : {err}") - sys_exit(2) + LOGGER.error(f"Error while caching whitelist URLs : {err}") - LOGGER.info(f"Downloaded {i} trusted IP/net") - - status = 1 + rmtree(tmp_downloads, ignore_errors=True) except SystemExit as e: status = e.code except: diff --git a/src/common/core/realip/plugin.json b/src/common/core/realip/plugin.json index 26b480f47..2985dec48 100644 --- a/src/common/core/realip/plugin.json +++ b/src/common/core/realip/plugin.json @@ -51,7 +51,7 @@ "type": "check" }, "REAL_IP_FROM_URLS": { - "context": "global", + "context": "multisite", "default": "", "help": "List of URLs 
containing trusted IPs / networks, separated with spaces, where proxied requests come from. Also supports file:// URLs and and auth basic using http://user:pass@url scheme.", "id": "real-ip-from-urls", diff --git a/src/common/core/whitelist/jobs/whitelist-download.py b/src/common/core/whitelist/jobs/whitelist-download.py index 8f6a0ec12..3a68b523f 100644 --- a/src/common/core/whitelist/jobs/whitelist-download.py +++ b/src/common/core/whitelist/jobs/whitelist-download.py @@ -1,10 +1,14 @@ #!/usr/bin/env python3 from contextlib import suppress +from datetime import datetime, timedelta from ipaddress import ip_address, ip_network +from json import dumps, loads from os import getenv, sep from os.path import join, normpath +from pathlib import Path from re import compile as re_compile +from shutil import rmtree from sys import exit as sys_exit, path as sys_path from traceback import format_exc from typing import Tuple @@ -53,108 +57,162 @@ def check_line(kind: str, line: bytes) -> Tuple[bool, bytes]: LOGGER = setup_logger("WHITELIST", getenv("LOG_LEVEL", "INFO")) status = 0 +KINDS = ("IP", "RDNS", "ASN", "USER_AGENT", "URI") + try: # Check if at least a server has Whitelist activated whitelist_activated = False + + services = getenv("SERVER_NAME", "").strip() + + if not services: + LOGGER.warning("No services found, exiting...") + sys_exit(0) + + services = services.split(" ") + services_whitelist_urls = {} + # Multisite case if getenv("MULTISITE", "no") == "yes": - for first_server in getenv("SERVER_NAME", "").split(" "): + for first_server in services: if getenv(f"{first_server}_USE_WHITELIST", getenv("USE_WHITELIST", "yes")) == "yes": whitelist_activated = True - break + + # Get URLs + services_whitelist_urls[first_server] = {} + for kind in KINDS: + services_whitelist_urls[first_server][kind] = set() + for url in getenv(f"{first_server}_WHITELIST_{kind}_URLS", getenv(f"WHITELIST_{kind}_URLS", "")).strip().split(" "): + if url: + 
services_whitelist_urls[first_server][kind].add(url) # Singlesite case elif getenv("USE_WHITELIST", "yes") == "yes": whitelist_activated = True + # Get URLs + services_whitelist_urls[services[0]] = {} + for kind in KINDS: + services_whitelist_urls[services[0]][kind] = set() + for url in getenv(f"WHITELIST_{kind}_URLS", "").strip().split(" "): + if url: + services_whitelist_urls[services[0]][kind].add(url) + if not whitelist_activated: LOGGER.info("Whitelist is not activated, skipping downloads...") sys_exit(0) JOB = Job(LOGGER) - # Get URLs - urls = {"IP": [], "RDNS": [], "ASN": [], "USER_AGENT": [], "URI": []} - for kind in urls: - for url in getenv(f"WHITELIST_{kind}_URLS", "").split(" "): - if url and url not in urls[kind]: - urls[kind].append(url) - - # Don't go further if the cache is fresh - kinds_fresh = {"IP": True, "RDNS": True, "ASN": True, "USER_AGENT": True, "URI": True} - for kind in kinds_fresh: - if not JOB.is_cached_file(f"{kind}.list", "hour"): - if urls[kind]: - kinds_fresh[kind] = False - LOGGER.info(f"Whitelist for {kind} is not cached, processing downloads..") - continue - - LOGGER.info(f"Whitelist for {kind} is already in cache, skipping downloads...") - - if not urls[kind]: - LOGGER.warning(f"Whitelist for {kind} is cached but no URL is configured, removing from cache...") - deleted, err = JOB.del_cache(f"{kind}.list") + if not any(url for urls in services_whitelist_urls.values() for url in urls.values()): + LOGGER.warning("No whitelist URL is configured, nothing to do...") + if Path(JOB.job_path.joinpath("urls.json")).exists(): + LOGGER.warning("Whitelist URLs are cached but no URL is configured, removing from cache...") + deleted, err = JOB.del_cache("urls.json") if not deleted: - LOGGER.warning(f"Couldn't delete {kind}.list from cache : {err}") - - if all(kinds_fresh.values()): - if not any(urls.values()): - LOGGER.info("No whitelist URL is configured, nothing to do...") + LOGGER.warning(f"Couldn't delete whitelist URLs from cache : 
{err}") sys_exit(0) + cached_urls = loads(JOB.get_cache("urls.json") or "{}") + + tmp_downloads = Path(sep, "var", "tmp", "bunkerweb", "blacklist") + tmp_downloads.mkdir(parents=True, exist_ok=True) + downloaded_urls = {} + failed_urls = set() + current_timestamp = datetime.now().astimezone().timestamp() + # Loop on kinds - for kind, urls_list in urls.items(): - if kinds_fresh[kind]: - continue + for service, kinds in services_whitelist_urls.items(): + for kind, urls_list in kinds.items(): + if not urls_list: + if Path(JOB.job_path.joinpath(service, f"{kind}.list")).exists(): + LOGGER.warning(f"{service} whitelist for {kind} is cached but no URL is configured, removing from cache...") + deleted, err = JOB.del_cache(f"{kind}.list", service_id=service) + if not deleted: + LOGGER.warning(f"Couldn't delete {service} {kind}.list from cache : {err}") + continue - # Write combined data of the kind in memory and check if it has changed - for url in urls_list: - try: - LOGGER.info(f"Downloading whitelist data from {url} ...") - if url.startswith("file://"): - with open(normpath(url[7:]), "rb") as f: - iterable = f.readlines() - else: - resp = get(url, stream=True, timeout=10) - - if resp.status_code != 200: - LOGGER.warning(f"Got status code {resp.status_code}, skipping...") + # Write combined data of the kind in memory and check if it has changed + content = b"" + for url in urls_list: + try: + cached_url = cached_urls.get(url, {"time": 0, "tmp_path": ""}) + # Check if the URL's last download timestamp is younger than 1 hour + if current_timestamp - cached_url["time"] < timedelta(hours=1).total_seconds(): + downloaded_urls[url] = { + "time": cached_url["time"], + "tmp_path": tmp_downloads.joinpath(f"{bytes_hash(url, algorithm='sha1')}.list").as_posix(), + } + LOGGER.info(f"URL {url} has been downloaded less than 1 hour ago, skipping it...") + failed_urls.add(url) + status = 1 if status == 1 else 0 continue - iterable = resp.iter_lines() - - i = 0 - content = b"" - for line 
in iterable: - line = line.strip() - - if not line or line.startswith((b"#", b";")): + # Check if the URL has already been downloaded + if url in failed_urls: continue - elif kind != "USER_AGENT": - line = line.split(b" ")[0] - - ok, data = check_line(kind, line) - if ok: - content += data + b"\n" - i += 1 - - LOGGER.info(f"Downloaded {i} bad {kind}") - # Check if file has changed - new_hash = bytes_hash(content) - old_hash = JOB.cache_hash(f"{kind}.list") - if new_hash == old_hash: - LOGGER.info(f"New file {kind}.list is identical to cache file, reload is not needed") - else: - LOGGER.info(f"New file {kind}.list is different than cache file, reload is needed") - # Put file in cache - cached, err = JOB.cache_file(f"{kind}.list", content, checksum=new_hash) - if not cached: - LOGGER.error(f"Error while caching whitelist : {err}") - status = 2 + elif url in downloaded_urls: + LOGGER.info(f"URL {url} has already been downloaded, skipping it...") + content += Path(downloaded_urls[url]["tmp_path"]).read_bytes() else: - status = 1 - except: + LOGGER.info(f"Downloading whitelist data from {url} ...") + if url.startswith("file://"): + with open(normpath(url[7:]), "rb") as f: + iterable = f.readlines() + else: + resp = get(url, stream=True, timeout=10) + + if resp.status_code != 200: + LOGGER.warning(f"Got status code {resp.status_code}, skipping...") + continue + + iterable = resp.iter_lines() + + i = 0 + for line in iterable: + line = line.strip() + + if not line or line.startswith((b"#", b";")): + continue + elif kind != "USER_AGENT": + line = line.split(b" ")[0] + + ok, data = check_line(kind, line) + if ok: + content += data + b"\n" + i += 1 + + LOGGER.info(f"Downloaded {i} bad {kind}") + downloaded_urls[url] = { + "time": current_timestamp, + "tmp_path": tmp_downloads.joinpath(f"{bytes_hash(url, algorithm='sha1')}.list").as_posix(), + } + except BaseException as e: + status = 2 + LOGGER.error(f"Exception while getting {service} whitelist from {url} :\n{e}") + 
failed_urls.add(url) + + # Check if file has changed + new_hash = bytes_hash(content) + old_hash = JOB.cache_hash(f"{kind}.list", service_id=service) + if new_hash == old_hash: + LOGGER.info(f"New {service} file {kind}.list is identical to cache file, reload is not needed") + continue + + LOGGER.info(f"New {service} file {kind}.list is different than cache file, reload is needed") + # Put file in cache + cached, err = JOB.cache_file(f"{kind}.list", content, service_id=service, checksum=new_hash) + if not cached: + LOGGER.error(f"Error while caching whitelist : {err}") status = 2 - LOGGER.error(f"Exception while getting whitelist from {url} :\n{format_exc()}") + continue + + status = 1 + + cached, err = JOB.cache_file("urls.json", dumps(downloaded_urls, indent=2).encode("utf-8")) + if not cached: + LOGGER.error(f"Error while caching whitelist URLs : {err}") + + rmtree(tmp_downloads, ignore_errors=True) except SystemExit as e: status = e.code except: diff --git a/src/common/core/whitelist/plugin.json b/src/common/core/whitelist/plugin.json index dc9191223..b538f6a3b 100644 --- a/src/common/core/whitelist/plugin.json +++ b/src/common/core/whitelist/plugin.json @@ -69,7 +69,7 @@ "type": "text" }, "WHITELIST_IP_URLS": { - "context": "global", + "context": "multisite", "default": "", "help": "List of URLs, separated with spaces, containing good IP/network to whitelist. Also supports file:// URLs and and auth basic using http://user:pass@url scheme.", "id": "whitelist-ip-urls", @@ -78,7 +78,7 @@ "type": "text" }, "WHITELIST_RDNS_URLS": { - "context": "global", + "context": "multisite", "default": "", "help": "List of URLs, separated with spaces, containing reverse DNS suffixes to whitelist. 
Also supports file:// URLs and and auth basic using http://user:pass@url scheme.", "id": "whitelist-rdns-urls", @@ -87,7 +87,7 @@ "type": "text" }, "WHITELIST_ASN_URLS": { - "context": "global", + "context": "multisite", "default": "", "help": "List of URLs, separated with spaces, containing ASN to whitelist. Also supports file:// URLs and and auth basic using http://user:pass@url scheme.", "id": "whitelist-asn-urls", @@ -96,7 +96,7 @@ "type": "text" }, "WHITELIST_USER_AGENT_URLS": { - "context": "global", + "context": "multisite", "default": "", "help": "List of URLs, separated with spaces, containing good User-Agent to whitelist. Also supports file:// URLs and and auth basic using http://user:pass@url scheme.", "id": "whitelist-user-agent-urls", @@ -105,7 +105,7 @@ "type": "text" }, "WHITELIST_URI_URLS": { - "context": "global", + "context": "multisite", "default": "", "help": "List of URLs, separated with spaces, containing bad URI to whitelist. Also supports file:// URLs and and auth basic using http://user:pass@url scheme.", "id": "whitelist-uri-urls", diff --git a/src/common/core/whitelist/whitelist.lua b/src/common/core/whitelist/whitelist.lua index 880c27de0..36dc64a75 100644 --- a/src/common/core/whitelist/whitelist.lua +++ b/src/common/core/whitelist/whitelist.lua @@ -16,6 +16,7 @@ local get_ips = utils.get_ips local get_rdns = utils.get_rdns local get_asn = utils.get_asn local regex_match = utils.regex_match +local get_variable = utils.get_variable local ipmatcher_new = ipmatcher.new local tostring = tostring local open = io.open @@ -26,12 +27,12 @@ function whitelist:initialize(ctx) plugin.initialize(self, "whitelist", ctx) -- Decode lists if get_phase() ~= "init" and self:is_needed() then - local lists, err = self.datastore:get("plugin_whitelist_lists", true) - if not lists then + local datastore_lists, err = self.datastore:get("plugin_whitelist_lists_" .. 
self.ctx.bw.server_name, true) + if not datastore_lists then self.logger:log(ERR, err) self.lists = {} else - self.lists = lists + self.lists = datastore_lists end local kinds = { ["IP"] = {}, @@ -41,10 +42,10 @@ function whitelist:initialize(ctx) ["URI"] = {}, } for kind, _ in pairs(kinds) do + if not self.lists[kind] then + self.lists[kind] = {} + end for data in self.variables["WHITELIST_" .. kind]:gmatch("%S+") do - if not self.lists[kind] then - self.lists[kind] = {} - end table.insert(self.lists[kind], data) end end @@ -73,6 +74,7 @@ function whitelist:init() if not self:is_needed() then return self:ret(true, "init not needed") end + -- Read whitelists local whitelists = { ["IP"] = {}, @@ -81,21 +83,41 @@ function whitelist:init() ["USER_AGENT"] = {}, ["URI"] = {}, } - local i = 0 - for kind, _ in pairs(whitelists) do - local f, _ = open("/var/cache/bunkerweb/whitelist/" .. kind .. ".list", "r") - if f then - for line in f:lines() do - table.insert(whitelists[kind], line) - i = i + 1 - end - f:close() - end + + local server_name, err = get_variable("SERVER_NAME", false) + if not server_name then + return self:ret(false, "can't get SERVER_NAME variable : " .. err) end - -- Load them into datastore - local ok, err = self.datastore:set("plugin_whitelist_lists", whitelists, nil, true) - if not ok then - return self:ret(false, "can't store whitelist list into datastore : " .. err) + + -- Iterate over each kind and server + local i = 0 + for key in server_name:gmatch("%S+") do + for kind, _ in pairs(whitelists) do + local file_path = "/var/cache/bunkerweb/whitelist/" .. key .. "/" .. kind .. ".list" + local f = open(file_path, "r") + if f then + for line in f:lines() do + table.insert(whitelists[kind], line) + i = i + 1 + end + f:close() + end + end + + -- Load service specific ones into datastore + local ok + ok, err = self.datastore:set("plugin_whitelist_lists_" .. 
key, whitelists, nil, true) + if not ok then + return self:ret(false, "can't store whitelist list into datastore : " .. err) + end + + whitelists = { + ["IP"] = {}, + ["RDNS"] = {}, + ["ASN"] = {}, + ["USER_AGENT"] = {}, + ["URI"] = {}, + } end return self:ret(true, "successfully loaded " .. tostring(i) .. " IP/network/rDNS/ASN/User-Agent/URI") end