refactor: skip empty entries in whitelist, greylist, and blacklist initialization and log the number of loaded entries per service

This commit is contained in:
Théophile Diot 2024-11-19 14:04:49 +01:00
parent 2e2639e467
commit a5ef0d23eb
No known key found for this signature in database
GPG key ID: FA995104A0BA376A
6 changed files with 81 additions and 33 deletions

View file

@ -7,6 +7,7 @@ local blacklist = class("blacklist", plugin)
local ngx = ngx
local ERR = ngx.ERR
local INFO = ngx.INFO
local get_phase = ngx.get_phase
local has_variable = utils.has_variable
local get_deny_status = utils.get_deny_status
@ -47,7 +48,9 @@ function blacklist:initialize(ctx)
self.lists[kind] = {}
end
for data in self.variables["BLACKLIST_" .. kind]:gmatch("%S+") do
table.insert(self.lists[kind], data)
if data ~= "" then
table.insert(self.lists[kind], data)
end
end
end
end
@ -103,8 +106,10 @@ function blacklist:init()
local f = open(file_path, "r")
if f then
for line in f:lines() do
table.insert(blacklists[kind], line)
i = i + 1
if line ~= "" then
table.insert(blacklists[kind], line)
i = i + 1
end
end
f:close()
end
@ -114,9 +119,15 @@ function blacklist:init()
local ok
ok, err = self.datastore:set("plugin_blacklist_lists_" .. key, blacklists, nil, true)
if not ok then
return self:ret(false, "can't store blacklist list into datastore : " .. err)
return self:ret(false, "can't store blacklist " .. key .. " list into datastore : " .. err)
end
self.logger:log(
INFO,
"successfully loaded " .. tostring(i) .. " IP/network/rDNS/ASN/User-Agent/URI for the service: " .. key
)
i = 0
blacklists = {
["IP"] = {},
["RDNS"] = {},
@ -130,7 +141,7 @@ function blacklist:init()
["IGNORE_URI"] = {},
}
end
return self:ret(true, "successfully loaded " .. tostring(i) .. " IP/network/rDNS/ASN/User-Agent/URI")
return self:ret(true, "successfully loaded all IP/network/rDNS/ASN/User-Agent/URI")
end
function blacklist:access()

View file

@ -74,7 +74,7 @@ try:
if getenv(f"{first_server}_USE_BLACKLIST", getenv("USE_BLACKLIST", "yes")) == "yes":
blacklist_activated = True
# Get URLs
# Get services URLs
services_blacklist_urls[first_server] = {}
for kind in KINDS:
services_blacklist_urls[first_server][kind] = set()
@ -85,7 +85,7 @@ try:
elif getenv("USE_BLACKLIST", "yes") == "yes":
blacklist_activated = True
# Get URLs
# Get global URLs
services_blacklist_urls[services[0]] = {}
for kind in KINDS:
services_blacklist_urls[services[0]][kind] = set()
@ -137,7 +137,7 @@ try:
# Check if the URL has already been downloaded
if url in failed_urls:
continue
elif isinstance(cached_url, dict) and cached_url["last_update"] < (datetime.now().astimezone() - timedelta(hours=1)).timestamp():
elif isinstance(cached_url, dict) and cached_url["last_update"] > (datetime.now().astimezone() - timedelta(hours=1)).timestamp():
LOGGER.info(f"URL {url} has already been downloaded less than 1 hour ago, skipping download...")
# Remove first line (URL) and add to content
content += b"\n".join(cached_url["data"].split(b"\n")[1:]) + b"\n"
@ -179,7 +179,9 @@ try:
LOGGER.error(f"Exception while getting {service} blacklist from {url} :\n{e}")
failed_urls.add(url)
LOGGER.debug(f"Content for {service} {kind} : {content}")
if not content:
LOGGER.warning(f"No data for {service} {kind}, skipping...")
continue
# Check if file has changed
new_hash = bytes_hash(content)
@ -187,8 +189,11 @@ try:
if new_hash == old_hash:
LOGGER.info(f"New {service} file {kind}.list is identical to cache file, reload is not needed")
continue
elif old_hash:
LOGGER.info(f"New {service} file {kind}.list is different than cache file, reload is needed")
else:
LOGGER.info(f"New {service} file {kind}.list is not in cache, reload is needed")
LOGGER.info(f"New {service} file {kind}.list is different than cache file, reload is needed")
# Put file in cache
cached, err = JOB.cache_file(f"{kind}.list", content, service_id=service, checksum=new_hash)
if not cached:
@ -196,7 +201,7 @@ try:
status = 2
continue
status = 1
status = 1 if status != 2 else 2
# Remove old files
for url_file in JOB.job_path.glob("*.list"):

View file

@ -7,6 +7,7 @@ local greylist = class("greylist", plugin)
local ngx = ngx
local ERR = ngx.ERR
local INFO = ngx.INFO
local get_phase = ngx.get_phase
local has_variable = utils.has_variable
local get_deny_status = utils.get_deny_status
@ -42,7 +43,9 @@ function greylist:initialize(ctx)
self.lists[kind] = {}
end
for data in self.variables["GREYLIST_" .. kind]:gmatch("%S+") do
table.insert(self.lists[kind], data)
if data ~= "" then
table.insert(self.lists[kind], data)
end
end
end
end
@ -93,8 +96,10 @@ function greylist:init()
local f = open(file_path, "r")
if f then
for line in f:lines() do
table.insert(greylists[kind], line)
i = i + 1
if line ~= "" then
table.insert(greylists[kind], line)
i = i + 1
end
end
f:close()
end
@ -104,9 +109,15 @@ function greylist:init()
local ok
ok, err = self.datastore:set("plugin_greylist_lists_" .. key, greylists, nil, true)
if not ok then
return self:ret(false, "can't store greylist list into datastore : " .. err)
return self:ret(false, "can't store greylist " .. key .. " list into datastore : " .. err)
end
self.logger:log(
INFO,
"successfully loaded " .. tostring(i) .. " IP/network/rDNS/ASN/User-Agent/URI for the service: " .. key
)
i = 0
greylists = {
["IP"] = {},
["RDNS"] = {},
@ -115,7 +126,7 @@ function greylist:init()
["URI"] = {},
}
end
return self:ret(true, "successfully loaded " .. tostring(i) .. " IP/network/rDNS/ASN/User-Agent/URI")
return self:ret(true, "successfully loaded all IP/network/rDNS/ASN/User-Agent/URI")
end
function greylist:access()

View file

@ -74,7 +74,7 @@ try:
if getenv(f"{first_server}_USE_GREYLIST", getenv("USE_GREYLIST", "no")) == "yes":
greylist_activated = True
# Get URLs
# Get services URLs
services_greylist_urls[first_server] = {}
for kind in KINDS:
services_greylist_urls[first_server][kind] = set()
@ -85,7 +85,7 @@ try:
elif getenv("USE_GREYLIST", "no") == "yes":
greylist_activated = True
# Get URLs
# Get global URLs
services_greylist_urls[services[0]] = {}
for kind in KINDS:
services_greylist_urls[services[0]][kind] = set()
@ -137,7 +137,7 @@ try:
# Check if the URL has already been downloaded
if url in failed_urls:
continue
elif isinstance(cached_url, dict) and cached_url["last_update"] < (datetime.now().astimezone() - timedelta(hours=1)).timestamp():
elif isinstance(cached_url, dict) and cached_url["last_update"] > (datetime.now().astimezone() - timedelta(hours=1)).timestamp():
LOGGER.info(f"URL {url} has already been downloaded less than 1 hour ago, skipping download...")
# Remove first line (URL) and add to content
content += b"\n".join(cached_url["data"].split(b"\n")[1:]) + b"\n"
@ -179,7 +179,9 @@ try:
LOGGER.error(f"Exception while getting {service} greylist from {url} :\n{e}")
failed_urls.add(url)
LOGGER.debug(f"Content for {service} {kind} : {content}")
if not content:
LOGGER.warning(f"No data for {service} {kind}, skipping...")
continue
# Check if file has changed
new_hash = bytes_hash(content)
@ -187,8 +189,11 @@ try:
if new_hash == old_hash:
LOGGER.info(f"New {service} file {kind}.list is identical to cache file, reload is not needed")
continue
elif old_hash:
LOGGER.info(f"New {service} file {kind}.list is different than cache file, reload is needed")
else:
LOGGER.info(f"New {service} file {kind}.list is not in cache, reload is needed")
LOGGER.info(f"New {service} file {kind}.list is different than cache file, reload is needed")
# Put file in cache
cached, err = JOB.cache_file(f"{kind}.list", content, service_id=service, checksum=new_hash)
if not cached:
@ -196,7 +201,7 @@ try:
status = 2
continue
status = 1
status = 1 if status != 2 else 2
# Remove old files
for url_file in JOB.job_path.glob("*.list"):

View file

@ -74,7 +74,7 @@ try:
if getenv(f"{first_server}_USE_WHITELIST", getenv("USE_WHITELIST", "yes")) == "yes":
whitelist_activated = True
# Get URLs
# Get services URLs
services_whitelist_urls[first_server] = {}
for kind in KINDS:
services_whitelist_urls[first_server][kind] = set()
@ -85,7 +85,7 @@ try:
elif getenv("USE_WHITELIST", "yes") == "yes":
whitelist_activated = True
# Get URLs
# Get global URLs
services_whitelist_urls[services[0]] = {}
for kind in KINDS:
services_whitelist_urls[services[0]][kind] = set()
@ -137,7 +137,7 @@ try:
# Check if the URL has already been downloaded
if url in failed_urls:
continue
elif isinstance(cached_url, dict) and cached_url["last_update"] < (datetime.now().astimezone() - timedelta(hours=1)).timestamp():
elif isinstance(cached_url, dict) and cached_url["last_update"] > (datetime.now().astimezone() - timedelta(hours=1)).timestamp():
LOGGER.info(f"URL {url} has already been downloaded less than 1 hour ago, skipping download...")
# Remove first line (URL) and add to content
content += b"\n".join(cached_url["data"].split(b"\n")[1:]) + b"\n"
@ -179,7 +179,9 @@ try:
LOGGER.error(f"Exception while getting {service} whitelist from {url} :\n{e}")
failed_urls.add(url)
LOGGER.debug(f"Content for {service} {kind} : {content}")
if not content:
LOGGER.warning(f"No data for {service} {kind}, skipping...")
continue
# Check if file has changed
new_hash = bytes_hash(content)
@ -187,8 +189,11 @@ try:
if new_hash == old_hash:
LOGGER.info(f"New {service} file {kind}.list is identical to cache file, reload is not needed")
continue
elif old_hash:
LOGGER.info(f"New {service} file {kind}.list is different than cache file, reload is needed")
else:
LOGGER.info(f"New {service} file {kind}.list is not in cache, reload is needed")
LOGGER.info(f"New {service} file {kind}.list is different than cache file, reload is needed")
# Put file in cache
cached, err = JOB.cache_file(f"{kind}.list", content, service_id=service, checksum=new_hash)
if not cached:
@ -196,7 +201,7 @@ try:
status = 2
continue
status = 1
status = 1 if status != 2 else 2
# Remove old files
for url_file in JOB.job_path.glob("*.list"):

View file

@ -8,6 +8,7 @@ local whitelist = class("whitelist", plugin)
local ngx = ngx
local ERR = ngx.ERR
local INFO = ngx.INFO
local OK = ngx.OK
local WARN = ngx.WARN
local get_phase = ngx.get_phase
@ -46,7 +47,9 @@ function whitelist:initialize(ctx)
self.lists[kind] = {}
end
for data in self.variables["WHITELIST_" .. kind]:gmatch("%S+") do
table.insert(self.lists[kind], data)
if data ~= "" then
table.insert(self.lists[kind], data)
end
end
end
end
@ -97,8 +100,10 @@ function whitelist:init()
local f = open(file_path, "r")
if f then
for line in f:lines() do
table.insert(whitelists[kind], line)
i = i + 1
if line ~= "" then
table.insert(whitelists[kind], line)
i = i + 1
end
end
f:close()
end
@ -108,9 +113,15 @@ function whitelist:init()
local ok
ok, err = self.datastore:set("plugin_whitelist_lists_" .. key, whitelists, nil, true)
if not ok then
return self:ret(false, "can't store whitelist list into datastore : " .. err)
return self:ret(false, "can't store whitelist " .. key .. " list into datastore : " .. err)
end
self.logger:log(
INFO,
"successfully loaded " .. tostring(i) .. " IP/network/rDNS/ASN/User-Agent/URI for the service: " .. key
)
i = 0
whitelists = {
["IP"] = {},
["RDNS"] = {},
@ -119,7 +130,7 @@ function whitelist:init()
["URI"] = {},
}
end
return self:ret(true, "successfully loaded " .. tostring(i) .. " IP/network/rDNS/ASN/User-Agent/URI")
return self:ret(true, "successfully loaded all IP/network/rDNS/ASN/User-Agent/URI")
end
function whitelist:set()