mirror of
https://github.com/argoproj/argo-cd
synced 2026-04-21 17:07:16 +00:00
feat: Add health checks for Coralogix (#23853)
Signed-off-by: Daniel Leinov <daniellei@jfrog.com> Co-authored-by: Pasha Kostohrys <pasha.kostohrys@gmail.com>
This commit is contained in:
parent
0a1572b9d9
commit
dcf1965c52
10 changed files with 337 additions and 0 deletions
21
resource_customizations/coralogix.com/Alert/health.lua
Normal file
21
resource_customizations/coralogix.com/Alert/health.lua
Normal file
|
|
@ -0,0 +1,21 @@
|
||||||
|
-- Health check script for the coralogix.com Alert resource.
--
-- Input:  `obj`, the resource under evaluation. It is not defined in this
--         script and is expected to be supplied by the embedding host.
-- Output: a table with `status` and `message` fields.
--
-- The result is driven by the first condition in obj.status.conditions whose
-- type is "RemoteSynced" and whose status is "True" or "False":
--   "True"  -> Healthy,  message "Resource is ready"
--   "False" -> Degraded, message copied from the condition
-- In every other case (no status, no conditions, no matching condition, or a
-- RemoteSynced condition with some other status value) the default
-- Progressing result is returned.

-- Declared local so the result table does not leak into the global environment.
local hs = {}

-- Default result, used until a decisive RemoteSynced condition is found.
hs.status = "Progressing"
hs.message = "Waiting for status to be updated"

if obj.status ~= nil and obj.status.conditions ~= nil then
  -- The loop index is not needed, hence `_`.
  for _, condition in ipairs(obj.status.conditions) do
    if condition.type == "RemoteSynced" then
      if condition.status == "True" then
        hs.status = "Healthy"
        hs.message = "Resource is ready"
        return hs
      elseif condition.status == "False" then
        hs.status = "Degraded"
        -- Surface the failure reason reported on the condition itself.
        hs.message = condition.message
        return hs
      end
      -- Any other status value: keep scanning the remaining conditions.
    end
  end
end

return hs
|
||||||
16
resource_customizations/coralogix.com/Alert/health_test.yaml
Normal file
16
resource_customizations/coralogix.com/Alert/health_test.yaml
Normal file
|
|
@ -0,0 +1,16 @@
|
||||||
|
tests:
|
||||||
|
- healthStatus:
|
||||||
|
status: Degraded
|
||||||
|
message: "error on extracting alert properties: failed to expand notification
|
||||||
|
group: failed to expand webhooks settings: failed to expand webhook
|
||||||
|
setting: failed to expand integration: failed to convert name to
|
||||||
|
integration ID: webhook critical-alerts-webhook not found"
|
||||||
|
inputPath: testdata/degraded_alert.yaml
|
||||||
|
- healthStatus:
|
||||||
|
status: Progressing
|
||||||
|
message: "Waiting for status to be updated"
|
||||||
|
inputPath: testdata/progressing_alert.yaml
|
||||||
|
- healthStatus:
|
||||||
|
status: Healthy
|
||||||
|
message: "Resource is ready"
|
||||||
|
inputPath: testdata/healthy_alert.yaml
|
||||||
58
resource_customizations/coralogix.com/Alert/testdata/degraded_alert.yaml
vendored
Normal file
58
resource_customizations/coralogix.com/Alert/testdata/degraded_alert.yaml
vendored
Normal file
|
|
@ -0,0 +1,58 @@
|
||||||
|
apiVersion: coralogix.com/v1beta1
|
||||||
|
kind: Alert
|
||||||
|
metadata:
|
||||||
|
name: bitbucketcontainernotrunning-test
|
||||||
|
spec:
|
||||||
|
alertType:
|
||||||
|
metricThreshold:
|
||||||
|
metricFilter:
|
||||||
|
promql: >-
|
||||||
|
sum({namespace="bitbucket",pod=~"bitbucket-k8s-.*",condition="false"}) by (pod)
|
||||||
|
missingValues:
|
||||||
|
replaceWithZero: true
|
||||||
|
rules:
|
||||||
|
- condition:
|
||||||
|
conditionType: moreThan
|
||||||
|
forOverPct: 100
|
||||||
|
ofTheLast:
|
||||||
|
specificValue: 5m
|
||||||
|
threshold: 0
|
||||||
|
override:
|
||||||
|
priority: p1
|
||||||
|
description: >-
|
||||||
|
Bitbucket one of the container is not running
|
||||||
|
entityLabels:
|
||||||
|
app: bitbucket
|
||||||
|
name: Bitbucketcontainernotrunning-test
|
||||||
|
notificationGroup:
|
||||||
|
groupByKeys:
|
||||||
|
- pod
|
||||||
|
webhooks:
|
||||||
|
- integration:
|
||||||
|
integrationRef:
|
||||||
|
backendRef:
|
||||||
|
name: opsgenie-example
|
||||||
|
notifyOn: triggeredAndResolved
|
||||||
|
retriggeringPeriod:
|
||||||
|
minutes: 60
|
||||||
|
- integration:
|
||||||
|
integrationRef:
|
||||||
|
backendRef:
|
||||||
|
name: critical-alerts-webhook
|
||||||
|
notifyOn: triggeredAndResolved
|
||||||
|
retriggeringPeriod:
|
||||||
|
minutes: 60
|
||||||
|
priority: p1
|
||||||
|
status:
|
||||||
|
conditions:
|
||||||
|
- lastTransitionTime: '2025-07-17T07:39:54Z'
|
||||||
|
message: >-
|
||||||
|
error on extracting alert properties: failed to expand notification
|
||||||
|
group: failed to expand webhooks settings: failed to expand webhook
|
||||||
|
setting: failed to expand integration: failed to convert name to
|
||||||
|
integration ID: webhook critical-alerts-webhook not found
|
||||||
|
observedGeneration: 1
|
||||||
|
reason: RemoteCreationFailed
|
||||||
|
status: 'False'
|
||||||
|
type: RemoteSynced
|
||||||
|
|
||||||
53
resource_customizations/coralogix.com/Alert/testdata/healthy_alert.yaml
vendored
Normal file
53
resource_customizations/coralogix.com/Alert/testdata/healthy_alert.yaml
vendored
Normal file
|
|
@ -0,0 +1,53 @@
|
||||||
|
apiVersion: coralogix.com/v1beta1
|
||||||
|
kind: Alert
|
||||||
|
metadata:
|
||||||
|
name: bitbucketcontainernotrunning-test
|
||||||
|
spec:
|
||||||
|
alertType:
|
||||||
|
metricThreshold:
|
||||||
|
metricFilter:
|
||||||
|
promql: >-
|
||||||
|
sum({namespace="bitbucket",pod=~"bitbucket-k8s-.*",condition="false"}) by (pod)
|
||||||
|
missingValues:
|
||||||
|
replaceWithZero: true
|
||||||
|
rules:
|
||||||
|
- condition:
|
||||||
|
conditionType: moreThan
|
||||||
|
forOverPct: 100
|
||||||
|
ofTheLast:
|
||||||
|
specificValue: 5m
|
||||||
|
threshold: 0
|
||||||
|
override:
|
||||||
|
priority: p1
|
||||||
|
description: >-
|
||||||
|
Bitbucket one of the container is not running
|
||||||
|
entityLabels:
|
||||||
|
app: bitbucket
|
||||||
|
name: Bitbucketcontainernotrunning-test
|
||||||
|
notificationGroup:
|
||||||
|
groupByKeys:
|
||||||
|
- pod
|
||||||
|
webhooks:
|
||||||
|
- integration:
|
||||||
|
integrationRef:
|
||||||
|
backendRef:
|
||||||
|
name: opsgenie-example
|
||||||
|
notifyOn: triggeredAndResolved
|
||||||
|
retriggeringPeriod:
|
||||||
|
minutes: 60
|
||||||
|
- integration:
|
||||||
|
integrationRef:
|
||||||
|
backendRef:
|
||||||
|
name: critical-alerts-webhook
|
||||||
|
notifyOn: triggeredAndResolved
|
||||||
|
retriggeringPeriod:
|
||||||
|
minutes: 60
|
||||||
|
priority: p1
|
||||||
|
status:
|
||||||
|
conditions:
|
||||||
|
- lastTransitionTime: '2025-07-17T07:39:55Z'
|
||||||
|
message: Remote resource synced
|
||||||
|
observedGeneration: 3
|
||||||
|
reason: RemoteSyncedSuccessfully
|
||||||
|
status: 'True'
|
||||||
|
type: RemoteSynced
|
||||||
46
resource_customizations/coralogix.com/Alert/testdata/progressing_alert.yaml
vendored
Normal file
46
resource_customizations/coralogix.com/Alert/testdata/progressing_alert.yaml
vendored
Normal file
|
|
@ -0,0 +1,46 @@
|
||||||
|
apiVersion: coralogix.com/v1beta1
|
||||||
|
kind: Alert
|
||||||
|
metadata:
|
||||||
|
name: bitbucketcontainernotrunning-test
|
||||||
|
spec:
|
||||||
|
alertType:
|
||||||
|
metricThreshold:
|
||||||
|
metricFilter:
|
||||||
|
promql: >-
|
||||||
|
sum({namespace="bitbucket",pod=~"bitbucket-k8s-.*",condition="false"}) by (pod)
|
||||||
|
missingValues:
|
||||||
|
replaceWithZero: true
|
||||||
|
rules:
|
||||||
|
- condition:
|
||||||
|
conditionType: moreThan
|
||||||
|
forOverPct: 100
|
||||||
|
ofTheLast:
|
||||||
|
specificValue: 5m
|
||||||
|
threshold: 0
|
||||||
|
override:
|
||||||
|
priority: p1
|
||||||
|
description: >-
|
||||||
|
Bitbucket one of the container is not running
|
||||||
|
entityLabels:
|
||||||
|
app: bitbucket
|
||||||
|
name: Bitbucketcontainernotrunning-test
|
||||||
|
notificationGroup:
|
||||||
|
groupByKeys:
|
||||||
|
- pod
|
||||||
|
webhooks:
|
||||||
|
- integration:
|
||||||
|
integrationRef:
|
||||||
|
backendRef:
|
||||||
|
name: opsgenie-example
|
||||||
|
notifyOn: triggeredAndResolved
|
||||||
|
retriggeringPeriod:
|
||||||
|
minutes: 60
|
||||||
|
- integration:
|
||||||
|
integrationRef:
|
||||||
|
backendRef:
|
||||||
|
name: critical-alerts-webhook
|
||||||
|
notifyOn: triggeredAndResolved
|
||||||
|
retriggeringPeriod:
|
||||||
|
minutes: 60
|
||||||
|
priority: p1
|
||||||
|
|
||||||
|
|
@ -0,0 +1,21 @@
|
||||||
|
-- Health check script driven by the "RemoteSynced" status condition.
-- NOTE(review): the file header for this hunk is not visible here; the
-- neighbouring fixtures suggest it belongs to RecordingRuleGroupSet - confirm.
--
-- Input:  `obj`, the resource under evaluation. It is not defined in this
--         script and is expected to be supplied by the embedding host.
-- Output: a table with `status` and `message` fields.
--
-- The result is driven by the first condition in obj.status.conditions whose
-- type is "RemoteSynced" and whose status is "True" or "False":
--   "True"  -> Healthy,  message "Resource is ready"
--   "False" -> Degraded, message copied from the condition
-- In every other case (no status, no conditions, no matching condition, or a
-- RemoteSynced condition with some other status value) the default
-- Progressing result is returned.

-- Declared local so the result table does not leak into the global environment.
local hs = {}

-- Default result, used until a decisive RemoteSynced condition is found.
hs.status = "Progressing"
hs.message = "Waiting for status to be updated"

if obj.status ~= nil and obj.status.conditions ~= nil then
  -- The loop index is not needed, hence `_`.
  for _, condition in ipairs(obj.status.conditions) do
    if condition.type == "RemoteSynced" then
      if condition.status == "True" then
        hs.status = "Healthy"
        hs.message = "Resource is ready"
        return hs
      elseif condition.status == "False" then
        hs.status = "Degraded"
        -- Surface the failure reason reported on the condition itself.
        hs.message = condition.message
        return hs
      end
      -- Any other status value: keep scanning the remaining conditions.
    end
  end
end

return hs
|
||||||
|
|
@ -0,0 +1,41 @@
|
||||||
|
tests:
|
||||||
|
- healthStatus:
|
||||||
|
status: Degraded
|
||||||
|
message: >-
|
||||||
|
error on creating remote recordingRuleGroupSet: SDK API error from /com.coralogixapis.metrics_rule_manager.v1.RuleGroupSets/Create for feature group recording-rules: rpc error: code = InvalidArgument desc = {
|
||||||
|
"groups": {
|
||||||
|
"0": {
|
||||||
|
"rules": {
|
||||||
|
"0": {
|
||||||
|
"record": [
|
||||||
|
{
|
||||||
|
"code": "length",
|
||||||
|
"message": null,
|
||||||
|
"params": {
|
||||||
|
"value": "",
|
||||||
|
"min": 1
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"code": "invalid_promql",
|
||||||
|
"message": "SingleExpr: unexpected token ; want \"\"(\", \"{\", \"-\", \"+\"\"",
|
||||||
|
"params": {
|
||||||
|
"value": ""
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
inputPath: testdata/degraded_recording_rule.yaml
|
||||||
|
- healthStatus:
|
||||||
|
status: Progressing
|
||||||
|
message: "Waiting for status to be updated"
|
||||||
|
inputPath: testdata/progressing_recording_rule.yaml
|
||||||
|
- healthStatus:
|
||||||
|
status: Healthy
|
||||||
|
message: "Resource is ready"
|
||||||
|
inputPath: testdata/healthy_recording_rule.yaml
|
||||||
|
|
@ -0,0 +1,47 @@
|
||||||
|
apiVersion: coralogix.com/v1alpha1
|
||||||
|
kind: RecordingRuleGroupSet
|
||||||
|
metadata:
|
||||||
|
name: rules
|
||||||
|
spec:
|
||||||
|
groups:
|
||||||
|
- name: k8s_rules
|
||||||
|
rules:
|
||||||
|
- expr: >-
|
||||||
|
sum(rate(container_cpu_usage_seconds_total{job="kubelet",
|
||||||
|
metrics_path="/metrics/cadvisor", image!="", container!="POD"}[5m]))
|
||||||
|
by (namespace)
|
||||||
|
status:
|
||||||
|
conditions:
|
||||||
|
- lastTransitionTime: "2025-07-17T14:41:18Z"
|
||||||
|
message: |-
|
||||||
|
error on creating remote recordingRuleGroupSet: SDK API error from /com.coralogixapis.metrics_rule_manager.v1.RuleGroupSets/Create for feature group recording-rules: rpc error: code = InvalidArgument desc = {
|
||||||
|
"groups": {
|
||||||
|
"0": {
|
||||||
|
"rules": {
|
||||||
|
"0": {
|
||||||
|
"record": [
|
||||||
|
{
|
||||||
|
"code": "length",
|
||||||
|
"message": null,
|
||||||
|
"params": {
|
||||||
|
"value": "",
|
||||||
|
"min": 1
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"code": "invalid_promql",
|
||||||
|
"message": "SingleExpr: unexpected token ; want \"\"(\", \"{\", \"-\", \"+\"\"",
|
||||||
|
"params": {
|
||||||
|
"value": ""
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
observedGeneration: 1
|
||||||
|
reason: RemoteCreationFailed
|
||||||
|
status: "False"
|
||||||
|
type: RemoteSynced
|
||||||
21
resource_customizations/coralogix.com/RecordingRuleGroupSet/testdata/healthy_recording_rule.yaml
vendored
Normal file
21
resource_customizations/coralogix.com/RecordingRuleGroupSet/testdata/healthy_recording_rule.yaml
vendored
Normal file
|
|
@ -0,0 +1,21 @@
|
||||||
|
apiVersion: coralogix.com/v1alpha1
|
||||||
|
kind: RecordingRuleGroupSet
|
||||||
|
metadata:
|
||||||
|
name: rules
|
||||||
|
spec:
|
||||||
|
groups:
|
||||||
|
- name: k8s_rules
|
||||||
|
rules:
|
||||||
|
- expr: >-
|
||||||
|
sum(rate(container_cpu_usage_seconds_total{job="kubelet",
|
||||||
|
metrics_path="/metrics/cadvisor", image!="", container!="POD"}[5m]))
|
||||||
|
by (namespace)
|
||||||
|
record: 'namespace:container_cpu_usage_seconds_total:sum_rate'
|
||||||
|
status:
|
||||||
|
conditions:
|
||||||
|
- lastTransitionTime: '2025-05-27T08:49:26Z'
|
||||||
|
message: Remote resource synced
|
||||||
|
observedGeneration: 3
|
||||||
|
reason: RemoteSyncedSuccessfully
|
||||||
|
status: 'True'
|
||||||
|
type: RemoteSynced
|
||||||
|
|
@ -0,0 +1,13 @@
|
||||||
|
apiVersion: coralogix.com/v1alpha1
|
||||||
|
kind: RecordingRuleGroupSet
|
||||||
|
metadata:
|
||||||
|
name: rules
|
||||||
|
spec:
|
||||||
|
groups:
|
||||||
|
- name: k8s_rules
|
||||||
|
rules:
|
||||||
|
- expr: >-
|
||||||
|
sum(rate(container_cpu_usage_seconds_total{job="kubelet",
|
||||||
|
metrics_path="/metrics/cadvisor", image!="", container!="POD"}[5m]))
|
||||||
|
by (namespace)
|
||||||
|
record: 'namespace:container_cpu_usage_seconds_total:sum_rate'
|
||||||
Loading…
Reference in a new issue