mirror of
https://github.com/hyperdxio/hyperdx
synced 2026-04-21 13:37:15 +00:00
fix(otel-collector): improve log level extraction with word boundaries in regex (#1747)
For a log line like
```
x-amz-id-2: WxwS/N175wqLyRlzCXLpGZGszCEbQA0f63uFgdQN1qfcPr2IAmwE/P7HF2b1NdZLg18pNLF3ecTw5CrItXJid/uLe+fxh3jMBiJ7UlUxidw=
```
The level will be inferred as fatal because the body contains the substring `CrIt`; that inference is incorrect.
To fix this, we add a word boundary (`\b`) at the start of the keyword pattern.
Ref: HDX-3439
Note: Claude made a mistake in the following statement:
```
❌ Test expects "ALERTING" to match "alert" keyword → "ALERTING" won't match with word boundary because "alert" is a substring, not at a word boundary. Expected should be "info",9,"ALERTING system engaged" not "fatal",21.
```
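-> This statement is incorrect: the word boundary is required only before the keyword, so "ALERTING" still matches `alert` and is correctly inferred as fatal.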
This commit is contained in:
parent
7679b80f13
commit
4c42fdc3a4
9 changed files with 162 additions and 1 deletions
5
.changeset/long-pianos-wait.md
Normal file
5
.changeset/long-pianos-wait.md
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
---
|
||||
"@hyperdx/otel-collector": minor
|
||||
---
|
||||
|
||||
fix: improve log level extraction with word boundaries in regex
|
||||
|
|
@ -19,7 +19,7 @@ processors:
|
|||
# Infer: extract the first log level keyword from the first 256 characters of the body
|
||||
- set(log.cache["substr"], log.body.string) where Len(log.body.string) < 256
|
||||
- set(log.cache["substr"], Substring(log.body.string, 0, 256)) where Len(log.body.string) >= 256
|
||||
- set(log.cache, ExtractPatterns(log.cache["substr"], "(?i)(?P<0>(alert|crit|emerg|fatal|error|err|warn|notice|debug|dbug|trace))"))
|
||||
- set(log.cache, ExtractPatterns(log.cache["substr"], "(?i)(?P<0>\\b(alert|crit|emerg|fatal|error|err|warn|notice|debug|dbug|trace))"))
|
||||
# Infer: detect FATAL
|
||||
- set(log.severity_number, SEVERITY_NUMBER_FATAL) where IsMatch(log.cache["0"], "(?i)(alert|crit|emerg|fatal)")
|
||||
- set(log.severity_text, "fatal") where log.severity_number == SEVERITY_NUMBER_FATAL
|
||||
|
|
|
|||
|
|
@ -0,0 +1 @@
|
|||
SELECT SeverityText, SeverityNumber, Body FROM otel_logs WHERE ResourceAttributes['suite-id'] = 'severity-inference' AND ResourceAttributes['test-id'] = 'infer-superstring' ORDER BY TimestampTime FORMAT CSV
|
||||
|
|
@ -0,0 +1,8 @@
|
|||
"warn",13,"WARNING: disk space running low"
|
||||
"fatal",21,"CRITICAL: database connection pool exhausted"
|
||||
"fatal",21,"EMERGENCY: system failure imminent"
|
||||
"fatal",21,"ALERTING system engaged"
|
||||
"error",17,"ERRORS detected in application"
|
||||
"warn",13,"NOTICED unusual activity in request handler"
|
||||
"debug",5,"DEBUGGING enabled for module"
|
||||
"trace",1,"TRACED request path through gateway"
|
||||
|
|
@ -0,0 +1,77 @@
|
|||
{
|
||||
"resourceLogs": [
|
||||
{
|
||||
"resource": {
|
||||
"attributes": [
|
||||
{
|
||||
"key": "suite-id",
|
||||
"value": {
|
||||
"stringValue": "severity-inference"
|
||||
}
|
||||
},
|
||||
{
|
||||
"key": "test-id",
|
||||
"value": {
|
||||
"stringValue": "infer-superstring"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"scopeLogs": [
|
||||
{
|
||||
"scope": {},
|
||||
"logRecords": [
|
||||
{
|
||||
"timeUnixNano": "1901999580000000000",
|
||||
"body": {
|
||||
"stringValue": "WARNING: disk space running low"
|
||||
}
|
||||
},
|
||||
{
|
||||
"timeUnixNano": "1901999580000000001",
|
||||
"body": {
|
||||
"stringValue": "CRITICAL: database connection pool exhausted"
|
||||
}
|
||||
},
|
||||
{
|
||||
"timeUnixNano": "1901999580000000002",
|
||||
"body": {
|
||||
"stringValue": "EMERGENCY: system failure imminent"
|
||||
}
|
||||
},
|
||||
{
|
||||
"timeUnixNano": "1901999580000000003",
|
||||
"body": {
|
||||
"stringValue": "ALERTING system engaged"
|
||||
}
|
||||
},
|
||||
{
|
||||
"timeUnixNano": "1901999580000000004",
|
||||
"body": {
|
||||
"stringValue": "ERRORS detected in application"
|
||||
}
|
||||
},
|
||||
{
|
||||
"timeUnixNano": "1901999580000000005",
|
||||
"body": {
|
||||
"stringValue": "NOTICED unusual activity in request handler"
|
||||
}
|
||||
},
|
||||
{
|
||||
"timeUnixNano": "1901999580000000006",
|
||||
"body": {
|
||||
"stringValue": "DEBUGGING enabled for module"
|
||||
}
|
||||
},
|
||||
{
|
||||
"timeUnixNano": "1901999580000000007",
|
||||
"body": {
|
||||
"stringValue": "TRACED request path through gateway"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -0,0 +1 @@
|
|||
SELECT SeverityText, SeverityNumber, Body FROM otel_logs WHERE ResourceAttributes['suite-id'] = 'severity-inference' AND ResourceAttributes['test-id'] = 'no-infer-substring' ORDER BY TimestampTime FORMAT CSV
|
||||
|
|
@ -0,0 +1,4 @@
|
|||
"info",9,"x-amz-id-2 : abc123/COuECrITmh"
|
||||
"info",9,"txn_id=ab3cdErrF8x processing complete"
|
||||
"info",9,"Forewarn systems check passed"
|
||||
"info",9,"Request backtraced to origin"
|
||||
|
|
@ -0,0 +1,53 @@
|
|||
{
|
||||
"resourceLogs": [
|
||||
{
|
||||
"resource": {
|
||||
"attributes": [
|
||||
{
|
||||
"key": "suite-id",
|
||||
"value": {
|
||||
"stringValue": "severity-inference"
|
||||
}
|
||||
},
|
||||
{
|
||||
"key": "test-id",
|
||||
"value": {
|
||||
"stringValue": "no-infer-substring"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"scopeLogs": [
|
||||
{
|
||||
"scope": {},
|
||||
"logRecords": [
|
||||
{
|
||||
"timeUnixNano": "1901999580000000000",
|
||||
"body": {
|
||||
"stringValue": "x-amz-id-2 : abc123/COuECrITmh"
|
||||
}
|
||||
},
|
||||
{
|
||||
"timeUnixNano": "1901999580000000001",
|
||||
"body": {
|
||||
"stringValue": "txn_id=ab3cdErrF8x processing complete"
|
||||
}
|
||||
},
|
||||
{
|
||||
"timeUnixNano": "1901999580000000002",
|
||||
"body": {
|
||||
"stringValue": "Forewarn systems check passed"
|
||||
}
|
||||
},
|
||||
{
|
||||
"timeUnixNano": "1901999580000000003",
|
||||
"body": {
|
||||
"stringValue": "Request backtraced to origin"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -44,3 +44,15 @@ load 'test_helpers/assertions.bash'
|
|||
sleep 1
|
||||
assert_test_data "data/severity-inference/skip-infer"
|
||||
}
|
||||
|
||||
@test "should not infer severity from keywords embedded mid-word" {
|
||||
emit_otel_data "http://localhost:4318" "data/severity-inference/no-infer-substring"
|
||||
sleep 1
|
||||
assert_test_data "data/severity-inference/no-infer-substring"
|
||||
}
|
||||
|
||||
@test "should infer severity from superstring keywords like WARNING and CRITICAL" {
|
||||
emit_otel_data "http://localhost:4318" "data/severity-inference/infer-superstring"
|
||||
sleep 1
|
||||
assert_test_data "data/severity-inference/infer-superstring"
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue