fleet/server/contexts/ctxerr/metrics.go
Victor Lyuboslavsky 07949df530
Improved OpenTelemetry error handling (#38757)
<!-- Add the related story/sub-task/bug number, like Resolves #123, or
remove if NA -->
**Related issue:** Resolves #38756 

- Changed to NOT mark many client errors as exceptions
- Instead, added `fleet.http.client_errors` and `fleet.http.server_errors` counter metrics that can be used
to alert on unusual error rates

# Checklist for submitter

- [x] Changes file added for user-visible changes in `changes/`,
`orbit/changes/` or `ee/fleetd-chrome/changes`.

## Testing

- [x] Added/updated automated tests
- [x] QA'd all new/changed functionality manually

<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->

## Summary by CodeRabbit

* **New Features**
* Added separate metrics for distinguishing between client and server
errors, enhancing observability and monitoring capabilities.

* **Bug Fixes**
* Client request errors no longer incorrectly appear in error tracking
as exceptions; improved accuracy of error reporting to external
services.
* Adjusted logging levels for authentication and enrollment operations
to provide clearer diagnostics.

<sub>✏️ Tip: You can customize this high-level summary in your review
settings.</sub>

<!-- end of auto-generated comment: release notes by coderabbit.ai -->
2026-01-26 17:07:32 -06:00

56 lines
1.5 KiB
Go

package ctxerr
import (
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/metric"
)
// meter is the OpenTelemetry meter, named "fleet", from which this package's
// error counters are created.
var meter = otel.Meter("fleet")

// Error counters, split by who is responsible for the failure.
var (
	// clientErrorsCounter tallies client errors (4xx), broken down by type.
	// The client caused these (bad requests, auth failures, etc.), so per
	// OTEL semantic conventions they should not be treated as server errors.
	clientErrorsCounter metric.Int64Counter

	// serverErrorsCounter tallies server errors (5xx), broken down by type.
	// The server caused these, and they should be investigated.
	serverErrorsCounter metric.Int64Counter
)
func init() {
var err error
clientErrorsCounter, err = meter.Int64Counter(
"fleet.http.client_errors",
metric.WithDescription("Count of client errors (4xx) by error type"),
metric.WithUnit("{error}"),
)
if err != nil {
panic(err)
}
serverErrorsCounter, err = meter.Int64Counter(
"fleet.http.server_errors",
metric.WithDescription("Count of server errors (5xx) by error type"),
metric.WithUnit("{error}"),
)
if err != nil {
panic(err)
}
}
// clientErrorCounterAttrs builds the metric.AddOption used with client error
// counters: a single "error.type" string attribute set to errorType.
func clientErrorCounterAttrs(errorType string) metric.AddOption {
	errorTypeAttr := attribute.String("error.type", errorType)
	return metric.WithAttributes(errorTypeAttr)
}
// serverErrorCounterAttrs builds the metric.AddOption used with server error
// counters: a single "error.type" string attribute set to errorType.
func serverErrorCounterAttrs(errorType string) metric.AddOption {
	errorTypeAttr := attribute.String("error.type", errorType)
	return metric.WithAttributes(errorTypeAttr)
}