fleet/server/service/transport_error.go
Victor Lyuboslavsky 07949df530
Improved OpenTelemetry error handling (#38757)
<!-- Add the related story/sub-task/bug number, like Resolves #123, or
remove if NA -->
**Related issue:** Resolves #38756 

- Changed to NOT mark many client errors as exceptions
- Instead, added client_error and server_error metrics that can be used
to alert on unusual error rates

# Checklist for submitter

- [x] Changes file added for user-visible changes in `changes/`,
`orbit/changes/` or `ee/fleetd-chrome/changes`.

## Testing

- [x] Added/updated automated tests
- [x] QA'd all new/changed functionality manually

<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->

## Summary by CodeRabbit

* **New Features**
* Added separate metrics for distinguishing between client and server
errors, enhancing observability and monitoring capabilities.

* **Bug Fixes**
* Client request errors no longer incorrectly appear in error tracking
as exceptions; improved accuracy of error reporting to external
services.
* Adjusted logging levels for authentication and enrollment operations
to provide clearer diagnostics.

<sub>✏️ Tip: You can customize this high-level summary in your review
settings.</sub>

<!-- end of auto-generated comment: release notes by coderabbit.ai -->
2026-01-26 17:07:32 -06:00

123 lines
3.5 KiB
Go

package service
import (
"context"
"encoding/json"
"fmt"
"net/http"
"github.com/fleetdm/fleet/v4/server/platform/endpointer"
platform_http "github.com/fleetdm/fleet/v4/server/platform/http"
)
// FleetErrorEncoder writes the HTTP response for the two fleet-specific error
// types, MailError and *OsqueryError. The match is made on the dynamic type of
// err itself via a type switch.
//
// It returns true after writing a status to w and encoding a body through enc
// for one of those types, and false without touching w, enc or jsonErr for
// any other error. The ctx parameter is not used by this function.
func FleetErrorEncoder(ctx context.Context, err error, w http.ResponseWriter, enc *json.Encoder, jsonErr *endpointer.JsonError) bool {
	switch typed := err.(type) {
	case MailError:
		// Mail failures are always answered with a 500; the mail message is
		// surfaced as the "base" reason on the shared JSON error envelope.
		jsonErr.Message = "Mail Error"
		jsonErr.Errors = []map[string]string{
			{"name": "base", "reason": typed.Message},
		}
		w.WriteHeader(http.StatusInternalServerError)
		enc.Encode(jsonErr) //nolint:errcheck
		return true

	case *OsqueryError:
		// osquery looks for the node_invalid key when a TLS request carried
		// an invalid node_key. It ignores the error text, which is included
		// anyway for debugging (and in case osquery starts using it).
		body := map[string]any{
			"error": typed.Error(),
			"uuid":  jsonErr.UUID,
		}

		// Status precedence: an invalid node wins, then an explicit
		// non-zero status, and finally the 500 default.
		//
		// TODO: an OsqueryError is not always caused by an internal failure;
		// it also represents client errors (invalid data such as malformed
		// json, carve too large, etc., so 4xx). Is the 500 default here due
		// to some osquery-specific requirement?
		status := http.StatusInternalServerError
		switch {
		case typed.NodeInvalid():
			status = http.StatusUnauthorized
			body["node_invalid"] = true
		case typed.Status() != 0:
			status = typed.Status()
		}
		w.WriteHeader(status)
		enc.Encode(body) //nolint:errcheck
		return true

	default:
		return false
	}
}
// MailError is the error value used when a mail operation fails.
type MailError struct {
	// Message is the failure detail; Error appends it to a fixed prefix.
	Message string
}

// Error implements the error interface. The result is the fixed prefix
// "a mail error occurred: " followed by Message.
func (e MailError) Error() string {
	detail := e.Message
	return fmt.Sprintf("a mail error occurred: %s", detail)
}
// OsqueryError is the error returned to osquery agents.
//
// FleetErrorEncoder renders it as a JSON object carrying the error text and a
// UUID, and derives the HTTP status from nodeInvalid and StatusCode.
type OsqueryError struct {
// message is the text returned by Error.
message string
// nodeInvalid is returned by NodeInvalid. When true, FleetErrorEncoder
// answers with a 401 and adds "node_invalid": true to the body.
nodeInvalid bool
// StatusCode is returned by Status. FleetErrorEncoder uses it as the
// response status when it is non-zero and nodeInvalid is false; when it is
// zero the response falls back to a 500.
StatusCode int
// NOTE(review): embedded helper from platform_http; presumably it supplies
// the methods required by platform_http.ErrorUUIDer — confirm in that package.
platform_http.ErrorWithUUID
}
// Compile-time assertion that *OsqueryError implements platform_http.ErrorUUIDer.
var _ platform_http.ErrorUUIDer = (*OsqueryError)(nil)
// Error implements the error interface by returning the stored message
// unchanged. FleetErrorEncoder sends this text as the "error" field of the
// response body.
func (e *OsqueryError) Error() string {
return e.message
}
// NodeInvalid returns whether the error returned to osquery
// should contain the node_invalid property. It reports the nodeInvalid flag
// as stored on the error; IsClientError also treats a true value as a client
// error.
func (e *OsqueryError) NodeInvalid() bool {
return e.nodeInvalid
}
// Status returns the StatusCode field as-is. A zero value means no explicit
// status was set; FleetErrorEncoder then falls back to a 500 unless the node
// is invalid.
func (e *OsqueryError) Status() int {
return e.StatusCode
}
// IsClientError implements ErrWithIsClientError.
// It reports true in two situations: the node is invalid (an authentication
// failure), or StatusCode lies in the 4xx range, i.e. 400 through 499
// inclusive. Every other combination, including an unset (zero) StatusCode,
// reports false.
func (e *OsqueryError) IsClientError() bool {
	has4xxStatus := e.StatusCode >= 400 && e.StatusCode < 500
	return e.nodeInvalid || has4xxStatus
}
// NewOsqueryError returns a pointer to a new OsqueryError holding the given
// message and nodeInvalid flag. All other fields, including StatusCode, are
// left at their zero values.
func NewOsqueryError(message string, nodeInvalid bool) *OsqueryError {
	osqErr := new(OsqueryError)
	osqErr.message = message
	osqErr.nodeInvalid = nodeInvalid
	return osqErr
}
// encodeError is a convenience function that calls endpointer.EncodeError
// with the FleetErrorEncoder. Use this for direct error encoding in handlers.
// ctx, err and w are forwarded unchanged; nothing is returned to the caller.
// NOTE(review): how endpointer.EncodeError combines its own handling with
// FleetErrorEncoder is defined in the endpointer package — confirm there.
func encodeError(ctx context.Context, err error, w http.ResponseWriter) {
endpointer.EncodeError(ctx, err, w, FleetErrorEncoder)
}
// fleetErrorEncoder is an adapter that wraps endpointer.EncodeError with
// FleetErrorEncoder for use as a kithttp.ErrorEncoder.
// It forwards ctx, err and w unchanged and supplies FleetErrorEncoder as the
// fourth argument, leaving a three-argument function with no return value.
func fleetErrorEncoder(ctx context.Context, err error, w http.ResponseWriter) {
endpointer.EncodeError(ctx, err, w, FleetErrorEncoder)
}