mirror of
https://github.com/fleetdm/fleet
synced 2026-05-16 13:38:43 +00:00
Adding telemetry for catching issue #19172

# Docs changes

In another PR: https://github.com/fleetdm/fleet/pull/23423/files

# Demo

<div>
  <a href="https://www.loom.com/share/233625875eec46508c26ae315cd52d19">
    <p>[Demo] Add telemetry for vital fleetd errors - Issue #23413 - Watch Video</p>
  </a>
  <a href="https://www.loom.com/share/233625875eec46508c26ae315cd52d19">
    <img style="max-width:300px;" src="https://cdn.loom.com/sessions/thumbnails/233625875eec46508c26ae315cd52d19-45ca0ec1b7b5e9e7-full-play.gif">
  </a>
</div>

# Checklist for submitter

- [x] Changes file added for user-visible changes in `changes/`, `orbit/changes/` or `ee/fleetd-chrome/changes`. See [Changes files](https://github.com/fleetdm/fleet/blob/main/docs/Contributing/Committing-Changes.md#changes-files) for more information.
- [x] Added/updated tests
- [x] Manual QA for all new/changed functionality
  - For Orbit and Fleet Desktop changes:
    - [x] Orbit runs on macOS, Linux and Windows. Check if the orbit feature/bugfix should only apply to one platform (`runtime.GOOS`).
    - [x] Manual QA must be performed in the three main OSs, macOS, Windows and Linux.
    - [x] Auto-update manual QA, from released version of component to new version (see [tools/tuf/test](../tools/tuf/test/README.md)).
90 lines
2.3 KiB
Go
90 lines
2.3 KiB
Go
package ctxerr
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
|
|
"github.com/fleetdm/fleet/v4/server/fleet"
|
|
)
|
|
|
|
// ErrorAgg is the aggregated summary of a single stored error, as produced by
// Aggregate.
type ErrorAgg struct {
	// Count is the number of occurrences recorded for the error.
	Count int `json:"count"`
	// Loc is the reduced stack trace used for debugging the error.
	Loc []string `json:"loc"`
	// Metadata holds additional, already JSON-encoded data for the error. It
	// is omitted from the encoded output when empty.
	Metadata json.RawMessage `json:"metadata,omitempty"`
}
|
|
|
|
// Aggregate retrieves all errors in the store and returns an aggregated,
|
|
// json-formatted summary containing:
|
|
// - The number of occurrences of each error
|
|
// - A reduced stack trace used for debugging the error
|
|
// - Additional metadata present for vital errors
|
|
func Aggregate(ctx context.Context) (json.RawMessage, error) {
|
|
const maxTraceLen = 3
|
|
empty := json.RawMessage("[]")
|
|
|
|
storedErrs, err := Retrieve(ctx)
|
|
if err != nil {
|
|
return empty, Wrap(ctx, err, "retrieve on aggregation")
|
|
}
|
|
|
|
aggs := make([]ErrorAgg, len(storedErrs))
|
|
for i, stored := range storedErrs {
|
|
var ferr []fleetErrorJSON
|
|
if err = json.Unmarshal(stored.Chain, &ferr); err != nil {
|
|
return empty, Wrap(ctx, err, "unmarshal on aggregation")
|
|
}
|
|
|
|
stack := aggregateStack(ferr, maxTraceLen)
|
|
meta := getVitalMetadata(ferr)
|
|
aggs[i] = ErrorAgg{stored.Count, stack, meta}
|
|
}
|
|
|
|
return json.Marshal(aggs)
|
|
}
|
|
|
|
// aggregateStack creates a single stack trace by joining all the stack traces in
|
|
// an error chain
|
|
func aggregateStack(chain []fleetErrorJSON, max int) []string {
|
|
stack := make([]string, max)
|
|
stackIdx := 0
|
|
|
|
out:
|
|
for _, e := range chain {
|
|
for _, m := range e.Stack {
|
|
if stackIdx >= max {
|
|
break out
|
|
}
|
|
|
|
stack[stackIdx] = m
|
|
stackIdx++
|
|
}
|
|
}
|
|
|
|
return stack[:stackIdx]
|
|
}
|
|
|
|
func getVitalMetadata(chain []fleetErrorJSON) json.RawMessage {
|
|
for _, e := range chain {
|
|
if len(e.Data) > 0 {
|
|
// Currently, only vital fleetd errors contain metadata.
|
|
// Note: vital errors should not contain any sensitive info
|
|
var fleetdErr fleet.FleetdError
|
|
var err error
|
|
if err = json.Unmarshal(e.Data, &fleetdErr); err != nil || !fleetdErr.Vital {
|
|
continue
|
|
}
|
|
var export = map[string]interface{}{
|
|
"error_source": fleetdErr.ErrorSource,
|
|
"error_source_version": fleetdErr.ErrorSourceVersion,
|
|
"error_message": fleetdErr.ErrorMessage,
|
|
"error_additional_info": fleetdErr.ErrorAdditionalInfo,
|
|
}
|
|
var meta json.RawMessage
|
|
if meta, err = json.Marshal(export); err != nil {
|
|
return nil
|
|
}
|
|
return meta
|
|
}
|
|
}
|
|
return nil
|
|
}
|