mirror of
https://github.com/fleetdm/fleet
synced 2026-05-24 09:28:54 +00:00
229 lines
7.7 KiB
JSON
229 lines
7.7 KiB
JSON
|
|
{
|
||
|
|
"title": "Fleet host cache",
|
||
|
|
"description": "Observability for the Redis-backed host lookup cache fronting LoadHostByNodeKey and LoadHostByOrbitNodeKey. Shows hit rate, lookup volume by result, error volume by operation, and invalidation volume by write-path reason.",
|
||
|
|
"tags": ["redis", "cache", "host-cache"],
|
||
|
|
"layout": [
|
||
|
|
{ "i": "row-overview", "x": 0, "y": 0, "w": 12, "h": 1 },
|
||
|
|
{ "i": "hit-rate", "x": 0, "y": 1, "w": 4, "h": 4 },
|
||
|
|
{ "i": "lookups", "x": 4, "y": 1, "w": 8, "h": 4 },
|
||
|
|
{ "i": "errors", "x": 0, "y": 5, "w": 6, "h": 4 },
|
||
|
|
{ "i": "invalidations", "x": 6, "y": 5, "w": 6, "h": 4 }
|
||
|
|
],
|
||
|
|
"widgets": [
|
||
|
|
{
|
||
|
|
"id": "row-overview",
|
||
|
|
"panelTypes": "row",
|
||
|
|
"title": "Host cache overview",
|
||
|
|
"query": { "queryType": "builder", "promql": [], "clickhouse_sql": [], "builder": { "queryData": [], "queryFormulas": [] } },
|
||
|
|
"selectedLogFields": [],
|
||
|
|
"selectedTracesFields": [],
|
||
|
|
"thresholds": [],
|
||
|
|
"contextLinks": { "linksData": [] }
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"id": "hit-rate",
|
||
|
|
"panelTypes": "graph",
|
||
|
|
"title": "Hit rate",
|
||
|
|
"description": "A/B where A = rate of hits + negative_hits (both Redis-served, both avoid MySQL), B = rate of all lookups. Target: >= 80% at steady state once the cache warms. Watch for drops during invalidation storms (mass team transfers, re-enrollments).",
|
||
|
|
"yAxisUnit": "percentunit",
|
||
|
|
"legendPosition": "bottom",
|
||
|
|
"query": {
|
||
|
|
"queryType": "builder",
|
||
|
|
"promql": [],
|
||
|
|
"clickhouse_sql": [],
|
||
|
|
"builder": {
|
||
|
|
"queryData": [
|
||
|
|
{
|
||
|
|
"queryName": "A",
|
||
|
|
"dataSource": "metrics",
|
||
|
|
"expression": "A",
|
||
|
|
"disabled": true,
|
||
|
|
"stepInterval": 60,
|
||
|
|
"aggregations": [
|
||
|
|
{
|
||
|
|
"metricName": "fleet.host_cache.lookups",
|
||
|
|
"temporality": "Cumulative",
|
||
|
|
"timeAggregation": "rate",
|
||
|
|
"spaceAggregation": "sum"
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"filter": { "expression": "result IN ['hit', 'negative_hit']" },
|
||
|
|
"groupBy": [],
|
||
|
|
"orderBy": [],
|
||
|
|
"selectColumns": [],
|
||
|
|
"functions": []
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"queryName": "B",
|
||
|
|
"dataSource": "metrics",
|
||
|
|
"expression": "B",
|
||
|
|
"disabled": true,
|
||
|
|
"stepInterval": 60,
|
||
|
|
"aggregations": [
|
||
|
|
{
|
||
|
|
"metricName": "fleet.host_cache.lookups",
|
||
|
|
"temporality": "Cumulative",
|
||
|
|
"timeAggregation": "rate",
|
||
|
|
"spaceAggregation": "sum"
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"filter": { "expression": "" },
|
||
|
|
"groupBy": [],
|
||
|
|
"orderBy": [],
|
||
|
|
"selectColumns": [],
|
||
|
|
"functions": []
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"queryFormulas": [
|
||
|
|
{
|
||
|
|
"queryName": "F1",
|
||
|
|
"expression": "A / B",
|
||
|
|
"legend": "hit rate"
|
||
|
|
}
|
||
|
|
]
|
||
|
|
}
|
||
|
|
},
|
||
|
|
"thresholds": [
|
||
|
|
{ "index": "1", "keyIndex": 0, "thresholdColor": "Orange", "thresholdFormat": "Line", "thresholdOperator": "<", "thresholdUnit": "percentunit", "thresholdValue": 0.8 }
|
||
|
|
],
|
||
|
|
"selectedLogFields": [],
|
||
|
|
"selectedTracesFields": [],
|
||
|
|
"contextLinks": { "linksData": [] }
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"id": "lookups",
|
||
|
|
"panelTypes": "graph",
|
||
|
|
"title": "Lookups/sec by result",
|
||
|
|
"description": "Stacked area of cache reads split by outcome. hit = served from Redis; negative_hit = cached NotFound; miss = fell through to MySQL.",
|
||
|
|
"yAxisUnit": "cps",
|
||
|
|
"isStacked": true,
|
||
|
|
"legendPosition": "bottom",
|
||
|
|
"query": {
|
||
|
|
"queryType": "builder",
|
||
|
|
"promql": [],
|
||
|
|
"clickhouse_sql": [],
|
||
|
|
"builder": {
|
||
|
|
"queryData": [
|
||
|
|
{
|
||
|
|
"queryName": "A",
|
||
|
|
"dataSource": "metrics",
|
||
|
|
"expression": "A",
|
||
|
|
"stepInterval": 60,
|
||
|
|
"aggregations": [
|
||
|
|
{
|
||
|
|
"metricName": "fleet.host_cache.lookups",
|
||
|
|
"temporality": "Cumulative",
|
||
|
|
"timeAggregation": "rate",
|
||
|
|
"spaceAggregation": "sum"
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"filter": { "expression": "" },
|
||
|
|
"groupBy": [
|
||
|
|
{ "key": "result", "dataType": "string", "type": "tag" }
|
||
|
|
],
|
||
|
|
"legend": "{{result}}",
|
||
|
|
"orderBy": [],
|
||
|
|
"selectColumns": [],
|
||
|
|
"functions": []
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"queryFormulas": []
|
||
|
|
}
|
||
|
|
},
|
||
|
|
"thresholds": [],
|
||
|
|
"selectedLogFields": [],
|
||
|
|
"selectedTracesFields": [],
|
||
|
|
"contextLinks": { "linksData": [] }
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"id": "errors",
|
||
|
|
"panelTypes": "graph",
|
||
|
|
"title": "Errors/sec by op",
|
||
|
|
"description": "Redis / JSON errors on the cache path, labeled by operation (get | set | del). Should be flat-zero in steady state; spikes indicate Redis flake or poisoned cache entries.",
|
||
|
|
"yAxisUnit": "cps",
|
||
|
|
"legendPosition": "bottom",
|
||
|
|
"query": {
|
||
|
|
"queryType": "builder",
|
||
|
|
"promql": [],
|
||
|
|
"clickhouse_sql": [],
|
||
|
|
"builder": {
|
||
|
|
"queryData": [
|
||
|
|
{
|
||
|
|
"queryName": "A",
|
||
|
|
"dataSource": "metrics",
|
||
|
|
"expression": "A",
|
||
|
|
"stepInterval": 60,
|
||
|
|
"aggregations": [
|
||
|
|
{
|
||
|
|
"metricName": "fleet.host_cache.errors",
|
||
|
|
"temporality": "Cumulative",
|
||
|
|
"timeAggregation": "rate",
|
||
|
|
"spaceAggregation": "sum"
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"filter": { "expression": "" },
|
||
|
|
"groupBy": [
|
||
|
|
{ "key": "op", "dataType": "string", "type": "tag" }
|
||
|
|
],
|
||
|
|
"legend": "{{op}}",
|
||
|
|
"orderBy": [],
|
||
|
|
"selectColumns": [],
|
||
|
|
"functions": []
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"queryFormulas": []
|
||
|
|
}
|
||
|
|
},
|
||
|
|
"thresholds": [],
|
||
|
|
"selectedLogFields": [],
|
||
|
|
"selectedTracesFields": [],
|
||
|
|
"contextLinks": { "linksData": [] }
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"id": "invalidations",
|
||
|
|
"panelTypes": "graph",
|
||
|
|
"title": "Invalidations/sec by reason",
|
||
|
|
"description": "Cache invalidations on write paths. update = UpdateHost/SerialUpdateHost/osquery intervals/refetch; enroll = NewHost/EnrollOsquery/EnrollOrbit; team = AddHostsToTeam; delete = DeleteHost*/CleanupExpiredHosts/CleanupIncomingHosts; cert = UpdateHostIdentityCertHostIDBySerial.",
|
||
|
|
"yAxisUnit": "cps",
|
||
|
|
"isStacked": true,
|
||
|
|
"legendPosition": "bottom",
|
||
|
|
"query": {
|
||
|
|
"queryType": "builder",
|
||
|
|
"promql": [],
|
||
|
|
"clickhouse_sql": [],
|
||
|
|
"builder": {
|
||
|
|
"queryData": [
|
||
|
|
{
|
||
|
|
"queryName": "A",
|
||
|
|
"dataSource": "metrics",
|
||
|
|
"expression": "A",
|
||
|
|
"stepInterval": 60,
|
||
|
|
"aggregations": [
|
||
|
|
{
|
||
|
|
"metricName": "fleet.host_cache.invalidations",
|
||
|
|
"temporality": "Cumulative",
|
||
|
|
"timeAggregation": "rate",
|
||
|
|
"spaceAggregation": "sum"
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"filter": { "expression": "" },
|
||
|
|
"groupBy": [
|
||
|
|
{ "key": "reason", "dataType": "string", "type": "tag" }
|
||
|
|
],
|
||
|
|
"legend": "{{reason}}",
|
||
|
|
"orderBy": [],
|
||
|
|
"selectColumns": [],
|
||
|
|
"functions": []
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"queryFormulas": []
|
||
|
|
}
|
||
|
|
},
|
||
|
|
"thresholds": [],
|
||
|
|
"selectedLogFields": [],
|
||
|
|
"selectedTracesFields": [],
|
||
|
|
"contextLinks": { "linksData": [] }
|
||
|
|
}
|
||
|
|
]
|
||
|
|
}
|