mirror of
https://github.com/fleetdm/fleet
synced 2026-04-21 13:37:30 +00:00
Add dev infrastructure and docs for Prometheus monitoring (#33)
- Set up a simple example of Prometheus monitoring in the development docker-compose.yml. - Add documentation for configuring Prometheus.
This commit is contained in:
parent
138329c371
commit
6cbd10965c
6 changed files with 61 additions and 14 deletions
|
|
@ -45,5 +45,12 @@ services:
|
|||
- /sys:/sys:ro
|
||||
- /var/lib/docker/:/var/lib/docker:ro
|
||||
|
||||
prometheus:
|
||||
image: prom/prometheus:latest
|
||||
ports:
|
||||
- "9090:9090"
|
||||
volumes:
|
||||
- ./tools/app/prometheus.yml:/etc/prometheus/prometheus.yml
|
||||
|
||||
volumes:
|
||||
mysql-persistent-volume:
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ Kolide Fleet is an infrastructure instrumentation application which has its own
|
|||
|
||||
## Installing Fleet and its dependencies
|
||||
|
||||
The Fleet server has a few dependencies. To learn more about installing the Fleet server and its dependencies, see the [Installing Fleet](./installing-fleet.md) guide.
|
||||
The Fleet server has a few infrastructure dependencies. To learn more about installing the Fleet server and its dependencies, see the [Installing Fleet](./installing-fleet.md) guide.
|
||||
|
||||
## Managing a Fleet server
|
||||
|
||||
|
|
@ -19,6 +19,8 @@ We've prepared a brief guide to help you manage and maintain your Fleet server.
|
|||
|
||||
For more information, you can also read the [Configuring The Fleet Binary](./configuring-the-fleet-binary.md) guide for information on how to configure and customize Fleet for your organization.
|
||||
|
||||
Once the Fleet server is installed and configured, take a look at the [Monitoring & Alerting](./monitoring-alerting.md) documentation.
|
||||
|
||||
## Working with osquery logs
|
||||
|
||||
Fleet allows users to schedule queries, curate packs, and generate a lot of osquery logs. For more information on how you can access these logs as well as examples on what you can do with them, see the [Working With Osquery Logs](./working-with-osquery-logs.md) documentation.
|
||||
|
|
|
|||
|
|
@ -78,9 +78,7 @@ fleet prepare db \
|
|||
|
||||
## How do I monitor a Fleet server?
|
||||
|
||||
Fleet provides a `/healthz` endpoint. If you query it with `curl` it will return an HTTP Status code. `200 OK` means everything is alright. `500 Internal Server Error` means Fleet is having trouble communicating with MySQL or Redis. Check the Fleet logs for additional details.
|
||||
|
||||
The `/metrics` endpoint exposes data ready to be ingested by Prometheus.
|
||||
Fleet provides standard interfaces for monitoring and alerting. See the [Monitoring & Alerting](./monitoring-alerting.md) documentation for details.
|
||||
|
||||
## Why is the "Add User" button disabled?
|
||||
|
||||
|
|
|
|||
32
docs/infrastructure/monitoring-alerting.md
Normal file
32
docs/infrastructure/monitoring-alerting.md
Normal file
|
|
@ -0,0 +1,32 @@
|
|||
# Monitoring Fleet
|
||||
|
||||
## Health Checks
|
||||
|
||||
Fleet exposes a basic health check at the `/healthz` endpoint. This is the interface to use for simple monitoring and load-balancer health checks.
|
||||
|
||||
The `/healthz` endpoint will return an `HTTP 200` status if the server is running and has healthy connections to MySQL and Redis. If there are any problems, the endpoint will return an `HTTP 500` status.
|
||||
|
||||
## Metrics
|
||||
|
||||
Fleet exposes server metrics in a format compatible with [Prometheus](https://prometheus.io/). A simple example Prometheus configuration is available in [tools/app/prometheus.yml](/tools/app/prometheus.yml).
|
||||
|
||||
Prometheus can be configured to use a wide range of service discovery mechanisms within AWS, GCP, Azure, Kubernetes, and more. See the Prometheus [configuration documentation](https://prometheus.io/docs/prometheus/latest/configuration/configuration/) for more information on configuring these service discovery mechanisms.
|
||||
|
||||
### Alerting
|
||||
|
||||
Prometheus has built-in support for alerting through [Alertmanager](https://prometheus.io/docs/alerting/latest/overview/).
|
||||
|
||||
Consider building alerts for:
|
||||
|
||||
- Changes from expected levels of host enrollment
|
||||
- Increased latency on HTTP endpoints
|
||||
- Increased error levels on HTTP endpoints
|
||||
|
||||
```
|
||||
TODO (Seeking Contributors)
|
||||
Add example alerting configurations
|
||||
```
|
||||
|
||||
### Graphing
|
||||
|
||||
Prometheus provides basic graphing capabilities, and integrates tightly with [Grafana](https://prometheus.io/docs/visualization/grafana/) for sophisticated visualizations.
|
||||
|
|
@ -5,12 +5,12 @@ import (
|
|||
"net/http"
|
||||
"strings"
|
||||
|
||||
"github.com/fleetdm/fleet/server/config"
|
||||
"github.com/fleetdm/fleet/server/kolide"
|
||||
"github.com/go-kit/kit/endpoint"
|
||||
kitlog "github.com/go-kit/kit/log"
|
||||
kithttp "github.com/go-kit/kit/transport/http"
|
||||
"github.com/gorilla/mux"
|
||||
"github.com/fleetdm/fleet/server/config"
|
||||
"github.com/fleetdm/fleet/server/kolide"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
|
|
@ -102,7 +102,7 @@ type KolideEndpoints struct {
|
|||
StatusResultStore endpoint.Endpoint
|
||||
StatusLiveQuery endpoint.Endpoint
|
||||
ListCarves endpoint.Endpoint
|
||||
GetCarve endpoint.Endpoint
|
||||
GetCarve endpoint.Endpoint
|
||||
GetCarveBlock endpoint.Endpoint
|
||||
}
|
||||
|
||||
|
|
@ -196,7 +196,7 @@ func MakeKolideServerEndpoints(svc kolide.Service, jwtKey, urlPrefix string) Kol
|
|||
GetCertificate: authenticatedUser(jwtKey, svc, makeCertificateEndpoint(svc)),
|
||||
ChangeEmail: authenticatedUser(jwtKey, svc, makeChangeEmailEndpoint(svc)),
|
||||
ListCarves: authenticatedUser(jwtKey, svc, makeListCarvesEndpoint(svc)),
|
||||
GetCarve: authenticatedUser(jwtKey, svc, makeGetCarveEndpoint(svc)),
|
||||
GetCarve: authenticatedUser(jwtKey, svc, makeGetCarveEndpoint(svc)),
|
||||
GetCarveBlock: authenticatedUser(jwtKey, svc, makeGetCarveBlockEndpoint(svc)),
|
||||
|
||||
// Authenticated status endpoints
|
||||
|
|
@ -304,7 +304,7 @@ type kolideHandlers struct {
|
|||
StatusResultStore http.Handler
|
||||
StatusLiveQuery http.Handler
|
||||
ListCarves http.Handler
|
||||
GetCarve http.Handler
|
||||
GetCarve http.Handler
|
||||
GetCarveBlock http.Handler
|
||||
}
|
||||
|
||||
|
|
@ -399,8 +399,8 @@ func makeKolideKitHandlers(e KolideEndpoints, opts []kithttp.ServerOption) *koli
|
|||
StatusResultStore: newServer(e.StatusResultStore, decodeNoParamsRequest),
|
||||
StatusLiveQuery: newServer(e.StatusLiveQuery, decodeNoParamsRequest),
|
||||
ListCarves: newServer(e.ListCarves, decodeListCarvesRequest),
|
||||
GetCarve: newServer(e.GetCarve, decodeGetCarveRequest),
|
||||
GetCarveBlock: newServer(e.GetCarveBlock, decodeGetCarveBlockRequest),
|
||||
GetCarve: newServer(e.GetCarve, decodeGetCarveRequest),
|
||||
GetCarveBlock: newServer(e.GetCarveBlock, decodeGetCarveBlockRequest),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -423,12 +423,13 @@ func MakeHandler(svc kolide.Service, config config.KolideConfig, logger kitlog.L
|
|||
|
||||
r := mux.NewRouter()
|
||||
attachKolideAPIRoutes(r, kolideHandlers)
|
||||
addMetrics(r)
|
||||
|
||||
// The results endpoint is handled differently because it uses websockets
|
||||
r.PathPrefix("/api/v1/kolide/results/").
|
||||
Handler(makeStreamDistributedQueryCampaignResultsHandler(svc, config.Auth.JwtKey, logger)).
|
||||
Name("distributed_query_results")
|
||||
|
||||
addMetrics(r)
|
||||
|
||||
return r
|
||||
}
|
||||
|
||||
|
|
@ -439,7 +440,6 @@ func addMetrics(r *mux.Router) {
|
|||
return nil
|
||||
}
|
||||
r.Walk(walkFn)
|
||||
|
||||
}
|
||||
|
||||
func attachKolideAPIRoutes(r *mux.Router, h *kolideHandlers) {
|
||||
|
|
|
|||
8
tools/app/prometheus.yml
Normal file
8
tools/app/prometheus.yml
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
scrape_configs:
|
||||
- job_name: fleet
|
||||
scheme: https
|
||||
scrape_interval: 5s
|
||||
static_configs:
|
||||
- targets: ['host.docker.internal:8080']
|
||||
tls_config:
|
||||
insecure_skip_verify: true
|
||||
Loading…
Reference in a new issue