From 014ff490d02a5bf49a98a6f823a252d8f0a8907f Mon Sep 17 00:00:00 2001 From: Sharon Katz <121527325+sharon-fdm@users.noreply.github.com> Date: Fri, 10 Nov 2023 10:06:54 -0500 Subject: [PATCH] arch docs (#13382) Tech docs --- docs/Contributing/high-level-architecture.md | 192 +++++++++++++++++++ 1 file changed, 192 insertions(+) create mode 100644 docs/Contributing/high-level-architecture.md diff --git a/docs/Contributing/high-level-architecture.md b/docs/Contributing/high-level-architecture.md new file mode 100644 index 0000000000..8cfe7c3183 --- /dev/null +++ b/docs/Contributing/high-level-architecture.md @@ -0,0 +1,192 @@ +# High level architecture + +- [Overview](#overview) +- [Main System Components](#main-system-components) + +## Overview + +Add text + +## Main System Components + +```mermaid +graph LR; + + subgraph Development + fleet_release_owner[Fleet Release
Owner]; + end + + subgraph Agent + orbit[orbit]; + desktop[Fleet Desktop
Tray App]; + osqueryd[osqueryd]; + desktop_browser[Fleet Desktop
from Browser]; + end + + subgraph Customer Cloud + fleet_server[Fleet
Server]; + db[DB]; + redis[Redis
Live queries' results
go here]; + prometheus[Prometheus Server]; + end + + subgraph FleetDM Cloud + tuf["TUF file server
(default: tuf.fleetctl.com)"]; + datadog[DataDog metrics] + heroku[Usage Analytics
Heroku] + log[Send logs to optional
external location] + end + + subgraph Customer Admin + frontend[API user UI or other] + end + + + fleet_release_owner -- "Release Process" --> tuf; + + orbit -- "Fleet Orbit API (TLS)" --> fleet_server; + orbit -- "Auto Update (TLS)" --> tuf; + desktop -- "Fleet Desktop API (TLS)" --> fleet_server; + osqueryd -- "osquery
remote API (TLS)" --> fleet_server; + desktop_browser -- "My Device API (TLS)" --> fleet_server; + + heroku -- "Metrics from all customers" --> datadog; + + fleet_server <== "Read/Write" ==> db; + fleet_server <== "Read/Write" ==> redis; + redis <==> db; + + prometheus ==> fleet_server; + fleet_server -- "metrics" --> heroku; + fleet_server -- "queries results" --> log; + + frontend <== "API" ==> fleet_server; + +``` + + + +## The path of Live Query + +### 1 - Fleet User initiates the query +```mermaid +graph LR; + it_person[Fleet User
Starts a live query]; + api[API Client Frontend or Fleetctl]; + + subgraph Cloud + server(Server); + dbredis[DB / Redis]; + end + + it_person --> api; + api --> it_person; + + api <-- "1 - Initiate Campaign. Get ID" --> server; + api <-- "2 - Register to notifications with ID" --> server; + api <-- "WEB SOCKET" --> server; + server <-- Notifications --> dbredis; + +``` + +### 2 - Agent returns results +```mermaid +graph LR; + osquery[osquery agent]; + + subgraph Cloud + server(Server); + dbredis[DB / Redis]; + end + + osquery -- 1 ask for queries --> server; + osquery -- 2 return results --> server; + + server <-- 1 return queries if found --> dbredis; + server -- 2 put results in Redis --> dbredis; + +``` + +## The path of a scheduled Query + +### 1 - Fleet User initiates the query +```mermaid +graph LR; + it_person[Fleet User
Creates a scheduled query
for a team / global]; + api[API Client Frontend or Fleetctl]; + + subgraph Cloud + server(Server); + db[DB]; + end + + it_person --> api; + api --> server; + server -- Query stored in DB--> db; +``` +### 2 - Agent gets config file (with the scheduled query) +```mermaid +graph LR; + agent[Osquery Agent]; + + subgraph Cloud + server(Server); + db[DB]; + end + + agent -- request download config file --> server; + agent <-- teams and global cfg are merged --> server; + server -- ask for cfg file--> db; +``` + +### 3 - Agent returns results to be (optionally) logged +```mermaid +graph LR; + agent[Osquery Agent
Runs query and sends results]; + + subgraph Cloud + server(Server); + log[Optional External Log
e.g. S3]; + end + + agent --> server; + server --> log; +``` + + +## Agent config options +1 - Config TLS refresh +(Typical period 10 secs) osquery pulls down a config file that includes instructions for Scheduled Queries. +If both GLOBAL and TEAM are configured, the two configs are merged on the Server side. + +2 - Logger TLS +(Typical period 10 secs) Frequency of sending the results. (This is different from the frequency of running the queries.) +To be improved: Currently the config file gets downloaded every time, even if nothing has changed. + +3 - Distributed +(Typical period 10 secs) osquery asks for any Live query to run. + + +## Vulnerability dashboard +Typically hosted on our Heroku servers. +Could be hosted on customer servers. +Uses the Fleet server Token to access specific APIs that give information about vulnerability +status. + +### Vuln dashboard hosted by FleetDM +```mermaid +graph LR; + frontend[Frontend on web browser]; + + subgraph Customer Cloud + fleetServer(Fleet Server); + end + + subgraph Heroku Cloud + vulnServer(Vuln Web Server); + end + + frontend --> vulnServer; + vulnServer --> fleetServer; +``` +