Update host details in distributed query ingestion (#274)

- Introduce a new pattern for defining/ingesting detail queries
- Add many relevant host details:
  - Platform
  - osquery Version
  - Memory
  - Hostname
  - UUID
  - OS Version
  - Uptime
  - Primary interface MAC
  - Primary interface IP
- Fix parsing for inconsistent JSON schema returned from osquery
- Tests
This commit is contained in:
Zachary Wasserman 2016-10-04 17:17:55 -07:00 committed by GitHub
parent 10dd855a77
commit 19f6eddfeb
7 changed files with 417 additions and 57 deletions

View file

@ -20,10 +20,11 @@ func (orm gormDB) EnrollHost(uuid, hostname, ip, platform string, nodeKeySize in
case gorm.ErrRecordNotFound:
// Create new Host
host = kolide.Host{
UUID: uuid,
HostName: hostname,
IPAddress: ip,
Platform: platform,
UUID: uuid,
HostName: hostname,
IPAddress: ip,
Platform: platform,
DetailUpdateTime: time.Unix(0, 0).Add(24 * time.Hour),
}
default:

View file

@ -79,7 +79,13 @@ func (orm *inmem) EnrollHost(uuid, hostname, ip, platform string, nodeKeySize in
return nil, errors.New("missing uuid for host enrollment")
}
host := kolide.Host{UUID: uuid}
host := kolide.Host{
UUID: uuid,
HostName: hostname,
IPAddress: ip,
Platform: platform,
DetailUpdateTime: time.Unix(0, 0).Add(24 * time.Hour),
}
for _, h := range orm.hosts {
if h.UUID == uuid {
host = *h

View file

@ -34,12 +34,19 @@ type HostPayload struct {
}
// Host is the stored record for an enrolled osquery host. Besides the
// enrollment identifiers it carries the host details that are filled in from
// the results of the distributed detail queries.
type Host struct {
ID uint `gorm:"primary_key"`
CreatedAt time.Time
UpdatedAt time.Time
NodeKey string `gorm:"unique_index:idx_host_unique_nodekey"`
HostName string
UUID string `gorm:"unique_index:idx_host_unique_uuid"`
IPAddress string
Platform string
ID uint `gorm:"primary_key"`
CreatedAt time.Time
UpdatedAt time.Time
DetailUpdateTime time.Time // Time that the host details were last updated
NodeKey string `gorm:"unique_index:idx_host_unique_nodekey"`
HostName string
UUID string `gorm:"unique_index:idx_host_unique_uuid"`
IPAddress string
Platform string
// OsqueryVersion is the "version" column of the osquery_info detail query.
OsqueryVersion string
// OSVersion is built from the os_version detail query as
// "<name> <major>.<minor>.<patch>".
OSVersion string
// Uptime is derived from the "total_seconds" column of the uptime detail
// query.
Uptime time.Duration
// PhysicalMemory is the "physical_memory" column of the system_info detail
// query, stored in a bigint column.
PhysicalMemory int `sql:"type:bigint"`
// PrimaryMAC and PrimaryIP are the "mac" and "address" columns of the single
// row returned by the network_interface detail query.
PrimaryMAC string
PrimaryIP string
}

View file

@ -2,7 +2,11 @@ package service
import (
"encoding/json"
"fmt"
"net/http"
"strconv"
"strings"
"time"
hostctx "github.com/kolide/kolide-ose/server/contexts/host"
"github.com/kolide/kolide-ose/server/errors"
@ -154,30 +158,131 @@ const hostLabelQueryPrefix = "kolide_label_query_"
// provided as a detail query.
const hostDetailQueryPrefix = "kolide_detail_query_"
// detailQueries defines the detail queries that should be run on the host, as
// well as how the results of those queries should be ingested into the
// kolide.Host data model. This map should not be modified at runtime.
//
// Every IngestFunc requires exactly one result row and returns an error
// otherwise. An IngestFunc writes to the host only after every value it needs
// has been parsed successfully, so a failed ingestion leaves the host as it
// was.
var detailQueries = map[string]struct {
	Query      string
	IngestFunc func(host *kolide.Host, rows []map[string]string) error
}{
	"osquery_info": {
		Query: "select * from osquery_info limit 1",
		IngestFunc: func(host *kolide.Host, rows []map[string]string) error {
			row, err := singleDetailRow(rows)
			if err != nil {
				return err
			}

			host.Platform = row["build_platform"]
			host.OsqueryVersion = row["version"]
			return nil
		},
	},
	"system_info": {
		Query: "select * from system_info limit 1",
		IngestFunc: func(host *kolide.Host, rows []map[string]string) error {
			row, err := singleDetailRow(rows)
			if err != nil {
				return err
			}

			// Parse into a local first: strconv.Atoi returns a non-useful
			// value alongside the error, which must not overwrite the
			// memory size already stored on the host.
			physicalMemory, err := strconv.Atoi(row["physical_memory"])
			if err != nil {
				return err
			}

			host.PhysicalMemory = physicalMemory
			host.HostName = row["hostname"]
			host.UUID = row["uuid"]
			return nil
		},
	},
	"os_version": {
		Query: "select * from os_version limit 1",
		IngestFunc: func(host *kolide.Host, rows []map[string]string) error {
			row, err := singleDetailRow(rows)
			if err != nil {
				return err
			}

			host.OSVersion = fmt.Sprintf(
				"%s %s.%s.%s",
				row["name"],
				row["major"],
				row["minor"],
				row["patch"],
			)
			return nil
		},
	},
	"uptime": {
		Query: "select * from uptime limit 1",
		IngestFunc: func(host *kolide.Host, rows []map[string]string) error {
			row, err := singleDetailRow(rows)
			if err != nil {
				return err
			}

			uptimeSeconds, err := strconv.Atoi(row["total_seconds"])
			if err != nil {
				return err
			}

			host.Uptime = time.Duration(uptimeSeconds) * time.Second
			return nil
		},
	},
	"network_interface": {
		Query: `select * from interface_details id join interface_addresses ia
on ia.interface = id.interface where broadcast != ""
order by (ibytes + obytes) desc limit 1`,
		IngestFunc: func(host *kolide.Host, rows []map[string]string) error {
			row, err := singleDetailRow(rows)
			if err != nil {
				return err
			}

			host.PrimaryMAC = row["mac"]
			host.PrimaryIP = row["address"]
			return nil
		},
	},
}

// singleDetailRow returns the only row of a detail query result, or an
// osqueryError when the result does not contain exactly one row.
func singleDetailRow(rows []map[string]string) (map[string]string, error) {
	if len(rows) != 1 {
		return nil, osqueryError{
			message: fmt.Sprintf("expected 1 row but got %d", len(rows)),
		}
	}
	return rows[0], nil
}
// detailUpdateInterval determines how often the detail queries should be
// updated
const detailUpdateInterval = 1 * time.Hour
// hostDetailQueries returns the map of queries that should be executed by
// osqueryd to fill in the host details. Each key is a detailQueries name
// prefixed with hostDetailQueryPrefix and each value is the SQL to run. The
// map is empty when host.DetailUpdateTime is more recent than
// detailUpdateInterval before the service clock's current time.
func hostDetailQueries(host kolide.Host) map[string]string {
func (svc service) hostDetailQueries(host kolide.Host) map[string]string {
queries := make(map[string]string)
if host.Platform == "" {
queries[hostDetailQueryPrefix+"platform"] = "select build_platform from osquery_info;"
if host.DetailUpdateTime.After(svc.clock.Now().Add(-detailUpdateInterval)) {
// No need to update already fresh details
return queries
}
// Details are stale: request every known detail query.
for name, query := range detailQueries {
queries[hostDetailQueryPrefix+name] = query.Query
}
return queries
}
func (svc service) GetDistributedQueries(ctx context.Context) (map[string]string, error) {
queries := make(map[string]string)
host, ok := hostctx.FromContext(ctx)
if !ok {
return nil, osqueryError{message: "internal error: missing host from request context"}
}
queries = hostDetailQueries(host)
if len(queries) > 0 {
// If the host details need to be updated, we should do so
// before checking for any other queries
return queries, nil
}
queries := svc.hostDetailQueries(host)
// Retrieve the label queries that should be updated
cutoff := svc.clock.Now().Add(-svc.config.Osquery.LabelUpdateInterval)
@ -195,6 +300,66 @@ func (svc service) GetDistributedQueries(ctx context.Context) (map[string]string
return queries, nil
}
func (svc service) SubmitDistributedQueryResults(ctx context.Context, results kolide.OsqueryDistributedQueryResults) error {
// ingestDetailQuery applies the rows returned for the detail query called
// name to the provided kolide.Host. The part of name that follows
// hostDetailQueryPrefix selects the entry in detailQueries whose IngestFunc
// performs the update. An unknown query name or a failed ingestion is
// reported as an osqueryError.
func (svc service) ingestDetailQuery(host *kolide.Host, name string, rows []map[string]string) error {
	key := strings.TrimPrefix(name, hostDetailQueryPrefix)

	definition, found := detailQueries[key]
	if !found {
		return osqueryError{message: "unknown detail query " + key}
	}

	if ingestErr := definition.IngestFunc(host, rows); ingestErr != nil {
		return osqueryError{
			message: fmt.Sprintf("ingesting query %s: %s", name, ingestErr.Error()),
		}
	}

	return nil
}
// ingestLabelQuery receives the rows returned for a label query. At present
// it only strips hostLabelQueryPrefix from the query name and matches it
// against an empty set of cases, so the host is left unchanged and nil is
// always returned.
func (svc service) ingestLabelQuery(host *kolide.Host, query string, rows []map[string]string) error {
	switch strings.TrimPrefix(query, hostLabelQueryPrefix) {
	}

	return nil
}
// SubmitDistributedQueryResults ingests the distributed query results sent by
// the host stored in ctx. The host is marked as seen, each result is routed by
// its query name prefix to the detail or label ingester, and the host is then
// saved. Ingestion stops at the first failing result, in which case the host
// is not saved. Any failure is returned as an osqueryError.
func (svc service) SubmitDistributedQueryResults(ctx context.Context, results kolide.OsqueryDistributedQueryResults) error {
	host, ok := hostctx.FromContext(ctx)
	if !ok {
		return osqueryError{message: "internal error: missing host from request context"}
	}

	err := svc.ds.MarkHostSeen(&host, svc.clock.Now())
	if err != nil {
		return osqueryError{message: "failed to update host seen: " + err.Error()}
	}

	// Tracks whether this submission carried any detail query results. Only
	// then may the details be considered refreshed.
	detailUpdated := false
	for query, rows := range results {
		switch {
		case strings.HasPrefix(query, hostDetailQueryPrefix):
			err = svc.ingestDetailQuery(&host, query, rows)
			detailUpdated = true

		case strings.HasPrefix(query, hostLabelQueryPrefix):
			err = svc.ingestLabelQuery(&host, query, rows)

		default:
			// TODO ingest regular distributed query results
		}

		if err != nil {
			return osqueryError{message: "failed to ingest result: " + err.Error()}
		}
	}

	// A submission without detail results must not push DetailUpdateTime
	// forward, otherwise stale details would be treated as fresh and the
	// detail queries would not be requested again until the next interval.
	if detailUpdated {
		host.DetailUpdateTime = svc.clock.Now()
	}

	err = svc.ds.SaveHost(&host)
	if err != nil {
		return osqueryError{message: "failed to update host details: " + err.Error()}
	}

	return nil
}

View file

@ -205,31 +205,36 @@ func TestSubmitResultLogs(t *testing.T) {
}
// TestHostDetailQueries checks that hostDetailQueries returns nothing while
// the host's DetailUpdateTime equals the mock clock's current time, and that
// it returns one query per detailQueries entry, each prefixed with
// hostDetailQueryPrefix, after the mock clock is advanced by more than an
// hour.
func TestHostDetailQueries(t *testing.T) {
mockClock := clock.NewMockClock()
host := kolide.Host{
ID: 1,
CreatedAt: time.Now(),
UpdatedAt: time.Now(),
NodeKey: "test_key",
HostName: "test_hostname",
UUID: "test_uuid",
ID: 1,
CreatedAt: mockClock.Now(),
UpdatedAt: mockClock.Now(),
DetailUpdateTime: mockClock.Now(),
NodeKey: "test_key",
HostName: "test_hostname",
UUID: "test_uuid",
}
queries := hostDetailQueries(host)
assert.Len(t, queries, 1)
if assert.Contains(t, queries, "kolide_detail_query_platform") {
assert.Equal(t,
"select build_platform from osquery_info;",
queries["kolide_detail_query_platform"],
// Only the clock is needed by hostDetailQueries, so no datastore is set up.
svc := service{clock: mockClock}
queries := svc.hostDetailQueries(host)
assert.Empty(t, queries)
// Advance the time
mockClock.AddTime(1*time.Hour + 1*time.Minute)
queries = svc.hostDetailQueries(host)
assert.Len(t, queries, len(detailQueries))
for name, _ := range queries {
assert.True(t,
strings.HasPrefix(name, hostDetailQueryPrefix),
fmt.Sprintf("%s not prefixed with %s", name, hostDetailQueryPrefix),
)
}
host.Platform = "test_platform"
queries = hostDetailQueries(host)
assert.Len(t, queries, 0)
}
func TestGetDistributedQueries(t *testing.T) {
func TestLabelQueries(t *testing.T) {
ds, err := datastore.New("gorm-sqlite3", ":memory:")
assert.Nil(t, err)
@ -250,29 +255,22 @@ func TestGetDistributedQueries(t *testing.T) {
ctx = hostctx.NewContext(ctx, *host)
// With no platform set, we should get the details query
// With a new host, we should get the detail queries
queries, err := svc.GetDistributedQueries(ctx)
assert.Nil(t, err)
assert.Len(t, queries, 1)
if assert.Contains(t, queries, "kolide_detail_query_platform") {
assert.Equal(t,
"select build_platform from osquery_info;",
queries["kolide_detail_query_platform"],
)
}
assert.Len(t, queries, len(detailQueries))
// Simulate the detail queries being added
host.DetailUpdateTime = mockClock.Now().Add(-1 * time.Minute)
host.Platform = "darwin"
ds.SaveHost(host)
ctx = hostctx.NewContext(ctx, *host)
// With the platform set, we should get the label queries (but none
// exist yet)
queries, err = svc.GetDistributedQueries(ctx)
assert.Nil(t, err)
assert.Len(t, queries, 0)
// Add some queries and labels to ensure they are returned
labelQueries := []*kolide.Query{
&kolide.Query{
ID: 1,
@ -353,6 +351,11 @@ func TestGetDistributedQueries(t *testing.T) {
// Advance the time
mockClock.AddTime(1*time.Hour + 1*time.Minute)
// Keep the host details fresh
host.DetailUpdateTime = mockClock.Now().Add(-1 * time.Minute)
ds.SaveHost(host)
ctx = hostctx.NewContext(ctx, *host)
// Now we should get all the label queries again
queries, err = svc.GetDistributedQueries(ctx)
assert.Nil(t, err)
@ -461,3 +464,145 @@ func TestGetClientConfig(t *testing.T) {
assert.Len(t, config.Packs, 1)
assert.Len(t, config.Packs["monitoring"].Queries, 1)
}
// TestDetailQueries walks a freshly enrolled host through a full detail query
// cycle: the detail queries are requested, a captured set of results is
// submitted, the ingested values are read back from the datastore, and the
// queries are requested again only after the mock clock has advanced past the
// update interval.
func TestDetailQueries(t *testing.T) {
	// Setup failures use require so the test stops instead of dereferencing a
	// nil datastore, service or host further down.
	ds, err := datastore.New("gorm-sqlite3", ":memory:")
	require.Nil(t, err)

	mockClock := clock.NewMockClock()

	svc, err := newTestServiceWithClock(ds, mockClock)
	require.Nil(t, err)

	ctx := context.Background()

	nodeKey, err := svc.EnrollAgent(ctx, "", "host123")
	require.Nil(t, err)

	host, err := ds.AuthenticateHost(nodeKey)
	require.Nil(t, err)

	ctx = hostctx.NewContext(ctx, *host)

	// With a new host, we should get the detail queries
	queries, err := svc.GetDistributedQueries(ctx)
	assert.Nil(t, err)
	assert.Len(t, queries, len(detailQueries))

	resultJSON := `
{
  "kolide_detail_query_network_interface": [
    {
      "address": "192.168.0.1",
      "broadcast": "192.168.0.255",
      "ibytes": "1601207629",
      "ierrors": "0",
      "interface": "en0",
      "ipackets": "25698094",
      "last_change": "1474233476",
      "mac": "5f:3d:4b:10:25:82",
      "mask": "255.255.255.0",
      "metric": "0",
      "mtu": "1453",
      "obytes": "2607283152",
      "oerrors": "0",
      "opackets": "12264603",
      "point_to_point": "",
      "type": "6"
    }
  ],
  "kolide_detail_query_os_version": [
    {
      "build": "15G1004",
      "major": "10",
      "minor": "10",
      "name": "Mac OS X",
      "patch": "6"
    }
  ],
  "kolide_detail_query_osquery_info": [
    {
      "build_distro": "10.10",
      "build_platform": "darwin",
      "config_hash": "3c6e4537c4d0eb71a7c6dda19d",
      "config_valid": "1",
      "extensions": "active",
      "pid": "38113",
      "start_time": "1475603155",
      "version": "1.8.2",
      "watcher": "38112"
    }
  ],
  "kolide_detail_query_system_info": [
    {
      "computer_name": "computer",
      "cpu_brand": "Intel(R) Core(TM) i7-4770HQ CPU @ 2.20GHz",
      "cpu_logical_cores": "8",
      "cpu_physical_cores": "4",
      "cpu_subtype": "Intel x86-64h Haswell",
      "cpu_type": "x86_64h",
      "hardware_model": "MacBookPro11,4",
      "hardware_serial": "ABCDEFGH",
      "hardware_vendor": "Apple Inc.",
      "hardware_version": "1.0",
      "hostname": "computer.local",
      "physical_memory": "17179869184",
      "uuid": "uuid"
    }
  ],
  "kolide_detail_query_uptime": [
    {
      "days": "20",
      "hours": "0",
      "minutes": "48",
      "seconds": "13",
      "total_seconds": "1730893"
    }
  ]
}
`

	var results kolide.OsqueryDistributedQueryResults
	err = json.Unmarshal([]byte(resultJSON), &results)
	require.Nil(t, err)

	// Verify that results are ingested properly. A rejected submission would
	// otherwise only show up as a confusing series of field mismatches below.
	err = svc.SubmitDistributedQueryResults(ctx, results)
	require.Nil(t, err)

	// Make sure the result saved to the datastore
	host, err = ds.AuthenticateHost(nodeKey)
	require.Nil(t, err)

	// osquery_info
	assert.Equal(t, "darwin", host.Platform)
	assert.Equal(t, "1.8.2", host.OsqueryVersion)

	// system_info
	assert.Equal(t, 17179869184, host.PhysicalMemory)
	assert.Equal(t, "computer.local", host.HostName)
	assert.Equal(t, "uuid", host.UUID)

	// os_version
	assert.Equal(t, "Mac OS X 10.10.6", host.OSVersion)

	// uptime
	assert.Equal(t, 1730893*time.Second, host.Uptime)

	// network_interface
	assert.Equal(t, "5f:3d:4b:10:25:82", host.PrimaryMAC)
	assert.Equal(t, "192.168.0.1", host.PrimaryIP)

	ctx = hostctx.NewContext(ctx, *host)

	// Now no detail queries should be required
	queries, err = svc.GetDistributedQueries(ctx)
	assert.Nil(t, err)
	assert.Len(t, queries, 0)

	// Advance clock and queries should exist again
	mockClock.AddTime(1*time.Hour + 1*time.Minute)

	queries, err = svc.GetDistributedQueries(ctx)
	assert.Nil(t, err)
	assert.Len(t, queries, len(detailQueries))
}

View file

@ -4,6 +4,8 @@ import (
"encoding/json"
"net/http"
"github.com/kolide/kolide-ose/server/kolide"
"golang.org/x/net/context"
)
@ -35,11 +37,41 @@ func decodeGetDistributedQueriesRequest(ctx context.Context, r *http.Request) (i
}
// decodeSubmitDistributedQueryResultsRequest decodes the JSON request body
// into a submitDistributedQueryResultsRequest. Each entry under "queries" is
// decoded on its own; an entry that is not an array of string-to-string
// objects becomes an empty result set instead of failing the request.
func decodeSubmitDistributedQueryResultsRequest(ctx context.Context, r *http.Request) (interface{}, error) {
var req submitDistributedQueryResultsRequest
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
// When a distributed query has no results, the JSON schema is
// inconsistent, so we use this shim and massage into a consistent
// schema. For example (simplified from actual osqueryd 1.8.2 output):
// {
// "queries": {
// "query_with_no_results": "", // <- Note string instead of array
// "query_with_results": [{"foo":"bar","baz":"bang"}]
// },
// "node_key":"IGXCXknWQ1baTa8TZ6rF3kAPZ4\/aTsui"
// }
type distributedQueryResultsShim struct {
NodeKey string `json:"node_key"`
Results map[string]json.RawMessage `json:"queries"`
}
var shim distributedQueryResultsShim
if err := json.NewDecoder(r.Body).Decode(&shim); err != nil {
return nil, err
}
results := kolide.OsqueryDistributedQueryResults{}
for query, raw := range shim.Results {
queryResults := []map[string]string{}
// No need to handle the error because the empty array is what we
// want if there was an error parsing the JSON (the error
// indicates that osquery sent us JSON with an inconsistent schema)
_ = json.Unmarshal(raw, &queryResults)
results[query] = queryResults
}
req := submitDistributedQueryResultsRequest{
NodeKey: shim.NodeKey,
Results: results,
}
return req, nil
}

View file

@ -93,9 +93,12 @@ func TestDecodeSubmitDistributedQueryResultsRequest(t *testing.T) {
"id2": {
{"col3": "val5", "col4": "val6"},
},
"id3": {},
}, params.Results)
}).Methods("POST")
// Note we explicitly test the case that requires using the shim
// because of the inconsistent JSON schema
var body bytes.Buffer
body.Write([]byte(`{
"node_key": "key",
@ -106,7 +109,8 @@ func TestDecodeSubmitDistributedQueryResultsRequest(t *testing.T) {
],
"id2": [
{"col3": "val5", "col4": "val6"}
]
],
"id3": ""
}
}`))