mirror of
https://github.com/fleetdm/fleet
synced 2026-05-22 16:39:01 +00:00
Add host enrollment cooldown period (#112)
This addresses an issue some users experienced in which performance problems were encountered when hosts were "competing" for enrollment using the same osquery host identifier. The issue is addressed by adding a cooldown period for host enrollment, preventing the same (as judged by osquery host identifier) host from enrolling more than once per minute. When users end up in the problematic scenario, they will see quite a bit of error logs due to this issue. For now that's probably a good thing as users need to be aware of the lack of visibility. We can explore rate limiting the logging if that becomes an issue for someone. Fixes #102
This commit is contained in:
parent
3ce7351049
commit
beb7e8b965
7 changed files with 121 additions and 41 deletions
1
go.mod
1
go.mod
|
|
@ -48,6 +48,7 @@ require (
|
|||
github.com/patrickmn/sortutil v0.0.0-20120526081524-abeda66eb583
|
||||
github.com/pelletier/go-toml v1.1.0 // indirect
|
||||
github.com/pkg/errors v0.8.1
|
||||
github.com/pressly/goose v2.6.0+incompatible
|
||||
github.com/prometheus/client_golang v0.9.3-0.20190127221311-3c4408c8b829
|
||||
github.com/russellhaering/gosaml2 v0.3.1
|
||||
github.com/russellhaering/goxmldsig v0.0.0-20180430223755-7acd5e4a6ef7
|
||||
|
|
|
|||
2
go.sum
2
go.sum
|
|
@ -166,6 +166,8 @@ github.com/pkg/term v0.0.0-20190109203006-aa71e9d9e942 h1:A7GG7zcGjl3jqAqGPmcNjd
|
|||
github.com/pkg/term v0.0.0-20190109203006-aa71e9d9e942/go.mod h1:eCbImbZ95eXtAUIbLAuAVnBnwf83mjf6QIVH8SHYwqQ=
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/pressly/goose v2.6.0+incompatible h1:3f8zIQ8rfgP9tyI0Hmcs2YNAqUCL1c+diLe3iU8Qd/k=
|
||||
github.com/pressly/goose v2.6.0+incompatible/go.mod h1:m+QHWCqxR3k8D9l7qfzuC/djtlfzxr34mozWDYEu1z8=
|
||||
github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw=
|
||||
github.com/prometheus/client_golang v0.9.3-0.20190127221311-3c4408c8b829 h1:D+CiwcpGTW6pL6bv6KI3KbyEyCKyS+1JWS2h8PNDnGA=
|
||||
github.com/prometheus/client_golang v0.9.3-0.20190127221311-3c4408c8b829/go.mod h1:p2iRAGwDERtqlqzRXnrOVns+ignqQo//hLXqYxZYVNs=
|
||||
|
|
|
|||
|
|
@ -229,14 +229,18 @@ func testListHostsStatus(t *testing.T, ds kolide.Datastore) {
|
|||
|
||||
func testEnrollHost(t *testing.T, ds kolide.Datastore) {
|
||||
test.AddAllHostsLabel(t, ds)
|
||||
var hosts []*kolide.Host
|
||||
enrollSecretName := "default"
|
||||
for _, tt := range enrollTests {
|
||||
h, err := ds.EnrollHost(tt.uuid, tt.nodeKey, "default")
|
||||
h, err := ds.EnrollHost(tt.uuid, tt.nodeKey, enrollSecretName)
|
||||
require.Nil(t, err)
|
||||
|
||||
hosts = append(hosts, h)
|
||||
assert.Equal(t, tt.uuid, h.OsqueryHostID)
|
||||
assert.NotEmpty(t, h.NodeKey)
|
||||
assert.Equal(t, tt.nodeKey, h.NodeKey)
|
||||
assert.Equal(t, enrollSecretName, h.EnrollSecretName)
|
||||
|
||||
// This host should not be allowed to re-enroll immediately
|
||||
_, err = ds.EnrollHost(tt.uuid, tt.nodeKey+"new", enrollSecretName+"new")
|
||||
require.Error(t, err)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -5,8 +5,9 @@ import (
|
|||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/jmoiron/sqlx"
|
||||
"github.com/cenkalti/backoff/v4"
|
||||
"github.com/fleetdm/fleet/server/kolide"
|
||||
"github.com/jmoiron/sqlx"
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
||||
|
|
@ -287,44 +288,74 @@ func (d *Datastore) EnrollHost(osqueryHostID, nodeKey, secretName string) (*koli
|
|||
return nil, fmt.Errorf("missing osquery host identifier")
|
||||
}
|
||||
|
||||
zeroTime := time.Unix(0, 0).Add(24 * time.Hour)
|
||||
sqlInsert := `
|
||||
INSERT INTO hosts (
|
||||
detail_update_time,
|
||||
label_update_time,
|
||||
osquery_host_id,
|
||||
seen_time,
|
||||
node_key,
|
||||
enroll_secret_name
|
||||
) VALUES (?, ?, ?, ?, ?, ?)
|
||||
ON DUPLICATE KEY UPDATE
|
||||
node_key = VALUES(node_key)
|
||||
`
|
||||
var host kolide.Host
|
||||
err := d.withRetryTxx(func(tx *sqlx.Tx) error {
|
||||
zeroTime := time.Unix(0, 0).Add(24 * time.Hour)
|
||||
|
||||
var result sql.Result
|
||||
result, err := d.db.Exec(sqlInsert, zeroTime, zeroTime, osqueryHostID, time.Now().UTC(), nodeKey, secretName)
|
||||
var id int64
|
||||
err := tx.Get(&host, `SELECT id, last_enroll_time FROM hosts WHERE osquery_host_id = ?`, osqueryHostID)
|
||||
if err != nil {
|
||||
// Create new host record
|
||||
sqlInsert := `
|
||||
INSERT INTO hosts (
|
||||
detail_update_time,
|
||||
label_update_time,
|
||||
osquery_host_id,
|
||||
seen_time,
|
||||
node_key,
|
||||
enroll_secret_name
|
||||
) VALUES (?, ?, ?, ?, ?, ?)
|
||||
`
|
||||
result, err := tx.Exec(sqlInsert, zeroTime, zeroTime, osqueryHostID, time.Now().UTC(), nodeKey, secretName)
|
||||
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "insert host")
|
||||
}
|
||||
|
||||
id, _ = result.LastInsertId()
|
||||
} else {
|
||||
// Prevent hosts from enrolling too often with the same identifier.
|
||||
// Prior to adding this we saw many hosts (probably VMs) with the
|
||||
// same identifier competing for enrollment and causing perf issues.
|
||||
if time.Since(host.LastEnrollTime) < kolide.HostEnrollCooldown {
|
||||
return backoff.Permanent(fmt.Errorf("host identified by %s enrolling too often", osqueryHostID))
|
||||
}
|
||||
id = int64(host.ID)
|
||||
// Update existing host record
|
||||
sqlUpdate := `
|
||||
UPDATE hosts
|
||||
SET node_key = ?,
|
||||
enroll_secret_name = ?,
|
||||
last_enroll_time = NOW()
|
||||
WHERE osquery_host_id = ?
|
||||
`
|
||||
_, err := tx.Exec(sqlUpdate, nodeKey, secretName, osqueryHostID)
|
||||
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "update host")
|
||||
}
|
||||
}
|
||||
|
||||
sqlSelect := `
|
||||
SELECT * FROM hosts WHERE id = ? LIMIT 1
|
||||
`
|
||||
err = tx.Get(&host, sqlSelect, id)
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "getting the host to return")
|
||||
}
|
||||
|
||||
_, err = tx.Exec(`INSERT IGNORE INTO label_membership (host_id, label_id) VALUES (?, (SELECT id FROM labels WHERE name = 'All Hosts' AND label_type = 1))`, id)
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "insert new host into all hosts label")
|
||||
}
|
||||
|
||||
return nil
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "inserting")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
id, _ := result.LastInsertId()
|
||||
sqlSelect := `
|
||||
SELECT * FROM hosts WHERE id = ? LIMIT 1
|
||||
`
|
||||
host := &kolide.Host{}
|
||||
err = d.db.Get(host, sqlSelect, id)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "getting the host to return")
|
||||
}
|
||||
|
||||
_, err = d.db.Exec(`INSERT IGNORE INTO label_membership (host_id, label_id) VALUES (?, (SELECT id FROM labels WHERE name = 'All Hosts' AND label_type = 1))`, id)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "insert new host into all hosts label")
|
||||
}
|
||||
|
||||
return host, nil
|
||||
|
||||
return &host, nil
|
||||
}
|
||||
|
||||
func (d *Datastore) AuthenticateHost(nodeKey string) (*kolide.Host, error) {
|
||||
|
|
|
|||
|
|
@ -16,7 +16,7 @@ func Up_20200504120000(tx *sql.Tx) error {
|
|||
"ADD COLUMN `additional` JSON DEFAULT NULL;",
|
||||
)
|
||||
if err != nil {
|
||||
errors.Wrap(err, "add additional column")
|
||||
return errors.Wrap(err, "add additional column")
|
||||
}
|
||||
|
||||
_, err = tx.Exec(
|
||||
|
|
@ -24,7 +24,7 @@ func Up_20200504120000(tx *sql.Tx) error {
|
|||
"ADD COLUMN `additional_queries` JSON DEFAULT NULL;",
|
||||
)
|
||||
if err != nil {
|
||||
errors.Wrap(err, "add additional_queries column")
|
||||
return errors.Wrap(err, "add additional_queries column")
|
||||
}
|
||||
|
||||
return nil
|
||||
|
|
|
|||
|
|
@ -0,0 +1,32 @@
|
|||
package tables
|
||||
|
||||
import (
|
||||
"database/sql"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
||||
func init() {
|
||||
MigrationClient.AddMigration(Up_20201208121729, Down_20201208121729)
|
||||
}
|
||||
|
||||
func Up_20201208121729(tx *sql.Tx) error {
|
||||
_, err := tx.Exec(
|
||||
"ALTER TABLE `hosts` " +
|
||||
"ADD COLUMN `last_enroll_time` TIMESTAMP DEFAULT CURRENT_TIMESTAMP;",
|
||||
)
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "add last_enroll_time column")
|
||||
}
|
||||
|
||||
_, err = tx.Exec("UPDATE hosts SET last_enroll_time = created_at")
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "set last_enroll_time")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func Down_20201208121729(tx *sql.Tx) error {
|
||||
return nil
|
||||
}
|
||||
|
|
@ -36,6 +36,12 @@ const (
|
|||
// online interval to avoid flapping of hosts that check in a bit later
|
||||
// than their expected checkin interval.
|
||||
OnlineIntervalBuffer = 30
|
||||
|
||||
// HostEnrollCooldown is how often a host can enroll. Users sometimes deploy
|
||||
// osquery in a configuration in which multiple hosts use the same host
|
||||
// identifier and Fleet needs to have a cooldown period to prevent this from
|
||||
// cascading into out of control host enrollment.
|
||||
HostEnrollCooldown = 1 * time.Minute
|
||||
)
|
||||
|
||||
type HostStore interface {
|
||||
|
|
@ -45,6 +51,9 @@ type HostStore interface {
|
|||
SaveHost(host *Host) error
|
||||
DeleteHost(hid uint) error
|
||||
Host(id uint) (*Host, error)
|
||||
// EnrollHost will enroll a new host with the given identifier, setting the
|
||||
// node key, and enroll secret used for the host. Implementations of this
|
||||
// method should respect HostEnrollCooldown.
|
||||
EnrollHost(osqueryHostId, nodeKey, secretName string) (*Host, error)
|
||||
ListHosts(opt HostListOptions) ([]*Host, error)
|
||||
// AuthenticateHost authenticates and returns host metadata by node key.
|
||||
|
|
@ -100,6 +109,7 @@ type Host struct {
|
|||
OsqueryHostID string `json:"-" db:"osquery_host_id"`
|
||||
DetailUpdateTime time.Time `json:"detail_updated_at" db:"detail_update_time"` // Time that the host details were last updated
|
||||
LabelUpdateTime time.Time `json:"label_updated_at" db:"label_update_time"` // Time that the host details were last updated
|
||||
LastEnrollTime time.Time `json:"last_enrolled_at" db:"last_enroll_time"` // Time that the host last enrolled
|
||||
SeenTime time.Time `json:"seen_time" db:"seen_time"` // Time that the host was last "seen"
|
||||
NodeKey string `json:"-" db:"node_key"`
|
||||
HostName string `json:"hostname" db:"host_name"` // there is a fulltext index on this field
|
||||
|
|
|
|||
Loading…
Reference in a new issue