refactor vuln process command (#10088)

propagate error and properly defer store unlock
This commit is contained in:
Benjamin Edwards 2023-02-23 23:22:36 -05:00 committed by GitHub
parent 590822115e
commit 6f8abe3bc3
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -4,7 +4,6 @@ import (
"context"
"errors"
"fmt"
"github.com/fleetdm/fleet/v4/server"
"github.com/fleetdm/fleet/v4/server/contexts/license"
"os"
"time"
@ -34,18 +33,18 @@ vulnerability processing. By default the Fleet server command internally manages
'cron' style jobs, but setting 'vulnerabilities.disable_schedule=true' or 'FLEET_VULNERABILITIES_DISABLE_SCHEDULE=true'
will disable it on the server allowing the user configure their own 'cron' mechanism. Successful processing will be indicated
by an exit code of zero.`,
Run: func(cmd *cobra.Command, args []string) {
RunE: func(cmd *cobra.Command, args []string) (err error) {
cfg := configManager.LoadConfig()
if dev {
applyDevFlags(&cfg)
}
logger := initLogger(cfg)
logger = kitlog.With(logger, fleet.CronVulnerabilities)
licenseInfo, err := initLicense(cfg, devLicense, devExpiredLicense)
if err != nil {
initFatal(
err,
"failed to load license - for help use https://fleetdm.com/contact",
)
return err
}
if licenseInfo != nil && licenseInfo.IsPremium() && licenseInfo.IsExpired() {
@ -54,13 +53,13 @@ by an exit code of zero.`,
ds, err := mysql.New(cfg.Mysql, clock.C)
if err != nil {
initFatal(err, "creating db connection")
return err
}
// we need to ensure this command isn't running with an out-of-date database
status, err := ds.MigrationStatus(cmd.Context())
if err != nil {
initFatal(err, "retrieving migration status")
return err
}
var migrationError error
@ -75,59 +74,53 @@ by an exit code of zero.`,
migrationError = errors.New("database migrations incompatible with current version")
}
if migrationError != nil {
initFatal(migrationError, "refusing to continue processing vulnerabilities, database out of sync")
return fmt.Errorf("refusing to continue processing vulnerabilities err: %w", migrationError)
}
ctx, cancel := context.WithTimeout(context.Background(), lockDuration)
ctx, cancel := context.WithTimeout(cmd.Context(), lockDuration)
defer cancel()
ctx = license.NewContext(ctx, licenseInfo)
instanceID, err := server.GenerateRandomText(64)
if err != nil {
initFatal(errors.New("error generating random instance identifier"), "")
}
// using the same lock name as the cron scheduled version of vuln processing, that way if we fail to obtain the lock
// it's most likely due to vulnerabilities.disable_schedule=false but still trying to run external vuln processing command
lock, err := ds.Lock(ctx, string(fleet.CronVulnerabilities), instanceID, lockDuration)
lock, err := ds.Lock(ctx, string(fleet.CronVulnerabilities), "vuln_processing_command", lockDuration)
if err != nil {
initFatal(err, "failed to obtain vuln processing lock")
return fmt.Errorf("failed to obtain vuln processing lock: %w", err)
}
if !lock {
initFatal(errors.New("vulnerabilities processing locked"),
"failed to obtain vuln processing lock, something else still has lock ownership")
return errors.New("vulnerabilities processing locked")
}
defer func() {
err = ds.Unlock(ctx, string(fleet.CronVulnerabilities), "vuln_processing_command")
if err != nil {
initFatal(err, "failed to release vuln processing lock")
uerr := ds.Unlock(ctx, string(fleet.CronVulnerabilities), "vuln_processing_command")
if uerr != nil {
err = fmt.Errorf("failed to release vulnerability processing lock: %w", uerr)
}
cancel()
}()
logger := initLogger(cfg)
logger = kitlog.With(logger, fleet.CronVulnerabilities)
appConfig, err := ds.AppConfig(ctx)
if err != nil {
initFatal(err, "error fetching app config during vulnerability processing")
return err
}
vulnConfig := cfg.Vulnerabilities
vulnPath := configureVulnPath(vulnConfig, appConfig, logger)
// this really shouldn't ever be empty string since it's defaulted, but could be due to some misconfiguration
// we'll throw an error here since the entire point of this command is to process vulnerabilities
if vulnPath == "" {
initFatal(errors.New("vuln path empty, check environment variables or app config yml"), "error during vulnerability processing")
return errors.New("vuln path empty, check environment variables or app config yml")
}
level.Info(logger).Log("msg", "scanning vulnerabilities")
start := time.Now()
err = scanVulnerabilities(ctx, ds, logger, &vulnConfig, appConfig, vulnPath)
if err != nil {
// errors during vuln processing should bubble up, so you know the job is failing without having to scour logs, e.g. non-zero exit code
initFatal(fmt.Errorf("scanning vulnerabilities: %w", err), "error during vulnerability processing")
return fmt.Errorf("scanning vulnerabilities err: %w", err)
}
err = ds.SyncHostsSoftware(ctx, time.Now())
if err != nil {
// though vulnerability processing succeeded, we'll still fatally error here to indicate there was a problem
initFatal(err, "failed to sync host software")
return fmt.Errorf("sync hosts software err: %w", err)
}
level.Info(logger).Log("msg", "vulnerability processing finished", "took", time.Now().Sub(start))
return
},
@ -140,6 +133,7 @@ by an exit code of zero.`,
"lock_duration",
time.Second*60*60,
"the duration (https://pkg.go.dev/time#ParseDuration) the lock should be obtained, ideally this duration is less than the interval in which the job runs (defaults to 60m). If vuln processing isn't finished before this duration the command will exit with a non-zero status code.")
vulnProcessingCmd.SilenceUsage = true
return vulnProcessingCmd
}