From 6f8abe3bc31c8ded9d6a52b319f0a999ddc8f5c5 Mon Sep 17 00:00:00 2001 From: Benjamin Edwards Date: Thu, 23 Feb 2023 23:22:36 -0500 Subject: [PATCH] refactor vuln process command (#10088) propagate error and properly defer store unlock --- cmd/fleet/vuln_process.go | 54 +++++++++++++++++---------------------- 1 file changed, 24 insertions(+), 30 deletions(-) diff --git a/cmd/fleet/vuln_process.go b/cmd/fleet/vuln_process.go index e4c284189b..c6b46339ed 100644 --- a/cmd/fleet/vuln_process.go +++ b/cmd/fleet/vuln_process.go @@ -4,7 +4,6 @@ import ( "context" "errors" "fmt" - "github.com/fleetdm/fleet/v4/server" "github.com/fleetdm/fleet/v4/server/contexts/license" "os" "time" @@ -34,18 +33,18 @@ vulnerability processing. By default the Fleet server command internally manages 'cron' style jobs, but setting 'vulnerabilities.disable_schedule=true' or 'FLEET_VULNERABILITIES_DISABLE_SCHEDULE=true' will disable it on the server allowing the user configure their own 'cron' mechanism. Successful processing will be indicated by an exit code of zero.`, - Run: func(cmd *cobra.Command, args []string) { + RunE: func(cmd *cobra.Command, args []string) (err error) { cfg := configManager.LoadConfig() if dev { applyDevFlags(&cfg) } + logger := initLogger(cfg) + logger = kitlog.With(logger, fleet.CronVulnerabilities) + licenseInfo, err := initLicense(cfg, devLicense, devExpiredLicense) if err != nil { - initFatal( - err, - "failed to load license - for help use https://fleetdm.com/contact", - ) + return err } if licenseInfo != nil && licenseInfo.IsPremium() && licenseInfo.IsExpired() { @@ -54,13 +53,13 @@ by an exit code of zero.`, ds, err := mysql.New(cfg.Mysql, clock.C) if err != nil { - initFatal(err, "creating db connection") + return err } // we need to ensure this command isn't running with an out-of-date database status, err := ds.MigrationStatus(cmd.Context()) if err != nil { - initFatal(err, "retrieving migration status") + return err } var migrationError error @@ -75,59 +74,53 @@ by an exit code of zero.`, migrationError = errors.New("database migrations incompatible with current version") } if migrationError != nil { - initFatal(migrationError, "refusing to continue processing vulnerabilities, database out of sync") + return fmt.Errorf("refusing to continue processing vulnerabilities err: %w", migrationError) } - ctx, cancel := context.WithTimeout(context.Background(), lockDuration) + ctx, cancel := context.WithTimeout(cmd.Context(), lockDuration) + defer cancel() ctx = license.NewContext(ctx, licenseInfo) - instanceID, err := server.GenerateRandomText(64) - if err != nil { - initFatal(errors.New("error generating random instance identifier"), "") - } // using the same lock name as the cron scheduled version of vuln processing, that way if we fail to obtain the lock // it's most likely due to vulnerabilities.disable_schedule=false but still trying to run external vuln processing command - lock, err := ds.Lock(ctx, string(fleet.CronVulnerabilities), instanceID, lockDuration) + lock, err := ds.Lock(ctx, string(fleet.CronVulnerabilities), "vuln_processing_command", lockDuration) if err != nil { - initFatal(err, "failed to obtain vuln processing lock") + return fmt.Errorf("failed to obtain vuln processing lock: %w", err) } if !lock { - initFatal(errors.New("vulnerabilities processing locked"), - "failed to obtain vuln processing lock, something else still has lock ownership") + return errors.New("vulnerabilities processing locked") } + defer func() { - err = ds.Unlock(ctx, string(fleet.CronVulnerabilities), "vuln_processing_command") - if err != nil { - initFatal(err, "failed to release vuln processing lock") + uerr := ds.Unlock(ctx, string(fleet.CronVulnerabilities), "vuln_processing_command") + if uerr != nil { + err = fmt.Errorf("failed to release vulnerability processing lock: %w", uerr) } - cancel() }() - - logger := initLogger(cfg) - logger = kitlog.With(logger, fleet.CronVulnerabilities) - appConfig, err := ds.AppConfig(ctx) if err != nil { - initFatal(err, "error fetching app config during vulnerability processing") + return err } vulnConfig := cfg.Vulnerabilities vulnPath := configureVulnPath(vulnConfig, appConfig, logger) // this really shouldn't ever be empty string since it's defaulted, but could be due to some misconfiguration // we'll throw an error here since the entire point of this command is to process vulnerabilities if vulnPath == "" { - initFatal(errors.New("vuln path empty, check environment variables or app config yml"), "error during vulnerability processing") + return errors.New("vuln path empty, check environment variables or app config yml") } level.Info(logger).Log("msg", "scanning vulnerabilities") + start := time.Now() err = scanVulnerabilities(ctx, ds, logger, &vulnConfig, appConfig, vulnPath) if err != nil { // errors during vuln processing should bubble up, so you know the job is failing without having to scour logs, e.g. non-zero exit code - initFatal(fmt.Errorf("scanning vulnerabilities: %w", err), "error during vulnerability processing") + return fmt.Errorf("scanning vulnerabilities err: %w", err) } err = ds.SyncHostsSoftware(ctx, time.Now()) if err != nil { // though vulnerability processing succeeded, we'll still fatally error here to indicate there was a problem - initFatal(err, "failed to sync host software") + return fmt.Errorf("sync hosts software err: %w", err) } + level.Info(logger).Log("msg", "vulnerability processing finished", "took", time.Now().Sub(start)) return }, @@ -140,6 +133,7 @@ by an exit code of zero.`, "lock_duration", time.Second*60*60, "the duration (https://pkg.go.dev/time#ParseDuration) the lock should be obtained, ideally this duration is less than the interval in which the job runs (defaults to 60m). If vuln processing isn't finished before this duration the command will exit with a non-zero status code.") + vulnProcessingCmd.SilenceUsage = true return vulnProcessingCmd }