refactor vuln process command (#10088)

propagate error and properly defer store unlock
2026-05-23 08:58:41 +00:00 · 2023-02-23 23:22:36 -05:00 · 2023-02-23 23:22:36 -05:00 · 6f8abe3bc3
commit 6f8abe3bc3
parent 590822115e
1 changed files with 24 additions and 30 deletions
--- a/cmd/fleet/vuln_process.go
+++ b/cmd/fleet/vuln_process.go
@ -4,7 +4,6 @@ import (
 	"context"
 	"errors"
 	"fmt"
-	"github.com/fleetdm/fleet/v4/server"
 	"github.com/fleetdm/fleet/v4/server/contexts/license"
 	"os"
 	"time"
@ -34,18 +33,18 @@ vulnerability processing. By default the Fleet server command internally manages
 'cron' style jobs, but setting 'vulnerabilities.disable_schedule=true' or 'FLEET_VULNERABILITIES_DISABLE_SCHEDULE=true' 
 will disable it on the server allowing the user configure their own 'cron' mechanism. Successful processing will be indicated
 by an exit code of zero.`,
-		Run: func(cmd *cobra.Command, args []string) {
+		RunE: func(cmd *cobra.Command, args []string) (err error) {
 			cfg := configManager.LoadConfig()
 			if dev {
 				applyDevFlags(&cfg)
 			}

+			logger := initLogger(cfg)
+			logger = kitlog.With(logger, fleet.CronVulnerabilities)
+
 			licenseInfo, err := initLicense(cfg, devLicense, devExpiredLicense)
 			if err != nil {
-				initFatal(
-					err,
-					"failed to load license - for help use https://fleetdm.com/contact",
-				)
+				return err
 			}

 			if licenseInfo != nil && licenseInfo.IsPremium() && licenseInfo.IsExpired() {
@ -54,13 +53,13 @@ by an exit code of zero.`,

 			ds, err := mysql.New(cfg.Mysql, clock.C)
 			if err != nil {
-				initFatal(err, "creating db connection")
+				return err
 			}

 			// we need to ensure this command isn't running with an out-of-date database
 			status, err := ds.MigrationStatus(cmd.Context())
 			if err != nil {
-				initFatal(err, "retrieving migration status")
+				return err
 			}

 			var migrationError error
@ -75,59 +74,53 @@ by an exit code of zero.`,
 				migrationError = errors.New("database migrations incompatible with current version")
 			}
 			if migrationError != nil {
-				initFatal(migrationError, "refusing to continue processing vulnerabilities, database out of sync")
+				return fmt.Errorf("refusing to continue processing vulnerabilities err: %w", migrationError)
 			}

-			ctx, cancel := context.WithTimeout(context.Background(), lockDuration)
+			ctx, cancel := context.WithTimeout(cmd.Context(), lockDuration)
+			defer cancel()
 			ctx = license.NewContext(ctx, licenseInfo)
-			instanceID, err := server.GenerateRandomText(64)
-			if err != nil {
-				initFatal(errors.New("error generating random instance identifier"), "")
-			}
 			// using the same lock name as the cron scheduled version of vuln processing, that way if we fail to obtain the lock
 			// it's most likely due to vulnerabilities.disable_schedule=false but still trying to run external vuln processing command
-			lock, err := ds.Lock(ctx, string(fleet.CronVulnerabilities), instanceID, lockDuration)
+			lock, err := ds.Lock(ctx, string(fleet.CronVulnerabilities), "vuln_processing_command", lockDuration)
 			if err != nil {
-				initFatal(err, "failed to obtain vuln processing lock")
+				return fmt.Errorf("failed to obtain vuln processing lock: %w", err)
 			}
 			if !lock {
-				initFatal(errors.New("vulnerabilities processing locked"),
-					"failed to obtain vuln processing lock, something else still has lock ownership")
+				return errors.New("vulnerabilities processing locked")
 			}
+
 			defer func() {
-				err = ds.Unlock(ctx, string(fleet.CronVulnerabilities), "vuln_processing_command")
-				if err != nil {
-					initFatal(err, "failed to release vuln processing lock")
+				uerr := ds.Unlock(ctx, string(fleet.CronVulnerabilities), "vuln_processing_command")
+				if uerr != nil {
+					err = fmt.Errorf("failed to release vulnerability processing lock: %w", uerr)
 				}
-				cancel()
 			}()
-
-			logger := initLogger(cfg)
-			logger = kitlog.With(logger, fleet.CronVulnerabilities)
-
 			appConfig, err := ds.AppConfig(ctx)
 			if err != nil {
-				initFatal(err, "error fetching app config during vulnerability processing")
+				return err
 			}
 			vulnConfig := cfg.Vulnerabilities
 			vulnPath := configureVulnPath(vulnConfig, appConfig, logger)
 			// this really shouldn't ever be empty string since it's defaulted, but could be due to some misconfiguration
 			// we'll throw an error here since the entire point of this command is to process vulnerabilities
 			if vulnPath == "" {
-				initFatal(errors.New("vuln path empty, check environment variables or app config yml"), "error during vulnerability processing")
+				return errors.New("vuln path empty, check environment variables or app config yml")
 			}
 			level.Info(logger).Log("msg", "scanning vulnerabilities")
+			start := time.Now()
 			err = scanVulnerabilities(ctx, ds, logger, &vulnConfig, appConfig, vulnPath)
 			if err != nil {
 				// errors during vuln processing should bubble up, so you know the job is failing without having to scour logs, e.g. non-zero exit code
-				initFatal(fmt.Errorf("scanning vulnerabilities: %w", err), "error during vulnerability processing")
+				return fmt.Errorf("scanning vulnerabilities err: %w", err)
 			}

 			err = ds.SyncHostsSoftware(ctx, time.Now())
 			if err != nil {
 				// though vulnerability processing succeeded, we'll still fatally error here to indicate there was a problem
-				initFatal(err, "failed to sync host software")
+				return fmt.Errorf("sync hosts software err: %w", err)
 			}
+			level.Info(logger).Log("msg", "vulnerability processing finished", "took", time.Now().Sub(start))

 			return
 		},
@ -140,6 +133,7 @@ by an exit code of zero.`,
 		"lock_duration",
 		time.Second*60*60,
 		"the duration (https://pkg.go.dev/time#ParseDuration) the lock should be obtained, ideally this duration is less than the interval in which the job runs (defaults to 60m). If vuln processing isn't finished before this duration the command will exit with a non-zero status code.")
+	vulnProcessingCmd.SilenceUsage = true

 	return vulnProcessingCmd
 }