From 91e41ec67065666f5ddc3d1bd5db212024ffc44e Mon Sep 17 00:00:00 2001
From: Zachary Winnerman <98712682+zwinnerman-fleetdm@users.noreply.github.com>
Date: Fri, 19 May 2023 16:00:51 -0400
Subject: [PATCH] Allow for parallel spinup of sandbox instances (#11779)

# Checklist for submitter

If some of the following don't apply, delete the relevant line.

- [ ] Changes file added for user-visible changes in `changes/` or `orbit/changes/`. See [Changes files](https://fleetdm.com/docs/contributing/committing-changes#changes-files) for more information.
- [ ] Documented any API changes (docs/Using-Fleet/REST-API.md or docs/Contributing/API-for-contributors.md)
- [ ] Documented any permissions changes
- [ ] Input data is properly validated, `SELECT *` is avoided, SQL injection is prevented (using placeholders for values in statements)
- [ ] Added support on fleet's osquery simulator `cmd/osquery-perf` for new osquery data ingestion features.
- [ ] Added/updated tests
- [ ] Manual QA for all new/changed functionality
  - For Orbit and Fleet Desktop changes:
    - [ ] Manual QA must be performed in the three main OSs, macOS, Windows and Linux.
    - [ ] Auto-update manual QA, from released version of component to new version (see [tools/tuf/test](../tools/tuf/test/README.md)).
Closes #7118 --- .../sandbox/PreProvisioner/lambda/main.go | 22 ++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/infrastructure/sandbox/PreProvisioner/lambda/main.go b/infrastructure/sandbox/PreProvisioner/lambda/main.go index 5304cd62fa..29c53cf3f7 100644 --- a/infrastructure/sandbox/PreProvisioner/lambda/main.go +++ b/infrastructure/sandbox/PreProvisioner/lambda/main.go @@ -280,10 +280,12 @@ func handler(ctx context.Context, name NullEvent) error { if unclaimedCount >= options.QueuedInstances { return nil } - numToReady := min(options.MaxInstances-totalCount, options.QueuedInstances-unclaimedCount) + has_init := false // deploy terraform to initialize everything - for i := int64(0); i < numToReady; i++ { - if i == 0 { + // If there's an error during spinup, the program exits, so it either makes progress or fails completely, never running forever + for min(options.MaxInstances-totalCount, options.QueuedInstances-unclaimedCount) > 0 { + if !has_init { + has_init = true if err := initTerraform(); err != nil { return err } @@ -297,12 +299,26 @@ func handler(ctx context.Context, name NullEvent) error { return err } instanceID := fmt.Sprintf("t%s", uuid.New().String()[:8]) + // This should fail if the instance id we pick already exists since it will collide with the primary key in dynamodb + // This also actually puts the claim in place if err := runTerraform(instanceID, redisDatabase, enrollSecret); err != nil { return err } if err = buildPackages(instanceID, enrollSecret); err != nil { return err } + + // Refresh the count variables + totalCount, unclaimedCount, err = getInstancesCount() + if err != nil { + return err + } + if totalCount >= options.MaxInstances { + return nil + } + if unclaimedCount >= options.QueuedInstances { + return nil + } } return nil }