diff --git a/.github/workflows/db-upgrade-test.yml b/.github/workflows/db-upgrade-test.yml new file mode 100644 index 0000000000..b20ecde8b5 --- /dev/null +++ b/.github/workflows/db-upgrade-test.yml @@ -0,0 +1,44 @@ +# This workflow can be used to test DB upgrades between two Fleet versions. +name: DB upgrade test + +on: + workflow_dispatch: # allow manual action + inputs: + from-version: + description: "Docker tag of Fleet starting version, e.g. 'v4.64.2'" + required: true + type: string + to-version: + description: "Docker tag of Fleet version to upgrade to, e.g. 'rc-minor-fleet-v4.65.0'" + required: true + type: string + +defaults: + run: + # fail-fast using bash -eo pipefail. See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#exit-codes-and-error-action-preference + shell: bash + +permissions: + contents: read + +jobs: + run-db-upgrade-test: + runs-on: ubuntu-latest + steps: + - name: Harden Runner + uses: step-security/harden-runner@63c24ba6bd7ba022e95695ff85de572c04a18142 # v2.7.0 + with: + egress-policy: audit + + - name: Checkout Code + uses: actions/checkout@629c2de402a417ea7690ca6ce3f33229e27606a5 # v2 + + - name: Install Go + uses: actions/setup-go@93397bea11091df50f3d7e59dc26a7711a8bcfbe # v4.1.0 + with: + go-version-file: 'go.mod' + + - name: Run upgrade test + run: | + cd test/upgrade + FLEET_VERSION_A=${{ github.event.inputs.from-version }} FLEET_VERSION_B=${{ github.event.inputs.to-version }} go test -v \ No newline at end of file diff --git a/test/upgrade/README.md b/test/upgrade/README.md index f7ba1e1b2c..5afcc20fbd 100644 --- a/test/upgrade/README.md +++ b/test/upgrade/README.md @@ -1,16 +1,15 @@ # Upgrade Tests -The tests located in `test/upgrade` are intended to test fleet upgrades with online migrations as proposed in [#6376](https://github.com/fleetdm/fleet/pull/6376). -To run the tests, you need to specify the from and to versions. For example +This tool can be used to test DB upgrades between two Fleet versions. -``` -$ FLEET_VERSION_A=v4.16.0 FLEET_VERSION_B=v4.18.0 go test ./test/upgrade +To run the tests, you need to specify the "from" and "to" versions, for example: +```sh +FLEET_VERSION_A=v4.16.0 FLEET_VERSION_B=v4.18.0 go test ./test/upgrade ``` Ensure that Docker is installed with Compose V2. To check if you have the correct version, run the following command - -``` -$ docker compose version +```sh +docker compose version Docker Compose version v2.6.0 ``` diff --git a/test/upgrade/docker-compose.yaml b/test/upgrade/docker-compose.yml similarity index 51% rename from test/upgrade/docker-compose.yaml rename to test/upgrade/docker-compose.yml index ac703c2108..82ccc3687c 100644 --- a/test/upgrade/docker-compose.yaml +++ b/test/upgrade/docker-compose.yml @@ -13,21 +13,8 @@ services: redis: image: redis:6 - # reverse proxy and tls termination for fleet-a and fleet-b fleet: - image: nginx - volumes: - # don't mount the config. These will be copied manually so that - # we can reload nginx without recreating containers and getting a new public port each time. - # - ./nginx/fleet-a.conf:/etc/nginx/conf.d/default.conf - - ./fleet.crt:/etc/nginx/fleet.crt - - ./fleet.key:/etc/nginx/fleet.key - ports: - - "443" - - fleet-a: - &default-fleet - image: fleetdm/fleet:${FLEET_VERSION_A:-latest} + image: fleetdm/fleet:${FLEET_VERSION:-latest} environment: FLEET_MYSQL_ADDRESS: mysql:3306 FLEET_MYSQL_DATABASE: fleet @@ -35,33 +22,28 @@ services: FLEET_MYSQL_PASSWORD: fleet FLEET_REDIS_ADDRESS: redis:6379 FLEET_SERVER_ADDRESS: 0.0.0.0:8080 - FLEET_SERVER_TLS: 'false' - FLEET_LOGGING_JSON: 'true' - FLEET_LICENSE_KEY: ${FLEET_LICENSE_KEY} + FLEET_SERVER_CERT: /fleet.crt + FLEET_SERVER_KEY: /fleet.key + FLEET_LOGGING_JSON: "true" FLEET_OSQUERY_LABEL_UPDATE_INTERVAL: 1m FLEET_VULNERABILITIES_CURRENT_INSTANCE_CHECKS: "yes" FLEET_VULNERABILITIES_DATABASES_PATH: /fleet/vulndb FLEET_VULNERABILITIES_PERIODICITY: 5m - FLEET_LOGGING_DEBUG: 'true' - # This can be configured for testing purposes but otherwise uses the - # typical default of provided. - FLEET_OSQUERY_HOST_IDENTIFIER: ${FLEET_OSQUERY_HOST_IDENTIFIER:-provided} + FLEET_LOGGING_DEBUG: "true" + volumes: + - ./fleet.crt:/fleet.crt + - ./fleet.key:/fleet.key ports: - "8080" depends_on: - mysql - redis - # Uses a different version than fleet-a - fleet-b: - <<: *default-fleet - image: fleetdm/fleet:${FLEET_VERSION_B:-latest} - osquery: image: "osquery/osquery:4.7.0-ubuntu20.04" volumes: - ./fleet.crt:/etc/osquery/fleet.crt - ./osquery.flags:/etc/osquery/osquery.flags environment: - ENROLL_SECRET: "${ENROLL_SECRET}" + ENROLL_SECRET: "${ENROLL_SECRET:-foobar}" command: osqueryd --flagfile=/etc/osquery/osquery.flags diff --git a/test/upgrade/fleet_test.go b/test/upgrade/fleet_test.go index b338996b78..41e40ae4f7 100644 --- a/test/upgrade/fleet_test.go +++ b/test/upgrade/fleet_test.go @@ -3,13 +3,14 @@ package upgrade import ( "bytes" "context" + "crypto/tls" "errors" "fmt" + "io" "math/rand" "net/http" "os" "os/exec" - "path/filepath" "strconv" "testing" "time" @@ -18,17 +19,12 @@ import ( "github.com/docker/docker/api/types/container" "github.com/docker/docker/api/types/filters" "github.com/docker/docker/client" + "github.com/fleetdm/fleet/v4/pkg/fleethttp" "github.com/fleetdm/fleet/v4/server/service" _ "github.com/go-sql-driver/mysql" "github.com/jmoiron/sqlx" ) -// Slots correspond to docker-compose fleet services, either fleet-a or fleet-b -const ( - slotA = "a" - slotB = "b" -) - func init() { rand.Seed(time.Now().Unix()) } @@ -45,6 +41,7 @@ type Fleet struct { Token string dockerClient client.ContainerAPIClient + t *testing.T } // NewFleet starts fleet and it's dependencies with the specified version. @@ -60,15 +57,16 @@ func NewFleet(t *testing.T, version string) *Fleet { f := &Fleet{ ProjectName: projectName, - FilePath: "docker-compose.yaml", + FilePath: "docker-compose.yml", Version: version, dockerClient: dockerClient, + t: t, } t.Cleanup(f.cleanup) if err := f.Start(); err != nil { - t.Fatalf("start fleet: %v", err) + t.Fatalf("start fleet version A: %v", err) } return f @@ -76,15 +74,15 @@ func NewFleet(t *testing.T, version string) *Fleet { func (f *Fleet) Start() error { env := map[string]string{ - "FLEET_VERSION_A": f.Version, + "FLEET_VERSION": f.Version, } - _, err := f.execCompose(env, "pull", "--parallel") + _, err := f.execCompose(env, "pull") if err != nil { return err } // start mysql and wait until ready - _, err = f.execCompose(env, "up", "-d", "mysql") + _, err = f.execCompose(env, "up", "--remove-orphans", "-d", "mysql") if err != nil { return err } @@ -92,38 +90,19 @@ func (f *Fleet) Start() error { return err } - // run the migrations using the fleet-a service - _, err = f.execCompose(env, "run", "-T", "fleet-a", "fleet", "prepare", "db", "--no-prompt") + // run the migrations using the fleet starting version + _, err = f.execCompose(env, "run", "-T", "fleet", "fleet", "prepare", "db", "--no-prompt") if err != nil { return err } - // start fleet-a - _, err = f.execCompose(env, "up", "-d", "fleet-a", "fleet") + // start fleet + _, err = f.execCompose(env, "up", "--remove-orphans", "-d", "fleet", "fleet") if err != nil { return err } - // copy the nginx conf and reload nginx without creating a new container - srcPath := filepath.Join("nginx", "fleet-a.conf") - _, err = f.execCompose(env, "cp", srcPath, "fleet:/etc/nginx/conf.d/default.conf") - if err != nil { - return err - } - - // drop to one nginx worker process regardless of CPU count to ensure repointing to the correct - // Fleet container happens quickly - _, err = f.execCompose(env, "exec", "-T", "fleet", "sed", "-i", "s/auto/1/", "/etc/nginx/nginx.conf") - if err != nil { - return err - } - - _, err = f.execCompose(env, "exec", "-T", "fleet", "nginx", "-s", "reload") - if err != nil { - return err - } - - if err := f.waitFleet(slotA); err != nil { + if err := f.waitFleet(); err != nil { return err } @@ -136,7 +115,7 @@ func (f *Fleet) Start() error { // Client returns a fleet client that uses the fleet API. func (f *Fleet) Client() (*service.Client, error) { - port, err := f.getPublicPort("fleet", 443) + port, err := f.getPublicPort("fleet", 8080) if err != nil { return nil, fmt.Errorf("get fleet port: %v", err) } @@ -168,6 +147,8 @@ func (f *Fleet) setupFleet() error { } func (f *Fleet) waitMYSQL() error { + f.t.Log("waiting for MySQL container to respond...") + // get the random mysql host port assigned by docker port, err := f.getPublicPort("mysql", 3306) if err != nil { @@ -175,9 +156,10 @@ func (f *Fleet) waitMYSQL() error { } dsn := fmt.Sprintf("fleet:fleet@tcp(localhost:%d)/fleet", port) + f.t.Logf("dsn: %s", dsn) retryInterval := 5 * time.Second - timeout := 1 * time.Minute + timeout := 5 * time.Minute ticker := time.NewTicker(retryInterval) defer ticker.Stop() @@ -204,7 +186,7 @@ func (f *Fleet) getPublicPort(serviceName string, privatePort uint16) (uint16, e // get the random fleet host port assigned by docker argsName := filters.Arg("name", containerName) - containers, err := f.dockerClient.ContainerList(context.TODO(), container.ListOptions{Filters: filters.NewArgs(argsName), All: true}) + containers, err := f.dockerClient.ContainerList(context.TODO(), container.ListOptions{Filters: filters.NewArgs(argsName)}) if err != nil { return 0, err } @@ -219,8 +201,10 @@ func (f *Fleet) getPublicPort(serviceName string, privatePort uint16) (uint16, e return 0, errors.New("private port not found") } -func (f *Fleet) waitFleet(slot string) error { - containerName := fmt.Sprintf("%s-fleet-%s-1", f.ProjectName, slot) +func (f *Fleet) waitFleet() error { + f.t.Logf("waiting for fleet %s to be healthy...", f.Version) + + containerName := fmt.Sprintf("%s-fleet-1", f.ProjectName) // get the random fleet host port assigned by docker argsName := filters.Arg("name", containerName) @@ -232,15 +216,18 @@ func (f *Fleet) waitFleet(slot string) error { return errors.New("no fleet container found") } port := containers[0].Ports[0].PublicPort - healthURL := fmt.Sprintf("http://localhost:%d/healthz", port) + healthURL := fmt.Sprintf("https://localhost:%d/healthz", port) + f.t.Logf("fleet URL: %s", healthURL) retryStrategy := backoff.NewExponentialBackOff() retryStrategy.MaxInterval = 1 * time.Second + //nolint:gosec // G107: Ok to trust docker here + client := fleethttp.NewClient(fleethttp.WithTLSClientConfig(&tls.Config{InsecureSkipVerify: true})) + if err := backoff.Retry( func() error { - //nolint:gosec // G107: Ok to trust docker here - resp, err := http.Get(healthURL) + resp, err := client.Get(healthURL) if err != nil { return err } @@ -253,6 +240,7 @@ func (f *Fleet) waitFleet(slot string) error { ); err != nil { return fmt.Errorf("check health: %v", err) } + f.t.Log("fleet is healthy") return nil } @@ -281,9 +269,12 @@ func (f *Fleet) execCompose(env map[string]string, args ...string) (string, erro var stdout, stderr bytes.Buffer cmd := exec.Command("docker", args...) + f.t.Log(cmd.String()) cmd.Env = e - cmd.Stdout = &stdout - cmd.Stderr = &stderr + wout := io.MultiWriter(&stdout, os.Stdout) + werr := io.MultiWriter(&stderr, os.Stderr) + cmd.Stdout = wout + cmd.Stderr = werr err := cmd.Run() if err != nil { return "", fmt.Errorf("docker: %v %s", err, stderr.String()) @@ -314,7 +305,7 @@ func (f *Fleet) StartHost() (string, error) { env := map[string]string{ "ENROLL_SECRET": enrollSecret, } - output, err := f.execCompose(env, "run", "-d", "-T", "osquery") + output, err := f.execCompose(env, "run", "--remove-orphans", "-d", "-T", "osquery") if err != nil { return "", err } @@ -333,46 +324,78 @@ func (f *Fleet) StartHost() (string, error) { } // Upgrade upgrades fleet to a specified version. -func (f *Fleet) Upgrade(toVersion string) error { +func (f *Fleet) Upgrade(from, to string) error { + // stop fleet env := map[string]string{ - "FLEET_VERSION_B": toVersion, + "FLEET_VERSION": from, } - // run migrations using fleet-b - serviceName := "fleet-b" - _, err := f.execCompose(env, "run", "-T", serviceName, "fleet", "prepare", "db", "--no-prompt") - if err != nil { + if _, err := f.execCompose(env, "rm", "-s", "-v", "fleet"); err != nil { + return fmt.Errorf("bring fleet down: %v", err) + } + + // run migrations + env = map[string]string{ + "FLEET_VERSION": to, + } + // we need to pull the new version + if _, err := f.execCompose(env, "pull"); err != nil { + return err + } + if _, err := f.execCompose(env, "run", "--remove-orphans", "-T", "fleet", "fleet", "prepare", "db", "--no-prompt"); err != nil { return fmt.Errorf("run migrations: %v", err) } - // start the service - _, err = f.execCompose(env, "up", "-d", serviceName) - if err != nil { - return fmt.Errorf("start fleet: %v", err) + // start the new version + if _, err := f.execCompose(env, "up", "--remove-orphans", "-d", "fleet", "fleet"); err != nil { + return fmt.Errorf("start fleet version B: %v", err) } + f.Version = to + // wait until healthy - if err := f.waitFleet(slotB); err != nil { + if err := f.waitFleet(); err != nil { return fmt.Errorf("wait for fleet to be healthy: %v", err) } - // copy the nginx conf and reload nginx without creating a new container - srcPath := filepath.Join("nginx", "fleet-b.conf") - _, err = f.execCompose(env, "cp", srcPath, "fleet:/etc/nginx/conf.d/default.conf") - if err != nil { - return err - } - - _, err = f.execCompose(env, "exec", "-T", "fleet", "nginx", "-s", "reload") - if err != nil { - return err - } - - // even with only one worker process, graceful reload of nginx workers doesn't happen instantly, - // so we add a wait here to let workers swap so they're pointed at the upgraded Fleet server - time.Sleep(250 * time.Millisecond) - - f.Version = toVersion + f.t.Log("upgraded successfully") return nil } + +func enrollHost(t *testing.T, f *Fleet) (string, error) { + client, err := f.Client() + if err != nil { + return "", fmt.Errorf("creating fleet client: %w", err) + } + + // enroll a host + hostname, err := f.StartHost() + if err != nil { + return "", fmt.Errorf("creating fleet client: %w", err) + } + + // wait until host is enrolled and software is listed + retryStrategy := backoff.NewExponentialBackOff() + retryStrategy.InitialInterval = 5 * time.Second + retryStrategy.MaxInterval = 5 * time.Minute + + if err := backoff.Retry(func() error { + host, err := client.HostByIdentifier(hostname) + if err != nil { + t.Logf("get host by identifier %s: %s", hostname, err) + return err + } + + if len(host.Software) == 0 { + t.Logf("software for %s not reported yet", hostname) + return errors.New("no software reported yet") + } + + return nil + }, retryStrategy); err != nil { + return "", fmt.Errorf("host enroll retry: %w", err) + } + + return hostname, nil +} diff --git a/test/upgrade/nginx/fleet-a.conf b/test/upgrade/nginx/fleet-a.conf deleted file mode 100644 index 682a15c1bf..0000000000 --- a/test/upgrade/nginx/fleet-a.conf +++ /dev/null @@ -1,11 +0,0 @@ -server { - listen 443 ssl; - server_name fleet; - ssl_certificate fleet.crt; - ssl_certificate_key fleet.key; - - location / { - proxy_pass http://fleet-a:8080; - } -} - diff --git a/test/upgrade/nginx/fleet-b.conf b/test/upgrade/nginx/fleet-b.conf deleted file mode 100644 index c54967e505..0000000000 --- a/test/upgrade/nginx/fleet-b.conf +++ /dev/null @@ -1,11 +0,0 @@ -server { - listen 443 ssl; - server_name fleet; - ssl_certificate fleet.crt; - ssl_certificate_key fleet.key; - - location / { - proxy_pass http://fleet-b:8080; - } -} - diff --git a/test/upgrade/osquery.flags b/test/upgrade/osquery.flags index a81ffc1367..9e60e5f9c9 100644 --- a/test/upgrade/osquery.flags +++ b/test/upgrade/osquery.flags @@ -1,7 +1,8 @@ ---verbose=true +--verbose --debug +--tls_dump ---tls_hostname=fleet +--tls_hostname=fleet:8080 --tls_server_certs=/etc/osquery/fleet.crt --enroll_secret_env=ENROLL_SECRET diff --git a/test/upgrade/upgrade_test.go b/test/upgrade/upgrade_test.go index 4b1b9549ea..73ca4e51be 100644 --- a/test/upgrade/upgrade_test.go +++ b/test/upgrade/upgrade_test.go @@ -3,37 +3,10 @@ package upgrade import ( "os" "testing" - "time" "github.com/stretchr/testify/require" ) -func enrollHost(t *testing.T, f *Fleet) string { - client, err := f.Client() - require.NoError(t, err) - - // enroll a host - hostname, err := f.StartHost() - require.NoError(t, err) - - // wait until host is enrolled and software is listed - require.Eventually(t, func() bool { - host, err := client.HostByIdentifier(hostname) - if err != nil { - t.Logf("get host: %v", err) - return false - } - - if len(host.Software) == 0 { - return false - } - - return true - }, 5*time.Minute, 5*time.Second) - - return hostname -} - func TestUpgradeAToB(t *testing.T) { versionA := os.Getenv("FLEET_VERSION_A") if versionA == "" { @@ -47,11 +20,15 @@ func TestUpgradeAToB(t *testing.T) { f := NewFleet(t, versionA) - enrollHost(t, f) + hostname, err := enrollHost(t, f) + require.NoError(t, err) + t.Logf("first host %s enrolled successfully", hostname) - err := f.Upgrade(versionB) + err = f.Upgrade(versionA, versionB) require.NoError(t, err) // enroll another host with the new version - enrollHost(t, f) + hostname, err = enrollHost(t, f) + require.NoError(t, err) + t.Logf("second host %s enrolled successfully", hostname) }