Fixed GitOps failing to delete a certificate authority (#41693)

<!-- Add the related story/sub-task/bug number, like Resolves #123, or
remove if NA -->
**Related issue:** Resolves #38036

# Checklist for submitter

If some of the following don't apply, delete the relevant line.

- [x] Changes file added for user-visible changes in `changes/`,
`orbit/changes/` or `ee/fleetd-chrome/changes`.

## Testing

- [x] Added/updated automated tests
- [x] QA'd all new/changed functionality manually

<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
## Summary by CodeRabbit

* **Bug Fixes**
* GitOps now correctly orders operations so certificate authorities can
be removed only after referencing certificate templates are handled,
preventing failed deletions during config updates.
* Improved user-facing error when a CA cannot be deleted because
certificate templates still reference it, with guidance to remove
templates first.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
Victor Lyuboslavsky 2026-03-16 15:51:28 -05:00 committed by GitHub
parent b7b5d4190e
commit 902b4af289
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
14 changed files with 248 additions and 22 deletions

View file

@ -0,0 +1 @@
* Fixed GitOps failing to delete a certificate authority when certificate templates still reference it in fleet configs.

View file

@ -489,6 +489,15 @@ func gitopsCommand() *cli.Command {
return err
}
// Capture CAs before DoGitOps (which deletes the key from OrgSettings).
// DoGitOps processes CA creates/updates inline (with skipDeletes=true).
// CA deletions are always deferred to a post-op so that team configs can
// clean up certificate templates (which have FK references to CAs) first.
var deferredCAs any
if isGlobalConfig && !flDryRun {
deferredCAs = config.OrgSettings["certificate_authorities"]
}
assumptions, err := fleetClient.DoGitOps(
c.Context,
config,
@ -505,6 +514,21 @@ func gitopsCommand() *cli.Command {
if err != nil {
return err
}
// Schedule CA deletions as a post-op after all team configs have been processed.
if isGlobalConfig && !flDryRun {
allPostOps = append(allPostOps, func() error {
groupedCAs, caErr := fleet.ValidateCertificateAuthoritiesSpec(deferredCAs)
if caErr != nil {
return fmt.Errorf("invalid certificate_authorities: %w", caErr)
}
if caErr = fleetClient.ApplyCertificateAuthoritiesSpec(*groupedCAs, fleet.ApplySpecOptions{}, fleet.BatchApplyCertificateAuthoritiesOpts{}); caErr != nil {
return fmt.Errorf("applying certificate authorities: %w", caErr)
}
return nil
})
}
if config.TeamName != nil {
teamNames = append(teamNames, *config.TeamName)
} else {

View file

@ -456,11 +456,25 @@ func TestGitOpsBasicGlobalPremium(t *testing.T) {
upserts := make([]*fleet.CertificateAuthority, 0, len(ops.Add)+len(ops.Update))
upserts = append(upserts, ops.Add...)
upserts = append(upserts, ops.Update...)
g, err := fleet.GroupCertificateAuthoritiesByType(upserts)
if err != nil {
return err
if len(upserts) > 0 {
g, err := fleet.GroupCertificateAuthoritiesByType(upserts)
if err != nil {
return err
}
// Merge into stored state per CA type, like a real DB upsert.
if g.NDESSCEP != nil {
storedCAs.NDESSCEP = g.NDESSCEP
}
if len(g.DigiCert) > 0 {
storedCAs.DigiCert = g.DigiCert
}
if len(g.CustomScepProxy) > 0 {
storedCAs.CustomScepProxy = g.CustomScepProxy
}
if len(g.Hydrant) > 0 {
storedCAs.Hydrant = g.Hydrant
}
}
storedCAs = *g
return nil
}

View file

@ -148,6 +148,15 @@ func (s *enterpriseIntegrationGitopsTestSuite) TearDownTest() {
t := s.T()
ctx := context.Background()
mysql.ExecAdhocSQL(t, s.DS, func(q sqlx.ExtContext) error {
// Delete certificate templates before CAs and teams to avoid FK constraints.
if _, err := q.ExecContext(ctx, `DELETE FROM certificate_templates`); err != nil {
return err
}
_, err := q.ExecContext(ctx, `DELETE FROM certificate_authorities`)
return err
})
teams, err := s.DS.ListTeams(ctx, fleet.TeamFilter{User: test.UserAdmin}, fleet.ListOptions{})
require.NoError(t, err)
for _, tm := range teams {
@ -751,6 +760,153 @@ reports:
assert.Empty(t, groupedCAs.CustomScepProxy)
}
// TestDeleteCAWithCertificateTemplates tests the GitOps ordering when deleting a certificate
// authority that is referenced by certificate templates on a team.
func (s *enterpriseIntegrationGitopsTestSuite) TestDeleteCAWithCertificateTemplates() {
t := s.T()
user := s.createGitOpsUser(t)
fleetctlConfig := s.createFleetctlConfig(t, user)
scepServer := scep_server.StartTestSCEPServer(t)
t.Setenv("FLEET_URL", s.Server.URL)
// Step 1: Create a CA and a team with a certificate template referencing it via GitOps.
globalFileWithCA := s.writeConfigFile(t, fmt.Sprintf(`
agent_options:
controls:
org_settings:
server_settings:
server_url: $FLEET_URL
org_info:
org_name: Fleet
secrets:
certificate_authorities:
custom_scep_proxy:
- name: TestSCEP
url: %s
challenge: challenge
policies:
reports:
`, scepServer.URL+"/scep"))
teamFileWithCert := s.writeConfigFile(t, `
name: CA Test Team
controls:
android_settings:
certificates:
- name: TestCert
certificate_authority_name: TestSCEP
subject_name: "CN=test,O=Fleet"
settings:
secrets:
- secret: ca_test_team_secret
agent_options:
policies:
reports:
software:
`)
// Apply both files to create the CA, team, and certificate template.
fleetctl.RunAppForTest(t, []string{
"gitops", "--config", fleetctlConfig.Name(),
"-f", globalFileWithCA, "-f", teamFileWithCert,
})
// Verify the CA was created via GitOps.
groupedCAs, err := s.DS.GetGroupedCertificateAuthorities(t.Context(), false)
require.NoError(t, err)
require.Len(t, groupedCAs.CustomScepProxy, 1)
require.Equal(t, "TestSCEP", groupedCAs.CustomScepProxy[0].Name)
// Verify the team was created and the certificate template exists.
teams, err := s.DS.ListTeams(t.Context(), fleet.TeamFilter{User: test.UserAdmin}, fleet.ListOptions{})
require.NoError(t, err)
var teamID uint
for _, tm := range teams {
if tm.Name == "CA Test Team" {
teamID = tm.ID
break
}
}
require.NotZero(t, teamID, "team 'CA Test Team' should exist")
certTemplates, _, err := s.DS.GetCertificateTemplatesByTeamID(t.Context(), teamID, fleet.ListOptions{})
require.NoError(t, err)
require.Len(t, certTemplates, 1)
require.Equal(t, "TestCert", certTemplates[0].Name)
// Step 2: Run gitops removing the CA but WITHOUT the team file.
// This should fail because the team's certificate templates still reference the CA.
globalFileWithoutCA := s.writeConfigFile(t, `
agent_options:
controls:
org_settings:
server_settings:
server_url: $FLEET_URL
org_info:
org_name: Fleet
secrets:
policies:
reports:
`)
_, err = fleetctl.RunAppNoChecks([]string{
"gitops", "--config", fleetctlConfig.Name(),
"-f", globalFileWithoutCA,
})
require.Error(t, err)
require.Contains(t, err.Error(), fleet.DeleteCAReferencedByTemplatesErrMsg)
// Verify CA and certificate template still exist.
groupedCAs, err = s.DS.GetGroupedCertificateAuthorities(t.Context(), false)
require.NoError(t, err)
require.Len(t, groupedCAs.CustomScepProxy, 1)
certTemplates, _, err = s.DS.GetCertificateTemplatesByTeamID(t.Context(), teamID, fleet.ListOptions{})
require.NoError(t, err)
require.Len(t, certTemplates, 1)
// Step 3: Run gitops removing BOTH the CA and the certificate template.
// This should succeed because the postOp ordering ensures certificate templates
// are deleted before the CA.
teamFileWithoutCert := s.writeConfigFile(t, `
name: CA Test Team
controls:
settings:
secrets:
- secret: ca_test_team_secret
agent_options:
policies:
reports:
software:
`)
fleetctl.RunAppForTest(t, []string{
"gitops", "--config", fleetctlConfig.Name(),
"-f", globalFileWithoutCA, "-f", teamFileWithoutCert,
})
// Verify CA and certificate template are deleted.
groupedCAs, err = s.DS.GetGroupedCertificateAuthorities(t.Context(), false)
require.NoError(t, err)
assert.Empty(t, groupedCAs.CustomScepProxy)
certTemplates, _, err = s.DS.GetCertificateTemplatesByTeamID(t.Context(), teamID, fleet.ListOptions{})
require.NoError(t, err)
assert.Empty(t, certTemplates)
}
func (s *enterpriseIntegrationGitopsTestSuite) writeConfigFile(t *testing.T, content string) string {
t.Helper()
f, err := os.CreateTemp(t.TempDir(), "*.yml")
require.NoError(t, err)
_, err = f.WriteString(content)
require.NoError(t, err)
require.NoError(t, f.Close())
return f.Name()
}
// TestUnsetConfigurationProfileLabels tests the removal of labels associated with a
// configuration profile via gitops.
func (s *enterpriseIntegrationGitopsTestSuite) TestUnsetConfigurationProfileLabels() {

View file

@ -481,12 +481,12 @@ func (svc *Service) DeleteCertificateAuthority(ctx context.Context, certificateA
return nil
}
func (svc *Service) BatchApplyCertificateAuthorities(ctx context.Context, incoming fleet.GroupedCertificateAuthorities, dryRun bool, viaGitOps bool) error {
func (svc *Service) BatchApplyCertificateAuthorities(ctx context.Context, incoming fleet.GroupedCertificateAuthorities, opts fleet.BatchApplyCertificateAuthoritiesOpts) error {
if err := svc.authz.Authorize(ctx, &fleet.CertificateAuthority{}, fleet.ActionWrite); err != nil {
return err
}
if !viaGitOps {
if !opts.ViaGitOps {
// Note: This check is here primarily for future reference to help make the usage intent
// clear and to differentiate behavior from dual-use endpoints that support patch semantics (e.g., app config)
return fleet.NewInvalidArgumentError("gitops", "certificate_authorities: batch apply is intended only for use with gitops")
@ -502,7 +502,11 @@ func (svc *Service) BatchApplyCertificateAuthorities(ctx context.Context, incomi
return nil
}
if dryRun {
if opts.SkipDeletes {
ops.Delete = nil
}
if opts.DryRun {
svc.logger.DebugContext(ctx, "batch apply certificate authorities: no certificate authority changes to apply")
return nil
}

View file

@ -350,6 +350,11 @@ func batchDeleteCertificateAuthorities(ctx context.Context, tx sqlx.ExtContext,
_, err := tx.ExecContext(ctx, stmt, args...)
if err != nil {
if isMySQLForeignKey(err) {
return &fleet.ConflictError{
Message: "Couldn't delete certificate authority. " + fleet.DeleteCAReferencedByTemplatesErrMsg + ". Please remove the certificate templates first.",
}
}
return ctxerr.Wrap(ctx, err, "deleting certificate authorities")
}
@ -363,10 +368,10 @@ func (ds *Datastore) BatchApplyCertificateAuthorities(ctx context.Context, ops f
upserts = append(upserts, ops.Update...)
return ds.withRetryTxx(ctx, func(tx sqlx.ExtContext) error {
if err := batchDeleteCertificateAuthorities(ctx, tx, ops.Delete); err != nil {
if err := batchUpsertCertificateAuthorities(ctx, tx, ds.serverPrivateKey, upserts); err != nil {
return err
}
if err := batchUpsertCertificateAuthorities(ctx, tx, ds.serverPrivateKey, upserts); err != nil {
if err := batchDeleteCertificateAuthorities(ctx, tx, ops.Delete); err != nil {
return err
}
return nil
@ -396,7 +401,7 @@ func (ds *Datastore) DeleteCertificateAuthority(ctx context.Context, certificate
if err != nil {
if isMySQLForeignKey(err) {
return nil, fleet.ConflictError{
Message: "Couldn't delete. This certificate authority is used in a certificate. Please remove the certificate first.",
Message: "Couldn't delete certificate authority. " + fleet.DeleteCAReferencedByTemplatesErrMsg + ". Please remove the certificate templates first.",
}
}
return nil, ctxerr.Wrap(ctx, err, fmt.Sprintf("deleting certificate authority with id %d", certificateAuthorityID))

View file

@ -402,7 +402,7 @@ func testDeleteCertificateAuthority(t *testing.T, ds *Datastore) {
require.Error(t, err)
var conflictErr fleet.ConflictError
require.ErrorAs(t, err, &conflictErr)
require.Contains(t, conflictErr.Error(), "certificate authority is used in a certificate")
require.Contains(t, conflictErr.Error(), fleet.DeleteCAReferencedByTemplatesErrMsg)
}
func testUpdateCertificateAuthorityByID(t *testing.T, ds *Datastore) {

View file

@ -701,6 +701,13 @@ func ValidateCertificateAuthoritiesSpec(incoming interface{}) (*GroupedCertifica
return &groupedCAs, nil
}
// BatchApplyCertificateAuthoritiesOpts controls which operations the batch apply endpoint performs.
type BatchApplyCertificateAuthoritiesOpts struct {
DryRun bool
ViaGitOps bool
SkipDeletes bool // Process creates/updates only; skip deletions.
}
// CertificateAuthoritiesBatchOperations groups the operations for batch processing of certificate authorities.
type CertificateAuthoritiesBatchOperations struct {
Delete []*CertificateAuthority

View file

@ -534,6 +534,11 @@ var (
SCEPRenewalIDWithoutURLChallengeErrMsg = "Variable \"$FLEET_VAR_" + string(FleetVarSCEPRenewalID) + "\" can't be used if variables for SCEP URL and Challenge are not specified."
)
const (
// DeleteCAReferencedByTemplatesErrMsg is the error substring used when a CA cannot be deleted because certificate templates still reference it.
DeleteCAReferencedByTemplatesErrMsg = "Certificate templates still reference it"
)
// ConflictError is used to indicate a conflict, such as a UUID conflict in the DB.
type ConflictError struct {
Message string

View file

@ -1441,7 +1441,7 @@ type Service interface {
UpdateCertificateAuthority(ctx context.Context, id uint, p CertificateAuthorityUpdatePayload) error
RequestCertificate(ctx context.Context, p RequestCertificatePayload) (*string, error)
// BatchApplyCertificateAuthorities applies the given certificate authorities spec
BatchApplyCertificateAuthorities(ctx context.Context, groupedCAs GroupedCertificateAuthorities, dryRun bool, viaGitOps bool) error
BatchApplyCertificateAuthorities(ctx context.Context, groupedCAs GroupedCertificateAuthorities, opts BatchApplyCertificateAuthoritiesOpts) error
// GetGroupedCertificateAuthorities retrieves the grouped certificate authorities
GetGroupedCertificateAuthorities(ctx context.Context, includeSecrets bool) (*GroupedCertificateAuthorities, error)

View file

@ -885,7 +885,7 @@ type UpdateCertificateAuthorityFunc func(ctx context.Context, id uint, p fleet.C
type RequestCertificateFunc func(ctx context.Context, p fleet.RequestCertificatePayload) (*string, error)
type BatchApplyCertificateAuthoritiesFunc func(ctx context.Context, groupedCAs fleet.GroupedCertificateAuthorities, dryRun bool, viaGitOps bool) error
type BatchApplyCertificateAuthoritiesFunc func(ctx context.Context, groupedCAs fleet.GroupedCertificateAuthorities, opts fleet.BatchApplyCertificateAuthoritiesOpts) error
type GetGroupedCertificateAuthoritiesFunc func(ctx context.Context, includeSecrets bool) (*fleet.GroupedCertificateAuthorities, error)
@ -5234,11 +5234,11 @@ func (s *Service) RequestCertificate(ctx context.Context, p fleet.RequestCertifi
return s.RequestCertificateFunc(ctx, p)
}
func (s *Service) BatchApplyCertificateAuthorities(ctx context.Context, groupedCAs fleet.GroupedCertificateAuthorities, dryRun bool, viaGitOps bool) error {
func (s *Service) BatchApplyCertificateAuthorities(ctx context.Context, groupedCAs fleet.GroupedCertificateAuthorities, opts fleet.BatchApplyCertificateAuthoritiesOpts) error {
s.mu.Lock()
s.BatchApplyCertificateAuthoritiesFuncInvoked = true
s.mu.Unlock()
return s.BatchApplyCertificateAuthoritiesFunc(ctx, groupedCAs, dryRun, viaGitOps)
return s.BatchApplyCertificateAuthoritiesFunc(ctx, groupedCAs, opts)
}
func (s *Service) GetGroupedCertificateAuthorities(ctx context.Context, includeSecrets bool) (*fleet.GroupedCertificateAuthorities, error) {

View file

@ -177,6 +177,7 @@ func (svc *Service) RequestCertificate(ctx context.Context, p fleet.RequestCerti
type batchApplyCertificateAuthoritiesRequest struct {
CertificateAuthorities fleet.GroupedCertificateAuthorities `json:"certificate_authorities"`
DryRun bool `json:"dry_run"`
SkipDeletes bool `json:"skip_deletes"`
}
// TODO(hca): do we need to return anything to facilitate logging by the gitops client?
@ -189,8 +190,11 @@ func (r batchApplyCertificateAuthoritiesResponse) Error() error { return r.Err }
func batchApplyCertificateAuthoritiesEndpoint(ctx context.Context, request interface{}, svc fleet.Service) (fleet.Errorer, error) {
req := request.(*batchApplyCertificateAuthoritiesRequest)
// Call the service method to apply the certificate authorities spec
err := svc.BatchApplyCertificateAuthorities(ctx, req.CertificateAuthorities, req.DryRun, true)
err := svc.BatchApplyCertificateAuthorities(ctx, req.CertificateAuthorities, fleet.BatchApplyCertificateAuthoritiesOpts{
DryRun: req.DryRun,
ViaGitOps: true,
SkipDeletes: req.SkipDeletes,
})
if err != nil {
return &batchApplyCertificateAuthoritiesResponse{Err: err}, nil
}
@ -198,7 +202,7 @@ func batchApplyCertificateAuthoritiesEndpoint(ctx context.Context, request inter
return &batchApplyCertificateAuthoritiesResponse{}, nil
}
func (svc *Service) BatchApplyCertificateAuthorities(ctx context.Context, incoming fleet.GroupedCertificateAuthorities, dryRun bool, viaGitOps bool) error {
func (svc *Service) BatchApplyCertificateAuthorities(ctx context.Context, incoming fleet.GroupedCertificateAuthorities, opts fleet.BatchApplyCertificateAuthoritiesOpts) error {
if err := svc.authz.Authorize(ctx, &fleet.CertificateAuthority{}, fleet.ActionWrite); err != nil {
return err
}

View file

@ -578,7 +578,9 @@ func (c *Client) ApplyGroup(
}
if specs.CertificateAuthorities != nil {
if err := c.ApplyCertificateAuthoritiesSpec(*specs.CertificateAuthorities, opts.ApplySpecOptions); err != nil {
// In GitOps, skip deletes here. CA deletions are deferred to a post-op so that team configs
// can clean up certificate templates (which have FK references to CAs) first.
if err := c.ApplyCertificateAuthoritiesSpec(*specs.CertificateAuthorities, opts.ApplySpecOptions, fleet.BatchApplyCertificateAuthoritiesOpts{SkipDeletes: viaGitOps}); err != nil {
// only do this custom message for gitops as we reference the applying filename which only makes sense in gitops
if err.Error() == "missing or invalid license" && viaGitOps && filename != nil {
return nil, nil, nil, nil, fmt.Errorf("Couldn't edit \"%s\" at \"certificate_authorities\": Missing or invalid license. Certificate authorities are available in Fleet Premium only.", *filename)

View file

@ -15,11 +15,15 @@ func (c *Client) GetCertificateAuthoritiesSpec(includeSecrets bool) (*fleet.Grou
}
// ApplyCertificateAuthoritiesSpec applies the certificate authorities.
func (c *Client) ApplyCertificateAuthoritiesSpec(groupedCAs fleet.GroupedCertificateAuthorities, opts fleet.ApplySpecOptions) error {
req := batchApplyCertificateAuthoritiesRequest{CertificateAuthorities: groupedCAs, DryRun: opts.DryRun}
func (c *Client) ApplyCertificateAuthoritiesSpec(groupedCAs fleet.GroupedCertificateAuthorities, specOpts fleet.ApplySpecOptions, opts fleet.BatchApplyCertificateAuthoritiesOpts) error {
req := batchApplyCertificateAuthoritiesRequest{
CertificateAuthorities: groupedCAs,
DryRun: specOpts.DryRun,
SkipDeletes: opts.SkipDeletes,
}
verb, path := "POST", "/api/latest/fleet/spec/certificate_authorities"
var responseBody batchApplyCertificateAuthoritiesResponse
return c.authenticatedRequestWithQuery(req, verb, path, &responseBody, opts.RawQuery())
return c.authenticatedRequestWithQuery(req, verb, path, &responseBody, specOpts.RawQuery())
}
// GetCertificateAuthorities fetches the list of certificate authorities