End-user authentication for Windows/Linux setup experience: agent (#34847)

<!-- Add the related story/sub-task/bug number, like Resolves #123, or
remove if NA -->
**Related issue:** Resolves #34528 

# Details

This PR implements the agent changes for allowing Fleet admins to
require that users authenticate with an IdP prior to having their
devices set up. I'll comment on changes inline but the high-level is:

1. Orbit calls the enroll endpoint as usual. This is triggered lazily by
any one of a number of subsystems like device token rotation or
requesting Fleet config
2. If the enroll endpoint returns the new `ErrEndUserAuthRequired`
response, then it opens a window to the `/mdm/sso` Fleet page and
retries the enroll endpoint every 30 seconds indefinitely.
3. Any other non-200 response to the enroll request is treated as before
(limited # of retries, with backoff)

# Checklist for submitter

If some of the following don't apply, delete the relevant line.

- [ ] Changes file added for user-visible changes in `changes/`,
`orbit/changes/` or `ee/fleetd-chrome/changes`.
See [Changes
files](https://github.com/fleetdm/fleet/blob/main/docs/Contributing/guides/committing-changes.md#changes-files)
for more information.
Will add changelog when story is done.

## Testing

- [X] Added/updated automated tests
Added test for new retry logic

- [X] QA'd all new/changed functionality manually

This is kinda hard to test without the associated backend PR:
https://github.com/fleetdm/fleet/pull/34835

## fleetd/orbit/Fleet Desktop

- [X] Verified compatibility with the latest released version of Fleet
(see [Must
rule](https://github.com/fleetdm/fleet/blob/main/docs/Contributing/workflows/fleetd-development-and-release-strategy.md))
This is compatible with all Fleet versions, since older ones won't send
the new error.
- [X] If the change applies to only one platform, confirmed that
`runtime.GOOS` is used as needed to isolate changes
This is compatible with all platforms, although it currently should only
ever run on Windows and Linux since macOS devices will have end-user
auth taken care of before they even download Orbit.
- [ ] Verified that fleetd runs on macOS, Linux and Windows
Testing this now.
- [ ] Verified auto-update works from the released version of component
to the new version (see [tools/tuf/test](../tools/tuf/test/README.md))


<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->

## Summary by CodeRabbit

* **New Features**
  * Added SSO (Single Sign-On) enrollment support for end-user
    authentication
  * Enhanced error messaging for authentication-required scenarios

* **Bug Fixes**
  * Improved error handling and retry logic for enrollment failures

<!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
Scott Gress 2025-11-03 16:41:57 -06:00 committed by GitHub
parent 59a73b1e47
commit b482e07605
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 195 additions and 19 deletions

View file

@ -55,6 +55,7 @@ import (
"github.com/fleetdm/fleet/v4/pkg/certificate"
"github.com/fleetdm/fleet/v4/pkg/file"
"github.com/fleetdm/fleet/v4/pkg/fleethttpsig"
"github.com/fleetdm/fleet/v4/pkg/open"
retrypkg "github.com/fleetdm/fleet/v4/pkg/retry"
"github.com/fleetdm/fleet/v4/pkg/secure"
"github.com/fleetdm/fleet/v4/server/fleet"
@ -1119,6 +1120,16 @@ func main() {
return fmt.Errorf("error new orbit client: %w", err)
}
// Set the function that will be called to open the SSO window if an enroll
// request returns an "end user authentication required" error.
orbitClient.SetOpenSSOWindowFunc(func() error {
	// Fleet's SSO page for the setup experience, identified by this host's
	// hardware UUID.
	ssoURL := fleetURL + "/mdm/sso?initiator=setup_experience&host_uuid=" + orbitHostInfo.HardwareUUID
	// Keep the error local to the callback: the callback is stored on the
	// client and invoked later, so it must not write to the enclosing
	// scope's err variable.
	if err := open.Browser(ssoURL); err != nil {
		return fmt.Errorf("opening browser: %w", err)
	}
	return nil
})
// If the server can't be reached, we want to fail quickly on any blocking network calls
// so that desktop can be launched as soon as possible.
serverIsReachable := orbitClient.Ping() == nil

View file

@ -5,10 +5,32 @@ import (
"time"
)
// ErrorOutcome is the verdict an error filter returns to retry.Do for a
// failed attempt. Filters are installed with WithErrorFilter.
type ErrorOutcome int

// Verdicts an error filter may return. The zero value is
// ErrorOutcomeNormalRetry.
const (
	// ErrorOutcomeNormalRetry marks the error as retryable: the retry loop
	// carries on as it would without a filter.
	ErrorOutcomeNormalRetry ErrorOutcome = iota

	// ErrorOutcomeResetAttempts asks the retry loop to reset its attempts
	// counter to zero. This lets a caller hijack the retry cycle and keep
	// retrying indefinitely until some condition is met.
	ErrorOutcomeResetAttempts

	// ErrorOutcomeIgnore asks the retry loop to disregard the error and
	// exit successfully.
	ErrorOutcomeIgnore

	// ErrorOutcomeDoNotRetry marks the error as non-retryable: the retry
	// loop exits and returns the error.
	ErrorOutcomeDoNotRetry
)
// config collects the settings that Option values apply before retry.Do
// starts looping.
type config struct {
	// initialInterval is the configured retry interval.
	initialInterval time.Duration
	// backoffMultiplier is the configured backoff multiplier.
	backoffMultiplier int
	// maxAttempts bounds the number of attempts; zero disables the bound.
	maxAttempts int
	// errorFilter, when non-nil, is consulted for every non-nil error to
	// decide how the retry loop reacts.
	errorFilter func(error) ErrorOutcome
}
// Option allows to configure the behavior of retry.Do
@ -37,6 +59,14 @@ func WithMaxAttempts(a int) Option {
}
}
// WithErrorFilter returns an Option that installs f as the error filter.
// The filter translates each error into an ErrorOutcome and is consulted
// before the max-attempts/backoff handling.
func WithErrorFilter(f func(error) ErrorOutcome) Option {
	installFilter := func(cfg *config) {
		cfg.errorFilter = f
	}
	return installFilter
}
// Do executes the provided function, if the function returns a
// non-nil error it performs a retry according to the options
// provided.
@ -62,6 +92,19 @@ func Do(fn func() error, opts ...Option) error {
if err == nil {
return nil
}
if cfg.errorFilter != nil {
switch cfg.errorFilter(err) {
case ErrorOutcomeIgnore:
return nil
case ErrorOutcomeResetAttempts:
attempts = 0
backoff = 1
case ErrorOutcomeDoNotRetry:
return err
default:
// continue with normal retry
}
}
if cfg.maxAttempts != 0 && attempts >= cfg.maxAttempts {
return err

View file

@ -68,4 +68,79 @@ func TestRetryDo(t *testing.T) {
require.NoError(t, err)
require.Equal(t, maxAttempts, count)
})
t.Run("with error filter (test ignore)", func(t *testing.T) {
count := 0
err := Do(func() error {
count++
if count == 1 {
return errors.New("normal")
}
if count == 2 {
return errors.New("reset")
}
if count == 3 {
return errors.New("ignore")
}
return nil
},
WithInterval(50*time.Millisecond),
// We should actually run 3 times, but since one
// of the errors causes a reset, we set max attempts to 2
// to ensure that the reset logic is exercised.
WithMaxAttempts(2),
WithErrorFilter(func(err error) ErrorOutcome {
if err.Error() == "normal" {
return ErrorOutcomeNormalRetry
}
if err.Error() == "reset" {
return ErrorOutcomeResetAttempts
}
if err.Error() == "ignore" {
return ErrorOutcomeIgnore
}
return ErrorOutcomeDoNotRetry
}),
)
require.NoError(t, err)
require.Equal(t, 3, count)
})
t.Run("with error filter (test noretry)", func(t *testing.T) {
count := 0
err := Do(func() error {
count++
if count == 1 {
return errors.New("normal")
}
if count == 2 {
return errors.New("reset")
}
if count == 3 {
return errors.New("stop")
}
return nil
},
WithInterval(50*time.Millisecond),
// We should only actually run 3 times, setting this to 10
// tests that the DoNotRetry logic is exercised.
WithMaxAttempts(10),
WithErrorFilter(func(err error) ErrorOutcome {
if err.Error() == "normal" {
return ErrorOutcomeNormalRetry
}
if err.Error() == "reset" {
return ErrorOutcomeResetAttempts
}
if err.Error() == "stop" {
return ErrorOutcomeDoNotRetry
}
return ErrorOutcomeNormalRetry
}),
)
require.ErrorContains(t, err, "stop")
require.Equal(t, 3, count)
})
}

View file

@ -53,6 +53,9 @@ func (bc *baseClient) parseResponse(verb, path string, response *http.Response,
if strings.Contains(errText, "password reset required") {
return ErrPasswordResetRequired
}
if strings.Contains(errText, "END_USER_AUTH_REQUIRED") {
return ErrEndUserAuthRequired
}
return ErrUnauthenticated
case http.StatusPaymentRequired:
return ErrMissingLicense

View file

@ -16,6 +16,9 @@ var (
ErrUnauthenticated = errors.New("unauthenticated, or invalid token")
ErrPasswordResetRequired = errors.New("Password reset required. Please sign into the Fleet UI to update your password, then log in again with: fleetctl login.")
ErrMissingLicense = errors.New("missing or invalid license")
// ErrEndUserAuthRequired is returned when an action (such as enrolling a device)
// requires end user authentication
ErrEndUserAuthRequired = errors.New("end user authentication required")
)
type SetupAlreadyErr interface {

View file

@ -67,6 +67,13 @@ type OrbitClient struct {
// If set then it will be deleted on HTTP 401 errors from Fleet and it will cause ExecuteConfigReceivers
// to terminate to trigger a restart.
hostIdentityCertPath string
// initiatedIdpAuth is a flag indicating whether a window has been opened
// to the sign-on page for the organization's Identity Provider.
initiatedIdpAuth bool
// openSSOWindow is a function that opens a browser window to the SSO URL.
openSSOWindow func() error
}
// time-to-live for config cache
@ -79,6 +86,10 @@ type configCache struct {
err error
}
// SetOpenSSOWindowFunc stores f as the client's openSSOWindow callback, the
// function used to open a browser window to the SSO URL.
func (oc *OrbitClient) SetOpenSSOWindowFunc(f func() error) {
oc.openSSOWindow = f
}
// request forwards verb, path, params and resp unchanged to
// requestWithExternal, passing false as its final argument.
// NOTE(review): presumably false selects the non-"external" request
// variant; confirm against requestWithExternal.
func (oc *OrbitClient) request(verb string, path string, params interface{}, resp interface{}) error {
return oc.requestWithExternal(verb, path, params, resp, false)
}
@ -537,25 +548,11 @@ func (oc *OrbitClient) getNodeKeyOrEnroll() (string, error) {
default:
return "", fmt.Errorf("read orbit node key file: %w", err)
}
var (
orbitNodeKey_ string
endpointDoesNotExist bool
)
var orbitNodeKey_ string
if err := retry.Do(
func() error {
var err error
orbitNodeKey_, err = oc.enrollAndWriteNodeKeyFile()
switch {
case err == nil:
return nil
case errors.Is(err, notFoundErr{}):
// Do not retry if the endpoint does not exist.
endpointDoesNotExist = true
return nil
default:
logging.LogErrIfEnvNotSet(constant.SilenceEnrollLogErrorEnvVar, err, "enroll failed, retrying")
return err
}
return err
},
// The below configuration means the following retry intervals (exponential backoff):
// 10s, 20s, 40s, 80s, 160s and then return the failure (max attempts = 6)
@ -563,12 +560,46 @@ func (oc *OrbitClient) getNodeKeyOrEnroll() (string, error) {
retry.WithInterval(orbitEnrollRetryInterval()),
retry.WithMaxAttempts(constant.OrbitEnrollMaxRetries),
retry.WithBackoffMultiplier(constant.OrbitEnrollBackoffMultiplier),
retry.WithErrorFilter(func(err error) (errorOutcome retry.ErrorOutcome) {
log.Info().Err(err).Msg("orbit enroll attempt failed")
switch {
case errors.Is(err, notFoundErr{}):
// Do not retry if the endpoint does not exist.
return retry.ErrorOutcomeDoNotRetry
case errors.Is(err, ErrEndUserAuthRequired):
// If we get an ErrEndUserAuthRequired error, then the user
// needs to authenticate with the identity provider.
//
// Open a browser window to the sign-on page and
// then keep retrying until they authenticate.
log.Debug().Msg("enroll unauthenticated, waiting for end-user to authenticate via SSO")
if !oc.initiatedIdpAuth {
if oc.openSSOWindow == nil {
log.Error().Msg("SSO window open function not set")
return retry.ErrorOutcomeNormalRetry
}
log.Debug().Msg("opening SSO window")
openWindowErr := oc.openSSOWindow()
if openWindowErr != nil {
log.Error().Err(openWindowErr).Msg("opening SSO window")
return retry.ErrorOutcomeNormalRetry
}
oc.initiatedIdpAuth = true
}
// Sleep for 20 seconds, making the total retry interval 30 seconds
time.Sleep(20 * time.Second)
return retry.ErrorOutcomeResetAttempts
default:
logging.LogErrIfEnvNotSet(constant.SilenceEnrollLogErrorEnvVar, err, "enroll failed, retrying")
return retry.ErrorOutcomeNormalRetry
}
}),
); err != nil {
if errors.Is(err, notFoundErr{}) {
return "", errors.New("enroll endpoint does not exist")
}
return "", fmt.Errorf("orbit node key enroll failed, attempts=%d", constant.OrbitEnrollMaxRetries)
}
if endpointDoesNotExist {
return "", errors.New("enroll endpoint does not exist")
}
return orbitNodeKey_, nil
}

View file

@ -19,6 +19,16 @@ $metadata['mdm.test.com'] = array(
'simplesaml.nameidattribute' => 'email',
);
# Use for local testing of devices on the same network.
$metadata['mdm.host.docker.internal'] = array(
    'AssertionConsumerService' => [
        'https://host.docker.internal:8080/api/v1/fleet/mdm/sso/callback',
    ],
    # Must be the exact SAML 1.1 email-address NameID format URN.
    'NameIDFormat' => 'urn:oasis:names:tc:SAML:1.1:nameid-format:emailAddress',
    'simplesaml.nameidattribute' => 'email',
);
# Used for testing when sso_settings.entity_id ("sso.test.com") is different than
# server_settings.server_url (usually "https://localhost:8080").
$metadata['sso.test.com'] = array(