Wait for CERT_INSTALL delegation to be available before attempting certificate enrollment (#43065)

<!-- Add the related story/sub-task/bug number, like Resolves #123, or
remove if NA -->
**Related issue:** Resolves #43064 

# Checklist for submitter

- [x] Changes file added for user-visible changes in `changes/`,
`orbit/changes/` or `ee/fleetd-chrome/changes`.

## Testing

- [x] Added/updated automated tests
- [x] QA'd all new/changed functionality manually

<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->

## Summary by CodeRabbit

## Release Notes

* **New Features**
* Certificate enrollment now verifies system delegation availability
before attempting installation, preventing unnecessary failures.

* **Bug Fixes**
* Enhanced error messages to include specific certificate alias and
delegation status information for better troubleshooting.
* Improved handling of system state exceptions during the enrollment
process.

<!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
Victor Lyuboslavsky 2026-04-07 11:16:02 -05:00 committed by GitHub
parent 65f1448d6f
commit 4457459422
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 56 additions and 4 deletions

View file

@ -56,7 +56,7 @@ class CertificateEnrollmentHandler(private val scepClient: ScepClient, private v
serialNumber = result.serialNumber,
)
} else {
EnrollmentResult.Failure("Certificate installation failed")
EnrollmentResult.Failure("Certificate installation failed for alias '${config.name}': installKeyPair returned false")
}
} catch (e: ScepEnrollmentException) {
// SCEP server rejected enrollment (e.g., PENDING status, invalid challenge)
@ -76,6 +76,9 @@ class CertificateEnrollmentHandler(private val scepClient: ScepClient, private v
} catch (e: IllegalArgumentException) {
// Configuration validation failed
EnrollmentResult.Failure("Invalid configuration: ${e.message}", e, isRetryable = false)
} catch (e: IllegalStateException) {
// Delegation or system state issue (e.g. CERT_INSTALL not granted)
EnrollmentResult.Failure("Certificate installation failed: ${e.message}", e, isRetryable = false)
} catch (e: Exception) {
// Unexpected errors
EnrollmentResult.Failure("Unexpected error during enrollment: ${e.message}", e, isRetryable = false)

View file

@ -1,5 +1,6 @@
package com.fleetdm.agent
import android.app.admin.DevicePolicyManager
import android.content.Context
import android.util.Log
import androidx.work.CoroutineWorker
@ -18,12 +19,28 @@ import kotlinx.coroutines.sync.Mutex
class CertificateEnrollmentWorker(context: Context, workerParams: WorkerParameters) : CoroutineWorker(context, workerParams) {
override suspend fun doWork(): Result {
// Gate on CERT_INSTALL delegation before acquiring the mutex. After a fresh MDM enrollment, the delegated
// scope may not be available immediately. Checking here (before any SCEP work) avoids consuming single-use
// SCEP challenges. This is a read-only check that doesn't need mutual exclusion.
val attempt = runAttemptCount + 1
val dpm = applicationContext.getSystemService(DevicePolicyManager::class.java)
val scopes = dpm?.getDelegatedScopes(null, applicationContext.packageName) ?: emptyList()
if (!scopes.contains(DevicePolicyManager.DELEGATION_CERT_INSTALL)) {
if (attempt >= MAX_DELEGATION_ATTEMPTS) {
Log.w(TAG, "CERT_INSTALL delegation unavailable after $attempt attempts, deferring to next scheduled run. Scopes: $scopes")
return Result.success()
}
Log.w(TAG, "CERT_INSTALL delegation not available yet (attempt $attempt/$MAX_DELEGATION_ATTEMPTS), will retry. Scopes: $scopes")
return Result.retry()
}
// Skip if another enrollment is already running (periodic + one-time work are tracked separately)
if (!enrollmentMutex.tryLock()) {
Log.d(TAG, "Skipping enrollment, another run is already in progress")
return Result.success()
}
return try {
Log.d(TAG, "Starting certificate enrollment worker (attempt $attempt)")
doEnrollment()
} finally {
enrollmentMutex.unlock()
@ -32,8 +49,6 @@ class CertificateEnrollmentWorker(context: Context, workerParams: WorkerParamete
private suspend fun doEnrollment(): Result {
return try {
Log.d(TAG, "Starting certificate enrollment worker (attempt ${runAttemptCount + 1})")
// Get orchestrator from Application
val orchestrator = AgentApplication.getCertificateOrchestrator(applicationContext)
@ -150,6 +165,10 @@ class CertificateEnrollmentWorker(context: Context, workerParams: WorkerParamete
private const val TAG = "fleet-CertificateEnrollmentWorker"
private const val MAX_RETRY_ATTEMPTS = 5
// Higher limit for delegation gate since it's a lightweight check and delegation can take minutes to propagate.
// With exponential backoff from 10s, 7 attempts covers ~5 minutes before the periodic 15-minute schedule takes over.
private const val MAX_DELEGATION_ATTEMPTS = 7
// Mutex to prevent concurrent enrollment runs across all worker instances
private val enrollmentMutex = Mutex()
}

View file

@ -858,6 +858,13 @@ class CertificateOrchestrator(
val dpm = context.getSystemService(DevicePolicyManager::class.java)
?: error("DevicePolicyManager not available")
// Defensive check: verify delegation is present before consuming the certificate. The worker-level gate
// should catch this earlier, but this protects against code paths that skip the worker.
val scopes = dpm.getDelegatedScopes(null, context.packageName)
if (!scopes.contains(DevicePolicyManager.DELEGATION_CERT_INSTALL)) {
error("CERT_INSTALL delegation not granted to ${context.packageName}, current scopes: $scopes")
}
// The admin component is null because the caller is a DELEGATED application,
// not the Device Policy Controller itself. The DPM recognizes the delegation
// via the calling package's UID and the granted CERT_INSTALL scope.
@ -872,7 +879,7 @@ class CertificateOrchestrator(
if (success) {
Log.i(TAG, "Certificate successfully installed with alias: $alias")
} else {
FleetLog.e(TAG, "Certificate installation failed. Check MDM policy and delegation status.")
FleetLog.e(TAG, "Certificate installation failed for alias '$alias': installKeyPair returned false")
}
return success

View file

@ -137,6 +137,24 @@ class CertificateEnrollmentHandlerTest {
assertEquals("SHA512withRSA", mockScepClient.capturedConfig?.signatureAlgorithm)
}
@Test
fun `handler handles IllegalStateException from installer as non-retryable failure`() = runTest {
mockInstaller.exceptionToThrow = IllegalStateException(
"CERT_INSTALL delegation not granted to com.fleetdm.agent, current scopes: []",
)
val template = TestCertificateTemplateFactory.create(name = "device-cert")
val result = handler.handleEnrollment(template, TestCertificateTemplateFactory.DEFAULT_SCEP_URL)
assertTrue(mockInstaller.wasInstallCalled)
assertTrue(result is CertificateEnrollmentHandler.EnrollmentResult.Failure)
val failure = result as CertificateEnrollmentHandler.EnrollmentResult.Failure
assertFalse(failure.isRetryable)
assertTrue(failure.reason.contains("CERT_INSTALL delegation not granted"))
assertTrue(failure.reason.startsWith("Certificate installation failed:"))
}
@Test
fun `handler uses default values for optional parameters`() = runTest {
val template = TestCertificateTemplateFactory.create()

View file

@ -10,6 +10,7 @@ import java.security.cert.Certificate
*/
class MockCertificateInstaller : CertificateEnrollmentHandler.CertificateInstaller {
var shouldSucceed = true
var exceptionToThrow: Exception? = null
var wasInstallCalled = false
var capturedAlias: String? = null
var capturedPrivateKey: PrivateKey? = null
@ -20,11 +21,13 @@ class MockCertificateInstaller : CertificateEnrollmentHandler.CertificateInstall
capturedAlias = alias
capturedPrivateKey = privateKey
capturedCertificateChain = certificateChain
exceptionToThrow?.let { throw it }
return shouldSucceed
}
fun reset() {
shouldSucceed = true
exceptionToThrow = null
wasInstallCalled = false
capturedAlias = null
capturedPrivateKey = null

View file

@ -0,0 +1,2 @@
- Wait for CERT_INSTALL delegation to be available before attempting certificate enrollment, preventing permanent failures after fresh MDM enrollment.
- Improved certificate installation failure messages to include delegation status and certificate alias.