fix: make webhook payload handlers recover from panics (#24862)

Signed-off-by: Jakub Ciolek <jakub@ciolek.dev>
This commit is contained in:
Jakub Ciolek 2025-10-09 23:26:22 +02:00 committed by GitHub
parent 25ee9cc36d
commit bf0661ea81
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 123 additions and 2 deletions

View file

@ -26,10 +26,14 @@ import (
"github.com/go-playground/webhooks/v6/github"
"github.com/go-playground/webhooks/v6/gitlab"
log "github.com/sirupsen/logrus"
"github.com/argoproj/argo-cd/v3/util/guard"
)
const payloadQueueSize = 50000
const panicMsgAppSet = "panic while processing applicationset-controller webhook event"
type WebhookHandler struct {
sync.WaitGroup // for testing
github *github.Webhook
@ -102,6 +106,7 @@ func NewWebhookHandler(webhookParallelism int, argocdSettingsMgr *argosettings.S
}
func (h *WebhookHandler) startWorkerPool(webhookParallelism int) {
compLog := log.WithField("component", "applicationset-webhook")
for i := 0; i < webhookParallelism; i++ {
h.Add(1)
go func() {
@ -111,7 +116,7 @@ func (h *WebhookHandler) startWorkerPool(webhookParallelism int) {
if !ok {
return
}
h.HandleEvent(payload)
guard.RecoverAndLog(func() { h.HandleEvent(payload) }, compLog, panicMsgAppSet)
}
}()
}

20
util/guard/guard.go Normal file
View file

@ -0,0 +1,20 @@
package guard
import (
"runtime/debug"
)
// Minimal logger contract; avoids depending on any specific logging package.
type Logger interface{ Errorf(string, ...any) }
// Run executes fn and recovers a panic, logging a component-specific message.
func RecoverAndLog(fn func(), log Logger, msg string) {
defer func() {
if r := recover(); r != nil {
if log != nil {
log.Errorf("%s: %v %s", msg, r, debug.Stack())
}
}
}()
fn()
}

92
util/guard/guard_test.go Normal file
View file

@ -0,0 +1,92 @@
package guard
import (
"fmt"
"strings"
"sync"
"testing"
)
type nop struct{}
func (nop) Errorf(string, ...any) {}
// recorder is a thread-safe logger that captures formatted messages.
type recorder struct {
mu sync.Mutex
calls int
msgs []string
}
func (r *recorder) Errorf(format string, args ...any) {
r.mu.Lock()
defer r.mu.Unlock()
r.calls++
r.msgs = append(r.msgs, fmt.Sprintf(format, args...))
}
func TestRun_Recovers(_ *testing.T) {
RecoverAndLog(func() { panic("boom") }, nop{}, "msg") // fails if panic escapes
}
func TestRun_AllowsNextCall(t *testing.T) {
ran := false
RecoverAndLog(func() { panic("boom") }, nop{}, "msg")
RecoverAndLog(func() { ran = true }, nop{}, "msg")
if !ran {
t.Fatal("expected second callback to run")
}
}
func TestRun_LogsMessageAndStack(t *testing.T) {
r := &recorder{}
RecoverAndLog(func() { panic("boom") }, r, "msg")
if r.calls != 1 {
t.Fatalf("expected 1 log call, got %d", r.calls)
}
got := strings.Join(r.msgs, "\n")
if !strings.Contains(got, "msg") {
t.Errorf("expected log to contain message %q; got %q", "msg", got)
}
if !strings.Contains(got, "boom") {
t.Errorf("expected log to contain panic value %q; got %q", "boom", got)
}
// Heuristic check that a stack trace was included.
if !strings.Contains(got, "guard.go") && !strings.Contains(got, "runtime/panic.go") && !strings.Contains(got, "goroutine") {
t.Errorf("expected log to contain a stack trace; got %q", got)
}
}
func TestRun_NilLoggerDoesNotPanic(_ *testing.T) {
var l Logger // nil
RecoverAndLog(func() { panic("boom") }, l, "ignored")
}
func TestRun_NoPanicDoesNotLog(t *testing.T) {
r := &recorder{}
ran := false
RecoverAndLog(func() { ran = true }, r, "msg")
if !ran {
t.Fatal("expected fn to run")
}
if r.calls != 0 {
t.Fatalf("expected 0 log calls, got %d", r.calls)
}
}
func TestRun_ConcurrentPanicsLogged(t *testing.T) {
r := &recorder{}
const n = 10
var wg sync.WaitGroup
wg.Add(n)
for i := 0; i < n; i++ {
go func(i int) {
defer wg.Done()
RecoverAndLog(func() { panic(fmt.Sprintf("boom-%d", i)) }, r, "msg")
}(i)
}
wg.Wait()
if r.calls != n {
t.Fatalf("expected %d log calls, got %d", n, r.calls)
}
}

View file

@ -37,6 +37,7 @@ import (
"github.com/argoproj/argo-cd/v3/util/db"
"github.com/argoproj/argo-cd/v3/util/git"
"github.com/argoproj/argo-cd/v3/util/glob"
"github.com/argoproj/argo-cd/v3/util/guard"
"github.com/argoproj/argo-cd/v3/util/settings"
)
@ -52,6 +53,8 @@ const usernameRegex = `[\w\.][\w\.-]{0,30}[\w\.\$-]?`
const payloadQueueSize = 50000
const panicMsgServer = "panic while processing api-server webhook event"
var _ settingsSource = &settings.SettingsManager{}
type ArgoCDWebhookHandler struct {
@ -127,6 +130,7 @@ func NewHandler(namespace string, applicationNamespaces []string, webhookParalle
}
func (a *ArgoCDWebhookHandler) startWorkerPool(webhookParallelism int) {
compLog := log.WithField("component", "api-server-webhook")
for i := 0; i < webhookParallelism; i++ {
a.Add(1)
go func() {
@ -136,7 +140,7 @@ func (a *ArgoCDWebhookHandler) startWorkerPool(webhookParallelism int) {
if !ok {
return
}
a.HandleEvent(payload)
guard.RecoverAndLog(func() { a.HandleEvent(payload) }, compLog, panicMsgServer)
}
}()
}