mirror of
https://github.com/argoproj/argo-cd
synced 2026-04-21 17:07:16 +00:00
fix: make webhook payload handlers recover from panics (#24862)
Signed-off-by: Jakub Ciolek <jakub@ciolek.dev>
This commit is contained in:
parent
25ee9cc36d
commit
bf0661ea81
4 changed files with 123 additions and 2 deletions
|
|
@ -26,10 +26,14 @@ import (
|
|||
"github.com/go-playground/webhooks/v6/github"
|
||||
"github.com/go-playground/webhooks/v6/gitlab"
|
||||
log "github.com/sirupsen/logrus"
|
||||
|
||||
"github.com/argoproj/argo-cd/v3/util/guard"
|
||||
)
|
||||
|
||||
const payloadQueueSize = 50000
|
||||
|
||||
const panicMsgAppSet = "panic while processing applicationset-controller webhook event"
|
||||
|
||||
type WebhookHandler struct {
|
||||
sync.WaitGroup // for testing
|
||||
github *github.Webhook
|
||||
|
|
@ -102,6 +106,7 @@ func NewWebhookHandler(webhookParallelism int, argocdSettingsMgr *argosettings.S
|
|||
}
|
||||
|
||||
func (h *WebhookHandler) startWorkerPool(webhookParallelism int) {
|
||||
compLog := log.WithField("component", "applicationset-webhook")
|
||||
for i := 0; i < webhookParallelism; i++ {
|
||||
h.Add(1)
|
||||
go func() {
|
||||
|
|
@ -111,7 +116,7 @@ func (h *WebhookHandler) startWorkerPool(webhookParallelism int) {
|
|||
if !ok {
|
||||
return
|
||||
}
|
||||
h.HandleEvent(payload)
|
||||
guard.RecoverAndLog(func() { h.HandleEvent(payload) }, compLog, panicMsgAppSet)
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
|
|
|||
20
util/guard/guard.go
Normal file
20
util/guard/guard.go
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
package guard
|
||||
|
||||
import (
|
||||
"runtime/debug"
|
||||
)
|
||||
|
||||
// Minimal logger contract; avoids depending on any specific logging package.
|
||||
type Logger interface{ Errorf(string, ...any) }
|
||||
|
||||
// Run executes fn and recovers a panic, logging a component-specific message.
|
||||
func RecoverAndLog(fn func(), log Logger, msg string) {
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
if log != nil {
|
||||
log.Errorf("%s: %v %s", msg, r, debug.Stack())
|
||||
}
|
||||
}
|
||||
}()
|
||||
fn()
|
||||
}
|
||||
92
util/guard/guard_test.go
Normal file
92
util/guard/guard_test.go
Normal file
|
|
@ -0,0 +1,92 @@
|
|||
package guard
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"sync"
|
||||
"testing"
|
||||
)
|
||||
|
||||
type nop struct{}
|
||||
|
||||
func (nop) Errorf(string, ...any) {}
|
||||
|
||||
// recorder is a thread-safe logger that captures formatted messages.
|
||||
type recorder struct {
|
||||
mu sync.Mutex
|
||||
calls int
|
||||
msgs []string
|
||||
}
|
||||
|
||||
func (r *recorder) Errorf(format string, args ...any) {
|
||||
r.mu.Lock()
|
||||
defer r.mu.Unlock()
|
||||
r.calls++
|
||||
r.msgs = append(r.msgs, fmt.Sprintf(format, args...))
|
||||
}
|
||||
|
||||
func TestRun_Recovers(_ *testing.T) {
|
||||
RecoverAndLog(func() { panic("boom") }, nop{}, "msg") // fails if panic escapes
|
||||
}
|
||||
|
||||
func TestRun_AllowsNextCall(t *testing.T) {
|
||||
ran := false
|
||||
RecoverAndLog(func() { panic("boom") }, nop{}, "msg")
|
||||
RecoverAndLog(func() { ran = true }, nop{}, "msg")
|
||||
if !ran {
|
||||
t.Fatal("expected second callback to run")
|
||||
}
|
||||
}
|
||||
|
||||
func TestRun_LogsMessageAndStack(t *testing.T) {
|
||||
r := &recorder{}
|
||||
RecoverAndLog(func() { panic("boom") }, r, "msg")
|
||||
if r.calls != 1 {
|
||||
t.Fatalf("expected 1 log call, got %d", r.calls)
|
||||
}
|
||||
got := strings.Join(r.msgs, "\n")
|
||||
if !strings.Contains(got, "msg") {
|
||||
t.Errorf("expected log to contain message %q; got %q", "msg", got)
|
||||
}
|
||||
if !strings.Contains(got, "boom") {
|
||||
t.Errorf("expected log to contain panic value %q; got %q", "boom", got)
|
||||
}
|
||||
// Heuristic check that a stack trace was included.
|
||||
if !strings.Contains(got, "guard.go") && !strings.Contains(got, "runtime/panic.go") && !strings.Contains(got, "goroutine") {
|
||||
t.Errorf("expected log to contain a stack trace; got %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRun_NilLoggerDoesNotPanic(_ *testing.T) {
|
||||
var l Logger // nil
|
||||
RecoverAndLog(func() { panic("boom") }, l, "ignored")
|
||||
}
|
||||
|
||||
func TestRun_NoPanicDoesNotLog(t *testing.T) {
|
||||
r := &recorder{}
|
||||
ran := false
|
||||
RecoverAndLog(func() { ran = true }, r, "msg")
|
||||
if !ran {
|
||||
t.Fatal("expected fn to run")
|
||||
}
|
||||
if r.calls != 0 {
|
||||
t.Fatalf("expected 0 log calls, got %d", r.calls)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRun_ConcurrentPanicsLogged(t *testing.T) {
|
||||
r := &recorder{}
|
||||
const n = 10
|
||||
var wg sync.WaitGroup
|
||||
wg.Add(n)
|
||||
for i := 0; i < n; i++ {
|
||||
go func(i int) {
|
||||
defer wg.Done()
|
||||
RecoverAndLog(func() { panic(fmt.Sprintf("boom-%d", i)) }, r, "msg")
|
||||
}(i)
|
||||
}
|
||||
wg.Wait()
|
||||
if r.calls != n {
|
||||
t.Fatalf("expected %d log calls, got %d", n, r.calls)
|
||||
}
|
||||
}
|
||||
|
|
@ -37,6 +37,7 @@ import (
|
|||
"github.com/argoproj/argo-cd/v3/util/db"
|
||||
"github.com/argoproj/argo-cd/v3/util/git"
|
||||
"github.com/argoproj/argo-cd/v3/util/glob"
|
||||
"github.com/argoproj/argo-cd/v3/util/guard"
|
||||
"github.com/argoproj/argo-cd/v3/util/settings"
|
||||
)
|
||||
|
||||
|
|
@ -52,6 +53,8 @@ const usernameRegex = `[\w\.][\w\.-]{0,30}[\w\.\$-]?`
|
|||
|
||||
const payloadQueueSize = 50000
|
||||
|
||||
const panicMsgServer = "panic while processing api-server webhook event"
|
||||
|
||||
var _ settingsSource = &settings.SettingsManager{}
|
||||
|
||||
type ArgoCDWebhookHandler struct {
|
||||
|
|
@ -127,6 +130,7 @@ func NewHandler(namespace string, applicationNamespaces []string, webhookParalle
|
|||
}
|
||||
|
||||
func (a *ArgoCDWebhookHandler) startWorkerPool(webhookParallelism int) {
|
||||
compLog := log.WithField("component", "api-server-webhook")
|
||||
for i := 0; i < webhookParallelism; i++ {
|
||||
a.Add(1)
|
||||
go func() {
|
||||
|
|
@ -136,7 +140,7 @@ func (a *ArgoCDWebhookHandler) startWorkerPool(webhookParallelism int) {
|
|||
if !ok {
|
||||
return
|
||||
}
|
||||
a.HandleEvent(payload)
|
||||
guard.RecoverAndLog(func() { a.HandleEvent(payload) }, compLog, panicMsgServer)
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue