From 02de363d9cc3cbdf4de0b56ff21eb6d4c7cc3ca1 Mon Sep 17 00:00:00 2001 From: pbhatnagar-oss Date: Tue, 22 Jul 2025 10:09:07 -0700 Subject: [PATCH] fix(metrics): grpc prometheus stats missing (#23877) (#23838) Signed-off-by: pbhatnagar-oss --- docs/operator-manual/metrics.md | 5 ++++- reposerver/metrics/metrics.go | 2 ++ reposerver/server.go | 4 +--- server/metrics/metrics.go | 2 ++ server/server.go | 8 +++----- test/e2e/{kubectl_metrics_test.go => metrics_test.go} | 2 ++ 6 files changed, 14 insertions(+), 9 deletions(-) rename test/e2e/{kubectl_metrics_test.go => metrics_test.go} (94%) diff --git a/docs/operator-manual/metrics.md b/docs/operator-manual/metrics.md index 8a28b3b00d..95a87d9b23 100644 --- a/docs/operator-manual/metrics.md +++ b/docs/operator-manual/metrics.md @@ -209,6 +209,7 @@ argocd_cluster_labels{label_environment="production",label_team_name="team3",nam Metrics about API Server API request and response activity (request totals, response codes, etc...). Scraped at the `argocd-server-metrics:8083/metrics` endpoint. +For gRPC metrics to show up, the environment variable `ARGOCD_ENABLE_GRPC_TIME_HISTOGRAM` must be set to `true`. | Metric | Type | Description |---------------------------------------------------|:---------:|---------------------------------------------------------------------------------------------| @@ -249,9 +250,11 @@ Scraped at the `argocd-server-metrics:8083/metrics` endpoint. ## Repo Server Metrics -Metrics about the Repo Server. +Metrics about the Repo Server. The gRPC metrics are not exposed by default. They can be enabled by setting the +`ARGOCD_ENABLE_GRPC_TIME_HISTOGRAM=true` environment variable. Scraped at the `argocd-repo-server:8084/metrics` endpoint. + | Metric | Type | Description | | --------------------------------------- | :-------: | ------------------------------------------------------------------------- | | `argocd_git_request_duration_seconds` | histogram | Git requests duration seconds. 
| diff --git a/reposerver/metrics/metrics.go b/reposerver/metrics/metrics.go index 4fd9cde3ba..52c8ac592a 100644 --- a/reposerver/metrics/metrics.go +++ b/reposerver/metrics/metrics.go @@ -19,6 +19,7 @@ type MetricsServer struct { repoPendingRequestsGauge *prometheus.GaugeVec redisRequestCounter *prometheus.CounterVec redisRequestHistogram *prometheus.HistogramVec + PrometheusRegistry *prometheus.Registry } type GitRequestType string @@ -108,6 +109,7 @@ func NewMetricsServer() *MetricsServer { repoPendingRequestsGauge: repoPendingRequestsGauge, redisRequestCounter: redisRequestCounter, redisRequestHistogram: redisRequestHistogram, + PrometheusRegistry: registry, } } diff --git a/reposerver/server.go b/reposerver/server.go index 0495f518a1..1c0ff668a2 100644 --- a/reposerver/server.go +++ b/reposerver/server.go @@ -9,7 +9,6 @@ import ( grpc_prometheus "github.com/grpc-ecosystem/go-grpc-middleware/providers/prometheus" "github.com/grpc-ecosystem/go-grpc-middleware/v2/interceptors/logging" "github.com/grpc-ecosystem/go-grpc-middleware/v2/interceptors/recovery" - "github.com/prometheus/client_golang/prometheus" log "github.com/sirupsen/logrus" "go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc" "google.golang.org/grpc" @@ -63,8 +62,7 @@ func NewServer(metricsServer *metrics.MetricsServer, cache *reposervercache.Cach serverMetricsOptions = append(serverMetricsOptions, grpc_prometheus.WithServerHandlingTimeHistogram()) } serverMetrics := grpc_prometheus.NewServerMetrics(serverMetricsOptions...) 
- reg := prometheus.NewRegistry() - reg.MustRegister(serverMetrics) + metricsServer.PrometheusRegistry.MustRegister(serverMetrics) serverLog := log.NewEntry(log.StandardLogger()) streamInterceptors := []grpc.StreamServerInterceptor{ diff --git a/server/metrics/metrics.go b/server/metrics/metrics.go index dbae337678..95a4213c0f 100644 --- a/server/metrics/metrics.go +++ b/server/metrics/metrics.go @@ -21,6 +21,7 @@ type MetricsServer struct { extensionRequestCounter *prometheus.CounterVec extensionRequestDuration *prometheus.HistogramVec loginRequestCounter *prometheus.CounterVec + PrometheusRegistry *prometheus.Registry } var ( @@ -102,6 +103,7 @@ func NewMetricsServer(host string, port int) *MetricsServer { extensionRequestCounter: extensionRequestCounter, extensionRequestDuration: extensionRequestDuration, loginRequestCounter: loginRequestCounter, + PrometheusRegistry: registry, } } diff --git a/server/server.go b/server/server.go index 63335854e4..3fb51deabc 100644 --- a/server/server.go +++ b/server/server.go @@ -561,7 +561,6 @@ func (server *ArgoCDServer) Run(ctx context.Context, listeners *Listeners) { server.Shutdown() } }() - metricsServ := metrics.NewMetricsServer(server.MetricsHost, server.MetricsPort) if server.RedisClient != nil { cacheutil.CollectMetrics(server.RedisClient, metricsServ, server.userStateStorage.GetLockObject()) @@ -576,7 +575,7 @@ func (server *ArgoCDServer) Run(ctx context.Context, listeners *Listeners) { server.sessionMgr.CollectMetrics(metricsServ) } server.serviceSet = svcSet - grpcS, appResourceTreeFn := server.newGRPCServer() + grpcS, appResourceTreeFn := server.newGRPCServer(metricsServ.PrometheusRegistry) grpcWebS := grpcweb.WrapServer(grpcS) var httpS *http.Server var httpsS *http.Server @@ -899,14 +898,13 @@ func (server *ArgoCDServer) useTLS() bool { return true } -func (server *ArgoCDServer) newGRPCServer() (*grpc.Server, application.AppResourceTreeFn) { +func (server *ArgoCDServer) newGRPCServer(prometheusRegistry 
*prometheus.Registry) (*grpc.Server, application.AppResourceTreeFn) { var serverMetricsOptions []grpc_prometheus.ServerMetricsOption if enableGRPCTimeHistogram { serverMetricsOptions = append(serverMetricsOptions, grpc_prometheus.WithServerHandlingTimeHistogram()) } serverMetrics := grpc_prometheus.NewServerMetrics(serverMetricsOptions...) - reg := prometheus.NewRegistry() - reg.MustRegister(serverMetrics) + prometheusRegistry.MustRegister(serverMetrics) sOpts := []grpc.ServerOption{ // Set the both send and receive the bytes limit to be 100MB diff --git a/test/e2e/kubectl_metrics_test.go b/test/e2e/metrics_test.go similarity index 94% rename from test/e2e/kubectl_metrics_test.go rename to test/e2e/metrics_test.go index a53605bdf9..cd5c4672c2 100644 --- a/test/e2e/kubectl_metrics_test.go +++ b/test/e2e/metrics_test.go @@ -103,4 +103,6 @@ func TestKubectlMetrics(t *testing.T) { assert.Contains(t, string(body), "argocd_kubectl_response_size_bytes", "metrics should have contained argocd_kubectl_response_size_bytes") assert.Contains(t, string(body), "argocd_kubectl_rate_limiter_duration_seconds", "metrics should have contained argocd_kubectl_rate_limiter_duration_seconds") assert.Contains(t, string(body), "argocd_kubectl_requests_total", "metrics should have contained argocd_kubectl_requests_total") + assert.Contains(t, string(body), "grpc_server_handled_total", "metrics should have contained grpc_server_handled_total for all the reflected methods") + assert.Contains(t, string(body), "grpc_server_msg_received_total", "metrics should have contained grpc_server_msg_received_total for all the reflected methods") }