argo-cd/cmd/argocd-application-controller/commands/argocd_application_controller.go

285 lines
17 KiB
Go
Raw Normal View History

package commands
import (
"context"
"fmt"
"math"
"os"
"os/signal"
"syscall"
"time"
"github.com/argoproj/pkg/stats"
"github.com/redis/go-redis/v9"
log "github.com/sirupsen/logrus"
"github.com/spf13/cobra"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/tools/clientcmd"
cmdutil "github.com/argoproj/argo-cd/v2/cmd/util"
"github.com/argoproj/argo-cd/v2/common"
"github.com/argoproj/argo-cd/v2/controller"
"github.com/argoproj/argo-cd/v2/controller/sharding"
"github.com/argoproj/argo-cd/v2/pkg/apis/application/v1alpha1"
appclientset "github.com/argoproj/argo-cd/v2/pkg/client/clientset/versioned"
"github.com/argoproj/argo-cd/v2/pkg/ratelimiter"
"github.com/argoproj/argo-cd/v2/reposerver/apiclient"
"github.com/argoproj/argo-cd/v2/util/argo"
"github.com/argoproj/argo-cd/v2/util/argo/normalizers"
cacheutil "github.com/argoproj/argo-cd/v2/util/cache"
appstatecache "github.com/argoproj/argo-cd/v2/util/cache/appstate"
"github.com/argoproj/argo-cd/v2/util/cli"
"github.com/argoproj/argo-cd/v2/util/env"
"github.com/argoproj/argo-cd/v2/util/errors"
kubeutil "github.com/argoproj/argo-cd/v2/util/kube"
"github.com/argoproj/argo-cd/v2/util/settings"
"github.com/argoproj/argo-cd/v2/util/tls"
"github.com/argoproj/argo-cd/v2/util/trace"
)
const (
// CLIName is the name of the CLI
feat: dynamic rebalancing of clusters across shards (#15036) * Migrate Application Controller from Statefulset to Deployment Signed-off-by: ishitasequeira <ishiseq29@gmail.com> * Add sharding deployment logic Signed-off-by: ishitasequeira <ishiseq29@gmail.com> * Update sharding logic and add comments Signed-off-by: ishitasequeira <ishiseq29@gmail.com> * Add heartbeat as an environment variable Signed-off-by: ishitasequeira <ishiseq29@gmail.com> * Add retry logic, heartbeat timeout environment variable Signed-off-by: ishitasequeira <ishiseq29@gmail.com> * use the logic of pre-specified shard number on application controller pod Signed-off-by: ishitasequeira <ishiseq29@gmail.com> * fix manifests Signed-off-by: ishitasequeira <ishiseq29@gmail.com> * fix lint and e2e tests Signed-off-by: ishitasequeira <ishiseq29@gmail.com> * comment out failing e2e test Signed-off-by: ishitasequeira <ishiseq29@gmail.com> * increase readiness probe interval period Signed-off-by: ishitasequeira <ishiseq29@gmail.com> * "comment out readiness probe to see if e2e tests succeed" Signed-off-by: ishitasequeira <ishiseq29@gmail.com> * revert commented readiness probe Signed-off-by: ishitasequeira <ishiseq29@gmail.com> * revert commented test case Signed-off-by: ishitasequeira <ishiseq29@gmail.com> * read environment variable for application controller deployment name Signed-off-by: ishitasequeira <ishiseq29@gmail.com> * Add nil check on replica count for deployment of application controller Signed-off-by: ishitasequeira <ishiseq29@gmail.com> * Address comments Signed-off-by: ishitasequeira <ishiseq29@gmail.com> * Add Informer, Update documentation, add unit tests Signed-off-by: ishitasequeira <ishiseq29@gmail.com> * update godoc Signed-off-by: ishitasequeira <ishiseq29@gmail.com> * remove unwanted code and logs Signed-off-by: ishitasequeira <ishiseq29@gmail.com> * Add more documentation Signed-off-by: ishitasequeira <ishiseq29@gmail.com> * revert ApplicationController manifest to StatefulSet Signed-off-by: ishitasequeira <ishiseq29@gmail.com> * reverting updated docs Signed-off-by: ishitasequeira <ishiseq29@gmail.com> * Add documentation for the new dynamic distribution feature Signed-off-by: ishitasequeira <ishiseq29@gmail.com> * update documentation Signed-off-by: ishitasequeira <ishiseq29@gmail.com> * Add an overlay for application controller deployment and update documentation Signed-off-by: ishitasequeira <ishiseq29@gmail.com> * fix nit Signed-off-by: ishitasequeira <ishiseq29@gmail.com> * Marking the feature as alpha Signed-off-by: ishitasequeira <ishiseq29@gmail.com> * Add feature status link Signed-off-by: ishitasequeira <ishiseq29@gmail.com> * revert go,mod changes Signed-off-by: ishitasequeira <ishiseq29@gmail.com> * update docs to avoid focusing on StatefulSet/Deployment (#26) * update docs to avoid focusing on StatefulSet/Deployment Signed-off-by: Michael Crenshaw <350466+crenshaw-dev@users.noreply.github.com> --------- Signed-off-by: Michael Crenshaw <350466+crenshaw-dev@users.noreply.github.com> * minor update to the doc Signed-off-by: ishitasequeira <ishiseq29@gmail.com> --------- Signed-off-by: ishitasequeira <ishiseq29@gmail.com> Signed-off-by: Michael Crenshaw <350466+crenshaw-dev@users.noreply.github.com> Co-authored-by: Michael Crenshaw <350466+crenshaw-dev@users.noreply.github.com>
2023-09-22 19:49:09 +00:00
cliName = common.ApplicationController
// Default time in seconds for application resync period
defaultAppResyncPeriod = 180
// Default time in seconds for application hard resync period
defaultAppHardResyncPeriod = 0
)
func NewCommand() *cobra.Command {
var (
workqueueRateLimit ratelimiter.AppControllerRateLimiterConfig
clientConfig clientcmd.ClientConfig
appResyncPeriod int64
appHardResyncPeriod int64
appResyncJitter int64
repoErrorGracePeriod int64
repoServerAddress string
repoServerTimeoutSeconds int
selfHealTimeoutSeconds int
selfHealBackoffTimeoutSeconds int
selfHealBackoffFactor int
selfHealBackoffCapSeconds int
statusProcessors int
operationProcessors int
glogLevel int
metricsPort int
metricsCacheExpiration time.Duration
metricsAplicationLabels []string
metricsAplicationConditions []string
kubectlParallelismLimit int64
cacheSource func() (*appstatecache.Cache, error)
redisClient *redis.Client
repoServerPlaintext bool
repoServerStrictTLS bool
otlpAddress string
otlpInsecure bool
otlpHeaders map[string]string
otlpAttrs []string
applicationNamespaces []string
persistResourceHealth bool
shardingAlgorithm string
enableDynamicClusterDistribution bool
feat: Implement Server-Side Diff (#13663) * feat: Implement Server-Side Diff Signed-off-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com> * propagate the refreshtype to the diff config Signed-off-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com> * Create the serverSideDiff config Signed-off-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com> * chore: add featureflag utility package Signed-off-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com> * remove featureflag package Signed-off-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com> * add param Signed-off-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com> * make ssd configurable with app annotation Signed-off-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com> * add server-side-diff flags Signed-off-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com> * apply the same logic regardless of the refresh type Signed-off-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com> * fix gitops-engine reference Signed-off-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com> * address review comments Signed-off-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com> * Address review comments Signed-off-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com> * docs: add docs related to server-side-diff Signed-off-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com> * docs: update doc Signed-off-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com> * Add config to include mutation webhooks Signed-off-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com> * Address review comments Signed-off-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com> * go mod update Signed-off-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com> * Add sdd cache test case Signed-off-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com> * fix ssd cache unit test Signed-off-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com> * Update clidocs Signed-off-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com> * update manifests Signed-off-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com> * Fix procfile Signed-off-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com> * additional doc changes Signed-off-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com> * update gitops-engine version Signed-off-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com> --------- Signed-off-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com>
2023-12-18 20:37:13 +00:00
serverSideDiff bool
ignoreNormalizerOpts normalizers.IgnoreNormalizerOpts
// argocd k8s event logging flag
enableK8sEvent []string
)
command := cobra.Command{
Use: cliName,
Short: "Run ArgoCD Application Controller",
Long: "ArgoCD application controller is a Kubernetes controller that continuously monitors running applications and compares the current, live state against the desired target state (as specified in the repo). This command runs Application Controller in the foreground. It can be configured by following options.",
DisableAutoGenTag: true,
RunE: func(c *cobra.Command, args []string) error {
ctx, cancel := context.WithCancel(c.Context())
defer cancel()
vers := common.GetVersion()
namespace, _, err := clientConfig.Namespace()
errors.CheckError(err)
vers.LogStartupInfo(
"ArgoCD Application Controller",
map[string]any{
"namespace": namespace,
},
)
cli.SetLogFormat(cmdutil.LogFormat)
cli.SetLogLevel(cmdutil.LogLevel)
cli.SetGLogLevel(glogLevel)
config, err := clientConfig.ClientConfig()
errors.CheckError(err)
errors.CheckError(v1alpha1.SetK8SConfigDefaults(config))
feat: dynamic rebalancing of clusters across shards (#15036) * Migrate Application Controller from Statefulset to Deployment Signed-off-by: ishitasequeira <ishiseq29@gmail.com> * Add sharding deployment logic Signed-off-by: ishitasequeira <ishiseq29@gmail.com> * Update sharding logic and add comments Signed-off-by: ishitasequeira <ishiseq29@gmail.com> * Add heartbeat as an environment variable Signed-off-by: ishitasequeira <ishiseq29@gmail.com> * Add retry logic, heartbeat timeout environment variable Signed-off-by: ishitasequeira <ishiseq29@gmail.com> * use the logic of pre-specified shard number on application controller pod Signed-off-by: ishitasequeira <ishiseq29@gmail.com> * fix manifests Signed-off-by: ishitasequeira <ishiseq29@gmail.com> * fix lint and e2e tests Signed-off-by: ishitasequeira <ishiseq29@gmail.com> * comment out failing e2e test Signed-off-by: ishitasequeira <ishiseq29@gmail.com> * increase readiness probe interval period Signed-off-by: ishitasequeira <ishiseq29@gmail.com> * "comment out readiness probe to see if e2e tests succeed" Signed-off-by: ishitasequeira <ishiseq29@gmail.com> * revert commented readiness probe Signed-off-by: ishitasequeira <ishiseq29@gmail.com> * revert commented test case Signed-off-by: ishitasequeira <ishiseq29@gmail.com> * read environment variable for application controller deployment name Signed-off-by: ishitasequeira <ishiseq29@gmail.com> * Add nil check on replica count for deployment of application controller Signed-off-by: ishitasequeira <ishiseq29@gmail.com> * Address comments Signed-off-by: ishitasequeira <ishiseq29@gmail.com> * Add Informer, Update documentation, add unit tests Signed-off-by: ishitasequeira <ishiseq29@gmail.com> * update godoc Signed-off-by: ishitasequeira <ishiseq29@gmail.com> * remove unwanted code and logs Signed-off-by: ishitasequeira <ishiseq29@gmail.com> * Add more documentation Signed-off-by: ishitasequeira <ishiseq29@gmail.com> * revert ApplicationController manifest to StatefulSet Signed-off-by: ishitasequeira <ishiseq29@gmail.com> * reverting updated docs Signed-off-by: ishitasequeira <ishiseq29@gmail.com> * Add documentation for the new dynamic distribution feature Signed-off-by: ishitasequeira <ishiseq29@gmail.com> * update documentation Signed-off-by: ishitasequeira <ishiseq29@gmail.com> * Add an overlay for application controller deployment and update documentation Signed-off-by: ishitasequeira <ishiseq29@gmail.com> * fix nit Signed-off-by: ishitasequeira <ishiseq29@gmail.com> * Marking the feature as alpha Signed-off-by: ishitasequeira <ishiseq29@gmail.com> * Add feature status link Signed-off-by: ishitasequeira <ishiseq29@gmail.com> * revert go,mod changes Signed-off-by: ishitasequeira <ishiseq29@gmail.com> * update docs to avoid focusing on StatefulSet/Deployment (#26) * update docs to avoid focusing on StatefulSet/Deployment Signed-off-by: Michael Crenshaw <350466+crenshaw-dev@users.noreply.github.com> --------- Signed-off-by: Michael Crenshaw <350466+crenshaw-dev@users.noreply.github.com> * minor update to the doc Signed-off-by: ishitasequeira <ishiseq29@gmail.com> --------- Signed-off-by: ishitasequeira <ishiseq29@gmail.com> Signed-off-by: Michael Crenshaw <350466+crenshaw-dev@users.noreply.github.com> Co-authored-by: Michael Crenshaw <350466+crenshaw-dev@users.noreply.github.com>
2023-09-22 19:49:09 +00:00
config.UserAgent = fmt.Sprintf("%s/%s (%s)", common.DefaultApplicationControllerName, vers.Version, vers.Platform)
kubeClient := kubernetes.NewForConfigOrDie(config)
appClient := appclientset.NewForConfigOrDie(config)
hardResyncDuration := time.Duration(appHardResyncPeriod) * time.Second
var resyncDuration time.Duration
if appResyncPeriod == 0 {
// Re-sync should be disabled if period is 0. Set duration to a very long duration
resyncDuration = time.Hour * 24 * 365 * 100
} else {
resyncDuration = time.Duration(appResyncPeriod) * time.Second
}
tlsConfig := apiclient.TLSConfiguration{
DisableTLS: repoServerPlaintext,
StrictValidation: repoServerStrictTLS,
}
// Load CA information to use for validating connections to the
// repository server, if strict TLS validation was requested.
if !repoServerPlaintext && repoServerStrictTLS {
pool, err := tls.LoadX509CertPool(
fmt.Sprintf("%s/controller/tls/tls.crt", env.StringFromEnv(common.EnvAppConfigPath, common.DefaultAppConfigPath)),
fmt.Sprintf("%s/controller/tls/ca.crt", env.StringFromEnv(common.EnvAppConfigPath, common.DefaultAppConfigPath)),
)
if err != nil {
log.Fatalf("%v", err)
}
tlsConfig.Certificates = pool
}
repoClientset := apiclient.NewRepoServerClientset(repoServerAddress, repoServerTimeoutSeconds, tlsConfig)
cache, err := cacheSource()
errors.CheckError(err)
cache.Cache.SetClient(cacheutil.NewTwoLevelClient(cache.Cache.GetClient(), 10*time.Minute))
var appController *controller.ApplicationController
settingsMgr := settings.NewSettingsManager(ctx, kubeClient, namespace, settings.WithRepoOrClusterChangedHandler(func() {
appController.InvalidateProjectsCache()
}))
kubectl := kubeutil.NewKubectl()
fix: infer correct shard in statefulset setup (#17124, #17016) (#17167) * fix: infer correct shard in statefulset setup Signed-off-by: Lukas Wöhrl <lukas.woehrl@plentymarkets.com> * fix the case if only a single replica Signed-off-by: Lukas Wöhrl <lukas.woehrl@plentymarkets.com> * fix: resolving pointer on shard compare Signed-off-by: Lukas Wöhrl <lukas.woehrl@plentymarkets.com> * fix: add readlock for cluster accessor Signed-off-by: Lukas Wöhrl <lukas.woehrl@plentymarkets.com> * fix: use defer to protect access of 'shard' Signed-off-by: Lukas Wöhrl <lukas.woehrl@plentymarkets.com> * fix: revert locking in getclusteraccessor Signed-off-by: Lukas Wöhrl <lukas.woehrl@plentymarkets.com> * fix: handle nil shard case Signed-off-by: Lukas Wöhrl <lukas.woehrl@plentymarkets.com> * fix: handle any nil shard value as false Signed-off-by: Lukas Wöhrl <lukas.woehrl@plentymarkets.com> * fix: handle nil case and fix another missing pointer dereference Signed-off-by: Lukas Wöhrl <lukas.woehrl@plentymarkets.com> * revert Signed-off-by: Lukas Wöhrl <lukas.woehrl@plentymarkets.com> * fix: added tests and fixed some behaviour bugs Signed-off-by: Lukas Wöhrl <lukas.woehrl@plentymarkets.com> * test: add test to validate that Shard value is not overriden Signed-off-by: Lukas Wöhrl <lukas.woehrl@plentymarkets.com> * fix: added tests and fixe the case when server is changed inside a secret Signed-off-by: Lukas Wöhrl <lukas.woehrl@plentymarkets.com> * tests: add test cases for infering the shard logic Signed-off-by: Lukas Wöhrl <lukas.woehrl@plentymarkets.com> --------- Signed-off-by: Lukas Wöhrl <lukas.woehrl@plentymarkets.com>
2024-02-13 16:51:41 +00:00
clusterSharding, err := sharding.GetClusterSharding(kubeClient, settingsMgr, shardingAlgorithm, enableDynamicClusterDistribution)
errors.CheckError(err)
var selfHealBackoff *wait.Backoff
if selfHealBackoffTimeoutSeconds != 0 {
selfHealBackoff = &wait.Backoff{
Duration: time.Duration(selfHealBackoffTimeoutSeconds) * time.Second,
Factor: float64(selfHealBackoffFactor),
Cap: time.Duration(selfHealBackoffCapSeconds) * time.Second,
}
}
appController, err = controller.NewApplicationController(
namespace,
settingsMgr,
kubeClient,
appClient,
repoClientset,
cache,
kubectl,
resyncDuration,
hardResyncDuration,
time.Duration(appResyncJitter)*time.Second,
time.Duration(selfHealTimeoutSeconds)*time.Second,
selfHealBackoff,
time.Duration(repoErrorGracePeriod)*time.Second,
metricsPort,
metricsCacheExpiration,
metricsAplicationLabels,
metricsAplicationConditions,
kubectlParallelismLimit,
persistResourceHealth,
fix: Use the cache for sharding (#15237) * feat(sharding): use a cache Signed-off-by: Alexandre Gaudreault <alexandre.gaudreault@logmein.com> * cluster cmd Signed-off-by: Alexandre Gaudreault <alexandre.gaudreault@logmein.com> * - Assign shard 0 to in-cluster cluster and nil check updates - Caching clusters while sharding: Fixing unit tests - Update generated docs - Debug e2e tests - Default the shardNumber to the number of replicas if it is calculated to a higher value - defered Unlock only when a lock is set - Disabling temporarly other versions of k3s to check if e2e passes - Do not fail if hostname format is not abc-n - Fix unit test and skip some e2e - Skip TestGitSubmoduleHTTPSSupport test - Remove breaking defer c.lock.Unlock() - Reverting testing all k3s version - Default sharding fix Signed-off-by: Akram Ben Aissi <akram.benaissi@gmail.com> Signed-off-by: Akram Ben Aissi <akram.benaissi@gmail.com> * fixes related to code review: renaming structure param, moving db initialisation Signed-off-by: Akram Ben Aissi <akram.benaissi@gmail.com> * Code review Signed-off-by: Akram Ben Aissi <akram.benaissi@gmail.com> * Set default shard to 0 Signed-off-by: Akram Ben Aissi <akram.benaissi@gmail.com> * Set different default value for Sts and Deployment mode Signed-off-by: Akram Ben Aissi <akram.benaissi@gmail.com> * Expose ClusterShardingCache Signed-off-by: Akram Ben Aissi <akram.benaissi@gmail.com> * Removing use of argoDB.db for DistributionFunction Signed-off-by: Akram Ben Aissi <akram.benaissi@gmail.com> * Update generated documentation Signed-off-by: Akram Ben Aissi <akram.benaissi@gmail.com> * Fix comment about NoShardingDistributionFunction and NoShardingAlgorithm Signed-off-by: Akram Ben Aissi <akram.benaissi@gmail.com> --------- Signed-off-by: Alexandre Gaudreault <alexandre.gaudreault@logmein.com> Signed-off-by: Akram Ben Aissi <akram.benaissi@gmail.com> Co-authored-by: Alexandre Gaudreault <alexandre.gaudreault@logmein.com>
2024-01-11 06:32:11 +00:00
clusterSharding,
fix: Fixes sharding placement algorithm and allows development of alternative algorithms (#13018) * fix: Extraction of DistributionFunction to allow passing different type of functions to filter clusters by shard - Adding unit tests for sharding - Refresh clusters list on DistributionFunction call Signed-off-by: Akram Ben Aissi <akram.benaissi@gmail.com> Signed-off-by: ishitasequeira <ishiseq29@gmail.com> * fix: Incorrect conversion of an integer with architecture-dependent bit size from [strconv.Atoi](1) to a lower bit size type uint32 without an upper bound check. Signed-off-by: Akram Ben Aissi <akram.benaissi@gmail.com> * Added config to switch to round-robin sharding Signed-off-by: Raghavi Shirur <rshirur@redhat.com> Signed-off-by: Akram Ben Aissi <akram.benaissi@gmail.com> * Documenting sharding more, adding shuffling tests (skipped), re-enable sharding algo env var Signed-off-by: Akram Ben Aissi <akram.benaissi@gmail.com> * Allow configuration through argocd-cmd-params-cm configMap and key: controller.sharding.algorithm Signed-off-by: Akram Ben Aissi <akram.benaissi@gmail.com> * De-duplicate code, remove reflection for default case, shorten distribution methods name, ran codegen on manifests Signed-off-by: Akram Ben Aissi <akram.benaissi@gmail.com> Signed-off-by: Akram Ben Aissi <akram.benaissi@gmail.com> --------- Signed-off-by: Akram Ben Aissi <akram.benaissi@gmail.com> Signed-off-by: ishitasequeira <ishiseq29@gmail.com> Signed-off-by: Raghavi Shirur <rshirur@redhat.com> Co-authored-by: Raghavi Shirur <raghaviss11@gmail.com>
2023-06-05 13:19:14 +00:00
applicationNamespaces,
&workqueueRateLimit,
feat: Implement Server-Side Diff (#13663) * feat: Implement Server-Side Diff Signed-off-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com> * propagate the refreshtype to the diff config Signed-off-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com> * Create the serverSideDiff config Signed-off-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com> * chore: add featureflag utility package Signed-off-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com> * remove featureflag package Signed-off-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com> * add param Signed-off-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com> * make ssd configurable with app annotation Signed-off-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com> * add server-side-diff flags Signed-off-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com> * apply the same logic regardless of the refresh type Signed-off-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com> * fix gitops-engine reference Signed-off-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com> * address review comments Signed-off-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com> * Address review comments Signed-off-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com> * docs: add docs related to server-side-diff Signed-off-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com> * docs: update doc Signed-off-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com> * Add config to include mutation webhooks Signed-off-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com> * Address review comments Signed-off-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com> * go mod update Signed-off-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com> * Add sdd cache test case Signed-off-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com> * fix ssd cache unit test Signed-off-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com> * Update clidocs Signed-off-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com> * update manifests Signed-off-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com> * Fix procfile Signed-off-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com> * additional doc changes Signed-off-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com> * update gitops-engine version Signed-off-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com> --------- Signed-off-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com>
2023-12-18 20:37:13 +00:00
serverSideDiff,
enableDynamicClusterDistribution,
ignoreNormalizerOpts,
enableK8sEvent,
fix: Fixes sharding placement algorithm and allows development of alternative algorithms (#13018) * fix: Extraction of DistributionFunction to allow passing different type of functions to filter clusters by shard - Adding unit tests for sharding - Refresh clusters list on DistributionFunction call Signed-off-by: Akram Ben Aissi <akram.benaissi@gmail.com> Signed-off-by: ishitasequeira <ishiseq29@gmail.com> * fix: Incorrect conversion of an integer with architecture-dependent bit size from [strconv.Atoi](1) to a lower bit size type uint32 without an upper bound check. Signed-off-by: Akram Ben Aissi <akram.benaissi@gmail.com> * Added config to switch to round-robin sharding Signed-off-by: Raghavi Shirur <rshirur@redhat.com> Signed-off-by: Akram Ben Aissi <akram.benaissi@gmail.com> * Documenting sharding more, adding shuffling tests (skipped), re-enable sharding algo env var Signed-off-by: Akram Ben Aissi <akram.benaissi@gmail.com> * Allow configuration through argocd-cmd-params-cm configMap and key: controller.sharding.algorithm Signed-off-by: Akram Ben Aissi <akram.benaissi@gmail.com> * De-duplicate code, remove reflection for default case, shorten distribution methods name, ran codegen on manifests Signed-off-by: Akram Ben Aissi <akram.benaissi@gmail.com> Signed-off-by: Akram Ben Aissi <akram.benaissi@gmail.com> --------- Signed-off-by: Akram Ben Aissi <akram.benaissi@gmail.com> Signed-off-by: ishitasequeira <ishiseq29@gmail.com> Signed-off-by: Raghavi Shirur <rshirur@redhat.com> Co-authored-by: Raghavi Shirur <raghaviss11@gmail.com>
2023-06-05 13:19:14 +00:00
)
errors.CheckError(err)
cacheutil.CollectMetrics(redisClient, appController.GetMetricsServer())
stats.RegisterStackDumper()
stats.StartStatsTicker(10 * time.Minute)
stats.RegisterHeapDumper("memprofile")
if otlpAddress != "" {
closeTracer, err := trace.InitTracer(ctx, "argocd-controller", otlpAddress, otlpInsecure, otlpHeaders, otlpAttrs)
if err != nil {
log.Fatalf("failed to initialize tracing: %v", err)
}
defer closeTracer()
}
// Graceful shutdown code
sigCh := make(chan os.Signal, 1)
signal.Notify(sigCh, os.Interrupt, syscall.SIGTERM)
go func() {
s := <-sigCh
log.Printf("got signal %v, attempting graceful shutdown", s)
cancel()
}()
go appController.Run(ctx, statusProcessors, operationProcessors)
<-ctx.Done()
log.Println("clean shutdown")
return nil
},
}
clientConfig = cli.AddKubectlFlagsToCmd(&command)
command.Flags().Int64Var(&appResyncPeriod, "app-resync", int64(env.ParseDurationFromEnv("ARGOCD_RECONCILIATION_TIMEOUT", defaultAppResyncPeriod*time.Second, 0, math.MaxInt64).Seconds()), "Time period in seconds for application resync.")
command.Flags().Int64Var(&appHardResyncPeriod, "app-hard-resync", int64(env.ParseDurationFromEnv("ARGOCD_HARD_RECONCILIATION_TIMEOUT", defaultAppHardResyncPeriod*time.Second, 0, math.MaxInt64).Seconds()), "Time period in seconds for application hard resync.")
command.Flags().Int64Var(&appResyncJitter, "app-resync-jitter", int64(env.ParseDurationFromEnv("ARGOCD_RECONCILIATION_JITTER", 0*time.Second, 0, math.MaxInt64).Seconds()), "Maximum time period in seconds to add as a delay jitter for application resync.")
command.Flags().Int64Var(&repoErrorGracePeriod, "repo-error-grace-period-seconds", int64(env.ParseDurationFromEnv("ARGOCD_REPO_ERROR_GRACE_PERIOD_SECONDS", defaultAppResyncPeriod*time.Second, 0, math.MaxInt64).Seconds()), "Grace period in seconds for ignoring consecutive errors while communicating with repo server.")
command.Flags().StringVar(&repoServerAddress, "repo-server", env.StringFromEnv("ARGOCD_APPLICATION_CONTROLLER_REPO_SERVER", common.DefaultRepoServerAddr), "Repo server address.")
command.Flags().IntVar(&repoServerTimeoutSeconds, "repo-server-timeout-seconds", env.ParseNumFromEnv("ARGOCD_APPLICATION_CONTROLLER_REPO_SERVER_TIMEOUT_SECONDS", 60, 0, math.MaxInt64), "Repo server RPC call timeout seconds.")
command.Flags().IntVar(&statusProcessors, "status-processors", env.ParseNumFromEnv("ARGOCD_APPLICATION_CONTROLLER_STATUS_PROCESSORS", 20, 0, math.MaxInt32), "Number of application status processors")
command.Flags().IntVar(&operationProcessors, "operation-processors", env.ParseNumFromEnv("ARGOCD_APPLICATION_CONTROLLER_OPERATION_PROCESSORS", 10, 0, math.MaxInt32), "Number of application operation processors")
command.Flags().StringVar(&cmdutil.LogFormat, "logformat", env.StringFromEnv("ARGOCD_APPLICATION_CONTROLLER_LOGFORMAT", "text"), "Set the logging format. One of: text|json")
command.Flags().StringVar(&cmdutil.LogLevel, "loglevel", env.StringFromEnv("ARGOCD_APPLICATION_CONTROLLER_LOGLEVEL", "info"), "Set the logging level. One of: debug|info|warn|error")
command.Flags().IntVar(&glogLevel, "gloglevel", 0, "Set the glog logging level")
command.Flags().IntVar(&metricsPort, "metrics-port", common.DefaultPortArgoCDMetrics, "Start metrics server on given port")
command.Flags().DurationVar(&metricsCacheExpiration, "metrics-cache-expiration", env.ParseDurationFromEnv("ARGOCD_APPLICATION_CONTROLLER_METRICS_CACHE_EXPIRATION", 0*time.Second, 0, math.MaxInt64), "Prometheus metrics cache expiration (disabled by default. e.g. 24h0m0s)")
command.Flags().IntVar(&selfHealTimeoutSeconds, "self-heal-timeout-seconds", env.ParseNumFromEnv("ARGOCD_APPLICATION_CONTROLLER_SELF_HEAL_TIMEOUT_SECONDS", 0, 0, math.MaxInt32), "Specifies timeout between application self heal attempts")
command.Flags().IntVar(&selfHealBackoffTimeoutSeconds, "self-heal-backoff-timeout-seconds", env.ParseNumFromEnv("ARGOCD_APPLICATION_CONTROLLER_SELF_HEAL_BACKOFF_TIMEOUT_SECONDS", 2, 0, math.MaxInt32), "Specifies initial timeout of exponential backoff between self heal attempts")
command.Flags().IntVar(&selfHealBackoffFactor, "self-heal-backoff-factor", env.ParseNumFromEnv("ARGOCD_APPLICATION_CONTROLLER_SELF_HEAL_BACKOFF_FACTOR", 3, 0, math.MaxInt32), "Specifies factor of exponential timeout between application self heal attempts")
command.Flags().IntVar(&selfHealBackoffCapSeconds, "self-heal-backoff-cap-seconds", env.ParseNumFromEnv("ARGOCD_APPLICATION_CONTROLLER_SELF_HEAL_BACKOFF_CAP_SECONDS", 300, 0, math.MaxInt32), "Specifies max timeout of exponential backoff between application self heal attempts")
command.Flags().Int64Var(&kubectlParallelismLimit, "kubectl-parallelism-limit", env.ParseInt64FromEnv("ARGOCD_APPLICATION_CONTROLLER_KUBECTL_PARALLELISM_LIMIT", 20, 0, math.MaxInt64), "Number of allowed concurrent kubectl fork/execs. Any value less than 1 means no limit.")
command.Flags().BoolVar(&repoServerPlaintext, "repo-server-plaintext", env.ParseBoolFromEnv("ARGOCD_APPLICATION_CONTROLLER_REPO_SERVER_PLAINTEXT", false), "Disable TLS on connections to repo server")
command.Flags().BoolVar(&repoServerStrictTLS, "repo-server-strict-tls", env.ParseBoolFromEnv("ARGOCD_APPLICATION_CONTROLLER_REPO_SERVER_STRICT_TLS", false), "Whether to use strict validation of the TLS cert presented by the repo server")
command.Flags().StringSliceVar(&metricsAplicationLabels, "metrics-application-labels", []string{}, "List of Application labels that will be added to the argocd_application_labels metric")
command.Flags().StringSliceVar(&metricsAplicationConditions, "metrics-application-conditions", []string{}, "List of Application conditions that will be added to the argocd_application_conditions metric")
command.Flags().StringVar(&otlpAddress, "otlp-address", env.StringFromEnv("ARGOCD_APPLICATION_CONTROLLER_OTLP_ADDRESS", ""), "OpenTelemetry collector address to send traces to")
command.Flags().BoolVar(&otlpInsecure, "otlp-insecure", env.ParseBoolFromEnv("ARGOCD_APPLICATION_CONTROLLER_OTLP_INSECURE", true), "OpenTelemetry collector insecure mode")
command.Flags().StringToStringVar(&otlpHeaders, "otlp-headers", env.ParseStringToStringFromEnv("ARGOCD_APPLICATION_CONTROLLER_OTLP_HEADERS", map[string]string{}, ","), "List of OpenTelemetry collector extra headers sent with traces, headers are comma-separated key-value pairs(e.g. key1=value1,key2=value2)")
command.Flags().StringSliceVar(&otlpAttrs, "otlp-attrs", env.StringsFromEnv("ARGOCD_APPLICATION_CONTROLLER_OTLP_ATTRS", []string{}, ","), "List of OpenTelemetry collector extra attrs when send traces, each attribute is separated by a colon(e.g. key:value)")
command.Flags().StringSliceVar(&applicationNamespaces, "application-namespaces", env.StringsFromEnv("ARGOCD_APPLICATION_NAMESPACES", []string{}, ","), "List of additional namespaces that applications are allowed to be reconciled from")
command.Flags().BoolVar(&persistResourceHealth, "persist-resource-health", env.ParseBoolFromEnv("ARGOCD_APPLICATION_CONTROLLER_PERSIST_RESOURCE_HEALTH", true), "Enables storing the managed resources health in the Application CRD")
command.Flags().StringVar(&shardingAlgorithm, "sharding-method", env.StringFromEnv(common.EnvControllerShardingAlgorithm, common.DefaultShardingAlgorithm), "Enables choice of sharding method. Supported sharding methods are : [legacy, round-robin, consistent-hashing] ")
// global queue rate limit config
command.Flags().Int64Var(&workqueueRateLimit.BucketSize, "wq-bucket-size", env.ParseInt64FromEnv("WORKQUEUE_BUCKET_SIZE", 500, 1, math.MaxInt64), "Set Workqueue Rate Limiter Bucket Size, default 500")
command.Flags().Float64Var(&workqueueRateLimit.BucketQPS, "wq-bucket-qps", env.ParseFloat64FromEnv("WORKQUEUE_BUCKET_QPS", math.MaxFloat64, 1, math.MaxFloat64), "Set Workqueue Rate Limiter Bucket QPS, default set to MaxFloat64 which disables the bucket limiter")
// individual item rate limit config
// when WORKQUEUE_FAILURE_COOLDOWN is 0 per item rate limiting is disabled(default)
command.Flags().DurationVar(&workqueueRateLimit.FailureCoolDown, "wq-cooldown-ns", time.Duration(env.ParseInt64FromEnv("WORKQUEUE_FAILURE_COOLDOWN_NS", 0, 0, (24*time.Hour).Nanoseconds())), "Set Workqueue Per Item Rate Limiter Cooldown duration in ns, default 0(per item rate limiter disabled)")
command.Flags().DurationVar(&workqueueRateLimit.BaseDelay, "wq-basedelay-ns", time.Duration(env.ParseInt64FromEnv("WORKQUEUE_BASE_DELAY_NS", time.Millisecond.Nanoseconds(), time.Nanosecond.Nanoseconds(), (24*time.Hour).Nanoseconds())), "Set Workqueue Per Item Rate Limiter Base Delay duration in nanoseconds, default 1000000 (1ms)")
command.Flags().DurationVar(&workqueueRateLimit.MaxDelay, "wq-maxdelay-ns", time.Duration(env.ParseInt64FromEnv("WORKQUEUE_MAX_DELAY_NS", time.Second.Nanoseconds(), 1*time.Millisecond.Nanoseconds(), (24*time.Hour).Nanoseconds())), "Set Workqueue Per Item Rate Limiter Max Delay duration in nanoseconds, default 1000000000 (1s)")
command.Flags().Float64Var(&workqueueRateLimit.BackoffFactor, "wq-backoff-factor", env.ParseFloat64FromEnv("WORKQUEUE_BACKOFF_FACTOR", 1.5, 0, math.MaxFloat64), "Set Workqueue Per Item Rate Limiter Backoff Factor, default is 1.5")
command.Flags().BoolVar(&enableDynamicClusterDistribution, "dynamic-cluster-distribution-enabled", env.ParseBoolFromEnv(common.EnvEnableDynamicClusterDistribution, false), "Enables dynamic cluster distribution.")
feat: Implement Server-Side Diff (#13663) * feat: Implement Server-Side Diff Signed-off-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com> * propagate the refreshtype to the diff config Signed-off-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com> * Create the serverSideDiff config Signed-off-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com> * chore: add featureflag utility package Signed-off-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com> * remove featureflag package Signed-off-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com> * add param Signed-off-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com> * make ssd configurable with app annotation Signed-off-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com> * add server-side-diff flags Signed-off-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com> * apply the same logic regardless of the refresh type Signed-off-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com> * fix gitops-engine reference Signed-off-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com> * address review comments Signed-off-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com> * Address review comments Signed-off-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com> * docs: add docs related to server-side-diff Signed-off-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com> * docs: update doc Signed-off-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com> * Add config to include mutation webhooks Signed-off-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com> * Address review comments Signed-off-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com> * go mod update Signed-off-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com> * Add sdd cache test case Signed-off-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com> * fix ssd cache unit test Signed-off-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com> * Update clidocs Signed-off-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com> * update manifests Signed-off-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com> * Fix procfile Signed-off-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com> * additional doc changes Signed-off-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com> * update gitops-engine version Signed-off-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com> --------- Signed-off-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com>
2023-12-18 20:37:13 +00:00
command.Flags().BoolVar(&serverSideDiff, "server-side-diff-enabled", env.ParseBoolFromEnv(common.EnvServerSideDiff, false), "Feature flag to enable ServerSide diff. Default (\"false\")")
command.Flags().DurationVar(&ignoreNormalizerOpts.JQExecutionTimeout, "ignore-normalizer-jq-execution-timeout-seconds", env.ParseDurationFromEnv("ARGOCD_IGNORE_NORMALIZER_JQ_TIMEOUT", 0*time.Second, 0, math.MaxInt64), "Set ignore normalizer JQ execution timeout")
// argocd k8s event logging flag
command.Flags().StringSliceVar(&enableK8sEvent, "enable-k8s-event", env.StringsFromEnv("ARGOCD_ENABLE_K8S_EVENT", argo.DefaultEnableEventList(), ","), "Enable ArgoCD to use k8s event. For disabling all events, set the value as `none`. (e.g --enable-k8s-event=none), For enabling specific events, set the value as `event reason`. (e.g --enable-k8s-event=StatusRefreshed,ResourceCreated)")
cacheSource = appstatecache.AddCacheFlagsToCmd(&command, cacheutil.Options{
OnClientCreated: func(client *redis.Client) {
redisClient = client
},
})
return &command
}