fix(deployment): use hostname for topologyKey instead of zone, update resources settings and staging configs (#7005)

This commit is contained in:
Dotan Simha 2025-09-16 14:06:42 +03:00 committed by GitHub
parent e3ee46649c
commit 7c063ca1b0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
17 changed files with 71 additions and 44 deletions

View file

@ -46,7 +46,7 @@ export function deployApp({
'app',
{
image,
replicas: environment.isProduction ? 3 : 1,
replicas: environment.podsConfig.general.replicas,
imagePullSecret: docker.secret,
readinessProbe: '/api/health',
livenessProbe: '/api/health',

View file

@ -49,7 +49,7 @@ export function deployCommerce({
{
image,
imagePullSecret: docker.secret,
replicas: environment.isProduction ? 3 : 1,
replicas: environment.podsConfig.general.replicas,
readinessProbe: '/_readiness',
livenessProbe: '/_health',
startupProbe: '/_health',

View file

@ -59,7 +59,7 @@ export function deployEmails({
startupProbe: '/_health',
exposesMetrics: true,
image,
replicas: environment.isProduction ? 3 : 1,
replicas: environment.podsConfig.general.replicas,
},
[redis.deployment, redis.service],
)

View file

@ -23,6 +23,9 @@ export function prepareEnvironment(input: {
const appDns = `app.${input.rootDns}`;
const apiDns = `api.${input.rootDns}`;
const isProduction = env === 'production';
const isStaging = env === 'staging';
const isDev = env === 'dev';
return {
envVars: {
@ -33,14 +36,50 @@ export function prepareEnvironment(input: {
RELEASE: input.release,
},
envName: env,
isProduction: env === 'production',
isStaging: env === 'staging',
isDev: env === 'dev',
isProduction,
isStaging,
isDev,
encryptionSecret,
release: input.release,
appDns,
apiDns,
rootDns: input.rootDns,
podsConfig: {
general: {
replicas: isProduction ? 3 : isStaging ? 2 : 1,
},
supertokens: {
replicas: isProduction ? 3 : 1,
},
envoy: {
replicas: isProduction ? 3 : 1,
cpuLimit: isProduction ? '800m' : '150m',
memoryLimit: isProduction ? '1Gi' : '200Mi',
},
schemaService: {
memoryLimit: isProduction ? '2Gi' : '1Gi',
},
usageService: {
replicas: isProduction ? 3 : isStaging ? 2 : 1,
cpuLimit: isProduction ? '900m' : '300m',
maxReplicas: isProduction ? 6 : isStaging ? 3 : 1,
cpuAverageToScale: 60,
},
usageIngestorService: {
replicas: isProduction ? 6 : isStaging ? 2 : 1,
cpuLimit: isProduction ? '900m' : '300m',
maxReplicas: isProduction ? /* numberOfPartitions */ 16 : 2,
cpuAverageToScale: 60,
},
redis: {
memoryLimit: isProduction ? '4Gi' : '100Mi',
cpuLimit: isProduction ? '1000m' : '50m',
},
internalObservability: {
cpuLimit: isProduction ? '512m' : '150m',
memoryLimit: isProduction ? '1000Mi' : '300Mi',
},
},
};
}

View file

@ -107,7 +107,7 @@ export function deployGraphQL({
{
imagePullSecret: docker.secret,
image,
replicas: environment.isProduction ? 3 : 1,
replicas: environment.podsConfig.general.replicas,
pdb: true,
readinessProbe: '/_readiness',
livenessProbe: '/_health',

View file

@ -32,7 +32,7 @@ export function deploySchemaPolicy({
livenessProbe: '/_health',
startupProbe: '/_health',
exposesMetrics: true,
replicas: environment.isProduction ? 3 : 1,
replicas: environment.podsConfig.general.replicas,
pdb: true,
})
.withConditionalSecret(sentry.enabled, 'SENTRY_DSN', sentry.secret, 'dsn')

View file

@ -32,9 +32,9 @@ export function deployProxy({
})
.deployProxy({
envoy: {
replicas: environment.isProduction ? 3 : 1,
cpu: environment.isProduction ? '800m' : '150m',
memory: environment.isProduction ? '800Mi' : '192Mi',
replicas: environment.podsConfig.envoy.replicas,
cpu: environment.podsConfig.envoy.cpuLimit,
memory: environment.podsConfig.envoy.memoryLimit,
},
tracing: observability.enabled
? { collectorService: observability.observability!.otlpCollectorService }

View file

@ -60,7 +60,7 @@ export function deployPublicGraphQLAPIGateway(args: {
{
imagePullSecret: args.docker.secret,
image: dockerImage,
replicas: args.environment.isProduction ? 3 : 1,
replicas: args.environment.podsConfig.general.replicas,
availabilityOnEveryNode: true,
env: {
GRAPHQL_SERVICE_ENDPOINT: serviceLocalEndpoint(args.graphql.service).apply(

View file

@ -19,15 +19,10 @@ export function deployRedis(input: { environment: Environment }) {
const redisApi = new RedisStore({
password: redisPassword,
}).deploy({
limits: input.environment.isProduction
? {
memory: '6Gi',
cpu: '1000m',
}
: {
memory: '100Mi',
cpu: '50m',
},
limits: {
memory: input.environment.podsConfig.redis.memoryLimit,
cpu: input.environment.podsConfig.redis.cpuLimit,
},
});
const host = serviceLocalHost(redisApi.service);

View file

@ -51,8 +51,8 @@ export function deploySchema({
livenessProbe: '/_health',
startupProbe: '/_health',
exposesMetrics: true,
replicas: environment.isProduction ? 3 : 1,
memoryLimit: '2Gi',
replicas: environment.podsConfig.general.replicas,
memoryLimit: environment.podsConfig.schemaService.memoryLimit,
pdb: true,
},
[redis.deployment, redis.service],

View file

@ -88,7 +88,7 @@ export function deploySuperTokens(
const deployment = new kx.Deployment(
'supertokens',
{
spec: pb.asDeploymentSpec({ replicas: environment.isProduction ? 3 : 1 }),
spec: pb.asDeploymentSpec({ replicas: environment.podsConfig.supertokens.replicas }),
},
{
dependsOn: resourceOptions.dependencies,

View file

@ -39,7 +39,7 @@ export function deployTokens({
startupProbe: '/_health',
exposesMetrics: true,
availabilityOnEveryNode: true,
replicas: environment.isProduction ? 3 : 1,
replicas: environment.podsConfig.general.replicas,
image,
env: {
...environment.envVars,

View file

@ -29,10 +29,6 @@ export function deployUsageIngestor({
sentry: Sentry;
}) {
const clickHouseConfig = new pulumi.Config('clickhouse');
const numberOfPartitions = 16;
const replicas = environment.isProduction ? 6 : 1;
const cpuLimit = environment.isProduction ? '600m' : '300m';
const maxReplicas = environment.isProduction ? numberOfPartitions : 2;
// Require migrationV2DataIngestionStartDate only in production and staging
// Remove it once we are done with migration.
@ -46,7 +42,7 @@ export function deployUsageIngestor({
{
image,
imagePullSecret: docker.secret,
replicas,
replicas: environment.podsConfig.usageIngestorService.replicas,
readinessProbe: '/_readiness',
livenessProbe: '/_health',
availabilityOnEveryNode: true,
@ -67,10 +63,10 @@ export function deployUsageIngestor({
pdb: true,
autoScaling: {
cpu: {
cpuAverageToScale: 60,
limit: cpuLimit,
cpuAverageToScale: environment.podsConfig.usageIngestorService.cpuAverageToScale,
limit: environment.podsConfig.usageIngestorService.cpuLimit,
},
maxReplicas,
maxReplicas: environment.podsConfig.usageIngestorService.maxReplicas,
},
},
[clickhouse.deployment, clickhouse.service, dbMigrations].filter(Boolean),

View file

@ -38,9 +38,6 @@ export function deployUsage({
docker: Docker;
sentry: Sentry;
}) {
const replicas = environment.isProduction ? 3 : 1;
const cpuLimit = environment.isProduction ? '600m' : '300m';
const maxReplicas = environment.isProduction ? 6 : 2;
const kafkaBufferDynamic =
kafka.config.bufferDynamic === 'true' || kafka.config.bufferDynamic === '1' ? '1' : '0';
@ -50,7 +47,7 @@ export function deployUsage({
{
image,
imagePullSecret: docker.secret,
replicas,
replicas: environment.podsConfig.usageService.replicas,
readinessProbe: {
initialDelaySeconds: 10,
periodSeconds: 5,
@ -85,10 +82,10 @@ export function deployUsage({
pdb: true,
autoScaling: {
cpu: {
cpuAverageToScale: 60,
limit: cpuLimit,
cpuAverageToScale: environment.podsConfig.usageService.cpuAverageToScale,
limit: environment.podsConfig.usageService.cpuLimit,
},
maxReplicas,
maxReplicas: environment.podsConfig.usageService.maxReplicas,
},
},
[

View file

@ -45,7 +45,7 @@ export function deployWebhooks({
livenessProbe: '/_health',
startupProbe: '/_health',
exposesMetrics: true,
replicas: environment.isProduction ? 3 : 1,
replicas: environment.podsConfig.general.replicas,
image,
},
[redis.deployment, redis.service],

View file

@ -112,8 +112,8 @@ export class Observability {
replicaCount: 1,
resources: {
limits: {
cpu: this.environment.isProduction ? '512m' : '150m',
memory: this.environment.isProduction ? '1000Mi' : '300Mi',
cpu: this.environment.podsConfig.internalObservability.cpuLimit,
memory: this.environment.podsConfig.internalObservability.memoryLimit,
},
},
podAnnotations: {

View file

@ -192,7 +192,7 @@ export class ServiceDeployment {
// and ensure that we are not exposed to downtime issues caused by node failures/restarts:
topologySpreadConstraints.push({
maxSkew: 1,
topologyKey: 'topology.kubernetes.io/zone',
topologyKey: 'kubernetes.io/hostname',
whenUnsatisfiable: 'DoNotSchedule',
labelSelector: {
matchLabels: {