Helm Chart: Move vulnerability processing to be a cronjob by default (#25488)

The existing helm chart is designed to run vulnerability processing on
every container, which requires 4Gi/container.
However, the default for the helm chart is for each container to have a
maximum of 1Gi.

This change switches the default so that vulnerability processing is
disabled in the deployment, and moves vulnerability processing to a
dedicated cronjob that runs 1/day at 1am. (I didn't make that
configurable...)

A few items I think are important to call out:
1. I have commented out a lot of environment variables in the cronjob
that existed in the migration and deployment because I don't think
they're required, but I wanted one of you to review and actually say
that they're not necessary.
2. I did not include anything related to osquery or exposing the server
to clients in this since it's not meant to handle clients, just
vulnerability processing.
3. I believe I did everything to make sure cloudSQL will work, but it
should be tested.

# Checklist for submitter
- [ ] Changes file added for user-visible changes in `changes/`,
`orbit/changes/` or `ee/fleetd-chrome/changes`.
See [Changes
files](https://github.com/fleetdm/fleet/blob/main/docs/Contributing/Committing-Changes.md#changes-files)
for more information.
- [ ] Added/updated automated tests
- [ ] A detailed QA plan exists on the associated ticket (if it isn't
there, work with the product group's QA engineer to add it)
- [x] Manual QA for all new/changed functionality
This commit is contained in:
Phillip Boushy 2025-02-04 08:31:45 -07:00 committed by GitHub
parent aff7eadbdd
commit c1fb4e7b68
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 255 additions and 2 deletions

View file

@ -0,0 +1,227 @@
{{- if .Values.vulnProcessing.dedicated }}
# Dedicated CronJob that runs `fleet vuln_processing` on a schedule so the main
# Fleet deployment does not need the memory headroom for vulnerability
# processing. Rendered only when vulnProcessing.dedicated is set.
apiVersion: batch/v1
kind: CronJob
metadata:
  labels:
    app: fleet
    chart: fleet
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
  name: fleet-vulnprocessing
  namespace: {{ .Release.Namespace }}
spec:
  # Quoted so that cron expressions beginning with "*" (e.g. "*/5 * * * *")
  # are rendered as a YAML string instead of being parsed as an alias.
  schedule: {{ .Values.vulnProcessing.schedule | quote }}
  # Never start a new run while the previous one is still in progress.
  concurrencyPolicy: Forbid
  jobTemplate:
    spec:
      # Finished Jobs (and their pods) are cleaned up after 100 seconds.
      ttlSecondsAfterFinished: 100
      template:
        metadata:
          {{- with .Values.podAnnotations }}
          annotations:
            {{- toYaml . | trim | nindent 12 }}
          {{- end }}
          labels:
            app: fleet
            chart: fleet
            heritage: {{ .Release.Service }}
            release: {{ .Release.Name }}
        spec:
          restartPolicy: Never
          # Shared process namespace lets the fleet container find and signal
          # the cloudsql-proxy sidecar once processing has finished.
          shareProcessNamespace: true
          containers:
            - name: fleet-vulnprocessing
              command: ["/bin/sh", "-c"]
              # When the Cloud SQL proxy sidecar is enabled it must be stopped
              # after processing, otherwise the pod never completes. The exit
              # status of `fleet vuln_processing` is saved and re-raised so a
              # failed run is not reported as success just because the
              # `kill` succeeded.
              args:
                - |
                  /usr/bin/fleet vuln_processing;
                  {{- if .Values.gke.cloudSQL.enableProxy }}
                  fleet_exit_code=$?;
                  sql_proxy_pid=$(pgrep cloud_sql_proxy) && kill -INT $sql_proxy_pid;
                  exit $fleet_exit_code;
                  {{- end }}
              image: "{{ .Values.imageRepository }}:{{ .Values.imageTag }}"
              resources:
                limits:
                  cpu: {{ .Values.vulnProcessing.resources.limits.cpu }}
                  memory: {{ .Values.vulnProcessing.resources.limits.memory }}
                requests:
                  cpu: {{ .Values.vulnProcessing.resources.requests.cpu }}
                  memory: {{ .Values.vulnProcessing.resources.requests.memory }}
              env:
                ## BEGIN FLEET SECTION
                - name: FLEET_VULNERABILITIES_DATABASES_PATH
                  value: /tmp/vuln  # /tmp might not work on all cloud providers by default
                # NOTE(review): the variables below were carried over from the
                # deployment/migration templates and commented out because they
                # are believed unnecessary for vulnerability processing; confirm
                # before deleting. Helm still evaluates template actions that
                # appear inside YAML comments, so the referenced values must
                # keep existing while these lines remain.
                # - name: FLEET_SERVER_ADDRESS
                #   value: "0.0.0.0:{{ .Values.fleet.listenPort }}"
                # - name: FLEET_AUTH_BCRYPT_COST
                #   value: "{{ .Values.fleet.auth.bcryptCost }}"
                # - name: FLEET_AUTH_SALT_KEY_SIZE
                #   value: "{{ .Values.fleet.auth.saltKeySize }}"
                # - name: FLEET_APP_TOKEN_KEY_SIZE
                #   value: "{{ .Values.fleet.app.tokenKeySize }}"
                # - name: FLEET_APP_TOKEN_VALIDITY_PERIOD
                #   value: "{{ .Values.fleet.app.inviteTokenValidityPeriod }}"
                # - name: FLEET_SESSION_KEY_SIZE
                #   value: "{{ .Values.fleet.session.keySize }}"
                # - name: FLEET_SESSION_DURATION
                #   value: "{{ .Values.fleet.session.duration }}"
                - name: FLEET_LOGGING_DEBUG
                  value: "{{ .Values.fleet.logging.debug }}"
                - name: FLEET_LOGGING_JSON
                  value: "{{ .Values.fleet.logging.json }}"
                - name: FLEET_LOGGING_DISABLE_BANNER
                  value: "{{ .Values.fleet.logging.disableBanner }}"
                # - name: FLEET_SERVER_TLS
                #   value: "{{ .Values.fleet.tls.enabled }}"
                # {{- if .Values.fleet.tls.enabled }}
                # - name: FLEET_SERVER_TLS_COMPATIBILITY
                #   value: "{{ .Values.fleet.tls.compatibility }}"
                # - name: FLEET_SERVER_CERT
                #   value: "/secrets/tls/{{ .Values.fleet.tls.certSecretKey }}"
                # - name: FLEET_SERVER_KEY
                #   value: "/secrets/tls/{{ .Values.fleet.tls.keySecretKey }}"
                # {{- end }}
                {{- if .Values.fleet.license.secretName }}
                - name: FLEET_LICENSE_KEY
                  valueFrom:
                    secretKeyRef:
                      key: {{ .Values.fleet.license.licenseKey }}
                      name: {{ .Values.fleet.license.secretName }}
                {{- end }}
                ## END FLEET SECTION
                ## BEGIN MYSQL SECTION
                - name: FLEET_MYSQL_ADDRESS
                  value: "{{ .Values.database.address }}"
                - name: FLEET_MYSQL_DATABASE
                  value: "{{ .Values.database.database }}"
                - name: FLEET_MYSQL_USERNAME
                  value: "{{ .Values.database.username }}"
                - name: FLEET_MYSQL_PASSWORD
                  valueFrom:
                    secretKeyRef:
                      name: {{ .Values.database.secretName }}
                      key: {{ .Values.database.passwordKey }}
                - name: FLEET_MYSQL_MAX_OPEN_CONNS
                  value: "{{ .Values.database.maxOpenConns }}"
                - name: FLEET_MYSQL_MAX_IDLE_CONNS
                  value: "{{ .Values.database.maxIdleConns }}"
                - name: FLEET_MYSQL_CONN_MAX_LIFETIME
                  value: "{{ .Values.database.connMaxLifetime }}"
                {{- if .Values.database.tls.enabled }}
                # TLS material is read from the mysql-tls volume mounted at
                # /secrets/mysql (see volumeMounts below).
                {{- if .Values.database.tls.caCertKey }}
                - name: FLEET_MYSQL_TLS_CA
                  value: "/secrets/mysql/{{ .Values.database.tls.caCertKey }}"
                {{- end }}
                {{- if .Values.database.tls.certKey }}
                - name: FLEET_MYSQL_TLS_CERT
                  value: "/secrets/mysql/{{ .Values.database.tls.certKey }}"
                {{- end }}
                {{- if .Values.database.tls.keyKey }}
                - name: FLEET_MYSQL_TLS_KEY
                  value: "/secrets/mysql/{{ .Values.database.tls.keyKey }}"
                {{- end }}
                - name: FLEET_MYSQL_TLS_CONFIG
                  value: "{{ .Values.database.tls.config }}"
                - name: FLEET_MYSQL_TLS_SERVER_NAME
                  value: "{{ .Values.database.tls.serverName }}"
                {{- end }}
                ## END MYSQL SECTION
                ## BEGIN REDIS SECTION
                - name: FLEET_REDIS_ADDRESS
                  value: "{{ .Values.cache.address }}"
                - name: FLEET_REDIS_DATABASE
                  value: "{{ .Values.cache.database }}"
                {{- if .Values.cache.usePassword }}
                - name: FLEET_REDIS_PASSWORD
                  valueFrom:
                    secretKeyRef:
                      name: "{{ .Values.cache.secretName }}"
                      key: "{{ .Values.cache.passwordKey }}"
                {{- end }}
                ## END REDIS SECTION
                ## APPEND ENVIRONMENT VARIABLES FROM VALUES
                {{- range $key, $value := .Values.environments }}
                - name: {{ $key }}
                  value: {{ $value | quote }}
                {{- end }}
                ## APPEND ENVIRONMENT VARIABLES FROM SECRETS/CMs
                {{- range .Values.envsFrom }}
                - name: {{ .name }}
                  valueFrom:
                    {{- if .valueFrom.configMapKeyRef }}
                    configMapKeyRef:
                      name: {{ .valueFrom.configMapKeyRef.name }}
                      key: {{ .valueFrom.configMapKeyRef.key }}
                    {{- else if .valueFrom.secretKeyRef }}
                    secretKeyRef:
                      name: {{ .valueFrom.secretKeyRef.name }}
                      key: {{ .valueFrom.secretKeyRef.key }}
                    {{- end }}
                {{- end }}
              securityContext:
                allowPrivilegeEscalation: false
                capabilities:
                  {{- if .Values.gke.cloudSQL.enableProxy }}
                  # SYS_PTRACE is granted only when the proxy sidecar has to be
                  # signalled from this container.
                  add:
                    - SYS_PTRACE
                  {{- else }}
                  drop: [ALL]
                  {{- end }}
                privileged: false
                readOnlyRootFilesystem: true
                runAsGroup: 3333
                runAsUser: 3333
                runAsNonRoot: true
              volumeMounts:
                # Writable scratch space; the root filesystem is read-only and
                # the vulnerability databases are written under /tmp/vuln.
                - name: tmp
                  mountPath: /tmp
                {{- if .Values.database.tls.enabled }}
                - name: mysql-tls
                  readOnly: true
                  mountPath: /secrets/mysql
                {{- end }}
            {{- if .Values.gke.cloudSQL.enableProxy }}
            - name: cloudsql-proxy
              image: "{{ .Values.gke.cloudSQL.imageRepository }}:{{ .Values.gke.cloudSQL.imageTag }}"
              command:
                - "/cloud_sql_proxy"
                - "-verbose={{ .Values.gke.cloudSQL.verbose}}"
                - "-instances={{ .Values.gke.cloudSQL.instanceName }}=tcp:3306"
              resources:
                limits:
                  cpu: 0.5  # half a CPU core (500m)
                  memory: 150Mi
                requests:
                  cpu: 0.1  # a tenth of a CPU core (100m)
                  memory: 50Mi
              securityContext:
                allowPrivilegeEscalation: false
                capabilities:
                  drop: [ALL]
                privileged: false
                readOnlyRootFilesystem: true
                runAsGroup: 3333
                runAsUser: 3333
                runAsNonRoot: true
            {{- end }}
          serviceAccountName: fleet
          volumes:
            - name: tmp
              # Explicit empty mapping: a bare `emptyDir:` renders as null.
              emptyDir: {}
            {{- if .Values.database.tls.enabled }}
            - name: mysql-tls
              secret:
                secretName: "{{ .Values.database.secretName }}"
            {{- end }}
          {{- with .Values.nodeSelector }}
          nodeSelector:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          {{- with .Values.affinity }}
          affinity:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          {{- with .Values.tolerations }}
          tolerations:
            {{- toYaml . | nindent 12 }}
          {{- end }}
{{- end }}

View file

@ -269,6 +269,16 @@ spec:
value: "{{ .Values.osquery.logging.pubsub.resultTopic }}"
{{- end }}
## END OSQUERY SECTION
## BEGIN VULNERABILITY PROCESSING
# When the dedicated vulnerability-processing CronJob is enabled
# (vulnProcessing.dedicated), disable the vulnerability schedule in the main
# deployment to reduce its total cpu/memory utilization.
{{- if .Values.vulnProcessing.dedicated }}
- name: FLEET_VULNERABILITIES_DISABLE_SCHEDULE
value: "true"
{{- end }}
## END VULNERABILITY PROCESSING
## APPEND ENVIRONMENT VARIABLES FROM VALUES
{{- range $key, $value := .Values.environments }}
- name: {{ $key }}

View file

@ -10,7 +10,7 @@ serviceAccountAnnotations: {} # Additional annotations to add to the Fleet servi
resources:
limits:
cpu: 1 # 1GHz
memory: 1Gi
memory: 4Gi
requests:
cpu: 0.1 # 100Mhz
memory: 50Mi
@ -40,7 +40,8 @@ affinity:
ingress:
enabled: false
className: ""
annotations: {}
annotations:
{}
# kubernetes.io/tls-acme: "true"
# nginx.ingress.kubernetes.io/proxy-body-size: 10m
# kubernetes.io/ingress.class: nginx
@ -103,6 +104,21 @@ fleet:
extraVolumes: []
extraVolumeMounts: []
# Whether to run Fleet vulnerability processing in a dedicated CronJob instead
# of inside the main Fleet container.
# If you set dedicated=false, you need to increase the memory limit in the main
# `resources` section to 4Gi, or the Fleet container will be OOMKilled when
# vulnerability processing tries to run.
vulnProcessing:
dedicated: false
# Cron expression used for the dedicated CronJob's schedule; the CronJob is
# only rendered when dedicated=true. "0 * * * *" fires at minute 0 of every
# hour.
schedule: "0 * * * *"
# Resource requests/limits applied to the dedicated vulnerability-processing
# container.
resources:
limits:
cpu: 1 # 1 CPU core
memory: 4Gi
requests:
cpu: 0.1 # a tenth of a CPU core (100m)
memory: 50Mi
## Section: osquery
# All of the settings related to osquery's interactions with the Fleet server
osquery: