mirror of
https://github.com/open-metadata/OpenMetadata
synced 2026-05-24 09:39:11 +00:00
* Fix k8s operator exit handler pod loop and TTL cleanup, add tolerations support (#26772) Fix two bugs in the OMJob operator: - Exit handler pods were recreated indefinitely because findExitHandlerPod() lacked the name-based fallback that findMainPod() already had, causing label propagation delays to trigger repeated pod creation events - Terminal phase handler never rescheduled for TTL-based cleanup, so pods were never cleaned up after ttlSecondsAfterFinished expired Add tolerations support for ingestion pod scheduling across the full stack: - Operator: OMJobPodSpec field, PodManager.buildPod(), CRD schema - Server: OMJob model, K8sPipelineClientConfig parsing, K8sPipelineClient builder, K8sJobUtils serialization Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * Add K8S_TOLERATIONS env var mapping in openmetadata.yaml Adds the tolerations config binding so the server picks up the K8S_TOLERATIONS env var set by the Helm chart secret. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * Add tolerations to k8s test values for local validation Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * fix cleanup * Address PR review: remove redundant pod lookup and guard null items - Remove redundant server-created pod selector fallback in findMainPod() since buildPodSelector() now matches all pods by omjob-name alone - Add null guard for getItems() in deletePods() to prevent NPE - Update local test values for namespace and image config Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
112 lines
No EOL
3.1 KiB
YAML
112 lines
No EOL
3.1 KiB
YAML
# Test values for K8s native pipeline client validation
|
|
# Only contains essential overrides for testing
|
|
|
|
openmetadata:
|
|
config:
|
|
# Enable debug logging for testing
|
|
logLevel: DEBUG
|
|
|
|
# Database configuration (using local PostgreSQL via host machine)
|
|
database:
|
|
host: host.docker.internal
|
|
port: 5432
|
|
driverClass: org.postgresql.Driver
|
|
dbScheme: postgresql
|
|
auth:
|
|
password:
|
|
secretRef: postgres-secrets
|
|
secretKey: openmetadata-postgres-password
|
|
|
|
# Search configuration (using local OpenSearch via host machine)
|
|
elasticsearch:
|
|
host: host.docker.internal
|
|
port: 9200
|
|
searchType: opensearch
|
|
scheme: http
|
|
|
|
# K8s pipeline client configuration: common client settings first, then K8s-specific settings
|
|
pipelineServiceClientConfig:
|
|
type: "k8s" # Use Kubernetes native instead of Airflow
|
|
# Common configuration for all pipeline service clients
|
|
# Override metadataApiEndpoint for cross-namespace access (default uses service name only)
|
|
metadataApiEndpoint: "http://openmetadata.openmetadata-ingestion.svc.cluster.local:8585/api"
|
|
k8s:
|
|
# K8s-specific configuration
|
|
# Leave empty to default to the release namespace
|
|
namespace: ""
|
|
serviceAccountName: "openmetadata-ingestion"
|
|
ttlSecondsAfterFinished: 10
|
|
|
|
# Local ingestion image
|
|
ingestionImage: "openmetadata/collate-base:1.12.0-20260402"
|
|
imagePullPolicy: "IfNotPresent"
|
|
|
|
# Reduced resources for local testing
|
|
resources:
|
|
limits:
|
|
cpu: "1"
|
|
memory: "2Gi"
|
|
requests:
|
|
cpu: "100m"
|
|
memory: "512Mi"
|
|
|
|
# Test tolerations for ingestion pods
|
|
tolerations:
|
|
- key: "dedicated"
|
|
operator: "Equal"
|
|
value: "ingestion"
|
|
effect: "NoSchedule"
|
|
|
|
# Enable OMJob operator for guaranteed exit handler execution
|
|
useOMJobOperator: true
|
|
|
|
# Local image configuration
|
|
image:
|
|
repository: openmetadata/server
|
|
tag: "1.12.0"
|
|
pullPolicy: IfNotPresent
|
|
|
|
# Service configuration for local access
|
|
service:
|
|
type: LoadBalancer # For minikube tunnel access
|
|
|
|
# OMJob Operator configuration
|
|
omjobOperator:
|
|
enabled: true
|
|
# Use locally built fixed image
|
|
image:
|
|
repository: docker.getcollate.io/openmetadata/omjob-operator
|
|
tag: "1.12.0-SNAPSHOT"
|
|
pullPolicy: IfNotPresent
|
|
# Increased resources for the Java application (it was OOMKilled at 128Mi)
|
|
resources:
|
|
requests:
|
|
cpu: "500m"
|
|
memory: "512Mi"
|
|
limits:
|
|
cpu: "1"
|
|
memory: "1Gi"
|
|
# Debug logging for testing
|
|
env:
|
|
logLevel: "DEBUG"
|
|
watchNamespaces: ""
|
|
# Enable health checks now that operator implements proper endpoints
|
|
healthCheck:
|
|
enabled: true
|
|
|
|
# Reduced resources for local testing
|
|
resources:
|
|
limits:
|
|
cpu: 2
|
|
memory: 4Gi
|
|
requests:
|
|
cpu: 500m
|
|
memory: 1Gi
|
|
|
|
# More relaxed health checks for local testing
|
|
livenessProbe:
|
|
initialDelaySeconds: 120
|
|
readinessProbe:
|
|
initialDelaySeconds: 120
|
|
startupProbe:
|
|
failureThreshold: 10