From ed6fe769e6aa626f20379d0c2ccd3cbac2d8ae0c Mon Sep 17 00:00:00 2001 From: Hannah DeFazio Date: Mon, 17 Nov 2025 16:13:27 -0500 Subject: [PATCH] fix: Allow the ISVC to be healthy when the Stopped Condition is False (#25312) Signed-off-by: Hannah DeFazio --- .../InferenceService/health.lua | 13 +++++++++-- .../InferenceService/health_test.yaml | 4 ++++ .../InferenceService/testdata/healthy.yaml | 4 ++++ .../testdata/healthy_ocp.yaml | 4 ++++ .../testdata/healthy_raw.yaml | 4 ++++ .../InferenceService/testdata/stopped.yaml | 23 +++++++++++++++++++ 6 files changed, 50 insertions(+), 2 deletions(-) create mode 100644 resource_customizations/serving.kserve.io/InferenceService/testdata/stopped.yaml diff --git a/resource_customizations/serving.kserve.io/InferenceService/health.lua b/resource_customizations/serving.kserve.io/InferenceService/health.lua index 94959de841..86a7e42216 100644 --- a/resource_customizations/serving.kserve.io/InferenceService/health.lua +++ b/resource_customizations/serving.kserve.io/InferenceService/health.lua @@ -24,14 +24,23 @@ if obj.status ~= nil then if obj.status.conditions ~= nil then for i, condition in pairs(obj.status.conditions) do + -- Check if the InferenceService is Stopped + if condition.type == "Stopped" and condition.status == "True" then + health_status.status = "Suspended" + health_status.message = "InferenceService is Stopped" + return health_status + end + -- Check for unhealthy statuses + -- Note: The Stopped condition's healthy status is False if condition.status == "Unknown" then status_unknown = status_unknown + 1 - elseif condition.status == "False" then + elseif condition.status == "False" and condition.type ~= "Stopped" then status_false = status_false + 1 end - if condition.status ~= "True" then + -- Add the error messages if the status is unhealthy + if condition.status ~= "True" and condition.type ~= "Stopped" then msg = msg .. " | " .. i .. ": " .. condition.type .. " | " .. condition.status if condition.reason ~= nil and condition.reason ~= "" then msg = msg .. " | " .. condition.reason diff --git a/resource_customizations/serving.kserve.io/InferenceService/health_test.yaml b/resource_customizations/serving.kserve.io/InferenceService/health_test.yaml index 670b194f79..8cd44e036a 100644 --- a/resource_customizations/serving.kserve.io/InferenceService/health_test.yaml +++ b/resource_customizations/serving.kserve.io/InferenceService/health_test.yaml @@ -23,6 +23,10 @@ tests: status: Degraded message: "0: transitionStatus | BlockedByFailedLoad" inputPath: testdata/degraded_modelmesh.yaml +- healthStatus: + status: Suspended + message: InferenceService is Stopped + inputPath: testdata/stopped.yaml - healthStatus: status: Healthy message: InferenceService is healthy. diff --git a/resource_customizations/serving.kserve.io/InferenceService/testdata/healthy.yaml b/resource_customizations/serving.kserve.io/InferenceService/testdata/healthy.yaml index 3c28c61d48..b7bb954d92 100644 --- a/resource_customizations/serving.kserve.io/InferenceService/testdata/healthy.yaml +++ b/resource_customizations/serving.kserve.io/InferenceService/testdata/healthy.yaml @@ -23,3 +23,7 @@ status: - lastTransitionTime: "2023-06-20T22:44:51Z" status: "True" type: Ready + - lastTransitionTime: "2023-06-20T22:44:51Z" + severity: Info + status: 'False' + type: Stopped diff --git a/resource_customizations/serving.kserve.io/InferenceService/testdata/healthy_ocp.yaml b/resource_customizations/serving.kserve.io/InferenceService/testdata/healthy_ocp.yaml index 9d65c2b379..72ff1cc1ff 100644 --- a/resource_customizations/serving.kserve.io/InferenceService/testdata/healthy_ocp.yaml +++ b/resource_customizations/serving.kserve.io/InferenceService/testdata/healthy_ocp.yaml @@ -31,5 +31,9 @@ status: severity: Info status: 'True' type: RoutesReady + - lastTransitionTime: '2024-05-30T22:14:31Z' + severity: Info + status: 'False' + type: Stopped modelStatus: transitionStatus: UpToDate diff --git a/resource_customizations/serving.kserve.io/InferenceService/testdata/healthy_raw.yaml b/resource_customizations/serving.kserve.io/InferenceService/testdata/healthy_raw.yaml index 5f9d805625..de0cfea086 100644 --- a/resource_customizations/serving.kserve.io/InferenceService/testdata/healthy_raw.yaml +++ b/resource_customizations/serving.kserve.io/InferenceService/testdata/healthy_raw.yaml @@ -17,3 +17,7 @@ status: - lastTransitionTime: '2024-05-16T18:48:56Z' status: 'True' type: Ready + - lastTransitionTime: '2024-05-16T18:48:56Z' + severity: Info + status: 'False' + type: Stopped diff --git a/resource_customizations/serving.kserve.io/InferenceService/testdata/stopped.yaml b/resource_customizations/serving.kserve.io/InferenceService/testdata/stopped.yaml new file mode 100644 index 0000000000..70a8605d42 --- /dev/null +++ b/resource_customizations/serving.kserve.io/InferenceService/testdata/stopped.yaml @@ -0,0 +1,23 @@ +apiVersion: serving.kserve.io/v1beta1 +kind: InferenceService +metadata: + name: helloworld + namespace: default + annotations: + serving.kserve.io/deploymentMode: RawDeployment + serving.kserve.io/stop: 'true' +spec: {} +status: + conditions: + - lastTransitionTime: '2024-05-16T18:48:56Z' + reason: Stopped + status: 'False' + type: PredictorReady + - lastTransitionTime: '2024-05-16T18:48:56Z' + reason: Stopped + status: 'False' + type: Ready + - lastTransitionTime: '2024-05-16T18:48:56Z' + severity: Info + status: 'True' + type: Stopped