From a4ebd5bdcba5dbfeb8fb2971e314a255e1ac1c00 Mon Sep 17 00:00:00 2001 From: Trevor Royer Date: Thu, 30 May 2024 16:03:33 -0600 Subject: [PATCH 01/29] automatic update to branch by bootstrap script --- .../overlays/rhoai-fast/patch-application-repo-revision.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clusters/overlays/rhoai-fast/patch-application-repo-revision.yaml b/clusters/overlays/rhoai-fast/patch-application-repo-revision.yaml index 1abff624..635679cb 100644 --- a/clusters/overlays/rhoai-fast/patch-application-repo-revision.yaml +++ b/clusters/overlays/rhoai-fast/patch-application-repo-revision.yaml @@ -3,4 +3,4 @@ value: 'https://github.com/redhat-ai-services/ai-accelerator.git' - op: replace path: /spec/source/targetRevision - value: main + value: health-checks From 330b13f6390332d20aad845c1eef658a4343ba72 Mon Sep 17 00:00:00 2001 From: Trevor Royer Date: Thu, 30 May 2024 16:04:58 -0600 Subject: [PATCH 02/29] add check on unknown state --- .../patch-inferenceservice-health-check.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml b/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml index 7eda6e6b..4c1465b5 100644 --- a/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml +++ b/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml @@ -46,7 +46,7 @@ if progressing == false and status_unknown == 0 and status_false == 0 then health_status.status = "Healthy" - elseif progressing == true then + elseif progressing == true or status_unknown >= 0 then health_status.status = "Progressing" else health_status.status = "Degraded" From 8432827315b5ecc9ca11cad0923e8bb8f2eb6ba6 Mon Sep 17 00:00:00 2001 From: Trevor Royer Date: Thu, 30 May 2024 16:19:12 -0600 Subject: [PATCH 03/29] cleanup objects --- .../kustomization.yaml | 7 ---- .../patch-inferencegraph-health-check.yaml | 42 ------------------- .../patch-servingruntime-health-check.yaml | 42 ------------------- 3 files changed, 91 deletions(-) delete mode 100644 components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferencegraph-health-check.yaml delete mode 100644 components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-servingruntime-health-check.yaml diff --git a/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/kustomization.yaml b/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/kustomization.yaml index 735c3f99..57367b15 100644 --- a/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/kustomization.yaml +++ b/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/kustomization.yaml @@ -11,13 +11,6 @@ patches: - path: patch-notebook-health-check.yaml target: kind: ArgoCD - # These have not yet been implimented/tested - # - path: patch-inferencegraph-health-check.yaml - # target: - # kind: ArgoCD - path: patch-inferenceservice-health-check.yaml target: kind: ArgoCD - # - path: patch-servingruntime-health-check.yaml - # target: - # kind: ArgoCD diff --git a/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferencegraph-health-check.yaml b/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferencegraph-health-check.yaml deleted file mode 100644 index e8005d95..00000000 --- a/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferencegraph-health-check.yaml +++ /dev/null @@ -1,42 +0,0 @@ -- op: add - path: /spec/resourceHealthChecks/- - value: - group: serving.kserve.io - kind: InferenceGraph - check: | - health_status = {} - if obj.status ~= nil then - if obj.status.conditions ~= nil then - msg = "" - progressing = false - degraded = false - for i, condition in pairs(obj.status.conditions) do - - if condition.status == "False" then - progressing = true - msg = msg .. i .. ": " .. condition.type .. " | " .. condition.status .. " | " .. condition.reason .. " | " .. condition.message .. "\n" - end - - end - - if progressing == false and degraded == false then - health_status.status = "Healthy" - elseif progressing == true then - health_status.status = "Progressing" - elseif degraded == false then - -- there is no condition that can help to distinguish between a degraded and progressing object - -- for now, we will just always keep the object as progressing and never set it to degraded - health_status.status = "Degraded" - end - - health_status.message = msg - else - health_status.status = "Progressing" - health_status.message = "Notebook is creating..." - end - else - health_status.status = "Progressing" - health_status.message = "Notebook is creating..." - end - - return health_status diff --git a/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-servingruntime-health-check.yaml b/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-servingruntime-health-check.yaml deleted file mode 100644 index e9970061..00000000 --- a/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-servingruntime-health-check.yaml +++ /dev/null @@ -1,42 +0,0 @@ -- op: add - path: /spec/resourceHealthChecks/- - value: - group: serving.kserve.io - kind: ServingRuntime - check: | - health_status = {} - if obj.status ~= nil then - if obj.status.conditions ~= nil then - msg = "" - progressing = false - degraded = false - for i, condition in pairs(obj.status.conditions) do - - if condition.status == "False" then - progressing = true - msg = msg .. i .. ": " .. condition.type .. " | " .. condition.status .. " | " .. condition.reason .. " | " .. condition.message .. "\n" - end - - end - - if progressing == false and degraded == false then - health_status.status = "Healthy" - elseif progressing == true then - health_status.status = "Progressing" - elseif degraded == false then - -- there is no condition that can help to distinguish between a degraded and progressing object - -- for now, we will just always keep the object as progressing and never set it to degraded - health_status.status = "Degraded" - end - - health_status.message = msg - else - health_status.status = "Progressing" - health_status.message = "Notebook is creating..." - end - else - health_status.status = "Progressing" - health_status.message = "Notebook is creating..." - end - - return health_status From a12996d33c6053dad0e9a357601e3b35adc451ae Mon Sep 17 00:00:00 2001 From: Trevor Royer Date: Thu, 30 May 2024 16:53:03 -0600 Subject: [PATCH 04/29] add bad test model --- .../patch-inferenceservice-health-check.yaml | 12 +++++---- .../base/inference-service.yaml | 25 +++++++++++++++++++ 2 files changed, 32 insertions(+), 5 deletions(-) diff --git a/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml b/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml index 4c1465b5..b73145e7 100644 --- a/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml +++ b/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml @@ -4,12 +4,16 @@ group: serving.kserve.io kind: InferenceService check: | - health_status = {} + local health_status = {} + health_status.status = "Progressing" health_status.message = "Waiting for InferenceService to report status..." if obj.status ~= nil then - progressing = false + + local progressing = false + local status_false = 0 + local status_unknown = 0 if obj.modelStatus ~= nil then if obj.modelStatus.transitionStatus == "InProgress" then @@ -22,9 +26,6 @@ for i, condition in pairs(obj.status.conditions) do - status_false = 0 - status_unknown = 0 - if condition.status == "Unknown" then status_unknown = status_unknown + 1 elseif condition.status == "False" then @@ -46,6 +47,7 @@ if progressing == false and status_unknown == 0 and status_false == 0 then health_status.status = "Healthy" + msg = "InferenceService is healthy." elseif progressing == true or status_unknown >= 0 then health_status.status = "Progressing" else diff --git a/tenants/ai-example/single-model-serving-tgis/base/inference-service.yaml b/tenants/ai-example/single-model-serving-tgis/base/inference-service.yaml index c151860c..c3900963 100644 --- a/tenants/ai-example/single-model-serving-tgis/base/inference-service.yaml +++ b/tenants/ai-example/single-model-serving-tgis/base/inference-service.yaml @@ -1,3 +1,4 @@ +--- apiVersion: serving.kserve.io/v1beta1 kind: InferenceService metadata: @@ -21,3 +22,27 @@ spec: storage: key: aws-connection-tgis path: models/flan-t5-small +--- +apiVersion: serving.kserve.io/v1beta1 +kind: InferenceService +metadata: + annotations: + openshift.io/display-name: tgis + serving.knative.openshift.io/enablePassthrough: "true" + sidecar.istio.io/inject: "true" + sidecar.istio.io/rewriteAppHTTPProbers: "true" + labels: + opendatahub.io/dashboard: "true" + name: tgis-test +spec: + predictor: + maxReplicas: 1 + minReplicas: 1 + model: + modelFormat: + name: pytorch + name: "" + runtime: tgis + storage: + key: aws-connection-tgis + path: models/flan-t5-small-blah From 0084c97f334fa660684b9c564f9af78586be17b7 Mon Sep 17 00:00:00 2001 From: Trevor Royer Date: Thu, 30 May 2024 17:00:08 -0600 Subject: [PATCH 05/29] add model status back --- .../patch-inferenceservice-health-check.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml b/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml index b73145e7..e13ba604 100644 --- a/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml +++ b/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml @@ -48,7 +48,7 @@ if progressing == false and status_unknown == 0 and status_false == 0 then health_status.status = "Healthy" msg = "InferenceService is healthy." - elseif progressing == true or status_unknown >= 0 then + elseif progressing == true and status_unknown >= 0 then health_status.status = "Progressing" else health_status.status = "Degraded" From b768a2634ae42e7108dd3aba24bee400bf2343a1 Mon Sep 17 00:00:00 2001 From: Trevor Royer Date: Thu, 30 May 2024 17:07:58 -0600 Subject: [PATCH 06/29] add bad model --- .../base/inference-service.yaml | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/tenants/ai-example/multi-model-serving/base/inference-service.yaml b/tenants/ai-example/multi-model-serving/base/inference-service.yaml index 5a276b6b..49c3bff3 100644 --- a/tenants/ai-example/multi-model-serving/base/inference-service.yaml +++ b/tenants/ai-example/multi-model-serving/base/inference-service.yaml @@ -1,3 +1,4 @@ +--- apiVersion: serving.kserve.io/v1beta1 kind: InferenceService metadata: @@ -19,3 +20,25 @@ spec: storage: key: aws-connection-multi-model path: models/fraud-detection-model/frauddetectionmodel.onnx +--- +apiVersion: serving.kserve.io/v1beta1 +kind: InferenceService +metadata: + annotations: + openshift.io/display-name: bad + serving.kserve.io/deploymentMode: ModelMesh + name: bad + labels: + opendatahub.io/dashboard: 'true' +spec: + predictor: + model: + modelFormat: + name: onnx + version: '1' + name: '' + resources: {} + runtime: multi-model-server + storage: + key: aws-connection-multi-model + path: models/bad/bad.onnx \ No newline at end of file From e0e76181d4bde099045786a17b1daab729110dfe Mon Sep 17 00:00:00 2001 From: Trevor Royer Date: Thu, 30 May 2024 17:15:38 -0600 Subject: [PATCH 07/29] add transitionstatus message --- .../patch-inferenceservice-health-check.yaml | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml b/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml index e13ba604..9ff11680 100644 --- a/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml +++ b/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml @@ -14,16 +14,18 @@ local progressing = false local status_false = 0 local status_unknown = 0 + msg = "" if obj.modelStatus ~= nil then - if obj.modelStatus.transitionStatus == "InProgress" then - progressing = true + if obj.modelStatus.transitionStatus ~= "UpToDate" then + if obj.modelStatus.transitionStatus == "InProgress" then + progressing = true + end + msg = msg .. "0: transitionStatus | " .. obj.modelStatus.transitionStatus .. "\n" end end if obj.status.conditions ~= nil then - msg = "" - for i, condition in pairs(obj.status.conditions) do if condition.status == "Unknown" then From 04dc566dcf5e263fbd0e6f3dba53735aac9d2571 Mon Sep 17 00:00:00 2001 From: Trevor Royer Date: Thu, 30 May 2024 17:17:56 -0600 Subject: [PATCH 08/29] add check on transition state for degraded --- .../patch-inferenceservice-health-check.yaml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml b/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml index 9ff11680..ef428f80 100644 --- a/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml +++ b/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml @@ -12,6 +12,7 @@ if obj.status ~= nil then local progressing = false + local degraded = true local status_false = 0 local status_unknown = 0 msg = "" @@ -20,6 +21,8 @@ if obj.modelStatus.transitionStatus ~= "UpToDate" then if obj.modelStatus.transitionStatus == "InProgress" then progressing = true + else + degraded = true end msg = msg .. "0: transitionStatus | " .. obj.modelStatus.transitionStatus .. "\n" end @@ -47,7 +50,7 @@ end - if progressing == false and status_unknown == 0 and status_false == 0 then + if progressing == false and degraded = false and status_unknown == 0 and status_false == 0 then health_status.status = "Healthy" msg = "InferenceService is healthy." elseif progressing == true and status_unknown >= 0 then From 2d1b457352bad5aebfad67617f44b92d3e56ada8 Mon Sep 17 00:00:00 2001 From: Trevor Royer Date: Thu, 30 May 2024 17:19:07 -0600 Subject: [PATCH 09/29] fix comparison --- .../patch-inferenceservice-health-check.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml b/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml index ef428f80..20b91bc8 100644 --- a/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml +++ b/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml @@ -50,7 +50,7 @@ end - if progressing == false and degraded = false and status_unknown == 0 and status_false == 0 then + if progressing == false and degraded == false and status_unknown == 0 and status_false == 0 then health_status.status = "Healthy" msg = "InferenceService is healthy." elseif progressing == true and status_unknown >= 0 then From f89d1246617b0c67b1c27bd28fe727e80540051e Mon Sep 17 00:00:00 2001 From: Trevor Royer Date: Thu, 30 May 2024 17:23:21 -0600 Subject: [PATCH 10/29] fix default to degraded --- .../patch-inferenceservice-health-check.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml b/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml index 20b91bc8..04716bc1 100644 --- a/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml +++ b/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml @@ -12,7 +12,7 @@ if obj.status ~= nil then local progressing = false - local degraded = true + local degraded = false local status_false = 0 local status_unknown = 0 msg = "" From c6e0f2bddc317a0581d770c7f2224da448c60b65 Mon Sep 17 00:00:00 2001 From: Trevor Royer Date: Thu, 30 May 2024 17:25:21 -0600 Subject: [PATCH 11/29] skip overwriting message --- .../patch-inferenceservice-health-check.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml b/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml index 04716bc1..242d34fb 100644 --- a/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml +++ b/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml @@ -52,7 +52,7 @@ if progressing == false and degraded == false and status_unknown == 0 and status_false == 0 then health_status.status = "Healthy" - msg = "InferenceService is healthy." + # msg = "InferenceService is healthy." elseif progressing == true and status_unknown >= 0 then health_status.status = "Progressing" else From 112848718c28deddbb0612408e7d73071861c1f5 Mon Sep 17 00:00:00 2001 From: Trevor Royer Date: Thu, 30 May 2024 17:26:00 -0600 Subject: [PATCH 12/29] remove comment --- .../patch-inferenceservice-health-check.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml b/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml index 242d34fb..85a0b0ee 100644 --- a/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml +++ b/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml @@ -52,7 +52,6 @@ if progressing == false and degraded == false and status_unknown == 0 and status_false == 0 then health_status.status = "Healthy" - # msg = "InferenceService is healthy." elseif progressing == true and status_unknown >= 0 then health_status.status = "Progressing" else From f3562bf901f92147b83b6456998494580031bbd2 Mon Sep 17 00:00:00 2001 From: Trevor Royer Date: Thu, 30 May 2024 17:28:20 -0600 Subject: [PATCH 13/29] testing --- .../patch-inferenceservice-health-check.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml b/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml index 85a0b0ee..b19831e3 100644 --- a/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml +++ b/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml @@ -18,14 +18,14 @@ msg = "" if obj.modelStatus ~= nil then - if obj.modelStatus.transitionStatus ~= "UpToDate" then + -- if obj.modelStatus.transitionStatus ~= "UpToDate" then if obj.modelStatus.transitionStatus == "InProgress" then progressing = true else degraded = true end msg = msg .. "0: transitionStatus | " .. obj.modelStatus.transitionStatus .. "\n" - end + -- end end if obj.status.conditions ~= nil then From 373e175c3647ee94462fb06cd41e8370541f3a64 Mon Sep 17 00:00:00 2001 From: Trevor Royer Date: Thu, 30 May 2024 17:29:45 -0600 Subject: [PATCH 14/29] add more degraded --- .../patch-inferenceservice-health-check.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml b/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml index b19831e3..324ba4e3 100644 --- a/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml +++ b/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml @@ -50,7 +50,9 @@ end - if progressing == false and degraded == false and status_unknown == 0 and status_false == 0 then + if degraded == true then + health_status.status = "Degraded" + elseif progressing == false and degraded == false and status_unknown == 0 and status_false == 0 then health_status.status = "Healthy" elseif progressing == true and status_unknown >= 0 then health_status.status = "Progressing" From 80b218b997edd48da08b08fe702bc8b11f79d167 Mon Sep 17 00:00:00 2001 From: Trevor Royer Date: Thu, 30 May 2024 17:31:38 -0600 Subject: [PATCH 15/29] skip uptodate --- .../patch-inferenceservice-health-check.yaml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml b/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml index 324ba4e3..bc6543f6 100644 --- a/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml +++ b/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml @@ -18,14 +18,14 @@ msg = "" if obj.modelStatus ~= nil then - -- if obj.modelStatus.transitionStatus ~= "UpToDate" then - if obj.modelStatus.transitionStatus == "InProgress" then - progressing = true - else - degraded = true - end - msg = msg .. "0: transitionStatus | " .. obj.modelStatus.transitionStatus .. "\n" + -- if obj.modelStatus.transitionStatus == "UpToDate" then + if obj.modelStatus.transitionStatus == "InProgress" then + progressing = true + else + degraded = true + end -- end + msg = msg .. "0: transitionStatus | " .. obj.modelStatus.transitionStatus .. "\n" end if obj.status.conditions ~= nil then From 31fbb846738c687584fd84cdffc44314e1906594 Mon Sep 17 00:00:00 2001 From: Trevor Royer Date: Thu, 30 May 2024 17:32:26 -0600 Subject: [PATCH 16/29] testing --- .../patch-inferenceservice-health-check.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml b/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml index bc6543f6..d9a5a2b5 100644 --- a/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml +++ b/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml @@ -21,7 +21,7 @@ -- if obj.modelStatus.transitionStatus == "UpToDate" then if obj.modelStatus.transitionStatus == "InProgress" then progressing = true - else + elseif obj.modelStatus.transitionStatus == "BlockedByFailedLoad" or obj.modelStatus.transitionStatus == "InvalidSpec" then degraded = true end -- end From 34f33a3cae8617be8d002ef40627ef18994c0858 Mon Sep 17 00:00:00 2001 From: Trevor Royer Date: Thu, 30 May 2024 17:35:26 -0600 Subject: [PATCH 17/29] fix status ref --- .../patch-inferenceservice-health-check.yaml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml b/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml index d9a5a2b5..72692d48 100644 --- a/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml +++ b/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml @@ -17,15 +17,15 @@ local status_unknown = 0 msg = "" - if obj.modelStatus ~= nil then - -- if obj.modelStatus.transitionStatus == "UpToDate" then - if obj.modelStatus.transitionStatus == "InProgress" then - progressing = true - elseif obj.modelStatus.transitionStatus == "BlockedByFailedLoad" or obj.modelStatus.transitionStatus == "InvalidSpec" then - degraded = true + if obj.status.modelStatus ~= nil then + if obj.status.modelStatus.transitionStatus == "UpToDate" then + if obj.status.modelStatus.transitionStatus == "InProgress" then + progressing = true + else + degraded = true + end + msg = msg .. "0: transitionStatus | " .. obj.status.modelStatus.transitionStatus .. "\n" end - -- end - msg = msg .. "0: transitionStatus | " .. obj.modelStatus.transitionStatus .. "\n" end if obj.status.conditions ~= nil then From d0ab5b49bbcd71b8a6a6a37b2253bef88c5b1532 Mon Sep 17 00:00:00 2001 From: Trevor Royer Date: Thu, 30 May 2024 17:36:39 -0600 Subject: [PATCH 18/29] fix comparison --- .../patch-inferenceservice-health-check.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml b/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml index 72692d48..812a464d 100644 --- a/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml +++ b/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml @@ -18,7 +18,7 @@ msg = "" if obj.status.modelStatus ~= nil then - if obj.status.modelStatus.transitionStatus == "UpToDate" then + if obj.status.modelStatus.transitionStatus ~= "UpToDate" then if obj.status.modelStatus.transitionStatus == "InProgress" then progressing = true else From 488749dfedbe21cfaf7276b17ee4f6a910b81feb Mon Sep 17 00:00:00 2001 From: Trevor Royer Date: Thu, 30 May 2024 17:37:42 -0600 Subject: [PATCH 19/29] cleanup --- .../patch-inferenceservice-health-check.yaml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml b/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml index 812a464d..967a491a 100644 --- a/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml +++ b/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml @@ -50,10 +50,9 @@ end - if degraded == true then - health_status.status = "Degraded" - elseif progressing == false and degraded == false and status_unknown == 0 and status_false == 0 then + if progressing == false and degraded == false and status_unknown == 0 and status_false == 0 then health_status.status = "Healthy" + msg = "InferenceService is healthy." elseif progressing == true and status_unknown >= 0 then health_status.status = "Progressing" else From b968a7b0ef1f229928061c4d09f9d219765cbe4f Mon Sep 17 00:00:00 2001 From: Trevor Royer Date: Thu, 30 May 2024 17:44:39 -0600 Subject: [PATCH 20/29] update checks --- .../patch-inferenceservice-health-check.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml b/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml index 967a491a..27976ce1 100644 --- a/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml +++ b/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml @@ -53,7 +53,7 @@ if progressing == false and degraded == false and status_unknown == 0 and status_false == 0 then health_status.status = "Healthy" msg = "InferenceService is healthy." - elseif progressing == true and status_unknown >= 0 then + elseif degraded == false and status_unknown >= 0 then health_status.status = "Progressing" else health_status.status = "Degraded" From a47d9f9164dc1d15ecdec183d34fd6ec6f36fc28 Mon Sep 17 00:00:00 2001 From: Trevor Royer Date: Thu, 30 May 2024 17:48:05 -0600 Subject: [PATCH 21/29] add local --- .../patch-inferenceservice-health-check.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml b/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml index 27976ce1..223bb388 100644 --- a/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml +++ b/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml @@ -15,7 +15,7 @@ local degraded = false local status_false = 0 local status_unknown = 0 - msg = "" + local msg = "" if obj.status.modelStatus ~= nil then if obj.status.modelStatus.transitionStatus ~= "UpToDate" then From 9692bd5660cab7e9cf09e7f22e15844d9d28e961 Mon Sep 17 00:00:00 2001 From: Trevor Royer Date: Thu, 30 May 2024 17:52:31 -0600 Subject: [PATCH 22/29] add more details --- .../patch-inferenceservice-health-check.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml b/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml index 223bb388..34297c2e 100644 --- a/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml +++ b/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml @@ -54,8 +54,10 @@ health_status.status = "Healthy" msg = "InferenceService is healthy." elseif degraded == false and status_unknown >= 0 then + msg = msg .. " || unknown: " .. status_unknown .. " || false: " .. status_false health_status.status = "Progressing" else + msg = msg .. " || unknown: " .. status_unknown .. " || false: " .. status_false health_status.status = "Degraded" end From 53335687ab5359212514f0b8c23fc57a77267a10 Mon Sep 17 00:00:00 2001 From: Trevor Royer Date: Thu, 30 May 2024 17:54:12 -0600 Subject: [PATCH 23/29] add more details --- .../patch-inferenceservice-health-check.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml b/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml index 34297c2e..f3096b02 100644 --- a/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml +++ b/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml @@ -54,10 +54,10 @@ health_status.status = "Healthy" msg = "InferenceService is healthy." elseif degraded == false and status_unknown >= 0 then - msg = msg .. " || unknown: " .. status_unknown .. " || false: " .. status_false + msg = msg .. " || unknown: " .. status_unknown .. " || false: " .. status_false .. " || degraded: " .. degraded .. " || progressing: " .. progressing health_status.status = "Progressing" else - msg = msg .. " || unknown: " .. status_unknown .. " || false: " .. status_false + msg = msg .. " || unknown: " .. status_unknown .. " || false: " .. status_false .. " || degraded: " .. degraded .. " || progressing: " .. progressing health_status.status = "Degraded" end From 030257a80360d659acc098d306caf6b0a46fe038 Mon Sep 17 00:00:00 2001 From: Trevor Royer Date: Thu, 30 May 2024 17:55:40 -0600 Subject: [PATCH 24/29] fix boolean --- .../patch-inferenceservice-health-check.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml b/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml index f3096b02..6c6120ae 100644 --- a/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml +++ b/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml @@ -54,10 +54,10 @@ health_status.status = "Healthy" msg = "InferenceService is healthy." elseif degraded == false and status_unknown >= 0 then - msg = msg .. " || unknown: " .. status_unknown .. " || false: " .. status_false .. " || degraded: " .. degraded .. " || progressing: " .. progressing + msg = msg .. " || unknown: " .. status_unknown .. " || false: " .. status_false .. " || degraded: " .. tostring(degraded) .. " || progressing: " .. tostring(progressing) health_status.status = "Progressing" else - msg = msg .. " || unknown: " .. status_unknown .. " || false: " .. status_false .. " || degraded: " .. degraded .. " || progressing: " .. progressing + msg = msg .. " || unknown: " .. status_unknown .. " || false: " .. status_false .. " || degraded: " .. tostring(degraded) .. " || progressing: " .. tostring(progressing) health_status.status = "Degraded" end From c20351ea9dc039e960c8c739e7d319ace0d00263 Mon Sep 17 00:00:00 2001 From: Trevor Royer Date: Thu, 30 May 2024 17:56:38 -0600 Subject: [PATCH 25/29] fix comparison operator' --- .../patch-inferenceservice-health-check.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml b/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml index 6c6120ae..0512c55f 100644 --- a/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml +++ b/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml @@ -53,7 +53,7 @@ if progressing == false and degraded == false and status_unknown == 0 and status_false == 0 then health_status.status = "Healthy" msg = "InferenceService is healthy." - elseif degraded == false and status_unknown >= 0 then + elseif degraded == false and status_unknown > 0 then msg = msg .. " || unknown: " .. status_unknown .. " || false: " .. status_false .. " || degraded: " .. tostring(degraded) .. " || progressing: " .. tostring(progressing) health_status.status = "Progressing" else From 506395e8f956446fc4adcfe3e4ae36884a5096c0 Mon Sep 17 00:00:00 2001 From: Trevor Royer Date: Thu, 30 May 2024 17:58:08 -0600 Subject: [PATCH 26/29] cleanup msg --- .../patch-inferenceservice-health-check.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml b/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml index 0512c55f..1922d2b0 100644 --- a/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml +++ b/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml @@ -54,10 +54,8 @@ health_status.status = "Healthy" msg = "InferenceService is healthy." elseif degraded == false and status_unknown > 0 then - msg = msg .. " || unknown: " .. status_unknown .. " || false: " .. status_false .. " || degraded: " .. tostring(degraded) .. " || progressing: " .. tostring(progressing) health_status.status = "Progressing" else - msg = msg .. " || unknown: " .. status_unknown .. " || false: " .. status_false .. " || degraded: " .. tostring(degraded) .. " || progressing: " .. tostring(progressing) health_status.status = "Degraded" end From a34bacb9d7330c6a7e7de90c6ed0e6bc986866f9 Mon Sep 17 00:00:00 2001 From: Trevor Royer Date: Fri, 31 May 2024 08:00:29 -0600 Subject: [PATCH 27/29] remove test examples --- .../base/inference-service.yaml | 23 ----------------- .../base/inference-service.yaml | 25 ------------------- 2 files changed, 48 deletions(-) diff --git a/tenants/ai-example/multi-model-serving/base/inference-service.yaml b/tenants/ai-example/multi-model-serving/base/inference-service.yaml index 49c3bff3..5a276b6b 100644 --- a/tenants/ai-example/multi-model-serving/base/inference-service.yaml +++ b/tenants/ai-example/multi-model-serving/base/inference-service.yaml @@ -1,4 +1,3 @@ ---- apiVersion: serving.kserve.io/v1beta1 kind: InferenceService metadata: @@ -20,25 +19,3 @@ spec: storage: key: aws-connection-multi-model path: models/fraud-detection-model/frauddetectionmodel.onnx ---- -apiVersion: serving.kserve.io/v1beta1 -kind: InferenceService -metadata: - annotations: - openshift.io/display-name: bad - serving.kserve.io/deploymentMode: ModelMesh - name: bad - labels: - opendatahub.io/dashboard: 'true' -spec: - predictor: - model: - modelFormat: - name: onnx - version: '1' - name: '' - resources: {} - runtime: multi-model-server - storage: - key: aws-connection-multi-model - path: models/bad/bad.onnx \ No newline at end of file diff --git a/tenants/ai-example/single-model-serving-tgis/base/inference-service.yaml b/tenants/ai-example/single-model-serving-tgis/base/inference-service.yaml index c3900963..c151860c 100644 --- a/tenants/ai-example/single-model-serving-tgis/base/inference-service.yaml +++ b/tenants/ai-example/single-model-serving-tgis/base/inference-service.yaml @@ -1,4 +1,3 @@ ---- apiVersion: serving.kserve.io/v1beta1 kind: InferenceService metadata: @@ -22,27 +21,3 @@ spec: storage: key: aws-connection-tgis path: models/flan-t5-small ---- -apiVersion: serving.kserve.io/v1beta1 -kind: InferenceService -metadata: - annotations: - openshift.io/display-name: tgis - serving.knative.openshift.io/enablePassthrough: "true" - sidecar.istio.io/inject: "true" - sidecar.istio.io/rewriteAppHTTPProbers: "true" - labels: - opendatahub.io/dashboard: "true" - name: tgis-test -spec: - predictor: - maxReplicas: 1 - minReplicas: 1 - model: - modelFormat: - name: pytorch - name: "" - runtime: tgis - storage: - key: aws-connection-tgis - path: models/flan-t5-small-blah From d3de6674e74431033c5bc1114d95881d41a4a1f8 Mon Sep 17 00:00:00 2001 From: Trevor Royer Date: Fri, 31 May 2024 08:06:42 -0600 Subject: [PATCH 28/29] switch back to main --- .../overlays/rhoai-fast/patch-application-repo-revision.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clusters/overlays/rhoai-fast/patch-application-repo-revision.yaml b/clusters/overlays/rhoai-fast/patch-application-repo-revision.yaml index 635679cb..1abff624 100644 --- a/clusters/overlays/rhoai-fast/patch-application-repo-revision.yaml +++ b/clusters/overlays/rhoai-fast/patch-application-repo-revision.yaml @@ -3,4 +3,4 @@ value: 'https://github.com/redhat-ai-services/ai-accelerator.git' - op: replace path: /spec/source/targetRevision - value: health-checks + value: main From dd52c01421e0ab0061ea818c0c3f74e92d35de8e Mon Sep 17 00:00:00 2001 From: Trevor Royer Date: Fri, 31 May 2024 08:07:43 -0600 Subject: [PATCH 29/29] cleanup yaml --- .../patch-inferenceservice-health-check.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml b/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml index 1922d2b0..17d9cfac 100644 --- a/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml +++ b/components/operators/openshift-gitops/instance/components/health-check-openshift-ai/patch-inferenceservice-health-check.yaml @@ -27,7 +27,7 @@ msg = msg .. "0: transitionStatus | " .. obj.status.modelStatus.transitionStatus .. "\n" end end - + if obj.status.conditions ~= nil then for i, condition in pairs(obj.status.conditions) do