From 304a1e766036205739825615e5c8b0399f5602b3 Mon Sep 17 00:00:00 2001 From: Shilpa Chugh Date: Mon, 22 Jul 2024 13:50:14 +0530 Subject: [PATCH] Add unit tests for RHOAI ray image --- tests/test-case-custom-image.yaml | 156 ++++++++++++++++++++++++++++++ tests/unit_test.py | 63 ++++++++++-- tests/unit_test_support.py | 1 - 3 files changed, 211 insertions(+), 9 deletions(-) create mode 100644 tests/test-case-custom-image.yaml diff --git a/tests/test-case-custom-image.yaml b/tests/test-case-custom-image.yaml new file mode 100644 index 000000000..8a417a581 --- /dev/null +++ b/tests/test-case-custom-image.yaml @@ -0,0 +1,156 @@ +apiVersion: ray.io/v1 +kind: RayCluster +metadata: + annotations: + app.kubernetes.io/managed-by: test-prefix + labels: + controller-tools.k8s.io: '1.0' + kueue.x-k8s.io/queue-name: local-queue-default + testlabel: test + testlabel2: test + name: unit-test-cluster-custom-image + namespace: ns +spec: + autoscalerOptions: + idleTimeoutSeconds: 60 + imagePullPolicy: Always + resources: + limits: + cpu: 500m + memory: 512Mi + requests: + cpu: 500m + memory: 512Mi + upscalingMode: Default + enableInTreeAutoscaling: false + headGroupSpec: + enableIngress: false + rayStartParams: + block: 'true' + dashboard-host: 0.0.0.0 + num-gpus: '0' + resources: '"{}"' + serviceType: ClusterIP + template: + spec: + containers: + - image: quay.io/project-codeflare/ray:2.20.0-py39-cu118 + imagePullPolicy: Always + lifecycle: + preStop: + exec: + command: + - /bin/sh + - -c + - ray stop + name: ray-head + ports: + - containerPort: 6379 + name: gcs + - containerPort: 8265 + name: dashboard + - containerPort: 10001 + name: client + resources: + limits: + cpu: 2 + memory: 8G + requests: + cpu: 2 + memory: 8G + volumeMounts: + - mountPath: /etc/pki/tls/certs/odh-trusted-ca-bundle.crt + name: odh-trusted-ca-cert + subPath: odh-trusted-ca-bundle.crt + - mountPath: /etc/ssl/certs/odh-trusted-ca-bundle.crt + name: odh-trusted-ca-cert + subPath: odh-trusted-ca-bundle.crt + - mountPath: /etc/pki/tls/certs/odh-ca-bundle.crt + name: odh-ca-cert + subPath: odh-ca-bundle.crt + - mountPath: /etc/ssl/certs/odh-ca-bundle.crt + name: odh-ca-cert + subPath: odh-ca-bundle.crt + imagePullSecrets: + - name: unit-test-pull-secret + volumes: + - configMap: + items: + - key: ca-bundle.crt + path: odh-trusted-ca-bundle.crt + name: odh-trusted-ca-bundle + optional: true + name: odh-trusted-ca-cert + - configMap: + items: + - key: odh-ca-bundle.crt + path: odh-ca-bundle.crt + name: odh-trusted-ca-bundle + optional: true + name: odh-ca-cert + rayVersion: 2.23.0 + workerGroupSpecs: + - groupName: small-group-unit-test-cluster-custom-image + maxReplicas: 2 + minReplicas: 2 + rayStartParams: + block: 'true' + num-gpus: '7' + resources: '"{}"' + replicas: 2 + template: + metadata: + annotations: + key: value + labels: + key: value + spec: + containers: + - image: quay.io/project-codeflare/ray:2.20.0-py39-cu118 + lifecycle: + preStop: + exec: + command: + - /bin/sh + - -c + - ray stop + name: machine-learning + resources: + limits: + cpu: 4 + memory: 6G + nvidia.com/gpu: 7 + requests: + cpu: 3 + memory: 5G + nvidia.com/gpu: 7 + volumeMounts: + - mountPath: /etc/pki/tls/certs/odh-trusted-ca-bundle.crt + name: odh-trusted-ca-cert + subPath: odh-trusted-ca-bundle.crt + - mountPath: /etc/ssl/certs/odh-trusted-ca-bundle.crt + name: odh-trusted-ca-cert + subPath: odh-trusted-ca-bundle.crt + - mountPath: /etc/pki/tls/certs/odh-ca-bundle.crt + name: odh-ca-cert + subPath: odh-ca-bundle.crt + - mountPath: /etc/ssl/certs/odh-ca-bundle.crt + name: odh-ca-cert + subPath: odh-ca-bundle.crt + imagePullSecrets: + - name: unit-test-pull-secret + volumes: + - configMap: + items: + - key: ca-bundle.crt + path: odh-trusted-ca-bundle.crt + name: odh-trusted-ca-bundle + optional: true + name: odh-trusted-ca-cert + - configMap: + items: + - key: odh-ca-bundle.crt + path: odh-ca-bundle.crt + name: odh-trusted-ca-bundle + optional: true + name: odh-ca-cert diff --git a/tests/unit_test.py b/tests/unit_test.py index bb14d2b20..2709894ec 100644 --- a/tests/unit_test.py +++ b/tests/unit_test.py @@ -262,7 +262,6 @@ def test_config_creation(): assert config.worker_cpu_requests == 3 and config.worker_cpu_limits == 4 assert config.worker_memory_requests == "5G" and config.worker_memory_limits == "6G" assert config.worker_extended_resource_requests == {"nvidia.com/gpu": 7} - assert config.image == "quay.io/rhoai/ray:2.23.0-py39-cu121" assert config.template == f"{parent}/src/codeflare_sdk/templates/base-template.yaml" assert config.machine_types == ["cpu.small", "gpu.large"] assert config.image_pull_secrets == ["unit-test-pull-secret"] @@ -415,7 +414,6 @@ def test_cluster_creation_no_mcad_local_queue(mocker): worker_extended_resource_requests={"nvidia.com/gpu": 7}, machine_types=["cpu.small", "gpu.large"], image_pull_secrets=["unit-test-pull-secret"], - image="quay.io/rhoai/ray:2.23.0-py39-cu121", write_to_file=True, appwrapper=False, local_queue="local-queue-default", @@ -443,7 +441,6 @@ def test_default_cluster_creation(mocker): ) default_config = ClusterConfiguration( name="unit-test-default-cluster", - image="quay.io/rhoai/ray:2.23.0-py39-cu121", appwrapper=True, ) cluster = Cluster(default_config) @@ -459,6 +456,61 @@ def test_default_cluster_creation(mocker): assert cluster.config.namespace == "opendatahub" +def test_cluster_creation_with_custom_image(mocker): + # With written resources + # Create Ray Cluster with local queue specified + mocker.patch("kubernetes.client.ApisApi.get_api_versions") + mocker.patch( + "kubernetes.client.CustomObjectsApi.get_cluster_custom_object", + return_value={"spec": {"domain": "apps.cluster.awsroute.org"}}, + ) + mocker.patch( + "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object", + return_value=get_local_queue("kueue.x-k8s.io", "v1beta1", "ns", "localqueues"), + ) + mocker.patch("os.environ.get", return_value="test-prefix") + config = createClusterConfig() + config.name = "unit-test-cluster-custom-image" + config.appwrapper = False + config.image = "quay.io/project-codeflare/ray:2.20.0-py39-cu118" + config.local_queue = "local-queue-default" + config.labels = {"testlabel": "test", "testlabel2": "test"} + cluster = Cluster(config) + assert cluster.app_wrapper_yaml == f"{aw_dir}unit-test-cluster-custom-image.yaml" + assert cluster.app_wrapper_name == "unit-test-cluster-custom-image" + assert filecmp.cmp( + f"{aw_dir}unit-test-cluster-custom-image.yaml", + f"{parent}/tests/test-case-custom-image.yaml", + shallow=True, + ) + # With resources loaded in memory + config = ClusterConfiguration( + name="unit-test-cluster-custom-image", + namespace="ns", + num_workers=2, + worker_cpu_requests=3, + worker_cpu_limits=4, + worker_memory_requests=5, + worker_memory_limits=6, + worker_extended_resource_requests={"nvidia.com/gpu": 7}, + machine_types=["cpu.small", "gpu.large"], + image_pull_secrets=["unit-test-pull-secret"], + image="quay.io/project-codeflare/ray:2.20.0-py39-cu118", + write_to_file=True, + appwrapper=False, + local_queue="local-queue-default", + labels={"testlabel": "test", "testlabel2": "test"}, + ) + cluster = Cluster(config) + assert cluster.app_wrapper_yaml == f"{aw_dir}unit-test-cluster-custom-image.yaml" + assert cluster.app_wrapper_name == "unit-test-cluster-custom-image" + assert filecmp.cmp( + f"{aw_dir}unit-test-cluster-custom-image.yaml", + f"{parent}/tests/test-case-custom-image.yaml", + shallow=True, + ) + + def test_gen_names_with_name(mocker): mocker.patch.object( uuid, "uuid4", return_value=uuid.UUID("00000000-0000-0000-0000-000000000001") @@ -792,7 +844,6 @@ def test_ray_job_wrapping(mocker): return_value=get_local_queue("kueue.x-k8s.io", "v1beta1", "ns", "localqueues"), ) cluster = cluster = createClusterWithConfig(mocker) - cluster.config.image = "quay.io/rhoai/ray:2.23.0-py39-cu121" mocker.patch( "ray.job_submission.JobSubmissionClient._check_connection_and_version_with_url", return_value="None", @@ -910,7 +961,6 @@ def test_ray_details(mocker, capsys): ClusterConfiguration( name="raytest2", namespace="ns", - image="quay.io/rhoai/ray:2.23.0-py39-cu121", write_to_file=True, appwrapper=True, local_queue="local_default_queue", @@ -2313,7 +2363,6 @@ def test_cluster_status(mocker): ClusterConfiguration( name="test", namespace="ns", - image="quay.io/rhoai/ray:2.23.0-py39-cu121", write_to_file=True, appwrapper=True, local_queue="local_default_queue", @@ -2408,7 +2457,6 @@ def test_wait_ready(mocker, capsys): ClusterConfiguration( name="test", namespace="ns", - image="quay.io/rhoai/ray:2.23.0-py39-cu121", write_to_file=True, appwrapper=True, local_queue="local-queue-default", @@ -2635,7 +2683,6 @@ def throw_if_getting_raycluster(group, version, namespace, plural): cluster = Cluster( ClusterConfiguration( "test_cluster", - image="quay.io/rhoai/ray:2.23.0-py39-cu121", write_to_file=False, ) ) diff --git a/tests/unit_test_support.py b/tests/unit_test_support.py index 25e206c52..9fcdd5a5e 100644 --- a/tests/unit_test_support.py +++ b/tests/unit_test_support.py @@ -17,7 +17,6 @@ def createClusterConfig(): appwrapper=True, machine_types=["cpu.small", "gpu.large"], image_pull_secrets=["unit-test-pull-secret"], - image="quay.io/rhoai/ray:2.23.0-py39-cu121", write_to_file=True, ) return config