From 903f28bfceee59c5ef0920a4ff6c5da474ae4a67 Mon Sep 17 00:00:00 2001 From: Laurentiu Bradin <109964136+z103cb@users.noreply.github.com> Date: Fri, 27 Oct 2023 12:47:29 +0300 Subject: [PATCH] [Feature] Add end to end tests to apiserver Fixes #1388 --- .gitignore | 3 + apiserver/DEVELOPMENT.md | 20 ++-- apiserver/Makefile | 131 ++++++++++++---------- apiserver/test/e2e/job_server_e2e_test.go | 14 +-- 4 files changed, 88 insertions(+), 80 deletions(-) diff --git a/.gitignore b/.gitignore index 6bd6eadbbb0..cf8ea6e0c23 100644 --- a/.gitignore +++ b/.gitignore @@ -47,3 +47,6 @@ # Any file with a .backup extension **/*.backup + +# Any file with a .log extension +**/*.log diff --git a/apiserver/DEVELOPMENT.md b/apiserver/DEVELOPMENT.md index 8534a4b89bc..922571c265f 100644 --- a/apiserver/DEVELOPMENT.md +++ b/apiserver/DEVELOPMENT.md @@ -66,14 +66,14 @@ make test There are two `make` targets provide execute the end to end test (integration between Kuberay API server and Kuberay Operator): * `make e2e-test` executes all the tests defined in the [test/e2e package](./test/e2e/). It uses the cluster defined in `~/.kube/config` to submit the workloads. -* `make local-e2e-test` creates a local kind cluster, deploys the nightly operator image and a freshly build Kuberay API server into the kind cluster and shuts down the kind cluster upon successful execution of the end to end test. +* `make local-e2e-test` creates a local kind cluster, builds the Kuberay operator and API server images from the current branch and deploys the operator and API server into the kind cluster. It shuts down the kind cluster upon successful execution of the end to end test. If the tests fail the cluster will be left running and will have to be shut down manually by executing `make clean-cluster`. The `e2e` test targets use two variables to control what version of Ray images to use in the end to end tests: -* `E2E_API_SERVER_RAY_IMAGE` -- for the ray docker image. 
Currently set to `rayproject/ray:2.7.0-py310`. On Apple silicon or arm64 development machines the `-aarch64` suffix is added. +* `E2E_API_SERVER_RAY_IMAGE` -- for the ray docker image. Currently set to `rayproject/ray:2.7.0-py310`. On Apple silicon or arm64 development machines the `-aarch64` suffix is added to the image. * `E2E_API_SERVER_URL` -- for the base URL of the deployed KubeRayAPI server. The default value is: `http://localhost:31888` -The end to end test targets share the usage of the `GO_TEST_FLAGS`. Overriding the make file variable with a `-v` option allows for both unit and end to end tests to print any output / debug messages. By default, only if there's a test failure those messages are show. +The end to end test targets share the usage of the `GO_TEST_FLAGS`. Overriding the make file variable with a `-v` option allows for both unit and end to end tests to print any output / debug messages. By default, only if there's a test failure those messages are shown. The default values of the variables can be overridden using the `-e` make command line arguments. @@ -189,10 +189,11 @@ As a convenience for local development the following `make` targets are provided * `make cluster` -- creates a local kind cluster, using the configuration from `hack/kind-cluster-config.yaml`. It creates a port mapping allowing for the service running in the kind cluster to be accessed on `localhost:31888` for HTTP and `localhost:31887` for RPC. * `make clean-cluster` -- deletes the local kind cluster created with `make cluster` * `load-image` -- loads the docker image defined by the `IMG` make variable into the kind cluster. The default value for variable is: `kuberay/apiserver:latest`. The name of the image can be changed by using `make load-image -e IMG=` -* `operator-image` -- Build the operator image to be loaded in your kind cluster. You must specify a value for the operator image tag. 
Since the default value is set to `nightly`, the local image with this value will be overridden if `make deploy` operator is used later. This step is optional. Example: `make operator-image -e OPERATOR_IMAGE_TAG=latest` -* `load-operator-image` -- Load the operator image to the kind cluster created with `make cluster`. The tag for the operator image is `kuberay/operator:nightly`, and the tag can be overridden using `make load-operator-image -E OPERATOR_IMAGE_TAG=`. -* `deploy-operator` -- Deploy operator into your cluster. The tag for the operator image is `kuberay/operator:nightly`. +* `operator-image` -- Build the operator image to be loaded in your kind cluster. The operator image built is `kuberay/operator:latest`. The image tag can be overridden from the command line: (example: `make operator-image -e OPERATOR_IMAGE_TAG=foo`) +* `load-operator-image` -- Load the operator image to the kind cluster created with `make cluster`. It should be used in conjunction with the `deploy-operator` target. +* `deploy-operator` -- Deploy operator into your cluster. The tag for the operator image is `kuberay/operator:latest`. * `undeploy-operator` -- Undeploy operator from your cluster +* `load-ray-test-image` -- Load the ray test images into the cluster. 
When developing and testing with kind you might want to execute these targets together: @@ -201,10 +202,13 @@ When developing and testing with kind you might want to execute these targets to make docker-image cluster load-image deploy #To create a new API server image, operator image and deploy them on a new cluster -make docker-image operator-image cluster load-image load-operator-image deploy deploy-operator -e OPERATOR_IMAGE_TAG=latest +make docker-image operator-image cluster load-image load-operator-image deploy deploy-operator #To execute end 2 end tests with a local build operator and verbose output -make operator-image local-e2e-test -e OPERATOR_IMAGE_TAG=latest -e GO_TEST_FLAGS="-v" +make local-e2e-test -e GO_TEST_FLAGS="-v" + +#To execute end 2 end test with the nightly build operator +make local-e2e-test -e OPERATOR_IMAGE_TAG=nightly ``` #### Access API Server in the Cluster diff --git a/apiserver/Makefile b/apiserver/Makefile index 4e3a6c00417..09fba4adb2c 100644 --- a/apiserver/Makefile +++ b/apiserver/Makefile @@ -55,38 +55,95 @@ help: ## Display this help. ##@ Development +.PHONY: fmt: ## Run go fmt against code. go fmt ./... +.PHONY: vet vet: ## Run go vet against code. go vet ./... +.PHONY: fumpt fumpt: gofumpt ## Run gofmtumpt against code. $(GOFUMPT) -l -w . +.PHONY: imports imports: goimports ## Run goimports against code. $(GOIMPORTS) -l -w . -test: fmt vet fumpt imports lint ## Run unit tests. - go test ./pkg/... ./cmd/... $(GO_TEST_FLAGS) -race -coverprofile ray-kube-api-server-coverage.out -parallel 4 - +.PHONY: lint lint: golangci-lint fmt vet fumpt imports ## Run the linter. $(GOLANGCI_LINT) run --timeout=3m +build: fmt vet fumpt imports lint ## Build api server binary. + go build -o ${REPO_ROOT_BIN}/kuberay-apiserver cmd/main.go + +run: fmt vet fumpt imports lint ## Run the api server from your host. 
+ go run -race cmd/main.go -localSwaggerPath ${REPO_ROOT}/proto/swagger + +.PHONY: build-swagger +build-swagger: go-bindata + cd $(REPO_ROOT) && $(GOBINDATA) --nocompress --pkg swagger -o apiserver/pkg/swagger/datafile.go third_party/swagger-ui/... + +##@ Testing + +.PHONY: test +test: fmt vet fumpt imports lint ## Run all unit tests. + go test ./pkg/... ./cmd/... $(GO_TEST_FLAGS) -race -coverprofile ray-kube-api-server-coverage.out -parallel 4 + .PHONY: e2e-test e2e-test: ## Run end to end tests using a pre-exiting cluster. - go test ./test/e2e/... $(GO_TEST_FLAGS) -timeout 60m -race -count=1 + go test ./test/e2e/... $(GO_TEST_FLAGS) -timeout 60m -race -count=1 -parallel 4 -.PHONY: local-e2e-test +.PHONY: local-e2e-test ## Run end to end tests on newly created cluster. local-e2e-test: docker-image operator-image cluster load-image load-operator-image deploy-operator deploy load-ray-test-image e2e-test clean-cluster ## Run end to end tests, create a fresh kind cluster will all components deployed. -##@ Build +##@ Testing Setup +KIND_CONFIG ?= hack/kind-cluster-config.yaml +KIND_CLUSTER_NAME ?= ray-api-server-cluster +OPERATOR_IMAGE_TAG ?= latest +.PHONY: cluster +cluster: kind ## Start kind development cluster. + $(KIND) create cluster -n $(KIND_CLUSTER_NAME) --config $(KIND_CONFIG) -build: fmt vet fumpt imports lint ## Build api server binary. - go build -o ${REPO_ROOT_BIN}/kuberay-apiserver cmd/main.go +.PHONY: clean-cluster +clean-cluster: kind ## Delete kind development cluster. + $(KIND) delete cluster -n $(KIND_CLUSTER_NAME) -run: fmt vet fumpt imports lint ## Run the api server from your host. - go run -race cmd/main.go -localSwaggerPath ${REPO_ROOT}/proto/swagger +.PHONY: load-image +load-image: ## Load the api server image to the kind cluster created with create-kind-cluster. + $(KIND) load docker-image $(IMG) -n $(KIND_CLUSTER_NAME) + +.PHONY: operator-image +operator-image: ## Build the operator image to be loaded in your kind cluster. 
+ cd ../ray-operator && $(MAKE) docker-image -e IMG=kuberay/operator:$(OPERATOR_IMAGE_TAG) + +.PHONY: deploy-operator +deploy-operator: ## Deploy operator via helm into the K8s cluster specified in ~/.kube/config. +# Note that you should make your operator image available by either pushing it to an image registry, such as DockerHub or Quay, or by loading the image into the Kubernetes cluster. +# If you are using a Kind cluster for development, you can run `make load-operator-image` to load the newly built image into the Kind cluster. + helm upgrade --install raycluster ../helm-chart/kuberay-operator --wait \ + --set image.tag=${OPERATOR_IMAGE_TAG} --set image.pullPolicy=IfNotPresent + +.PHONY: undeploy-operator +undeploy-operator: ## Undeploy operator via helm from the K8s cluster specified in ~/.kube/config. + helm uninstall raycluster --wait + +.PHONY: load-operator-image +load-operator-image: ## Load the operator image to the kind cluster created with make cluster. +ifneq (${OPERATOR_IMAGE_TAG}, latest) + $(ENGINE) pull kuberay/operator:$(OPERATOR_IMAGE_TAG) +endif + $(KIND) load docker-image kuberay/operator:$(OPERATOR_IMAGE_TAG) -n $(KIND_CLUSTER_NAME) + +.PHONY: load-ray-test-image +load-ray-test-image: ## Load the ray test images + $(ENGINE) pull $(E2E_API_SERVER_RAY_IMAGE) + $(KIND) load docker-image $(E2E_API_SERVER_RAY_IMAGE) -n $(KIND_CLUSTER_NAME) + $(ENGINE) pull rayproject/ray:latest + $(KIND) load docker-image rayproject/ray:latest -n $(KIND_CLUSTER_NAME) + +##@ Docker Build docker-image: test ## Build image with the api server. $(ENGINE) build -t ${IMG} -f Dockerfile .. @@ -94,10 +151,6 @@ docker-image: test ## Build image with the api server. docker-push: ## Push image with the api server. $(ENGINE) push ${IMG} -.PHONY: build-swagger -build-swagger: go-bindata - cd $(REPO_ROOT) && $(GOBINDATA) --nocompress --pkg swagger -o apiserver/pkg/swagger/datafile.go third_party/swagger-ui/... 
- ##@ Deployment .PHONY: install install: kustomize ## Install the kuberay api server to the K8s cluster specified in ~/.kube/config. @@ -119,7 +172,7 @@ deploy: ## Deploy via helm the kuberay api server to the K8s cluster specified i undeploy: ## Undeploy via helm the kuberay api server to the K8s cluster specified in ~/.kube/config. helm uninstall kuberay-apiserver --wait -##@ Development Tools +##@ Development Tools Setup ## Location to install dependencies to $(REPO_ROOT_BIN): @@ -137,7 +190,7 @@ GOBINDATA ?= $(REPO_ROOT_BIN)/go-bindata ## Tool Versions KUSTOMIZE_VERSION ?= v3.8.7 GOFUMPT_VERSION ?= v0.3.1 -GOIMPORTS_VERSION ?= latest +GOIMPORTS_VERSION ?= v0.14.0 GOLANGCI_LINT_VERSION ?= v1.54.1 KIND_VERSION ?= v0.19.0 GOBINDATA_VERSION ?= v4.0.2 @@ -184,49 +237,3 @@ clean-dev-tools: ## Remove all development tools rm -f $(REPO_ROOT_BIN)/goimports rm -f $(REPO_ROOT_BIN)/kind rm -f $(REPO_ROOT_BIN)/go-bindata - - -##@ Testing Setup and Tools -KIND_CONFIG ?= hack/kind-cluster-config.yaml -KIND_CLUSTER_NAME ?= ray-api-server-cluster -OPERATOR_IMAGE_TAG ?= latest -.PHONY: cluster -cluster: kind ## Start kind development cluster. - $(KIND) create cluster -n $(KIND_CLUSTER_NAME) --config $(KIND_CONFIG) - -.PHONY: clean-cluster -clean-cluster: kind ## Delete kind development cluster. - $(KIND) delete cluster -n $(KIND_CLUSTER_NAME) - -.PHONY: load-image -load-image: ## Load the api server image to the kind cluster created with create-kind-cluster. - $(KIND) load docker-image $(IMG) -n $(KIND_CLUSTER_NAME) - -.PHONY: operator-image -operator-image: ## Build the operator image to be loaded in your kind cluster. - cd ../ray-operator && $(MAKE) docker-image -e IMG=kuberay/operator:$(OPERATOR_IMAGE_TAG) - -.PHONY: deploy-operator -deploy-operator: ## Deploy operator via helm into the K8s cluster specified in ~/.kube/config. 
-# Note that you should make your operatorimage available by either pushing it to an image registry, such as DockerHub or Quay, or by loading the image into the Kubernetes cluster. -# If you are using a Kind cluster for development, you can run `make load-image` to load the newly built image into the Kind cluster. - helm upgrade --install raycluster ../helm-chart/kuberay-operator --wait \ - --set image.tag=${OPERATOR_IMAGE_TAG} --set image.pullPolicy=IfNotPresent - -.PHONY: undeploy-operator -undeploy-operator: ## Undeploy operator via helm from the K8s cluster specified in ~/.kube/config. - helm uninstall raycluster --wait - -.PHONY: load-operator-image -load-operator-image: ## Load the operator image to the kind cluster created with make cluster. -ifneq (${OPERATOR_IMAGE_TAG}, latest) - $(ENGINE) pull kuberay/operator:$(OPERATOR_IMAGE_TAG) -endif - $(KIND) load docker-image kuberay/operator:$(OPERATOR_IMAGE_TAG) -n $(KIND_CLUSTER_NAME) - -.PHONY: load-ray-test-image -load-ray-test-image: ## Load the ray test image - $(ENGINE) pull $(E2E_API_SERVER_RAY_IMAGE) - $(KIND) load docker-image $(E2E_API_SERVER_RAY_IMAGE) -n $(KIND_CLUSTER_NAME) - $(ENGINE) pull rayproject/ray:latest - $(KIND) load docker-image rayproject/ray:latest -n $(KIND_CLUSTER_NAME) diff --git a/apiserver/test/e2e/job_server_e2e_test.go b/apiserver/test/e2e/job_server_e2e_test.go index 16b0aaa7713..aa44b39e997 100644 --- a/apiserver/test/e2e/job_server_e2e_test.go +++ b/apiserver/test/e2e/job_server_e2e_test.go @@ -430,12 +430,10 @@ func TestCreateJobWithClusterSelector(t *testing.T) { Entrypoint: "python /home/ray/samples/counter_sample.py", Metadata: map[string]string{}, RuntimeEnv: "pip:\n - requests==2.26.0\n - pendulum==2.1.2\nenv_vars:\n counter_name: test_counter\n", - ClusterSelector: map[string]string{"ray.io/cluster": "{cluster-name}"}, + ClusterSelector: map[string]string{"ray.io/cluster": cluster.Name}, TtlSecondsAfterFinished: 60, JobSubmitter: &api.RayJobSubmitter{ - Image: 
cluster.ClusterSpec.HeadGroupSpec.Image, - Cpu: "0.1", - Memory: "100", + Image: cluster.ClusterSpec.HeadGroupSpec.Image, }, }, Namespace: tCtx.GetNamespaceName(), @@ -454,13 +452,9 @@ func TestCreateJobWithClusterSelector(t *testing.T) { RuntimeEnv: "pip:\n - requests==2.26.0\n - pendulum==2.1.2\nenv_vars:\n counter_name: test_counter\n", ShutdownAfterJobFinishes: true, TtlSecondsAfterFinished: 60, - ClusterSelector: map[string]string{ - "ray.io/cluster": tCtx.GetRayClusterName(), - }, + ClusterSelector: map[string]string{"ray.io/cluster": cluster.Name}, JobSubmitter: &api.RayJobSubmitter{ - Image: cluster.ClusterSpec.HeadGroupSpec.Image, - Cpu: "0.1", - Memory: "100", + Image: cluster.ClusterSpec.HeadGroupSpec.Image, }, }, Namespace: tCtx.GetNamespaceName(),