diff --git a/tpu-provisioner/admission_controller/Dockerfile b/tpu-provisioner/admission_controller/Dockerfile
index b555f58c3..542921eaa 100644
--- a/tpu-provisioner/admission_controller/Dockerfile
+++ b/tpu-provisioner/admission_controller/Dockerfile
@@ -3,4 +3,4 @@ WORKDIR /webhook
 COPY requirements.txt /webhook
 COPY admission_controller.py /webhook
 RUN pip install --no-cache-dir --upgrade -r /webhook/requirements.txt
-CMD ["uvicorn", "admission_controller:app", "--host", "0.0.0.0", "--port", "5000","--ssl-keyfile=/certs/webhook.key", "--ssl-certfile=/certs/webhook.crt"]
+CMD ["uvicorn", "admission_controller:app", "--host", "0.0.0.0", "--port", "5000","--ssl-keyfile=/certs/tls.key", "--ssl-certfile=/certs/tls.crt"]
diff --git a/tpu-provisioner/admission_controller/README.md b/tpu-provisioner/admission_controller/README.md
index 3505fef8a..269efffe0 100644
--- a/tpu-provisioner/admission_controller/README.md
+++ b/tpu-provisioner/admission_controller/README.md
@@ -30,6 +30,28 @@ for changing to fit their use case.
 
 Update the Deployment in `manifests/manifest.yaml` with this container image.
 
+### Local Development
+
+Create a minikube (or kind) cluster.
+
+```bash
+minikube start
+# OR: kind create cluster
+```
+
+Install dependencies.
+
+```bash
+kubectl apply --server-side -f https://github.com/kubernetes-sigs/jobset/releases/download/v0.5.1/manifests.yaml
+kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v1.14.5/cert-manager.yaml
+```
+
+Deploy the controller locally.
+
+```bash
+skaffold dev
+```
+
 ### Run Unit tests
 
 This project uses [pytest](https://docs.pytest.org) for unit testing.
@@ -43,4 +65,4 @@ E2E testing is currently done manually via the following steps:
 1. [Install JobSet](https://jobset.sigs.k8s.io/docs/installation/)
 2. **Deploy admission controller**: Run `kubectl apply -f manifests/` from the `admission_controller/` directory.
 3. **Create a test JobSet**: Run `kubectl apply -f test/test-jobset.yaml`
-4. **Check Jobs were mutated correctly**: Run `kubectl describe jobs` and view the nodeSelectors in the pod template.
\ No newline at end of file
+4. **Check Jobs were mutated correctly**: Run `kubectl describe jobs` and view the nodeSelectors in the pod template.
diff --git a/tpu-provisioner/admission_controller/manifests/manifest.yaml b/tpu-provisioner/admission_controller/manifests/manifest.yaml
index 06d8d6935..ad3092582 100644
--- a/tpu-provisioner/admission_controller/manifests/manifest.yaml
+++ b/tpu-provisioner/admission_controller/manifests/manifest.yaml
@@ -1,28 +1,22 @@
 apiVersion: v1
-kind: Secret
-metadata:
-  name: admission-tls
-type: Opaque
-data:
-  webhook.crt: "" # base64 encoded certificate
-  webhook.key: "" # base64 encoded private key
----
-apiVersion: v1
 kind: Service
 metadata:
-  name: mutating-webhook
+  name: admission-controller
+  namespace: tpu-provisioner-system
 spec:
   selector:
-    app: mutating-webhook
+    app: admission-controller
   ports:
   - port: 5000
 ---
 apiVersion: admissionregistration.k8s.io/v1
 kind: MutatingWebhookConfiguration
 metadata:
-  name: mutating-webhook
+  name: tpu-provisioner-admission-controller
+  annotations:
+    cert-manager.io/inject-ca-from: tpu-provisioner-system/admission-controller
 webhooks:
-- name: mutating-webhook.default.svc
+- name: admission-controller.tpu-provisioner-system.svc
   matchPolicy: Equivalent
   admissionReviewVersions: ["v1"]
   sideEffects: None
@@ -32,34 +26,36 @@ webhooks:
     apiVersions: ["v1"]
     resources: ["jobs"]
     scope: "Namespaced"
-  failurePolicy: Ignore
+  failurePolicy: Fail
   timeoutSeconds: 20
   clientConfig:
-    caBundle: # base64 CA bundle here
+    #caBundle: # base64 CA bundle here
     service:
-      namespace: default
-      name: mutating-webhook
+      namespace: tpu-provisioner-system
+      name: admission-controller
       path: /mutate
       port: 5000
 ---
 apiVersion: apps/v1
 kind: Deployment
 metadata:
-  name: mutating-webhook
+  name: admission-controller
+  namespace: tpu-provisioner-system
 spec:
   replicas: 1
   selector:
     matchLabels:
-      app: mutating-webhook
+      app: admission-controller
   template:
     metadata:
       labels:
-        app: mutating-webhook
+        app: admission-controller
     spec:
       containers:
-      - name: mutating-webhook
-        image: "" # build container image, push to repository and add it here
-        imagePullPolicy: Always
+      - name: webhook
+        # build container image, push to repository and add it here
+        image: example.com/tpu-provisioner/admission-controller
+        imagePullPolicy: IfNotPresent
         ports:
         - containerPort: 5000
         env:
@@ -75,4 +71,24 @@ spec:
       volumes:
       - name: certs-volume
         secret:
-          secretName: admission-tls
\ No newline at end of file
+          secretName: admission-controller-tls
+---
+apiVersion: cert-manager.io/v1
+kind: Issuer
+metadata:
+  name: admission-controller-issuer
+  namespace: tpu-provisioner-system
+spec:
+  selfSigned: {}
+---
+apiVersion: cert-manager.io/v1
+kind: Certificate
+metadata:
+  name: admission-controller
+  namespace: tpu-provisioner-system
+spec:
+  secretName: admission-controller-tls
+  dnsNames:
+  - admission-controller.tpu-provisioner-system.svc
+  issuerRef:
+    name: admission-controller-issuer
\ No newline at end of file
diff --git a/tpu-provisioner/admission_controller/skaffold.yaml b/tpu-provisioner/admission_controller/skaffold.yaml
new file mode 100644
index 000000000..0f5a16f63
--- /dev/null
+++ b/tpu-provisioner/admission_controller/skaffold.yaml
@@ -0,0 +1,13 @@
+apiVersion: skaffold/v4beta11
+kind: Config
+metadata:
+  name: admission-controller
+build:
+  local: {}
+  artifacts:
+  - image: example.com/tpu-provisioner/admission-controller
+    docker:
+      dockerfile: Dockerfile
+manifests:
+  rawYaml:
+  - manifests/manifest.yaml