From 4aeb6503162465766476519339d3285f75ffe03e Mon Sep 17 00:00:00 2001 From: Yannis Zarkadas Date: Thu, 18 Mar 2021 18:12:16 +0200 Subject: [PATCH] pytorch-operator: Consolidate manifests (#323) * manifests: Add base/overlays structure Signed-off-by: Yannis Zarkadas * Edit docs and scripts to use new manifests Signed-off-by: Yannis Zarkadas * Delete old manifests Signed-off-by: Yannis Zarkadas --- README.md | 2 +- developer_guide.md | 2 +- manifests/{ => base}/crd.yaml | 0 manifests/{ => base}/deployment.yaml | 0 .../base/kustomization.yaml | 10 +++-- manifests/{ => base}/podgroup.yaml | 0 manifests/{ => base}/rbac.yaml | 0 manifests/{ => base}/service.yaml | 0 .../user-clusterroles.yaml} | 32 -------------- .../kubeflow}/kustomization.yaml | 11 +++-- .../standalone}/kustomization.yaml | 10 ++--- .../{ => overlays/standalone}/namespace.yaml | 0 manifests/pytorch-job-crds/base/crd.yaml | 42 ------------------ .../pytorch-job-crds/base/kustomization.yaml | 4 -- .../overlays/application/application.yaml | 42 ------------------ .../overlays/application/kustomization.yaml | 9 ---- .../base/cluster-role-binding.yaml | 13 ------ .../pytorch-operator/base/deployment.yaml | 34 -------------- manifests/pytorch-operator/base/params.env | 3 -- .../base/service-account.yaml | 6 --- manifests/pytorch-operator/base/service.yaml | 19 -------- .../overlays/application/application.yaml | 44 ------------------- scripts/setup-pytorch-operator.sh | 4 +- 23 files changed, 21 insertions(+), 266 deletions(-) rename manifests/{ => base}/crd.yaml (100%) rename manifests/{ => base}/deployment.yaml (100%) rename manifests/{pytorch-operator => }/base/kustomization.yaml (63%) rename manifests/{ => base}/podgroup.yaml (100%) rename manifests/{ => base}/rbac.yaml (100%) rename manifests/{ => base}/service.yaml (100%) rename manifests/{pytorch-operator/base/cluster-role.yaml => base/user-clusterroles.yaml} (71%) rename manifests/{pytorch-operator/overlays/application => overlays/kubeflow}/kustomization.yaml (55%) rename manifests/{ => overlays/standalone}/kustomization.yaml (61%) rename manifests/{ => overlays/standalone}/namespace.yaml (100%) delete mode 100644 manifests/pytorch-job-crds/base/crd.yaml delete mode 100644 manifests/pytorch-job-crds/base/kustomization.yaml delete mode 100644 manifests/pytorch-job-crds/overlays/application/application.yaml delete mode 100644 manifests/pytorch-job-crds/overlays/application/kustomization.yaml delete mode 100644 manifests/pytorch-operator/base/cluster-role-binding.yaml delete mode 100644 manifests/pytorch-operator/base/deployment.yaml delete mode 100644 manifests/pytorch-operator/base/params.env delete mode 100644 manifests/pytorch-operator/base/service-account.yaml delete mode 100644 manifests/pytorch-operator/base/service.yaml delete mode 100644 manifests/pytorch-operator/overlays/application/application.yaml diff --git a/README.md b/README.md index e6852e1f7..b52430868 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ ## Overview -This repository contains the specification and implementation of `PyTorchJob` custom resource definition. Using this custom resource, users can create and manage PyTorch jobs like other built-in resources in Kubernetes. See [CRD definition](https://github.com/kubeflow/pytorch-operator/blob/master/manifests/crd.yaml) +This repository contains the specification and implementation of `PyTorchJob` custom resource definition. Using this custom resource, users can create and manage PyTorch jobs like other built-in resources in Kubernetes. See [CRD definition](https://github.com/kubeflow/pytorch-operator/blob/master/manifests/base/crd.yaml) ## Prerequisites diff --git a/developer_guide.md b/developer_guide.md index 476ccfe65..f57134a86 100644 --- a/developer_guide.md +++ b/developer_guide.md @@ -55,7 +55,7 @@ export KUBEFLOW_NAMESPACE=$(your_namespace) After the cluster is up, the PyTorch Operator CRD should be created on the cluster. ```bash -kubectl create -f ./manifests/crd.yaml +kubectl create -f ./manifests/base/crd.yaml ``` ### Run Operator diff --git a/manifests/crd.yaml b/manifests/base/crd.yaml similarity index 100% rename from manifests/crd.yaml rename to manifests/base/crd.yaml diff --git a/manifests/deployment.yaml b/manifests/base/deployment.yaml similarity index 100% rename from manifests/deployment.yaml rename to manifests/base/deployment.yaml diff --git a/manifests/pytorch-operator/base/kustomization.yaml b/manifests/base/kustomization.yaml similarity index 63% rename from manifests/pytorch-operator/base/kustomization.yaml rename to manifests/base/kustomization.yaml index 09728b045..8598f5782 100644 --- a/manifests/pytorch-operator/base/kustomization.yaml +++ b/manifests/base/kustomization.yaml @@ -2,14 +2,16 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization namespace: kubeflow resources: -- cluster-role-binding.yaml -- cluster-role.yaml +- crd.yaml +- rbac.yaml - deployment.yaml -- service-account.yaml - service.yaml +- user-clusterroles.yaml commonLabels: + app: pytorch-operator kustomize.component: pytorch-operator + app.kubernetes.io/component: pytorch + app.kubernetes.io/name: pytorch-operator images: - name: gcr.io/kubeflow-images-public/pytorch-operator - newName: gcr.io/kubeflow-images-public/pytorch-operator newTag: vmaster-g518f9c76 diff --git a/manifests/podgroup.yaml b/manifests/base/podgroup.yaml similarity index 100% rename from manifests/podgroup.yaml rename to manifests/base/podgroup.yaml diff --git a/manifests/rbac.yaml b/manifests/base/rbac.yaml similarity index 100% rename from manifests/rbac.yaml rename to manifests/base/rbac.yaml diff --git a/manifests/service.yaml b/manifests/base/service.yaml similarity index 100% rename from manifests/service.yaml rename to manifests/base/service.yaml diff --git a/manifests/pytorch-operator/base/cluster-role.yaml b/manifests/base/user-clusterroles.yaml similarity index 71% rename from manifests/pytorch-operator/base/cluster-role.yaml rename to manifests/base/user-clusterroles.yaml index d1a9f7f20..0bc8752f4 100644 --- a/manifests/pytorch-operator/base/cluster-role.yaml +++ b/manifests/base/user-clusterroles.yaml @@ -1,35 +1,3 @@ -apiVersion: rbac.authorization.k8s.io/v1beta1 -kind: ClusterRole -metadata: - labels: - app: pytorch-operator - name: pytorch-operator -rules: -- apiGroups: - - kubeflow.org - resources: - - pytorchjobs - - pytorchjobs/status - - pytorchjobs/finalizers - verbs: - - '*' -- apiGroups: - - apiextensions.k8s.io - resources: - - customresourcedefinitions - verbs: - - '*' -- apiGroups: - - "" - resources: - - pods - - services - - endpoints - - events - verbs: - - '*' ---- - apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: diff --git a/manifests/pytorch-operator/overlays/application/kustomization.yaml b/manifests/overlays/kubeflow/kustomization.yaml similarity index 55% rename from manifests/pytorch-operator/overlays/application/kustomization.yaml rename to manifests/overlays/kubeflow/kustomization.yaml index 3cfee7722..c50d7c738 100644 --- a/manifests/pytorch-operator/overlays/application/kustomization.yaml +++ b/manifests/overlays/kubeflow/kustomization.yaml @@ -1,9 +1,12 @@ apiVersion: kustomize.config.k8s.io/v1beta1 -bases: +kind: Kustomization +namespace: kubeflow +resources: - ../../base commonLabels: + app: pytorch-operator + kustomize.component: pytorch-operator app.kubernetes.io/component: pytorch app.kubernetes.io/name: pytorch-operator -kind: Kustomization -resources: -- application.yaml +images: +- name: gcr.io/kubeflow-images-public/pytorch-operator diff --git a/manifests/kustomization.yaml b/manifests/overlays/standalone/kustomization.yaml similarity index 61% rename from manifests/kustomization.yaml rename to manifests/overlays/standalone/kustomization.yaml index 9f00df7b2..5204a6b36 100644 --- a/manifests/kustomization.yaml +++ b/manifests/overlays/standalone/kustomization.yaml @@ -2,14 +2,12 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization namespace: kubeflow resources: -- crd.yaml +- ../../base - namespace.yaml -- rbac.yaml -- deployment.yaml -- service.yaml commonLabels: + app: pytorch-operator kustomize.component: pytorch-operator + app.kubernetes.io/component: pytorch + app.kubernetes.io/name: pytorch-operator images: - name: gcr.io/kubeflow-images-public/pytorch-operator - newName: 809251082950.dkr.ecr.us-west-2.amazonaws.com/pytorch-operator - newTag: "0.1" diff --git a/manifests/namespace.yaml b/manifests/overlays/standalone/namespace.yaml similarity index 100% rename from manifests/namespace.yaml rename to manifests/overlays/standalone/namespace.yaml diff --git a/manifests/pytorch-job-crds/base/crd.yaml b/manifests/pytorch-job-crds/base/crd.yaml deleted file mode 100644 index 4a8cf899d..000000000 --- a/manifests/pytorch-job-crds/base/crd.yaml +++ /dev/null @@ -1,42 +0,0 @@ -apiVersion: apiextensions.k8s.io/v1beta1 -kind: CustomResourceDefinition -metadata: - name: pytorchjobs.kubeflow.org -spec: - additionalPrinterColumns: - - JSONPath: .status.conditions[-1:].type - name: State - type: string - - JSONPath: .metadata.creationTimestamp - name: Age - type: date - group: kubeflow.org - names: - kind: PyTorchJob - plural: pytorchjobs - singular: pytorchjob - scope: Namespaced - subresources: - status: {} - validation: - openAPIV3Schema: - properties: - spec: - properties: - pytorchReplicaSpecs: - properties: - Master: - properties: - replicas: - maximum: 1 - minimum: 1 - type: integer - Worker: - properties: - replicas: - minimum: 1 - type: integer - versions: - - name: v1 - served: true - storage: true diff --git a/manifests/pytorch-job-crds/base/kustomization.yaml b/manifests/pytorch-job-crds/base/kustomization.yaml deleted file mode 100644 index 6e120e7b6..000000000 --- a/manifests/pytorch-job-crds/base/kustomization.yaml +++ /dev/null @@ -1,4 +0,0 @@ -apiVersion: kustomize.config.k8s.io/v1beta1 -kind: Kustomization -resources: -- crd.yaml diff --git a/manifests/pytorch-job-crds/overlays/application/application.yaml b/manifests/pytorch-job-crds/overlays/application/application.yaml deleted file mode 100644 index 4946a1cf8..000000000 --- a/manifests/pytorch-job-crds/overlays/application/application.yaml +++ /dev/null @@ -1,42 +0,0 @@ -apiVersion: app.k8s.io/v1beta1 -kind: Application -metadata: - name: pytorch-job-crds -spec: - selector: - matchLabels: - app.kubernetes.io/name: pytorch-job-crds - app.kubernetes.io/instance: pytorch-job-crds-v0.7.0 - app.kubernetes.io/version: v0.7.0 - app.kubernetes.io/component: pytorch - app.kubernetes.io/part-of: kubeflow - app.kubernetes.io/managed-by: kfctl - componentKinds: - - group: core - kind: Service - - group: apps - kind: Deployment - - group: core - kind: ServiceAccount - - group: kubeflow.org - kind: PyTorchJob - descriptor: - type: "pytorch-job-crds" - version: "v1" - description: "Pytorch-job-crds contains the \"PyTorchJob\" custom resource definition." - maintainers: - - name: Johnu George - email: johnugeo@cisco.com - owners: - - name: Johnu George - email: johnugeo@cisco.com - keywords: - - "pytorchjob" - - "pytorch-operator" - - "pytorch-training" - links: - - description: About - url: "https://github.com/kubeflow/pytorch-operator" - - description: Docs - url: "https://www.kubeflow.org/docs/reference/pytorchjob/v1/pytorch/" - addOwnerRef: true diff --git a/manifests/pytorch-job-crds/overlays/application/kustomization.yaml b/manifests/pytorch-job-crds/overlays/application/kustomization.yaml deleted file mode 100644 index 8647a23c2..000000000 --- a/manifests/pytorch-job-crds/overlays/application/kustomization.yaml +++ /dev/null @@ -1,9 +0,0 @@ -apiVersion: kustomize.config.k8s.io/v1beta1 -bases: -- ../../base -commonLabels: - app.kubernetes.io/component: pytorch - app.kubernetes.io/name: pytorch-job-crds -kind: Kustomization -resources: -- application.yaml diff --git a/manifests/pytorch-operator/base/cluster-role-binding.yaml b/manifests/pytorch-operator/base/cluster-role-binding.yaml deleted file mode 100644 index 595f0fd26..000000000 --- a/manifests/pytorch-operator/base/cluster-role-binding.yaml +++ /dev/null @@ -1,13 +0,0 @@ -apiVersion: rbac.authorization.k8s.io/v1beta1 -kind: ClusterRoleBinding -metadata: - labels: - app: pytorch-operator - name: pytorch-operator -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: ClusterRole - name: pytorch-operator -subjects: -- kind: ServiceAccount - name: pytorch-operator diff --git a/manifests/pytorch-operator/base/deployment.yaml b/manifests/pytorch-operator/base/deployment.yaml deleted file mode 100644 index 4e1f6a8bd..000000000 --- a/manifests/pytorch-operator/base/deployment.yaml +++ /dev/null @@ -1,34 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - name: pytorch-operator -spec: - replicas: 1 - selector: - matchLabels: - name: pytorch-operator - template: - metadata: - labels: - name: pytorch-operator - annotations: - sidecar.istio.io/inject: "false" - spec: - containers: - - command: - - /pytorch-operator.v1 - - --alsologtostderr - - -v=1 - - --monitoring-port=8443 - env: - - name: MY_POD_NAMESPACE - valueFrom: - fieldRef: - fieldPath: metadata.namespace - - name: MY_POD_NAME - valueFrom: - fieldRef: - fieldPath: metadata.name - image: gcr.io/kubeflow-images-public/pytorch-operator:v0.6.0-18-g5e36a57 - name: pytorch-operator - serviceAccountName: pytorch-operator diff --git a/manifests/pytorch-operator/base/params.env b/manifests/pytorch-operator/base/params.env deleted file mode 100644 index 47e9d44b5..000000000 --- a/manifests/pytorch-operator/base/params.env +++ /dev/null @@ -1,3 +0,0 @@ -pytorchDefaultImage=null -deploymentScope=cluster -deploymentNamespace=null diff --git a/manifests/pytorch-operator/base/service-account.yaml b/manifests/pytorch-operator/base/service-account.yaml deleted file mode 100644 index 3fe6033e1..000000000 --- a/manifests/pytorch-operator/base/service-account.yaml +++ /dev/null @@ -1,6 +0,0 @@ -apiVersion: v1 -kind: ServiceAccount -metadata: - labels: - app: pytorch-operator - name: pytorch-operator diff --git a/manifests/pytorch-operator/base/service.yaml b/manifests/pytorch-operator/base/service.yaml deleted file mode 100644 index c788ab2db..000000000 --- a/manifests/pytorch-operator/base/service.yaml +++ /dev/null @@ -1,19 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - annotations: - prometheus.io/path: /metrics - prometheus.io/port: "8443" - prometheus.io/scrape: "true" - labels: - app: pytorch-operator - name: pytorch-operator -spec: - ports: - - name: monitoring-port - port: 8443 - targetPort: 8443 - selector: - name: pytorch-operator - type: ClusterIP - diff --git a/manifests/pytorch-operator/overlays/application/application.yaml b/manifests/pytorch-operator/overlays/application/application.yaml deleted file mode 100644 index c2eb60291..000000000 --- a/manifests/pytorch-operator/overlays/application/application.yaml +++ /dev/null @@ -1,44 +0,0 @@ -apiVersion: app.k8s.io/v1beta1 -kind: Application -metadata: - name: pytorch-operator -spec: - selector: - matchLabels: - app.kubernetes.io/name: pytorch-operator - app.kubernetes.io/instance: pytorch-operator-v0.7.0 - app.kubernetes.io/version: v0.7.0 - app.kubernetes.io/component: pytorch - app.kubernetes.io/part-of: kubeflow - app.kubernetes.io/managed-by: kfctl - componentKinds: - - group: core - kind: Service - - group: apps - kind: Deployment - - group: core - kind: ConfigMap - - group: core - kind: ServiceAccount - - group: kubeflow.org - kind: PyTorchJob - descriptor: - type: "pytorch-operator" - version: "v1" - description: "Pytorch-operator allows users to create and manage the \"PyTorchJob\" custom resource." - maintainers: - - name: Johnu George - email: johnugeo@cisco.com - owners: - - name: Johnu George - email: johnugeo@cisco.com - keywords: - - "pytorchjob" - - "pytorch-operator" - - "pytorch-training" - links: - - description: About - url: "https://github.com/kubeflow/pytorch-operator" - - description: Docs - url: "https://www.kubeflow.org/docs/reference/pytorchjob/v1/pytorch/" - addOwnerRef: true diff --git a/scripts/setup-pytorch-operator.sh b/scripts/setup-pytorch-operator.sh index 1d72aef14..4504bdef2 100755 --- a/scripts/setup-pytorch-operator.sh +++ b/scripts/setup-pytorch-operator.sh @@ -33,11 +33,11 @@ aws eks update-kubeconfig --region=${REGION} --name=${CLUSTER_NAME} echo "Update PyTorch operator manifest with new name and tag" #TODO(Jeffwan@): If there's a way to specify context, then we don't need to enter manifests folder -cd manifests/ +cd manifests/overlays/standalone kustomize edit set image gcr.io/kubeflow-images-public/pytorch-operator=${REGISTRY}/${REPO_NAME}:${VERSION} echo "Installing PyTorch operator manifests" -kubectl apply -k . +kustomize build . | kubectl apply -f - TIMEOUT=30 until kubectl get pods -n kubeflow | grep pytorch-operator | grep 1/1 || [[ $TIMEOUT -eq 1 ]]; do