Skip to content

Commit

Permalink
Merge branch 'kubeflow:master' into feature/success_failure_policy
Browse files Browse the repository at this point in the history
  • Loading branch information
qiankunli authored Oct 25, 2022
2 parents 200e513 + 74655a1 commit df86f71
Show file tree
Hide file tree
Showing 55 changed files with 2,318 additions and 1,598 deletions.
7 changes: 4 additions & 3 deletions .github/workflows/integration-tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,17 @@ jobs:
strategy:
fail-fast: false
matrix:
kubernetes-version: ["v1.21.12", "v1.22.9", "v1.23.6"]
kubernetes-version: ["v1.22.15", "v1.23.12", "v1.24.6"]
steps:
- name: Checkout
uses: actions/checkout@v2

- name: Create k8s Kind Cluster
uses: helm/kind-action@v1.2.0
uses: helm/kind-action@v1.3.0
with:
node_image: kindest/node:${{ matrix.kubernetes-version }}
cluster_name: training-operator-cluster
kubectl_version: ${{ matrix.kubernetes-version }}

- name: Build training-operator
run: |
Expand All @@ -35,4 +36,4 @@ jobs:
- name: Run tests
run: |
pip install pytest
python3 -m pip install -r sdk/python/requirements.txt; pytest sdk/python/test --log-cli-level=info
python3 -m pip install -e sdk/python; pytest sdk/python/test --log-cli-level=info
4 changes: 2 additions & 2 deletions .github/workflows/test-go.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@ jobs:
path: ${{ env.GOPATH }}/src/github.com/kubeflow/training-operator

- name: Setup Go
uses: actions/setup-go@v2
uses: actions/setup-go@v3
with:
go-version: 1.17.2
go-version-file: ${{ env.GOPATH }}/src/github.com/kubeflow/training-operator/go.mod

- name: Check Go modules
run: |
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/unittests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ jobs:
strategy:
fail-fast: false
matrix:
kubernetes-version: ["1.21.4", "1.22.1", "1.23.5"]
kubernetes-version: ["1.22.1", "1.23.5", "1.24.2"]

steps:
- name: Check out code
Expand All @@ -26,9 +26,9 @@ jobs:
path: ${{ env.GOPATH }}/src/github.com/kubeflow/training-operator

- name: Setup Go
uses: actions/setup-go@v2
uses: actions/setup-go@v3
with:
go-version: 1.17.2
go-version-file: ${{ env.GOPATH }}/src/github.com/kubeflow/training-operator/go.mod

- name: Run Go test
run: |
Expand Down
25 changes: 6 additions & 19 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -58,12 +58,12 @@ vet: ## Run go vet against code.
GOLANGCI_LINT=$(shell which golangci-lint)
golangci-lint:
ifeq ($(GOLANGCI_LINT),)
curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(shell go env GOPATH)/bin v1.42.1
curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(shell go env GOPATH)/bin v1.49.0
$(info golangci-lint has been installed)
endif
golangci-lint run --timeout 5m ./...
golangci-lint run --timeout 5m --go 1.19 ./...

ENVTEST_K8S_VERSION ?= 1.22
ENVTEST_K8S_VERSION ?= 1.24
HAS_SETUP_ENVTEST := $(shell command -v setup-envtest;)

testall: manifests generate fmt vet golangci-lint test ## Run tests.
Expand Down Expand Up @@ -105,25 +105,12 @@ deploy: manifests kustomize ## Deploy controller to the K8s cluster specified in
undeploy: ## Undeploy controller from the K8s cluster specified in ~/.kube/config.
$(KUSTOMIZE) build manifests/overlays/standalone | kubectl delete -f -

PROJECT_DIR := $(shell dirname $(abspath $(lastword $(MAKEFILE_LIST))))

CONTROLLER_GEN = $(shell pwd)/bin/controller-gen
controller-gen: ## Download controller-gen locally if necessary.
$(call go-get-tool,$(CONTROLLER_GEN),sigs.k8s.io/controller-tools/cmd/[email protected])
GOBIN=$(PROJECT_DIR)/bin go install sigs.k8s.io/controller-tools/cmd/[email protected]

KUSTOMIZE = $(shell pwd)/bin/kustomize
kustomize: ## Download kustomize locally if necessary.
$(call go-get-tool,$(KUSTOMIZE),sigs.k8s.io/kustomize/kustomize/[email protected])

# go-get-tool will 'go get' any package $2 and install it to $1.
PROJECT_DIR := $(shell dirname $(abspath $(lastword $(MAKEFILE_LIST))))
define go-get-tool
@[ -f $(1) ] || { \
set -e ;\
TMP_DIR=$$(mktemp -d) ;\
cd $$TMP_DIR ;\
go mod init tmp ;\
echo "Downloading $(2)" ;\
GOBIN=$(PROJECT_DIR)/bin go get $(2) ;\
rm -rf $$TMP_DIR ;\
}
endef
GOBIN=$(PROJECT_DIR)/bin go install sigs.k8s.io/kustomize/kustomize/[email protected]
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ kubectl apply -k "github.com/kubeflow/training-operator/manifests/overlays/stand
### Stable Release

```bash
kubectl apply -k "github.com/kubeflow/training-operator/manifests/overlays/standalone?ref=v1.3.0"
kubectl apply -k "github.com/kubeflow/training-operator/manifests/overlays/standalone?ref=v1.5.0"
```

### TensorFlow Release Only
Expand Down
4 changes: 2 additions & 2 deletions build/images/training-operator/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Build the manager binary
FROM golang:1.17 as builder
FROM golang:1.19 as builder

WORKDIR /workspace
# Copy the Go Modules manifests
Expand All @@ -13,7 +13,7 @@ RUN go mod download
COPY . .

# Build
RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 GO111MODULE=on go build -a -o manager cmd/training-operator.v1/main.go
RUN CGO_ENABLED=0 GOOS=linux GO111MODULE=on go build -a -o manager cmd/training-operator.v1/main.go

# Use distroless as minimal base image to package the manager binary
# Refer to https://github.com/GoogleContainerTools/distroless for more details
Expand Down
6 changes: 5 additions & 1 deletion cmd/training-operator.v1/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import (
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/healthz"
"sigs.k8s.io/controller-runtime/pkg/log/zap"
"volcano.sh/apis/pkg/apis/scheduling/v1beta1"

commonutil "github.com/kubeflow/common/pkg/util"
kubeflowv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1"
Expand All @@ -45,12 +46,14 @@ var (
func init() {
utilruntime.Must(clientgoscheme.AddToScheme(scheme))
utilruntime.Must(kubeflowv1.AddToScheme(scheme))
utilruntime.Must(v1beta1.AddToScheme(scheme))
//+kubebuilder:scaffold:scheme
}

func main() {
var metricsAddr string
var enableLeaderElection bool
var leaderElectionID string
var probeAddr string
var enabledSchemes controllerv1.EnabledSchemes
var enableGangScheduling bool
Expand All @@ -62,6 +65,7 @@ func main() {
flag.BoolVar(&enableLeaderElection, "leader-elect", false,
"Enable leader election for controller manager. "+
"Enabling this will ensure there is only one active controller manager.")
flag.StringVar(&leaderElectionID, "leader-election-id", "1ca428e5.training-operator.kubeflow.org", "The ID for leader election.")
flag.Var(&enabledSchemes, "enable-scheme", "Enable scheme(s) as --enable-scheme=tfjob --enable-scheme=pytorchjob, case insensitive."+
" Now supporting TFJob, PyTorchJob, MXNetJob, XGBoostJob. By default, all supported schemes will be enabled.")
flag.BoolVar(&enableGangScheduling, "enable-gang-scheduling", false, "Set true to enable gang scheduling")
Expand Down Expand Up @@ -96,7 +100,7 @@ func main() {
Port: monitoringPort,
HealthProbeBindAddress: probeAddr,
LeaderElection: enableLeaderElection,
LeaderElectionID: "1ca428e5.",
LeaderElectionID: leaderElectionID,
Namespace: namespace,
})
if err != nil {
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
module github.com/kubeflow/training-operator

go 1.17
go 1.19

require (
github.com/go-logr/logr v1.2.3
Expand Down
Loading

0 comments on commit df86f71

Please sign in to comment.