diff --git a/charts/hami/templates/scheduler/deployment.yaml b/charts/hami/templates/scheduler/deployment.yaml index 68898f464..395d5b852 100644 --- a/charts/hami/templates/scheduler/deployment.yaml +++ b/charts/hami/templates/scheduler/deployment.yaml @@ -47,6 +47,9 @@ spec: - {{ . }} {{- end }} {{- end }} + - --leader-elect={{ .Values.scheduler.leaderElect }} + - --leader-elect-resource-name={{ .Values.schedulerName }} + - --leader-elect-resource-namespace={{ .Release.Namespace }} volumeMounts: - name: scheduler-config mountPath: /config @@ -71,6 +74,9 @@ spec: - --default-cores={{ .Values.scheduler.defaultCores }} - --iluvatar-memory={{ .Values.iluvatarResourceMem }} - --iluvatar-cores={{ .Values.iluvatarResourceCore }} + - --leader-elect={{ .Values.scheduler.leaderElect }} + - --leader-elect-resource-name={{ .Values.schedulerName }} + - --leader-elect-resource-namespace={{ .Release.Namespace }} {{- range .Values.scheduler.extender.extraArgs }} - {{ . }} {{- end }} diff --git a/charts/hami/values.yaml b/charts/hami/values.yaml index f44520e30..8632e52cd 100644 --- a/charts/hami/values.yaml +++ b/charts/hami/values.yaml @@ -2,7 +2,7 @@ nameOverride: "" fullnameOverride: "" -imagePullSecrets: [] +imagePullSecrets: [ ] version: "v2.3.9" #Nvidia GPU Parameters @@ -45,6 +45,7 @@ scheduler: defaultCores: 0 defaultGPUNum: 1 metricsBindAddress: ":9395" + leaderElect: true kubeScheduler: # @param enabled indicate whether to run kube-scheduler container in the scheduler pod, it's true by default. enabled: true @@ -56,7 +57,6 @@ scheduler: - -v=4 extraArgs: - --policy-config-file=/config/config.json - - --leader-elect=false - -v=4 extender: image: "projecthami/hami" diff --git a/cmd/scheduler/main.go b/cmd/scheduler/main.go index 4cc4cd649..25fb0dcb1 100644 --- a/cmd/scheduler/main.go +++ b/cmd/scheduler/main.go @@ -57,6 +57,7 @@ func init() { rootCmd.Flags().Int32Var(&config.DefaultCores, "default-cores", 0, "default gpu core percentage to allocate") rootCmd.Flags().Int32Var(&config.DefaultResourceNum, "default-gpu", 1, "default gpu to allocate") rootCmd.Flags().StringVar(&config.MetricsBindAddress, "metrics-bind-address", ":9395", "The TCP address that the scheduler should bind to for serving prometheus metrics(e.g. 127.0.0.1:9395, :9395)") + rootCmd.PersistentFlags().AddGoFlagSet(device.GlobalFlagSet()) rootCmd.AddCommand(version.VersionCmd) rootCmd.Flags().AddGoFlagSet(util.InitKlogFlags()) @@ -65,7 +66,7 @@ func init() { func start() { sher = scheduler.NewScheduler() sher.Start() - defer sher.Stop() + go sher.Stop() // start monitor metrics go sher.RegisterFromNodeAnnotations()