diff --git a/README.md b/README.md index 5aed712a..fa16dadd 100644 --- a/README.md +++ b/README.md @@ -242,14 +242,14 @@ You can now execute `nvidia-smi` command in the container and see the difference To Upgrade the k8s-vgpu to the latest version, all you need to do is restart the chart. The latest will be downloaded automatically. ``` -$ helm uninstall vgpu -$ helm install vgpu vgpu +$ helm uninstall vgpu -n kube-system +$ helm install vgpu vgpu -n kube-system ``` ### Uninstall ``` -helm uninstall vgpu +helm uninstall vgpu -n kube-system ``` ## Tests diff --git a/README_cn.md b/README_cn.md index ab102e58..cb6b87af 100644 --- a/README_cn.md +++ b/README_cn.md @@ -239,14 +239,14 @@ spec: 只需要重新启动整个Chart即可自动完成更新,最新的镜像会被自动下载 ``` -$ helm uninstall vgpu -$ helm install vgpu vgpu +$ helm uninstall vgpu -n kube-system +$ helm install vgpu vgpu -n kube-system ``` ### 卸载 ``` -$ helm uninstall vgpu +$ helm uninstall vgpu -n kube-system ``` ## 测试 diff --git a/cmd/nvidia-container-runtime/dockerclient.go b/cmd/nvidia-container-runtime/dockerclient.go deleted file mode 100644 index 3e92d4f7..00000000 --- a/cmd/nvidia-container-runtime/dockerclient.go +++ /dev/null @@ -1,63 +0,0 @@ -package main - -import ( - "bytes" - "context" - - "github.com/docker/docker/api/types" - "github.com/docker/docker/api/types/container" - "github.com/docker/docker/api/types/mount" - client "github.com/docker/docker/client" - log "github.com/sirupsen/logrus" -) - -func GetDockerClient() *client.Client { - cli, err := client.NewClientWithOpts(client.FromEnv, client.WithAPIVersionNegotiation()) - if err != nil { - panic(err) - } - return cli -} - -func InspecctContainer(ctx context.Context, ctrname string, logger *log.Logger) { - client := GetDockerClient() - json, err := client.ContainerInspect(ctx, ctrname) - if err != nil { - logger.Println("err=", err.Error()) - } - logger.Println("LABELS=", json.Config.Labels) -} - -func RunContainer(ctx context.Context, image string, cmd []string, env []string, mount []mount.Mount, shmSize int64) string { - client := GetDockerClient() - resp, err := client.ContainerCreate(ctx, &container.Config{ - Image: image, - Cmd: cmd, - Env: env, - }, &container.HostConfig{ - Mounts: mount, - ShmSize: shmSize, - }, nil, nil, "") - if err != nil { - panic(err) - } - if err := client.ContainerStart(ctx, resp.ID, types.ContainerStartOptions{}); err != nil { - panic(err) - } - statusCh, errCh := client.ContainerWait(ctx, resp.ID, container.WaitConditionNotRunning) - select { - case err := <-errCh: - if err != nil { - panic(err) - } - case <-statusCh: - } - - out, err := client.ContainerLogs(ctx, resp.ID, types.ContainerLogsOptions{ShowStdout: true}) - if err != nil { - panic(err) - } - buf := new(bytes.Buffer) - buf.ReadFrom(out) - return buf.String() -} diff --git a/cmd/nvidia-container-runtime/nvcr.go b/cmd/nvidia-container-runtime/nvcr.go index d0605890..0930e3d8 100644 --- a/cmd/nvidia-container-runtime/nvcr.go +++ b/cmd/nvidia-container-runtime/nvcr.go @@ -19,7 +19,6 @@ package main import ( "errors" "fmt" - "io/ioutil" "os" "os/exec" "strings" @@ -55,13 +54,13 @@ func newNvidiaContainerRuntimeWithLogger(logger *log.Logger, runtime oci.Runtime // forwarded to the underlying runtime's Exec method. func (r nvidiaContainerRuntime) Exec(args []string) error { if r.modificationRequired(args) { - fmt.Println("NEED modification") + // fmt.Println("NEED modification") err := r.modifyOCISpec() if err != nil { return fmt.Errorf("error modifying OCI spec: %v", err) } } else { - fmt.Println("Need not modification") + //fmt.Println("Need not modification") } r.logger.Println("Forwarding command to runtime") @@ -164,54 +163,55 @@ func (r nvidiaContainerRuntime) addNVIDIAHook(spec *specs.Spec) error { } } - r.logger.Printf("prestart hook path: %s %s\n", path) - envmap, newuuids, err := GetNvidiaUUID(r, spec.Process.Env) - if err != nil { - r.logger.Println("GetNvidiaUUID failed") - } else { - if len(envmap) > 0 { - restr := "" - for idx, val := range envmap { - restr = appendtofilestr(idx, val, restr) - - tmp1 := idx + "=" + val - found := false - for idx1, val1 := range spec.Process.Env { - if strings.Compare(strings.Split(val1, "=")[0], idx) == 0 { - spec.Process.Env[idx1] = tmp1 - found = true - r.logger.Println("modified env", tmp1) - continue + /* + r.logger.Printf("prestart hook path: %s %s\n", path) + envmap, newuuids, err := GetNvidiaUUID(r, spec.Process.Env) + if err != nil { + r.logger.Println("GetNvidiaUUID failed") + } else { + if len(envmap) > 0 { + restr := "" + for idx, val := range envmap { + restr = appendtofilestr(idx, val, restr) + + tmp1 := idx + "=" + val + found := false + for idx1, val1 := range spec.Process.Env { + if strings.Compare(strings.Split(val1, "=")[0], idx) == 0 { + spec.Process.Env[idx1] = tmp1 + found = true + r.logger.Println("modified env", tmp1) + continue + } + } + if !found { + spec.Process.Env = append(spec.Process.Env, tmp1) + r.logger.Println("appended env", tmp1) } } - if !found { - spec.Process.Env = append(spec.Process.Env, tmp1) - r.logger.Println("appended env", tmp1) + restr = appendtofilestr("CUDA_DEVICE_MEMORY_SHARED_CACHE", "/tmp/vgpu/cudevshr.cache", restr) + ioutil.WriteFile("envfile.vgpu", []byte(restr), os.ModePerm) + dir, _ := os.Getwd() + sharedmnt := specs.Mount{ + Destination: "/tmp/envfile.vgpu", + Source: dir + "/envfile.vgpu", + Type: "bind", + Options: []string{"rbind", "rw"}, } - } - restr = appendtofilestr("CUDA_DEVICE_MEMORY_SHARED_CACHE", "/tmp/vgpu/cudevshr.cache", restr) - ioutil.WriteFile("envfile.vgpu", []byte(restr), os.ModePerm) - dir, _ := os.Getwd() - sharedmnt := specs.Mount{ - Destination: "/tmp/envfile.vgpu", - Source: dir + "/envfile.vgpu", - Type: "bind", - Options: []string{"rbind", "rw"}, - } - spec.Mounts = append(spec.Mounts, sharedmnt) + spec.Mounts = append(spec.Mounts, sharedmnt) - //spec.Mounts = append(spec.Mounts, ) - } - if len(newuuids) > 0 { - //r.logger.Println("Get new uuids", newuuids) - //spec.Process.Env = append(spec.Process.Env, newuuids[0]) - err1 := r.addMonitor(newuuids, spec) - if err1 != nil { - r.logger.Println("addMonitorPath failed", err1.Error()) + //spec.Mounts = append(spec.Mounts, ) + } + if len(newuuids) > 0 { + //r.logger.Println("Get new uuids", newuuids) + //spec.Process.Env = append(spec.Process.Env, newuuids[0]) + err1 := r.addMonitor(newuuids, spec) + if err1 != nil { + r.logger.Println("addMonitorPath failed", err1.Error()) + } } } - } - + */ args := []string{path} if spec.Hooks == nil { spec.Hooks = &specs.Hooks{} diff --git a/deployments/vgpu/templates/scheduler/webhook.yaml b/deployments/vgpu/templates/scheduler/webhook.yaml index d044b737..6f7c262e 100644 --- a/deployments/vgpu/templates/scheduler/webhook.yaml +++ b/deployments/vgpu/templates/scheduler/webhook.yaml @@ -4,7 +4,7 @@ metadata: name: {{ include "4pd-vgpu.scheduler.webhook" . }} webhooks: - admissionReviewVersions: - - v1 + - v1beta1 clientConfig: service: name: {{ include "4pd-vgpu.scheduler" . }} diff --git a/go.mod b/go.mod index 10d1c39b..4349a282 100644 --- a/go.mod +++ b/go.mod @@ -4,77 +4,35 @@ go 1.15 require ( 4pd.io/k8s-vgpu/pkg/api v0.0.0 - github.com/Microsoft/go-winio v0.4.17 github.com/NVIDIA/go-gpuallocator v0.2.1 github.com/NVIDIA/gpu-monitoring-tools v0.0.0-20210624153948-4902944b3b52 - github.com/beorn7/perks v1.0.1 - github.com/cespare/xxhash/v2 v2.1.1 - github.com/containerd/containerd v1.5.5 - github.com/davecgh/go-spew v1.1.1 - github.com/docker/distribution v2.7.1+incompatible + github.com/containerd/containerd v1.5.5 // indirect github.com/docker/docker v20.10.8+incompatible - github.com/docker/go-connections v0.4.0 - github.com/docker/go-units v0.4.0 - github.com/evanphx/json-patch v4.11.0+incompatible + github.com/docker/go-connections v0.4.0 // indirect github.com/fsnotify/fsnotify v1.4.9 - github.com/go-logr/logr v0.4.0 - github.com/gogo/protobuf v1.3.2 - github.com/golang/protobuf v1.5.2 - github.com/google/go-cmp v0.5.5 - github.com/google/gofuzz v1.1.0 - github.com/google/uuid v1.2.0 - github.com/googleapis/gnostic v0.5.5 - github.com/hashicorp/golang-lru v0.5.4 - github.com/imdario/mergo v0.3.12 - github.com/json-iterator/go v1.1.11 github.com/julienschmidt/httprouter v1.3.0 - github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369 - github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd - github.com/modern-go/reflect2 v1.0.1 github.com/morikuni/aec v1.0.0 // indirect - github.com/opencontainers/go-digest v1.0.0 - github.com/opencontainers/image-spec v1.0.1 github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417 github.com/pelletier/go-toml v1.8.1 - github.com/pkg/errors v0.9.1 - github.com/pmezard/go-difflib v1.0.0 github.com/prometheus/client_golang v1.11.0 - github.com/prometheus/client_model v0.2.0 - github.com/prometheus/common v0.26.0 - github.com/prometheus/procfs v0.6.0 github.com/sirupsen/logrus v1.8.1 github.com/spf13/cobra v1.1.3 github.com/spf13/jwalterweatherman v1.1.0 // indirect - github.com/spf13/pflag v1.0.5 github.com/spf13/viper v1.7.0 github.com/stretchr/testify v1.7.0 github.com/tsaikd/KDGoLib v0.0.0-20191001134900-7f3cf518e07d golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6 golang.org/x/net v0.0.0-20210428140749-89ef3d95e781 - golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d - golang.org/x/sys v0.0.0-20210603081109-ebe580a85c40 - golang.org/x/term v0.0.0-20210220032956-6a3ed077a48d - golang.org/x/text v0.3.6 - golang.org/x/time v0.0.0-20210611083556-38a9dc6acbc6 - gomodules.xyz/jsonpatch/v2 v2.2.0 - google.golang.org/appengine v1.6.7 google.golang.org/grpc v1.39.0 google.golang.org/protobuf v1.26.0 - gopkg.in/inf.v0 v0.9.1 - gopkg.in/yaml.v2 v2.4.0 - gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b gotest.tools/v3 v3.0.3 k8s.io/api v0.21.2 k8s.io/apimachinery v0.21.2 k8s.io/client-go v0.21.2 k8s.io/klog/v2 v2.9.0 - k8s.io/kube-openapi v0.0.0-20210305001622-591a79e4bda7 k8s.io/kube-scheduler v0.21.2 k8s.io/kubelet v0.21.2 - k8s.io/utils v0.0.0-20210527160623-6fdb442a123b sigs.k8s.io/controller-runtime v0.9.3 - sigs.k8s.io/structured-merge-diff/v4 v4.1.0 - sigs.k8s.io/yaml v1.2.0 ) replace (