Skip to content

Commit

Permalink
Update to incorporate go-nvml updates to expose interface types
Browse files: browse the repository at this point in the history
Signed-off-by: Kevin Klues <[email protected]>
  • Loading branch information
klueska committed Apr 12, 2024
1 parent d28abb1 commit aa5891f
Show file tree
Hide file tree
Showing 76 changed files with 29,091 additions and 6,915 deletions.
3 changes: 2 additions & 1 deletion cmd/nvidia-mig-parted/apply/apply.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,10 @@ import (
"github.com/sirupsen/logrus"
cli "github.com/urfave/cli/v2"

"github.com/NVIDIA/go-nvml/pkg/nvml"

hooks "github.com/NVIDIA/mig-parted/api/hooks/v1"
"github.com/NVIDIA/mig-parted/cmd/nvidia-mig-parted/assert"
"github.com/NVIDIA/mig-parted/internal/nvml"

"sigs.k8s.io/yaml"
)
Expand Down
3 changes: 2 additions & 1 deletion cmd/nvidia-mig-parted/assert/assert.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,10 @@ import (
"github.com/sirupsen/logrus"
cli "github.com/urfave/cli/v2"

"github.com/NVIDIA/go-nvml/pkg/nvml"

v1 "github.com/NVIDIA/mig-parted/api/spec/v1"
"github.com/NVIDIA/mig-parted/cmd/nvidia-mig-parted/util"
"github.com/NVIDIA/mig-parted/internal/nvml"
"github.com/NVIDIA/mig-parted/pkg/types"

"sigs.k8s.io/yaml"
Expand Down
3 changes: 2 additions & 1 deletion cmd/nvidia-mig-parted/checkpoint/checkpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,10 @@ import (
"github.com/sirupsen/logrus"
cli "github.com/urfave/cli/v2"

"github.com/NVIDIA/go-nvml/pkg/nvml"

checkpoint "github.com/NVIDIA/mig-parted/api/checkpoint/v1"
"github.com/NVIDIA/mig-parted/cmd/nvidia-mig-parted/util"
"github.com/NVIDIA/mig-parted/internal/nvml"
"github.com/NVIDIA/mig-parted/pkg/mig/state"
)

Expand Down
3 changes: 2 additions & 1 deletion cmd/nvidia-mig-parted/export/export.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,9 @@ import (
"github.com/sirupsen/logrus"
cli "github.com/urfave/cli/v2"

"github.com/NVIDIA/go-nvml/pkg/nvml"

v1 "github.com/NVIDIA/mig-parted/api/spec/v1"
"github.com/NVIDIA/mig-parted/internal/nvml"

yaml "gopkg.in/yaml.v2"
)
Expand Down
10 changes: 5 additions & 5 deletions cmd/nvidia-mig-parted/util/device.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,10 @@ import (
"os/exec"
"strings"

"github.com/NVIDIA/mig-parted/internal/nvml"
"github.com/NVIDIA/mig-parted/pkg/types"

"github.com/NVIDIA/go-nvlib/pkg/nvpci"
"github.com/NVIDIA/go-nvml/pkg/nvml"

"github.com/NVIDIA/mig-parted/pkg/types"
)

func GetGPUDeviceIDs() ([]types.DeviceID, error) {
Expand Down Expand Up @@ -87,7 +87,7 @@ func nvmlGetGPUDeviceIDs() ([]types.DeviceID, error) {
var ids []types.DeviceID
err = pciVisitGPUs(func(gpu *nvpci.NvidiaPCIDevice) error {
_, ret := nvmlLib.DeviceGetHandleByPciBusId(gpu.Address)
if ret.Value() != nvml.SUCCESS {
if ret != nvml.SUCCESS {
return nil
}

Expand Down Expand Up @@ -126,7 +126,7 @@ func nvmlGetGPUPciBusIds() ([]string, error) {
}

_, ret := nvmlLib.DeviceGetHandleByPciBusId(gpu.Address)
if ret.Value() != nvml.SUCCESS {
if ret != nvml.SUCCESS {
return nil
}

Expand Down
12 changes: 6 additions & 6 deletions cmd/nvidia-mig-parted/util/nvml.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ import (

log "github.com/sirupsen/logrus"

"github.com/NVIDIA/mig-parted/internal/nvml"
"github.com/NVIDIA/go-nvml/pkg/nvml"
)

const (
Expand All @@ -49,18 +49,18 @@ func IsNVMLVersionSupported() (bool, error) {
nvmlLib := nvml.New()

ret := nvmlLib.Init()
if ret.Value() != nvml.SUCCESS {
if ret != nvml.SUCCESS {
return false, fmt.Errorf("error initializing NVML: %v", ret)
}
defer func() {
ret := nvmlLib.Shutdown()
if ret.Value() != nvml.SUCCESS {
if ret != nvml.SUCCESS {
log.Warnf("error shutting down NVML: %v", ret)
}
}()

sversion, ret := nvmlLib.SystemGetNVMLVersion()
if ret.Value() != nvml.SUCCESS {
if ret != nvml.SUCCESS {
return false, fmt.Errorf("error getting getting version: %v", ret)
}

Expand All @@ -86,7 +86,7 @@ func NvmlInit(nvmlLib nvml.Interface) error {
nvmlLib = nvml.New()
}
ret := nvmlLib.Init()
if ret.Value() != nvml.SUCCESS {
if ret != nvml.SUCCESS {
return ret
}
return nil
Expand All @@ -97,7 +97,7 @@ func TryNvmlShutdown(nvmlLib nvml.Interface) {
nvmlLib = nvml.New()
}
ret := nvmlLib.Shutdown()
if ret.Value() != nvml.SUCCESS {
if ret != nvml.SUCCESS {
log.Warnf("error shutting down NVML: %v", ret)
}
}
6 changes: 4 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,11 @@ go 1.21

toolchain go1.22.1

replace github.com/NVIDIA/go-nvlib => github.com/klueska/go-nvlib v0.0.0-20240412211930-153699bb9310

require (
github.com/NVIDIA/go-nvlib v0.2.0
github.com/NVIDIA/go-nvml v0.12.0-3
github.com/google/uuid v1.6.0
github.com/NVIDIA/go-nvml v0.12.0-4
github.com/sirupsen/logrus v1.7.0
github.com/stretchr/testify v1.9.0
github.com/urfave/cli/v2 v2.27.1
Expand All @@ -31,6 +32,7 @@ require (
github.com/google/gnostic-models v0.6.8 // indirect
github.com/google/go-cmp v0.6.0 // indirect
github.com/google/gofuzz v1.2.0 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/imdario/mergo v0.3.6 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
Expand Down
8 changes: 4 additions & 4 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
github.com/NVIDIA/go-nvlib v0.2.0 h1:roq+SDstbP1fcy2XVH7wB2Gz2/Ud7Q+NGQYOcVITVrA=
github.com/NVIDIA/go-nvlib v0.2.0/go.mod h1:kFuLNTyD1tF6FbRFlk+/EdUW5BrkE+v1Y3A3/9zKSjA=
github.com/NVIDIA/go-nvml v0.12.0-3 h1:QwfjYxEqIQVRhl8327g2Y3ZvKResPydpGSKtCIIK9jE=
github.com/NVIDIA/go-nvml v0.12.0-3/go.mod h1:SOufGc5Wql+cxrIZ8RyJwVKDYxfbs4WPkHXqadcbfvA=
github.com/NVIDIA/go-nvml v0.12.0-4 h1:BvPjnjJr6qje0zov57Md7TwEA8i/12kZeUQIpyWzTEE=
github.com/NVIDIA/go-nvml v0.12.0-4/go.mod h1:8Llmj+1Rr+9VGGwZuRer5N/aCjxGuR5nPb/9ebBiIEQ=
github.com/cpuguy83/go-md2man/v2 v2.0.2 h1:p1EgwI/C7NhT0JmVkwCD2ZBK8j4aeHQX2pMHHBfMQ6w=
github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
Expand Down Expand Up @@ -45,6 +43,8 @@ github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnr
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
github.com/klueska/go-nvlib v0.0.0-20240412211930-153699bb9310 h1:wQJmRXCO75TlHrcPkKZXPrD1wyjBYneTTiH8qXE5KsI=
github.com/klueska/go-nvlib v0.0.0-20240412211930-153699bb9310/go.mod h1:NasUuId9hYFvwzuOHCu9F2X6oTU2tG0JHTfbJYuDAbA=
github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
Expand Down
22 changes: 11 additions & 11 deletions internal/nvlib/mig/mig.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ package mig
import (
"fmt"

"github.com/NVIDIA/mig-parted/internal/nvml"
"github.com/NVIDIA/go-nvml/pkg/nvml"
)

type Interface struct {
Expand Down Expand Up @@ -52,10 +52,10 @@ func (i Interface) GpuInstance(gi nvml.GpuInstance) GpuInstance {

func (device Device) AssertMigEnabled() error {
mode, _, ret := device.GetMigMode()
if ret.Value() == nvml.ERROR_NOT_SUPPORTED {
if ret == nvml.ERROR_NOT_SUPPORTED {
return fmt.Errorf("MIG not supported")
}
if ret.Value() != nvml.SUCCESS {
if ret != nvml.SUCCESS {
return fmt.Errorf("error getting MIG mode: %v", ret)
}
if mode != nvml.DEVICE_MIG_ENABLE {
Expand All @@ -67,18 +67,18 @@ func (device Device) AssertMigEnabled() error {
func (device Device) WalkGpuInstances(f func(nvml.GpuInstance, int, nvml.GpuInstanceProfileInfo) error) error {
for i := 0; i < nvml.GPU_INSTANCE_PROFILE_COUNT; i++ {
giProfileInfo, ret := device.GetGpuInstanceProfileInfo(i)
if ret.Value() == nvml.ERROR_NOT_SUPPORTED {
if ret == nvml.ERROR_NOT_SUPPORTED {
continue
}
if ret.Value() == nvml.ERROR_INVALID_ARGUMENT {
if ret == nvml.ERROR_INVALID_ARGUMENT {
continue
}
if ret.Value() != nvml.SUCCESS {
if ret != nvml.SUCCESS {
return fmt.Errorf("error getting GPU instance profile info for '%v': %v", i, ret)
}

gis, ret := device.GetGpuInstances(&giProfileInfo)
if ret.Value() != nvml.SUCCESS {
if ret != nvml.SUCCESS {
return fmt.Errorf("error getting GPU instances for profile '%v': %v", i, ret)
}

Expand All @@ -96,18 +96,18 @@ func (gi GpuInstance) WalkComputeInstances(f func(ci nvml.ComputeInstance, ciPro
for j := 0; j < nvml.COMPUTE_INSTANCE_PROFILE_COUNT; j++ {
for k := 0; k < nvml.COMPUTE_INSTANCE_ENGINE_PROFILE_COUNT; k++ {
ciProfileInfo, ret := gi.GetComputeInstanceProfileInfo(j, k)
if ret.Value() == nvml.ERROR_NOT_SUPPORTED {
if ret == nvml.ERROR_NOT_SUPPORTED {
continue
}
if ret.Value() == nvml.ERROR_INVALID_ARGUMENT {
if ret == nvml.ERROR_INVALID_ARGUMENT {
continue
}
if ret.Value() != nvml.SUCCESS {
if ret != nvml.SUCCESS {
return fmt.Errorf("error getting Compute instance profile info for '(%v, %v)': %v", j, k, ret)
}

cis, ret := gi.GetComputeInstances(&ciProfileInfo)
if ret.Value() != nvml.SUCCESS {
if ret != nvml.SUCCESS {
return fmt.Errorf("error getting Compute instances for profile '(%v, %v)': %v", j, k, ret)
}

Expand Down
3 changes: 2 additions & 1 deletion internal/nvlib/nvlib.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,9 @@
package nvlib

import (
"github.com/NVIDIA/go-nvml/pkg/nvml"

"github.com/NVIDIA/mig-parted/internal/nvlib/mig"
"github.com/NVIDIA/mig-parted/internal/nvml"
)

type Interface struct {
Expand Down
85 changes: 0 additions & 85 deletions internal/nvml/consts.go

This file was deleted.

Loading

0 comments on commit aa5891f

Please sign in to comment.