Skip to content

Commit

Permalink
Also update fake (KWOK) nodes: advertise other devices in node capacity and allocatable
Browse files Browse the repository at this point in the history
  • Loading branch information
enoodle committed Nov 26, 2024
1 parent f505cdd commit c07ef90
Show file tree
Hide file tree
Showing 3 changed files with 86 additions and 6 deletions.
10 changes: 8 additions & 2 deletions internal/deviceplugin/device_plugin.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,15 @@ func NewDevicePlugins(topology *topology.NodeTopology, kubeClient kubernetes.Int
}

if viper.GetBool(constants.EnvFakeNode) {
otherDevices := make(map[string]int)
for _, genericDevice := range topology.OtherDevices {
otherDevices[genericDevice.Name] = genericDevice.Count
}

return []Interface{&FakeNodeDevicePlugin{
kubeClient: kubeClient,
gpuCount: getGpuCount(topology),
kubeClient: kubeClient,
gpuCount: getGpuCount(topology),
otherDevices: otherDevices,
}}
}

Expand Down
32 changes: 28 additions & 4 deletions internal/deviceplugin/fake_node.go
Original file line number Diff line number Diff line change
@@ -1,25 +1,49 @@
package deviceplugin

import (
"encoding/json"
"fmt"
"os"

"github.com/run-ai/fake-gpu-operator/internal/common/constants"
"golang.org/x/net/context"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/kubernetes"

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// FakeNodeDevicePlugin publishes fake device resources for a node by patching
// the node's status through kubeClient (see Serve). gpuCount is the quantity
// reported for the NVIDIA GPU resource; otherDevices maps additional resource
// names to the quantity reported for each.
type FakeNodeDevicePlugin struct {
kubeClient kubernetes.Interface
gpuCount int
kubeClient kubernetes.Interface
gpuCount int
otherDevices map[string]int
}

func (f *FakeNodeDevicePlugin) Serve() error {
patch := fmt.Sprintf(`{"status": {"capacity": {"%s": "%d"}, "allocatable": {"%s": "%d"}}}`, nvidiaGPUResourceName, f.gpuCount, nvidiaGPUResourceName, f.gpuCount)
_, err := f.kubeClient.CoreV1().Nodes().Patch(context.TODO(), os.Getenv(constants.EnvNodeName), types.MergePatchType, []byte(patch), metav1.PatchOptions{}, "status")
nodeStatus := v1.NodeStatus{
Capacity: v1.ResourceList{
v1.ResourceName(nvidiaGPUResourceName): *resource.NewQuantity(int64(f.gpuCount), resource.DecimalSI),
},
Allocatable: v1.ResourceList{
v1.ResourceName(nvidiaGPUResourceName): *resource.NewQuantity(int64(f.gpuCount), resource.DecimalSI),
},
}

for deviceName, count := range f.otherDevices {
nodeStatus.Capacity[v1.ResourceName(deviceName)] = *resource.NewQuantity(int64(count), resource.DecimalSI)
nodeStatus.Allocatable[v1.ResourceName(deviceName)] = *resource.NewQuantity(int64(count), resource.DecimalSI)
}

// Convert the patch struct to JSON
patchBytes, err := json.Marshal(v1.Node{Status: nodeStatus})
if err != nil {
return fmt.Errorf("failed to marshal patch: %v", err)
}

// Apply the patch
_, err = f.kubeClient.CoreV1().Nodes().Patch(context.TODO(), os.Getenv(constants.EnvNodeName), types.MergePatchType, patchBytes, metav1.PatchOptions{}, "status")
if err != nil {
return fmt.Errorf("failed to update node capacity and allocatable: %v", err)
}
Expand Down
50 changes: 50 additions & 0 deletions internal/deviceplugin/fake_node_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
package deviceplugin

import (
"os"

. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
"golang.org/x/net/context"

v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/kubernetes/fake"
)

// Spec for FakeNodeDevicePlugin.Serve: against a fake clientset seeded with a
// bare node, Serve must publish both the GPU count and every entry of
// otherDevices in the node's Capacity and Allocatable lists.
var _ = Describe("FakeNodeDevicePlugin.Serve", func() {
	It("should update the node capacity and allocatable", func() {
		node := &v1.Node{
			ObjectMeta: metav1.ObjectMeta{
				Name: "node1",
			},
		}

		// Serve looks up the target node name from the environment
		// (presumably this is the variable behind constants.EnvNodeName —
		// confirm). Remember the previous value and restore it after the spec
		// so the change does not leak into other specs in the same process.
		previousNodeName, hadNodeName := os.LookupEnv("NODE_NAME")
		Expect(os.Setenv("NODE_NAME", "node1")).To(Succeed())
		DeferCleanup(func() error {
			if hadNodeName {
				return os.Setenv("NODE_NAME", previousNodeName)
			}
			return os.Unsetenv("NODE_NAME")
		})

		fakeClient := fake.NewSimpleClientset(node)

		fakeNodeDevicePlugin := &FakeNodeDevicePlugin{
			kubeClient:   fakeClient,
			gpuCount:     1,
			otherDevices: map[string]int{"device1": 2},
		}

		err := fakeNodeDevicePlugin.Serve()
		Expect(err).ToNot(HaveOccurred())

		// Read the node back from the fake clientset and verify the patch landed.
		updateNode, err := fakeClient.CoreV1().Nodes().Get(context.TODO(), "node1", metav1.GetOptions{})
		Expect(err).ToNot(HaveOccurred())
		Expect(testResourceListCondition(updateNode.Status.Capacity, v1.ResourceName(nvidiaGPUResourceName), 1)).To(BeTrue())
		Expect(testResourceListCondition(updateNode.Status.Allocatable, v1.ResourceName(nvidiaGPUResourceName), 1)).To(BeTrue())
		Expect(testResourceListCondition(updateNode.Status.Capacity, v1.ResourceName("device1"), 2)).To(BeTrue())
		Expect(testResourceListCondition(updateNode.Status.Allocatable, v1.ResourceName("device1"), 2)).To(BeTrue())
	})
})

// testResourceListCondition reports whether resourceList contains an entry for
// resourceName whose quantity's Value() equals value. It returns false when
// the resource is absent from the list.
func testResourceListCondition(resourceList v1.ResourceList, resourceName v1.ResourceName, value int64) bool {
	if entry, ok := resourceList[resourceName]; ok {
		return entry.Value() == value
	}
	return false
}

0 comments on commit c07ef90

Please sign in to comment.