diff --git a/Makefile b/Makefile index 310f1dc52..f5ca7edc8 100644 --- a/Makefile +++ b/Makefile @@ -226,7 +226,7 @@ test-e2e-k8s: export NAMESPACE=sriov-network-operator test-e2e-k8s: test-e2e test-bindata-scripts: fakechroot - fakechroot ./test/scripts/enable-kargs_test.sh + fakechroot ./test/scripts/kargs_test.sh test-%: generate manifests envtest KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir=/tmp -p path)" HOME="$(shell pwd)" go test ./$*/... -coverprofile cover-$*.out -coverpkg ./... -v diff --git a/api/v1/sriovnetworknodestate_types.go b/api/v1/sriovnetworknodestate_types.go index 4b90d61d2..e5f59d71c 100644 --- a/api/v1/sriovnetworknodestate_types.go +++ b/api/v1/sriovnetworknodestate_types.go @@ -27,6 +27,7 @@ import ( type SriovNetworkNodeStateSpec struct { Interfaces Interfaces `json:"interfaces,omitempty"` Bridges Bridges `json:"bridges,omitempty"` + System System `json:"system,omitempty"` } type Interfaces []Interface @@ -114,10 +115,17 @@ type OVSUplinkConfigExt struct { Interface OVSInterfaceConfig `json:"interface,omitempty"` } +type System struct { + // +kubebuilder:validation:Enum=shared;exclusive + //RDMA subsystem. Allowed value "shared", "exclusive". + RdmaMode string `json:"rdmaMode,omitempty"` +} + // SriovNetworkNodeStateStatus defines the observed state of SriovNetworkNodeState type SriovNetworkNodeStateStatus struct { Interfaces InterfaceExts `json:"interfaces,omitempty"` Bridges Bridges `json:"bridges,omitempty"` + System System `json:"system,omitempty"` SyncStatus string `json:"syncStatus,omitempty"` LastSyncError string `json:"lastSyncError,omitempty"` } diff --git a/api/v1/sriovnetworkpoolconfig_types.go b/api/v1/sriovnetworkpoolconfig_types.go index c6e710a99..011ffc7d9 100644 --- a/api/v1/sriovnetworkpoolconfig_types.go +++ b/api/v1/sriovnetworkpoolconfig_types.go @@ -21,6 +21,10 @@ type SriovNetworkPoolConfigSpec struct { // Drain will respect Pod Disruption Budgets (PDBs) such as etcd quorum guards, // even if maxUnavailable is greater than one. MaxUnavailable *intstr.IntOrString `json:"maxUnavailable,omitempty"` + + // +kubebuilder:validation:Enum=shared;exclusive + // RDMA subsystem. Allowed value "shared", "exclusive". + RdmaMode string `json:"rdmaMode,omitempty"` } type OvsHardwareOffloadConfig struct { diff --git a/api/v1/zz_generated.deepcopy.go b/api/v1/zz_generated.deepcopy.go index fc9477593..0209c0573 100644 --- a/api/v1/zz_generated.deepcopy.go +++ b/api/v1/zz_generated.deepcopy.go @@ -783,6 +783,7 @@ func (in *SriovNetworkNodeStateSpec) DeepCopyInto(out *SriovNetworkNodeStateSpec } } in.Bridges.DeepCopyInto(&out.Bridges) + out.System = in.System } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SriovNetworkNodeStateSpec. @@ -806,6 +807,7 @@ func (in *SriovNetworkNodeStateStatus) DeepCopyInto(out *SriovNetworkNodeStateSt } } in.Bridges.DeepCopyInto(&out.Bridges) + out.System = in.System } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SriovNetworkNodeStateStatus. @@ -1066,6 +1068,21 @@ func (in *SriovOperatorConfigStatus) DeepCopy() *SriovOperatorConfigStatus { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *System) DeepCopyInto(out *System) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new System. +func (in *System) DeepCopy() *System { + if in == nil { + return nil + } + out := new(System) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *TrunkConfig) DeepCopyInto(out *TrunkConfig) { *out = *in diff --git a/bindata/scripts/enable-kargs.sh b/bindata/scripts/enable-kargs.sh deleted file mode 100755 index 0dc18c784..000000000 --- a/bindata/scripts/enable-kargs.sh +++ /dev/null @@ -1,33 +0,0 @@ -#!/bin/bash -set -x - -declare -a kargs=( "$@" ) -ret=0 -args=$(chroot /host/ cat /proc/cmdline) - -if chroot /host/ test -f /run/ostree-booted ; then - for t in "${kargs[@]}";do - if [[ $args != *${t}* ]];then - if chroot /host/ rpm-ostree kargs | grep -vq ${t}; then - chroot /host/ rpm-ostree kargs --append ${t} > /dev/null 2>&1 - fi - let ret++ - fi - done -else - chroot /host/ which grubby > /dev/null 2>&1 - # if grubby is not there, let's tell it - if [ $? -ne 0 ]; then - exit 127 - fi - for t in "${kargs[@]}";do - if [[ $args != *${t}* ]];then - if chroot /host/ grubby --info=DEFAULT | grep args | grep -vq ${t}; then - chroot /host/ grubby --update-kernel=DEFAULT --args=${t} > /dev/null 2>&1 - fi - let ret++ - fi - done -fi - -echo $ret diff --git a/bindata/scripts/kargs.sh b/bindata/scripts/kargs.sh new file mode 100755 index 000000000..8d118456e --- /dev/null +++ b/bindata/scripts/kargs.sh @@ -0,0 +1,55 @@ +#!/bin/bash +set -x + +command=$1 +shift +declare -a kargs=( "$@" ) +ret=0 +args=$(chroot /host/ cat /proc/cmdline) + +if chroot /host/ test -f /run/ostree-booted ; then + for t in "${kargs[@]}";do + if [[ $command == "add" ]];then + if [[ $args != *${t}* ]];then + if chroot /host/ rpm-ostree kargs | grep -vq ${t}; then + chroot /host/ rpm-ostree kargs --append ${t} > /dev/null 2>&1 + fi + let ret++ + fi + fi + if [[ $command == "remove" ]];then + if [[ $args == *${t}* ]];then + if chroot /host/ rpm-ostree kargs | grep -q ${t}; then + chroot /host/ rpm-ostree kargs --delete ${t} > /dev/null 2>&1 + fi + let ret++ + fi + fi + done +else + chroot /host/ which grubby > /dev/null 2>&1 + # if grubby is not there, let's tell it + if [ $? -ne 0 ]; then + exit 127 + fi + for t in "${kargs[@]}";do + if [[ $command == "add" ]];then + if [[ $args != *${t}* ]];then + if chroot /host/ grubby --info=DEFAULT | grep args | grep -vq ${t}; then + chroot /host/ grubby --update-kernel=DEFAULT --args=${t} > /dev/null 2>&1 + fi + let ret++ + fi + fi + if [[ $command == "remove" ]];then + if [[ $args == *${t}* ]];then + if chroot /host/ grubby --info=DEFAULT | grep args | grep -q ${t}; then + chroot /host/ grubby --update-kernel=DEFAULT --remove-args=${t} > /dev/null 2>&1 + fi + let ret++ + fi + fi + done +fi + +echo $ret diff --git a/config/crd/bases/sriovnetwork.openshift.io_sriovnetworknodestates.yaml b/config/crd/bases/sriovnetwork.openshift.io_sriovnetworknodestates.yaml index c5bf230c3..31ddf3bf1 100644 --- a/config/crd/bases/sriovnetwork.openshift.io_sriovnetworknodestates.yaml +++ b/config/crd/bases/sriovnetwork.openshift.io_sriovnetworknodestates.yaml @@ -174,6 +174,15 @@ spec: - pciAddress type: object type: array + system: + properties: + rdmaMode: + description: RDMA subsystem. Allowed value "shared", "exclusive". + enum: + - shared + - exclusive + type: string + type: object type: object status: description: SriovNetworkNodeStateStatus defines the observed state of @@ -335,6 +344,15 @@ spec: type: string syncStatus: type: string + system: + properties: + rdmaMode: + description: RDMA subsystem. Allowed value "shared", "exclusive". + enum: + - shared + - exclusive + type: string + type: object type: object type: object served: true diff --git a/config/crd/bases/sriovnetwork.openshift.io_sriovnetworkpoolconfigs.yaml b/config/crd/bases/sriovnetwork.openshift.io_sriovnetworkpoolconfigs.yaml index 2cb2ece31..3d8a6a105 100644 --- a/config/crd/bases/sriovnetwork.openshift.io_sriovnetworkpoolconfigs.yaml +++ b/config/crd/bases/sriovnetwork.openshift.io_sriovnetworkpoolconfigs.yaml @@ -111,6 +111,12 @@ spec: Name is the name of MachineConfigPool to be enabled with OVS hardware offload type: string type: object + rdmaMode: + description: RDMA subsystem. Allowed value "shared", "exclusive". + enum: + - shared + - exclusive + type: string type: object status: description: SriovNetworkPoolConfigStatus defines the observed state of diff --git a/controllers/drain_controller.go b/controllers/drain_controller.go index 86da909d8..b96458fa7 100644 --- a/controllers/drain_controller.go +++ b/controllers/drain_controller.go @@ -24,11 +24,8 @@ import ( corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" - "k8s.io/apimachinery/pkg/util/intstr" "k8s.io/client-go/tools/record" "k8s.io/client-go/util/workqueue" ctrl "sigs.k8s.io/controller-runtime" @@ -48,13 +45,6 @@ import ( "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars" ) -var ( - oneNode = intstr.FromInt32(1) - defaultNpcl = &sriovnetworkv1.SriovNetworkPoolConfig{Spec: sriovnetworkv1.SriovNetworkPoolConfigSpec{ - MaxUnavailable: &oneNode, - NodeSelector: &metav1.LabelSelector{}}} -) - type DrainReconcile struct { client.Client Scheme *runtime.Scheme @@ -346,94 +336,7 @@ func (dr *DrainReconcile) tryDrainNode(ctx context.Context, node *corev1.Node) ( } func (dr *DrainReconcile) findNodePoolConfig(ctx context.Context, node *corev1.Node) (*sriovnetworkv1.SriovNetworkPoolConfig, []corev1.Node, error) { - logger := log.FromContext(ctx) - logger.Info("findNodePoolConfig():") - // get all the sriov network pool configs - npcl := &sriovnetworkv1.SriovNetworkPoolConfigList{} - err := dr.List(ctx, npcl) - if err != nil { - logger.Error(err, "failed to list sriovNetworkPoolConfig") - return nil, nil, err - } - - selectedNpcl := []*sriovnetworkv1.SriovNetworkPoolConfig{} - nodesInPools := map[string]interface{}{} - - for _, npc := range npcl.Items { - // we skip hw offload objects - if npc.Spec.OvsHardwareOffloadConfig.Name != "" { - continue - } - - if npc.Spec.NodeSelector == nil { - npc.Spec.NodeSelector = &metav1.LabelSelector{} - } - - selector, err := metav1.LabelSelectorAsSelector(npc.Spec.NodeSelector) - if err != nil { - logger.Error(err, "failed to create label selector from nodeSelector", "nodeSelector", npc.Spec.NodeSelector) - return nil, nil, err - } - - if selector.Matches(labels.Set(node.Labels)) { - selectedNpcl = append(selectedNpcl, npc.DeepCopy()) - } - - nodeList := &corev1.NodeList{} - err = dr.List(ctx, nodeList, &client.ListOptions{LabelSelector: selector}) - if err != nil { - logger.Error(err, "failed to list all the nodes matching the pool with label selector from nodeSelector", - "machineConfigPoolName", npc, - "nodeSelector", npc.Spec.NodeSelector) - return nil, nil, err - } - - for _, nodeName := range nodeList.Items { - nodesInPools[nodeName.Name] = nil - } - } - - if len(selectedNpcl) > 1 { - // don't allow the node to be part of multiple pools - err = fmt.Errorf("node is part of more then one pool") - logger.Error(err, "multiple pools founded for a specific node", "numberOfPools", len(selectedNpcl), "pools", selectedNpcl) - return nil, nil, err - } else if len(selectedNpcl) == 1 { - // found one pool for our node - logger.V(2).Info("found sriovNetworkPool", "pool", *selectedNpcl[0]) - selector, err := metav1.LabelSelectorAsSelector(selectedNpcl[0].Spec.NodeSelector) - if err != nil { - logger.Error(err, "failed to create label selector from nodeSelector", "nodeSelector", selectedNpcl[0].Spec.NodeSelector) - return nil, nil, err - } - - // list all the nodes that are also part of this pool and return them - nodeList := &corev1.NodeList{} - err = dr.List(ctx, nodeList, &client.ListOptions{LabelSelector: selector}) - if err != nil { - logger.Error(err, "failed to list nodes using with label selector", "labelSelector", selector) - return nil, nil, err - } - - return selectedNpcl[0], nodeList.Items, nil - } else { - // in this case we get all the nodes and remove the ones that already part of any pool - logger.V(1).Info("node doesn't belong to any pool, using default drain configuration with MaxUnavailable of one", "pool", *defaultNpcl) - nodeList := &corev1.NodeList{} - err = dr.List(ctx, nodeList) - if err != nil { - logger.Error(err, "failed to list all the nodes") - return nil, nil, err - } - - defaultNodeLists := []corev1.Node{} - for _, nodeObj := range nodeList.Items { - if _, exist := nodesInPools[nodeObj.Name]; !exist { - defaultNodeLists = append(defaultNodeLists, nodeObj) - } - } - return defaultNpcl, defaultNodeLists, nil - } + return findNodePoolConfig(ctx, node, dr.Client) } // SetupWithManager sets up the controller with the Manager. diff --git a/controllers/helper.go b/controllers/helper.go index 9ff735473..b90ad44f8 100644 --- a/controllers/helper.go +++ b/controllers/helper.go @@ -30,9 +30,12 @@ import ( corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/equality" "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" uns "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/intstr" kscheme "k8s.io/client-go/kubernetes/scheme" k8sclient "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" @@ -47,10 +50,17 @@ import ( "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars" ) -var webhooks = map[string](string){ - constants.InjectorWebHookName: constants.InjectorWebHookPath, - constants.OperatorWebHookName: constants.OperatorWebHookPath, -} +var ( + webhooks = map[string](string){ + constants.InjectorWebHookName: constants.InjectorWebHookPath, + constants.OperatorWebHookName: constants.OperatorWebHookPath, + } + oneNode = intstr.FromInt32(1) + defaultPoolConfig = &sriovnetworkv1.SriovNetworkPoolConfig{Spec: sriovnetworkv1.SriovNetworkPoolConfigSpec{ + MaxUnavailable: &oneNode, + NodeSelector: &metav1.LabelSelector{}, + RdmaMode: ""}} +) const ( clusterRoleResourceName = "ClusterRole" @@ -397,3 +407,94 @@ func updateDaemonsetNodeSelector(obj *uns.Unstructured, nodeSelector map[string] } return nil } + +func findNodePoolConfig(ctx context.Context, node *corev1.Node, c k8sclient.Client) (*sriovnetworkv1.SriovNetworkPoolConfig, []corev1.Node, error) { + logger := log.FromContext(ctx) + logger.Info("FindNodePoolConfig():") + // get all the sriov network pool configs + npcl := &sriovnetworkv1.SriovNetworkPoolConfigList{} + err := c.List(ctx, npcl) + if err != nil { + logger.Error(err, "failed to list sriovNetworkPoolConfig") + return nil, nil, err + } + + selectedNpcl := []*sriovnetworkv1.SriovNetworkPoolConfig{} + nodesInPools := map[string]interface{}{} + + for _, npc := range npcl.Items { + // we skip hw offload objects + if npc.Spec.OvsHardwareOffloadConfig.Name != "" { + continue + } + + if npc.Spec.NodeSelector == nil { + npc.Spec.NodeSelector = &metav1.LabelSelector{} + } + + selector, err := metav1.LabelSelectorAsSelector(npc.Spec.NodeSelector) + if err != nil { + logger.Error(err, "failed to create label selector from nodeSelector", "nodeSelector", npc.Spec.NodeSelector) + return nil, nil, err + } + + if selector.Matches(labels.Set(node.Labels)) { + selectedNpcl = append(selectedNpcl, npc.DeepCopy()) + } + + nodeList := &corev1.NodeList{} + err = c.List(ctx, nodeList, &k8sclient.ListOptions{LabelSelector: selector}) + if err != nil { + logger.Error(err, "failed to list all the nodes matching the pool with label selector from nodeSelector", + "machineConfigPoolName", npc, + "nodeSelector", npc.Spec.NodeSelector) + return nil, nil, err + } + + for _, nodeName := range nodeList.Items { + nodesInPools[nodeName.Name] = nil + } + } + + if len(selectedNpcl) > 1 { + // don't allow the node to be part of multiple pools + err = fmt.Errorf("node is part of more then one pool") + logger.Error(err, "multiple pools founded for a specific node", "numberOfPools", len(selectedNpcl), "pools", selectedNpcl) + return nil, nil, err + } else if len(selectedNpcl) == 1 { + // found one pool for our node + logger.V(2).Info("found sriovNetworkPool", "pool", *selectedNpcl[0]) + selector, err := metav1.LabelSelectorAsSelector(selectedNpcl[0].Spec.NodeSelector) + if err != nil { + logger.Error(err, "failed to create label selector from nodeSelector", "nodeSelector", selectedNpcl[0].Spec.NodeSelector) + return nil, nil, err + } + + // list all the nodes that are also part of this pool and return them + nodeList := &corev1.NodeList{} + err = c.List(ctx, nodeList, &k8sclient.ListOptions{LabelSelector: selector}) + if err != nil { + logger.Error(err, "failed to list nodes using with label selector", "labelSelector", selector) + return nil, nil, err + } + + return selectedNpcl[0], nodeList.Items, nil + } else { + // in this case we get all the nodes and remove the ones that already part of any pool + logger.V(1).Info("node doesn't belong to any pool, using default drain configuration with MaxUnavailable of one", "pool", *defaultPoolConfig) + nodeList := &corev1.NodeList{} + err = c.List(ctx, nodeList) + if err != nil { + logger.Error(err, "failed to list all the nodes") + return nil, nil, err + } + + defaultNodeLists := []corev1.Node{} + for _, nodeObj := range nodeList.Items { + if _, exist := nodesInPools[nodeObj.Name]; !exist { + defaultNodeLists = append(defaultNodeLists, nodeObj) + } + } + return defaultPoolConfig, defaultNodeLists, nil + } +} diff --git a/controllers/sriovnetworknodepolicy_controller.go b/controllers/sriovnetworknodepolicy_controller.go index be46880b7..62218436f 100644 --- a/controllers/sriovnetworknodepolicy_controller.go +++ b/controllers/sriovnetworknodepolicy_controller.go @@ -155,22 +155,22 @@ func (r *SriovNetworkNodePolicyReconciler) SetupWithManager(mgr ctrl.Manager) er delayedEventHandler := handler.Funcs{ CreateFunc: func(ctx context.Context, e event.CreateEvent, q workqueue.RateLimitingInterface) { log.Log.WithName("SriovNetworkNodePolicy"). - Info("Enqueuing sync for create event", "resource", e.Object.GetName()) + Info("Enqueuing sync for create event", "resource", e.Object.GetName(), "type", e.Object.GetObjectKind().GroupVersionKind().String()) qHandler(q) }, UpdateFunc: func(ctx context.Context, e event.UpdateEvent, q workqueue.RateLimitingInterface) { log.Log.WithName("SriovNetworkNodePolicy"). - Info("Enqueuing sync for update event", "resource", e.ObjectNew.GetName()) + Info("Enqueuing sync for update event", "resource", e.ObjectNew.GetName(), "type", e.ObjectNew.GetObjectKind().GroupVersionKind().String()) qHandler(q) }, DeleteFunc: func(ctx context.Context, e event.DeleteEvent, q workqueue.RateLimitingInterface) { log.Log.WithName("SriovNetworkNodePolicy"). - Info("Enqueuing sync for delete event", "resource", e.Object.GetName()) + Info("Enqueuing sync for delete event", "resource", e.Object.GetName(), "type", e.Object.GetObjectKind().GroupVersionKind().String()) qHandler(q) }, GenericFunc: func(ctx context.Context, e event.GenericEvent, q workqueue.RateLimitingInterface) { log.Log.WithName("SriovNetworkNodePolicy"). - Info("Enqueuing sync for generic event", "resource", e.Object.GetName()) + Info("Enqueuing sync for generic event", "resource", e.Object.GetName(), "type", e.Object.GetObjectKind().GroupVersionKind().String()) qHandler(q) }, } @@ -199,6 +199,7 @@ func (r *SriovNetworkNodePolicyReconciler) SetupWithManager(mgr ctrl.Manager) er For(&sriovnetworkv1.SriovNetworkNodePolicy{}). Watches(&corev1.Node{}, nodeEvenHandler). Watches(&sriovnetworkv1.SriovNetworkNodePolicy{}, delayedEventHandler). + Watches(&sriovnetworkv1.SriovNetworkPoolConfig{}, delayedEventHandler). WatchesRawSource(&source.Channel{Source: eventChan}, delayedEventHandler). Complete(r) } @@ -271,6 +272,13 @@ func (r *SriovNetworkNodePolicyReconciler) syncAllSriovNetworkNodeStates(ctx con ns := &sriovnetworkv1.SriovNetworkNodeState{} ns.Name = node.Name ns.Namespace = vars.Namespace + netPoolConfig, _, err := findNodePoolConfig(ctx, &node, r.Client) + if err != nil { + logger.Error(err, "failed to get SriovNetworkPoolConfig for the current node") + } + if netPoolConfig != nil { + ns.Spec.System.RdmaMode = netPoolConfig.Spec.RdmaMode + } j, _ := json.Marshal(ns) logger.V(2).Info("SriovNetworkNodeState CR", "content", j) if err := r.syncSriovNetworkNodeState(ctx, dc, npl, ns, &node); err != nil { diff --git a/deployment/sriov-network-operator-chart/crds/sriovnetwork.openshift.io_sriovnetworknodestates.yaml b/deployment/sriov-network-operator-chart/crds/sriovnetwork.openshift.io_sriovnetworknodestates.yaml index c5bf230c3..31ddf3bf1 100644 --- a/deployment/sriov-network-operator-chart/crds/sriovnetwork.openshift.io_sriovnetworknodestates.yaml +++ b/deployment/sriov-network-operator-chart/crds/sriovnetwork.openshift.io_sriovnetworknodestates.yaml @@ -174,6 +174,15 @@ spec: - pciAddress type: object type: array + system: + properties: + rdmaMode: + description: RDMA subsystem. Allowed value "shared", "exclusive". + enum: + - shared + - exclusive + type: string + type: object type: object status: description: SriovNetworkNodeStateStatus defines the observed state of @@ -335,6 +344,15 @@ spec: type: string syncStatus: type: string + system: + properties: + rdmaMode: + description: RDMA subsystem. Allowed value "shared", "exclusive". + enum: + - shared + - exclusive + type: string + type: object type: object type: object served: true diff --git a/deployment/sriov-network-operator-chart/crds/sriovnetwork.openshift.io_sriovnetworkpoolconfigs.yaml b/deployment/sriov-network-operator-chart/crds/sriovnetwork.openshift.io_sriovnetworkpoolconfigs.yaml index 2cb2ece31..3d8a6a105 100644 --- a/deployment/sriov-network-operator-chart/crds/sriovnetwork.openshift.io_sriovnetworkpoolconfigs.yaml +++ b/deployment/sriov-network-operator-chart/crds/sriovnetwork.openshift.io_sriovnetworkpoolconfigs.yaml @@ -111,6 +111,12 @@ spec: Name is the name of MachineConfigPool to be enabled with OVS hardware offload type: string type: object + rdmaMode: + description: RDMA subsystem. Allowed value "shared", "exclusive". + enum: + - shared + - exclusive + type: string type: object status: description: SriovNetworkPoolConfigStatus defines the observed state of diff --git a/go.mod b/go.mod index 350dbb82d..31d70d572 100644 --- a/go.mod +++ b/go.mod @@ -38,6 +38,7 @@ require ( github.com/vishvananda/netlink v1.2.1-beta.2.0.20240221172127-ec7bcb248e94 github.com/vishvananda/netns v0.0.4 go.uber.org/zap v1.25.0 + golang.org/x/net v0.25.0 golang.org/x/time v0.3.0 gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c gopkg.in/yaml.v3 v3.0.1 @@ -145,7 +146,6 @@ require ( golang.org/x/crypto v0.23.0 // indirect golang.org/x/exp v0.0.0-20231006140011-7918f672742d // indirect golang.org/x/mod v0.17.0 // indirect - golang.org/x/net v0.25.0 // indirect golang.org/x/oauth2 v0.13.0 // indirect golang.org/x/sync v0.7.0 // indirect golang.org/x/sys v0.20.0 // indirect diff --git a/pkg/consts/constants.go b/pkg/consts/constants.go index f7025c90d..ba1830f5b 100644 --- a/pkg/consts/constants.go +++ b/pkg/consts/constants.go @@ -54,6 +54,9 @@ const ( VdpaTypeVirtio = "virtio" VdpaTypeVhost = "vhost" + RdmaSubsystemModeShared = "shared" + RdmaSubsystemModeExclusive = "exclusive" + ClusterTypeOpenshift = "openshift" ClusterTypeKubernetes = "kubernetes" @@ -125,6 +128,8 @@ const ( KernelArgIntelIommu = "intel_iommu=on" KernelArgIommuPt = "iommu=pt" KernelArgIommuPassthrough = "iommu.passthrough=1" + KernelArgRdmaShared = "ib_core.netns_mode=1" + KernelArgRdmaExclusive = "ib_core.netns_mode=0" // Feature gates // ParallelNicConfigFeatureGate: allow to configure nics in parallel diff --git a/pkg/daemon/daemon.go b/pkg/daemon/daemon.go index ff7f326dc..53fe82b8b 100644 --- a/pkg/daemon/daemon.go +++ b/pkg/daemon/daemon.go @@ -4,7 +4,6 @@ import ( "context" "fmt" "math/rand" - "os/exec" "reflect" "sync" "time" @@ -751,11 +750,11 @@ func (dn *Daemon) rebootNode() { // However note we use `;` instead of `&&` so we keep rebooting even // if kubelet failed to shutdown - that way the machine will still eventually reboot // as systemd will time out the stop invocation. - cmd := exec.Command("systemd-run", "--unit", "sriov-network-config-daemon-reboot", + stdOut, StdErr, err := dn.HostHelpers.RunCommand("systemd-run", "--unit", "sriov-network-config-daemon-reboot", "--description", "sriov-network-config-daemon reboot node", "/bin/sh", "-c", "systemctl stop kubelet.service; reboot") - if err := cmd.Run(); err != nil { - log.Log.Error(err, "failed to reboot node") + if err != nil { + log.Log.Error(err, "failed to reboot node", "stdOut", stdOut, "StdErr", StdErr) } } diff --git a/pkg/daemon/plugin_test.go b/pkg/daemon/plugin_test.go index a13fc1f8b..7b14a4504 100644 --- a/pkg/daemon/plugin_test.go +++ b/pkg/daemon/plugin_test.go @@ -41,6 +41,14 @@ var _ = Describe("config daemon plugin loading tests", func() { vars.ClusterType = consts.ClusterTypeKubernetes gmockController = gomock.NewController(GinkgoT()) helperMock = helperMocks.NewMockHostHelpersInterface(gmockController) + helperMock.EXPECT().GetCurrentKernelArgs().Return("", nil).AnyTimes() + helperMock.EXPECT().IsKernelArgsSet("", consts.KernelArgIntelIommu).Return(false) + helperMock.EXPECT().IsKernelArgsSet("", consts.KernelArgIommuPt).Return(false) + helperMock.EXPECT().IsKernelArgsSet("", consts.KernelArgPciRealloc).Return(false) + helperMock.EXPECT().IsKernelArgsSet("", consts.KernelArgRdmaExclusive).Return(false) + helperMock.EXPECT().IsKernelArgsSet("", consts.KernelArgRdmaShared).Return(false) + helperMock.EXPECT().IsKernelArgsSet("", consts.KernelArgIommuPassthrough).Return(false) + // k8s plugin is ATM the only plugin which require mocking/faking, as its New method performs additional logic // other than simple plugin struct initialization K8sPlugin = func(_ helper.HostHelpersInterface) (plugin.VendorPlugin, error) { diff --git a/pkg/daemon/writer.go b/pkg/daemon/writer.go index 09d06d8f9..42eeb2928 100644 --- a/pkg/daemon/writer.go +++ b/pkg/daemon/writer.go @@ -118,6 +118,7 @@ func (w *NodeStateStatusWriter) pollNicStatus() error { log.Log.V(2).Info("pollNicStatus()") var iface []sriovnetworkv1.InterfaceExt var bridges sriovnetworkv1.Bridges + var rdmaMode string var err error if vars.PlatformType == consts.VirtualOpenStack { @@ -138,8 +139,14 @@ func (w *NodeStateStatusWriter) pollNicStatus() error { } } + rdmaMode, err = w.hostHelper.DiscoverRDMASubsystem() + if err != nil { + return err + } + w.status.Interfaces = iface w.status.Bridges = bridges + w.status.System.RdmaMode = rdmaMode return nil } @@ -182,6 +189,7 @@ func (w *NodeStateStatusWriter) setNodeStateStatus(msg Message) (*sriovnetworkv1 nodeState, err := w.updateNodeStateStatusRetry(func(nodeState *sriovnetworkv1.SriovNetworkNodeState) { nodeState.Status.Interfaces = w.status.Interfaces nodeState.Status.Bridges = w.status.Bridges + nodeState.Status.System = w.status.System if msg.lastSyncError != "" || msg.syncStatus == consts.SyncStatusSucceeded { // clear lastSyncError when sync Succeeded nodeState.Status.LastSyncError = msg.lastSyncError diff --git a/pkg/helper/mock/mock_helper.go b/pkg/helper/mock/mock_helper.go index 432d741be..b413ecdee 100644 --- a/pkg/helper/mock/mock_helper.go +++ b/pkg/helper/mock/mock_helper.go @@ -294,6 +294,21 @@ func (mr *MockHostHelpersInterfaceMockRecorder) DiscoverBridges() *gomock.Call { return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DiscoverBridges", reflect.TypeOf((*MockHostHelpersInterface)(nil).DiscoverBridges)) } +// DiscoverRDMASubsystem mocks base method. +func (m *MockHostHelpersInterface) DiscoverRDMASubsystem() (string, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "DiscoverRDMASubsystem") + ret0, _ := ret[0].(string) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// DiscoverRDMASubsystem indicates an expected call of DiscoverRDMASubsystem. +func (mr *MockHostHelpersInterfaceMockRecorder) DiscoverRDMASubsystem() *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DiscoverRDMASubsystem", reflect.TypeOf((*MockHostHelpersInterface)(nil).DiscoverRDMASubsystem)) +} + // DiscoverSriovDevices mocks base method. func (m *MockHostHelpersInterface) DiscoverSriovDevices(storeManager store.ManagerInterface) ([]v1.InterfaceExt, error) { m.ctrl.T.Helper() @@ -1044,6 +1059,20 @@ func (mr *MockHostHelpersInterfaceMockRecorder) SetNicSriovMode(pciAddr, mode in return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SetNicSriovMode", reflect.TypeOf((*MockHostHelpersInterface)(nil).SetNicSriovMode), pciAddr, mode) } +// SetRDMASubsystem mocks base method. +func (m *MockHostHelpersInterface) SetRDMASubsystem(mode string) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "SetRDMASubsystem", mode) + ret0, _ := ret[0].(error) + return ret0 +} + +// SetRDMASubsystem indicates an expected call of SetRDMASubsystem. +func (mr *MockHostHelpersInterfaceMockRecorder) SetRDMASubsystem(mode interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SetRDMASubsystem", reflect.TypeOf((*MockHostHelpersInterface)(nil).SetRDMASubsystem), mode) +} + // SetSriovNumVfs mocks base method. func (m *MockHostHelpersInterface) SetSriovNumVfs(pciAddr string, numVfs int) error { m.ctrl.T.Helper() diff --git a/pkg/host/internal/lib/netlink/mock/mock_netlink.go b/pkg/host/internal/lib/netlink/mock/mock_netlink.go index 5b3bcc790..ec136bf29 100644 --- a/pkg/host/internal/lib/netlink/mock/mock_netlink.go +++ b/pkg/host/internal/lib/netlink/mock/mock_netlink.go @@ -289,6 +289,21 @@ func (mr *MockNetlinkLibMockRecorder) RdmaLinkByName(name interface{}) *gomock.C return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "RdmaLinkByName", reflect.TypeOf((*MockNetlinkLib)(nil).RdmaLinkByName), name) } +// RdmaSystemGetNetnsMode mocks base method. +func (m *MockNetlinkLib) RdmaSystemGetNetnsMode() (string, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "RdmaSystemGetNetnsMode") + ret0, _ := ret[0].(string) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// RdmaSystemGetNetnsMode indicates an expected call of RdmaSystemGetNetnsMode. +func (mr *MockNetlinkLibMockRecorder) RdmaSystemGetNetnsMode() *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "RdmaSystemGetNetnsMode", reflect.TypeOf((*MockNetlinkLib)(nil).RdmaSystemGetNetnsMode)) +} + // VDPADelDev mocks base method. func (m *MockNetlinkLib) VDPADelDev(name string) error { m.ctrl.T.Helper() diff --git a/pkg/host/internal/lib/netlink/netlink.go b/pkg/host/internal/lib/netlink/netlink.go index ed063834e..ad6056710 100644 --- a/pkg/host/internal/lib/netlink/netlink.go +++ b/pkg/host/internal/lib/netlink/netlink.go @@ -68,6 +68,8 @@ type NetlinkLib interface { RdmaLinkByName(name string) (*netlink.RdmaLink, error) // IsLinkAdminStateUp checks if the admin state of a link is up IsLinkAdminStateUp(link Link) bool + // RdmaSystemGetNetnsMode returns RDMA subsystem mode + RdmaSystemGetNetnsMode() (string, error) } type libWrapper struct{} @@ -185,3 +187,8 @@ func (w *libWrapper) RdmaLinkByName(name string) (*netlink.RdmaLink, error) { func (w *libWrapper) IsLinkAdminStateUp(link Link) bool { return link.Attrs().Flags&net.FlagUp == 1 } + +// RdmaSystemGetNetnsMode returns RDMA subsystem mode +func (w *libWrapper) RdmaSystemGetNetnsMode() (string, error) { + return netlink.RdmaSystemGetNetnsMode() +} diff --git a/pkg/host/internal/network/network.go b/pkg/host/internal/network/network.go index ef85ad24a..3ac17cf8f 100644 --- a/pkg/host/internal/network/network.go +++ b/pkg/host/internal/network/network.go @@ -429,3 +429,42 @@ func (n *network) GetPciAddressFromInterfaceName(interfaceName string) (string, log.Log.V(2).Info("GetPciAddressFromInterfaceName(): result", "interface", interfaceName, "pci address", pciAddress) return pciAddress, nil } + +func (n *network) DiscoverRDMASubsystem() (string, error) { + subsystem, err := n.netlinkLib.RdmaSystemGetNetnsMode() + + if err != nil { + log.Log.Error(err, "DiscoverRDMASubsystem(): failed to get RDMA subsystem mode") + return "", err + } + + return subsystem, nil +} + +func (n *network) SetRDMASubsystem(mode string) error { + log.Log.Info("SetRDMASubsystem(): Updating RDMA subsystem mode", "mode", mode) + path := filepath.Join(vars.FilesystemRoot, consts.Host, "etc", "modprobe.d", "sriov_network_operator_modules_config.conf") + + if mode == "" { + err := os.Remove(path) + if err != nil && !errors.Is(err, os.ErrNotExist) { + log.Log.Error(err, "failed to remove ib_core config file") + return err + } + return nil + } + + modeValue := 1 + if mode == "exclusive" { + modeValue = 0 + } + config := fmt.Sprintf("# This file is managed by sriov-network-operator do not edit.\noptions ib_core netns_mode=%d\n", modeValue) + + err := os.WriteFile(path, []byte(config), 0644) + if err != nil { + log.Log.Error(err, "SetRDMASubsystem(): failed to write sriov_network_operator_modules_config.conf") + return fmt.Errorf("failed to write sriov_network_operator_modules_config.conf: %v", err) + } + + return nil +} diff --git a/pkg/host/internal/network/network_test.go b/pkg/host/internal/network/network_test.go index 19eb3f438..3e197c3f8 100644 --- a/pkg/host/internal/network/network_test.go +++ b/pkg/host/internal/network/network_test.go @@ -283,4 +283,35 @@ var _ = Describe("Network", func() { Expect(pci).To(Equal("0000:3b:00.0")) }) }) + Context("DiscoverRDMASubsystem", func() { + It("Should get RDMA Subsystem using netlink", func() { + netlinkLibMock.EXPECT().RdmaSystemGetNetnsMode().Return("shared", nil) + + pci, err := n.DiscoverRDMASubsystem() + Expect(err).NotTo(HaveOccurred()) + Expect(pci).To(Equal("shared")) + }) + }) + Context("SetRDMASubsystem", func() { + It("Should set RDMA Subsystem shared mode", func() { + helpers.GinkgoConfigureFakeFS(&fakefilesystem.FS{ + Dirs: []string{"/host/etc/modprobe.d"}, + Files: map[string][]byte{ + "/host/etc/modprobe.d/sriov_network_operator_modules_config.conf": {}, + }, + }) + Expect(n.SetRDMASubsystem("shared")).NotTo(HaveOccurred()) + helpers.GinkgoAssertFileContentsEquals("/host/etc/modprobe.d/sriov_network_operator_modules_config.conf", "# This file is managed by sriov-network-operator do not edit.\noptions ib_core netns_mode=1\n") + }) + It("Should set RDMA Subsystem exclusive mode", func() { + helpers.GinkgoConfigureFakeFS(&fakefilesystem.FS{ + Dirs: []string{"/host/etc/modprobe.d"}, + Files: map[string][]byte{ + "/host/etc/modprobe.d/sriov_network_operator_modules_config.conf": {}, + }, + }) + Expect(n.SetRDMASubsystem("exclusive")).NotTo(HaveOccurred()) + helpers.GinkgoAssertFileContentsEquals("/host/etc/modprobe.d/sriov_network_operator_modules_config.conf", "# This file is managed by sriov-network-operator do not edit.\noptions ib_core netns_mode=0\n") + }) + }) }) diff --git a/pkg/host/mock/mock_host.go b/pkg/host/mock/mock_host.go index 5ebed46aa..095d270a9 100644 --- a/pkg/host/mock/mock_host.go +++ b/pkg/host/mock/mock_host.go @@ -264,6 +264,21 @@ func (mr *MockHostManagerInterfaceMockRecorder) DiscoverBridges() *gomock.Call { return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DiscoverBridges", reflect.TypeOf((*MockHostManagerInterface)(nil).DiscoverBridges)) } +// DiscoverRDMASubsystem mocks base method. +func (m *MockHostManagerInterface) DiscoverRDMASubsystem() (string, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "DiscoverRDMASubsystem") + ret0, _ := ret[0].(string) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// DiscoverRDMASubsystem indicates an expected call of DiscoverRDMASubsystem. +func (mr *MockHostManagerInterfaceMockRecorder) DiscoverRDMASubsystem() *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DiscoverRDMASubsystem", reflect.TypeOf((*MockHostManagerInterface)(nil).DiscoverRDMASubsystem)) +} + // DiscoverSriovDevices mocks base method. func (m *MockHostManagerInterface) DiscoverSriovDevices(storeManager store.ManagerInterface) ([]v1.InterfaceExt, error) { m.ctrl.T.Helper() @@ -859,6 +874,20 @@ func (mr *MockHostManagerInterfaceMockRecorder) SetNicSriovMode(pciAddr, mode in return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SetNicSriovMode", reflect.TypeOf((*MockHostManagerInterface)(nil).SetNicSriovMode), pciAddr, mode) } +// SetRDMASubsystem mocks base method. +func (m *MockHostManagerInterface) SetRDMASubsystem(mode string) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "SetRDMASubsystem", mode) + ret0, _ := ret[0].(error) + return ret0 +} + +// SetRDMASubsystem indicates an expected call of SetRDMASubsystem. +func (mr *MockHostManagerInterfaceMockRecorder) SetRDMASubsystem(mode interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SetRDMASubsystem", reflect.TypeOf((*MockHostManagerInterface)(nil).SetRDMASubsystem), mode) +} + // SetSriovNumVfs mocks base method. func (m *MockHostManagerInterface) SetSriovNumVfs(pciAddr string, numVfs int) error { m.ctrl.T.Helper() diff --git a/pkg/host/types/interfaces.go b/pkg/host/types/interfaces.go index c6e0c8faf..6844ee5ae 100644 --- a/pkg/host/types/interfaces.go +++ b/pkg/host/types/interfaces.go @@ -90,6 +90,10 @@ type NetworkInterface interface { GetNetDevLinkAdminState(ifaceName string) string // GetPciAddressFromInterfaceName parses sysfs to get pci address of an interface by name GetPciAddressFromInterfaceName(interfaceName string) (string, error) + // DiscoverRDMASubsystem returns RDMA subsystem mode + DiscoverRDMASubsystem() (string, error) + // SetRDMASubsystem changes RDMA subsystem mode + SetRDMASubsystem(mode string) error } type ServiceInterface interface { diff --git a/pkg/plugins/generic/generic_plugin.go b/pkg/plugins/generic/generic_plugin.go index 552f8142a..948459a7f 100644 --- a/pkg/plugins/generic/generic_plugin.go +++ b/pkg/plugins/generic/generic_plugin.go @@ -1,11 +1,8 @@ package generic import ( - "bytes" "errors" - "os/exec" - "strconv" - "strings" + "fmt" "syscall" "sigs.k8s.io/controller-runtime/pkg/log" @@ -48,12 +45,14 @@ type DriverState struct { type DriverStateMapType map[uint]*DriverState +type KargStateMapType map[string]bool + type GenericPlugin struct { PluginName string SpecVersion string DesireState *sriovnetworkv1.SriovNetworkNodeState DriverStateMap DriverStateMapType - DesiredKernelArgs map[string]bool + DesiredKernelArgs KargStateMapType helpers helper.HostHelpersInterface skipVFConfiguration bool skipBridgeConfiguration bool @@ -82,7 +81,7 @@ type genericPluginOptions struct { skipBridgeConfiguration bool } -const scriptsPath = "bindata/scripts/enable-kargs.sh" +const scriptsPath = "bindata/scripts/kargs.sh" // Initialize our plugin and set up initial values func NewGenericPlugin(helpers helper.HostHelpersInterface, options ...Option) (plugin.VendorPlugin, error) { @@ -112,11 +111,27 @@ func NewGenericPlugin(helpers helper.HostHelpersInterface, options ...Option) (p NeedDriverFunc: needDriverCheckVdpaType, DriverLoaded: false, } + + // To maintain backward compatibility we don't remove the intel_iommu, iommu and pcirealloc + // kernel args if they are configured + kargs, err := helpers.GetCurrentKernelArgs() + if err != nil { + return nil, err + } + desiredKernelArgs := KargStateMapType{ + consts.KernelArgPciRealloc: helpers.IsKernelArgsSet(kargs, consts.KernelArgPciRealloc), + consts.KernelArgIntelIommu: helpers.IsKernelArgsSet(kargs, consts.KernelArgIntelIommu), + consts.KernelArgIommuPt: helpers.IsKernelArgsSet(kargs, consts.KernelArgIommuPt), + consts.KernelArgIommuPassthrough: helpers.IsKernelArgsSet(kargs, consts.KernelArgIommuPassthrough), + consts.KernelArgRdmaShared: false, + consts.KernelArgRdmaExclusive: false, + } + return &GenericPlugin{ PluginName: PluginName, SpecVersion: "1.0", DriverStateMap: driverStateMap, - DesiredKernelArgs: make(map[string]bool), + DesiredKernelArgs: desiredKernelArgs, helpers: helpers, skipVFConfiguration: cfg.skipVFConfiguration, skipBridgeConfiguration: cfg.skipBridgeConfiguration, @@ -179,18 +194,13 @@ func (p *GenericPlugin) CheckStatusChanges(current *sriovnetworkv1.SriovNetworkN } } - missingKernelArgs, err := p.getMissingKernelArgs() + shouldUpdate, err := p.shouldUpdateKernelArgs() if err != nil { log.Log.Error(err, "generic-plugin CheckStatusChanges(): failed to verify missing kernel arguments") return false, err } - if len(missingKernelArgs) != 0 { - log.Log.V(0).Info("generic-plugin CheckStatusChanges(): kernel args missing", - "kernelArgs", missingKernelArgs) - } - - return len(missingKernelArgs) != 0, nil + return shouldUpdate, nil } func (p *GenericPlugin) syncDriverState() error { @@ -228,7 +238,7 @@ func (p *GenericPlugin) Apply() error { p.DesireState.Status.Interfaces, p.skipVFConfiguration); err != nil { // Catch the "cannot allocate memory" error and try to use PCI realloc if errors.Is(err, syscall.ENOMEM) { - p.addToDesiredKernelArgs(consts.KernelArgPciRealloc) + p.enableDesiredKernelArgs(consts.KernelArgPciRealloc) } return err } @@ -264,85 +274,84 @@ func needDriverCheckVdpaType(state *sriovnetworkv1.SriovNetworkNodeState, driver return false } -// setKernelArg Tries to add the kernel args via ostree or grubby. -func setKernelArg(karg string) (bool, error) { - log.Log.Info("generic plugin setKernelArg()") - var stdout, stderr bytes.Buffer - cmd := exec.Command("/bin/sh", scriptsPath, karg) - cmd.Stdout = &stdout - cmd.Stderr = &stderr - - if err := cmd.Run(); err != nil { +// editKernelArg Tries to add the kernel args via ostree or grubby. +func editKernelArg(helper helper.HostHelpersInterface, mode, karg string) error { + log.Log.Info("generic plugin editKernelArg()", "mode", mode, "karg", karg) + _, _, err := helper.RunCommand("/bin/sh", scriptsPath, mode, karg) + if err != nil { // if grubby is not there log and assume kernel args are set correctly. if utils.IsCommandNotFound(err) { - log.Log.Error(err, "generic plugin setKernelArg(): grubby or ostree command not found. Please ensure that kernel arg are set", + log.Log.Error(err, "generic plugin editKernelArg(): grubby or ostree command not found. Please ensure that kernel arg are correct", "kargs", karg) - return false, nil - } - log.Log.Error(err, "generic plugin setKernelArg(): fail to enable kernel arg", "karg", karg) - return false, err - } - - i, err := strconv.Atoi(strings.TrimSpace(stdout.String())) - if err == nil { - if i > 0 { - log.Log.Info("generic plugin setKernelArg(): need to reboot node for kernel arg", "karg", karg) - return true, nil + return nil } + log.Log.Error(err, "generic plugin editKernelArg(): fail to edit kernel arg", "karg", karg) + return err } - return false, err + return nil } -// addToDesiredKernelArgs Should be called to queue a kernel arg to be added to the node. -func (p *GenericPlugin) addToDesiredKernelArgs(karg string) { - if _, ok := p.DesiredKernelArgs[karg]; !ok { - log.Log.Info("generic plugin addToDesiredKernelArgs(): Adding to desired kernel arg", "karg", karg) - p.DesiredKernelArgs[karg] = false - } +// enableDesiredKernelArgs Should be called to mark a kernel arg as enabled. +func (p *GenericPlugin) enableDesiredKernelArgs(karg string) { + log.Log.Info("generic plugin enableDesiredKernelArgs(): enable kernel arg", "karg", karg) + p.DesiredKernelArgs[karg] = true } -// getMissingKernelArgs gets Kernel arguments that have not been set. -func (p *GenericPlugin) getMissingKernelArgs() ([]string, error) { - missingArgs := make([]string, 0, len(p.DesiredKernelArgs)) - if len(p.DesiredKernelArgs) == 0 { - return nil, nil - } +// disableDesiredKernelArgs Should be called to mark a kernel arg as disabled. +func (p *GenericPlugin) disableDesiredKernelArgs(karg string) { + log.Log.Info("generic plugin disableDesiredKernelArgs(): disable kernel arg", "karg", karg) + p.DesiredKernelArgs[karg] = false +} +// shouldUpdateKernelArgs returns true if the DesiredKernelArgs state is not equal to the running kernel args in the system +func (p *GenericPlugin) shouldUpdateKernelArgs() (bool, error) { kargs, err := p.helpers.GetCurrentKernelArgs() if err != nil { - return nil, err + return false, err } - for desiredKarg := range p.DesiredKernelArgs { - if !p.helpers.IsKernelArgsSet(kargs, desiredKarg) { - missingArgs = append(missingArgs, desiredKarg) + for karg, kargState := range p.DesiredKernelArgs { + if kargState && !p.helpers.IsKernelArgsSet(kargs, karg) { + return true, nil + } + + if !kargState && p.helpers.IsKernelArgsSet(kargs, karg) { + return true, nil } } - return missingArgs, nil + return false, nil } // syncDesiredKernelArgs should be called to set all the kernel arguments. Returns bool if node update is needed. -func (p *GenericPlugin) syncDesiredKernelArgs(kargs []string) (bool, error) { +func (p *GenericPlugin) syncDesiredKernelArgs() (bool, error) { + kargs, err := p.helpers.GetCurrentKernelArgs() + if err != nil { + return false, err + } + needReboot := false + for karg, kargState := range p.DesiredKernelArgs { + if kargState { + err = editKernelArg(p.helpers, "add", karg) + if err != nil { + log.Log.Error(err, "generic-plugin syncDesiredKernelArgs(): fail to set kernel arg", "karg", karg) + return false, err + } - for _, karg := range kargs { - if p.DesiredKernelArgs[karg] { - log.Log.V(2).Info("generic-plugin syncDesiredKernelArgs(): previously attempted to set kernel arg", - "karg", karg) - } - // There is a case when we try to set the kernel argument here, the daemon could decide to not reboot because - // the daemon encountered a potentially one-time error. However we always want to make sure that the kernel - // argument is set once the daemon goes through node state sync again. - update, err := setKernelArg(karg) - if err != nil { - log.Log.Error(err, "generic-plugin syncDesiredKernelArgs(): fail to set kernel arg", "karg", karg) - return false, err - } - if update { - needReboot = true - log.Log.V(2).Info("generic-plugin syncDesiredKernelArgs(): need reboot for setting kernel arg", "karg", karg) + if !p.helpers.IsKernelArgsSet(kargs, karg) { + needReboot = true + } + } else { + err = editKernelArg(p.helpers, "remove", karg) + if err != nil { + log.Log.Error(err, "generic-plugin syncDesiredKernelArgs(): fail to remove kernel arg", "karg", karg) + return false, err + } + + if p.helpers.IsKernelArgsSet(kargs, karg) { + needReboot = true + } } - p.DesiredKernelArgs[karg] = true } return needReboot, nil } @@ -423,14 +432,14 @@ func (p *GenericPlugin) addVfioDesiredKernelArg(state *sriovnetworkv1.SriovNetwo kernelArgFnByCPUVendor := map[hostTypes.CPUVendor]func(){ hostTypes.CPUVendorIntel: func() { - p.addToDesiredKernelArgs(consts.KernelArgIntelIommu) - p.addToDesiredKernelArgs(consts.KernelArgIommuPt) + p.enableDesiredKernelArgs(consts.KernelArgIntelIommu) + p.enableDesiredKernelArgs(consts.KernelArgIommuPt) }, hostTypes.CPUVendorAMD: func() { - p.addToDesiredKernelArgs(consts.KernelArgIommuPt) + p.enableDesiredKernelArgs(consts.KernelArgIommuPt) }, hostTypes.CPUVendorARM: func() { - p.addToDesiredKernelArgs(consts.KernelArgIommuPassthrough) + p.enableDesiredKernelArgs(consts.KernelArgIommuPassthrough) }, } @@ -448,26 +457,41 @@ func (p *GenericPlugin) addVfioDesiredKernelArg(state *sriovnetworkv1.SriovNetwo } } +func (p *GenericPlugin) configRdmaKernelArg(state *sriovnetworkv1.SriovNetworkNodeState) error { + if state.Spec.System.RdmaMode == "" { + p.disableDesiredKernelArgs(consts.KernelArgRdmaExclusive) + p.disableDesiredKernelArgs(consts.KernelArgRdmaShared) + } else if state.Spec.System.RdmaMode == "shared" { + p.enableDesiredKernelArgs(consts.KernelArgRdmaShared) + p.disableDesiredKernelArgs(consts.KernelArgRdmaExclusive) + } else if state.Spec.System.RdmaMode == "exclusive" { + p.enableDesiredKernelArgs(consts.KernelArgRdmaExclusive) + p.disableDesiredKernelArgs(consts.KernelArgRdmaShared) + } else { + err := fmt.Errorf("unexpected rdma mode: %s", state.Spec.System.RdmaMode) + log.Log.Error(err, "generic-plugin configRdmaKernelArg(): failed to configure kernel arguments for rdma") + return err + } + + return p.helpers.SetRDMASubsystem(state.Spec.System.RdmaMode) +} + func (p *GenericPlugin) needRebootNode(state *sriovnetworkv1.SriovNetworkNodeState) (bool, error) { needReboot := false p.addVfioDesiredKernelArg(state) - - missingKernelArgs, err := p.getMissingKernelArgs() + err := p.configRdmaKernelArg(state) if err != nil { - log.Log.Error(err, "generic-plugin needRebootNode(): failed to verify missing kernel arguments") return false, err } - if len(missingKernelArgs) != 0 { - needReboot, err = p.syncDesiredKernelArgs(missingKernelArgs) - if err != nil { - log.Log.Error(err, "generic-plugin needRebootNode(): failed to set the desired kernel arguments") - return false, err - } - if needReboot { - log.Log.V(2).Info("generic-plugin needRebootNode(): need reboot for updating kernel arguments") - } + needReboot, err = p.syncDesiredKernelArgs() + if err != nil { + log.Log.Error(err, "generic-plugin needRebootNode(): failed to set the desired kernel arguments") + return false, err + } + if needReboot { + log.Log.V(2).Info("generic-plugin needRebootNode(): need reboot for updating kernel arguments") } return needReboot, nil diff --git a/pkg/plugins/generic/generic_plugin_test.go b/pkg/plugins/generic/generic_plugin_test.go index 0a6674712..2e2aed326 100644 --- a/pkg/plugins/generic/generic_plugin_test.go +++ b/pkg/plugins/generic/generic_plugin_test.go @@ -34,6 +34,16 @@ var _ = Describe("Generic plugin", func() { ctrl = gomock.NewController(t) hostHelper = mock_helper.NewMockHostHelpersInterface(ctrl) + hostHelper.EXPECT().SetRDMASubsystem("").Return(nil).AnyTimes() + hostHelper.EXPECT().GetCurrentKernelArgs().Return("", nil).AnyTimes() + hostHelper.EXPECT().IsKernelArgsSet("", consts.KernelArgIntelIommu).Return(false).AnyTimes() + hostHelper.EXPECT().IsKernelArgsSet("", consts.KernelArgIommuPt).Return(false).AnyTimes() + hostHelper.EXPECT().IsKernelArgsSet("", consts.KernelArgPciRealloc).Return(false).AnyTimes() + hostHelper.EXPECT().IsKernelArgsSet("", consts.KernelArgRdmaExclusive).Return(false).AnyTimes() + hostHelper.EXPECT().IsKernelArgsSet("", consts.KernelArgRdmaShared).Return(false).AnyTimes() + hostHelper.EXPECT().IsKernelArgsSet("", consts.KernelArgIommuPassthrough).Return(false).AnyTimes() + + hostHelper.EXPECT().RunCommand(gomock.Any(), gomock.Any()).Return("", "", nil).AnyTimes() genericPlugin, err = NewGenericPlugin(hostHelper) Expect(err).ToNot(HaveOccurred()) @@ -898,20 +908,21 @@ var _ = Describe("Generic plugin", func() { }, } + rdmaState := &sriovnetworkv1.SriovNetworkNodeState{ + Spec: sriovnetworkv1.SriovNetworkNodeStateSpec{System: sriovnetworkv1.System{ + RdmaMode: consts.RdmaSubsystemModeShared, + }}, + Status: sriovnetworkv1.SriovNetworkNodeStateStatus{}, + } + It("should detect changes on status due to missing kernel args", func() { hostHelper.EXPECT().GetCPUVendor().Return(hostTypes.CPUVendorIntel, nil) // Load required kernel args. genericPlugin.(*GenericPlugin).addVfioDesiredKernelArg(vfioNetworkNodeState) - Expect(genericPlugin.(*GenericPlugin).DesiredKernelArgs).To(Equal(map[string]bool{ - consts.KernelArgIntelIommu: false, - consts.KernelArgIommuPt: false, - })) - - hostHelper.EXPECT().GetCurrentKernelArgs().Return("", nil) - hostHelper.EXPECT().IsKernelArgsSet("", consts.KernelArgIntelIommu).Return(false) - hostHelper.EXPECT().IsKernelArgsSet("", consts.KernelArgIommuPt).Return(false) + Expect(genericPlugin.(*GenericPlugin).DesiredKernelArgs[consts.KernelArgIntelIommu]).To(BeTrue()) + Expect(genericPlugin.(*GenericPlugin).DesiredKernelArgs[consts.KernelArgIommuPt]).To(BeTrue()) changed, err := genericPlugin.CheckStatusChanges(vfioNetworkNodeState) Expect(err).ToNot(HaveOccurred()) @@ -921,17 +932,52 @@ var _ = Describe("Generic plugin", func() { It("should set the correct kernel args on AMD CPUs", func() { hostHelper.EXPECT().GetCPUVendor().Return(hostTypes.CPUVendorAMD, nil) genericPlugin.(*GenericPlugin).addVfioDesiredKernelArg(vfioNetworkNodeState) - Expect(genericPlugin.(*GenericPlugin).DesiredKernelArgs).To(Equal(map[string]bool{ - consts.KernelArgIommuPt: false, - })) + Expect(genericPlugin.(*GenericPlugin).DesiredKernelArgs[consts.KernelArgIommuPt]).To(BeTrue()) }) It("should set the correct kernel args on ARM CPUs", func() { hostHelper.EXPECT().GetCPUVendor().Return(hostTypes.CPUVendorARM, nil) genericPlugin.(*GenericPlugin).addVfioDesiredKernelArg(vfioNetworkNodeState) - Expect(genericPlugin.(*GenericPlugin).DesiredKernelArgs).To(Equal(map[string]bool{ - consts.KernelArgIommuPassthrough: false, - })) + Expect(genericPlugin.(*GenericPlugin).DesiredKernelArgs[consts.KernelArgIommuPassthrough]).To(BeTrue()) + }) + + It("should enable rdma shared mode", func() { + hostHelper.EXPECT().SetRDMASubsystem(consts.RdmaSubsystemModeShared).Return(nil) + err := genericPlugin.(*GenericPlugin).configRdmaKernelArg(rdmaState) + Expect(err).ToNot(HaveOccurred()) + + Expect(genericPlugin.(*GenericPlugin).DesiredKernelArgs[consts.KernelArgRdmaShared]).To(BeTrue()) + Expect(genericPlugin.(*GenericPlugin).DesiredKernelArgs[consts.KernelArgRdmaExclusive]).To(BeFalse()) + + changed, err := genericPlugin.CheckStatusChanges(rdmaState) + Expect(err).ToNot(HaveOccurred()) + Expect(changed).To(BeTrue()) + }) + It("should enable rdma exclusive mode", func() { + hostHelper.EXPECT().SetRDMASubsystem(consts.RdmaSubsystemModeExclusive).Return(nil) + rdmaState.Spec.System.RdmaMode = consts.RdmaSubsystemModeExclusive + err := genericPlugin.(*GenericPlugin).configRdmaKernelArg(rdmaState) + Expect(err).ToNot(HaveOccurred()) + + Expect(genericPlugin.(*GenericPlugin).DesiredKernelArgs[consts.KernelArgRdmaShared]).To(BeFalse()) + Expect(genericPlugin.(*GenericPlugin).DesiredKernelArgs[consts.KernelArgRdmaExclusive]).To(BeTrue()) + + changed, err := genericPlugin.CheckStatusChanges(rdmaState) + Expect(err).ToNot(HaveOccurred()) + Expect(changed).To(BeTrue()) + }) + It("should not configure RDMA kernel args", func() { + hostHelper.EXPECT().SetRDMASubsystem("").Return(nil) + rdmaState.Spec.System = sriovnetworkv1.System{} + err := genericPlugin.(*GenericPlugin).configRdmaKernelArg(rdmaState) + Expect(err).ToNot(HaveOccurred()) + + Expect(genericPlugin.(*GenericPlugin).DesiredKernelArgs[consts.KernelArgRdmaShared]).To(BeFalse()) + Expect(genericPlugin.(*GenericPlugin).DesiredKernelArgs[consts.KernelArgRdmaExclusive]).To(BeFalse()) + + changed, err := genericPlugin.CheckStatusChanges(rdmaState) + Expect(err).ToNot(HaveOccurred()) + Expect(changed).To(BeFalse()) }) }) diff --git a/pkg/utils/cluster.go b/pkg/utils/cluster.go index 6f8d72e07..c5f1f333a 100644 --- a/pkg/utils/cluster.go +++ b/pkg/utils/cluster.go @@ -5,13 +5,12 @@ import ( "fmt" "os" - "sigs.k8s.io/controller-runtime/pkg/log" - configv1 "github.com/openshift/api/config/v1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/log" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts" ) diff --git a/test/conformance/tests/test_networkpool.go b/test/conformance/tests/test_networkpool.go new file mode 100644 index 000000000..47d929013 --- /dev/null +++ b/test/conformance/tests/test_networkpool.go @@ -0,0 +1,345 @@ +package tests + +import ( + "fmt" + "strconv" + "strings" + "time" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + "golang.org/x/net/context" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/client" + + sriovv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts" + "github.com/k8snetworkplumbingwg/sriov-network-operator/test/util/cluster" + "github.com/k8snetworkplumbingwg/sriov-network-operator/test/util/discovery" + "github.com/k8snetworkplumbingwg/sriov-network-operator/test/util/namespaces" + "github.com/k8snetworkplumbingwg/sriov-network-operator/test/util/network" + "github.com/k8snetworkplumbingwg/sriov-network-operator/test/util/pod" +) + +var _ = Describe("[sriov] NetworkPool", Ordered, func() { + var testNode string + var interfaces []*sriovv1.InterfaceExt + + BeforeAll(func() { + err := namespaces.Create(namespaces.Test, clients) + Expect(err).ToNot(HaveOccurred()) + err = namespaces.Clean(operatorNamespace, namespaces.Test, clients, discovery.Enabled()) + Expect(err).ToNot(HaveOccurred()) + + sriovInfos, err := cluster.DiscoverSriov(clients, operatorNamespace) + Expect(err).ToNot(HaveOccurred()) + Expect(len(sriovInfos.Nodes)).ToNot(BeZero()) + + testNode, interfaces, err = sriovInfos.FindSriovDevicesAndNode() + Expect(err).ToNot(HaveOccurred()) + + By(fmt.Sprintf("Testing on node %s, %d devices found", testNode, len(interfaces))) + WaitForSRIOVStable() + }) + + AfterEach(func() { + err := namespaces.Clean(operatorNamespace, namespaces.Test, clients, discovery.Enabled()) + Expect(err).ToNot(HaveOccurred()) + + err = clients.DeleteAllOf(context.Background(), &sriovv1.SriovNetworkPoolConfig{}, client.InNamespace(operatorNamespace)) + Expect(err).ToNot(HaveOccurred()) + WaitForSRIOVStable() + }) + + Context("Configure rdma namespace mode", func() { + It("should switch rdma mode", func() { + By("create a pool with only that node") + networkPool := &sriovv1.SriovNetworkPoolConfig{ + ObjectMeta: metav1.ObjectMeta{Name: testNode, Namespace: operatorNamespace}, + Spec: sriovv1.SriovNetworkPoolConfigSpec{RdmaMode: consts.RdmaSubsystemModeExclusive, + NodeSelector: &metav1.LabelSelector{MatchLabels: map[string]string{"kubernetes.io/hostname": testNode}}}} + + By("configure rdma mode to exclusive") + err := clients.Create(context.Background(), networkPool) + Expect(err).ToNot(HaveOccurred()) + By("waiting for operator to finish the configuration") + WaitForSRIOVStable() + nodeState := &sriovv1.SriovNetworkNodeState{} + err = clients.Get(context.Background(), client.ObjectKey{Name: testNode, Namespace: operatorNamespace}, nodeState) + Expect(err).ToNot(HaveOccurred()) + Expect(nodeState.Spec.System.RdmaMode).To(Equal(consts.RdmaSubsystemModeExclusive)) + Expect(nodeState.Status.System.RdmaMode).To(Equal(consts.RdmaSubsystemModeExclusive)) + + By("Checking rdma mode and kernel args") + output, _, err := runCommandOnConfigDaemon(testNode, "/bin/bash", "-c", "cat /host/proc/cmdline | grep ib_core.netns_mode=0 | wc -l") + Expect(err).ToNot(HaveOccurred()) + Expect(strings.HasPrefix(output, "1")).To(BeTrue()) + + output, _, err = runCommandOnConfigDaemon(testNode, "/bin/bash", "-c", "cat /host/proc/cmdline | grep ib_core.netns_mode=1 | wc -l") + Expect(err).ToNot(HaveOccurred()) + Expect(strings.HasPrefix(output, "0")).To(BeTrue()) + + output, _, err = runCommandOnConfigDaemon(testNode, "/bin/bash", "-c", "cat /host/etc/modprobe.d/sriov_network_operator_modules_config.conf | grep mode=0 | wc -l") + Expect(err).ToNot(HaveOccurred()) + Expect(strings.HasPrefix(output, "1")).To(BeTrue()) + + By("configure rdma mode to shared") + networkPool.Spec.RdmaMode = consts.RdmaSubsystemModeShared + err = clients.Update(context.Background(), networkPool) + Expect(err).ToNot(HaveOccurred()) + WaitForSRIOVStable() + err = clients.Get(context.Background(), client.ObjectKey{Name: testNode, Namespace: operatorNamespace}, nodeState) + Expect(err).ToNot(HaveOccurred()) + Expect(nodeState.Spec.System.RdmaMode).To(Equal(consts.RdmaSubsystemModeShared)) + Expect(nodeState.Status.System.RdmaMode).To(Equal(consts.RdmaSubsystemModeShared)) + + By("Checking rdma mode and kernel args") + output, _, err = runCommandOnConfigDaemon(testNode, "/bin/bash", "-c", "cat /host/proc/cmdline | grep ib_core.netns_mode=0 | wc -l") + Expect(err).ToNot(HaveOccurred()) + Expect(strings.HasPrefix(output, "0")).To(BeTrue()) + + output, _, err = runCommandOnConfigDaemon(testNode, "/bin/bash", "-c", "cat /host/proc/cmdline | grep ib_core.netns_mode=1 | wc -l") + Expect(err).ToNot(HaveOccurred()) + Expect(strings.HasPrefix(output, "1")).To(BeTrue()) + + output, _, err = runCommandOnConfigDaemon(testNode, "/bin/bash", "-c", "cat /host/etc/modprobe.d/sriov_network_operator_modules_config.conf | grep mode=1 | wc -l") + Expect(err).ToNot(HaveOccurred()) + Expect(strings.HasPrefix(output, "1")).To(BeTrue()) + + By("removing rdma mode configuration") + err = clients.Delete(context.Background(), networkPool) + Expect(err).ToNot(HaveOccurred()) + WaitForSRIOVStable() + + err = clients.Get(context.Background(), client.ObjectKey{Name: testNode, Namespace: operatorNamespace}, nodeState) + Expect(err).ToNot(HaveOccurred()) + Expect(nodeState.Spec.System.RdmaMode).To(Equal("")) + Expect(nodeState.Status.System.RdmaMode).To(Equal(consts.RdmaSubsystemModeShared)) + + By("Checking rdma mode and kernel args") + output, _, err = runCommandOnConfigDaemon(testNode, "/bin/bash", "-c", "cat /host/proc/cmdline | grep ib_core.netns_mode=0 | wc -l") + Expect(err).ToNot(HaveOccurred()) + Expect(strings.HasPrefix(output, "0")).To(BeTrue()) + + output, _, err = runCommandOnConfigDaemon(testNode, "/bin/bash", "-c", "cat /host/proc/cmdline | grep ib_core.netns_mode=1 | wc -l") + Expect(err).ToNot(HaveOccurred()) + Expect(strings.HasPrefix(output, "0")).To(BeTrue()) + + output, _, err = runCommandOnConfigDaemon(testNode, "/bin/bash", "-c", "ls /host/etc/modprobe.d | grep sriov_network_operator_modules_config.conf | wc -l") + Expect(err).ToNot(HaveOccurred()) + Expect(strings.HasPrefix(output, "0")).To(BeTrue()) + }) + }) + + Context("Check rdma metrics inside a pod in exclusive mode", func() { + var iface *sriovv1.InterfaceExt + + BeforeAll(func() { + sriovInfos, err := cluster.DiscoverSriov(clients, operatorNamespace) + Expect(err).ToNot(HaveOccurred()) + Expect(len(sriovInfos.Nodes)).ToNot(BeZero()) + + for _, node := range sriovInfos.Nodes { + iface, err = sriovInfos.FindOneMellanoxSriovDevice(node) + if err == nil { + testNode = node + break + } + } + + if iface == nil { + Skip("no mellanox card available to test rdma") + } + + networkPool := &sriovv1.SriovNetworkPoolConfig{ + ObjectMeta: metav1.ObjectMeta{Name: testNode, Namespace: operatorNamespace}, + Spec: sriovv1.SriovNetworkPoolConfigSpec{RdmaMode: consts.RdmaSubsystemModeExclusive, + NodeSelector: &metav1.LabelSelector{MatchLabels: map[string]string{"kubernetes.io/hostname": testNode}}}} + + err = clients.Create(context.Background(), networkPool) + Expect(err).ToNot(HaveOccurred()) + By("waiting for operator to finish the configuration") + WaitForSRIOVStable() + }) + + It("should run pod with RDMA cni and expose nic metrics and another one without rdma info", func() { + By("creating a policy") + resourceName := "testrdma" + _, err := network.CreateSriovPolicy(clients, "test-policy-", operatorNamespace, iface.Name, testNode, 5, resourceName, "netdevice", + func(policy *sriovv1.SriovNetworkNodePolicy) { policy.Spec.IsRdma = true }) + Expect(err).ToNot(HaveOccurred()) + WaitForSRIOVStable() + + By("Creating sriov network to use the rdma device") + sriovNetwork := &sriovv1.SriovNetwork{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-rdmanetwork", + Namespace: operatorNamespace, + }, + Spec: sriovv1.SriovNetworkSpec{ + ResourceName: resourceName, + IPAM: `{"type":"host-local","subnet":"10.10.10.0/24","rangeStart":"10.10.10.171","rangeEnd":"10.10.10.181","routes":[{"dst":"0.0.0.0/0"}],"gateway":"10.10.10.1"}`, + NetworkNamespace: namespaces.Test, + MetaPluginsConfig: `{"type": "rdma"}`, + }} + + err = clients.Create(context.Background(), sriovNetwork) + Expect(err).ToNot(HaveOccurred()) + waitForNetAttachDef("test-rdmanetwork", namespaces.Test) + + sriovNetwork = &sriovv1.SriovNetwork{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-nordmanetwork", + Namespace: operatorNamespace, + }, + Spec: sriovv1.SriovNetworkSpec{ + ResourceName: resourceName, + IPAM: `{"type":"host-local","subnet":"10.10.10.0/24","rangeStart":"10.10.10.171","rangeEnd":"10.10.10.181","routes":[{"dst":"0.0.0.0/0"}],"gateway":"10.10.10.1"}`, + NetworkNamespace: namespaces.Test, + }} + + err = clients.Create(context.Background(), sriovNetwork) + Expect(err).ToNot(HaveOccurred()) + waitForNetAttachDef("test-nordmanetwork", namespaces.Test) + + podDefinition := pod.DefineWithNetworks([]string{"test-rdmanetwork"}) + firstPod, err := clients.Pods(namespaces.Test).Create(context.Background(), podDefinition, metav1.CreateOptions{}) + Expect(err).ToNot(HaveOccurred()) + + podDefinition = pod.DefineWithNetworks([]string{"test-nordmanetwork"}) + secondPod, err := clients.Pods(namespaces.Test).Create(context.Background(), podDefinition, metav1.CreateOptions{}) + Expect(err).ToNot(HaveOccurred()) + + firstPod = waitForPodRunning(firstPod) + secondPod = waitForPodRunning(secondPod) + + testedNode := &corev1.Node{} + err = clients.Get(context.Background(), client.ObjectKey{Name: testNode}, testedNode) + Expect(err).ToNot(HaveOccurred()) + resNum := testedNode.Status.Allocatable[corev1.ResourceName("openshift.io/"+resourceName)] + allocatable, _ := resNum.AsInt64() + Expect(allocatable).ToNot(Equal(5)) + + By("restart device plugin") + pods, err := clients.Pods(operatorNamespace).List(context.Background(), metav1.ListOptions{ + LabelSelector: "app=sriov-device-plugin", + FieldSelector: "spec.nodeName=" + testNode, + }) + Expect(err).ToNot(HaveOccurred()) + + for _, podObj := range pods.Items { + err = clients.Delete(context.Background(), &podObj) + Expect(err).ToNot(HaveOccurred()) + Eventually(func() bool { + searchPod := &corev1.Pod{} + err = clients.Get(context.Background(), client.ObjectKey{Name: podObj.Name, Namespace: podObj.Namespace}, searchPod) + if err != nil && errors.IsNotFound(err) { + return true + } + return false + }, 2*time.Minute, time.Second).Should(BeTrue()) + } + + By("checking the amount of allocatable devices remains after device plugin reset") + Consistently(func() int64 { + err = clients.Get(context.Background(), client.ObjectKey{Name: testNode}, testedNode) + Expect(err).ToNot(HaveOccurred()) + resNum := testedNode.Status.Allocatable[corev1.ResourceName("openshift.io/"+resourceName)] + newAllocatable, _ := resNum.AsInt64() + return newAllocatable + }, 1*time.Minute, 5*time.Second).Should(Equal(allocatable)) + + By("checking counters inside the pods") + strOut, _, err := pod.ExecCommand(clients, firstPod, "/bin/bash", "-c", "ip link show net1 | grep net1 | wc -l") + Expect(err).ToNot(HaveOccurred()) + Expect(strings.HasPrefix(strOut, "1")).To(BeTrue()) + strOut, _, err = pod.ExecCommand(clients, firstPod, "/bin/bash", "-c", "ls /sys/bus/pci/devices/${PCIDEVICE_OPENSHIFT_IO_TESTRDMA}/infiniband/*/ports/*/hw_counters | wc -l") + strOut = strings.TrimSpace(strOut) + Expect(err).ToNot(HaveOccurred()) + num, err := strconv.Atoi(strOut) + Expect(err).ToNot(HaveOccurred()) + Expect(num).To(BeNumerically(">", 0)) + + strOut, _, err = pod.ExecCommand(clients, secondPod, "/bin/bash", "-c", "ls /sys/bus/pci/devices/${PCIDEVICE_OPENSHIFT_IO_TESTRDMA}/infiniband/ | wc -l") + Expect(err).ToNot(HaveOccurred()) + strOut = strings.TrimSpace(strOut) + num, err = strconv.Atoi(strOut) + Expect(err).ToNot(HaveOccurred()) + Expect(num).To(BeNumerically("==", 0)) + }) + }) + + Context("Check rdma metrics inside a pod in shared mode not exist", func() { + var iface *sriovv1.InterfaceExt + BeforeAll(func() { + sriovInfos, err := cluster.DiscoverSriov(clients, operatorNamespace) + Expect(err).ToNot(HaveOccurred()) + Expect(len(sriovInfos.Nodes)).ToNot(BeZero()) + + for _, node := range sriovInfos.Nodes { + iface, err = sriovInfos.FindOneMellanoxSriovDevice(node) + if err == nil { + testNode = node + break + } + } + + if iface == nil { + Skip("no mellanox card available to test rdma") + } + + networkPool := &sriovv1.SriovNetworkPoolConfig{ + ObjectMeta: metav1.ObjectMeta{Name: testNode, Namespace: operatorNamespace}, + Spec: sriovv1.SriovNetworkPoolConfigSpec{RdmaMode: consts.RdmaSubsystemModeShared, + NodeSelector: &metav1.LabelSelector{MatchLabels: map[string]string{"kubernetes.io/hostname": testNode}}}} + + err = clients.Create(context.Background(), networkPool) + Expect(err).ToNot(HaveOccurred()) + By("waiting for operator to finish the configuration") + WaitForSRIOVStable() + }) + + It("should run pod without RDMA cni and not expose nic metrics", func() { + By("creating a policy") + resourceName := "testrdma" + _, err := network.CreateSriovPolicy(clients, "test-policy-", operatorNamespace, iface.Name, testNode, 5, resourceName, "netdevice", + func(policy *sriovv1.SriovNetworkNodePolicy) { policy.Spec.IsRdma = true }) + Expect(err).ToNot(HaveOccurred()) + WaitForSRIOVStable() + + By("Creating sriov network to use the rdma device") + sriovNetwork := &sriovv1.SriovNetwork{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-rdmanetwork", + Namespace: operatorNamespace, + }, + Spec: sriovv1.SriovNetworkSpec{ + ResourceName: resourceName, + IPAM: `{"type":"host-local","subnet":"10.10.10.0/24","rangeStart":"10.10.10.171","rangeEnd":"10.10.10.181","routes":[{"dst":"0.0.0.0/0"}],"gateway":"10.10.10.1"}`, + NetworkNamespace: namespaces.Test, + }} + + err = clients.Create(context.Background(), sriovNetwork) + Expect(err).ToNot(HaveOccurred()) + waitForNetAttachDef("test-rdmanetwork", namespaces.Test) + + podDefinition := pod.DefineWithNetworks([]string{"test-rdmanetwork"}) + firstPod, err := clients.Pods(namespaces.Test).Create(context.Background(), podDefinition, metav1.CreateOptions{}) + Expect(err).ToNot(HaveOccurred()) + firstPod = waitForPodRunning(firstPod) + + strOut, _, err := pod.ExecCommand(clients, firstPod, "/bin/bash", "-c", "ip link show net1 | grep net1 | wc -l") + Expect(err).ToNot(HaveOccurred()) + Expect(strings.HasPrefix(strOut, "1")).To(BeTrue()) + strOut, _, err = pod.ExecCommand(clients, firstPod, "/bin/bash", "-c", "ls /sys/bus/pci/devices/${PCIDEVICE_OPENSHIFT_IO_TESTRDMA}/infiniband/*/ports/* | grep hw_counters | wc -l") + strOut = strings.TrimSpace(strOut) + Expect(err).ToNot(HaveOccurred()) + num, err := strconv.Atoi(strOut) + Expect(err).ToNot(HaveOccurred()) + Expect(num).To(BeNumerically("==", 0)) + }) + }) +}) diff --git a/test/scripts/enable-kargs_test.sh b/test/scripts/kargs_test.sh similarity index 61% rename from test/scripts/enable-kargs_test.sh rename to test/scripts/kargs_test.sh index 93a985700..053bd5200 100755 --- a/test/scripts/enable-kargs_test.sh +++ b/test/scripts/kargs_test.sh @@ -2,14 +2,14 @@ SCRIPTPATH="$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" -SUT_SCRIPT="${SCRIPTPATH}/../../bindata/scripts/enable-kargs.sh" +SUT_SCRIPT="${SCRIPTPATH}/../../bindata/scripts/kargs.sh" test_RpmOstree_Add_All_Arguments() { echo "a b c=d eee=fff" > ${FAKE_HOST}/proc/cmdline touch ${FAKE_HOST}/run/ostree-booted - output=`$SUT_SCRIPT X=Y W=Z` + output=`$SUT_SCRIPT add X=Y W=Z` assertEquals 0 $? assertEquals "2" $output @@ -22,7 +22,7 @@ test_RpmOstree_Add_Only_Missing_Arguments() { echo "a b c=d eee=fff K=L" > ${FAKE_HOST}/proc/cmdline touch ${FAKE_HOST}/run/ostree-booted - output=`$SUT_SCRIPT K=L X=Y` + output=`$SUT_SCRIPT add K=L X=Y` assertEquals 0 $? assertEquals "1" $output @@ -30,6 +30,29 @@ test_RpmOstree_Add_Only_Missing_Arguments() { assertNotContains "`cat ${FAKE_HOST}/rpm-ostree_calls`" "--append K=L" } +test_RpmOstree_Delete_All_Arguments() { + echo "a b c=d eee=fff X=Y W=Z" > ${FAKE_HOST}/proc/cmdline + touch ${FAKE_HOST}/run/ostree-booted + + output=`$SUT_SCRIPT remove X=Y W=Z` + assertEquals 0 $? + assertEquals "2" $output + + assertContains "`cat ${FAKE_HOST}/rpm-ostree_calls`" "--delete X=Y" + assertContains "`cat ${FAKE_HOST}/rpm-ostree_calls`" "--delete W=Z" +} + +test_RpmOstree_Delete_Only_Exist_Arguments() { + echo "a b c=d eee=fff X=Y" > ${FAKE_HOST}/proc/cmdline + touch ${FAKE_HOST}/run/ostree-booted + + output=`$SUT_SCRIPT remove X=Y W=Z` + assertEquals 0 $? + assertEquals "1" $output + + assertContains "`cat ${FAKE_HOST}/rpm-ostree_calls`" "--delete X=Y" + assertContains "`cat ${FAKE_HOST}/rpm-ostree_calls`" "--delete W=Z" +} ###### Mock /host directory ###### export FAKE_HOST="$(mktemp -d)" diff --git a/test/scripts/rpm-ostree_mock b/test/scripts/rpm-ostree_mock index db6f66040..06e6b1905 100755 --- a/test/scripts/rpm-ostree_mock +++ b/test/scripts/rpm-ostree_mock @@ -10,3 +10,9 @@ then # Caller is trying to read kernel arguments. cat /proc/cmdline fi + +if ! echo "$*" | grep -q "\--delete" +then + # Caller is trying to read kernel arguments. + cat /proc/cmdline +fi