Skip to content

Commit

Permalink
Add a pod mutating webhook to auto inject the pod network resources
Browse files Browse the repository at this point in the history
Signed-off-by: cyclinder <[email protected]>
  • Loading branch information
cyclinder committed Oct 29, 2024
1 parent a810abd commit 990ad67
Show file tree
Hide file tree
Showing 30 changed files with 1,790 additions and 263 deletions.
139 changes: 71 additions & 68 deletions charts/spiderpool/README.md

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions charts/spiderpool/templates/configmap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ data:
cdiRootPath: {{ .Values.dra.cdiRootPath }}
hostDevicePath: {{ .Values.dra.hostDevicePath }}
tuneSysctlConfig: {{ .Values.spiderpoolAgent.tuneSysctlConfig }}
podNetworkResourceInjectNamespacesExclude: {{ toJson .Values.spiderpoolController.podNetworkResourceInjectNamespacesExclude }}
podNetworkResourceInjectNamespacesInclude: {{ toJson .Values.spiderpoolController.podNetworkResourceInjectNamespacesInclude }}
{{- if .Values.multus.multusCNI.install }}
---
kind: ConfigMap
Expand Down
4 changes: 4 additions & 0 deletions charts/spiderpool/templates/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,10 @@ spec:
valueFrom:
fieldRef:
fieldPath: metadata.namespace
- name: SPIDERPOOL_CONTROLLER_DEPLOYMENT_NAME
value: {{ .Values.spiderpoolController.name | quote }}
- name: SPIDERPOOL_ENABLE_POD_NETWORK_RESOURCE_INJECT
value: {{ .Values.spiderpoolController.enablePodNetworkResourceInject | quote }}
{{- with .Values.spiderpoolController.extraEnv }}
{{- toYaml . | nindent 8 }}
{{- end }}
Expand Down
1 change: 1 addition & 0 deletions charts/spiderpool/templates/role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ rules:
- delete
- get
- list
- update
- watch
- apiGroups:
- apiextensions.k8s.io
Expand Down
13 changes: 13 additions & 0 deletions charts/spiderpool/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -669,6 +669,19 @@ spiderpoolController:
## @param spiderpoolController.webhookPort the http port for spiderpoolController webhook
webhookPort: 5722

## @param spiderpoolController.enablePodNetworkResourceInject enable the pod mutating webhook that automatically injects network resources into pods
enablePodNetworkResourceInject: false

## @param spiderpoolController.podNetworkResourceInjectNamespacesExclude namespaces excluded from the pod mutating webhook
podNetworkResourceInjectNamespacesExclude:
- kube-system
- spiderpool
- metallb-system
- istio-system

## @param spiderpoolController.podNetworkResourceInjectNamespacesInclude namespaces included in the pod mutating webhook
podNetworkResourceInjectNamespacesInclude: []

prometheus:
## @param spiderpoolController.prometheus.enabled enable spiderpool Controller to collect metrics
enabled: false
Expand Down
22 changes: 14 additions & 8 deletions cmd/spiderpool-controller/cmd/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,8 @@ var envInfo = []envConf{
{"SPIDERPOOL_MULTUS_CONFIG_INFORMER_RESYNC_PERIOD", "60", false, nil, nil, &controllerContext.Cfg.MultusConfigInformerResyncPeriod},
{"SPIDERPOOL_CILIUM_CONFIGMAP_NAMESPACE_NAME", "kube-system/cilium-config", false, &controllerContext.Cfg.CiliumConfigName, nil, nil},

{"SPIDERPOOL_CONTROLLER_DEPLOYMENT_NAME", "spiderpool-controller", true, &controllerContext.Cfg.ControllerDeploymentName, nil, nil},
{"SPIDERPOOL_ENABLE_POD_NETWORK_RESOURCE_INJECT", "false", false, nil, &controllerContext.Cfg.InjectPodNetworkResource, nil},
{"SPIDERPOOL_IPPOOL_INFORMER_RESYNC_PERIOD", "300", false, nil, nil, &controllerContext.Cfg.IPPoolInformerResyncPeriod},
{"SPIDERPOOL_IPPOOL_INFORMER_WORKERS", "3", true, nil, nil, &controllerContext.Cfg.IPPoolInformerWorkers},
{"SPIDERPOOL_AUTO_IPPOOL_HANDLER_MAX_WORKQUEUE_LENGTH", "10000", true, nil, nil, &controllerContext.Cfg.IPPoolInformerMaxWorkQueueLength},
Expand Down Expand Up @@ -128,16 +130,20 @@ type Config struct {
GopsListenPort string
PyroscopeAddress string
DefaultCniConfDir string
// CiliumConfigName is formatted by namespace and name,default is kube-system/cilium-config
// CiliumConfigName is formatted by namespace and name
// default is kube-system/cilium-config
CiliumConfigName string

ControllerPodNamespace string
ControllerPodName string
DefaultCoordinatorName string
LeaseDuration int
LeaseRenewDeadline int
LeaseRetryPeriod int
LeaseRetryGap int
InjectPodNetworkResource bool

ControllerDeploymentName string
ControllerPodNamespace string
ControllerPodName string
DefaultCoordinatorName string
LeaseDuration int
LeaseRenewDeadline int
LeaseRetryPeriod int
LeaseRetryGap int

IPPoolMaxAllocatedIPs int

Expand Down
2 changes: 2 additions & 0 deletions cmd/spiderpool-controller/cmd/crd_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"github.com/go-logr/logr"
multusv1 "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1"
calicov1 "github.com/tigera/operator/pkg/apis/crd.projectcalico.org/v1"
admissionregistrationv1 "k8s.io/api/admissionregistration/v1"
apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
"k8s.io/apimachinery/pkg/runtime"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
Expand All @@ -35,6 +36,7 @@ func init() {
utilruntime.Must(multusv1.AddToScheme(scheme))
utilruntime.Must(apiextensionsv1.AddToScheme(scheme))
utilruntime.Must(kubevirtv1.AddToScheme(scheme))
utilruntime.Must(admissionregistrationv1.AddToScheme(scheme))
}

func newCRDManager() (ctrl.Manager, error) {
Expand Down
16 changes: 16 additions & 0 deletions cmd/spiderpool-controller/cmd/daemon.go
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,22 @@ func initControllerServiceManagers(ctx context.Context) {
}
controllerContext.PodManager = podManager

if controllerContext.Cfg.InjectPodNetworkResource {
logger.Debug("Begin to init Pod MutatingWebhook")
if err := podmanager.InitPodWebhook(controllerContext.ClientSet.AdmissionregistrationV1(),
controllerContext.CRDManager, controllerContext.Cfg.ControllerDeploymentName,
controllerContext.Cfg.PodNetworkResourceInjectNamespacesExclude,
controllerContext.Cfg.PodNetworkResourceInjectNamespacesInclude); err != nil {
logger.Fatal(err.Error())
}
} else {
logger.Debug("InjectPodNetworkResource is disabled, try to remove the pod part in the MutatingWebhook")
if err := podmanager.RemovePodMutatingWebhook(controllerContext.ClientSet.AdmissionregistrationV1(),
controllerContext.Cfg.ControllerDeploymentName); err != nil {
logger.Error(err.Error())
}
}

logger.Info("Begin to initialize StatefulSet manager")
statefulSetManager, err := statefulsetmanager.NewStatefulSetManager(
controllerContext.CRDManager.GetClient(),
Expand Down
2 changes: 2 additions & 0 deletions docs/reference/spiderpool-controller.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ Run the spiderpool controller daemon.
| SPIDERPOOL_CNI_CONFIG_DIR | /etc/cni/net.d | The host path of the cni config directory. |
| SPIDERPOOL_CILIUM_CONFIGMAP_NAMESPACE_NAME | kube-system/cilium-config | The cilium's configMap, default is kube-system/cilium-config. |
| SPIDERPOOL_COORDINATOR_DEFAULT_NAME | default | the name of default spidercoordinator CR |
| SPIDERPOOL_ENABLE_POD_NETWORK_RESOURCE_INJECT | false | Enable/disable injecting network resources into pods. |
| SPIDERPOOL_CONTROLLER_DEPLOYMENT_NAME | spiderpool-controller | The deployment name of spiderpool-controller. |

## spiderpool-controller shutdown

Expand Down
156 changes: 122 additions & 34 deletions docs/usage/install/ai/get-started-macvlan-zh_CN.md
Original file line number Diff line number Diff line change
Expand Up @@ -223,22 +223,22 @@
metadata:
name: gpu1-net11
spec:
gateway: 172.16.11.254
subnet: 172.16.11.0/16
ips:
- 172.16.11.1-172.16.11.200
gateway: 172.16.11.254
subnet: 172.16.11.0/16
ips:
- 172.16.11.1-172.16.11.200
---
apiVersion: spiderpool.spidernet.io/v2beta1
kind: SpiderMultusConfig
metadata:
name: gpu1-macvlan
namespace: spiderpool
spec:
cniType: macvlan
macvlan:
master: ["enp11s0f0np0"]
ippools:
ipv4: ["gpu1-net11"]
cniType: macvlan
macvlan:
master: ["enp11s0f0np0"]
ippools:
ipv4: ["gpu1-net11"]
EOF
```
Expand All @@ -247,6 +247,8 @@
1. 在指定节点上创建一组 DaemonSet 应用
如下例子,通过 annotations `v1.multus-cni.io/default-network` 指定使用 calico 的缺省网卡,用于进行控制面通信,annotations `k8s.v1.cni.cncf.io/networks` 接入 8 个 GPU 亲和的网卡,用于 RDMA 通信,并配置 8 种 RDMA resources 资源
> 注:可自动为应用注入 RDMA 网络资源,参考 [基于 Webhook 自动注入网络资源](#基于-webhook-自动注入网络资源)
```shell
$ helm repo add spiderchart https://spidernet-io.github.io/charts
$ helm repo update
Expand All @@ -261,39 +263,39 @@
# just run daemonset in nodes 'worker1' and 'worker2'
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: kubernetes.io/hostname
operator: In
values:
- worker1
- worker2
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: kubernetes.io/hostname
operator: In
values:
- worker1
- worker2
# macvlan interfaces
extraAnnotations:
k8s.v1.cni.cncf.io/networks: |-
[{"name":"gpu1-macvlan","namespace":"spiderpool"},
{"name":"gpu2-macvlan","namespace":"spiderpool"},
{"name":"gpu3-macvlan","namespace":"spiderpool"},
{"name":"gpu4-macvlan","namespace":"spiderpool"},
{"name":"gpu5-macvlan","namespace":"spiderpool"},
{"name":"gpu6-macvlan","namespace":"spiderpool"},
{"name":"gpu7-macvlan","namespace":"spiderpool"},
{"name":"gpu8-macvlan","namespace":"spiderpool"}]
[{"name":"gpu1-macvlan","namespace":"spiderpool"},
{"name":"gpu2-macvlan","namespace":"spiderpool"},
{"name":"gpu3-macvlan","namespace":"spiderpool"},
{"name":"gpu4-macvlan","namespace":"spiderpool"},
{"name":"gpu5-macvlan","namespace":"spiderpool"},
{"name":"gpu6-macvlan","namespace":"spiderpool"},
{"name":"gpu7-macvlan","namespace":"spiderpool"},
{"name":"gpu8-macvlan","namespace":"spiderpool"}]
# macvlan resource
resources:
limits:
spidernet.io/shared_cx5_gpu1: 1
spidernet.io/shared_cx5_gpu2: 1
spidernet.io/shared_cx5_gpu3: 1
spidernet.io/shared_cx5_gpu4: 1
spidernet.io/shared_cx5_gpu5: 1
spidernet.io/shared_cx5_gpu6: 1
spidernet.io/shared_cx5_gpu7: 1
spidernet.io/shared_cx5_gpu8: 1
#nvidia.com/gpu: 1
spidernet.io/shared_cx5_gpu1: 1
spidernet.io/shared_cx5_gpu2: 1
spidernet.io/shared_cx5_gpu3: 1
spidernet.io/shared_cx5_gpu4: 1
spidernet.io/shared_cx5_gpu5: 1
spidernet.io/shared_cx5_gpu6: 1
spidernet.io/shared_cx5_gpu7: 1
spidernet.io/shared_cx5_gpu8: 1
#nvidia.com/gpu: 1
EOF
$ helm install rdma-tools spiderchart/rdma-tools -f ./values.yaml
Expand Down Expand Up @@ -410,3 +412,89 @@
# Successfully access the RDMA service of the other Pod
$ ib_read_lat 172.91.0.115
```
## 基于 Webhook 自动注入网络资源
1. 为了简化 AI 应用配置多网卡的复杂度,Spiderpool 支持通过 label(`spidernet.io/resource-inject-key`)对一组网卡配置进行分类。用户只需要为 Pod 添加 key 和 value 均与该 label 相同的注解,Spiderpool 就会通过 webhook 自动为 Pod 注入所有带有该 label 的网卡和对应的网络资源。比如在创建 SpiderMultusConfig 时指定 labels,并配置 RDMA 相关配置:
```shell
$ cat <<EOF | kubectl apply -f -
apiVersion: spiderpool.spidernet.io/v2beta1
kind: SpiderMultusConfig
metadata:
name: gpu1-macvlan
namespace: spiderpool
labels:
spidernet.io/resource-inject-key: gpu-macvlan
spec:
cniType: macvlan
macvlan:
master: ["enp11s0f0np0"]
enableRdma: true
rdmaResourceName: spidernet.io/shared_cx5_gpu1
ippools:
ipv4: ["gpu1-net11"]
EOF
```
> `spidernet.io/resource-inject-key` 为固定的 key,value(本例中为 `gpu-macvlan`)由用户自定义。
2. 创建应用时添加注解: `spidernet.io/resource-inject-key: gpu-macvlan`,这样 Spiderpool 自动为 Pod 添加 8 个 GPU 亲和的网卡,用于 RDMA 通信,并配置 8 种 RDMA resources 资源:
```shell
$ helm repo add spiderchart https://spidernet-io.github.io/charts
$ helm repo update
$ helm search repo rdma-tools
# run daemonset on worker1 and worker2
$ cat <<EOF > values.yaml
# for china user , it could add these to use a domestic registry
#image:
# registry: ghcr.m.daocloud.io
# just run daemonset in nodes 'worker1' and 'worker2'
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: kubernetes.io/hostname
operator: In
values:
- worker1
- worker2
# macvlan interfaces
extraAnnotations:
spidernet.io/resource-inject-key: gpu-macvlan
EOF
$ helm install rdma-tools spiderchart/rdma-tools -f ./values.yaml
```
当 Pod 成功 Running,检查 Pod 是否成功注入 8 个 RDMA 网卡的 annotations 和 8 种 RDMA 资源。
```shell
# Pod multus annotations
k8s.v1.cni.cncf.io/networks: |-
[{"name":"gpu1-macvlan","namespace":"spiderpool"},
{"name":"gpu2-macvlan","namespace":"spiderpool"},
{"name":"gpu3-macvlan","namespace":"spiderpool"},
{"name":"gpu4-macvlan","namespace":"spiderpool"},
{"name":"gpu5-macvlan","namespace":"spiderpool"},
{"name":"gpu6-macvlan","namespace":"spiderpool"},
{"name":"gpu7-macvlan","namespace":"spiderpool"},
{"name":"gpu8-macvlan","namespace":"spiderpool"}]
# macvlan resource
resources:
requests:
spidernet.io/shared_cx5_gpu1: 1
spidernet.io/shared_cx5_gpu2: 1
spidernet.io/shared_cx5_gpu3: 1
spidernet.io/shared_cx5_gpu4: 1
spidernet.io/shared_cx5_gpu5: 1
spidernet.io/shared_cx5_gpu6: 1
spidernet.io/shared_cx5_gpu7: 1
spidernet.io/shared_cx5_gpu8: 1
#nvidia.com/gpu: 1
```
Loading

0 comments on commit 990ad67

Please sign in to comment.