From 29854712a14f6519de1b441f561afed34a455445 Mon Sep 17 00:00:00 2001 From: wanggangqiang Date: Wed, 4 Sep 2024 17:20:14 +0800 Subject: [PATCH] fix clean up pod network --- cmd/hostnic/hostnic.go | 69 ++++++++++++++++++++++++--------- cmd/hostnic/ipam/ipam.go | 38 +++++++++--------- pkg/allocator/allocator.go | 19 ++++++--- pkg/networkutils/networkutil.go | 13 +++++-- pkg/server/server.go | 21 +++++++--- 5 files changed, 109 insertions(+), 51 deletions(-) diff --git a/cmd/hostnic/hostnic.go b/cmd/hostnic/hostnic.go index b1965e5e..cf4031ab 100644 --- a/cmd/hostnic/hostnic.go +++ b/cmd/hostnic/hostnic.go @@ -448,7 +448,7 @@ func cmdAdd(args *skel.CmdArgs) error { if err != nil { return fmt.Errorf("failed to alloc addr: %v", err) } - klog.Infof("cmdAdd for %s AddrAlloc success, ipamMsg=%s,result=%s", args.ContainerID, spew.Sdump(ipamMsg), spew.Sdump(result)) + klog.Infof("cmdAdd for %s AddrAlloc success, ipamMsg=%s", args.ContainerID, spew.Sdump(ipamMsg)) podInfo := ipamMsg.Args // podInfo.NicType is from annotation @@ -558,7 +558,7 @@ func cmdDelPassThrough(svcIfName, contIfName string, netns ns.NetNS) error { func cmdDel(args *skel.CmdArgs) error { var err error - klog.Infof("cmdDel args %v", args) + klog.Infof("cmdDel args %+v", args) defer func() { klog.Infof("cmdDel for %s rst: %v", args.ContainerID, err) }() @@ -571,26 +571,31 @@ func cmdDel(args *skel.CmdArgs) error { return fmt.Errorf("failed to checkConf: %v", err) } + //only get pod ip info here to delete ip rule and ebtable rule ipamMsg, err := ipam2.AddrUnalloc(args, true) - if err != nil { - if err == constants.ErrNicNotFound { - return nil - } - return fmt.Errorf("failed to unalloc addr: %v", err) - } - klog.Infof("cmdDel for %s AddrUnalloc success", args.ContainerID) - podInfo := ipamMsg.Args conf.HostNicType = podInfo.NicType contIfName := args.IfName svcIfName := generateHostVethName(conf.HostVethPrefix, podInfo.Namespace, podInfo.Name) + podKey := getPodKey(podInfo) + + if err != nil { + return fmt.Errorf("get nic and ip info for pod %s error: %v", podKey, err) + } + if ipamMsg.Nic == nil { + // not found db record, just return + klog.Infof("not found db record for pod %s, skip cleanup!", podKey) + return nil + } + klog.Infof(" get nic and ip info for pod %s success, ip: %v", podKey, ipamMsg.IP) netns, err := ns.GetNS(args.Netns) if err != nil { + klog.Errorf("getns %s for pod %s error: %v", args.Netns, podKey, err) if strings.Contains(err.Error(), "no such file or directory") { - goto end + return cleanupRulesAndIP(args, ipamMsg) } - return fmt.Errorf("getns for %s error: %v", args.Netns, err) + return fmt.Errorf("getns %s for pod %s error: %v", args.Netns, podKey, err) } defer netns.Close() @@ -602,17 +607,39 @@ func cmdDel(args *skel.CmdArgs) error { } if err != nil { + klog.Errorf("del nic for pod %s error: %v", podKey, err) if strings.Contains(err.Error(), "no such file or directory") { - goto end + return cleanupRulesAndIP(args, ipamMsg) } - return err + return fmt.Errorf("del nic for pod %s error: %v", podKey, err) } -end: - _, err = ipam2.AddrUnalloc(args, false) - if err == constants.ErrNicNotFound { - return nil + klog.Infof("del nic for pod %s success", podKey) + + return cleanupRulesAndIP(args, ipamMsg) +} + +func cleanupRulesAndIP(args *skel.CmdArgs, ipamMsg *rpc.IPAMMessage) error { + // clean up ip rule and ebtables + // delete rule and ebtables rule before delete db record, after delete db record, can not get pod ip and nic; + podInfo := ipamMsg.Args + podKey := getPodKey(podInfo) + klog.Infof("going to clean network rules and ip for pod %s", podKey) + + if ipamMsg.Nic != nil { + err := networkutils.NetworkHelper.CleanupPodNetwork(ipamMsg.Nic, ipamMsg.IP) + if err != nil { + return fmt.Errorf("clean network rule for pod %s error: %v", podKey, err) + } + klog.Infof("clean network rule for pod %s success", podKey) } - return err + + // release ip and delet db record + _, err := ipam2.AddrUnalloc(args, false) + if err != nil { + return fmt.Errorf("ipam addr unalloc for pod %s error: %v", podKey, err) + } + klog.Infof("ipam addr unalloc for pod %s success", podKey) + return nil } // generateHostVethName returns a name to be used on the host-side veth device. @@ -622,6 +649,10 @@ func generateHostVethName(prefix, namespace, podname string) string { return fmt.Sprintf("%s%s", prefix, hex.EncodeToString(h.Sum(nil))[:11]) } +func getPodKey(info *rpc.PodInfo) string { + return info.Namespace + "/" + info.Name + "/" + info.Containter +} + func main() { networkutils.SetupNetworkHelper() skel.PluginMain(cmdAdd, nil, cmdDel, version.All, bv.BuildString("hostnic")) diff --git a/cmd/hostnic/ipam/ipam.go b/cmd/hostnic/ipam/ipam.go index 56306c9e..94dc4e89 100644 --- a/cmd/hostnic/ipam/ipam.go +++ b/cmd/hostnic/ipam/ipam.go @@ -20,7 +20,6 @@ package ipam import ( "context" - "encoding/json" "fmt" "net" "time" @@ -29,7 +28,6 @@ import ( "github.com/containernetworking/cni/pkg/types" "github.com/containernetworking/cni/pkg/types/current" "github.com/davecgh/go-spew/spew" - "github.com/sirupsen/logrus" "google.golang.org/grpc" "google.golang.org/grpc/backoff" @@ -39,10 +37,10 @@ import ( ) func AddrAlloc(args *skel.CmdArgs) (*rpc.IPAMMessage, *current.Result, error) { - conf := NetConf{} - if err := json.Unmarshal(args.StdinData, &conf); err != nil { - return nil, nil, fmt.Errorf("failed to unmarshal netconf %s", spew.Sdump(args)) - } + // conf := NetConf{} + // if err := json.Unmarshal(args.StdinData, &conf); err != nil { + // return nil, nil, fmt.Errorf("failed to unmarshal netconf %s", spew.Sdump(args)) + // } k8sArgs := K8sArgs{} if err := types.LoadArgs(args.Args, &k8sArgs); err != nil { @@ -104,10 +102,10 @@ func AddrAlloc(args *skel.CmdArgs) (*rpc.IPAMMessage, *current.Result, error) { } func AddrUnalloc(args *skel.CmdArgs, peek bool) (*rpc.IPAMMessage, error) { - conf := NetConf{} - if err := json.Unmarshal(args.StdinData, &conf); err != nil { - return nil, fmt.Errorf("failed to unmarshal netconf %s", spew.Sdump(args)) - } + // conf := NetConf{} + // if err := json.Unmarshal(args.StdinData, &conf); err != nil { + // return nil, fmt.Errorf("failed to unmarshal netconf %s", spew.Sdump(args)) + // } k8sArgs := K8sArgs{} if err := types.LoadArgs(args.Args, &k8sArgs); err != nil { @@ -139,16 +137,18 @@ func AddrUnalloc(args *skel.CmdArgs, peek bool) (*rpc.IPAMMessage, error) { c := rpc.NewCNIBackendClient(conn) reply, err := c.DelNetwork(context.Background(), info) if err != nil { - return nil, fmt.Errorf("failed to call DelNetwork") - } - if reply.Nic == nil || reply.Args.Containter != args.ContainerID { - return nil, ErrNicNotFound - } else { - if err := networkutils.NetworkHelper.CleanupPodNetwork(reply.Nic, reply.IP); err != nil { - logrus.Errorf("clean %v %s network failed: %v", reply.Nic, reply.IP, err) - //HOSTNIC_TODO: try to cleanup ebtables rules once there - } + return nil, fmt.Errorf("failed to call DelNetwork: %v", err) } + //do this before clean db record which in ipam server DelNetwork function + // if reply.Nic == nil || reply.Args.Containter != args.ContainerID { + // return nil, ErrNicNotFound + // } else { + // if err := networkutils.NetworkHelper.CleanupPodNetwork(reply.Nic, reply.IP); err != nil { + // klog.Errorf("clean %v %s network failed: %v", reply.Nic, reply.IP, err) + // //HOSTNIC_TODO: try to cleanup ebtables rules once there + // } + // } + return reply, nil } diff --git a/pkg/allocator/allocator.go b/pkg/allocator/allocator.go index d08d5509..2563801f 100644 --- a/pkg/allocator/allocator.go +++ b/pkg/allocator/allocator.go @@ -7,6 +7,7 @@ import ( "sync" "time" + "k8s.io/klog/v2" log "k8s.io/klog/v2" "github.com/yunify/hostnic-cni/pkg/conf" @@ -260,11 +261,19 @@ func (a *Allocator) FreeHostNic(args *rpc.PodInfo, peek bool) (*rpc.HostNic, str for _, status := range a.nics { if pod, ok := status.Pods[getContainterKey(args)]; ok { - if err := a.delNicPod(status.Nic, pod); err != nil { - log.Errorf("delNicPod record failed: nic: %s, podkey: %s, err: %v", getNicKey(status.Nic), getPodKey(args), err) //HOSTNIC_TODO:the error was not return, why? - } else { - log.Infof("delNicPod record success, nic: %s, podkey: %s", getNicKey(status.Nic), getPodKey(args)) + nicKey := getNicKey(status.Nic) + podKey := getPodKey(args) + if peek { + klog.Infof("found db record for pod %s[%s] , ip: %s", nicKey, podKey, pod.PodIP) + return status.Nic, pod.PodIP, nil + } + + // delete nic pod record, this is the last step for delete a pod + err := a.delNicPod(status.Nic, pod) + if err != nil { + return status.Nic, pod.PodIP, fmt.Errorf("clean db record for pod %s[%s] error: %v", nicKey, podKey, err) } + log.Infof("clean db record for pod %s[%s] success", nicKey, podKey) return status.Nic, pod.PodIP, nil } } @@ -278,7 +287,7 @@ func (a *Allocator) FreeHostNic(args *rpc.PodInfo, peek bool) (*rpc.HostNic, str } return result.Nic, nil */ - + klog.Infof("no db record for pod %s", getPodKey(args)) return nil, "", nil } diff --git a/pkg/networkutils/networkutil.go b/pkg/networkutils/networkutil.go index 01dec7c3..21f75610 100644 --- a/pkg/networkutils/networkutil.go +++ b/pkg/networkutils/networkutil.go @@ -53,17 +53,24 @@ func (n NetworkUtils) CleanupPodNetwork(nic *rpc.HostNic, podIP string) error { ip := net.ParseIP(podIP) dstRules, err := getRuleListByDst(ip) if err != nil { - return err + return fmt.Errorf("get rule list by ip %s error: %v", podIP, err) } for _, rule := range dstRules { + //delete not exists rule return: RTNETLINK answers: No such file or directory err := netlink.RuleDel(&rule) - if err != nil && !os.IsExist(err) { + if err != nil && !strings.Contains(err.Error(), "no such file or directory") { return fmt.Errorf("failed to del rule %v : %v", rule, err) } } - return setArpReply(constants.GetHostNicBridgeName(int(nic.RouteTableNum)), podIP, nic.HardwareAddr, "-D") + //delete not exists rule return: Sorry, rule does not exist. + err = setArpReply(constants.GetHostNicBridgeName(int(nic.RouteTableNum)), podIP, nic.HardwareAddr, "-D") + if err != nil && !strings.Contains(err.Error(), "rule does not exist") { + return fmt.Errorf("delete ebtables rule for ip %s error: %v", podIP, err) + } + + return nil } // Note: setup NetworkManager to disable dhcp on nic diff --git a/pkg/server/server.go b/pkg/server/server.go index 7fb62915..4f0a925a 100644 --- a/pkg/server/server.go +++ b/pkg/server/server.go @@ -213,18 +213,29 @@ func (s *IPAMServer) DelNetwork(context context.Context, in *rpc.IPAMMessage) (* }() handleID = podHandleKey(in.Args) - log.Infof("going to release ip by handleID %s", handleID) + + //get pod ip and nic info here + in.Nic, in.IP, _ = allocator.Alloc.FreeHostNic(in.Args, true) + if in.Peek { + //only get pod ip info here and return + return in, nil + } + + //release ip in ipamblock + log.Infof("going to release ip (%s) by handleID %s", in.IP, handleID) if err = s.ipamclient.ReleaseByHandle(handleID); err != nil { - log.Errorf("DelNetwork request (%v) release ip by %s failed: %v", in.Args, handleID, err) (*s.oddPodCount).FreeFromPoolFailedCount = (*s.oddPodCount).FreeFromPoolFailedCount + 1 + return in, fmt.Errorf("release ip %s by handleID %s error: %v", in.IP, handleID, err) } - log.Infof("release ip by handleID %s success, going to clear pod record for hostnic", handleID) - in.Nic, in.IP, err = allocator.Alloc.FreeHostNic(in.Args, in.Peek) + //clear pod db record + log.Infof("release ip (%s) by handleID %s success, going to clear db record for hostnic", in.IP, handleID) + _, _, err = allocator.Alloc.FreeHostNic(in.Args, in.Peek) if err != nil { (*s.oddPodCount).FreeFromHostFailedCount = (*s.oddPodCount).FreeFromHostFailedCount + 1 + return in, fmt.Errorf("clear pod db record error for %s: %v", handleID, err) } - return in, err + return in, nil } func (s *IPAMServer) ShowNics(context context.Context, in *rpc.Nothing) (*rpc.NicInfoList, error) {