Skip to content

Commit

Permalink
Merge pull request #96 from qiangzii/master
Browse files Browse the repository at this point in the history
fix clean up pod network
  • Loading branch information
yongchuanzhou authored Sep 13, 2024
2 parents 57adcec + 2985471 commit ef8c7b8
Show file tree
Hide file tree
Showing 5 changed files with 109 additions and 51 deletions.
69 changes: 50 additions & 19 deletions cmd/hostnic/hostnic.go
Original file line number Diff line number Diff line change
Expand Up @@ -448,7 +448,7 @@ func cmdAdd(args *skel.CmdArgs) error {
if err != nil {
return fmt.Errorf("failed to alloc addr: %v", err)
}
klog.Infof("cmdAdd for %s AddrAlloc success, ipamMsg=%s,result=%s", args.ContainerID, spew.Sdump(ipamMsg), spew.Sdump(result))
klog.Infof("cmdAdd for %s AddrAlloc success, ipamMsg=%s", args.ContainerID, spew.Sdump(ipamMsg))

podInfo := ipamMsg.Args
// podInfo.NicType is from annotation
Expand Down Expand Up @@ -558,7 +558,7 @@ func cmdDelPassThrough(svcIfName, contIfName string, netns ns.NetNS) error {
func cmdDel(args *skel.CmdArgs) error {
var err error

klog.Infof("cmdDel args %v", args)
klog.Infof("cmdDel args %+v", args)
defer func() {
klog.Infof("cmdDel for %s rst: %v", args.ContainerID, err)
}()
Expand All @@ -571,26 +571,31 @@ func cmdDel(args *skel.CmdArgs) error {
return fmt.Errorf("failed to checkConf: %v", err)
}

//only get pod ip info here to delete ip rule and ebtable rule
ipamMsg, err := ipam2.AddrUnalloc(args, true)
if err != nil {
if err == constants.ErrNicNotFound {
return nil
}
return fmt.Errorf("failed to unalloc addr: %v", err)
}
klog.Infof("cmdDel for %s AddrUnalloc success", args.ContainerID)

podInfo := ipamMsg.Args
conf.HostNicType = podInfo.NicType
contIfName := args.IfName
svcIfName := generateHostVethName(conf.HostVethPrefix, podInfo.Namespace, podInfo.Name)
podKey := getPodKey(podInfo)

if err != nil {
return fmt.Errorf("get nic and ip info for pod %s error: %v", podKey, err)
}
if ipamMsg.Nic == nil {
// not found db record, just return
klog.Infof("not found db record for pod %s, skip cleanup!", podKey)
return nil
}
klog.Infof(" get nic and ip info for pod %s success, ip: %v", podKey, ipamMsg.IP)

netns, err := ns.GetNS(args.Netns)
if err != nil {
klog.Errorf("getns %s for pod %s error: %v", args.Netns, podKey, err)
if strings.Contains(err.Error(), "no such file or directory") {
goto end
return cleanupRulesAndIP(args, ipamMsg)
}
return fmt.Errorf("getns for %s error: %v", args.Netns, err)
return fmt.Errorf("getns %s for pod %s error: %v", args.Netns, podKey, err)
}
defer netns.Close()

Expand All @@ -602,17 +607,39 @@ func cmdDel(args *skel.CmdArgs) error {
}

if err != nil {
klog.Errorf("del nic for pod %s error: %v", podKey, err)
if strings.Contains(err.Error(), "no such file or directory") {
goto end
return cleanupRulesAndIP(args, ipamMsg)
}
return err
return fmt.Errorf("del nic for pod %s error: %v", podKey, err)
}
end:
_, err = ipam2.AddrUnalloc(args, false)
if err == constants.ErrNicNotFound {
return nil
klog.Infof("del nic for pod %s success", podKey)

return cleanupRulesAndIP(args, ipamMsg)
}

func cleanupRulesAndIP(args *skel.CmdArgs, ipamMsg *rpc.IPAMMessage) error {
// clean up ip rule and ebtables
// delete rule and ebtables rule before delete db record, after delete db record, can not get pod ip and nic;
podInfo := ipamMsg.Args
podKey := getPodKey(podInfo)
klog.Infof("going to clean network rules and ip for pod %s", podKey)

if ipamMsg.Nic != nil {
err := networkutils.NetworkHelper.CleanupPodNetwork(ipamMsg.Nic, ipamMsg.IP)
if err != nil {
return fmt.Errorf("clean network rule for pod %s error: %v", podKey, err)
}
klog.Infof("clean network rule for pod %s success", podKey)
}
return err

// release ip and delet db record
_, err := ipam2.AddrUnalloc(args, false)
if err != nil {
return fmt.Errorf("ipam addr unalloc for pod %s error: %v", podKey, err)
}
klog.Infof("ipam addr unalloc for pod %s success", podKey)
return nil
}

// generateHostVethName returns a name to be used on the host-side veth device.
Expand All @@ -622,6 +649,10 @@ func generateHostVethName(prefix, namespace, podname string) string {
return fmt.Sprintf("%s%s", prefix, hex.EncodeToString(h.Sum(nil))[:11])
}

func getPodKey(info *rpc.PodInfo) string {
return info.Namespace + "/" + info.Name + "/" + info.Containter
}

func main() {
networkutils.SetupNetworkHelper()
skel.PluginMain(cmdAdd, nil, cmdDel, version.All, bv.BuildString("hostnic"))
Expand Down
38 changes: 19 additions & 19 deletions cmd/hostnic/ipam/ipam.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ package ipam

import (
"context"
"encoding/json"
"fmt"
"net"
"time"
Expand All @@ -29,7 +28,6 @@ import (
"github.com/containernetworking/cni/pkg/types"
"github.com/containernetworking/cni/pkg/types/current"
"github.com/davecgh/go-spew/spew"
"github.com/sirupsen/logrus"
"google.golang.org/grpc"
"google.golang.org/grpc/backoff"

Expand All @@ -39,10 +37,10 @@ import (
)

func AddrAlloc(args *skel.CmdArgs) (*rpc.IPAMMessage, *current.Result, error) {
conf := NetConf{}
if err := json.Unmarshal(args.StdinData, &conf); err != nil {
return nil, nil, fmt.Errorf("failed to unmarshal netconf %s", spew.Sdump(args))
}
// conf := NetConf{}
// if err := json.Unmarshal(args.StdinData, &conf); err != nil {
// return nil, nil, fmt.Errorf("failed to unmarshal netconf %s", spew.Sdump(args))
// }

k8sArgs := K8sArgs{}
if err := types.LoadArgs(args.Args, &k8sArgs); err != nil {
Expand Down Expand Up @@ -104,10 +102,10 @@ func AddrAlloc(args *skel.CmdArgs) (*rpc.IPAMMessage, *current.Result, error) {
}

func AddrUnalloc(args *skel.CmdArgs, peek bool) (*rpc.IPAMMessage, error) {
conf := NetConf{}
if err := json.Unmarshal(args.StdinData, &conf); err != nil {
return nil, fmt.Errorf("failed to unmarshal netconf %s", spew.Sdump(args))
}
// conf := NetConf{}
// if err := json.Unmarshal(args.StdinData, &conf); err != nil {
// return nil, fmt.Errorf("failed to unmarshal netconf %s", spew.Sdump(args))
// }

k8sArgs := K8sArgs{}
if err := types.LoadArgs(args.Args, &k8sArgs); err != nil {
Expand Down Expand Up @@ -139,16 +137,18 @@ func AddrUnalloc(args *skel.CmdArgs, peek bool) (*rpc.IPAMMessage, error) {
c := rpc.NewCNIBackendClient(conn)
reply, err := c.DelNetwork(context.Background(), info)
if err != nil {
return nil, fmt.Errorf("failed to call DelNetwork")
}
if reply.Nic == nil || reply.Args.Containter != args.ContainerID {
return nil, ErrNicNotFound
} else {
if err := networkutils.NetworkHelper.CleanupPodNetwork(reply.Nic, reply.IP); err != nil {
logrus.Errorf("clean %v %s network failed: %v", reply.Nic, reply.IP, err)
//HOSTNIC_TODO: try to cleanup ebtables rules once there
}
return nil, fmt.Errorf("failed to call DelNetwork: %v", err)
}

//do this before clean db record which in ipam server DelNetwork function
// if reply.Nic == nil || reply.Args.Containter != args.ContainerID {
// return nil, ErrNicNotFound
// } else {
// if err := networkutils.NetworkHelper.CleanupPodNetwork(reply.Nic, reply.IP); err != nil {
// klog.Errorf("clean %v %s network failed: %v", reply.Nic, reply.IP, err)
// //HOSTNIC_TODO: try to cleanup ebtables rules once there
// }
// }

return reply, nil
}
19 changes: 14 additions & 5 deletions pkg/allocator/allocator.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"sync"
"time"

"k8s.io/klog/v2"
log "k8s.io/klog/v2"

"github.com/yunify/hostnic-cni/pkg/conf"
Expand Down Expand Up @@ -260,11 +261,19 @@ func (a *Allocator) FreeHostNic(args *rpc.PodInfo, peek bool) (*rpc.HostNic, str

for _, status := range a.nics {
if pod, ok := status.Pods[getContainterKey(args)]; ok {
if err := a.delNicPod(status.Nic, pod); err != nil {
log.Errorf("delNicPod record failed: nic: %s, podkey: %s, err: %v", getNicKey(status.Nic), getPodKey(args), err) //HOSTNIC_TODO:the error was not return, why?
} else {
log.Infof("delNicPod record success, nic: %s, podkey: %s", getNicKey(status.Nic), getPodKey(args))
nicKey := getNicKey(status.Nic)
podKey := getPodKey(args)
if peek {
klog.Infof("found db record for pod %s[%s] , ip: %s", nicKey, podKey, pod.PodIP)
return status.Nic, pod.PodIP, nil
}

// delete nic pod record, this is the last step for delete a pod
err := a.delNicPod(status.Nic, pod)
if err != nil {
return status.Nic, pod.PodIP, fmt.Errorf("clean db record for pod %s[%s] error: %v", nicKey, podKey, err)
}
log.Infof("clean db record for pod %s[%s] success", nicKey, podKey)
return status.Nic, pod.PodIP, nil
}
}
Expand All @@ -278,7 +287,7 @@ func (a *Allocator) FreeHostNic(args *rpc.PodInfo, peek bool) (*rpc.HostNic, str
}
return result.Nic, nil
*/

klog.Infof("no db record for pod %s", getPodKey(args))
return nil, "", nil
}

Expand Down
13 changes: 10 additions & 3 deletions pkg/networkutils/networkutil.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,17 +53,24 @@ func (n NetworkUtils) CleanupPodNetwork(nic *rpc.HostNic, podIP string) error {
ip := net.ParseIP(podIP)
dstRules, err := getRuleListByDst(ip)
if err != nil {
return err
return fmt.Errorf("get rule list by ip %s error: %v", podIP, err)
}

for _, rule := range dstRules {
//delete not exists rule return: RTNETLINK answers: No such file or directory
err := netlink.RuleDel(&rule)
if err != nil && !os.IsExist(err) {
if err != nil && !strings.Contains(err.Error(), "no such file or directory") {
return fmt.Errorf("failed to del rule %v : %v", rule, err)
}
}

return setArpReply(constants.GetHostNicBridgeName(int(nic.RouteTableNum)), podIP, nic.HardwareAddr, "-D")
//delete not exists rule return: Sorry, rule does not exist.
err = setArpReply(constants.GetHostNicBridgeName(int(nic.RouteTableNum)), podIP, nic.HardwareAddr, "-D")
if err != nil && !strings.Contains(err.Error(), "rule does not exist") {
return fmt.Errorf("delete ebtables rule for ip %s error: %v", podIP, err)
}

return nil
}

// Note: setup NetworkManager to disable dhcp on nic
Expand Down
21 changes: 16 additions & 5 deletions pkg/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -213,18 +213,29 @@ func (s *IPAMServer) DelNetwork(context context.Context, in *rpc.IPAMMessage) (*
}()

handleID = podHandleKey(in.Args)
log.Infof("going to release ip by handleID %s", handleID)

//get pod ip and nic info here
in.Nic, in.IP, _ = allocator.Alloc.FreeHostNic(in.Args, true)
if in.Peek {
//only get pod ip info here and return
return in, nil
}

//release ip in ipamblock
log.Infof("going to release ip (%s) by handleID %s", in.IP, handleID)
if err = s.ipamclient.ReleaseByHandle(handleID); err != nil {
log.Errorf("DelNetwork request (%v) release ip by %s failed: %v", in.Args, handleID, err)
(*s.oddPodCount).FreeFromPoolFailedCount = (*s.oddPodCount).FreeFromPoolFailedCount + 1
return in, fmt.Errorf("release ip %s by handleID %s error: %v", in.IP, handleID, err)
}

log.Infof("release ip by handleID %s success, going to clear pod record for hostnic", handleID)
in.Nic, in.IP, err = allocator.Alloc.FreeHostNic(in.Args, in.Peek)
//clear pod db record
log.Infof("release ip (%s) by handleID %s success, going to clear db record for hostnic", in.IP, handleID)
_, _, err = allocator.Alloc.FreeHostNic(in.Args, in.Peek)
if err != nil {
(*s.oddPodCount).FreeFromHostFailedCount = (*s.oddPodCount).FreeFromHostFailedCount + 1
return in, fmt.Errorf("clear pod db record error for %s: %v", handleID, err)
}
return in, err
return in, nil
}

func (s *IPAMServer) ShowNics(context context.Context, in *rpc.Nothing) (*rpc.NicInfoList, error) {
Expand Down

0 comments on commit ef8c7b8

Please sign in to comment.