Skip to content

Commit

Permalink
debug: no space left
Browse files Browse the repository at this point in the history
Signed-off-by: tao.yang <[email protected]>
  • Loading branch information
ty-dc committed Aug 8, 2024
1 parent b399e5e commit 75f9d67
Show file tree
Hide file tree
Showing 3 changed files with 113 additions and 27 deletions.
24 changes: 24 additions & 0 deletions .github/workflows/e2e-init.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,18 @@ jobs:
-e INSTALL_RDMA=true \
-e INSTALL_SRIOV=true
- name: check Free Disk Space (Ubuntu)
run: |
echo "df -h"
df -h
echo "lsblk"
lsblk
- name: check kind node list
run: |
echo "kind node list"
kind get clusters
- name: Run e2e Test
id: run_e2e
continue-on-error: true
Expand Down Expand Up @@ -181,6 +193,18 @@ jobs:
echo "UPLOAD_E2E_REPORT=false" >> $GITHUB_ENV
fi
- name: check kind node list
run: |
echo "kind node list"
kind get clusters
- name: check Free Disk Space (Ubuntu)
run: |
echo "df -h"
df -h
echo "lsblk"
lsblk
- name: Upload e2e log
if: ${{ inputs.run_e2e == 'true' }}
uses: actions/[email protected]
Expand Down
114 changes: 88 additions & 26 deletions pkg/gcmanager/scanAll_IPPool.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ func (s *SpiderGC) executeScanAll(ctx context.Context) {
for _, pool := range pools {
logger.Sugar().Debugf("checking IPPool '%s'", pool.Name)
poolAllocatedIPs, err := convert.UnmarshalIPPoolAllocatedIPs(pool.Status.AllocatedIPs)
if nil != err {
if err != nil {
logger.Sugar().Errorf("failed to parse IPPool '%v' status AllocatedIPs, error: %v", pool, err)
continue
}
Expand All @@ -126,34 +126,16 @@ func (s *SpiderGC) executeScanAll(ctx context.Context) {
if apierrors.IsNotFound(err) {
wrappedLog := scanAllLogger.With(zap.String("gc-reason", "pod not found in k8s but still exists in IPPool allocation"))
endpoint, err := s.wepMgr.GetEndpointByName(ctx, podNS, podName, constant.UseCache)
if nil != err {
if err != nil {
// just continue if we meet other errors
if !apierrors.IsNotFound(err) {
wrappedLog.Sugar().Errorf("failed to get SpiderEndpoint: %v", err)
continue
}
} else {
if s.gcConfig.EnableStatefulSet && endpoint.Status.OwnerControllerType == constant.KindStatefulSet {
isValidStsPod, err := s.stsMgr.IsValidStatefulSetPod(ctx, podNS, podName, constant.KindStatefulSet)
if nil != err {
scanAllLogger.Sugar().Errorf("failed to check StatefulSet pod IP '%s' should be cleaned or not, error: %v", poolIP, err)
continue
}
if isValidStsPod {
scanAllLogger.Sugar().Warnf("no need to release IP '%s' for StatefulSet pod ", poolIP)
continue
}
}
if s.gcConfig.EnableKubevirtStaticIP && endpoint.Status.OwnerControllerType == constant.KindKubevirtVMI {
isValidVMPod, err := s.kubevirtMgr.IsValidVMPod(logutils.IntoContext(ctx, scanAllLogger), podNS, constant.KindKubevirtVMI, endpoint.Status.OwnerControllerName)
if nil != err {
scanAllLogger.Sugar().Errorf("failed to check kubevirt vm pod IP '%s' should be cleaned or not, error: %v", poolIP, err)
continue
}
if isValidVMPod {
scanAllLogger.Sugar().Warnf("no need to release IP '%s' for kubevirt vm pod ", poolIP)
continue
}
shouldKeepIP, err := s.isValidStaticPod(ctx, scanAllLogger, podNS, podName, poolIP, endpoint.Status.OwnerControllerType)
if err != nil || shouldKeepIP {
continue
}
}

Expand Down Expand Up @@ -202,10 +184,61 @@ func (s *SpiderGC) executeScanAll(ctx context.Context) {
} else {
// case: The pod recorded in the IPPool's ip-allocationDetail also exists in k8s, but the pod UID stored in the IPPool allocation differs from the live pod's UID
if string(podYaml.UID) != poolIPAllocation.PodUID {
// Once the static IP Pod restarts, it will retrieve the Pod IP from it SpiderEndpoint.
// So at this moment the Pod UID is different from the IPPool's ip-allocationDetail, we should not release it.
// Once the static IP Pod restarts, it will retrieve the Pod IP from the SpiderEndpoint.
// However, when the Pod UID is different from the IPPool UID, it needs to be determined whether to reclaim the IP.
if podmanager.IsStaticIPPod(s.gcConfig.EnableStatefulSet, s.gcConfig.EnableKubevirtStaticIP, podYaml) {
scanAllLogger.Sugar().Debugf("Static IP Pod just restarts, keep the static IP '%s' from the IPPool", poolIP)
// Check if the status.ips of the current K8S Pod has values.
// If there are values, the pod has started and IP allocation is successful
// If there are no values, it means the new pod is still starting.
if len(podYaml.Status.PodIPs) != 0 {
endpoint, err := s.wepMgr.GetEndpointByName(ctx, podYaml.Namespace, podYaml.Name, constant.UseCache)
if err != nil {
if apierrors.IsNotFound(err) {
// If Endpoint is not found, check if it is a valid static Pod. If so, the IP address will not be reclaimed.
shouldKeepIP, err := s.isValidStaticPod(ctx, scanAllLogger, podNS, podName, poolIP, podYaml.OwnerReferences[0].Kind)
if err != nil || shouldKeepIP {
continue
}
scanAllLogger.Sugar().Debugf("The static Pod %s/%s already has an IP in K8S and is a valid Pod, but it is different from the IP in IPPool. try to release the IP in IPPool.", podYaml.Namespace, podYaml.Name)
wrappedLog := scanAllLogger.With(zap.String("gc-reason", fmt.Sprintf("The IP address of the valid static Pod %s/%s is different from that in the IPPool", podYaml.Namespace, podYaml.Name)))
err = s.ippoolMgr.ReleaseIP(ctx, pool.Name, []types.IPAndUID{{
IP: poolIP,
UID: poolIPAllocation.PodUID},
})
if nil != err {
wrappedLog.Sugar().Errorf("failed to release ip '%s', error: '%v'", poolIP, err)
continue
}
} else {
wrappedLog := scanAllLogger.With(zap.String("gc-reason", "The Pod has been assigned IPs, but Pod UID different with the IPPool UID "))
wrappedLog.Sugar().Errorf("failed to get SpiderEndpoint: %v", err)
continue
}
} else {
if endpoint.Status.Current.UID != string(podYaml.UID) {
scanAllLogger.Sugar().Debugf("the Pod UID %s is different from Endpoint UID: %s and IPPool UID: %s. try to release Endpoint and IPPool IP.", endpoint.Status.Current.UID, poolIPAllocation.PodUID, podYaml.UID)
wrappedLog := scanAllLogger.With(zap.String("gc-reason", "Pod UID is different from Endpoint UID and IPPool UID"))
err := s.releaseSingleIPAndRemoveWEPFinalizer(logutils.IntoContext(ctx, wrappedLog), pool.Name, poolIP, poolIPAllocation)
if nil != err {
wrappedLog.Sugar().Errorf("failed to release ip '%s', error: '%v'", poolIP, err)
continue
}
} else {
scanAllLogger.Sugar().Debugf("the Pod UID %s is the same as the Endpoint UID %s, but different from IPPool UID %s. try to release the IPPool IP.", podYaml.UID, endpoint.Status.Current.UID, poolIPAllocation.PodUID)
wrappedLog := scanAllLogger.With(zap.String("gc-reason", "Pod UID is the same as the Endpoint UID , but different from IPPool UID"))
err = s.ippoolMgr.ReleaseIP(ctx, pool.Name, []types.IPAndUID{{
IP: poolIP,
UID: poolIPAllocation.PodUID},
})
if nil != err {
wrappedLog.Sugar().Errorf("failed to release ip '%s', error: '%v'", poolIP, err)
continue
}
}
}
} else {
scanAllLogger.Sugar().Debugf("Static IP Pod just restarts, no IPs yet, keep the static IP '%s' from the IPPool", poolIP)
}
} else {
wrappedLog := scanAllLogger.With(zap.String("gc-reason", "IPPoolAllocation pod UID is different with pod UID"))
// we are afraid that no one removes the old same name Endpoint finalizer
Expand Down Expand Up @@ -323,3 +356,32 @@ func (s *SpiderGC) releaseSingleIPAndRemoveWEPFinalizer(ctx context.Context, poo
log.Sugar().Infof("remove SpiderEndpoint '%s/%s' finalizer successfully", podNS, podName)
return nil
}

// isValidStaticPod reports whether the IP 'poolIP' must be kept because it
// belongs to a still-valid static-IP workload (StatefulSet or KubeVirt VMI).
//
// The owner kind is matched against ownerControllerType, and each kind is only
// consulted when its feature switch in gcConfig is enabled. The result is:
//   - (true, err)  when the validity lookup itself fails; the IP is kept and
//     the error is logged here, so callers only need to skip the entry;
//   - (true, nil)  when the owning workload confirms the pod is valid;
//   - (false, nil) in every other case, i.e. the IP may be reclaimed.
func (s *SpiderGC) isValidStaticPod(ctx context.Context, logger *zap.Logger, podNS, podName, poolIP, ownerControllerType string) (bool, error) {
	// StatefulSet-owned pod.
	stsOwned := s.gcConfig.EnableStatefulSet && ownerControllerType == constant.KindStatefulSet
	if stsOwned {
		valid, checkErr := s.stsMgr.IsValidStatefulSetPod(ctx, podNS, podName, constant.KindStatefulSet)
		if checkErr != nil {
			logger.Sugar().Errorf("failed to check if StatefulSet pod IP '%s' should be cleaned or not, error: %v", poolIP, checkErr)
			return true, checkErr
		}
		if valid {
			logger.Sugar().Warnf("no need to release IP '%s' for StatefulSet pod", poolIP)
			return true, nil
		}
		// Not a valid StatefulSet pod: fall through to the remaining checks.
	}

	// KubeVirt VMI-owned pod.
	vmOwned := s.gcConfig.EnableKubevirtStaticIP && ownerControllerType == constant.KindKubevirtVMI
	if vmOwned {
		// NOTE(review): podName is passed as the last argument; presumably
		// IsValidVMPod expects the owning VM/VMI name there — confirm that the
		// two are interchangeable for every caller.
		valid, checkErr := s.kubevirtMgr.IsValidVMPod(ctx, podNS, constant.KindKubevirtVMI, podName)
		if checkErr != nil {
			logger.Sugar().Errorf("failed to check if KubeVirt VM pod IP '%s' should be cleaned or not, error: %v", poolIP, checkErr)
			return true, checkErr
		}
		if valid {
			logger.Sugar().Warnf("no need to release IP '%s' for KubeVirt VM pod", poolIP)
			return true, nil
		}
	}

	// Neither a valid StatefulSet pod nor a valid KubeVirt VM pod.
	return false, nil
}
2 changes: 1 addition & 1 deletion test/e2e/common/spiderpool.go
Original file line number Diff line number Diff line change
Expand Up @@ -480,7 +480,7 @@ func DeleteIPPoolUntilFinish(f *frame.Framework, poolName string, ctx context.Co
default:
_, err := GetIppoolByName(f, poolName)
if err != nil {
GinkgoWriter.Printf("IPPool '%s' has been removederror: %v", poolName, err)
GinkgoWriter.Printf("IPPool '%s' has been removed, error: %v \n", poolName, err)
return nil
}
time.Sleep(ForcedWaitingTime)
Expand Down

0 comments on commit 75f9d67

Please sign in to comment.