Skip to content

Commit

Permalink
SKS-1474: Get VM NIC IP from K8s node to speed up VM reconcile (#124)
Browse files Browse the repository at this point in the history
## 问题

[SKS-1474] CAPE同步VM网络信息慢 - Jira http://jira.smartx.com/browse/SKS-1474。 该问题导致CAPE创建VM时,有时需要等待3分钟才能从Tower处获取到IP,明显降低了VM创建速度。

## 根因

ELF同步已安装vmtools的VM的IP数据时,同步时延不固定:快的话在1分钟内,慢的话需要2-3分钟。
ELF同步慢的原因为每次要同步状态的虚拟机太多了,所以导致执行一轮耗费的时间太长。

## 修复

当Tower API没有返回网卡IP时,从k8s node获取VM第一块网卡IP。VM单网卡场景中可以加快CAPE同步VM IP的速度,加速VM创建过程。

Co-authored-by: Jesse Hu <[email protected]>
  • Loading branch information
Levi080513 and jessehu authored Jul 3, 2023
1 parent 5747927 commit 2f3c90a
Show file tree
Hide file tree
Showing 4 changed files with 281 additions and 51 deletions.
15 changes: 15 additions & 0 deletions api/v1beta1/elfmachine_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,21 @@ func (m *ElfMachine) SetVMDisconnectionTimestamp(timestamp *metav1.Time) {
}
}

// GetNetworkDevicesRequiringIP returns the network devices declared in the
// machine spec whose NetworkType is not NetworkTypeNone, i.e. the devices
// that require a DHCP IP or a static IP.
//
// Devices are returned in spec order. The result is an empty, non-nil slice
// when no device qualifies.
func (m *ElfMachine) GetNetworkDevicesRequiringIP() []NetworkDeviceSpec {
	devices := []NetworkDeviceSpec{}

	for _, device := range m.Spec.Network.Devices {
		// Keep only the devices that are expected to obtain an IP address.
		if device.NetworkType != NetworkTypeNone {
			devices = append(devices, device)
		}
	}

	return devices
}

func (m *ElfMachine) GetVMDisconnectionTimestamp() *metav1.Time {
if m.Annotations == nil {
return nil
Expand Down
98 changes: 79 additions & 19 deletions controllers/elfmachine_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -958,8 +958,10 @@ func (r *ElfMachineReconciler) reconcileNode(ctx *context.MachineContext, vm *mo
return true, nil
}

// If the VM is powered on then issue requeues until all of the VM's
// networks have IP addresses.
// Ensure all the VM's NICs get IP addresses, otherwise requeue.
//
// In the scenario with many virtual machines, it could be slow for SMTX OS to synchronize VM information via vmtools.
// So if Tower API returns empty IP address for the VM's 1st NIC, try to get its IP address from the corresponding K8s Node.
func (r *ElfMachineReconciler) reconcileNetwork(ctx *context.MachineContext, vm *models.VM) (ret bool, reterr error) {
defer func() {
if reterr != nil {
Expand All @@ -970,38 +972,62 @@ func (r *ElfMachineReconciler) reconcileNetwork(ctx *context.MachineContext, vm
}
}()

ctx.ElfMachine.Status.Network = []infrav1.NetworkStatus{}
ctx.ElfMachine.Status.Addresses = []clusterv1.MachineAddress{}
// A Map of IP to MachineAddress
ipToMachineAddressMap := make(map[string]clusterv1.MachineAddress)

nics, err := ctx.VMService.GetVMNics(*vm.ID)
if err != nil {
return false, err
}

networkStatuses := make([]infrav1.NetworkStatus, 0, len(nics))
for i := 0; i < len(nics); i++ {
nic := nics[i]
if service.GetTowerString(nic.IPAddress) == "" {
continue
}
ip := service.GetTowerString(nic.IPAddress)

networkStatuses = append(networkStatuses, infrav1.NetworkStatus{
IPAddrs: []string{service.GetTowerString(nic.IPAddress)},
// Add to Status.Network even if IP is empty.
ctx.ElfMachine.Status.Network = append(ctx.ElfMachine.Status.Network, infrav1.NetworkStatus{
IPAddrs: []string{ip},
MACAddr: service.GetTowerString(nic.MacAddress),
})
}

ctx.ElfMachine.Status.Network = networkStatuses
if len(networkStatuses) < len(ctx.ElfMachine.Spec.Network.Devices) {
return false, nil
}
if ip == "" {
continue
}

ipAddrs := make([]clusterv1.MachineAddress, 0, len(ctx.ElfMachine.Status.Network))
for _, netStatus := range ctx.ElfMachine.Status.Network {
ipAddrs = append(ipAddrs, clusterv1.MachineAddress{
ipToMachineAddressMap[ip] = clusterv1.MachineAddress{
Type: clusterv1.MachineInternalIP,
Address: netStatus.IPAddrs[0],
})
Address: ip,
}
}

networkDevicesRequiringIP := ctx.ElfMachine.GetNetworkDevicesRequiringIP()

if len(ipToMachineAddressMap) < len(networkDevicesRequiringIP) {
// Try to get VM NIC IP address from the K8s Node.
nodeIP, err := r.getK8sNodeIP(ctx, ctx.ElfMachine.Name)
if err == nil && nodeIP != "" {
ipToMachineAddressMap[nodeIP] = clusterv1.MachineAddress{
Address: nodeIP,
Type: clusterv1.MachineInternalIP,
}

// If not all NICs get IP, return false and wait for next requeue.
if len(ipToMachineAddressMap) < len(networkDevicesRequiringIP) {
return false, nil
}
} else {
if err != nil {
ctx.Logger.Error(err, "failed to get VM NIC IP address from the K8s Node", "Node", ctx.ElfMachine.Name)
}
return false, nil
}
}

ctx.ElfMachine.Status.Addresses = ipAddrs
for _, machineAddress := range ipToMachineAddressMap {
ctx.ElfMachine.Status.Addresses = append(ctx.ElfMachine.Status.Addresses, machineAddress)
}

return true, nil
}
Expand Down Expand Up @@ -1106,3 +1132,37 @@ func (r *ElfMachineReconciler) deleteNode(ctx *context.MachineContext, nodeName

return nil
}

// getK8sNodeIP fetches the Node named nodeName through a client built for
// ctx.Cluster and returns the first address of type NodeInternalIP found in
// the Node's status.
//
// It returns an empty string and a nil error when the control plane is not
// initialized, when the Node does not exist, or when the Node has no
// internal IP address. Failures to build the client or to fetch the Node are
// wrapped and returned.
func (r *ElfMachineReconciler) getK8sNodeIP(ctx *context.MachineContext, nodeName string) (string, error) {
	// Do not query for the Node until the control plane is initialized.
	if !conditions.IsTrue(ctx.Cluster, clusterv1.ControlPlaneInitializedCondition) {
		return "", nil
	}

	kubeClient, err := util.NewKubeClient(ctx, ctx.Client, ctx.Cluster)
	if err != nil {
		return "", errors.Wrapf(err, "failed to get client for Cluster %s/%s", ctx.Cluster.Namespace, ctx.Cluster.Name)
	}

	node, err := kubeClient.CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{})
	if err != nil {
		// A missing Node is not an error: the caller just gets no IP.
		if apierrors.IsNotFound(err) {
			return "", nil
		}

		return "", errors.Wrapf(err, "failed to get K8s Node %s for Cluster %s/%s", nodeName, ctx.Cluster.Namespace, ctx.Cluster.Name)
	}

	// An empty address list simply falls through to the final return.
	nodeAddresses := node.Status.Addresses
	for i := range nodeAddresses {
		if nodeAddresses[i].Type == corev1.NodeInternalIP {
			return nodeAddresses[i].Address, nil
		}
	}

	return "", nil
}
Loading

0 comments on commit 2f3c90a

Please sign in to comment.