Skip to content

Commit

Permalink
Max_vcpus_topology: ensure that the VM gets correct topology with maximum vcpus
Browse files Browse the repository at this point in the history
  • Loading branch information
rh-jugraham committed Sep 26, 2024
1 parent 2baf67c commit e5c01c1
Show file tree
Hide file tree
Showing 2 changed files with 236 additions and 0 deletions.
27 changes: 27 additions & 0 deletions libvirt/tests/cfg/cpu/vcpu_max_topology.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Cartesian config for the vcpu_max_topology test.  The vm is given as many
# vcpus as the host has online CPUs; the variants split those vcpus across
# sockets / cores / clusters ("one" -> exactly 1, "many" -> several valid
# divisors are tried, "" -> hypervisor default).
- vcpu.max_topology:
    type = vcpu_max_topology
    vcpus_placement = "static"
    sockets = ""
    cores = ""
    clusters = ""
    variants:
        - one_socket:
            sockets = "one"
            cores = "many"
            variants:
                - default_clusters:
                    clusters = ""
                - many_clusters:
                    clusters = "many"
        - one_core_per_socket:
            sockets = "many"
            cores = "one"
            clusters = ""
        - many_cores_per_socket:
            sockets = "many"
            cores = "many"
            variants:
                - default_clusters:
                    clusters = ""
                - many_clusters:
                    clusters = "many"
209 changes: 209 additions & 0 deletions libvirt/tests/src/cpu/vcpu_max_topology.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,209 @@
import logging as log


from virttest import virt_vm
from virttest import cpu as cpuutil
from virttest.libvirt_xml import vm_xml


# Bind the conventional ``logging`` name to an avocado-namespaced logger.
# NOTE: shadowing the stdlib ``logging`` module name is a known wart, kept
# as a workaround to avoid touching every log call in the file.
logging = log.getLogger('avocado.' + __name__)


def _count_online_cpus(cpu_list):
    """
    Return the number of CPUs described by an lscpu-style CPU list.

    Handles both a single range and comma-separated ranges/singletons,
    e.g. "0-7" -> 8, "0-3,5-7" -> 7, "0" -> 1.

    :param cpu_list: value of lscpu's "On-line CPU(s) list" field
    :return: total count of listed CPUs as an int
    """
    total = 0
    for chunk in cpu_list.split(","):
        bounds = chunk.split("-")
        if len(bounds) == 2:
            total += int(bounds[1]) - int(bounds[0]) + 1
        else:
            total += 1
    return total


def run(test, params, env):
    """
    Test that the vm can start with vcpus which is equal to host online cpu number
    and vm topology is consistent to those configured.
    Steps:
    1. Configure the vm topology with specified number of sockets, cores, and clusters
    2. Start configured vm with guest agent
    3. Check that the vm setup is consistent with the topology configured
    3a. Check the lscpu output
    3b. Check the kernel file for core id for each vcpu
    3c. Check the vcpu cluster number
    3d. Check the cluster cpu list
    """
    sockets_param = params.get("sockets", "")
    cores_param = params.get("cores", "")
    clusters_param = params.get("clusters", "")

    # Set vcpus_num to the host online cpu number.  The lscpu field may be
    # a single range ("0-7") or comma-separated ranges ("0-3,5-7"); count
    # the actual CPUs instead of assuming one contiguous range starting at
    # 0 (the previous parsing raised ValueError on non-contiguous lists).
    host_cpu_info = cpuutil.get_cpu_info()
    vcpus_num = _count_online_cpus(host_cpu_info["On-line CPU(s) list"])
    logging.debug("Host online CPU number: %s", vcpus_num)

    sockets_list = []
    cores_list = []
    clusters_list = []

    # Derive candidate (sockets, cores, clusters) combinations; every
    # combination must multiply out to exactly vcpus_num.
    if sockets_param == "one":
        # one_socket case: all vcpus in a single socket,
        # cores = vcpus_num // number of clusters
        sockets_list = [1]
        if clusters_param == "many":
            # Only cluster counts that divide vcpus_num evenly are usable
            clusters_list = [n for n in (2, 4, 6) if vcpus_num % n == 0]
            cores_list = [vcpus_num // n for n in clusters_list]
        else:
            # default_clusters case
            cores_list = [vcpus_num]
    elif sockets_param == "many" and cores_param == "one":
        # one_core_per_socket case: one vcpu per socket
        sockets_list = [vcpus_num]
        cores_list = [1]
    else:
        # many_cores_per_socket case
        if clusters_param == "many":
            # Default to 2 cores (or 3 when vcpus_num is odd) and keep only
            # cluster counts for which sockets * cores * clusters equals
            # vcpus_num exactly.  Integer arithmetic replaces the original
            # float-modulo check ((vcpus_num / cores) % clusters), which
            # relied on floating-point remainders for a divisibility test.
            cores = 2 if vcpus_num % 2 == 0 else 3
            clusters_list = [n for n in (2, 4, 6)
                             if vcpus_num % (cores * n) == 0]
            cores_list = [cores] * len(clusters_list)
            sockets_list = [vcpus_num // (cores * n) for n in clusters_list]
        else:
            # default_clusters case: sockets * cores = vcpus_num
            cores_list = [n for n in (2, 4, 6) if vcpus_num % n == 0]
            sockets_list = [vcpus_num // n for n in cores_list]

    if not sockets_list or not cores_list:
        test.error("The number of sockets or cores is not valid")

    for i, cores in enumerate(cores_list):
        # sockets_list is either parallel to cores_list or a single value
        sockets = sockets_list[i] if len(sockets_list) > 1 else sockets_list[0]
        if cores == 0 or sockets == 0:
            continue
        if clusters_list:
            # Bug fix: the original single-combination fast path dropped the
            # computed cluster count and tested clusters=1 instead; always
            # forward clusters_list[i] when cluster counts were derived.
            set_and_check_topology(test, params, env, vcpus_num,
                                   sockets, cores, clusters_list[i])
        else:
            set_and_check_topology(test, params, env, vcpus_num,
                                   sockets, cores)


def set_and_check_topology(test, params, env, vcpus_num, sockets, cores, clusters=1):
    """
    Define the vm with the given CPU topology, start it, and verify that the
    guest-visible topology matches (steps 2-3 of the test plan in run()).

    :param test: avocado test object used to report fail/error/cancel
    :param params: test parameter dict ("main_vm", "vcpus_placement")
    :param env: test environment object holding the vm instance
    :param vcpus_num: total number of vcpus (host online CPU count)
    :param sockets: number of sockets to configure
    :param cores: number of cores (per cluster) to configure
    :param clusters: clusters per socket; values > 1 are written into the
        <cpu><topology> element manually, default 1 uses set_vm_vcpus()
    """
    vm_name = params.get("main_vm")
    vm = env.get_vm(vm_name)
    vcpus_placement = params.get("vcpus_placement", "static")

    # Back up domain XML so the original definition can be restored in finally
    vmxml = vm_xml.VMXML.new_from_inactive_dumpxml(vm_name)
    vmxml_backup = vmxml.copy()

    try:
        # Modify vm: the domain must be shut off before redefining topology
        if vm.is_alive():
            vm.destroy()
        vmxml.placement = vcpus_placement

        if clusters > 1:
            # set_vm_vcpus() has no cluster parameter, so build the
            # <topology> element by hand when clusters are requested
            vmcpu_xml = vmxml["cpu"]
            vmcpu_xml["topology"] = {
                "sockets": str(sockets),
                "clusters": str(clusters),
                "cores": str(cores),
                "threads": "1"}
            vmxml["cpu"] = vmcpu_xml
            vmxml["vcpu"] = vcpus_num
            vmxml.sync()

        else:
            # Set using set_vm_vcpus helper function (default cluster count)
            vmxml.set_vm_vcpus(
                vm_name,
                vcpus_num,
                sockets=sockets,
                cores=cores,
                threads=1,
                add_topology=True
            )
        logging.debug("Defined guest with '%s' vcpu(s), '%s' socket(s), and '%s' core(s), and '%s' cluster(s)",
                      str(vcpus_num), str(sockets), str(cores), str(clusters))

        # Start guest agent in vm; this also starts the vm itself.
        # Cancel (not fail) when the platform does not support the setup.
        try:
            vm.prepare_guest_agent()
        except virt_vm.VMStartError as info:
            if "not supported" in str(info).lower():
                test.cancel(info)
            else:
                test.error(info)

        # Check lscpu output within the vm is consistent with the topology configured

        # Check the topology in the guest
        session = vm.wait_for_login()
        lscpu_output = cpuutil.get_cpu_info(session)
        # get_cpu_info() should close the session; close again below is
        # defensive — TODO confirm against virttest.cpu implementation
        session.close()

        lscpu_check_fail = "The configured topology is not consistent with the lscpu output within the vm for "
        if (str(vcpus_num) != lscpu_output["CPU(s)"]):
            test.fail(lscpu_check_fail + "CPU(s)")
        elif (('0' + '-' + str(vcpus_num - 1)) != lscpu_output["On-line CPU(s) list"]):
            # Guest CPUs are expected to be one contiguous range 0..N-1
            test.fail(lscpu_check_fail + "on-line CPU(s) list")
        elif ("1" != lscpu_output["Thread(s) per core"]):
            # threads=1 is always configured above
            test.fail(lscpu_check_fail + "thread(s) per core")
        elif (str(sockets) != lscpu_output["Socket(s)"]):
            test.fail(lscpu_check_fail + "socket(s)")
        elif (str(cores * clusters) != lscpu_output["Core(s) per socket"]):
            # lscpu reports cores per socket across all clusters
            test.fail(lscpu_check_fail + "core(s) per socket")

        # get_cpu_info() consumed the first session; log in again for sysfs checks
        session = vm.wait_for_login()

        # Check kernel file for core id for each vcpu in the vm
        # (with threads=1 each vcpu should map to its own core, so the
        # core id is expected to equal the vcpu index)
        for vcpu in range(vcpus_num):
            cmd_coreid = f'cat /sys/devices/system/cpu/cpu{vcpu}/topology/core_id'
            ret_coreid = session.cmd_output(cmd_coreid, timeout=100).strip()
            if (str(vcpu) != ret_coreid):
                test.fail("In the vm kernel file, the core id for vcpu %s should not be %s" % (vcpu, ret_coreid))

        # Check vcpu cluster number: count distinct cluster_id values
        cmd_clusterid = 'cat /sys/devices/system/cpu/cpu*/topology/cluster_id | sort | uniq -c | wc -l'
        ret_clusterid = session.cmd_output(cmd_clusterid, timeout=100).strip()
        # The result should be equal to defined clusters' value (total
        # clusters across all sockets)
        if (str(sockets * clusters) != ret_clusterid):
            test.fail("In the vm kernel file, the vcpu cluster number should be %s, not %s" % (str(sockets * clusters), ret_clusterid))

        # Check cluster cpu list: count distinct cluster_cpus_list values
        cmd_cluster_cpu_list = 'cat /sys/devices/system/cpu/cpu*/topology/cluster_cpus_list | sort | uniq -c | wc -l'
        ret_cluster_cpu_list = session.cmd_output(cmd_cluster_cpu_list, timeout=100).strip()
        # The result should be equal to #sockets * #clusters
        if (str(sockets * clusters) != ret_cluster_cpu_list):
            test.fail("In the vm kernel file, the cluster cpu list should be %s, not %s" % (str(sockets * clusters), ret_cluster_cpu_list))

        session.close()

    finally:
        # Recover VM: force off and restore the backed-up domain XML
        if vm.is_alive():
            vm.destroy(gracefully=False)
        logging.info("Restoring vm...")
        vmxml_backup.sync()

0 comments on commit e5c01c1

Please sign in to comment.