diff --git a/qemu/tests/cfg/numa.cfg b/qemu/tests/cfg/numa.cfg
index 03c073e7667..d497dc5dd0c 100644
--- a/qemu/tests/cfg/numa.cfg
+++ b/qemu/tests/cfg/numa.cfg
@@ -3,6 +3,10 @@
     kill_vm_on_error = yes
     login_timeout = 240
     numa_hardware_cmd = "numactl --hardware"
+    mem_ratio = 0.6
+    mem_map_tool = "mem_mapping.tar.gz"
+    stress_cmds_mem_mapping = "./mem_mapping"
+    make_cmds_mem_mapping = "gcc mem_mapping.c -o ${stress_cmds_mem_mapping}"
     variants:
         - numa_basic:
             vms = ""
@@ -11,22 +15,19 @@
             only Linux
             type = numa_consistency
             start_vm = no
-            threshold = 0.05
-            ppc64,ppc64le:
-                threshold = 0.15
+            mem_fixed = 4096
+            vm_mem_backend = "memory-backend-ram"
+            mem_ratio = 0.3
+            guest_stress_args = "-a -p -l %sM"
+            vm_mem_policy = bind
         - numa_stress:
             only Linux
             no ppc64 ppc64le
             type = numa_stress
             del stress_args
             mem = 8192
-            mem_ratio = 0.6
-            cmd_cp_mmap_tool = "/bin/cp -rf %s /var/tmp/ && cd /var/tmp/ && tar zxvf mem_mapping.tar.gz"
-            make_cmds_mem_mapping = "gcc mem_mapping.c -o mem_mapping"
-            cmd_mmap = "cd /var/tmp/mem_mapping && ${make_cmds_mem_mapping} && numactl -m %s ./mem_mapping -a -p -l %dK &"
-            cmd_mmap_cleanup = "rm -rf /var/tmp/mem_mapping*"
+            cmd_cp_mmap_tool = "/bin/cp -rf %s %s && cd %s && tar zxvf ${mem_map_tool}"
+            cmd_mmap = "cd %s/mem_mapping && ${make_cmds_mem_mapping} && numactl -m %s ${stress_cmds_mem_mapping} -a -p -l %dK &"
+            cmd_mmap_cleanup = "rm -rf %s/mem_mapping*"
             cmd_mmap_stop = "pkill -9 mem_mapping"
             cmd_migrate_pages = "migratepages %s %s %s"
-            mem_map_tool = "mem_mapping.tar.gz"
-            stress_cmds_mem_mapping = "./mem_mapping"
-            uninstall_cmds_mem_mapping = "rm -rf /home/mem_mapping*"
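Note: the cmd_* templates above no longer hard-code /var/tmp; the working directory and the tool archive are injected through "%s" placeholders and ${...} Cartesian-config references. A minimal sketch of how a test is expected to expand them at runtime, mirroring the numa_stress.py change below (tmp_directory and stress_path are illustrative values, not part of the patch):

    # Illustrative expansion of the cfg templates shown above.
    tmp_directory = "/var/tmp"
    stress_path = "/path/to/deps/mem_mapping/mem_mapping.tar.gz"
    # ${mem_map_tool} is already resolved to "mem_mapping.tar.gz" by the config layer.
    cmd_cp_mmap_tool = "/bin/cp -rf %s %s && cd %s && tar zxvf mem_mapping.tar.gz"
    cmd_cp_mmap_tool = cmd_cp_mmap_tool % (stress_path, tmp_directory, tmp_directory)
    # -> "/bin/cp -rf /path/to/.../mem_mapping.tar.gz /var/tmp && cd /var/tmp && tar zxvf mem_mapping.tar.gz"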
""" - def get_vcpu_used_node(numa_node_info, vcpu_thread): - cpu_used_host = cpu.get_thread_cpu(vcpu_thread)[0] - node_used_host = ([_ for _ in node_list if cpu_used_host - in numa_node_info.nodes[_].cpus][0]) - return node_used_host - error_context.context("Get host numa topological structure", test.log.info) - timeout = float(params.get("login_timeout", 240)) host_numa_node = utils_misc.NumaInfo() node_list = host_numa_node.online_nodes_withcpumem if len(node_list) < 2: - test.cancel("This host only has one NUMA node, skipping test...") - node_list.sort() - params['smp'] = len(node_list) - params['vcpu_cores'] = 1 - params['vcpu_threads'] = 1 - params['vcpu_sockets'] = params['smp'] - params['vcpu_maxcpus'] = params['smp'] - params['guest_numa_nodes'] = "" - params['mem_devs'] = "" - params['backend_mem'] = "memory-backend-ram" - params['use_mem'] = "no" - params['size_mem'] = "1024M" - if arch.ARCH in ('ppc64', 'ppc64le'): - params['size_mem'] = "4096M" - params['mem'] = int(params['size_mem'].strip('M')) * len(node_list) - for node_id in range(len(node_list)): - params['guest_numa_nodes'] += " node%d" % node_id - params['mem_devs'] += "mem%d " % node_id - params['numa_memdev_node%d' % node_id] = "mem-mem%d" % node_id - params['start_vm'] = 'yes' + test.cancel("Host only has one NUMA node, skipping test...") - utils_memory.drop_caches() - vm = params['main_vm'] - env_process.preprocess_vm(test, params, env, vm) - vm = env.get_vm(vm) - vm.verify_alive() - vcpu_threads = vm.vcpu_threads - session = vm.wait_for_login(timeout=timeout) - threshold = params.get_numeric("threshold", target_type=float) - - dd_size = 256 - if dd_size * len(vcpu_threads) > int(params['mem']): - dd_size = int(int(params['mem']) / 2 / len(vcpu_threads)) - - mount_size = dd_size * len(vcpu_threads) + node_alloc = node_list[0] + node_mem_alloc = int(host_numa_node.read_from_node_meminfo(node_alloc, + 'MemFree')) + # Get the node with more free memory + for node in node_list[1:]: + node_mem_free = int(host_numa_node.read_from_node_meminfo(node, + 'MemFree')) + if node_mem_free > node_mem_alloc: + node_mem_alloc = node_mem_free + node_alloc = node - mount_cmd = "mount -o size=%dM -t tmpfs none /tmp" % mount_size + mem_map_tool = params.get("mem_map_tool") + mem_ratio = params.get_numeric("mem_ratio", 0.3, float) + timeout = params.get_numeric("login_timeout", 240, float) + params["vm_mem_host_nodes"] = str(node_alloc) + params["qemu_command_prefix"] = "numactl -m %d " % node_alloc + params["start_vm"] = "yes" + vm_name = params['main_vm'] + env_process.preprocess_vm(test, params, env, vm_name) + vm = env.get_vm(vm_name) + vm.verify_alive() + session = vm.wait_for_login(timeout=timeout) qemu_pid = vm.get_pid() - drop = 0 - for cpuid in range(len(vcpu_threads)): - error_context.context("Get vcpu %s used numa node." 
diff --git a/qemu/tests/numa_stress.py b/qemu/tests/numa_stress.py
index d02df211492..fa52d5c6d8a 100644
--- a/qemu/tests/numa_stress.py
+++ b/qemu/tests/numa_stress.py
@@ -51,9 +51,10 @@ def run(test, params, env):
     if len(host_numa_node.online_nodes) < 2:
         test.cancel("Host only has one NUMA node, skipping test...")
 
+    tmp_directory = "/var/tmp"
     mem_map_tool = params.get("mem_map_tool")
     cmd_cp_mmap_tool = params.get("cmd_cp_mmap_tool")
-    cmd_mmap_cleanup = params.get("cmd_mmap_cleanup")
+    cmd_mmap_cleanup = params.get("cmd_mmap_cleanup") % tmp_directory
     cmd_mmap_stop = params.get("cmd_mmap_stop")
     cmd_migrate_pages = params.get("cmd_migrate_pages")
     mem_ratio = params.get_numeric("mem_ratio", 0.6, float)
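Note: with the cfg template above, the cleanup command is fully expanded here, while cmd_cp_mmap_tool keeps its placeholders until stress_path is known in the next hunk. For illustration:

    cmd_mmap_cleanup = "rm -rf %s/mem_mapping*" % tmp_directory
    # -> "rm -rf /var/tmp/mem_mapping*"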
params.get("cmd_mmap_cleanup") % tmp_directory cmd_mmap_stop = params.get("cmd_mmap_stop") cmd_migrate_pages = params.get("cmd_migrate_pages") mem_ratio = params.get_numeric("mem_ratio", 0.6, float) @@ -73,7 +74,7 @@ def run(test, params, env): guest_stress_args = "-a -p -l %sM" % int(test_mem) stress_path = os.path.join(data_dir.get_deps_dir('mem_mapping'), mem_map_tool) test.log.info("Compile the mem_mapping tool") - cmd_cp_mmap_tool = cmd_cp_mmap_tool % stress_path + cmd_cp_mmap_tool = cmd_cp_mmap_tool % (stress_path, tmp_directory, tmp_directory) process.run(cmd_cp_mmap_tool, shell=True) utils_memory.drop_caches() for test_round in range(test_count): @@ -84,7 +85,7 @@ def run(test, params, env): most_used_node, memory_used = max_mem_map_node(host_numa_node, qemu_pid) numa_node_malloc = most_used_node mmap_size = math.floor(float(node_meminfo(numa_node_malloc, 'MemTotal')) * mem_ratio) - cmd_mmap = cmd_mmap % (numa_node_malloc, mmap_size) + cmd_mmap = cmd_mmap % (tmp_directory, numa_node_malloc, mmap_size) error_context.context("Run mem_mapping on host node " "%s." % numa_node_malloc, test.log.info) process.system(cmd_mmap, shell=True, ignore_bg_processes=True)