From c08c8a1676179dafa5f40c7519066697f46e961f Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Fri, 13 Sep 2024 17:06:52 +0530 Subject: [PATCH 01/26] x86/apic: Add new driver for Secure AVIC The Secure AVIC feature provides SEV-SNP guests with hardware acceleration for performance-sensitive APIC accesses while securely managing the guest-owned APIC state through the use of a private APIC backing page. This helps prevent a malicious hypervisor from generating unexpected interrupts for a vCPU or otherwise violating architectural assumptions around APIC behavior. Add a new x2APIC driver that will serve as the base of the Secure AVIC support. It is initially the same as the x2APIC phys driver, but will be modified as features of Secure AVIC are implemented. Signed-off-by: Kishon Vijay Abraham I Co-developed-by: Neeraj Upadhyay Signed-off-by: Neeraj Upadhyay --- arch/x86/Kconfig | 12 +++ arch/x86/boot/compressed/sev.c | 1 + arch/x86/coco/core.c | 3 + arch/x86/include/asm/msr-index.h | 4 +- arch/x86/kernel/apic/Makefile | 1 + arch/x86/kernel/apic/x2apic_savic.c | 112 ++++++++++++++++++++++++++++ include/linux/cc_platform.h | 8 ++ 7 files changed, 140 insertions(+), 1 deletion(-) create mode 100644 arch/x86/kernel/apic/x2apic_savic.c diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 07ee295368d4..509b063af66e 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -473,6 +473,18 @@ config X86_X2APIC If you don't know what to do here, say N. +config AMD_SECURE_AVIC + bool "AMD Secure AVIC" + depends on X86_X2APIC && AMD_MEM_ENCRYPT + help + This enables AMD Secure AVIC support on guests that have this feature. + + AMD Secure AVIC provides hardware acceleration for performance-sensitive + APIC accesses and support for managing guest-owned APIC state for SEV-SNP + guests. + + If you don't know what to do here, say N.
+ config X86_POSTED_MSI bool "Enable MSI and MSI-x delivery by posted interrupts" depends on X86_64 && IRQ_REMAP diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c index af39855a390a..d92fc9fa6e9f 100644 --- a/arch/x86/boot/compressed/sev.c +++ b/arch/x86/boot/compressed/sev.c @@ -394,6 +394,7 @@ void do_boot_stage2_vc(struct pt_regs *regs, unsigned long exit_code) MSR_AMD64_SNP_VMSA_REG_PROT | \ MSR_AMD64_SNP_RESERVED_BIT13 | \ MSR_AMD64_SNP_RESERVED_BIT15 | \ + MSR_AMD64_SNP_SECURE_AVIC_ENABLED | \ MSR_AMD64_SNP_RESERVED_MASK) /* diff --git a/arch/x86/coco/core.c b/arch/x86/coco/core.c index 0f81f70aca82..4c3bc031e9a9 100644 --- a/arch/x86/coco/core.c +++ b/arch/x86/coco/core.c @@ -100,6 +100,9 @@ static bool noinstr amd_cc_platform_has(enum cc_attr attr) case CC_ATTR_HOST_SEV_SNP: return cc_flags.host_sev_snp; + case CC_ATTR_SNP_SECURE_AVIC: + return sev_status & MSR_AMD64_SNP_SECURE_AVIC_ENABLED; + default: return false; } diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 3ae84c3b8e6d..3dcb2a2a4683 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -680,7 +680,9 @@ #define MSR_AMD64_SNP_VMSA_REG_PROT BIT_ULL(MSR_AMD64_SNP_VMSA_REG_PROT_BIT) #define MSR_AMD64_SNP_SMT_PROT_BIT 17 #define MSR_AMD64_SNP_SMT_PROT BIT_ULL(MSR_AMD64_SNP_SMT_PROT_BIT) -#define MSR_AMD64_SNP_RESV_BIT 18 +#define MSR_AMD64_SNP_SECURE_AVIC_BIT 18 +#define MSR_AMD64_SNP_SECURE_AVIC_ENABLED BIT_ULL(MSR_AMD64_SNP_SECURE_AVIC_BIT) +#define MSR_AMD64_SNP_RESV_BIT 19 #define MSR_AMD64_SNP_RESERVED_MASK GENMASK_ULL(63, MSR_AMD64_SNP_RESV_BIT) #define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile index 3bf0487cf3b7..12153993c12b 100644 --- a/arch/x86/kernel/apic/Makefile +++ b/arch/x86/kernel/apic/Makefile @@ -18,6 +18,7 @@ ifeq ($(CONFIG_X86_64),y) # APIC probe will depend on the listing order here obj-$(CONFIG_X86_NUMACHIP) += apic_numachip.o obj-$(CONFIG_X86_UV) += x2apic_uv_x.o +obj-$(CONFIG_AMD_SECURE_AVIC) += x2apic_savic.o obj-$(CONFIG_X86_X2APIC) += x2apic_phys.o obj-$(CONFIG_X86_X2APIC) += x2apic_cluster.o obj-y += apic_flat_64.o diff --git a/arch/x86/kernel/apic/x2apic_savic.c b/arch/x86/kernel/apic/x2apic_savic.c new file mode 100644 index 000000000000..97dac09a7f42 --- /dev/null +++ b/arch/x86/kernel/apic/x2apic_savic.c @@ -0,0 +1,112 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * AMD Secure AVIC Support (SEV-SNP Guests) + * + * Copyright (C) 2024 Advanced Micro Devices, Inc. 
+ * + * Author: Kishon Vijay Abraham I + */ + +#include +#include + +#include +#include + +#include "local.h" + +static int x2apic_savic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) +{ + return x2apic_enabled() && cc_platform_has(CC_ATTR_SNP_SECURE_AVIC); +} + +static void x2apic_savic_send_IPI(int cpu, int vector) +{ + u32 dest = per_cpu(x86_cpu_to_apicid, cpu); + + /* x2apic MSRs are special and need a special fence: */ + weak_wrmsr_fence(); + __x2apic_send_IPI_dest(dest, vector, APIC_DEST_PHYSICAL); +} + +static void +__send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest) +{ + unsigned long query_cpu; + unsigned long this_cpu; + unsigned long flags; + + /* x2apic MSRs are special and need a special fence: */ + weak_wrmsr_fence(); + + local_irq_save(flags); + + this_cpu = smp_processor_id(); + for_each_cpu(query_cpu, mask) { + if (apic_dest == APIC_DEST_ALLBUT && this_cpu == query_cpu) + continue; + __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu), + vector, APIC_DEST_PHYSICAL); + } + local_irq_restore(flags); +} + +static void x2apic_savic_send_IPI_mask(const struct cpumask *mask, int vector) +{ + __send_IPI_mask(mask, vector, APIC_DEST_ALLINC); +} + +static void x2apic_savic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) +{ + __send_IPI_mask(mask, vector, APIC_DEST_ALLBUT); +} + +static int x2apic_savic_probe(void) +{ + if (!cc_platform_has(CC_ATTR_SNP_SECURE_AVIC)) + return 0; + + if (!x2apic_mode) { + pr_err("Secure AVIC enabled in non x2APIC mode\n"); + snp_abort(); + } + + pr_info("Secure AVIC Enabled\n"); + + return 1; +} + +static struct apic apic_x2apic_savic __ro_after_init = { + + .name = "secure avic x2apic", + .probe = x2apic_savic_probe, + .acpi_madt_oem_check = x2apic_savic_acpi_madt_oem_check, + + .dest_mode_logical = false, + + .disable_esr = 0, + + .cpu_present_to_apicid = default_cpu_present_to_apicid, + + .max_apic_id = UINT_MAX, + .x2apic_set_max_apicid = true, + .get_apic_id = x2apic_get_apic_id, + + .calc_dest_apicid = apic_default_calc_apicid, + + .send_IPI = x2apic_savic_send_IPI, + .send_IPI_mask = x2apic_savic_send_IPI_mask, + .send_IPI_mask_allbutself = x2apic_savic_send_IPI_mask_allbutself, + .send_IPI_allbutself = x2apic_send_IPI_allbutself, + .send_IPI_all = x2apic_send_IPI_all, + .send_IPI_self = x2apic_send_IPI_self, + .nmi_to_offline_cpu = true, + + .read = native_apic_msr_read, + .write = native_apic_msr_write, + .eoi = native_apic_msr_eoi, + .icr_read = native_x2apic_icr_read, + .icr_write = native_x2apic_icr_write, +}; + +apic_driver(apic_x2apic_savic); diff --git a/include/linux/cc_platform.h b/include/linux/cc_platform.h index caa4b4430634..801208678450 100644 --- a/include/linux/cc_platform.h +++ b/include/linux/cc_platform.h @@ -88,6 +88,14 @@ enum cc_attr { * enabled to run SEV-SNP guests. */ CC_ATTR_HOST_SEV_SNP, + + /** + * @CC_ATTR_SNP_SECURE_AVIC: Secure AVIC mode is active. + * + * The host kernel is running with the necessary features enabled + * to run SEV-SNP guests with full Secure AVIC capabilities. + */ + CC_ATTR_SNP_SECURE_AVIC, }; #ifdef CONFIG_ARCH_HAS_CC_PLATFORM From 60667e2ec578e7425972465ce5e208e7048be1d3 Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Fri, 13 Sep 2024 17:06:53 +0530 Subject: [PATCH 02/26] x86/apic: Initialize Secure AVIC APIC backing page With Secure AVIC, the APIC backing page is owned and managed by guest. Allocate APIC backing page for all guest CPUs. In addition, add a setup() APIC callback. 
This callback is used by the Secure AVIC driver to initialize the APIC backing page area for each CPU. Allocate the APIC backing page memory area in chunks of 2M so that the backing page memory is mapped using full huge pages. Without this, if there is a private-to-shared page state conversion for any non-backing-page allocation that is part of the same huge page as the one containing a backing page, the hypervisor splits the huge page into 4K pages. Splitting the APIC backing page area into individual 4K pages can result in a performance impact due to TLB pressure. Secure AVIC requires that a vCPU's APIC backing page NPT entry is always present while that vCPU is running. If the APIC backing page's NPT entry is not present, a VMEXIT_BUSY is returned on VMRUN and the vCPU cannot be resumed after that point. To handle this, invoke sev_notify_savic_gpa() in the Secure AVIC driver's setup() callback. This triggers an SVM_VMGEXIT_SECURE_AVIC_GPA exit for the hypervisor to note the GPA of the vCPU's APIC backing page. The hypervisor uses this information to ensure that the APIC backing page is mapped in the NPT before invoking VMRUN. Signed-off-by: Kishon Vijay Abraham I Co-developed-by: Neeraj Upadhyay Signed-off-by: Neeraj Upadhyay --- arch/x86/coco/sev/core.c | 22 +++++++++++++++++ arch/x86/include/asm/apic.h | 1 + arch/x86/include/asm/sev.h | 2 ++ arch/x86/include/uapi/asm/svm.h | 1 + arch/x86/kernel/apic/apic.c | 2 ++ arch/x86/kernel/apic/x2apic_savic.c | 38 +++++++++++++++++++++++++++++ 6 files changed, 66 insertions(+) diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c index a0b73e6ed747..1a7b322c1e95 100644 --- a/arch/x86/coco/sev/core.c +++ b/arch/x86/coco/sev/core.c @@ -1358,6 +1358,28 @@ static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt) return ret; } +enum es_result sev_notify_savic_gpa(u64 gpa) +{ + struct ghcb_state state; + struct es_em_ctxt ctxt; + unsigned long flags; + struct ghcb *ghcb; + int ret = 0; + + local_irq_save(flags); + + ghcb = __sev_get_ghcb(&state); + + vc_ghcb_invalidate(ghcb); + + ret = sev_es_ghcb_hv_call(ghcb, &ctxt, SVM_VMGEXIT_SECURE_AVIC_GPA, gpa, 0); + + __sev_put_ghcb(&state); + + local_irq_restore(flags); + return ret; +} + static void snp_register_per_cpu_ghcb(void) { struct sev_es_runtime_data *data; diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 33f677e2db75..c877378c7841 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -305,6 +305,7 @@ struct apic { /* Probe, setup and smpboot functions */ int (*probe)(void); + void (*setup)(void); int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id); void (*init_apic_ldr)(void); diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index 833954e5aade..2b433d433231 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -414,6 +414,7 @@ u64 sev_get_status(void); void sev_show_status(void); void snp_update_svsm_ca(void); void snp_mshv_vtl_return(u8 target_vtl); +enum es_result sev_notify_savic_gpa(u64 gpa); #else /* !CONFIG_AMD_MEM_ENCRYPT */ @@ -451,6 +452,7 @@ static inline u64 sev_get_status(void) { return 0; } static inline void sev_show_status(void) { } static inline void snp_update_svsm_ca(void) { } static inline void snp_mshv_vtl_return(u8 input_vtl) { } +static inline enum es_result sev_notify_savic_gpa(u64 gpa) { return ES_UNSUPPORTED; } #endif /* CONFIG_AMD_MEM_ENCRYPT */ diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h index 1814b413fd57..0f21cea6d21c 100644 ---
a/arch/x86/include/uapi/asm/svm.h +++ b/arch/x86/include/uapi/asm/svm.h @@ -116,6 +116,7 @@ #define SVM_VMGEXIT_AP_CREATE 1 #define SVM_VMGEXIT_AP_DESTROY 2 #define SVM_VMGEXIT_SNP_RUN_VMPL 0x80000018 +#define SVM_VMGEXIT_SECURE_AVIC_GPA 0x8000001a #define SVM_VMGEXIT_HV_FEATURES 0x8000fffd #define SVM_VMGEXIT_TERM_REQUEST 0x8000fffe #define SVM_VMGEXIT_TERM_REASON(reason_set, reason_code) \ diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index c5fb28e6451a..85d2d53d6d06 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1504,6 +1504,8 @@ static void setup_local_APIC(void) return; } + if (apic->setup) + apic->setup(); /* * If this comes from kexec/kcrash the APIC might be enabled in * SPIV. Soft disable it before doing further initialization. diff --git a/arch/x86/kernel/apic/x2apic_savic.c b/arch/x86/kernel/apic/x2apic_savic.c index 97dac09a7f42..d903c35b8b64 100644 --- a/arch/x86/kernel/apic/x2apic_savic.c +++ b/arch/x86/kernel/apic/x2apic_savic.c @@ -9,12 +9,16 @@ #include #include +#include #include #include #include "local.h" +static DEFINE_PER_CPU(void *, apic_backing_page); +static DEFINE_PER_CPU(bool, savic_setup_done); + static int x2apic_savic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { return x2apic_enabled() && cc_platform_has(CC_ATTR_SNP_SECURE_AVIC); @@ -61,8 +65,30 @@ static void x2apic_savic_send_IPI_mask_allbutself(const struct cpumask *mask, in __send_IPI_mask(mask, vector, APIC_DEST_ALLBUT); } +static void x2apic_savic_setup(void) +{ + void *backing_page; + enum es_result ret; + unsigned long gpa; + + if (this_cpu_read(savic_setup_done)) + return; + + backing_page = this_cpu_read(apic_backing_page); + gpa = __pa(backing_page); + ret = sev_notify_savic_gpa(gpa); + if (ret != ES_OK) + snp_abort(); + this_cpu_write(savic_setup_done, true); +} + static int x2apic_savic_probe(void) { + void *backing_pages; + unsigned int cpu; + size_t sz; + int i; + if (!cc_platform_has(CC_ATTR_SNP_SECURE_AVIC)) return 0; @@ -71,6 +97,17 @@ static int x2apic_savic_probe(void) snp_abort(); } + sz = ALIGN(num_possible_cpus() * SZ_4K, SZ_2M); + backing_pages = kzalloc(sz, GFP_ATOMIC); + if (!backing_pages) + snp_abort(); + + i = 0; + for_each_possible_cpu(cpu) { + per_cpu(apic_backing_page, cpu) = backing_pages + i * SZ_4K; + i++; + } + pr_info("Secure AVIC Enabled\n"); return 1; @@ -81,6 +118,7 @@ static struct apic apic_x2apic_savic __ro_after_init = { .name = "secure avic x2apic", .probe = x2apic_savic_probe, .acpi_madt_oem_check = x2apic_savic_acpi_madt_oem_check, + .setup = x2apic_savic_setup, .dest_mode_logical = false, From 82cbff817a4f51992e06252ad4b1e3dfabf6b7b5 Mon Sep 17 00:00:00 2001 From: Neeraj Upadhyay Date: Fri, 13 Sep 2024 17:06:54 +0530 Subject: [PATCH 03/26] x86/apic: Populate .read()/.write() callbacks of Secure AVIC driver The x2APIC registers are mapped at an offset within the guest APIC backing page which is same as their x2APIC MMIO offset. Secure AVIC adds new registers such as ALLOWED_IRRs (which are at 4-byte offset within the IRR register offset range) and NMI_REQ to the APIC register space. In addition, the APIC_ID register is writable and configured by guest. Add read() and write() APIC callback functions to read and write x2APIC registers directly from the guest APIC backing page. The default .read()/.write() callbacks of x2APIC drivers perform a rdmsr/wrmsr of the x2APIC registers. When Secure AVIC is enabled, these would result in #VC exception (for non-accelerated register accesses). 
The #VC exception handler reads/write the x2APIC register in the guest APIC backing page. Since this would increase the latency of accessing x2APIC registers, the read() and write() callbacks of Secure AVIC driver directly reads/writes to the guest APIC backing page. Co-developed-by: Kishon Vijay Abraham I Signed-off-by: Kishon Vijay Abraham I Signed-off-by: Neeraj Upadhyay --- arch/x86/include/asm/apicdef.h | 2 + arch/x86/kernel/apic/x2apic_savic.c | 107 +++++++++++++++++++++++++++- 2 files changed, 107 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h index 094106b6a538..be39a543fbe5 100644 --- a/arch/x86/include/asm/apicdef.h +++ b/arch/x86/include/asm/apicdef.h @@ -135,6 +135,8 @@ #define APIC_TDR_DIV_128 0xA #define APIC_EFEAT 0x400 #define APIC_ECTRL 0x410 +#define APIC_SEOI 0x420 +#define APIC_IER 0x480 #define APIC_EILVTn(n) (0x500 + 0x10 * n) #define APIC_EILVT_NR_AMD_K8 1 /* # of extended interrupts */ #define APIC_EILVT_NR_AMD_10H 4 diff --git a/arch/x86/kernel/apic/x2apic_savic.c b/arch/x86/kernel/apic/x2apic_savic.c index d903c35b8b64..6a471bbc3dba 100644 --- a/arch/x86/kernel/apic/x2apic_savic.c +++ b/arch/x86/kernel/apic/x2apic_savic.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -24,6 +25,108 @@ static int x2apic_savic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) return x2apic_enabled() && cc_platform_has(CC_ATTR_SNP_SECURE_AVIC); } +static inline u32 get_reg(char *page, int reg_off) +{ + return READ_ONCE(*((u32 *)(page + reg_off))); +} + +static inline void set_reg(char *page, int reg_off, u32 val) +{ + WRITE_ONCE(*((u32 *)(page + reg_off)), val); +} + +#define SAVIC_ALLOWED_IRR_OFFSET 0x204 + +static u32 x2apic_savic_read(u32 reg) +{ + void *backing_page = this_cpu_read(apic_backing_page); + + switch (reg) { + case APIC_LVTT: + case APIC_TMICT: + case APIC_TMCCT: + case APIC_TDCR: + case APIC_ID: + case APIC_LVR: + case APIC_TASKPRI: + case APIC_ARBPRI: + case APIC_PROCPRI: + case APIC_LDR: + case APIC_SPIV: + case APIC_ESR: + case APIC_ICR: + case APIC_LVTTHMR: + case APIC_LVTPC: + case APIC_LVT0: + case APIC_LVT1: + case APIC_LVTERR: + case APIC_EFEAT: + case APIC_ECTRL: + case APIC_SEOI: + case APIC_IER: + case APIC_EILVTn(0) ... APIC_EILVTn(3): + return get_reg(backing_page, reg); + case APIC_ISR ... APIC_ISR + 0x70: + case APIC_TMR ... APIC_TMR + 0x70: + WARN_ONCE(!IS_ALIGNED(reg, 16), "Reg offset %#x not aligned at 16 bytes", reg); + return get_reg(backing_page, reg); + /* IRR and ALLOWED_IRR offset range */ + case APIC_IRR ... APIC_IRR + 0x74: + /* + * Either aligned at 16 bytes for valid IRR reg offset or a + * valid Secure AVIC ALLOWED_IRR offset. 
+ */ + WARN_ONCE(!(IS_ALIGNED(reg, 16) || IS_ALIGNED(reg - SAVIC_ALLOWED_IRR_OFFSET, 16)), + "Misaligned IRR/ALLOWED_IRR reg offset %#x", reg); + return get_reg(backing_page, reg); + default: + pr_err("Permission denied: read of Secure AVIC reg offset %#x\n", reg); + return 0; + } +} + +#define SAVIC_NMI_REQ_OFFSET 0x278 + +static void x2apic_savic_write(u32 reg, u32 data) +{ + void *backing_page = this_cpu_read(apic_backing_page); + + switch (reg) { + case APIC_LVTT: + case APIC_LVT0: + case APIC_LVT1: + case APIC_TMICT: + case APIC_TDCR: + case APIC_SELF_IPI: + /* APIC_ID is writable and configured by guest for Secure AVIC */ + case APIC_ID: + case APIC_TASKPRI: + case APIC_EOI: + case APIC_SPIV: + case SAVIC_NMI_REQ_OFFSET: + case APIC_ESR: + case APIC_ICR: + case APIC_LVTTHMR: + case APIC_LVTPC: + case APIC_LVTERR: + case APIC_ECTRL: + case APIC_SEOI: + case APIC_IER: + case APIC_EILVTn(0) ... APIC_EILVTn(3): + set_reg(backing_page, reg, data); + break; + /* ALLOWED_IRR offsets are writable */ + case SAVIC_ALLOWED_IRR_OFFSET ... SAVIC_ALLOWED_IRR_OFFSET + 0x70: + if (IS_ALIGNED(reg - SAVIC_ALLOWED_IRR_OFFSET, 16)) { + set_reg(backing_page, reg, data); + break; + } + fallthrough; + default: + pr_err("Permission denied: write to Secure AVIC reg offset %#x\n", reg); + } +} + static void x2apic_savic_send_IPI(int cpu, int vector) { u32 dest = per_cpu(x86_cpu_to_apicid, cpu); @@ -140,8 +243,8 @@ static struct apic apic_x2apic_savic __ro_after_init = { .send_IPI_self = x2apic_send_IPI_self, .nmi_to_offline_cpu = true, - .read = native_apic_msr_read, - .write = native_apic_msr_write, + .read = x2apic_savic_read, + .write = x2apic_savic_write, .eoi = native_apic_msr_eoi, .icr_read = native_x2apic_icr_read, .icr_write = native_x2apic_icr_write, From 03b3f7c68a5d755e261b508823d29ce853f49cc0 Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Fri, 13 Sep 2024 17:06:55 +0530 Subject: [PATCH 04/26] x86/apic: Initialize APIC backing page for Secure AVIC Secure AVIC lets guest manage the APIC backing page (unlike emulated x2APIC or x2AVIC where the hypervisor manages the APIC backing page). However the introduced Secure AVIC Linux design still maintains the APIC backing page in the hypervisor to shadow the APIC backing page maintained by guest (It should be noted only subset of the registers are shadowed for specific usecases and registers like APIC_IRR, APIC_ISR are not shadowed). Add sev_ghcb_msr_read() to invoke "SVM_EXIT_MSR" VMGEXIT to read MSRs from hypervisor. Initialize the Secure AVIC's APIC backing page by copying the initial state of shadow APIC backing page in the hypervisor to the guest APIC backing page. Specifically copy APIC_LVR, APIC_LDR, and APIC_LVT MSRs from the shadow APIC backing page. Signed-off-by: Kishon Vijay Abraham I Co-developed-by: Neeraj Upadhyay Signed-off-by: Neeraj Upadhyay --- arch/x86/coco/sev/core.c | 41 ++++++++++++++++----- arch/x86/include/asm/sev.h | 2 ++ arch/x86/kernel/apic/x2apic_savic.c | 55 +++++++++++++++++++++++++++++ 3 files changed, 90 insertions(+), 8 deletions(-) diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c index 1a7b322c1e95..ce88a8281074 100644 --- a/arch/x86/coco/sev/core.c +++ b/arch/x86/coco/sev/core.c @@ -1322,18 +1322,15 @@ int __init sev_es_efi_map_ghcbs(pgd_t *pgd) return 0; } -static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt) +static enum es_result __vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt, bool write) { struct pt_regs *regs = ctxt->regs; + u64 exit_info_1 = write ? 
1 : 0; enum es_result ret; - u64 exit_info_1; - - /* Is it a WRMSR? */ - exit_info_1 = (ctxt->insn.opcode.bytes[1] == 0x30) ? 1 : 0; if (regs->cx == MSR_SVSM_CAA) { /* Writes to the SVSM CAA msr are ignored */ - if (exit_info_1) + if (write) return ES_OK; regs->ax = lower_32_bits(this_cpu_read(svsm_caa_pa)); @@ -1343,14 +1340,14 @@ static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt) } ghcb_set_rcx(ghcb, regs->cx); - if (exit_info_1) { + if (write) { ghcb_set_rax(ghcb, regs->ax); ghcb_set_rdx(ghcb, regs->dx); } ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_MSR, exit_info_1, 0); - if ((ret == ES_OK) && (!exit_info_1)) { + if (ret == ES_OK && !write) { regs->ax = ghcb->save.rax; regs->dx = ghcb->save.rdx; } @@ -1358,6 +1355,34 @@ static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt) return ret; } +static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt) +{ + return __vc_handle_msr(ghcb, ctxt, ctxt->insn.opcode.bytes[1] == 0x30); +} + +enum es_result sev_ghcb_msr_read(u64 msr, u64 *value) +{ + struct pt_regs regs = { .cx = msr }; + struct es_em_ctxt ctxt = { .regs = ®s }; + struct ghcb_state state; + unsigned long flags; + enum es_result ret; + struct ghcb *ghcb; + + local_irq_save(flags); + ghcb = __sev_get_ghcb(&state); + vc_ghcb_invalidate(ghcb); + + ret = __vc_handle_msr(ghcb, &ctxt, false); + if (ret == ES_OK) + *value = regs.ax | regs.dx << 32; + + __sev_put_ghcb(&state); + local_irq_restore(flags); + + return ret; +} + enum es_result sev_notify_savic_gpa(u64 gpa) { struct ghcb_state state; diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index 2b433d433231..f1a55ec63106 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -415,6 +415,7 @@ void sev_show_status(void); void snp_update_svsm_ca(void); void snp_mshv_vtl_return(u8 target_vtl); enum es_result sev_notify_savic_gpa(u64 gpa); +enum es_result sev_ghcb_msr_read(u64 msr, u64 *value); #else /* !CONFIG_AMD_MEM_ENCRYPT */ @@ -453,6 +454,7 @@ static inline void sev_show_status(void) { } static inline void snp_update_svsm_ca(void) { } static inline void snp_mshv_vtl_return(u8 input_vtl) { } static inline enum es_result sev_notify_savic_gpa(u64 gpa) { return ES_UNSUPPORTED; } +static inline enum es_result sev_ghcb_msr_read(u64 msr, u64 *value) { return ES_UNSUPPORTED; } #endif /* CONFIG_AMD_MEM_ENCRYPT */ diff --git a/arch/x86/kernel/apic/x2apic_savic.c b/arch/x86/kernel/apic/x2apic_savic.c index 6a471bbc3dba..99151be4e173 100644 --- a/arch/x86/kernel/apic/x2apic_savic.c +++ b/arch/x86/kernel/apic/x2apic_savic.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -20,6 +21,19 @@ static DEFINE_PER_CPU(void *, apic_backing_page); static DEFINE_PER_CPU(bool, savic_setup_done); +enum lapic_lvt_entry { + LVT_TIMER, + LVT_THERMAL_MONITOR, + LVT_PERFORMANCE_COUNTER, + LVT_LINT0, + LVT_LINT1, + LVT_ERROR, + + APIC_MAX_NR_LVT_ENTRIES, +}; + +#define APIC_LVTx(x) (APIC_LVTT + 0x10 * (x)) + static int x2apic_savic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { return x2apic_enabled() && cc_platform_has(CC_ATTR_SNP_SECURE_AVIC); @@ -35,6 +49,22 @@ static inline void set_reg(char *page, int reg_off, u32 val) WRITE_ONCE(*((u32 *)(page + reg_off)), val); } +static u32 read_msr_from_hv(u32 reg) +{ + u64 data, msr; + int ret; + + msr = APIC_BASE_MSR + (reg >> 4); + ret = sev_ghcb_msr_read(msr, &data); + if (ret != ES_OK) { + pr_err("Secure AVIC msr (%#llx) read returned error (%d)\n", msr, ret); + /* MSR 
read failures are treated as fatal errors */ + snp_abort(); + } + + return lower_32_bits(data); +} + #define SAVIC_ALLOWED_IRR_OFFSET 0x204 static u32 x2apic_savic_read(u32 reg) @@ -168,6 +198,30 @@ static void x2apic_savic_send_IPI_mask_allbutself(const struct cpumask *mask, in __send_IPI_mask(mask, vector, APIC_DEST_ALLBUT); } +static void init_backing_page(void *backing_page) +{ + u32 val; + int i; + + val = read_msr_from_hv(APIC_LVR); + set_reg(backing_page, APIC_LVR, val); + + /* + * Hypervisor is used for all timer related functions, + * so don't copy those values. + */ + for (i = LVT_THERMAL_MONITOR; i < APIC_MAX_NR_LVT_ENTRIES; i++) { + val = read_msr_from_hv(APIC_LVTx(i)); + set_reg(backing_page, APIC_LVTx(i), val); + } + + val = read_msr_from_hv(APIC_LVT0); + set_reg(backing_page, APIC_LVT0, val); + + val = read_msr_from_hv(APIC_LDR); + set_reg(backing_page, APIC_LDR, val); +} + static void x2apic_savic_setup(void) { void *backing_page; @@ -178,6 +232,7 @@ static void x2apic_savic_setup(void) return; backing_page = this_cpu_read(apic_backing_page); + init_backing_page(backing_page); gpa = __pa(backing_page); ret = sev_notify_savic_gpa(gpa); if (ret != ES_OK) From 69707ef43a0d2a7d0a485b788c8c465be1493e75 Mon Sep 17 00:00:00 2001 From: Neeraj Upadhyay Date: Fri, 13 Sep 2024 17:06:56 +0530 Subject: [PATCH 05/26] x86/apic: Initialize APIC ID for Secure AVIC Initialize the APIC ID in the APIC backing page with the CPUID function 0000_000bh_EDX (Extended Topology Enumeration), and ensure that APIC ID msr read from hypervisor is consistent with the value read from CPUID. Signed-off-by: Neeraj Upadhyay --- arch/x86/kernel/apic/x2apic_savic.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/x86/kernel/apic/x2apic_savic.c b/arch/x86/kernel/apic/x2apic_savic.c index 99151be4e173..09fbc1857bf3 100644 --- a/arch/x86/kernel/apic/x2apic_savic.c +++ b/arch/x86/kernel/apic/x2apic_savic.c @@ -14,6 +14,7 @@ #include #include +#include #include #include "local.h" @@ -200,6 +201,8 @@ static void x2apic_savic_send_IPI_mask_allbutself(const struct cpumask *mask, in static void init_backing_page(void *backing_page) { + u32 hv_apic_id; + u32 apic_id; u32 val; int i; @@ -220,6 +223,13 @@ static void init_backing_page(void *backing_page) val = read_msr_from_hv(APIC_LDR); set_reg(backing_page, APIC_LDR, val); + + /* Read APIC ID from Extended Topology Enumeration CPUID */ + apic_id = cpuid_edx(0x0000000b); + hv_apic_id = read_msr_from_hv(APIC_ID); + WARN_ONCE(hv_apic_id != apic_id, "Inconsistent APIC_ID values: %d (cpuid), %d (msr)", + apic_id, hv_apic_id); + set_reg(backing_page, APIC_ID, apic_id); } static void x2apic_savic_setup(void) From cdcea19e106c3d3a3eeb205430be9a274024deff Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Fri, 13 Sep 2024 17:06:57 +0530 Subject: [PATCH 06/26] x86/apic: Add update_vector callback for Secure AVIC Add update_vector callback to set/clear ALLOWED_IRR field in the APIC backing page. The allowed IRR vector indicates the interrupt vectors which the guest allows the hypervisor to send (typically for emulated devices). ALLOWED_IRR is meant to be used specifically for vectors that the hypervisor is allowed to inject, such as device interrupts. Interrupt vectors used exclusively by the guest itself (like IPI vectors) should not be allowed to be injected into the guest for security reasons. The update_vector callback is invoked from APIC vector domain whenever a vector is allocated, freed or moved. 
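For illustration, the ALLOWED_IRR update boils down to setting or clearing one bit per vector in the backing page. A minimal sketch, using the SAVIC_ALLOWED_IRR_OFFSET, VEC_POS() and REG_POS() definitions from this series (the helper name below is only illustrative):

/*
 * Sketch: mark a vector as injectable (or not) by the hypervisor by
 * toggling its bit in the ALLOWED_IRR area of the APIC backing page.
 */
static void allowed_irr_update(void *backing_page, unsigned int vector, bool set)
{
	unsigned long *reg = (unsigned long *)((char *)backing_page +
			     SAVIC_ALLOWED_IRR_OFFSET + REG_POS(vector));

	if (set)
		test_and_set_bit(VEC_POS(vector), reg);	/* allow the hypervisor to inject it */
	else
		test_and_clear_bit(VEC_POS(vector), reg);	/* disallow injection */
}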
Signed-off-by: Kishon Vijay Abraham I Co-developed-by: Neeraj Upadhyay Signed-off-by: Neeraj Upadhyay --- arch/x86/include/asm/apic.h | 2 ++ arch/x86/kernel/apic/vector.c | 8 ++++++++ arch/x86/kernel/apic/x2apic_savic.c | 21 +++++++++++++++++++++ 3 files changed, 31 insertions(+) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index c877378c7841..1ab0e22a7187 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -318,6 +318,8 @@ struct apic { /* wakeup secondary CPU using 64-bit wakeup point */ int (*wakeup_secondary_cpu_64)(u32 apicid, unsigned long start_eip, unsigned int cpu); + void (*update_vector)(unsigned int cpu, unsigned int vector, bool set); + char *name; }; diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 557318145038..5aa65a732b05 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -174,6 +174,8 @@ static void apic_update_vector(struct irq_data *irqd, unsigned int newvec, apicd->prev_cpu = apicd->cpu; WARN_ON_ONCE(apicd->cpu == newcpu); } else { + if (apic->update_vector) + apic->update_vector(apicd->cpu, apicd->vector, false); irq_matrix_free(vector_matrix, apicd->cpu, apicd->vector, managed); } @@ -183,6 +185,8 @@ static void apic_update_vector(struct irq_data *irqd, unsigned int newvec, apicd->cpu = newcpu; BUG_ON(!IS_ERR_OR_NULL(per_cpu(vector_irq, newcpu)[newvec])); per_cpu(vector_irq, newcpu)[newvec] = desc; + if (apic->update_vector) + apic->update_vector(apicd->cpu, apicd->vector, true); } static void vector_assign_managed_shutdown(struct irq_data *irqd) @@ -528,11 +532,15 @@ static bool vector_configure_legacy(unsigned int virq, struct irq_data *irqd, if (irqd_is_activated(irqd)) { trace_vector_setup(virq, true, 0); apic_update_irq_cfg(irqd, apicd->vector, apicd->cpu); + if (apic->update_vector) + apic->update_vector(apicd->cpu, apicd->vector, true); } else { /* Release the vector */ apicd->can_reserve = true; irqd_set_can_reserve(irqd); clear_irq_vector(irqd); + if (apic->update_vector) + apic->update_vector(apicd->cpu, apicd->vector, false); realloc = true; } raw_spin_unlock_irqrestore(&vector_lock, flags); diff --git a/arch/x86/kernel/apic/x2apic_savic.c b/arch/x86/kernel/apic/x2apic_savic.c index 09fbc1857bf3..a9e54c1c6446 100644 --- a/arch/x86/kernel/apic/x2apic_savic.c +++ b/arch/x86/kernel/apic/x2apic_savic.c @@ -19,6 +19,9 @@ #include "local.h" +#define VEC_POS(v) ((v) & (32 - 1)) +#define REG_POS(v) (((v) >> 5) << 4) + static DEFINE_PER_CPU(void *, apic_backing_page); static DEFINE_PER_CPU(bool, savic_setup_done); @@ -199,6 +202,22 @@ static void x2apic_savic_send_IPI_mask_allbutself(const struct cpumask *mask, in __send_IPI_mask(mask, vector, APIC_DEST_ALLBUT); } +static void x2apic_savic_update_vector(unsigned int cpu, unsigned int vector, bool set) +{ + void *backing_page; + unsigned long *reg; + int reg_off; + + backing_page = per_cpu(apic_backing_page, cpu); + reg_off = SAVIC_ALLOWED_IRR_OFFSET + REG_POS(vector); + reg = (unsigned long *)((char *)backing_page + reg_off); + + if (set) + test_and_set_bit(VEC_POS(vector), reg); + else + test_and_clear_bit(VEC_POS(vector), reg); +} + static void init_backing_page(void *backing_page) { u32 hv_apic_id; @@ -313,6 +332,8 @@ static struct apic apic_x2apic_savic __ro_after_init = { .eoi = native_apic_msr_eoi, .icr_read = native_x2apic_icr_read, .icr_write = native_x2apic_icr_write, + + .update_vector = x2apic_savic_update_vector, }; apic_driver(apic_x2apic_savic); From 
fef534bec2b45498d85f813f90b50cb92d3b47b2 Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Fri, 13 Sep 2024 17:06:58 +0530 Subject: [PATCH 07/26] x86/apic: Add support to send IPI for Secure AVIC With Secure AVIC only Self-IPI is accelerated. To handle all the other IPIs, add new callbacks for sending IPI, which write to the IRR of the target guest APIC backing page (after decoding the ICR register) and then issue VMGEXIT for the hypervisor to notify the target vCPU. Signed-off-by: Kishon Vijay Abraham I Co-developed-by: Neeraj Upadhyay Signed-off-by: Neeraj Upadhyay --- arch/x86/coco/sev/core.c | 25 +++++ arch/x86/include/asm/sev.h | 2 + arch/x86/kernel/apic/x2apic_savic.c | 152 +++++++++++++++++++++++++--- 3 files changed, 166 insertions(+), 13 deletions(-) diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c index ce88a8281074..d6fa563ac9ec 100644 --- a/arch/x86/coco/sev/core.c +++ b/arch/x86/coco/sev/core.c @@ -1383,6 +1383,31 @@ enum es_result sev_ghcb_msr_read(u64 msr, u64 *value) return ret; } +enum es_result sev_ghcb_msr_write(u64 msr, u64 value) +{ + struct pt_regs regs = { + .cx = msr, + .ax = lower_32_bits(value), + .dx = upper_32_bits(value) + }; + struct es_em_ctxt ctxt = { .regs = ®s }; + struct ghcb_state state; + unsigned long flags; + enum es_result ret; + struct ghcb *ghcb; + + local_irq_save(flags); + ghcb = __sev_get_ghcb(&state); + vc_ghcb_invalidate(ghcb); + + ret = __vc_handle_msr(ghcb, &ctxt, true); + + __sev_put_ghcb(&state); + local_irq_restore(flags); + + return ret; +} + enum es_result sev_notify_savic_gpa(u64 gpa) { struct ghcb_state state; diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index f1a55ec63106..ced3d8014ef4 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -416,6 +416,7 @@ void snp_update_svsm_ca(void); void snp_mshv_vtl_return(u8 target_vtl); enum es_result sev_notify_savic_gpa(u64 gpa); enum es_result sev_ghcb_msr_read(u64 msr, u64 *value); +enum es_result sev_ghcb_msr_write(u64 msr, u64 value); #else /* !CONFIG_AMD_MEM_ENCRYPT */ @@ -455,6 +456,7 @@ static inline void snp_update_svsm_ca(void) { } static inline void snp_mshv_vtl_return(u8 input_vtl) { } static inline enum es_result sev_notify_savic_gpa(u64 gpa) { return ES_UNSUPPORTED; } static inline enum es_result sev_ghcb_msr_read(u64 msr, u64 *value) { return ES_UNSUPPORTED; } +static inline enum es_result sev_ghcb_msr_write(u64 msr, u64 value) { return ES_UNSUPPORTED; } #endif /* CONFIG_AMD_MEM_ENCRYPT */ diff --git a/arch/x86/kernel/apic/x2apic_savic.c b/arch/x86/kernel/apic/x2apic_savic.c index a9e54c1c6446..30a24b70e5cb 100644 --- a/arch/x86/kernel/apic/x2apic_savic.c +++ b/arch/x86/kernel/apic/x2apic_savic.c @@ -69,6 +69,20 @@ static u32 read_msr_from_hv(u32 reg) return lower_32_bits(data); } +static void write_msr_to_hv(u32 reg, u64 data) +{ + u64 msr; + int ret; + + msr = APIC_BASE_MSR + (reg >> 4); + ret = sev_ghcb_msr_write(msr, data); + if (ret != ES_OK) { + pr_err("Secure AVIC msr (%#llx) write returned error (%d)\n", msr, ret); + /* MSR writes should never fail. 
Any failure is fatal error for SNP guest */ + snp_abort(); + } +} + #define SAVIC_ALLOWED_IRR_OFFSET 0x204 static u32 x2apic_savic_read(u32 reg) @@ -124,6 +138,7 @@ static u32 x2apic_savic_read(u32 reg) static void x2apic_savic_write(u32 reg, u32 data) { void *backing_page = this_cpu_read(apic_backing_page); + unsigned int cfg; switch (reg) { case APIC_LVTT: @@ -131,7 +146,6 @@ static void x2apic_savic_write(u32 reg, u32 data) case APIC_LVT1: case APIC_TMICT: case APIC_TDCR: - case APIC_SELF_IPI: /* APIC_ID is writable and configured by guest for Secure AVIC */ case APIC_ID: case APIC_TASKPRI: @@ -149,6 +163,11 @@ static void x2apic_savic_write(u32 reg, u32 data) case APIC_EILVTn(0) ... APIC_EILVTn(3): set_reg(backing_page, reg, data); break; + /* Self IPIs are accelerated by hardware, use wrmsr */ + case APIC_SELF_IPI: + cfg = __prepare_ICR(APIC_DEST_SELF, data, 0); + native_x2apic_icr_write(cfg, 0); + break; /* ALLOWED_IRR offsets are writable */ case SAVIC_ALLOWED_IRR_OFFSET ... SAVIC_ALLOWED_IRR_OFFSET + 0x70: if (IS_ALIGNED(reg - SAVIC_ALLOWED_IRR_OFFSET, 16)) { @@ -161,13 +180,100 @@ static void x2apic_savic_write(u32 reg, u32 data) } } +static void send_ipi(int cpu, int vector) +{ + void *backing_page; + int reg_off; + + backing_page = per_cpu(apic_backing_page, cpu); + reg_off = APIC_IRR + REG_POS(vector); + /* + * Use test_and_set_bit() to ensure that IRR updates are atomic w.r.t. other + * IRR updates such as during VMRUN and during CPU interrupt handling flow. + */ + test_and_set_bit(VEC_POS(vector), (unsigned long *)((char *)backing_page + reg_off)); +} + +static void send_ipi_dest(u64 icr_data) +{ + int vector, cpu; + + vector = icr_data & APIC_VECTOR_MASK; + cpu = icr_data >> 32; + + send_ipi(cpu, vector); +} + +static void send_ipi_target(u64 icr_data) +{ + if (icr_data & APIC_DEST_LOGICAL) { + pr_err("IPI target should be of PHYSICAL type\n"); + return; + } + + send_ipi_dest(icr_data); +} + +static void send_ipi_allbut(u64 icr_data) +{ + const struct cpumask *self_cpu_mask = get_cpu_mask(smp_processor_id()); + unsigned long flags; + int vector, cpu; + + vector = icr_data & APIC_VECTOR_MASK; + local_irq_save(flags); + for_each_cpu_andnot(cpu, cpu_present_mask, self_cpu_mask) + send_ipi(cpu, vector); + write_msr_to_hv(APIC_ICR, icr_data); + local_irq_restore(flags); +} + +static void send_ipi_allinc(u64 icr_data) +{ + int vector; + + send_ipi_allbut(icr_data); + vector = icr_data & APIC_VECTOR_MASK; + native_x2apic_icr_write(APIC_DEST_SELF | vector, 0); +} + +static void x2apic_savic_icr_write(u32 icr_low, u32 icr_high) +{ + int dsh, vector; + u64 icr_data; + + icr_data = ((u64)icr_high) << 32 | icr_low; + dsh = icr_low & APIC_DEST_ALLBUT; + + switch (dsh) { + case APIC_DEST_SELF: + vector = icr_data & APIC_VECTOR_MASK; + x2apic_savic_write(APIC_SELF_IPI, vector); + break; + case APIC_DEST_ALLINC: + send_ipi_allinc(icr_data); + break; + case APIC_DEST_ALLBUT: + send_ipi_allbut(icr_data); + break; + default: + send_ipi_target(icr_data); + write_msr_to_hv(APIC_ICR, icr_data); + } +} + +static void __send_IPI_dest(unsigned int apicid, int vector, unsigned int dest) +{ + unsigned int cfg = __prepare_ICR(0, vector, dest); + + x2apic_savic_icr_write(cfg, apicid); +} + static void x2apic_savic_send_IPI(int cpu, int vector) { u32 dest = per_cpu(x86_cpu_to_apicid, cpu); - /* x2apic MSRs are special and need a special fence: */ - weak_wrmsr_fence(); - __x2apic_send_IPI_dest(dest, vector, APIC_DEST_PHYSICAL); + __send_IPI_dest(dest, vector, APIC_DEST_PHYSICAL); } static void @@ -177,18 
+283,16 @@ __send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest) unsigned long this_cpu; unsigned long flags; - /* x2apic MSRs are special and need a special fence: */ - weak_wrmsr_fence(); - local_irq_save(flags); this_cpu = smp_processor_id(); for_each_cpu(query_cpu, mask) { if (apic_dest == APIC_DEST_ALLBUT && this_cpu == query_cpu) continue; - __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu), - vector, APIC_DEST_PHYSICAL); + __send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu), vector, + APIC_DEST_PHYSICAL); } + local_irq_restore(flags); } @@ -202,6 +306,28 @@ static void x2apic_savic_send_IPI_mask_allbutself(const struct cpumask *mask, in __send_IPI_mask(mask, vector, APIC_DEST_ALLBUT); } +static void __send_IPI_shorthand(int vector, u32 which) +{ + unsigned int cfg = __prepare_ICR(which, vector, 0); + + x2apic_savic_icr_write(cfg, 0); +} + +static void x2apic_savic_send_IPI_allbutself(int vector) +{ + __send_IPI_shorthand(vector, APIC_DEST_ALLBUT); +} + +static void x2apic_savic_send_IPI_all(int vector) +{ + __send_IPI_shorthand(vector, APIC_DEST_ALLINC); +} + +static void x2apic_savic_send_IPI_self(int vector) +{ + __send_IPI_shorthand(vector, APIC_DEST_SELF); +} + static void x2apic_savic_update_vector(unsigned int cpu, unsigned int vector, bool set) { void *backing_page; @@ -322,16 +448,16 @@ static struct apic apic_x2apic_savic __ro_after_init = { .send_IPI = x2apic_savic_send_IPI, .send_IPI_mask = x2apic_savic_send_IPI_mask, .send_IPI_mask_allbutself = x2apic_savic_send_IPI_mask_allbutself, - .send_IPI_allbutself = x2apic_send_IPI_allbutself, - .send_IPI_all = x2apic_send_IPI_all, - .send_IPI_self = x2apic_send_IPI_self, + .send_IPI_allbutself = x2apic_savic_send_IPI_allbutself, + .send_IPI_all = x2apic_savic_send_IPI_all, + .send_IPI_self = x2apic_savic_send_IPI_self, .nmi_to_offline_cpu = true, .read = x2apic_savic_read, .write = x2apic_savic_write, .eoi = native_apic_msr_eoi, .icr_read = native_x2apic_icr_read, - .icr_write = native_x2apic_icr_write, + .icr_write = x2apic_savic_icr_write, .update_vector = x2apic_savic_update_vector, }; From 04cadc13a5aaa3691a8bfabde0b42574b13314cc Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Fri, 13 Sep 2024 17:06:59 +0530 Subject: [PATCH 08/26] x86/apic: Support LAPIC timer for Secure AVIC Secure AVIC requires LAPIC timer to be emulated by hypervisor. KVM already supports emulating LAPIC timer using hrtimers. In order to emulate LAPIC timer, APIC_LVTT, APIC_TMICT and APIC_TDCR register values need to be propagated to the hypervisor for arming the timer. APIC_TMCCT register value has to be read from the hypervisor, which is required for calibrating the APIC timer. So, read/write all APIC timer registers from/to the hypervisor. In addition, configure APIC_ALLOWED_IRR for the hypervisor to inject timer interrupt using LOCAL_TIMER_VECTOR. 
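For reference, the timer registers are forwarded to the hypervisor over the GHCB MSR protocol using the standard x2APIC register-to-MSR encoding, along the lines of the read_msr_from_hv()/write_msr_to_hv() helpers added earlier in this series. A hedged sketch (the helper name is illustrative):

/*
 * Sketch: forward a timer register write (APIC_LVTT/APIC_TMICT/APIC_TDCR)
 * to the hypervisor.  The x2APIC MSR index is APIC_BASE_MSR plus the MMIO
 * offset divided by 16, e.g. APIC_TMICT (0x380) maps to MSR 0x838.
 */
static void savic_timer_reg_write(u32 reg, u32 data)
{
	u64 msr = APIC_BASE_MSR + (reg >> 4);

	if (sev_ghcb_msr_write(msr, data) != ES_OK)
		snp_abort();	/* a failed GHCB MSR write is fatal for the guest */
}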
Signed-off-by: Kishon Vijay Abraham I Co-developed-by: Neeraj Upadhyay Signed-off-by: Neeraj Upadhyay Signed-off-by: Tianyu Lan --- arch/x86/kernel/apic/apic.c | 4 ++++ arch/x86/kernel/apic/x2apic_savic.c | 7 +++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 85d2d53d6d06..95ae177dff88 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -591,6 +591,10 @@ static void setup_APIC_timer(void) 0xF, ~0UL); } else clockevents_register_device(levt); + + if (apic->update_vector) + apic->update_vector(smp_processor_id(), LOCAL_TIMER_VECTOR, + true); } /* diff --git a/arch/x86/kernel/apic/x2apic_savic.c b/arch/x86/kernel/apic/x2apic_savic.c index 30a24b70e5cb..2eab9a773005 100644 --- a/arch/x86/kernel/apic/x2apic_savic.c +++ b/arch/x86/kernel/apic/x2apic_savic.c @@ -94,6 +94,7 @@ static u32 x2apic_savic_read(u32 reg) case APIC_TMICT: case APIC_TMCCT: case APIC_TDCR: + return read_msr_from_hv(reg); case APIC_ID: case APIC_LVR: case APIC_TASKPRI: @@ -142,10 +143,12 @@ static void x2apic_savic_write(u32 reg, u32 data) switch (reg) { case APIC_LVTT: - case APIC_LVT0: - case APIC_LVT1: case APIC_TMICT: case APIC_TDCR: + write_msr_to_hv(reg, data); + break; + case APIC_LVT0: + case APIC_LVT1: /* APIC_ID is writable and configured by guest for Secure AVIC */ case APIC_ID: case APIC_TASKPRI: From 29bf650d557c153ccb2907612fa49f755d421c72 Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Fri, 13 Sep 2024 17:07:00 +0530 Subject: [PATCH 09/26] x86/sev: Initialize VGIF for secondary VCPUs for Secure AVIC The VINTR_CTRL field in the VMSA should be configured for Secure AVIC. Configure it for secondary vCPUs (the configuration for the boot CPU is done by the hypervisor). Signed-off-by: Kishon Vijay Abraham I Signed-off-by: Neeraj Upadhyay --- arch/x86/coco/sev/core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c index d6fa563ac9ec..8e9bdddc7700 100644 --- a/arch/x86/coco/sev/core.c +++ b/arch/x86/coco/sev/core.c @@ -1181,6 +1181,9 @@ static int wakeup_cpu_via_vmgexit(u32 apic_id, unsigned long start_ip, unsigned vmsa->x87_ftw = AP_INIT_X87_FTW_DEFAULT; vmsa->x87_fcw = AP_INIT_X87_FCW_DEFAULT; + if (cc_platform_has(CC_ATTR_SNP_SECURE_AVIC)) + vmsa->vintr_ctrl |= V_GIF_MASK; + /* SVME must be set. */ vmsa->efer = EFER_SVME; From 69ab46f2cac159ad0bc5241e2e5d9cdb34987ecd Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Fri, 13 Sep 2024 17:07:01 +0530 Subject: [PATCH 10/26] x86/apic: Add support to send NMI IPI for Secure AVIC Secure AVIC introduces a new field in the APIC backing page, "NmiReq", that has to be set by the guest to request an NMI IPI. Add support to set NmiReq appropriately to send an NMI IPI. This also requires the Virtual NMI feature to be enabled in the VINTR_CTRL field of the VMSA. However, this is added by a later commit, after adding support for injecting NMIs from the hypervisor.
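As an illustration, the NMI IPI request path amounts to setting the target vector in the destination vCPU's IRR and then writing its NmiReq field. A simplified sketch built from the send_ipi() changes in this patch (SAVIC_NMI_REQ_OFFSET is the 0x278 offset defined earlier in the series; the helper name is illustrative):

/*
 * Sketch: request an NMI for a target CPU by setting the vector in the
 * target's IRR and latching the NMI request in its backing page.
 */
static void send_nmi_ipi(int cpu, int vector)
{
	void *backing_page = per_cpu(apic_backing_page, cpu);

	test_and_set_bit(VEC_POS(vector),
			 (unsigned long *)((char *)backing_page + APIC_IRR + REG_POS(vector)));
	set_reg(backing_page, SAVIC_NMI_REQ_OFFSET, 1);
}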
Signed-off-by: Kishon Vijay Abraham I Signed-off-by: Neeraj Upadhyay --- arch/x86/kernel/apic/x2apic_savic.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/apic/x2apic_savic.c b/arch/x86/kernel/apic/x2apic_savic.c index 2eab9a773005..5502a828a795 100644 --- a/arch/x86/kernel/apic/x2apic_savic.c +++ b/arch/x86/kernel/apic/x2apic_savic.c @@ -183,7 +183,7 @@ static void x2apic_savic_write(u32 reg, u32 data) } } -static void send_ipi(int cpu, int vector) +static void send_ipi(int cpu, int vector, bool nmi) { void *backing_page; int reg_off; @@ -195,16 +195,20 @@ static void send_ipi(int cpu, int vector) * IRR updates such as during VMRUN and during CPU interrupt handling flow. */ test_and_set_bit(VEC_POS(vector), (unsigned long *)((char *)backing_page + reg_off)); + if (nmi) + set_reg(backing_page, SAVIC_NMI_REQ_OFFSET, nmi); } static void send_ipi_dest(u64 icr_data) { int vector, cpu; + bool nmi; vector = icr_data & APIC_VECTOR_MASK; cpu = icr_data >> 32; + nmi = ((icr_data & APIC_DM_FIXED_MASK) == APIC_DM_NMI); - send_ipi(cpu, vector); + send_ipi(cpu, vector, nmi); } static void send_ipi_target(u64 icr_data) @@ -222,11 +226,13 @@ static void send_ipi_allbut(u64 icr_data) const struct cpumask *self_cpu_mask = get_cpu_mask(smp_processor_id()); unsigned long flags; int vector, cpu; + bool nmi; vector = icr_data & APIC_VECTOR_MASK; + nmi = ((icr_data & APIC_DM_FIXED_MASK) == APIC_DM_NMI); local_irq_save(flags); for_each_cpu_andnot(cpu, cpu_present_mask, self_cpu_mask) - send_ipi(cpu, vector); + send_ipi(cpu, vector, nmi); write_msr_to_hv(APIC_ICR, icr_data); local_irq_restore(flags); } From 9bffdfd26b659f461509d430030a8d3ab546ff30 Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Fri, 13 Sep 2024 17:07:02 +0530 Subject: [PATCH 11/26] x86/apic: Allow NMI to be injected from hypervisor for Secure AVIC Secure AVIC requires "AllowedNmi" bit in the Secure AVIC Control MSR to be set for NMI to be injected from hypervisor. Set "AllowedNmi" bit in Secure AVIC Control MSR here to allow NMI interrupts to be injected from hypervisor. While at that, also propagate APIC_LVT0 and APIC_LVT1 register values to the hypervisor required for injecting NMI interrupts from hypervisor. 
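For clarity, the control MSR update during per-CPU setup is a plain WRMSR that ORs the AllowedNmi bit into the backing page address, as sketched below (the helper name is illustrative; the MSR and bit definitions are the ones this patch adds to msr-index.h):

/* Sketch: allow the hypervisor to inject NMIs for this vCPU. */
static void savic_allow_hv_nmi(u64 backing_page_pa)
{
	u64 val = backing_page_pa | MSR_AMD64_SECURE_AVIC_ALLOWEDNMI;

	native_wrmsr(MSR_AMD64_SECURE_AVIC_CONTROL,
		     lower_32_bits(val), upper_32_bits(val));
}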
Signed-off-by: Kishon Vijay Abraham I Signed-off-by: Neeraj Upadhyay --- arch/x86/include/asm/msr-index.h | 5 +++++ arch/x86/kernel/apic/x2apic_savic.c | 10 ++++++++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 3dcb2a2a4683..4b219f089528 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -684,6 +684,11 @@ #define MSR_AMD64_SNP_SECURE_AVIC_ENABLED BIT_ULL(MSR_AMD64_SNP_SECURE_AVIC_BIT) #define MSR_AMD64_SNP_RESV_BIT 19 #define MSR_AMD64_SNP_RESERVED_MASK GENMASK_ULL(63, MSR_AMD64_SNP_RESV_BIT) +#define MSR_AMD64_SECURE_AVIC_CONTROL 0xc0010138 +#define MSR_AMD64_SECURE_AVIC_EN_BIT 0 +#define MSR_AMD64_SECURE_AVIC_EN BIT_ULL(MSR_AMD64_SECURE_AVIC_EN_BIT) +#define MSR_AMD64_SECURE_AVIC_ALLOWEDNMI_BIT 1 +#define MSR_AMD64_SECURE_AVIC_ALLOWEDNMI BIT_ULL(MSR_AMD64_SECURE_AVIC_ALLOWEDNMI_BIT) #define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f diff --git a/arch/x86/kernel/apic/x2apic_savic.c b/arch/x86/kernel/apic/x2apic_savic.c index 5502a828a795..321b3678e26f 100644 --- a/arch/x86/kernel/apic/x2apic_savic.c +++ b/arch/x86/kernel/apic/x2apic_savic.c @@ -38,6 +38,11 @@ enum lapic_lvt_entry { #define APIC_LVTx(x) (APIC_LVTT + 0x10 * (x)) +static inline void savic_wr_control_msr(u64 val) +{ + native_wrmsr(MSR_AMD64_SECURE_AVIC_CONTROL, lower_32_bits(val), upper_32_bits(val)); +} + static int x2apic_savic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { return x2apic_enabled() && cc_platform_has(CC_ATTR_SNP_SECURE_AVIC); @@ -143,12 +148,12 @@ static void x2apic_savic_write(u32 reg, u32 data) switch (reg) { case APIC_LVTT: + case APIC_LVT0: + case APIC_LVT1: case APIC_TMICT: case APIC_TDCR: write_msr_to_hv(reg, data); break; - case APIC_LVT0: - case APIC_LVT1: /* APIC_ID is writable and configured by guest for Secure AVIC */ case APIC_ID: case APIC_TASKPRI: @@ -401,6 +406,7 @@ static void x2apic_savic_setup(void) ret = sev_notify_savic_gpa(gpa); if (ret != ES_OK) snp_abort(); + savic_wr_control_msr(gpa | MSR_AMD64_SECURE_AVIC_ALLOWEDNMI); this_cpu_write(savic_setup_done, true); } From 3a318682f76953e3b2fd1691fe1b3c4860b98330 Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Fri, 13 Sep 2024 17:07:03 +0530 Subject: [PATCH 12/26] x86/sev: Enable NMI support for Secure AVIC Now that support to send NMI IPI and support to inject NMI from hypervisor has been added, set V_NMI_ENABLE in VINTR_CTRL field of VMSA to enable NMI. Signed-off-by: Kishon Vijay Abraham I Signed-off-by: Neeraj Upadhyay --- arch/x86/coco/sev/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c index 8e9bdddc7700..a1eadbcbbe9e 100644 --- a/arch/x86/coco/sev/core.c +++ b/arch/x86/coco/sev/core.c @@ -1182,7 +1182,7 @@ static int wakeup_cpu_via_vmgexit(u32 apic_id, unsigned long start_ip, unsigned vmsa->x87_fcw = AP_INIT_X87_FCW_DEFAULT; if (cc_platform_has(CC_ATTR_SNP_SECURE_AVIC)) - vmsa->vintr_ctrl |= V_GIF_MASK; + vmsa->vintr_ctrl |= (V_GIF_MASK | V_NMI_ENABLE_MASK); /* SVME must be set. */ vmsa->efer = EFER_SVME; From 34d380bd184ab53104c6248a9298bfd71392ab78 Mon Sep 17 00:00:00 2001 From: Neeraj Upadhyay Date: Fri, 13 Sep 2024 17:07:04 +0530 Subject: [PATCH 13/26] x86/apic: Enable Secure AVIC in Control MSR With all the pieces in place now, enable Secure AVIC in Secure AVIC Control MSR. Any access to x2APIC MSRs are emulated by hypervisor before Secure AVIC is enabled in the Control MSR. 
Post Secure AVIC enablement, all x2APIC MSR accesses (whether accelerated by AVIC hardware or trapped as #VC exception) operate on the guest APIC backing page. Signed-off-by: Neeraj Upadhyay --- arch/x86/kernel/apic/x2apic_savic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/apic/x2apic_savic.c b/arch/x86/kernel/apic/x2apic_savic.c index 321b3678e26f..a3f0ddc6b5b6 100644 --- a/arch/x86/kernel/apic/x2apic_savic.c +++ b/arch/x86/kernel/apic/x2apic_savic.c @@ -406,7 +406,7 @@ static void x2apic_savic_setup(void) ret = sev_notify_savic_gpa(gpa); if (ret != ES_OK) snp_abort(); - savic_wr_control_msr(gpa | MSR_AMD64_SECURE_AVIC_ALLOWEDNMI); + savic_wr_control_msr(gpa | MSR_AMD64_SECURE_AVIC_EN | MSR_AMD64_SECURE_AVIC_ALLOWEDNMI); this_cpu_write(savic_setup_done, true); } From bf07961f77e9cc0840e7d91b93f1adf8b4d9518b Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Fri, 13 Sep 2024 17:07:05 +0530 Subject: [PATCH 14/26] x86/sev: Indicate SEV-SNP guest supports Secure AVIC Now that Secure AVIC support is added in the guest, indicate that the SEV-SNP guest supports Secure AVIC. Without this, the guest terminates booting with a Non-Automatic Exit (NAE) termination request event. Signed-off-by: Kishon Vijay Abraham I Signed-off-by: Neeraj Upadhyay --- arch/x86/boot/compressed/sev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c index d92fc9fa6e9f..6379edf5d97a 100644 --- a/arch/x86/boot/compressed/sev.c +++ b/arch/x86/boot/compressed/sev.c @@ -402,7 +402,7 @@ void do_boot_stage2_vc(struct pt_regs *regs, unsigned long exit_code) * by the guest kernel. As and when a new feature is implemented in the * guest kernel, a corresponding bit should be added to the mask. */ -#define SNP_FEATURES_PRESENT MSR_AMD64_SNP_DEBUG_SWAP +#define SNP_FEATURES_PRESENT (MSR_AMD64_SNP_DEBUG_SWAP | MSR_AMD64_SNP_SECURE_AVIC_ENABLED) u64 snp_get_unsupported_features(u64 status) { From d8deedf0bd5c742379c30e42a00ee21a2065bd69 Mon Sep 17 00:00:00 2001 From: Tianyu Lan Date: Fri, 25 Apr 2025 10:24:07 -0400 Subject: [PATCH 15/26] x86/Hyper-V: Add Hyper-V specific hvcall to set backing page Secure AVIC provides a backing page to aid the guest in limiting which interrupt vectors can be injected into the guest. Hyper-V has a specific hvcall to set the backing page; call it from the Secure AVIC driver.
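For reference, the register value handed to the HVCALL_SET_VP_REGISTERS hypercall packs an enable bit together with the guest frame number of the backing page. A minimal sketch of building that value, assuming the hv_x64_register_sev_gpa_page layout added by this patch (the helper name is illustrative):

/* Sketch: encode the HV_X64_REGISTER_SEV_AVIC_GPA register value for a backing page. */
static u64 savic_gpa_register_value(u64 gfn)
{
	union hv_x64_register_sev_gpa_page value = {
		.enabled	= 1,
		.reserved	= 0,
		.pagenumber	= gfn,	/* backing page GPA >> PAGE_SHIFT */
	};

	return value.u64;
}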
Signed-off-by: Tianyu Lan --- arch/x86/hyperv/hv_init.c | 24 +++++++++++++++- arch/x86/hyperv/ivm.c | 44 +++++++++++++++++++++++++++++ arch/x86/include/asm/hyperv-tlfs.h | 8 ++++++ arch/x86/include/asm/mshyperv.h | 2 ++ arch/x86/kernel/apic/x2apic_savic.c | 13 ++++++++- include/asm-generic/hyperv-tlfs.h | 26 +++++++++++++++-- 6 files changed, 113 insertions(+), 4 deletions(-) diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index a44c60c105f8..b31fabf091bb 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -40,6 +40,7 @@ void *hv_hypercall_pg; EXPORT_SYMBOL_GPL(hv_hypercall_pg); +void *hv_vp_early_input_arg; union hv_ghcb * __percpu *hv_ghcb_pg; /* Storage to save the hypercall page temporarily for hibernation */ @@ -357,6 +358,7 @@ void __init hyperv_init(void) u64 guest_id; union hv_x64_msr_hypercall_contents hypercall_msr; int cpuhp; + int ret; if (x86_hyper_type != X86_HYPER_MS_HYPERV) return; @@ -364,6 +366,22 @@ void __init hyperv_init(void) if (hv_common_init()) return; + if (cc_platform_has(CC_ATTR_SNP_SECURE_AVIC)) { + hv_vp_early_input_arg = kcalloc(num_possible_cpus(), + PAGE_SIZE, + GFP_KERNEL); + if (hv_vp_early_input_arg) { + ret = set_memory_decrypted(hv_vp_early_input_arg, + num_possible_cpus()); + if (ret) { + kfree(hv_vp_early_input_arg); + goto common_free; + } + } else { + goto common_free; + } + } + /* * The VP assist page is useless to a TDX guest: the only use we * would have for it is lazy EOI, which can not be used with TDX. @@ -378,7 +396,7 @@ void __init hyperv_init(void) ms_hyperv.hints &= ~HV_X64_ENLIGHTENED_VMCS_RECOMMENDED; if (!hv_isolation_type_tdx()) - goto common_free; + goto free_vp_early_input_arg; } if (ms_hyperv.paravisor_present && hv_isolation_type_snp()) { @@ -538,6 +556,10 @@ void __init hyperv_init(void) free_vp_assist_page: kfree(hv_vp_assist_page); hv_vp_assist_page = NULL; +free_vp_early_input_arg: + set_memory_encrypted(hv_vp_early_input_arg, num_possible_cpus()); + kfree(hv_vp_early_input_arg); + hv_vp_early_input_arg = NULL; common_free: hv_common_free(); } diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c index 4bd2e881e9e7..66662527d4fb 100644 --- a/arch/x86/hyperv/ivm.c +++ b/arch/x86/hyperv/ivm.c @@ -65,6 +65,13 @@ union hv_ghcb { /* Only used in an SNP VM with the paravisor */ static u16 hv_ghcb_version __ro_after_init; +/* + * Use static page to set Secure AVIC backing page. + * The operation happens before allocating input arg + * page when start AP. + */ +static u8 inputbuf[PAGE_SIZE] __bss_decrypted __aligned(PAGE_SIZE); + /* Functions only used in an SNP VM with the paravisor go here. 
*/ u64 hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_size) { @@ -289,6 +296,43 @@ static void snp_cleanup_vmsa(struct sev_es_save_area *vmsa) free_page((unsigned long)vmsa); } +enum es_result hv_set_savic_backing_page(u64 gfn) +{ + u64 control = HV_HYPERCALL_REP_COMP_1 | HVCALL_SET_VP_REGISTERS; + struct hv_set_vp_registers_input *input + = hv_vp_early_input_arg + smp_processor_id() * PAGE_SIZE; + union hv_x64_register_sev_gpa_page value; + unsigned long flags; + int retry = 5; + u64 ret; + + local_irq_save(flags); + + value.enabled = 1; + value.reserved = 0; + value.pagenumber = gfn; + + memset(input, 0, struct_size(input, element, 1)); + input->header.partitionid = HV_PARTITION_ID_SELF; + input->header.vpindex = HV_VP_INDEX_SELF; + input->header.inputvtl = ms_hyperv.vtl; + input->element[0].name = HV_X64_REGISTER_SEV_AVIC_GPA; + input->element[0].value.reg64 = value.u64; + + do { + ret = hv_do_hypercall(control, input, NULL); + if (!hv_result_success(ret)) + pr_err("Failed to set secure AVIC backing page %llx.\n", ret); + } while (ret == HV_STATUS_TIME_OUT && retry--); + + local_irq_restore(flags); + + if (hv_result_success(ret)) + return ES_OK; + else + return ES_VMM_ERROR; +} + int hv_snp_boot_ap(u32 apic_id, unsigned long start_ip, unsigned int cpu) { struct sev_es_save_area *vmsa = (struct sev_es_save_area *) diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index a0c992faa1e9..910b03d74c85 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -311,6 +311,14 @@ enum hv_isolation_type { #define HV_MSR_STIMER0_CONFIG (HV_X64_MSR_STIMER0_CONFIG) #define HV_MSR_STIMER0_COUNT (HV_X64_MSR_STIMER0_COUNT) +/* + * Registers are only accessible via HVCALL_GET_VP_REGISTERS hvcall and + * there is not associated MSR address. 
+ */ +#define HV_X64_REGISTER_VSM_VP_STATUS 0x000D0003 +#define HV_X64_VTL_MASK GENMASK(3, 0) +#define HV_X64_REGISTER_SEV_AVIC_GPA 0x00090043 + /* Hyper-V memory host visibility */ enum hv_mem_host_visibility { VMBUS_PAGE_NOT_VISIBLE = 0, diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 6e94e4e8230c..008ae41168c5 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -43,6 +43,7 @@ static inline unsigned char hv_get_nmi_reason(void) extern bool hyperv_paravisor_present; extern void *hv_hypercall_pg; +extern void *hv_vp_early_input_arg; extern u64 hv_current_partition_id; @@ -265,6 +266,7 @@ int hv_unmap_ioapic_interrupt(int ioapic_id, struct hv_interrupt_entry *entry); bool hv_ghcb_negotiate_protocol(void); void __noreturn hv_ghcb_terminate(unsigned int set, unsigned int reason); int hv_snp_boot_ap(u32 apic_id, unsigned long start_ip, unsigned int cpu); +enum es_result hv_set_savic_backing_page(u64 gfn); #else static inline bool hv_ghcb_negotiate_protocol(void) { return false; } static inline void hv_ghcb_terminate(unsigned int set, unsigned int reason) {} diff --git a/arch/x86/kernel/apic/x2apic_savic.c b/arch/x86/kernel/apic/x2apic_savic.c index a3f0ddc6b5b6..d82a5e82e3e5 100644 --- a/arch/x86/kernel/apic/x2apic_savic.c +++ b/arch/x86/kernel/apic/x2apic_savic.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "local.h" @@ -396,6 +397,10 @@ static void x2apic_savic_setup(void) void *backing_page; enum es_result ret; unsigned long gpa; + unsigned long gfn; + + if (!cc_platform_has(CC_ATTR_SNP_SECURE_AVIC)) + return; if (this_cpu_read(savic_setup_done)) return; @@ -403,7 +408,13 @@ static void x2apic_savic_setup(void) backing_page = this_cpu_read(apic_backing_page); init_backing_page(backing_page); gpa = __pa(backing_page); - ret = sev_notify_savic_gpa(gpa); + gfn = gpa >> PAGE_SHIFT; + + if (hv_isolation_type_snp()) + ret = hv_set_savic_backing_page(gfn); + else + ret = sev_notify_savic_gpa(gpa); + if (ret != ES_OK) snp_abort(); savic_wr_control_msr(gpa | MSR_AMD64_SECURE_AVIC_EN | MSR_AMD64_SECURE_AVIC_ALLOWEDNMI); diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h index 02f0a4ab723e..ddeef1ebbad8 100644 --- a/include/asm-generic/hyperv-tlfs.h +++ b/include/asm-generic/hyperv-tlfs.h @@ -749,6 +749,23 @@ struct hv_get_vp_registers_output { }; }; +union hv_x64_register_sev_gpa_page { + u64 u64; + struct { + u64 enabled:1; + u64 reserved:11; + u64 pagenumber:52; + }; +} __packed; + +union hv_register_value { + u128 reg128; + u64 reg64; + u32 reg32; + u16 reg16; + u8 reg8; +}; + /* HvSetVpRegisters hypercall with variable size reg name/value list*/ struct hv_set_vp_registers_input { struct { @@ -761,8 +778,13 @@ struct hv_set_vp_registers_input { u32 name; u32 padding1; u64 padding2; - u64 valuelow; - u64 valuehigh; + union { + union hv_register_value value; + struct { + u64 valuelow; + u64 valuehigh; + }; + }; } element[]; } __packed; From 7ada5535f7286ea5527a56d5ef283b53f2639d72 Mon Sep 17 00:00:00 2001 From: Tianyu Lan Date: Thu, 3 Apr 2025 18:19:21 -0400 Subject: [PATCH 16/26] x86/Hyper-V: Not use hv apic driver when Secure AVIC is available When Secure AVIC is available, AMD x2apic Secure AVIC driver should be selected and return directly in the hv_apic_init(). 
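For clarity, the intended end state is an early bail-out at the very top of hv_apic_init(), before the Hyper-V IPI/EOI enlightenments are wired up, so the callbacks installed by the Secure AVIC x2APIC driver stay in effect. A minimal sketch of the resulting function (the actual change is the small hunk below; everything past the guard is the existing code):

    void __init hv_apic_init(void)
    {
        /*
         * Under Secure AVIC the guest owns its APIC state, so keep
         * the Secure AVIC x2APIC driver callbacks and skip the
         * Hyper-V IPI/EOI enlightenments below.
         */
        if (cc_platform_has(CC_ATTR_SNP_SECURE_AVIC))
            return;

        /* ... existing enlightened APIC setup ... */
    }
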
Signed-off-by: Tianyu Lan --- arch/x86/hyperv/hv_apic.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c index 0569f579338b..34987b223418 100644 --- a/arch/x86/hyperv/hv_apic.c +++ b/arch/x86/hyperv/hv_apic.c @@ -288,6 +288,9 @@ static void hv_send_ipi_self(int vector) void __init hv_apic_init(void) { + if (cc_platform_has(CC_ATTR_SNP_SECURE_AVIC)) + return; + if (ms_hyperv.hints & HV_X64_CLUSTER_IPI_RECOMMENDED) { pr_info("Hyper-V: Using IPI hypercalls\n"); /* From f75564f01de1afce5ee1c4bed6d684403c5c80a5 Mon Sep 17 00:00:00 2001 From: Tianyu Lan Date: Thu, 3 Apr 2025 19:35:12 -0400 Subject: [PATCH 17/26] x86/x2apic-savic: Expose x2apic_savic_update_vector() Expose x2apic_savic_update_vector() to allow driver or arch code to allow Hyper-V inject related vector. Signed-off-by: Tianyu Lan --- arch/x86/include/asm/apic.h | 9 +++++++++ arch/x86/kernel/apic/x2apic_savic.c | 5 ++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 1ab0e22a7187..db4e84484eee 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -241,6 +241,15 @@ static inline u64 native_x2apic_icr_read(void) return val; } +#if defined(CONFIG_AMD_SECURE_AVIC) +extern void x2apic_savic_update_vector(unsigned int cpu, + unsigned int vector, + bool set); +#else +static inline void x2apic_savic_update_vector(unsigned int cpu, + unsigned int vector, bool set) { } +#endif + extern int x2apic_mode; extern int x2apic_phys; extern void __init x2apic_set_max_apicid(u32 apicid); diff --git a/arch/x86/kernel/apic/x2apic_savic.c b/arch/x86/kernel/apic/x2apic_savic.c index d82a5e82e3e5..deb202dbb020 100644 --- a/arch/x86/kernel/apic/x2apic_savic.c +++ b/arch/x86/kernel/apic/x2apic_savic.c @@ -343,12 +343,15 @@ static void x2apic_savic_send_IPI_self(int vector) __send_IPI_shorthand(vector, APIC_DEST_SELF); } -static void x2apic_savic_update_vector(unsigned int cpu, unsigned int vector, bool set) +void x2apic_savic_update_vector(unsigned int cpu, unsigned int vector, bool set) { void *backing_page; unsigned long *reg; int reg_off; + if (!cc_platform_has(CC_ATTR_SNP_SECURE_AVIC)) + return; + backing_page = per_cpu(apic_backing_page, cpu); reg_off = SAVIC_ALLOWED_IRR_OFFSET + REG_POS(vector); reg = (unsigned long *)((char *)backing_page + reg_off); From 7a3be0a4812205b51af41f99918439a312a7f49f Mon Sep 17 00:00:00 2001 From: Tianyu Lan Date: Thu, 3 Apr 2025 18:29:23 -0400 Subject: [PATCH 18/26] drivers/hv: Allow vmbus message synic interrupt injected from Hyper-V When Secure AVIC is enabled, Vmbus driver should call x2apic Secure AVIC interface to allow Hyper-V to inject Vmbus message interrupt. 
Signed-off-by: Tianyu Lan --- drivers/hv/hv.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index fc8729deb659..c28e60e8c399 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include "hyperv_vmbus.h" @@ -312,6 +313,7 @@ void hv_synic_enable_regs(unsigned int cpu) if (vmbus_irq != -1) enable_percpu_irq(vmbus_irq, 0); shared_sint.as_uint64 = hv_get_msr(HV_MSR_SINT0 + VMBUS_MESSAGE_SINT); + x2apic_savic_update_vector(smp_processor_id(), vmbus_interrupt, true); shared_sint.vector = vmbus_interrupt; shared_sint.masked = false; From e2dd054080fe6398ab1666fa86b473e2e38ca3c1 Mon Sep 17 00:00:00 2001 From: Tianyu Lan Date: Thu, 3 Apr 2025 18:54:55 -0400 Subject: [PATCH 19/26] x86/Hyper-V: Allow Hyper-V to inject Hyper-V vectors When Secure AVIC is enabled, call Secure AVIC function to allow Hyper-V to inject REENLIGHTENMENT, STIMER0 and CALLBACK vectors. Signed-off-by: Tianyu Lan --- arch/x86/hyperv/hv_init.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index b31fabf091bb..930083663a08 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -85,6 +85,17 @@ static int hv_cpu_init(unsigned int cpu) if (ret) return ret; + /* Allow Hyper-V vector to be injected from Hypervisor. */ + if (ms_hyperv.features & HV_ACCESS_REENLIGHTENMENT) + x2apic_savic_update_vector(cpu, + HYPERV_REENLIGHTENMENT_VECTOR, true); + + if (ms_hyperv.misc_features & HV_STIMER_DIRECT_MODE_AVAILABLE) + x2apic_savic_update_vector(cpu, + HYPERV_STIMER0_VECTOR, true); + + x2apic_savic_update_vector(cpu, HYPERVISOR_CALLBACK_VECTOR, true); + return hyperv_init_ghcb(); } From 923327baf310bb0ebc60a92c77854fe129579e83 Mon Sep 17 00:00:00 2001 From: Tianyu Lan Date: Thu, 3 Apr 2025 18:42:42 -0400 Subject: [PATCH 20/26] x86/Hyper-V: Not use auto-eoi when Secure AVIC is available Hyper-V doesn't support auto-eoi with Secure AVIC. So Enable HV_DEPRECATING_AEOI_RECOMMENDED flag to force to write eoi register after handling interrupt. Signed-off-by: Tianyu Lan --- arch/x86/kernel/cpu/mshyperv.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index c6797cf9f37c..c2ae8ea213b0 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -460,6 +460,9 @@ static void __init ms_hyperv_init_platform(void) pr_debug("Hyper-V: max %u virtual processors, %u logical processors\n", ms_hyperv.max_vp_index, ms_hyperv.max_lp_index); + if (cc_platform_has(CC_ATTR_SNP_SECURE_AVIC)) + ms_hyperv.hints |= HV_DEPRECATING_AEOI_RECOMMENDED; + /* * Check CPU management privilege. * From 16a9726ac16768db42cef900353a2b859185d91e Mon Sep 17 00:00:00 2001 From: Tianyu Lan Date: Thu, 3 Apr 2025 19:38:01 -0400 Subject: [PATCH 21/26] x86/x2apic-savic: Not set APIC backing page if Secure AVIC is not enabled. When Secure AVIC is not enabled, init_backing_page() should return directly. 
Signed-off-by: Tianyu Lan --- arch/x86/kernel/apic/x2apic_savic.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kernel/apic/x2apic_savic.c b/arch/x86/kernel/apic/x2apic_savic.c index deb202dbb020..96a6f3889f53 100644 --- a/arch/x86/kernel/apic/x2apic_savic.c +++ b/arch/x86/kernel/apic/x2apic_savic.c @@ -369,6 +369,9 @@ static void init_backing_page(void *backing_page) u32 val; int i; + if (!cc_platform_has(CC_ATTR_SNP_SECURE_AVIC)) + return; + val = read_msr_from_hv(APIC_LVR); set_reg(backing_page, APIC_LVR, val); From fe8c7fd22d49a3c6805bc97ab0466b28967f22bb Mon Sep 17 00:00:00 2001 From: Tianyu Lan Date: Thu, 3 Apr 2025 21:28:49 -0400 Subject: [PATCH 22/26] x64-cvm.config: Add Secure AVIC driver for CVM Select AMD Secure AVIC driver in the CVM config file. Signed-off-by: Tianyu Lan --- Microsoft/x64-cvm.config | 1 + 1 file changed, 1 insertion(+) diff --git a/Microsoft/x64-cvm.config b/Microsoft/x64-cvm.config index 054a91783c73..b5ae5b1f1787 100644 --- a/Microsoft/x64-cvm.config +++ b/Microsoft/x64-cvm.config @@ -3,5 +3,6 @@ CONFIG_VIRT_DRIVERS=y CONFIG_TDX_GUEST_DRIVER=y CONFIG_SEV_GUEST=y CONFIG_AMD_MEM_ENCRYPT=y +CONFIG_AMD_SECURE_AVIC=y CONFIG_CRYPTO_AES=y CONFIG_CRYPTO_LIB_AES=y From b4a67d2bc5ef9f231d9b32977a6f92e36120f409 Mon Sep 17 00:00:00 2001 From: Wei Liu Date: Fri, 21 Mar 2025 22:40:14 +0000 Subject: [PATCH 23/26] x86/hyperv: fix an indentation issue in mshyperv.h Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202503220640.hjiacW2C-lkp@intel.com/ Signed-off-by: Wei Liu --- arch/x86/include/asm/mshyperv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 008ae41168c5..421cc6d3d0ac 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -161,7 +161,7 @@ static inline u64 _hv_do_fast_hypercall8(u64 control, u64 input1) : "cc", "edi", "esi"); } #endif - return hv_status; + return hv_status; } static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1) From 8b8b39efc77a00cc076763fc15023a80cefc1a47 Mon Sep 17 00:00:00 2001 From: Roman Kisel Date: Fri, 2 May 2025 13:02:18 -0700 Subject: [PATCH 24/26] fixes for Tianyu's patches --- arch/x86/hyperv/hv_init.c | 4 ++-- arch/x86/hyperv/ivm.c | 11 ++--------- drivers/hv/hv.c | 5 ++++- 3 files changed, 8 insertions(+), 12 deletions(-) diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 930083663a08..bf20e655076f 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -382,7 +382,7 @@ void __init hyperv_init(void) PAGE_SIZE, GFP_KERNEL); if (hv_vp_early_input_arg) { - ret = set_memory_decrypted(hv_vp_early_input_arg, + ret = set_memory_decrypted((u64)hv_vp_early_input_arg, num_possible_cpus()); if (ret) { kfree(hv_vp_early_input_arg); @@ -568,7 +568,7 @@ void __init hyperv_init(void) kfree(hv_vp_assist_page); hv_vp_assist_page = NULL; free_vp_early_input_arg: - set_memory_encrypted(hv_vp_early_input_arg, num_possible_cpus()); + set_memory_encrypted((u64)hv_vp_early_input_arg, num_possible_cpus()); kfree(hv_vp_early_input_arg); hv_vp_early_input_arg = NULL; common_free: diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c index 66662527d4fb..03b439ac54a3 100644 --- a/arch/x86/hyperv/ivm.c +++ b/arch/x86/hyperv/ivm.c @@ -65,13 +65,6 @@ union hv_ghcb { /* Only used in an SNP VM with the paravisor */ static u16 hv_ghcb_version __ro_after_init; -/* - * Use static page to set Secure AVIC backing page. 
- * The operation happens before allocating input arg - * page when start AP. - */ -static u8 inputbuf[PAGE_SIZE] __bss_decrypted __aligned(PAGE_SIZE); - /* Functions only used in an SNP VM with the paravisor go here. */ u64 hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_size) { @@ -321,9 +314,9 @@ enum es_result hv_set_savic_backing_page(u64 gfn) do { ret = hv_do_hypercall(control, input, NULL); - if (!hv_result_success(ret)) - pr_err("Failed to set secure AVIC backing page %llx.\n", ret); } while (ret == HV_STATUS_TIME_OUT && retry--); + if (!hv_result_success(ret)) + pr_err("Failed to set secure AVIC backing page %llx.\n", ret); local_irq_restore(flags); diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index c28e60e8c399..92003b8004f7 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -20,7 +20,9 @@ #include #include #include +#ifdef CONFIG_SEV_GUEST #include +#endif #include #include "hyperv_vmbus.h" @@ -313,8 +315,9 @@ void hv_synic_enable_regs(unsigned int cpu) if (vmbus_irq != -1) enable_percpu_irq(vmbus_irq, 0); shared_sint.as_uint64 = hv_get_msr(HV_MSR_SINT0 + VMBUS_MESSAGE_SINT); +#ifdef CONFIG_SEV_GUEST x2apic_savic_update_vector(smp_processor_id(), vmbus_interrupt, true); - +#endif shared_sint.vector = vmbus_interrupt; shared_sint.masked = false; From 513e4ddf9141f16e395a5d0ac4a8000eb0934360 Mon Sep 17 00:00:00 2001 From: Roman Kisel Date: Tue, 24 Jun 2025 17:30:08 -0700 Subject: [PATCH 25/26] drivers: hv: mshv_vtl: Support for Secure AVIC --- arch/x86/include/asm/apic.h | 1 + arch/x86/include/asm/sev.h | 7 +- arch/x86/include/asm/svm.h | 12 +- arch/x86/include/uapi/asm/svm.h | 6 +- arch/x86/kernel/apic/x2apic_savic.c | 4 +- drivers/hv/mshv_vtl_main.c | 743 +++++++++++++++++++++------- include/uapi/linux/mshv.h | 1 + 7 files changed, 580 insertions(+), 194 deletions(-) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index db4e84484eee..e504b5e597ed 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -245,6 +245,7 @@ static inline u64 native_x2apic_icr_read(void) extern void x2apic_savic_update_vector(unsigned int cpu, unsigned int vector, bool set); +extern void x2apic_savic_init_backing_page(void *backing_page); #else static inline void x2apic_savic_update_vector(unsigned int cpu, unsigned int vector, bool set) { } diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index ced3d8014ef4..89bc19d3f6c1 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -109,7 +109,12 @@ struct rmp_state { u32 asid; } __packed; -#define RMPADJUST_VMSA_PAGE_BIT BIT(16) +/* Target VMPL takes the first byte */ +#define RMPADJUST_ENABLE_READ BIT(8) +#define RMPADJUST_ENABLE_WRITE BIT(9) +#define RMPADJUST_USER_EXECUTE BIT(10) +#define RMPADJUST_KERNEL_EXECUTE BIT(11) +#define RMPADJUST_VMSA_PAGE_BIT BIT(16) /* SNP Guest message request */ struct snp_req_data { diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 2b59b9951c90..d5207e9badd3 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -5,7 +5,8 @@ #include #include -#include +/* TODO: including into mshv_vtl_main.c breaks the build. */ +// #include /* * 32-bit intercept words in the VMCB Control Area, starting @@ -164,7 +165,8 @@ struct __attribute__ ((__packed__)) vmcb_control_area { * for use by hypervisor/software. */ union { - struct hv_vmcb_enlightenments hv_enlightenments; + /* TODO: including into mshv_vtl_main.c breaks the build. 
*/ + // struct hv_vmcb_enlightenments hv_enlightenments; u8 reserved_sw[32]; }; }; @@ -183,6 +185,9 @@ struct __attribute__ ((__packed__)) vmcb_control_area { #define V_GIF_SHIFT 9 #define V_GIF_MASK (1 << V_GIF_SHIFT) +#define V_INT_SHADOW 10 +#define V_INT_SHADOW_MASK (1 << V_INT_SHADOW) + #define V_NMI_PENDING_SHIFT 11 #define V_NMI_PENDING_MASK (1 << V_NMI_PENDING_SHIFT) @@ -195,6 +200,9 @@ struct __attribute__ ((__packed__)) vmcb_control_area { #define V_IGN_TPR_SHIFT 20 #define V_IGN_TPR_MASK (1 << V_IGN_TPR_SHIFT) +#define V_GUEST_BUSY_SHIFT 63 +#define V_GUEST_BUSY_MASK (1ULL << V_GUEST_BUSY_SHIFT) + #define V_IRQ_INJECTION_BITS_MASK (V_IRQ_MASK | V_INTR_PRIO_MASK | V_IGN_TPR_MASK) #define V_INTR_MASKING_SHIFT 24 diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h index 0f21cea6d21c..89876c35dd11 100644 --- a/arch/x86/include/uapi/asm/svm.h +++ b/arch/x86/include/uapi/asm/svm.h @@ -94,8 +94,10 @@ #define SVM_EXIT_CR13_WRITE_TRAP 0x09d #define SVM_EXIT_CR14_WRITE_TRAP 0x09e #define SVM_EXIT_CR15_WRITE_TRAP 0x09f -#define SVM_EXIT_INVPCID 0x0a2 -#define SVM_EXIT_NPF 0x400 +#define SVM_EXIT_INVPCID 0x0a2 +#define SVM_EXIT_BUSLOCK 0x0a5 +#define SVM_EXIT_IDLE_HLT 0x0a6 +#define SVM_EXIT_NPF 0x400 #define SVM_EXIT_AVIC_INCOMPLETE_IPI 0x401 #define SVM_EXIT_AVIC_UNACCELERATED_ACCESS 0x402 #define SVM_EXIT_VMGEXIT 0x403 diff --git a/arch/x86/kernel/apic/x2apic_savic.c b/arch/x86/kernel/apic/x2apic_savic.c index 96a6f3889f53..9c6181229165 100644 --- a/arch/x86/kernel/apic/x2apic_savic.c +++ b/arch/x86/kernel/apic/x2apic_savic.c @@ -362,7 +362,7 @@ void x2apic_savic_update_vector(unsigned int cpu, unsigned int vector, bool set) test_and_clear_bit(VEC_POS(vector), reg); } -static void init_backing_page(void *backing_page) +void x2apic_savic_init_backing_page(void *backing_page) { u32 hv_apic_id; u32 apic_id; @@ -412,7 +412,7 @@ static void x2apic_savic_setup(void) return; backing_page = this_cpu_read(apic_backing_page); - init_backing_page(backing_page); + x2apic_savic_init_backing_page(backing_page); gpa = __pa(backing_page); gfn = gpa >> PAGE_SHIFT; diff --git a/drivers/hv/mshv_vtl_main.c b/drivers/hv/mshv_vtl_main.c index b727c76d17b4..b587cce8fc82 100644 --- a/drivers/hv/mshv_vtl_main.c +++ b/drivers/hv/mshv_vtl_main.c @@ -34,6 +34,7 @@ #include #include +#include #include #include #include @@ -167,6 +168,9 @@ struct mshv_vtl_per_cpu { bool msrs_are_guest; struct user_return_notifier mshv_urn; #endif +#if defined(CONFIG_X86_64) && defined(CONFIG_SEV_GUEST) + struct page *snp_secure_avic_page; +#endif }; static struct mutex mshv_vtl_poll_file_lock; @@ -196,20 +200,66 @@ static struct page *mshv_vtl_cpu_reg_page(int cpu) return *per_cpu_ptr(&mshv_vtl_per_cpu.reg_page, cpu); } -#if defined(CONFIG_X86_64) && defined(CONFIG_INTEL_TDX_GUEST) +#if defined(CONFIG_X86_64) + +#if defined(CONFIG_INTEL_TDX_GUEST) + +static struct page *tdx_this_apic_page(void) +{ + return *this_cpu_ptr(&mshv_vtl_per_cpu.tdx_apic_page); +} + +static u32 *mshv_tdx_vapic_irr(void) +{ + return (u32 *)((char *)page_address(tdx_this_apic_page()) + APIC_IRR); +} + +#endif /* defined(CONFIG_INTEL_TDX_GUEST) */ static struct page *tdx_apic_page(int cpu) { +#if defined(CONFIG_INTEL_TDX_GUEST) return *per_cpu_ptr(&mshv_vtl_per_cpu.tdx_apic_page, cpu); +#else + (void)cpu; + return NULL; +#endif } -static struct page *tdx_this_apic_page(void) +static struct page *snp_secure_avic_page(int cpu) { - return *this_cpu_ptr(&mshv_vtl_per_cpu.tdx_apic_page); +#if defined(CONFIG_SEV_GUEST) + return 
*per_cpu_ptr(&mshv_vtl_per_cpu.snp_secure_avic_page, cpu); +#else + (void)cpu; + return NULL; +#endif } +static u32 *mshv_snp_secure_avic_irr(int cpu) +{ +#if defined(CONFIG_SEV_GUEST) + return (u32 *)((char *)page_address(snp_secure_avic_page(cpu)) + APIC_IRR); +#else + (void)cpu; + return NULL; +#endif +} + +static struct page* mshv_apic_page(int cpu) +{ + if (hv_isolation_type_tdx()) + return tdx_apic_page(cpu); + else if (hv_isolation_type_snp()) + return snp_secure_avic_page(cpu); + + return NULL; +} + +#if defined(CONFIG_SEV_GUEST) || defined(CONFIG_INTEL_TDX_GUEST) /* - * For ICR emulation on TDX, we need a fast way to map APICIDs to CPUIDs. + * For ICR emulation when running a hardware isolated guest, we need a fast way to map + * APICIDs to CPUIDs. * Instead of iterating through all CPUs for each target in the ICR destination field * precompute a mapping. APICIDs can be sparse so we have to use a hash table. * Note: CPU hotplug is not supported (both by this code and by the paravisor in general) @@ -225,21 +275,250 @@ struct apicid_to_cpuid_entry { * Sets the cpu described by apicid in cpu_mask. * Returns 0 on success, -EINVAL if no cpu matches the apicid. */ -static int mshv_tdx_set_cpumask_from_apicid(int apicid, struct cpumask *cpu_mask) +static int mshv_set_cpumask_from_apicid(int apicid, struct cpumask *cpu_mask) { struct apicid_to_cpuid_entry *found; hash_for_each_possible(apicid_to_cpuid, found, node, apicid) { if (found->apicid != apicid) continue; - cpumask_set_cpu(found->cpuid, cpu_mask); return 0; } return -EINVAL; } -#endif + +/* + * Returns the cpumask described by dest, where dest is a logical destination. + * cpu_mask should have no CPUs set. + * Returns 0 on success + */ +static int mshv_get_logical_cpumask(u32 dest, struct cpumask *cpu_mask) +{ + int ret = 0; + + while ((u16)dest) { + const u16 i = fls((u16)dest) - 1; + const u32 physical_id = (dest >> 16 << 4) | i; + + ret = mshv_set_cpumask_from_apicid(physical_id, cpu_mask); + dest &= ~BIT(i); + if (ret) + break; + } + + return ret; +} + +/* + * Interrupt handling (particularly sending (via ICR writes) and receiving interrupts), + * is a hot path on hardware-isolated VMs. By performing some of the common functionality + * entirely in-kernel we eliminate costly user<->kernel transitions. + */ +static void mshv_free_apicid_to_cpuid_mapping(void) +{ + int bkt; + struct apicid_to_cpuid_entry *entry; + struct hlist_node *tmp; + + hash_for_each_safe(apicid_to_cpuid, bkt, tmp, entry, node) { + hash_del(&entry->node); + kfree(entry); + } +} + +/* + * Creates and populates the apicid_to_cpuid hash table. + * This mapping is used for fast ICR emulation on on hardware-isolated VMs. + * Returns 0 on success. + */ +static int mshv_create_apicid_to_cpuid_mapping(struct device *dev) +{ + int cpu, ret = 0; + + for_each_online_cpu(cpu) { + struct apicid_to_cpuid_entry *entry = kzalloc(sizeof(*entry), GFP_KERNEL); + + if (!entry) { + ret = -ENOMEM; + break; + } + + entry->apicid = cpuid_to_apicid[cpu]; + entry->cpuid = cpu; + + if (entry->apicid == BAD_APICID) { + dev_emerg(dev, "Bad APICID: %d !!\n", entry->apicid); + ret = -ENODEV; + break; + } + + hash_add(apicid_to_cpuid, &entry->node, entry->apicid); + } + + if (ret) + mshv_free_apicid_to_cpuid_mapping(); + + return ret; +} + +/* + * Attempts to handle an ICR write. Returns 0 if successful, other values + * indicate user-space should be invoked to gracefully handle the error. 
+ */ +static int mshv_cpu_mask_for_icr_write(u32 icr_lo, u32 dest, struct cpumask* local_mask) +{ + const u8 shorthand = (icr_lo >> 18) & 0b11; + const u32 self = smp_processor_id(); + int ret = 0; + + cpumask_clear(local_mask); + if (shorthand == 0b10 || dest == (u32)-1) { /* shorthand all or destination id == all */ + cpumask_copy(local_mask, cpu_online_mask); + } else if (shorthand == 0b11) { /* shorthand all but self */ + cpumask_copy(local_mask, cpu_online_mask); + cpumask_clear_cpu(self, local_mask); + } else if (shorthand == 0b01) { /* shorthand self */ + cpumask_set_cpu(self, local_mask); + } else if (icr_lo & BIT(11)) { /* logical */ + ret = mshv_get_logical_cpumask(dest, local_mask); + } else { /* physical */ + ret = mshv_set_cpumask_from_apicid(dest, local_mask); + } + + return ret; +} + +/* + * Attempts to handle an ICR write. Returns 0 if successful, other values + * indicate user-space should be invoked to gracefully handle the error. + */ +static int mshv_update_proxy_irr_for_icr_write(u32 icr_lo, struct cpumask *local_mask) +{ + const u8 vector = icr_lo; + const u64 bank = vector / 32; + const u32 mask = BIT(vector % 32); + const u32 self = smp_processor_id(); + + unsigned int cpu; + bool send_ipi; + + send_ipi = false; + for_each_cpu(cpu, local_mask) { + /* + * The kernel doesn't provide an atomic_or which operates on u32, + * so cast to atomic_t, which should have the same layout + */ + static_assert(sizeof(atomic_t) == sizeof(u32)); + atomic_or(mask, (atomic_t *) + (&(mshv_vtl_cpu_run(cpu)->proxy_irr[bank]))); + smp_store_release(&mshv_vtl_cpu_run(cpu)->scan_proxy_irr, 1); + send_ipi |= cpu != self; + } + + if (send_ipi) { + cpumask_clear_cpu(self, local_mask); + __apic_send_IPI_mask(local_mask, RESCHEDULE_VECTOR); + } + + return 0; +} + +/* + * Attempts to handle an ICR write. Returns 0 if successful, other values + * indicate user-space should be invoked to gracefully handle the error. + * Secure AVIC accelerates self-IPI only. + */ +static int mshv_snp_handle_simple_icr_write(u32 icr_lo, u32 dest) +{ + const u8 vector = icr_lo; + + struct cpumask local_mask; + unsigned int cpu; + int ret; + + ret = mshv_cpu_mask_for_icr_write(icr_lo, dest, &local_mask); + if (ret) + return ret; + ret = mshv_update_proxy_irr_for_icr_write(icr_lo, &local_mask); + if (ret) + return ret; + + // Probobaly shouldn't update the target VP's IRRs to inject the + // interrupt, there might be more state to account for. The target + // VP will go into the user mode anyway, not much to be saved? + + // for_each_cpu(cpu, &local_mask) { + // u64 irr_reg_off; + // unsigned long *irr_reg; + // void* irr; + + // /* + // * IRRs are banked into eight 32-bit registers each starting on the + // * 16-byte boundary (4 byte of an IRR + 12 byte stride). + // */ + // irr_reg_off = (vector >> 5) << 4; + // irr = mshv_snp_secure_avic_irr(cpu); + // irr_reg = (unsigned long*)((u8*)irr + irr_reg_off); + + // /* Inject the interrupt. 
*/ + // test_and_set_bit(vector & 0x1f, irr_reg); + // } + + return 0; +} + +#else + +static void mshv_free_apicid_to_cpuid_mapping(void) {} +static int mshv_create_apicid_to_cpuid_mapping(struct device *) { return 0; } +static bool mshv_tdx_try_handle_exit(struct mshv_vtl_run *) { return false; } +static bool mshv_snp_try_handle_exit(struct mshv_vtl_run *) { return false; } + +#endif /* defined(CONFIG_SEV_GUEST) || defined(CONFIG_INTEL_TDX_GUEST) */ + +/* + * Pull the interrupts in the `proxy_irr` field into the VAPIC page + * Returns true if an exit to user-space is required (sync tmr state) + */ +static bool __mshv_pull_proxy_irr(struct mshv_vtl_run *run, struct page *apic_page) +{ + u32 *apic_page_irr = (u32 *)((char *)page_address(apic_page) + APIC_IRR); + + if (!xchg(&run->scan_proxy_irr, 0) || !apic_page_irr) + return false; + + for (int i = 0; i < 8; i++) { + const u32 val = xchg(&run->proxy_irr[i], 0); + + if (!val) + continue; + + if (run->proxy_irr_exit_mask[i] & val) { + /* + * This vector was previously used for a level-triggered interrupt. + * An edge-triggered interrupt has now arrived, so we need to involve + * user-space to clear its copy of the tmr. + * Put the interrupt(s) back on the run page so it can do so. + * nb atomic_t cast: See comment in mshv_tdx_handle_simple_icr_write + */ + atomic_or(val, (atomic_t *)(&run->proxy_irr[i])); + WRITE_ONCE(run->scan_proxy_irr, 1); + return true; + } + + /* + * IRR is non-contiguous. + * Each bank is 4 bytes with 12 bytes of padding between banks. + */ + apic_page_irr[i * 4] |= val; + } + + return false; +} + +#endif /* defined(CONFIG_X86_64) */ static long __mshv_vtl_ioctl_check_extension(u32 arg) { @@ -320,7 +599,7 @@ static void mshv_vtl_configure_reg_page(struct mshv_vtl_per_cpu *per_cpu) } #ifdef CONFIG_X86_64 -static int mshv_configure_vmsa_page(u8 target_vtl, struct page** vmsa_page) +static int mshv_snp_configure_vmsa_page(u8 target_vtl, struct page** vmsa_page) { struct page *page; struct hv_register_assoc reg_assoc = {}; @@ -469,6 +748,7 @@ static void mshv_vtl_scan_proxy_interrupts(struct hv_per_cpu_context *per_cpu) } else { /* A malicious hypervisor might set a vector > 255. 
*/ vector = READ_ONCE(proxy->u.asserted_vector) & 0xff; + const u32 bank = vector / 32; const u32 masked_irr = BIT(vector % 32) & ~READ_ONCE(run->proxy_irr_blocked[bank]); @@ -626,16 +906,43 @@ static int mshv_vtl_alloc_context(unsigned int cpu) mshv_write_tdx_apic_page(page_to_phys(tdx_apic_page)); #endif } else if (hv_isolation_type_snp()) { -#ifdef CONFIG_X86_64 +#if defined(CONFIG_X86_64) && defined(CONFIG_SEV_GUEST) + struct page *snp_secure_avic_page; + u64 apic_id; int ret; - ret = mshv_configure_vmsa_page(0, &per_cpu->vmsa_page); + ret = mshv_snp_configure_vmsa_page(0, &per_cpu->vmsa_page); if (ret < 0) return ret; + + if (!cc_platform_has(CC_ATTR_SNP_SECURE_AVIC)) + goto synic; + + snp_secure_avic_page = alloc_page(GFP_KERNEL | __GFP_ZERO); + if (!snp_secure_avic_page) + return -ENOMEM; + + /* VMPL 2 for the VTL0 */ + ret = rmpadjust((unsigned long)page_address(snp_secure_avic_page), + RMP_PG_SIZE_4K, 2 | RMPADJUST_ENABLE_READ | RMPADJUST_ENABLE_WRITE); + if (ret) { + pr_err("failed to adjust RMP for the secure AVIC page: %d\n", ret); + free_page((u64)snp_secure_avic_page); + return -EINVAL; + } + + /* Some very basic initialization */ + // ret = sev_ghcb_msr_read(APIC_BASE_MSR + (APIC_ID >> 4), &apic_id); + // BUG_ON(ret != ES_OK); + // WRITE_ONCE(*((u32*)page_address(snp_secure_avic_page) + APIC_ID), lower_32_bits(apic_id)); + x2apic_savic_init_backing_page(page_address(snp_secure_avic_page)); // ??? + + per_cpu->snp_secure_avic_page = snp_secure_avic_page; #endif } else if (mshv_vsm_capabilities.intercept_page_available) mshv_vtl_configure_reg_page(per_cpu); +synic: mshv_vtl_synic_enable_regs(cpu); return 0; @@ -997,62 +1304,7 @@ static void mshv_vtl_idle(void) #define enter_mode(mode) ((mode) & MODE_MASK) #define reenter_mode(mode) (((mode) >> REENTER_SHIFT) & MODE_MASK) -/* - * Interrupt handling (particularly sending (via ICR writes) and receiving interrupts), - * is a hot path on TDX. By performing some of the common functionality entirely in-kernel - * we eliminate costly user<->kernel transitions. - */ -#ifndef CONFIG_INTEL_TDX_GUEST -static void mshv_tdx_free_apicid_to_cpuid_mapping(void) {} -static int mshv_tdx_create_apicid_to_cpuid_mapping(struct device *) { return 0; } -static bool mshv_tdx_try_handle_exit(struct mshv_vtl_run *) { return false; } -#else -static void mshv_tdx_free_apicid_to_cpuid_mapping(void) -{ - int bkt; - struct apicid_to_cpuid_entry *entry; - struct hlist_node *tmp; - - hash_for_each_safe(apicid_to_cpuid, bkt, tmp, entry, node) { - hash_del(&entry->node); - kfree(entry); - } -} - -/* - * Creates and populates the apicid_to_cpuid hash table. - * This mapping is used for fast ICR emulation on TDX. - * Returns 0 on success. 
- */ -static int mshv_tdx_create_apicid_to_cpuid_mapping(struct device *dev) -{ - int cpu, ret = 0; - - for_each_online_cpu(cpu) { - struct apicid_to_cpuid_entry *entry = kzalloc(sizeof(*entry), GFP_KERNEL); - - if (!entry) { - ret = -ENOMEM; - break; - } - - entry->apicid = cpuid_to_apicid[cpu]; - entry->cpuid = cpu; - - if (entry->apicid == BAD_APICID) { - dev_emerg(dev, "Bad APICID: %d !!\n", entry->apicid); - ret = -ENODEV; - break; - } - - hash_add(apicid_to_cpuid, &entry->node, entry->apicid); - } - - if (ret) - mshv_tdx_free_apicid_to_cpuid_mapping(); - - return ret; -} +#ifdef CONFIG_INTEL_TDX_GUEST static void mshv_tdx_advance_to_next_instruction(struct tdx_vp_context *context) { @@ -1090,28 +1342,6 @@ static bool mshv_tdx_is_simple_icr_write(const struct tdx_vp_context *context) return fixed && edge; } -/* - * Returns the cpumask described by dest, where dest is a logical destination. - * cpu_mask should have no CPUs set. - * Returns 0 on success - */ -static int mshv_tdx_get_logical_cpumask(u32 dest, struct cpumask *cpu_mask) -{ - int ret = 0; - - while ((u16)dest) { - const u16 i = fls((u16)dest) - 1; - const u32 physical_id = (dest >> 16 << 4) | i; - - ret = mshv_tdx_set_cpumask_from_apicid(physical_id, cpu_mask); - dest &= ~BIT(i); - if (ret) - break; - } - - return ret; -} - /* * Attempts to handle an ICR write. Returns 0 if successful, other values * indicate user-space should be invoked to gracefully handle the error. @@ -1120,101 +1350,21 @@ static int mshv_tdx_handle_simple_icr_write(struct tdx_vp_context *context) { const u32 icr_lo = context->l2_enter_guest_state.rax; const u32 dest = context->l2_enter_guest_state.rdx; - const u8 shorthand = (icr_lo >> 18) & 0b11; - const u8 vector = icr_lo; - const u64 bank = vector / 32; - const u32 mask = BIT(vector % 32); - const u32 self = smp_processor_id(); - - bool send_ipi = false; struct cpumask local_mask = {}; - unsigned int cpu = 0; int ret = 0; - if (shorthand == 0b10 || dest == (u32)-1) { /* shorthand all or destination id == all */ - cpumask_copy(&local_mask, cpu_online_mask); - } else if (shorthand == 0b11) { /* shorthand all but self */ - cpumask_copy(&local_mask, cpu_online_mask); - cpumask_clear_cpu(self, &local_mask); - } else if (shorthand == 0b01) { /* shorthand self */ - cpumask_set_cpu(self, &local_mask); - } else if (icr_lo & BIT(11)) { /* logical */ - ret = mshv_tdx_get_logical_cpumask(dest, &local_mask); - } else { /* physical */ - ret = mshv_tdx_set_cpumask_from_apicid(dest, &local_mask); - } - + ret = mshv_cpu_mask_for_icr_write(icr_lo, dest, &local_mask); + if (ret) + return ret; + ret = mshv_update_proxy_irr_for_icr_write(icr_lo, &local_mask); if (ret) return ret; - - for_each_cpu(cpu, &local_mask) { - /* - * The kernel doesn't provide an atomic_or which operates on u32, - * so cast to atomic_t, which should have the same layout - */ - static_assert(sizeof(atomic_t) == sizeof(u32)); - atomic_or(mask, (atomic_t *) - (&(mshv_vtl_cpu_run(cpu)->proxy_irr[bank]))); - smp_store_release(&mshv_vtl_cpu_run(cpu)->scan_proxy_irr, 1); - send_ipi |= cpu != self; - } - - if (send_ipi) { - cpumask_clear_cpu(self, &local_mask); - __apic_send_IPI_mask(&local_mask, RESCHEDULE_VECTOR); - } - mshv_tdx_advance_to_next_instruction(context); mshv_tdx_clear_exit_reason(context); return 0; } -static u32 *mshv_tdx_vapic_irr(void) -{ - return (u32 *)((char *)page_address(tdx_this_apic_page()) + APIC_IRR); -} - -/* - * Pull the interrupts in the `proxy_irr` field into the VAPIC page - * Returns true if an exit to user-space is 
required (sync tmr state) - */ -static bool mshv_tdx_pull_proxy_irr(struct mshv_vtl_run *run) -{ - u32 *apic_page_irr = mshv_tdx_vapic_irr(); - - if (!xchg(&run->scan_proxy_irr, 0)) - return false; - - for (int i = 0; i < 8; i++) { - const u32 val = xchg(&run->proxy_irr[i], 0); - - if (!val) - continue; - - if (run->proxy_irr_exit_mask[i] & val) { - /* - * This vector was previously used for a level-triggered interrupt. - * An edge-triggered interrupt has now arrived, so we need to involve - * user-space to clear its copy of the tmr. - * Put the interrupt(s) back on the run page so it can do so. - * nb atomic_t cast: See comment in mshv_tdx_handle_simple_icr_write - */ - atomic_or(val, (atomic_t *)(&run->proxy_irr[i])); - WRITE_ONCE(run->scan_proxy_irr, 1); - return true; - } - - /* - * IRR is non-contiguous. - * Each bank is 4 bytes with 12 bytes of padding between banks. - */ - apic_page_irr[i * 4] |= val; - } - - return false; -} - /* * Checks if exit reason is due: * - An interrupt for the L1 @@ -1334,6 +1484,179 @@ static bool mshv_tdx_try_handle_exit(struct mshv_vtl_run *run) } #endif /* CONFIG_INTEL_TDX_GUEST */ +#if defined(CONFIG_SEV_GUEST) + +static struct page *snp_this_savic_page(void) +{ + return *this_cpu_ptr(&mshv_vtl_per_cpu.snp_secure_avic_page); +} + +static struct sev_es_save_area *snp_this_vmsa(void) +{ + struct page *vmsa_page = *this_cpu_ptr(&mshv_vtl_per_cpu.vmsa_page); + return page_address(vmsa_page); +} + +/* + * Sets a benign guest error code so that there won't be another + * #VMEXIT for the just processed one and marks the VMSA as + * runnable. + */ +static void mshv_snp_clear_exit_code(struct sev_es_save_area *vmsa, bool int_shadow) +{ + if (int_shadow) + vmsa->vintr_ctrl |= V_INT_SHADOW_MASK; + else + vmsa->vintr_ctrl &= ~V_INT_SHADOW_MASK; + vmsa->guest_exit_code = SVM_EXIT_INTR; + vmsa->vintr_ctrl &= ~V_GUEST_BUSY_MASK; +} + +/* + * Try to handle the incomplete IPI SEV-SNP exit. + * + * Returns true if the exit was handled entirely in kernel, and the VMPL should be re-entered. + * Returns false if the exit must be handled by user-space. + */ +static bool mshv_snp_try_handle_incomplete_ipi(struct mshv_vtl_run *run, + struct sev_es_save_area *vmsa) +{ + u32 icr_lo = vmsa->guest_exit_info_1; + u32 dest = vmsa->guest_exit_info_1 >> 32; + + /* Route the INIT, SIPI, NMI to the user mode for now. */ + if ((icr_lo & APIC_DM_FIXED_MASK) != APIC_DM_FIXED) + return false; + /* Can handle only edge-triggered interrupts. */ + if (icr_lo & APIC_INT_LEVELTRIG) + return false; + + if (mshv_snp_handle_simple_icr_write(icr_lo, dest)) + return false; + + return true; +} + +/* + * Try to handle an SEV-SNP exit entirely in kernel, to avoid the overhead of a + * user<->kernel transition. + * + * Returns true if the exit was handled entirely in kernel, and the VMPL should be re-entered. + * Returns false if the exit must be handled by user-space. 
+ */ +static bool mshv_snp_try_handle_exit(struct mshv_vtl_run *run) +{ + const bool intr_inject = MSHV_VTL_OFFLOAD_FLAG_INTR_INJECT & run->offload_flags; + const bool x2apic = MSHV_VTL_OFFLOAD_FLAG_X2APIC & run->offload_flags; + struct sev_es_save_area *vmsa; + u8 *offload_flags; + + if (!intr_inject || !x2apic) + return false; + + vmsa = snp_this_vmsa(); + + switch (vmsa->guest_exit_code) + { + case SVM_EXIT_AVIC_INCOMPLETE_IPI: + if (mshv_snp_try_handle_incomplete_ipi(run, vmsa)) + goto handled; + break; + case SVM_EXIT_HLT: + run->flags |= MSHV_VTL_RUN_FLAG_HALTED; + run->offload_flags |= MSHV_VTL_OFFLOAD_FLAG_HALT_HLT; + goto handled; + case SVM_EXIT_IDLE_HLT: + run->flags |= MSHV_VTL_RUN_FLAG_HALTED; + run->offload_flags |= MSHV_VTL_OFFLOAD_FLAG_HALT_IDLE; + goto handled; + case SVM_EXIT_MSR: + if (vmsa->rcx == HV_X64_MSR_GUEST_IDLE && !(vmsa->guest_exit_info_1 & 1)) { + /* The guest indicates it's idle by reading this synthetic MSR. */ + vmsa->rax = 0; + vmsa->rdx = 0; + vmsa->rip = vmsa->guest_nrip; + + run->offload_flags |= MSHV_VTL_OFFLOAD_FLAG_HALT_IDLE; + run->flags |= MSHV_VTL_RUN_FLAG_HALTED; + + goto handled; + } + break; + default: + break; + } + + offload_flags = &run->offload_flags; + (*offload_flags) &= ~MSHV_VTL_OFFLOAD_FLAG_HALT_HLT; + (*offload_flags) &= ~MSHV_VTL_OFFLOAD_FLAG_HALT_IDLE; + if (!(*offload_flags & MSHV_VTL_OFFLOAD_FLAG_HALT_OTHER)) + run->flags &= ~MSHV_VTL_RUN_FLAG_HALTED; + + return false; + +handled: + + mshv_snp_clear_exit_code(vmsa, false); + return true; +} + +static bool mshv_snp_try_handle_intercept(struct mshv_vtl_run *run) +{ + struct hv_vp_assist_page *hvp = hv_vp_assist_page[smp_processor_id()]; + u32 msg_type = HVMSG_NONE; + struct hv_message *msg = NULL; + + switch (hvp->vtl_entry_reason) { + case MSHV_ENTRY_REASON_INTERRUPT: + if (!mshv_vsm_capabilities.intercept_page_available) + { + struct hv_per_cpu_context *mshv_cpu = this_cpu_ptr(hv_context.cpu_context); + void *synic_message_page = mshv_cpu->synic_message_page; + + if (likely(synic_message_page)) + msg = (struct hv_message *)synic_message_page + HV_SYNIC_INTERCEPTION_SINT_INDEX; + } + break; + + case MSHV_ENTRY_REASON_INTERCEPT: + WARN_ON(!mshv_vsm_capabilities.intercept_page_available); + msg = (struct hv_message *)hvp->intercept_message; + break; + + default: + panic("unknown entry reason: %d", hvp->vtl_entry_reason); + } + + if (!msg) + return true; + msg_type = READ_ONCE(msg->header.message_type); + + switch (msg_type) { + case HVMSG_NONE: + break; + case HVMSG_X64_EXCEPTION_INTERCEPT: + { + struct hv_x64_exception_intercept_message *expt_msg = + (struct hv_x64_exception_intercept_message*)msg->u.payload; + if (expt_msg->exception_vector != X86_TRAP_VC) + return false; + } + break; + case HVMSG_SYNIC_SINT_DELIVERABLE: + return false; + case HVMSG_X64_HALT: + run->flags |= MSHV_VTL_RUN_FLAG_HALTED; + run->offload_flags |= MSHV_VTL_OFFLOAD_FLAG_HALT_HLT; + break; + default: + return false; + } + + return true; +} +#endif /* CONFIG_SEV_GUEST */ + /* * Attempts to directly inject the interrupts in the proxy_irr field. * Returns true if an exit to user-space is required. 
@@ -1342,14 +1665,20 @@ static bool mshv_pull_proxy_irr(struct mshv_vtl_run *run) { bool ret = READ_ONCE(run->scan_proxy_irr); - if (!hv_isolation_type_tdx() || - !(run->offload_flags & MSHV_VTL_OFFLOAD_FLAG_INTR_INJECT)) + if (!(run->offload_flags & MSHV_VTL_OFFLOAD_FLAG_INTR_INJECT)) return ret; + if (hv_isolation_type_tdx()) { #ifdef CONFIG_INTEL_TDX_GUEST - ret = mshv_tdx_pull_proxy_irr(run); - mshv_tdx_update_rvi_halt(run); + ret = __mshv_pull_proxy_irr(run, tdx_this_apic_page()); + mshv_tdx_update_rvi_halt(run); +#endif + } else if (hv_isolation_type_snp()) { +#ifdef CONFIG_SEV_GUEST + ret = __mshv_pull_proxy_irr(run, snp_this_savic_page()); #endif + } + return ret; } @@ -1423,6 +1752,10 @@ static int mshv_vtl_ioctl_return_to_lower_vtl(void) continue; /* Exit handled entirely in kernel */ else goto done; + } else if (hv_isolation_type_snp()) { + if (mshv_snp_try_handle_intercept(mshv_vtl_this_run()) && + mshv_snp_try_handle_exit(mshv_vtl_this_run())) + continue; /* Exit handled entirely in kernel */ } hvp = hv_vp_assist_page[smp_processor_id()]; @@ -1921,7 +2254,7 @@ static void guest_vsm_vmsa_pfn_this_cpu(void *arg) cpu = get_cpu(); vmsa_guest_vsm_page = *this_cpu_ptr(&mshv_vtl_per_cpu.vmsa_guest_vsm_page); if (!vmsa_guest_vsm_page) { - if (mshv_configure_vmsa_page(1, per_cpu_ptr(&mshv_vtl_per_cpu.vmsa_guest_vsm_page, cpu))) + if (mshv_snp_configure_vmsa_page(1, per_cpu_ptr(&mshv_vtl_per_cpu.vmsa_guest_vsm_page, cpu))) *pfn = -ENOMEM; else vmsa_guest_vsm_page = *this_cpu_ptr(&mshv_vtl_per_cpu.vmsa_guest_vsm_page); @@ -1952,6 +2285,41 @@ static long mshv_vtl_ioctl_guest_vsm_vmsa_pfn(void __user *user_arg) return ret; } + +static void secure_avic_vtl0_this_cpu(void *arg) +{ + int cpu; + struct page *snp_secure_avic_page; + u64 *pfn = arg; + + cpu = get_cpu(); + snp_secure_avic_page = *this_cpu_ptr(&mshv_vtl_per_cpu.snp_secure_avic_page); + put_cpu(); + + *pfn = snp_secure_avic_page ? page_to_pfn(snp_secure_avic_page) : -ENOMEM; +} + +static long mshv_vtl_ioctl_secure_avic_vtl0_pfn(void __user *user_arg) +{ + u64 pfn; + u32 cpu_id; + long ret; + + ret = copy_from_user(&cpu_id, user_arg, sizeof(cpu_id)) ? -EFAULT : 0; + if (ret) + return ret; + + ret = smp_call_function_single(cpu_id, secure_avic_vtl0_this_cpu, &pfn, true); + if (ret) + return ret; + ret = (long)pfn; + if (ret < 0) + return ret; + + ret = copy_to_user(user_arg, &pfn, sizeof(pfn)) ? 
-EFAULT : 0; + + return ret; +} #endif static void ack_kick(void *cancel_cpu_run) @@ -2084,6 +2452,9 @@ mshv_vtl_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) case MSHV_VTL_GUEST_VSM_VMSA_PFN: ret = mshv_vtl_ioctl_guest_vsm_vmsa_pfn((void __user *)arg); break; + case MSHV_VTL_SECURE_AVIC_VTL0_PFN: + ret = mshv_vtl_ioctl_secure_avic_vtl0_pfn((void __user *)arg); + break; #endif case MSHV_VTL_KICK_CPU: @@ -2100,7 +2471,7 @@ mshv_vtl_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) static vm_fault_t mshv_vtl_fault(struct vm_fault *vmf) { - struct page *page; + struct page *page = NULL; int cpu = vmf->pgoff & MSHV_PG_OFF_CPU_MASK; int real_off = vmf->pgoff >> MSHV_REAL_OFF_SHIFT; @@ -2124,7 +2495,7 @@ static vm_fault_t mshv_vtl_fault(struct vm_fault *vmf) return VM_FAULT_SIGBUS; page_ptr_ptr = per_cpu_ptr(&mshv_vtl_per_cpu.vmsa_guest_vsm_page, cpu); if (!*page_ptr_ptr) { - if (mshv_configure_vmsa_page(1, page_ptr_ptr) < 0) + if (mshv_snp_configure_vmsa_page(1, page_ptr_ptr) < 0) return VM_FAULT_SIGBUS; } page = *page_ptr_ptr; @@ -2132,18 +2503,16 @@ static vm_fault_t mshv_vtl_fault(struct vm_fault *vmf) if (!hv_isolation_type_snp()) return VM_FAULT_SIGBUS; page = *per_cpu_ptr(&mshv_vtl_per_cpu.vmsa_page, cpu); -#ifdef CONFIG_INTEL_TDX_GUEST } else if (real_off == MSHV_APIC_PAGE_OFFSET) { - if (!hv_isolation_type_tdx()) - return VM_FAULT_SIGBUS; - - page = tdx_apic_page(cpu); -#endif + page = mshv_apic_page(cpu); #endif } else { return VM_FAULT_NOPAGE; } + if (!page) + return VM_FAULT_SIGBUS; + get_page(page); vmf->page = page; @@ -2774,7 +3143,7 @@ static int __init mshv_vtl_init(void) goto unset_func; } - ret = mshv_tdx_create_apicid_to_cpuid_mapping(dev); + ret = mshv_create_apicid_to_cpuid_mapping(dev); if (ret) goto unset_func; @@ -2843,7 +3212,7 @@ static int __init mshv_vtl_init(void) static void __exit mshv_vtl_exit(void) { mshv_setup_vtl_func(NULL, NULL, NULL); - mshv_tdx_free_apicid_to_cpuid_mapping(); + mshv_free_apicid_to_cpuid_mapping(); misc_deregister(&mshv_vtl_sint_dev); misc_deregister(&mshv_vtl_hvcall); misc_deregister(&mshv_vtl_low); diff --git a/include/uapi/linux/mshv.h b/include/uapi/linux/mshv.h index 7ba3a3f24989..ebe390277092 100644 --- a/include/uapi/linux/mshv.h +++ b/include/uapi/linux/mshv.h @@ -367,6 +367,7 @@ struct mshv_kick_cpus { #define MSHV_VTL_RMPQUERY _IOW(MSHV_IOCTL, 0x35, struct mshv_rmpquery) #define MSHV_VTL_INVLPGB _IOW(MSHV_IOCTL, 0x36, struct mshv_invlpgb) #define MSHV_VTL_TLBSYNC _IO(MSHV_IOCTL, 0x37) +#define MSHV_VTL_SECURE_AVIC_VTL0_PFN _IOWR(MSHV_IOCTL, 0x39, __u64) /* VMBus device IOCTLs */ From 47892f277b26188d811c3790fbe6b56bd606fb79 Mon Sep 17 00:00:00 2001 From: Roman Kisel Date: Thu, 3 Jul 2025 18:12:13 -0700 Subject: [PATCH 26/26] don't rely on nrip --- drivers/hv/mshv_vtl_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hv/mshv_vtl_main.c b/drivers/hv/mshv_vtl_main.c index b587cce8fc82..412c227bfd39 100644 --- a/drivers/hv/mshv_vtl_main.c +++ b/drivers/hv/mshv_vtl_main.c @@ -1575,7 +1575,7 @@ static bool mshv_snp_try_handle_exit(struct mshv_vtl_run *run) /* The guest indicates it's idle by reading this synthetic MSR. */ vmsa->rax = 0; vmsa->rdx = 0; - vmsa->rip = vmsa->guest_nrip; + vmsa->rip += 2; /* vmsa->guest_nrip might not be available although here it should be. */ run->offload_flags |= MSHV_VTL_OFFLOAD_FLAG_HALT_IDLE; run->flags |= MSHV_VTL_RUN_FLAG_HALTED;