Merge tag 'x86_urgent_for_v6.13_rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 fixes from Borislav Petkov:

 - Have the Automatic IBRS setting check on AMD not falsely fire in the
   guest when it has been set already on the host

 - Make sure memory for the cacheinfo structures is allocated, to
   address a boot NULL pointer dereference on Intel Meteor Lake, which
   has different numbers of subleafs in its CPUID(4) leaf

 - Restore the GDT on the kexec path too, as expected by the kernel

 - Make sure SMP is not disabled when IO-APIC is disabled on the kernel
   cmdline

 - Add a PGD flag _PAGE_NOPTISHADOW to instruct the PTI machinery not
   to propagate changes to the kernel-mode page tables to the user
   portion

 - Mark Intel Lunar Lake as affected by an issue where MONITOR wakeups
   can get lost and thus user-visible delays happen

 - Make sure PKRU is properly restored with XRSTOR on AMD after a PKRU
   write of 0 (WRPKRU), which marks PKRU as being in its init state and
   would thus lose the actual value in the XSAVE buffer (see the sketch
   below)
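
To make the PKRU problem concrete: XRSTOR consults the sigframe's XSTATE_BV bitmap, and any component whose bit is clear is reinitialized instead of being loaded from the buffer. The following is a minimal userspace model of that behavior, not kernel code; struct and function names here are made up for illustration:

/*
 * Minimal userspace model -- NOT kernel code -- of how XRSTOR treats a
 * component whose XSTATE_BV bit is clear: it loads the init state, not
 * the buffer contents. All names are illustrative.
 */
#include <stdint.h>
#include <stdio.h>

#define XFEATURE_MASK_PKRU_MODEL (1ULL << 9) /* PKRU is xstate component 9 */

struct xsave_buf_model {
	uint64_t xstate_bv; /* which components the buffer claims to hold */
	uint32_t pkru;      /* PKRU slot in the buffer */
};

/* What XRSTOR conceptually does for the PKRU component. */
static uint32_t xrstor_pkru_model(const struct xsave_buf_model *buf)
{
	if (buf->xstate_bv & XFEATURE_MASK_PKRU_MODEL)
		return buf->pkru; /* restore the saved value */
	return 0;                 /* init state: the buffer value is ignored */
}

int main(void)
{
	/*
	 * XSAVE ran while PKRU was in its init state (e.g. after WRPKRU
	 * with 0), so the PKRU bit in XSTATE_BV came out clear.
	 */
	struct xsave_buf_model frame = { .xstate_bv = 0, .pkru = 0 };

	/* The signal code then writes the intended value into the buffer. */
	frame.pkru = 0x55555554;
	printf("without the fix: %#x\n", (unsigned)xrstor_pkru_model(&frame));

	/* The fix also marks PKRU as in-use in XSTATE_BV. */
	frame.xstate_bv |= XFEATURE_MASK_PKRU_MODEL;
	printf("with the fix:    %#x\n", (unsigned)xrstor_pkru_model(&frame));
	return 0;
}
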

* tag 'x86_urgent_for_v6.13_rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/CPU/AMD: WARN when setting EFER.AUTOIBRS if and only if the WRMSR fails
  x86/cacheinfo: Delete global num_cache_leaves
  cacheinfo: Allocate memory during CPU hotplug if not done from the primary CPU
  x86/kexec: Restore GDT on return from ::preserve_context kexec
  x86/cpu/topology: Remove limit of CPUs due to disabled IO/APIC
  x86/mm: Add _PAGE_NOPTISHADOW bit to avoid updating userspace page tables
  x86/cpu: Add Lunar Lake to list of CPUs with a broken MONITOR implementation
  x86/pkeys: Ensure updated PKRU value is XRSTOR'd
  x86/pkeys: Change caller of update_pkru_in_sigframe()
torvalds committed Dec 8, 2024
2 parents 553c89e + 4920776 commit 8426226
Showing 11 changed files with 81 additions and 58 deletions.
8 changes: 6 additions & 2 deletions arch/x86/include/asm/pgtable_types.h
@@ -36,10 +36,12 @@
 #define _PAGE_BIT_DEVMAP	_PAGE_BIT_SOFTW4
 
 #ifdef CONFIG_X86_64
-#define _PAGE_BIT_SAVED_DIRTY	_PAGE_BIT_SOFTW5 /* Saved Dirty bit */
+#define _PAGE_BIT_SAVED_DIRTY	_PAGE_BIT_SOFTW5 /* Saved Dirty bit (leaf) */
+#define _PAGE_BIT_NOPTISHADOW	_PAGE_BIT_SOFTW5 /* No PTI shadow (root PGD) */
 #else
 /* Shared with _PAGE_BIT_UFFD_WP which is not supported on 32 bit */
-#define _PAGE_BIT_SAVED_DIRTY	_PAGE_BIT_SOFTW2 /* Saved Dirty bit */
+#define _PAGE_BIT_SAVED_DIRTY	_PAGE_BIT_SOFTW2 /* Saved Dirty bit (leaf) */
+#define _PAGE_BIT_NOPTISHADOW	_PAGE_BIT_SOFTW2 /* No PTI shadow (root PGD) */
 #endif
 
 /* If _PAGE_BIT_PRESENT is clear, we use these: */
@@ -139,6 +141,8 @@
 
 #define _PAGE_PROTNONE	(_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE)
 
+#define _PAGE_NOPTISHADOW	(_AT(pteval_t, 1) << _PAGE_BIT_NOPTISHADOW)
+
 /*
  * Set of bits not changed in pte_modify. The pte's
  * protection key is treated like _PAGE_RW, for

2 changes: 1 addition & 1 deletion arch/x86/kernel/cpu/amd.c
@@ -1065,7 +1065,7 @@ static void init_amd(struct cpuinfo_x86 *c)
 	 */
 	if (spectre_v2_in_eibrs_mode(spectre_v2_enabled) &&
 	    cpu_has(c, X86_FEATURE_AUTOIBRS))
-		WARN_ON_ONCE(!msr_set_bit(MSR_EFER, _EFER_AUTOIBRS));
+		WARN_ON_ONCE(msr_set_bit(MSR_EFER, _EFER_AUTOIBRS) < 0);
 
 	/* AMD CPUs don't need fencing after x2APIC/TSC_DEADLINE MSR writes. */
 	clear_cpu_cap(c, X86_FEATURE_APIC_MSRS_FENCE);

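
For context on the fix above: msr_set_bit() returns a negative errno when the write fails, 0 when the bit was already set, and 1 when it had to be set. Below is a small self-contained userspace model of that convention (not kernel code; the "MSR" is a plain variable and names are illustrative) showing why only a negative return should WARN:

/*
 * Userspace model -- NOT kernel code -- of msr_set_bit()'s return
 * convention: < 0 on a failed write, 0 if the bit was already set,
 * 1 if the bit was newly set.
 */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#define EFER_AUTOIBRS_BIT 21 /* _EFER_AUTOIBRS */

static bool wrmsr_should_fail; /* simulate a failing WRMSR */

static int msr_set_bit_model(uint64_t *msr, unsigned int bit)
{
	uint64_t old = *msr, val = old | (1ULL << bit);

	if (val == old)
		return 0;  /* already set: nothing to write */
	if (wrmsr_should_fail)
		return -5; /* -EIO: the WRMSR itself failed */
	*msr = val;
	return 1;          /* bit was newly set */
}

int main(void)
{
	uint64_t efer;

	/*
	 * Guest case: the host already set AUTOIBRS, so the call returns 0.
	 * The old WARN_ON_ONCE(!...) fired here although nothing was wrong.
	 */
	efer = 1ULL << EFER_AUTOIBRS_BIT;
	assert(msr_set_bit_model(&efer, EFER_AUTOIBRS_BIT) == 0);

	/* Bare-metal case: the bit gets set, return is 1 -- no warning. */
	efer = 0;
	assert(msr_set_bit_model(&efer, EFER_AUTOIBRS_BIT) == 1);

	/* Only a failed write warrants the WARN: return < 0. */
	efer = 0;
	wrmsr_should_fail = true;
	assert(msr_set_bit_model(&efer, EFER_AUTOIBRS_BIT) < 0);
	return 0;
}
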
43 changes: 21 additions & 22 deletions arch/x86/kernel/cpu/cacheinfo.c
@@ -178,8 +178,6 @@ struct _cpuid4_info_regs {
 	struct amd_northbridge *nb;
 };
 
-static unsigned short num_cache_leaves;
-
 /* AMD doesn't have CPUID4. Emulate it here to report the same
    information to the user. This makes some assumptions about the machine:
    L2 not shared, no SMT etc. that is currently true on AMD CPUs.
@@ -717,20 +715,23 @@ void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c)
 
 void init_amd_cacheinfo(struct cpuinfo_x86 *c)
 {
+	struct cpu_cacheinfo *ci = get_cpu_cacheinfo(c->cpu_index);
+
 	if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
-		num_cache_leaves = find_num_cache_leaves(c);
+		ci->num_leaves = find_num_cache_leaves(c);
 	} else if (c->extended_cpuid_level >= 0x80000006) {
 		if (cpuid_edx(0x80000006) & 0xf000)
-			num_cache_leaves = 4;
+			ci->num_leaves = 4;
 		else
-			num_cache_leaves = 3;
+			ci->num_leaves = 3;
 	}
 }
 
 void init_hygon_cacheinfo(struct cpuinfo_x86 *c)
 {
-	num_cache_leaves = find_num_cache_leaves(c);
+	struct cpu_cacheinfo *ci = get_cpu_cacheinfo(c->cpu_index);
+
+	ci->num_leaves = find_num_cache_leaves(c);
 }
 
 void init_intel_cacheinfo(struct cpuinfo_x86 *c)
 {
@@ -740,21 +741,21 @@ void init_intel_cacheinfo(struct cpuinfo_x86 *c)
 	unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
 	unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
 	unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
+	struct cpu_cacheinfo *ci = get_cpu_cacheinfo(c->cpu_index);
 
 	if (c->cpuid_level > 3) {
-		static int is_initialized;
-
-		if (is_initialized == 0) {
-			/* Init num_cache_leaves from boot CPU */
-			num_cache_leaves = find_num_cache_leaves(c);
-			is_initialized++;
-		}
+		/*
+		 * There should be at least one leaf. A non-zero value means
+		 * that the number of leaves has been initialized.
+		 */
+		if (!ci->num_leaves)
+			ci->num_leaves = find_num_cache_leaves(c);
 
 		/*
 		 * Whenever possible use cpuid(4), deterministic cache
 		 * parameters cpuid leaf to find the cache details
 		 */
-		for (i = 0; i < num_cache_leaves; i++) {
+		for (i = 0; i < ci->num_leaves; i++) {
 			struct _cpuid4_info_regs this_leaf = {};
 			int retval;
 
@@ -790,14 +791,14 @@ void init_intel_cacheinfo(struct cpuinfo_x86 *c)
 	 * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for
 	 * trace cache
 	 */
-	if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
+	if ((!ci->num_leaves || c->x86 == 15) && c->cpuid_level > 1) {
 		/* supports eax=2 call */
 		int j, n;
 		unsigned int regs[4];
 		unsigned char *dp = (unsigned char *)regs;
 		int only_trace = 0;
 
-		if (num_cache_leaves != 0 && c->x86 == 15)
+		if (ci->num_leaves && c->x86 == 15)
 			only_trace = 1;
 
 		/* Number of times to iterate */
@@ -991,14 +992,12 @@ static void ci_leaf_init(struct cacheinfo *this_leaf,
 
 int init_cache_level(unsigned int cpu)
 {
-	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
+	struct cpu_cacheinfo *ci = get_cpu_cacheinfo(cpu);
 
-	if (!num_cache_leaves)
+	/* There should be at least one leaf. */
+	if (!ci->num_leaves)
 		return -ENOENT;
-	if (!this_cpu_ci)
-		return -EINVAL;
-	this_cpu_ci->num_levels = 3;
-	this_cpu_ci->num_leaves = num_cache_leaves;
 
 	return 0;
 }
 
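
Why the global had to go: on hybrid parts, different core types can enumerate different numbers of CPUID(4) leaves, so a single value captured from the boot CPU is wrong for the other core type. A tiny userspace illustration (not kernel code; the counts are made up):

/*
 * Userspace illustration -- NOT kernel code -- of why one global leaf
 * count cannot describe a hybrid CPU. Counts are invented.
 */
#include <stdio.h>

struct cpu_cacheinfo_model {
	unsigned int num_leaves;
};

int main(void)
{
	/* Per-CPU counts, as the fix stores them: */
	struct cpu_cacheinfo_model ci[2] = {
		{ .num_leaves = 4 }, /* core type A: L1d, L1i, L2, L3 */
		{ .num_leaves = 3 }, /* core type B: fewer leaves */
	};

	/* A global initialized once from CPU 0 ... */
	unsigned int global_leaves = ci[0].num_leaves;

	/* ... is simply wrong for CPU 1, which enumerates a
	 * different count. */
	printf("global=%u, but cpu1 has %u leaves\n",
	       global_leaves, ci[1].num_leaves);
	return 0;
}
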
4 changes: 3 additions & 1 deletion arch/x86/kernel/cpu/intel.c
@@ -555,7 +555,9 @@ static void init_intel(struct cpuinfo_x86 *c)
 	    c->x86_vfm == INTEL_WESTMERE_EX))
 		set_cpu_bug(c, X86_BUG_CLFLUSH_MONITOR);
 
-	if (boot_cpu_has(X86_FEATURE_MWAIT) && c->x86_vfm == INTEL_ATOM_GOLDMONT)
+	if (boot_cpu_has(X86_FEATURE_MWAIT) &&
+	    (c->x86_vfm == INTEL_ATOM_GOLDMONT ||
+	     c->x86_vfm == INTEL_LUNARLAKE_M))
 		set_cpu_bug(c, X86_BUG_MONITOR);
 
 #ifdef CONFIG_X86_64

6 changes: 3 additions & 3 deletions arch/x86/kernel/cpu/topology.c
@@ -428,8 +428,8 @@ void __init topology_apply_cmdline_limits_early(void)
 {
 	unsigned int possible = nr_cpu_ids;
 
-	/* 'maxcpus=0' 'nosmp' 'nolapic' 'disableapic' 'noapic' */
-	if (!setup_max_cpus || ioapic_is_disabled || apic_is_disabled)
+	/* 'maxcpus=0' 'nosmp' 'nolapic' 'disableapic' */
+	if (!setup_max_cpus || apic_is_disabled)
 		possible = 1;
 
 	/* 'possible_cpus=N' */
@@ -443,7 +443,7 @@
 
 static __init bool restrict_to_up(void)
 {
-	if (!smp_found_config || ioapic_is_disabled)
+	if (!smp_found_config)
 		return true;
 	/*
 	 * XEN PV is special as it does not advertise the local APIC

20 changes: 2 additions & 18 deletions arch/x86/kernel/fpu/signal.c
@@ -63,16 +63,6 @@ static inline bool check_xstate_in_sigframe(struct fxregs_state __user *fxbuf,
 	return true;
 }
 
-/*
- * Update the value of PKRU register that was already pushed onto the signal frame.
- */
-static inline int update_pkru_in_sigframe(struct xregs_state __user *buf, u32 pkru)
-{
-	if (unlikely(!cpu_feature_enabled(X86_FEATURE_OSPKE)))
-		return 0;
-	return __put_user(pkru, (unsigned int __user *)get_xsave_addr_user(buf, XFEATURE_PKRU));
-}
-
 /*
  * Signal frame handlers.
  */
@@ -168,14 +158,8 @@ static inline bool save_xstate_epilog(void __user *buf, int ia32_frame,
 
 static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf, u32 pkru)
 {
-	int err = 0;
-
-	if (use_xsave()) {
-		err = xsave_to_user_sigframe(buf);
-		if (!err)
-			err = update_pkru_in_sigframe(buf, pkru);
-		return err;
-	}
+	if (use_xsave())
+		return xsave_to_user_sigframe(buf, pkru);
 
 	if (use_fxsr())
 		return fxsave_to_user_sigframe((struct fxregs_state __user *) buf);

27 changes: 26 additions & 1 deletion arch/x86/kernel/fpu/xstate.h
@@ -69,6 +69,28 @@ static inline u64 xfeatures_mask_independent(void)
 	return fpu_kernel_cfg.independent_features;
 }
 
+/*
+ * Update the value of PKRU register that was already pushed onto the signal frame.
+ */
+static inline int update_pkru_in_sigframe(struct xregs_state __user *buf, u64 mask, u32 pkru)
+{
+	u64 xstate_bv;
+	int err;
+
+	if (unlikely(!cpu_feature_enabled(X86_FEATURE_OSPKE)))
+		return 0;
+
+	/* Mark PKRU as in-use so that it is restored correctly. */
+	xstate_bv = (mask & xfeatures_in_use()) | XFEATURE_MASK_PKRU;
+
+	err = __put_user(xstate_bv, &buf->header.xfeatures);
+	if (err)
+		return err;
+
+	/* Update PKRU value in the userspace xsave buffer. */
+	return __put_user(pkru, (unsigned int __user *)get_xsave_addr_user(buf, XFEATURE_PKRU));
+}
+
 /* XSAVE/XRSTOR wrapper functions */
 
 #ifdef CONFIG_X86_64
@@ -256,7 +278,7 @@ static inline u64 xfeatures_need_sigframe_write(void)
 * The caller has to zero buf::header before calling this because XSAVE*
 * does not touch the reserved fields in the header.
 */
-static inline int xsave_to_user_sigframe(struct xregs_state __user *buf)
+static inline int xsave_to_user_sigframe(struct xregs_state __user *buf, u32 pkru)
 {
 	/*
	 * Include the features which are not xsaved/rstored by the kernel
@@ -281,6 +303,9 @@
 	XSTATE_OP(XSAVE, buf, lmask, hmask, err);
 	clac();
 
+	if (!err)
+		err = update_pkru_in_sigframe(buf, mask, pkru);
+
 	return err;
 }
 
7 changes: 7 additions & 0 deletions arch/x86/kernel/relocate_kernel_64.S
@@ -242,6 +242,13 @@ SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped)
 	movq	CR0(%r8), %r8
 	movq	%rax, %cr3
 	movq	%r8, %cr0
+
+#ifdef CONFIG_KEXEC_JUMP
+	/* Saved in save_processor_state. */
+	movq	$saved_context, %rax
+	lgdt	saved_context_gdt_desc(%rax)
+#endif
+
 	movq	%rbp, %rax
 
 	popf

6 changes: 3 additions & 3 deletions arch/x86/mm/ident_map.c
@@ -174,7 +174,7 @@ static int ident_p4d_init(struct x86_mapping_info *info, p4d_t *p4d_page,
 		if (result)
 			return result;
 
-		set_p4d(p4d, __p4d(__pa(pud) | info->kernpg_flag));
+		set_p4d(p4d, __p4d(__pa(pud) | info->kernpg_flag | _PAGE_NOPTISHADOW));
 	}
 
 	return 0;
@@ -218,14 +218,14 @@ int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
 		if (result)
 			return result;
 		if (pgtable_l5_enabled()) {
-			set_pgd(pgd, __pgd(__pa(p4d) | info->kernpg_flag));
+			set_pgd(pgd, __pgd(__pa(p4d) | info->kernpg_flag | _PAGE_NOPTISHADOW));
 		} else {
 			/*
			 * With p4d folded, pgd is equal to p4d.
			 * The pgd entry has to point to the pud page table in this case.
			 */
			pud_t *pud = pud_offset(p4d, 0);
-			set_pgd(pgd, __pgd(__pa(pud) | info->kernpg_flag));
+			set_pgd(pgd, __pgd(__pa(pud) | info->kernpg_flag | _PAGE_NOPTISHADOW));
 		}
 	}
 
2 changes: 1 addition & 1 deletion arch/x86/mm/pti.c
@@ -132,7 +132,7 @@ pgd_t __pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd)
 	 * Top-level entries added to init_mm's usermode pgd after boot
 	 * will not be automatically propagated to other mms.
 	 */
-	if (!pgdp_maps_userspace(pgdp))
+	if (!pgdp_maps_userspace(pgdp) || (pgd.pgd & _PAGE_NOPTISHADOW))
 		return pgd;
 
 	/*
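
The effect of the new check, sketched as a small userspace model (not kernel code; the bit value and helpers are stand-ins): a top-level entry is mirrored into the user half of the PTI page tables only if it maps userspace addresses and is not marked _PAGE_NOPTISHADOW.

/*
 * Userspace sketch -- NOT kernel code -- of the propagation decision
 * __pti_set_user_pgtbl() now makes. Constants are illustrative.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_NOPTISHADOW_MODEL (1ULL << 11) /* stand-in software bit */

static bool maps_userspace_model(uint64_t va)
{
	return va < 0x0000800000000000ULL; /* lower half of the VA space */
}

static bool propagate_to_user_shadow(uint64_t va, uint64_t pgd_val)
{
	/* Mirrors the fixed check in __pti_set_user_pgtbl(). */
	if (!maps_userspace_model(va) || (pgd_val & PAGE_NOPTISHADOW_MODEL))
		return false; /* leave the user page tables untouched */
	return true;
}

int main(void)
{
	/* Identity-map entries flagged NOPTISHADOW stay kernel-only. */
	printf("%d\n", propagate_to_user_shadow(0x1000, PAGE_NOPTISHADOW_MODEL)); /* 0 */
	printf("%d\n", propagate_to_user_shadow(0x1000, 0));                      /* 1 */
	return 0;
}
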
14 changes: 8 additions & 6 deletions drivers/base/cacheinfo.c
@@ -58,7 +58,7 @@ bool last_level_cache_is_valid(unsigned int cpu)
 {
 	struct cacheinfo *llc;
 
-	if (!cache_leaves(cpu))
+	if (!cache_leaves(cpu) || !per_cpu_cacheinfo(cpu))
 		return false;
 
 	llc = per_cpu_cacheinfo_idx(cpu, cache_leaves(cpu) - 1);
@@ -458,11 +458,9 @@ int __weak populate_cache_leaves(unsigned int cpu)
 	return -ENOENT;
 }
 
-static inline
-int allocate_cache_info(int cpu)
+static inline int allocate_cache_info(int cpu)
 {
-	per_cpu_cacheinfo(cpu) = kcalloc(cache_leaves(cpu),
-					 sizeof(struct cacheinfo), GFP_ATOMIC);
+	per_cpu_cacheinfo(cpu) = kcalloc(cache_leaves(cpu), sizeof(struct cacheinfo), GFP_ATOMIC);
 	if (!per_cpu_cacheinfo(cpu)) {
 		cache_leaves(cpu) = 0;
 		return -ENOMEM;
@@ -534,7 +532,11 @@ static inline int init_level_allocate_ci(unsigned int cpu)
 	 */
 	ci_cacheinfo(cpu)->early_ci_levels = false;
 
-	if (cache_leaves(cpu) <= early_leaves)
+	/*
	 * Some architectures (e.g., x86) do not use early initialization.
	 * Allocate memory now in such case.
	 */
+	if (cache_leaves(cpu) <= early_leaves && per_cpu_cacheinfo(cpu))
 		return 0;
 
 	kfree(per_cpu_cacheinfo(cpu));
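
The shape of that last fix, as a small userspace model (not kernel code; names and the error value are illustrative): returning early from the allocation path is only safe when the per-CPU array actually exists, which is not guaranteed on architectures such as x86 that skip the early-init pass.

/*
 * Userspace model -- NOT kernel code -- of the init_level_allocate_ci()
 * fix. Names and sizes are invented for the example.
 */
#include <stdlib.h>

#define NR_CPUS_MODEL 8

struct cacheinfo_model { int level; };

static struct cacheinfo_model *per_cpu_ci[NR_CPUS_MODEL];
static unsigned int leaves[NR_CPUS_MODEL];

static int init_level_allocate_ci_model(unsigned int cpu,
					unsigned int early_leaves)
{
	/* The early return is only safe if memory was already allocated. */
	if (leaves[cpu] <= early_leaves && per_cpu_ci[cpu])
		return 0;

	free(per_cpu_ci[cpu]);
	per_cpu_ci[cpu] = calloc(leaves[cpu], sizeof(struct cacheinfo_model));
	return per_cpu_ci[cpu] ? 0 : -12; /* -ENOMEM */
}

int main(void)
{
	leaves[0] = 3;
	/*
	 * Without the "&& per_cpu_ci[cpu]" part of the guard, the condition
	 * leaves <= early_leaves would return 0 here and leave
	 * per_cpu_ci[0] NULL for later users -- the boot-time NULL deref.
	 */
	return init_level_allocate_ci_model(0, 4);
}
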
