Skip to content

Commit

Permalink
change the way lock prefix is handled in Bochs decoder
Browse files Browse the repository at this point in the history
this solves issue with ALT_MOV_CR0 AMD's feature (where lock mov cr0 is treated as mov cr8)
and also speeds up decoding a bit
  • Loading branch information
Stanislav Shwartsman committed Oct 28, 2024
1 parent ce83913 commit 94df2e8
Show file tree
Hide file tree
Showing 6 changed files with 78 additions and 60 deletions.
22 changes: 11 additions & 11 deletions bochs/cpu/decoder/fetchdecode.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,18 +79,18 @@ BX_CPP_INLINE Bit64u FetchQWORD(const Bit8u *iptr)
}
#endif

#define BX_PREPARE_AMX (0x800)
#define BX_EVEX_VL_IGNORE (0x400 | BX_PREPARE_EVEX)
#define BX_PREPARE_EVEX_NO_BROADCAST (0x200 | BX_PREPARE_EVEX)
#define BX_PREPARE_EVEX_NO_SAE (0x100 | BX_PREPARE_EVEX)
#define BX_PREPARE_EVEX (0x80)
#define BX_PREPARE_OPMASK (0x40)
#define BX_PREPARE_AVX (0x20)
#define BX_PREPARE_SSE (0x10)
#define BX_PREPARE_MMX (0x08)
#define BX_PREPARE_FPU (0x04)
#define BX_LOCKABLE (0x02)
#define BX_PREPARE_AMX (0x400)
#define BX_EVEX_VL_IGNORE (0x200 | BX_PREPARE_EVEX)
#define BX_PREPARE_EVEX_NO_BROADCAST (0x100 | BX_PREPARE_EVEX)
#define BX_PREPARE_EVEX_NO_SAE (0x08 | BX_PREPARE_EVEX)

This comment has been minimized.

Copy link
@Vort

Vort Oct 29, 2024

Contributor

@stlintel, is 0x08 intentional here? Looks like typo and 0x80 should be instead.

This comment has been minimized.

Copy link
@stlintel

stlintel Oct 29, 2024

Contributor

You are right, typo !
Fixing it, thanks !

This comment has been minimized.

Copy link
@stlintel

stlintel Oct 29, 2024

Contributor

I had to revert entire thing. I am testing Bochs decoder against Intel's Xed reference decoder and new implementation had some mismatches related to instruction length. They are not critical, usually decide between #GP(0) because of too many opcode bytes and #UD due to misplaced lock prefix. But anyway, I prefer to have no mismatches - they really disturb during debugging new cases.

#define BX_PREPARE_EVEX (0x40)
#define BX_PREPARE_OPMASK (0x20)
#define BX_PREPARE_AVX (0x10)
#define BX_PREPARE_SSE (0x08)
#define BX_PREPARE_MMX (0x04)
#define BX_PREPARE_FPU (0x02)
#define BX_TRACE_END (0x01)
#define BX_LOCKABLE (0x00) // keep for history, nattribute not actually used anymore

struct bxIAOpcodeTable {
#ifndef BX_STANDALONE_DECODER
Expand Down
59 changes: 29 additions & 30 deletions bochs/cpu/decoder/fetchdecode32.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1908,6 +1908,9 @@ int decoder_vex32(const Bit8u *iptr, unsigned &remain, bxInstruction_c *i, unsig
#if BX_SUPPORT_AVX
unsigned rm = 0, mod = 0, nnn = 0;

if (i->getLock())
return(BX_IA_ERROR);

if (sse_prefix)
return(BX_IA_ERROR);

Expand Down Expand Up @@ -2024,6 +2027,9 @@ int decoder_evex32(const Bit8u *iptr, unsigned &remain, bxInstruction_c *i, unsi
#if BX_SUPPORT_EVEX
bool displ8 = false;

if (i->getLock())
return(BX_IA_ERROR);

if (sse_prefix)
return(BX_IA_ERROR);

Expand Down Expand Up @@ -2165,6 +2171,9 @@ int decoder_xop32(const Bit8u *iptr, unsigned &remain, bxInstruction_c *i, unsig
}

#if BX_SUPPORT_AVX
if (i->getLock())
return(BX_IA_ERROR);

// 3 byte XOP prefix
if (sse_prefix)
return(ia_opcode);
Expand Down Expand Up @@ -2237,6 +2246,9 @@ int decoder32_fp_escape(const Bit8u *iptr, unsigned &remain, bxInstruction_c *i,
if (! iptr)
return(-1);

if (i->getLock())
return(BX_IA_ERROR);

i->setFoo((modrm.modrm | (b1 << 8)) & 0x7ff); /* for x87 */

const Bit16u *x87_opmap[8] = {
Expand Down Expand Up @@ -2275,7 +2287,8 @@ int decoder32_modrm(const Bit8u *iptr, unsigned &remain, bxInstruction_c *i, uns
(sse_prefix << SSE_PREFIX_OFFSET) |
(i->modC0() ? (1 << MODC0_OFFSET) : 0) |
(modrm.nnn << NNN_OFFSET) |
(modrm.rm << RRR_OFFSET);
(modrm.rm << RRR_OFFSET) |
(i->getLock() << LOCK_PREFIX_OFFSET);
if (i->modC0() && modrm.nnn == modrm.rm)
decmask |= (1 << SRC_EQ_DST_OFFSET);

Expand All @@ -2298,6 +2311,7 @@ int decoder32(const Bit8u *iptr, unsigned &remain, bxInstruction_c *i, unsigned
Bit32u decmask = (i->osize() << OS32_OFFSET) |
(i->asize() << AS32_OFFSET) |
(sse_prefix << SSE_PREFIX_OFFSET) |
(i->getLock() << LOCK_PREFIX_OFFSET) |
(1 << MODC0_OFFSET);
if (nnn == rm)
decmask |= (1 << SRC_EQ_DST_OFFSET);
Expand Down Expand Up @@ -2332,6 +2346,7 @@ int decoder_creg32(const Bit8u *iptr, unsigned &remain, bxInstruction_c *i, unsi
Bit32u decmask = (i->osize() << OS32_OFFSET) |
(i->asize() << AS32_OFFSET) |
(sse_prefix << SSE_PREFIX_OFFSET) |
(i->getLock() << LOCK_PREFIX_OFFSET) |
(i->modC0() ? (1 << MODC0_OFFSET) : 0) |
(nnn << NNN_OFFSET) |
(rm << RRR_OFFSET);
Expand Down Expand Up @@ -2362,6 +2377,9 @@ int decoder32_3dnow(const Bit8u *iptr, unsigned &remain, bxInstruction_c *i, uns
return(-1);
}

if (i->getLock())
return(BX_IA_ERROR);

ia_opcode = Bx3DNowOpcode[i->modRMForm.Ib[0]];

assign_srcs(i, ia_opcode, modrm.nnn, modrm.rm);
Expand Down Expand Up @@ -2516,6 +2534,9 @@ int fetchDecode32(const Bit8u *iptr, bool is_32, bxInstruction_c *i, unsigned re
i->setCetSegOverride(seg_override_cet);
#endif

if (lock)
i->setLock();

i->modRMForm.Id = 0;

BxOpcodeDecodeDescriptor32 *decode_descriptor = &decode32_descriptor[b1];
Expand All @@ -2530,27 +2551,15 @@ int fetchDecode32(const Bit8u *iptr, bool is_32, bxInstruction_c *i, unsigned re
if (! BX_NULL_SEG_REG(seg_override))
i->setSeg(seg_override);

Bit32u op_flags = BxOpcodesTable[ia_opcode].opflags;

if (lock) {
i->setLock();
// lock prefix not allowed or destination operand is not memory
if (i->modC0() || !(op_flags & BX_LOCKABLE)) {
#if BX_CPU_LEVEL >= 6
if ((op_flags & BX_LOCKABLE) != 0) {
if (ia_opcode == BX_IA_MOV_CR0Rd)
i->setSrcReg(0, 8); // extend CR0 -> CR8
else if (ia_opcode == BX_IA_MOV_RdCR0)
i->setSrcReg(1, 8); // extend CR0 -> CR8
else
i->setIaOpcode(BX_IA_ERROR); // replace execution function with undefined-opcode
}
// lock prefix not allowed if destination operand is not memory
if (i->modC0()) {
if (ia_opcode == BX_IA_ALT_MOV_CR0Rd)
i->setSrcReg(0, 8); // extend CR0 -> CR8
else if (ia_opcode == BX_IA_ALT_MOV_RdCR0)
i->setSrcReg(1, 8); // extend CR0 -> CR8
else
#endif
{
// replace execution function with undefined-opcode
i->setIaOpcode(BX_IA_ERROR);
}
i->setIaOpcode(BX_IA_ERROR); // replace execution function with undefined-opcode
}
}

Expand Down Expand Up @@ -2746,16 +2755,6 @@ void BX_CPU_C::init_FetchDecodeTables(void)
BxOpcodesTable[BX_IA_TZCNT_GdEd] = BxOpcodesTable[BX_IA_BSF_GdEd];
#if BX_SUPPORT_X86_64
BxOpcodesTable[BX_IA_TZCNT_GqEq] = BxOpcodesTable[BX_IA_BSF_GqEq];
#endif
}

// handle lock MOV CR0 AMD extension
if (BX_CPUID_SUPPORT_ISA_EXTENSION(BX_ISA_ALT_MOV_CR8)) {
BxOpcodesTable[BX_IA_MOV_CR0Rd].opflags |= BX_LOCKABLE;
BxOpcodesTable[BX_IA_MOV_RdCR0].opflags |= BX_LOCKABLE;
#if BX_SUPPORT_X86_64
BxOpcodesTable[BX_IA_MOV_CR0Rq].opflags |= BX_LOCKABLE;
BxOpcodesTable[BX_IA_MOV_RqCR0].opflags |= BX_LOCKABLE;
#endif
}
}
Expand Down
45 changes: 28 additions & 17 deletions bochs/cpu/decoder/fetchdecode64.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1185,6 +1185,9 @@ int decoder_vex64(const Bit8u *iptr, unsigned &remain, bxInstruction_c *i, unsig
// VEX
assert((b1 & ~0x1) == 0xc4);

if (i->getLock())
return(BX_IA_ERROR);

if (sse_prefix | rex_prefix)
return(BX_IA_ERROR);

Expand Down Expand Up @@ -1309,6 +1312,9 @@ int decoder_evex64(const Bit8u *iptr, unsigned &remain, bxInstruction_c *i, unsi
// EVEX prefix 0x62
assert(b1 == 0x62);

if (i->getLock())
return(BX_IA_ERROR);

if (sse_prefix | rex_prefix)
return(BX_IA_ERROR);

Expand Down Expand Up @@ -1477,6 +1483,9 @@ int decoder_xop64(const Bit8u *iptr, unsigned &remain, bxInstruction_c *i, unsig
unsigned b2 = 0;
bool vex_w = 0;

if (i->getLock())
return(BX_IA_ERROR);

if (sse_prefix | rex_prefix)
return(ia_opcode);

Expand Down Expand Up @@ -1575,6 +1584,9 @@ int decoder64_fp_escape(const Bit8u *iptr, unsigned &remain, bxInstruction_c *i,
if (! iptr)
return(-1);

if (i->getLock())
return(BX_IA_ERROR);

i->setFoo((modrm.modrm | (b1 << 8)) & 0x7ff); /* for x87 */

const Bit16u *x87_opmap[8] = {
Expand Down Expand Up @@ -1611,6 +1623,7 @@ int decoder64_modrm(const Bit8u *iptr, unsigned &remain, bxInstruction_c *i, uns
(i->asize() << AS32_OFFSET) |
(sse_prefix << SSE_PREFIX_OFFSET) |
(i->modC0() ? (1 << MODC0_OFFSET) : 0) |
(i->getLock() << LOCK_PREFIX_OFFSET) |
((modrm.nnn & 0x7) << NNN_OFFSET) |
((modrm.rm & 0x7) << RRR_OFFSET);
if (i->modC0() && modrm.nnn == modrm.rm)
Expand Down Expand Up @@ -1685,6 +1698,7 @@ int decoder_creg64(const Bit8u *iptr, unsigned &remain, bxInstruction_c *i, unsi
(i->osize() << OS32_OFFSET) |
(i->asize() << AS32_OFFSET) |
(sse_prefix << SSE_PREFIX_OFFSET) |
(i->getLock() << LOCK_PREFIX_OFFSET) |
(1 << MODC0_OFFSET) |
((nnn & 0x7) << NNN_OFFSET) |
((rm & 0x7) << RRR_OFFSET);
Expand Down Expand Up @@ -1714,6 +1728,9 @@ int decoder64_3dnow(const Bit8u *iptr, unsigned &remain, bxInstruction_c *i, uns
return(-1);
}

if (i->getLock())
return(BX_IA_ERROR);

ia_opcode = Bx3DNowOpcode[i->modRMForm.Ib[0]];

assign_srcs(i, ia_opcode, modrm.nnn, modrm.rm);
Expand Down Expand Up @@ -2006,6 +2023,9 @@ int fetchDecode64(const Bit8u *iptr, bxInstruction_c *i, unsigned remainingInPag
i->setCetSegOverride(seg_override_cet);
#endif

if (lock)
i->setLock();

i->modRMForm.Id = 0;

BxOpcodeDecodeDescriptor64 *decode_descriptor = &decode64_descriptor[b1];
Expand All @@ -2020,24 +2040,15 @@ int fetchDecode64(const Bit8u *iptr, bxInstruction_c *i, unsigned remainingInPag
if (seg_override == BX_SEG_REG_FS || seg_override == BX_SEG_REG_GS)
i->setSeg(seg_override);

Bit32u op_flags = BxOpcodesTable[ia_opcode].opflags;

if (lock) {
i->setLock();
// lock prefix not allowed or destination operand is not memory
if (i->modC0() || !(op_flags & BX_LOCKABLE)) {
if ((op_flags & BX_LOCKABLE) != 0) {
if (ia_opcode == BX_IA_MOV_CR0Rq)
i->setSrcReg(0, 8); // extend CR0 -> CR8
else if (ia_opcode == BX_IA_MOV_RqCR0)
i->setSrcReg(1, 8); // extend CR0 -> CR8
else
i->setIaOpcode(BX_IA_ERROR); // replace execution function with undefined-opcode
}
else {
// replace execution function with undefined-opcode
i->setIaOpcode(BX_IA_ERROR);
}
// lock prefix not allowed if destination operand is not memory
if (i->modC0()) {
if (ia_opcode == BX_IA_ALT_MOV_CR0Rq)
i->setSrcReg(0, 8); // extend CR0 -> CR8
else if (ia_opcode == BX_IA_ALT_MOV_RqCR0)
i->setSrcReg(1, 8); // extend CR0 -> CR8
else
i->setIaOpcode(BX_IA_ERROR); // replace execution function with undefined-opcode
}
}

Expand Down
4 changes: 4 additions & 0 deletions bochs/cpu/decoder/fetchdecode_opmap.h
Original file line number Diff line number Diff line change
Expand Up @@ -1703,6 +1703,7 @@ static const Bit64u BxOpcodeTableMultiByteNOP[] = { last_opcode(0, BX_IA_NOP) };

// opcode 0F 20
static const Bit64u BxOpcodeTable0F20_32[] = {
form_opcode_lockable(ATTR_NNN0 | ATTR_LOCK, BX_IA_ALT_MOV_RdCR0),
form_opcode(ATTR_NNN0, BX_IA_MOV_RdCR0),
form_opcode(ATTR_NNN2, BX_IA_MOV_RdCR2),
form_opcode(ATTR_NNN3, BX_IA_MOV_RdCR3),
Expand All @@ -1711,6 +1712,7 @@ static const Bit64u BxOpcodeTable0F20_32[] = {

#if BX_SUPPORT_X86_64
static const Bit64u BxOpcodeTable0F20_64[] = {
form_opcode_lockable(ATTR_NNN0 | ATTR_LOCK, BX_IA_ALT_MOV_RqCR0),
form_opcode(ATTR_NNN0, BX_IA_MOV_RqCR0),
form_opcode(ATTR_NNN2, BX_IA_MOV_RqCR2),
form_opcode(ATTR_NNN3, BX_IA_MOV_RqCR3),
Expand All @@ -1726,6 +1728,7 @@ static const Bit64u BxOpcodeTable0F21_64[] = { last_opcode(0, BX_IA_MOV_RqDq) };

// opcode 0F 22
static const Bit64u BxOpcodeTable0F22_32[] = {
form_opcode_lockable(ATTR_NNN0 | ATTR_LOCK, BX_IA_ALT_MOV_CR0Rd),
form_opcode(ATTR_NNN0, BX_IA_MOV_CR0Rd),
form_opcode(ATTR_NNN2, BX_IA_MOV_CR2Rd),
form_opcode(ATTR_NNN3, BX_IA_MOV_CR3Rd),
Expand All @@ -1734,6 +1737,7 @@ static const Bit64u BxOpcodeTable0F22_32[] = {

#if BX_SUPPORT_X86_64
static const Bit64u BxOpcodeTable0F22_64[] = {
form_opcode_lockable(ATTR_NNN0 | ATTR_LOCK, BX_IA_ALT_MOV_CR0Rq),
form_opcode(ATTR_NNN0, BX_IA_MOV_CR0Rq),
form_opcode(ATTR_NNN2, BX_IA_MOV_CR2Rq),
form_opcode(ATTR_NNN3, BX_IA_MOV_CR3Rq),
Expand Down
4 changes: 4 additions & 0 deletions bochs/cpu/decoder/ia_opcodes.def
Original file line number Diff line number Diff line change
Expand Up @@ -479,10 +479,12 @@ bx_define_opcode(BX_IA_LTR_Ew, "ltr", "ltr", &BX_CPU_C::LTR_Ew, &BX_CPU_C::LTR_E
bx_define_opcode(BX_IA_SMSW_Ew, "smsw", "smsw", &BX_CPU_C::SMSW_EwM, &BX_CPU_C::SMSW_EwR, 0, OP_Ew, OP_NONE, OP_NONE, OP_NONE, 0)
bx_define_opcode(BX_IA_LMSW_Ew, "lmsw", "lmsw", &BX_CPU_C::LMSW_Ew, &BX_CPU_C::LMSW_Ew, 0, OP_NONE, OP_Ew, OP_NONE, OP_NONE, BX_TRACE_END)
bx_define_opcode(BX_IA_MOV_CR0Rd, "mov", "movl", NULL, &BX_CPU_C::MOV_CR0Rd, 0, OP_Cd, OP_Ed, OP_NONE, OP_NONE, BX_TRACE_END)
bx_define_opcode(BX_IA_ALT_MOV_CR0Rd, "mov", "movl", NULL, &BX_CPU_C::MOV_CR0Rd, BX_ISA_ALT_MOV_CR8, OP_Cd, OP_Ed, OP_NONE, OP_NONE, BX_TRACE_END)
bx_define_opcode(BX_IA_MOV_CR2Rd, "mov", "movl", NULL, &BX_CPU_C::MOV_CR2Rd, 0, OP_Cd, OP_Ed, OP_NONE, OP_NONE, 0)
bx_define_opcode(BX_IA_MOV_CR3Rd, "mov", "movl", NULL, &BX_CPU_C::MOV_CR3Rd, 0, OP_Cd, OP_Ed, OP_NONE, OP_NONE, BX_TRACE_END)
bx_define_opcode(BX_IA_MOV_CR4Rd, "mov", "movl", NULL, &BX_CPU_C::MOV_CR4Rd, BX_ISA_PENTIUM, OP_Cd, OP_Ed, OP_NONE, OP_NONE, BX_TRACE_END)
bx_define_opcode(BX_IA_MOV_RdCR0, "mov", "movl", NULL, &BX_CPU_C::MOV_RdCR0, 0, OP_Ed, OP_Cd, OP_NONE, OP_NONE, 0)
bx_define_opcode(BX_IA_ALT_MOV_RdCR0, "mov", "movl", NULL, &BX_CPU_C::MOV_RdCR0, BX_ISA_ALT_MOV_CR8, OP_Ed, OP_Cd, OP_NONE, OP_NONE, 0)
bx_define_opcode(BX_IA_MOV_RdCR2, "mov", "movl", NULL, &BX_CPU_C::MOV_RdCR2, 0, OP_Ed, OP_Cd, OP_NONE, OP_NONE, 0)
bx_define_opcode(BX_IA_MOV_RdCR3, "mov", "movl", NULL, &BX_CPU_C::MOV_RdCR3, 0, OP_Ed, OP_Cd, OP_NONE, OP_NONE, 0)
bx_define_opcode(BX_IA_MOV_RdCR4, "mov", "movl", NULL, &BX_CPU_C::MOV_RdCR4, BX_ISA_PENTIUM, OP_Ed, OP_Cd, OP_NONE, OP_NONE, 0)
Expand Down Expand Up @@ -1704,10 +1706,12 @@ bx_define_opcode(BX_IA_CVTSD2SI_GqWsd, "cvtsd2si", "cvtsd2siq", &BX_CPU_C::LOAD_
bx_define_opcode(BX_IA_MOVNTI_Op64_MdGd, "movnti", "movnti", &BX_CPU_C::MOV64_EdGdM, &BX_CPU_C::BxError, BX_ISA_SSE2, OP_Ed, OP_Gd, OP_NONE, OP_NONE, 0)
bx_define_opcode(BX_IA_MOVNTI_MqGq, "movnti", "movntiq", &BX_CPU_C::MOV_EqGqM, &BX_CPU_C::BxError, BX_ISA_SSE2, OP_Eq, OP_Gq, OP_NONE, OP_NONE, 0)
bx_define_opcode(BX_IA_MOV_CR0Rq, "mov", "movq", NULL, &BX_CPU_C::MOV_CR0Rq, 0, OP_Cq, OP_Eq, OP_NONE, OP_NONE, BX_TRACE_END)
bx_define_opcode(BX_IA_ALT_MOV_CR0Rq, "mov", "movq", NULL, &BX_CPU_C::MOV_CR0Rq, BX_ISA_ALT_MOV_CR8, OP_Cq, OP_Eq, OP_NONE, OP_NONE, BX_TRACE_END)
bx_define_opcode(BX_IA_MOV_CR2Rq, "mov", "movq", NULL, &BX_CPU_C::MOV_CR2Rq, 0, OP_Cq, OP_Eq, OP_NONE, OP_NONE, 0)
bx_define_opcode(BX_IA_MOV_CR3Rq, "mov", "movq", NULL, &BX_CPU_C::MOV_CR3Rq, 0, OP_Cq, OP_Eq, OP_NONE, OP_NONE, BX_TRACE_END)
bx_define_opcode(BX_IA_MOV_CR4Rq, "mov", "movq", NULL, &BX_CPU_C::MOV_CR4Rq, 0, OP_Cq, OP_Eq, OP_NONE, OP_NONE, BX_TRACE_END)
bx_define_opcode(BX_IA_MOV_RqCR0, "mov", "movq", NULL, &BX_CPU_C::MOV_RqCR0, 0, OP_Eq, OP_Cq, OP_NONE, OP_NONE, 0)
bx_define_opcode(BX_IA_ALT_MOV_RqCR0, "mov", "movq", NULL, &BX_CPU_C::MOV_RqCR0, BX_ISA_ALT_MOV_CR8, OP_Eq, OP_Cq, OP_NONE, OP_NONE, 0)
bx_define_opcode(BX_IA_MOV_RqCR2, "mov", "movq", NULL, &BX_CPU_C::MOV_RqCR2, 0, OP_Eq, OP_Cq, OP_NONE, OP_NONE, 0)
bx_define_opcode(BX_IA_MOV_RqCR3, "mov", "movq", NULL, &BX_CPU_C::MOV_RqCR3, 0, OP_Eq, OP_Cq, OP_NONE, OP_NONE, 0)
bx_define_opcode(BX_IA_MOV_RqCR4, "mov", "movq", NULL, &BX_CPU_C::MOV_RqCR4, 0, OP_Eq, OP_Cq, OP_NONE, OP_NONE, 0)
Expand Down
4 changes: 2 additions & 2 deletions bochs/cpu/decoder/instr.h
Original file line number Diff line number Diff line change
Expand Up @@ -305,8 +305,8 @@ class bxInstruction_c {
BX_CPP_INLINE void setLock(void) {
setLockRepUsed(BX_LOCK_PREFIX_USED);
}
BX_CPP_INLINE bool getLock(void) const {
return lockRepUsedValue() == BX_LOCK_PREFIX_USED;
BX_CPP_INLINE int getLock(void) const {
return (lockRepUsedValue() == BX_LOCK_PREFIX_USED);
}

BX_CPP_INLINE unsigned getVL(void) const {
Expand Down

0 comments on commit 94df2e8

Please sign in to comment.