forked from corsix/amx
-
Notifications
You must be signed in to change notification settings - Fork 0
/
ldst.c
64 lines (53 loc) · 1.87 KB
/
ldst.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#include "emulate.h"
#include <stdio.h>
// Operand bit 62. When set, ld_common / st_common transfer a second 64-byte
// block (at address + 64) to or from the next register in addition to the first.
#define LDST_PAIR (1ull << 62)
// Shared implementation of the plain register loads.
//
// regs    - base of the register array to load into.
// operand - bits 0-55 are used as the source memory address; bits 56 and up,
//           masked with regmask, select the first destination register;
//           LDST_PAIR requests a second consecutive 64-byte load.
// regmask - mask applied to the register index (also used to wrap the index
//           of the second register of a pair).
static void ld_common(amx_reg* regs, uint64_t operand, uint32_t regmask) {
    const uint64_t addr_bits = operand & 0x00FFFFFFFFFFFFFFull;
    const uint8_t* mem = (const uint8_t*)addr_bits;
    const uint32_t first = (uint32_t)(operand >> 56) & regmask;
    const uint32_t count = (operand & LDST_PAIR) ? 2u : 1u;

    // One 64-byte copy per register; the second copy (if any) reads the next
    // 64 bytes of memory and targets the next register index modulo regmask.
    for (uint32_t k = 0; k < count; ++k) {
        memcpy(regs + ((first + k) & regmask), mem + 64u * k, 64);
    }
}
// Shared implementation of the plain register stores.
//
// regs    - base of the register array to store from.
// operand - bits 0-55 are used as the destination memory address; bits 56 and
//           up, masked with regmask, select the first source register;
//           LDST_PAIR requests a second consecutive 64-byte store.
// regmask - mask applied to the register index (also used to wrap the index
//           of the second register of a pair).
static void st_common(const amx_reg* regs, uint64_t operand, uint32_t regmask) {
    uint8_t* out = (uint8_t*)(operand & 0x00FFFFFFFFFFFFFFull);
    uint32_t idx = (uint32_t)(operand >> 56) & regmask;

    memcpy(out, &regs[idx], 64);
    if (!(operand & LDST_PAIR)) {
        return;
    }

    // Pair store: the following register goes to the next 64 bytes of memory.
    idx = (idx + 1) & regmask;
    memcpy(out + 64, &regs[idx], 64);
}
// LDX: loads from memory into state->x via ld_common. The register index
// taken from the operand is masked with 7.
void emulate_AMX_LDX(amx_state* state, uint64_t operand) {
    const uint32_t x_index_mask = 7;
    ld_common(state->x, operand, x_index_mask);
}
// LDY: loads from memory into state->y via ld_common. The register index
// taken from the operand is masked with 7.
void emulate_AMX_LDY(amx_state* state, uint64_t operand) {
    const uint32_t y_index_mask = 7;
    ld_common(state->y, operand, y_index_mask);
}
// LDZ: loads from memory into state->z via ld_common. The register index
// taken from the operand is masked with 63.
void emulate_AMX_LDZ(amx_state* state, uint64_t operand) {
    const uint32_t z_index_mask = 63;
    ld_common(state->z, operand, z_index_mask);
}
// LDZI: loads 16 consecutive 32-bit words from memory into Z.
//
// operand bits 0-55 are used as the source address; bits 56-61 give the Z
// register index rn. Word i is written to 32-bit lane (half + i/2) of
// z[bit_select(rn, i, 1)], where half is 0 when rn is even and 8 when rn is
// odd.
// NOTE(review): bit_select is defined outside this file; presumably it takes
// the low bit from i and the remaining bits from rn, so that even and odd
// words land in two adjacent Z registers - confirm against its definition.
void emulate_AMX_LDZI(amx_state* state, uint64_t operand) {
    uint32_t rn = (operand >> 56) & 63;
    uint32_t half = (rn & 1) << 3;
    // The address comes straight from the operand and carries no alignment or
    // type guarantee, so read each word with memcpy instead of dereferencing a
    // uint32_t* (which would be undefined for a misaligned address).
    const uint8_t* src = (const uint8_t*)((operand << 8) >> 8);
    for (uint32_t i = 0; i < 16; ++i) {
        uint32_t word;
        memcpy(&word, src + i * sizeof word, sizeof word);
        state->z[bit_select(rn, i, 1)].u32[half + (i >> 1)] = word;
    }
}
// STX: stores from state->x to memory via st_common. The register index
// taken from the operand is masked with 7.
void emulate_AMX_STX(amx_state* state, uint64_t operand) {
    const uint32_t x_index_mask = 7;
    st_common(state->x, operand, x_index_mask);
}
// STY: stores from state->y to memory via st_common. The register index
// taken from the operand is masked with 7.
void emulate_AMX_STY(amx_state* state, uint64_t operand) {
    const uint32_t y_index_mask = 7;
    st_common(state->y, operand, y_index_mask);
}
// STZ: stores from state->z to memory via st_common. The register index
// taken from the operand is masked with 63.
void emulate_AMX_STZ(amx_state* state, uint64_t operand) {
    const uint32_t z_index_mask = 63;
    st_common(state->z, operand, z_index_mask);
}
// STZI: stores 16 32-bit words from Z to consecutive memory locations.
//
// operand bits 0-55 are used as the destination address; bits 56-61 give the
// Z register index rn. Word i is read from 32-bit lane (half + i/2) of
// z[bit_select(rn, i, 1)], where half is 0 when rn is even and 8 when rn is
// odd.
// NOTE(review): bit_select is defined outside this file; presumably it takes
// the low bit from i and the remaining bits from rn, so that even and odd
// words come from two adjacent Z registers - confirm against its definition.
void emulate_AMX_STZI(amx_state* state, uint64_t operand) {
    uint32_t rn = (operand >> 56) & 63;
    uint32_t half = (rn & 1) << 3;
    // The address comes straight from the operand and carries no alignment or
    // type guarantee, so write each word with memcpy instead of storing
    // through a uint32_t* (which would be undefined for a misaligned address).
    uint8_t* dst = (uint8_t*)((operand << 8) >> 8);
    for (uint32_t i = 0; i < 16; ++i) {
        const uint32_t word = state->z[bit_select(rn, i, 1)].u32[half + (i >> 1)];
        memcpy(dst + i * sizeof word, &word, sizeof word);
    }
}