Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ps whole regs #2

Open
wants to merge 7 commits into
base: development
Choose a base branch
from
91 changes: 51 additions & 40 deletions target/riscv/insn_trans/trans_rvv.c.inc
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@
*/
#include "tcg/tcg-op-gvec.h"
#include "tcg/tcg-gvec-desc.h"
//#include "exec/plugin-gen.h"
//#include "tcg/tcg-internal.h"
//#include "tcg/tcg-op-common.h"
#include "internals.h"

static inline bool is_overlapped(const int8_t astart, int8_t asize,
Expand Down Expand Up @@ -1100,26 +1103,34 @@ GEN_VEXT_TRANS(vle64ff_v, MO_64, r2nfvm, ldff_op, ld_us_check)
typedef void gen_helper_ldst_whole(TCGv_ptr, TCGv, TCGv_env, TCGv_i32);

static bool ldst_whole_trans(uint32_t vd, uint32_t rs1, uint32_t nf,
gen_helper_ldst_whole *fn,
DisasContext *s)
uint32_t log2_esz, gen_helper_ldst_whole *fn,
DisasContext *s, bool is_load)
{
TCGv_ptr dest;
TCGv base;
TCGv_i32 desc;

uint32_t data = FIELD_DP32(0, VDATA, NF, nf);
data = FIELD_DP32(data, VDATA, VM, 1);
dest = tcg_temp_new_ptr();
desc = tcg_constant_i32(simd_desc(s->cfg_ptr->vlenb,
s->cfg_ptr->vlenb, data));

base = get_gpr(s, rs1, EXT_NONE);
tcg_gen_addi_ptr(dest, tcg_env, vreg_ofs(s, vd));
TCGv addr;
addr = get_address(s, rs1, 0);
uint32_t size = s->cfg_ptr->vlenb * nf;

mark_vs_dirty(s);

fn(dest, base, tcg_env, desc);
TCGv_i128 t16 = tcg_temp_new_i128();

/*
* Load/store minimum vlenb bytes per iteration.
* When possible do this atomically.
* Update vstart with the number of processed elements.
*/
for (int i=0; i < size; i+=16) {
addr = get_address(s, rs1, i);
if (is_load) {
tcg_gen_qemu_ld_i128(t16, addr, s->mem_idx,
MO_LE | MO_128 | MO_ATOM_WITHIN16);
tcg_gen_st_i128(t16, tcg_env, vreg_ofs(s, vd) + i);
tcg_gen_addi_tl(cpu_vstart, cpu_vstart, 16 >> log2_esz);
} else {
tcg_gen_ld_i128(t16, tcg_env, vreg_ofs(s, vd) + i);
tcg_gen_qemu_st_i128(t16, addr, s->mem_idx,
MO_LE | MO_128 | MO_ATOM_WITHIN16);
tcg_gen_addi_tl(cpu_vstart, cpu_vstart, 16 >> log2_esz);
}
}
finalize_rvv_inst(s);
return true;
}
Expand All @@ -1128,42 +1139,42 @@ static bool ldst_whole_trans(uint32_t vd, uint32_t rs1, uint32_t nf,
* load and store whole register instructions ignore vtype and vl setting.
* Thus, we don't need to check vill bit. (Section 7.9)
*/
#define GEN_LDST_WHOLE_TRANS(NAME, ARG_NF) \
#define GEN_LDST_WHOLE_TRANS(NAME, ETYPE, ARG_NF, IS_LOAD) \
static bool trans_##NAME(DisasContext *s, arg_##NAME * a) \
{ \
if (require_rvv(s) && \
QEMU_IS_ALIGNED(a->rd, ARG_NF)) { \
return ldst_whole_trans(a->rd, a->rs1, ARG_NF, \
gen_helper_##NAME, s); \
return ldst_whole_trans(a->rd, a->rs1, ARG_NF, ctzl(sizeof(ETYPE)), \
gen_helper_##NAME, s, IS_LOAD); \
} \
return false; \
}

GEN_LDST_WHOLE_TRANS(vl1re8_v, 1)
GEN_LDST_WHOLE_TRANS(vl1re16_v, 1)
GEN_LDST_WHOLE_TRANS(vl1re32_v, 1)
GEN_LDST_WHOLE_TRANS(vl1re64_v, 1)
GEN_LDST_WHOLE_TRANS(vl2re8_v, 2)
GEN_LDST_WHOLE_TRANS(vl2re16_v, 2)
GEN_LDST_WHOLE_TRANS(vl2re32_v, 2)
GEN_LDST_WHOLE_TRANS(vl2re64_v, 2)
GEN_LDST_WHOLE_TRANS(vl4re8_v, 4)
GEN_LDST_WHOLE_TRANS(vl4re16_v, 4)
GEN_LDST_WHOLE_TRANS(vl4re32_v, 4)
GEN_LDST_WHOLE_TRANS(vl4re64_v, 4)
GEN_LDST_WHOLE_TRANS(vl8re8_v, 8)
GEN_LDST_WHOLE_TRANS(vl8re16_v, 8)
GEN_LDST_WHOLE_TRANS(vl8re32_v, 8)
GEN_LDST_WHOLE_TRANS(vl8re64_v, 8)
GEN_LDST_WHOLE_TRANS(vl1re8_v, int8_t, 1, true)
GEN_LDST_WHOLE_TRANS(vl1re16_v, int16_t, 1, true)
GEN_LDST_WHOLE_TRANS(vl1re32_v, int32_t, 1, true)
GEN_LDST_WHOLE_TRANS(vl1re64_v, int64_t, 1, true)
GEN_LDST_WHOLE_TRANS(vl2re8_v, int8_t, 2, true)
GEN_LDST_WHOLE_TRANS(vl2re16_v, int16_t, 2, true)
GEN_LDST_WHOLE_TRANS(vl2re32_v, int32_t, 2, true)
GEN_LDST_WHOLE_TRANS(vl2re64_v, int64_t, 2, true)
GEN_LDST_WHOLE_TRANS(vl4re8_v, int8_t, 4, true)
GEN_LDST_WHOLE_TRANS(vl4re16_v, int16_t, 4, true)
GEN_LDST_WHOLE_TRANS(vl4re32_v, int32_t, 4, true)
GEN_LDST_WHOLE_TRANS(vl4re64_v, int64_t, 4, true)
GEN_LDST_WHOLE_TRANS(vl8re8_v, int8_t, 8, true)
GEN_LDST_WHOLE_TRANS(vl8re16_v, int16_t, 8, true)
GEN_LDST_WHOLE_TRANS(vl8re32_v, int32_t, 8, true)
GEN_LDST_WHOLE_TRANS(vl8re64_v, int64_t, 8, true)

/*
* The vector whole register store instructions are encoded similar to
* unmasked unit-stride store of elements with EEW=8.
*/
GEN_LDST_WHOLE_TRANS(vs1r_v, 1)
GEN_LDST_WHOLE_TRANS(vs2r_v, 2)
GEN_LDST_WHOLE_TRANS(vs4r_v, 4)
GEN_LDST_WHOLE_TRANS(vs8r_v, 8)
GEN_LDST_WHOLE_TRANS(vs1r_v, int8_t, 1, false)
GEN_LDST_WHOLE_TRANS(vs2r_v, int8_t, 2, false)
GEN_LDST_WHOLE_TRANS(vs4r_v, int8_t, 4, false)
GEN_LDST_WHOLE_TRANS(vs8r_v, int8_t, 8, false)

/*
*** Vector Integer Arithmetic Instructions
Expand Down