diff --git a/.gitmodules b/.gitmodules index b9afb32fc..1b0fced25 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,7 @@ [submodule "frontend/libpicofe"] path = frontend/libpicofe url = https://github.com/notaz/libpicofe.git +[submodule "deps/lightning/gnulib"] + active = false + path = deps/lightning/gnulib + url = git://git.sv.gnu.org/gnulib.git diff --git a/deps/lightning/.gitmodules b/deps/lightning/.gitmodules index e69de29bb..acb26693d 100644 --- a/deps/lightning/.gitmodules +++ b/deps/lightning/.gitmodules @@ -0,0 +1,3 @@ +[submodule "gnulib"] + path = gnulib + url = git://git.sv.gnu.org/gnulib.git diff --git a/deps/lightning/.gitrepo b/deps/lightning/.gitrepo index 55cc9e6bb..2535adc85 100644 --- a/deps/lightning/.gitrepo +++ b/deps/lightning/.gitrepo @@ -4,9 +4,9 @@ ; git-subrepo command. See https://github.com/git-commands/git-subrepo#readme ; [subrepo] - remote = https://github.com/pcercuei/gnu_lightning.git - branch = pcsx_rearmed - commit = de026794c71386983034461bce2df3c63ccd5827 - parent = fb67ea334b0f3984a114a6e306806a56347a83ba + remote = https://git.savannah.gnu.org/git/lightning.git + branch = master + commit = 808fdde9e81cc1f43fd3ef3b01d24744c18bc123 + parent = dde06e44db790da43b379ff3ef74accb15c3586e method = merge cmdver = 0.4.6 diff --git a/deps/lightning/ChangeLog b/deps/lightning/ChangeLog index 8a5588afd..5d107f253 100644 --- a/deps/lightning/ChangeLog +++ b/deps/lightning/ChangeLog @@ -1,3 +1,40 @@ +2024-01-24 Paulo Andrade + + * check/Makefile.am: Add new ldstxbar test. + * check/all.tst: Add simple code to disassemble new codes. + * check/lightning.c: Add logic to call the new codes. + * doc/body.texi: Document the new codes and remove note about + only an immediate displacement argument supported. + * include/lightning.h.in: Add the new {ld,st}x{b,a}r_* codes. + * lib/jit_names.c: Add debug string for the new codes. + * lib/lightning.c: Implement fallbacks for the new codes. 
+ * lib/jit_aarch64-sz.c, lib/jit_aarch64.c, lib/jit_alpha.c, + lib/jit_arm-sz.c, lib/jit_arm.c, lib/jit_hppa.c, lib/jit_ia64.c, + lib/jit_loongarch-sz.c, lib/jit_loongarch.c, lib/jit_mips-sz.c, + lib/jit_mips.c, lib/jit_ppc-sz.c, lib/jit_ppc.c, lib/jit_riscv-sz.c, + lib/jit_riscv.c, lib/jit_s390.c, lib/jit_sparc-sz.c, lib/jit_sparc.c, + lib/jit_x86-sz.c, lib/jit_x86.c: Implement the new increment load + and store codes with a register displacement. + +2023-12-22 Paulo Andrade + + * check/Makefile.am: Add new ldstxbai test. + * check/all.tst: Add simple code to disassemble new codes. + * check/lightning.c: Add logic to call the new codes. + * doc/body.texi: Document the new codes. + * include/lightning.h.in: Add the new {ld,st}x{b,a}i_* codes. + * include/lightning/jit_private.h: Add jit_cc_a1_dep to tell + the instruction has argument one used as input and output. + * lib/lightning.c: Implement fallbacks for the new codes. + * lib/jit_names.c: Add debug string for the new codes. + * lib/jit_aarch64-sz.c, lib/jit_aarch64.c, lib/jit_alpha.c, + lib/jit_arm-sz.c, lib/jit_arm.c, lib/jit_hppa.c, lib/jit_ia64.c, + lib/jit_loongarch-sz.c, lib/jit_loongarch.c, lib/jit_mips-sz.c, + lib/jit_mips.c, lib/jit_ppc-sz.c, lib/jit_ppc.c, lib/jit_riscv-sz.c, + lib/jit_riscv.c, lib/jit_s390.c, lib/jit_sparc-sz.c, lib/jit_sparc.c, + lib/jit_x86-sz.c, lib/jit_x86.c: Implement the new increment load + and store codes. + 2023-08-21 Paulo Andrade * check/Makefile.am, check/lightning.c: Add new hmul tests. 
diff --git a/deps/lightning/TODO b/deps/lightning/TODO index 8b1378917..72a0d02cd 100644 --- a/deps/lightning/TODO +++ b/deps/lightning/TODO @@ -1 +1,2 @@ - +o Use PC relative load/store in aarch64 +o Check post-index in real arm hardware diff --git a/deps/lightning/check/Makefile.am b/deps/lightning/check/Makefile.am index 1f086eff7..d43549ded 100644 --- a/deps/lightning/check/Makefile.am +++ b/deps/lightning/check/Makefile.am @@ -18,7 +18,7 @@ AM_CFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include \ -D_GNU_SOURCE $(LIGHTNING_CFLAGS) check_PROGRAMS = lightning ccall self setcode nodata ctramp carg cva_list \ - catomic protect riprel cbit callee + catomic protect riprel cbit callee cldstxba lightning_LDADD = $(top_builddir)/lib/liblightning.la -lm $(SHLIB) lightning_SOURCES = lightning.c @@ -67,7 +67,11 @@ cbit_SOURCES = cbit.c callee_LDADD = $(top_builddir)/lib/liblightning.la -lm $(SHLIB) callee_SOURCES = callee.c +cldstxba_LDADD = $(top_builddir)/lib/liblightning.la -lm $(SHLIB) +cldstxba_SOURCES = cldstxba.c + noinst_PROGRAMS = gen_cbit +gen_cbit_LDADD = gen_cbit_SOURCES = gen_cbit.c cbit.c: gen_cbit @@ -94,6 +98,8 @@ EXTRA_DIST = \ ldstr-c.tst ldstr-c.ok \ ldstxr-c.tst ldstxr-c.ok \ ldstxi-c.tst ldstxi-c.ok \ + ldstxbai.tst ldstxbai.ok \ + ldstxbar.tst ldstxbar.ok \ ext.tst ext.ok \ cvt.tst cvt.ok \ hton.tst hton.ok \ @@ -162,8 +168,9 @@ base_TESTS = \ ldstr ldsti \ ldstxr ldstxi \ ldstr-c ldstxr-c ldstxi-c \ - ext cvt hton bswap branch \ - alu_add alux_add \ + ldstxbai ldstxbar \ + ext cvt hton bswap \ + branch alu_add alux_add \ alu_sub alux_sub alu_rsb \ alu_mul alu_hmul \ alu_div alu_rem \ @@ -193,6 +200,7 @@ x87_TESTS = \ rpn.x87 ldstr.x87 ldsti.x87 \ ldstxr.x87 ldstxi.x87 \ ldstr-c.x87 ldstxr-c.x87 ldstxi-c.x87 \ + ldstxbai.x87 ldstxbar.x87 \ ext.x87 cvt.x87 branch.x87 \ alu_add.x87 alux_add.x87 \ alu_sub.x87 alux_sub.x87 alu_rsb.x87 \ @@ -245,6 +253,7 @@ arm_TESTS = \ rpn.arm ldstr.arm ldsti.arm \ ldstxr.arm ldstxi.arm \ ldstr-c.arm ldstxr-c.arm 
ldstxi-c.arm \ + ldstxbai.arm ldstxbar.arm \ ext.arm cvt.arm hton.arm bswap.arm \ branch.arm alu_add.arm alux_add.arm \ alu_sub.arm alux_sub.arm alu_rsb.arm \ @@ -273,6 +282,7 @@ swf_TESTS = \ rpn.swf ldstr.swf ldsti.swf \ ldstxr.swf ldstxi.swf \ ldstr-c.swf ldstxr-c.swf ldstxi-c.swf \ + ldstxbai.swf ldstxbar.swf \ ext.swf cvt.swf hton.swf bswap.swf \ branch.swf alu_add.swf alux_add.swf \ alu_sub.swf alux_sub.swf alu_rsb.swf \ @@ -299,6 +309,7 @@ arm_swf_TESTS = \ rpn.arm.swf ldstr.arm.swf ldsti.arm.swf \ ldstxr.arm.swf ldstxi.arm.swf \ ldstr-c.arm.swf ldstxr-c.arm.swf ldstxi-c.arm.swf \ + ldstxbai.arm.swf ldstxbar.arm.swf \ ext.arm.swf cvt.arm.swf hton.arm.swf bswap.arm.swf \ branch.arm.swf alu_add.arm.swf alux_add.arm.swf \ alu_sub.arm.swf alux_sub.arm.swf alu_rsb.arm.swf \ @@ -325,6 +336,7 @@ arm4_swf_TESTS = \ rpn.arm4.swf ldstr.arm4.swf ldsti.arm4.swf \ ldstxr.arm4.swf ldstxi.arm4.swf \ ldstr-c.arm4.swf ldstxr-c.arm4.swf ldstxi-c.arm4.swf \ + ldstxbai.arm4.swf ldstxbar.arm4.swf \ ext.arm4.swf cvt.arm4.swf hton.arm4.swf bswap.arm4.swf \ branch.arm4.swf alu_add.arm4.swf alux_add.arm4.swf \ alu_sub.arm4.swf alux_sub.arm4.swf alu_rsb.arm4.swf \ @@ -354,6 +366,7 @@ nodata_TESTS = \ rpn.nodata ldstr.nodata ldsti.nodata \ ldstxr.nodata ldstxi.nodata \ ldstr-c.nodata ldstxr-c.nodata ldstxi-c.nodata \ + ldstxbai.nodata ldstxbar.nodata \ ext.nodata cvt.nodata branch.nodata \ alu_add.nodata alux_add.nodata \ alu_sub.nodata alux_sub.nodata alu_rsb.nodata \ diff --git a/deps/lightning/check/all.tst b/deps/lightning/check/all.tst index 2257ac63b..a63e4896e 100644 --- a/deps/lightning/check/all.tst +++ b/deps/lightning/check/all.tst @@ -198,6 +198,43 @@ ldxr_l %r0 %r1 %r2 ldxi_l %r0 %r1 8 #endif + ldxbr_c %r0 %r1 %r2 + ldxbi_c %r0 %r1 1 + ldxar_c %r0 %r1 %r2 + ldxai_c %r0 %r1 1 + ldxbr_uc %r0 %r1 %r2 + ldxbi_uc %r0 %r1 1 + ldxar_uc %r0 %r1 %r2 + ldxai_uc %r0 %r1 1 + ldxbr_s %r0 %r1 %r2 + ldxbi_s %r0 %r1 2 + ldxar_s %r0 %r1 %r2 + ldxai_s %r0 %r1 2 + ldxbr_us %r0 %r1 %r2 + 
ldxbi_us %r0 %r1 2 + ldxar_us %r0 %r1 %r2 + ldxai_us %r0 %r1 2 + ldxbr_i %r0 %r1 %r2 + ldxbi_i %r0 %r1 4 + ldxar_i %r0 %r1 %r2 + ldxai_i %r0 %r1 4 +#if __WORDSIZE == 64 + ldxbr_ui %r0 %r1 %r2 + ldxbi_ui %r0 %r1 4 + ldxar_ui %r0 %r1 %r2 + ldxai_ui %r0 %r1 4 + ldxbi_l %r0 %r1 8 + ldxbr_l %r0 %r1 %r2 + ldxai_l %r0 %r1 8 +#endif + ldxbr_f %f0 %r1 %r2 + ldxbi_f %f0 %r1 4 + ldxar_f %f0 %r1 %r2 + ldxai_f %f0 %r1 4 + ldxbr_d %f0 %r1 %r2 + ldxbi_d %f0 %r1 8 + ldxar_d %f0 %r1 %r2 + ldxai_d %f0 %r1 8 str_c %r1 %r0 sti_c 0x80000000 %r1 str_s %r1 %r0 @@ -218,6 +255,32 @@ stxr_l %r2 %r1 %r0 stxi_l 8 %r1 %r0 #endif + stxbr_c %r2 %r1 %r0 + stxbi_c 1 %r1 %r0 + stxar_c %r2 %r1 %r0 + stxai_c 1 %r1 %r0 + stxbr_s %r2 %r1 %r0 + stxbi_s 2 %r1 %r0 + stxar_s %r2 %r1 %r0 + stxai_s 2 %r1 %r0 + stxbr_i %r2 %r1 %r0 + stxbi_i 4 %r1 %r0 + stxar_i %r2 %r1 %r0 + stxai_i 4 %r1 %r0 +#if __WORDSIZE == 64 + stxbr_l %r2 %r1 %r0 + stxbi_l 8 %r1 %r0 + stxar_l %r2 %r1 %r0 + stxai_l 8 %r1 %r0 +#endif + stxbr_f %r2 %r0 %f0 + stxbi_f 4 %r0 %f0 + stxar_f %r2 %r0 %f0 + stxai_f 4 %r0 %f0 + stxbr_d %r2 %r0 %f0 + stxbi_d 8 %r0 %f0 + stxar_d %r2 %r0 %f0 + stxai_d 8 %r0 %f0 cond: bltr cond %r0 %r1 condi: diff --git a/deps/lightning/check/carry.tst b/deps/lightning/check/carry.tst index 180d896fc..615d964fc 100644 --- a/deps/lightning/check/carry.tst +++ b/deps/lightning/check/carry.tst @@ -136,6 +136,8 @@ ok: fadd(__LINE__, , r0, r1, x7fe, x1, x7f) tsub(__LINE__, , r0, r1, x80, x1, x7f) fsub(__LINE__, , r0, r1, x7f, x1, x7fe) + fsub(__LINE__, , r0, r1, x0, x1, ff) + fsub(__LINE__, , r0, r1, ff, ff, x0) tadd(__LINE__, _u, r0, r1, ff, x1, x0) fadd(__LINE__, _u, r0, r1, x7f, x1, x80) tsub(__LINE__, _u, r0, r1, x0, x1, ff) diff --git a/deps/lightning/check/cldstxba.c b/deps/lightning/check/cldstxba.c new file mode 100644 index 000000000..c9dd6e8f9 --- /dev/null +++ b/deps/lightning/check/cldstxba.c @@ -0,0 +1,224 @@ +#include <lightning.h> +#include <stdio.h> + +#if !defined(offsetof) +# define offsetof(type, field) ((char *)&((type 
*)0)->field - (char *)0) +#endif + +int +main(int argc, char *argv[]) +{ + jit_state_t *_jit; + jit_node_t *jmp, *fail; + void (*code)(void); +#if defined(__x86_64__) || defined(__i386__) + /* test lodsb stosb special cases */ + struct data_t { + signed char sc; + unsigned char uc; + signed short ss; + unsigned short us; + signed int si; + unsigned int ui; + unsigned long ul; + } data; + + + init_jit(argv[0]); + _jit = jit_new_state(); + jit_prolog(); + fail = jit_forward(); + +#define SC_VAL -3 + jit_movi(_RDI, (jit_word_t)&data + offsetof(struct data_t, sc)); + jit_movi(_RAX, SC_VAL); + jit_movr(_RSI, _RDI); + jit_stxai_c(1, _RDI, _RAX); + jit_subr(_RDI, _RDI, _RSI); + jmp = jit_bnei(_RDI, 1); + jit_patch_at(jmp, fail); + data.uc = 0xa3; + +#define SS_VAL -31 + jit_movi(_RDI, (jit_word_t)&data + offsetof(struct data_t, ss)); + jit_movi(_RAX, SS_VAL); + jit_movr(_RSI, _RDI); + jit_stxai_s(2, _RDI, _RAX); + jit_subr(_RDI, _RDI, _RSI); + jmp = jit_bnei(_RDI, 2); + jit_patch_at(jmp, fail); + data.us = 0x5aa5; + +#define SI_VAL -511 + jit_movi(_RDI, (jit_word_t)&data + offsetof(struct data_t, si)); + jit_movi(_RAX, SI_VAL); + jit_movr(_RSI, _RDI); + jit_stxai_i(4, _RDI, _RAX); + jit_subr(_RDI, _RDI, _RSI); + jmp = jit_bnei(_RDI, 4); + jit_patch_at(jmp, fail); + data.ui = 0xabcddcba; + +# if __X64 && !__X64_32 +#define UL_VAL 0x123456789abcdef + jit_movi(_RDI, (jit_word_t)&data + offsetof(struct data_t, ul)); + jit_movi(_RAX, UL_VAL); + jit_movr(_RSI, _RDI); + jit_stxai_l(8, _RDI, _RAX); + jit_subr(_RDI, _RDI, _RSI); + jmp = jit_bnei(_RDI, 8); + jit_patch_at(jmp, fail); +# endif + + jit_movi(_RSI, (jit_word_t)&data + offsetof(struct data_t, sc)); + jit_movr(_RDI, _RSI); + jit_ldxai_c(_RAX, _RSI, 1); + jmp = jit_bnei(_RAX, SC_VAL); + jit_patch_at(jmp, fail); + jit_subr(_RDI, _RDI, _RSI); + jmp = jit_bnei(_RDI, -1); + jit_patch_at(jmp, fail); + jit_movi(_RSI, (jit_word_t)&data + offsetof(struct data_t, uc)); + jit_movr(_RDI, _RSI); + jit_ldxai_uc(_RAX, _RSI, 1); + jmp = 
jit_bnei(_RAX, data.uc); + jit_patch_at(jmp, fail); + jit_subr(_RDI, _RDI, _RSI); + jmp = jit_bnei(_RDI, -1); + jit_patch_at(jmp, fail); + jit_movi(_RSI, (jit_word_t)&data + offsetof(struct data_t, ss)); + jit_movr(_RDI, _RSI); + jit_ldxai_s(_RAX, _RSI, 2); + jmp = jit_bnei(_RAX, SS_VAL); + jit_patch_at(jmp, fail); + jit_subr(_RDI, _RDI, _RSI); + jmp = jit_bnei(_RDI, -2); + jit_patch_at(jmp, fail); + jit_movi(_RSI, (jit_word_t)&data + offsetof(struct data_t, us)); + jit_movr(_RDI, _RSI); + jit_ldxai_us(_RAX, _RSI, 2); + jmp = jit_bnei(_RAX, data.us); + jit_patch_at(jmp, fail); + jit_subr(_RDI, _RDI, _RSI); + jmp = jit_bnei(_RDI, -2); + jit_patch_at(jmp, fail); + jit_movi(_RSI, (jit_word_t)&data + offsetof(struct data_t, si)); + jit_movr(_RDI, _RSI); + jit_ldxai_i(_RAX, _RSI, 4); + jmp = jit_bnei(_RAX, SI_VAL); + jit_patch_at(jmp, fail); + jit_subr(_RDI, _RDI, _RSI); + jmp = jit_bnei(_RDI, -4); + jit_patch_at(jmp, fail); +# if __X64 && !__X64_32 + jit_movi(_RSI, (jit_word_t)&data + offsetof(struct data_t, ui)); + jit_movr(_RDI, _RSI); + jit_ldxai_ui(_RAX, _RSI, 4); + jmp = jit_bnei(_RAX, data.ui); + jit_patch_at(jmp, fail); + jit_subr(_RDI, _RDI, _RSI); + jmp = jit_bnei(_RDI, -4); + jit_patch_at(jmp, fail); + jit_movi(_RSI, (jit_word_t)&data + offsetof(struct data_t, ul)); + jit_movr(_RDI, _RSI); + jit_ldxai_l(_RAX, _RSI, 8); + jmp = jit_bnei(_RAX, UL_VAL); + jit_patch_at(jmp, fail); + jit_subr(_RDI, _RDI, _RSI); + jmp = jit_bnei(_RDI, -8); + jit_patch_at(jmp, fail); +# endif + + jmp = jit_jmpi(); + jit_link(fail); + jit_calli(abort); + jit_patch(jmp); + jit_prepare(); + { + jit_pushargi((jit_word_t)"ok"); + } + jit_finishi(puts); + jit_ret(); + jit_epilog(); + code = jit_emit(); + jit_clear_state(); + + (*code)(); + + jit_destroy_state(); + finish_jit(); + +#elif defined(__arm__) + /* make sure to test ldmia and stmia cases */ + struct data_t { + float f1; + float f2; + double d3; + double d4; + } data; + + init_jit(argv[0]); + _jit = jit_new_state(); + 
jit_prolog(); + fail = jit_forward(); + +#define F1_VAL 1 + jit_movi(JIT_R0, (jit_word_t)&data + offsetof(struct data_t, f1)); + jit_movi_f(JIT_F0, F1_VAL); + jit_movr(JIT_R1, JIT_R0); + jit_stxai_f(4, JIT_R0, JIT_F0); + jit_subr(JIT_R1, JIT_R0, JIT_R1); + jmp = jit_bnei(JIT_R1, 4); + jit_patch_at(jmp, fail); + data.f2 = 2; +#define D3_VAL 3 + jit_movi(JIT_R0, (jit_word_t)&data + offsetof(struct data_t, d3)); + jit_movi_d(JIT_F0, D3_VAL); + jit_movr(JIT_R1, JIT_R0); + jit_stxai_d(8, JIT_R0, JIT_F0); + jit_subr(JIT_R1, JIT_R0, JIT_R1); + jmp = jit_bnei(JIT_R1, 8); + jit_patch_at(jmp, fail); + data.d4 = 4; + + jit_movi(JIT_R0, (jit_word_t)&data + offsetof(struct data_t, f1)); + jit_movr(JIT_R1, JIT_R0); + jit_ldxai_f(JIT_F0, JIT_R0, 4); + jmp = jit_bnei_f(JIT_F0, F1_VAL); + jit_patch_at(jmp, fail); + jit_subr(JIT_R1, JIT_R0, JIT_R1); + jmp = jit_bnei(JIT_R1, 4); + jit_patch_at(jmp, fail); + + jit_movi(JIT_R0, (jit_word_t)&data + offsetof(struct data_t, d3)); + jit_movr(JIT_R1, JIT_R0); + jit_ldxai_d(JIT_F0, JIT_R0, 8); + jmp = jit_bnei_d(JIT_F0, D3_VAL); + jit_patch_at(jmp, fail); + jit_subr(JIT_R1, JIT_R0, JIT_R1); + jmp = jit_bnei(JIT_R1, 8); + jit_patch_at(jmp, fail); + + jmp = jit_jmpi(); + jit_link(fail); + jit_calli(abort); + jit_patch(jmp); + jit_prepare(); + { + jit_pushargi((jit_word_t)"ok"); + } + jit_finishi(puts); + jit_ret(); + jit_epilog(); + code = jit_emit(); + jit_clear_state(); + + (*code)(); + + jit_destroy_state(); + finish_jit(); +#else + puts("ok"); +#endif + return (0); +} diff --git a/deps/lightning/check/float.tst b/deps/lightning/check/float.tst index 69a6cafa8..a181f8441 100644 --- a/deps/lightning/check/float.tst +++ b/deps/lightning/check/float.tst @@ -14,14 +14,14 @@ ok: # define x80 0x8000000000000000 #endif -#if (__mips__ && __mips_isa_rev < 6) || __sparc__ || __hppa__ || __riscv +#if (__mips__ && __mips_isa_rev < 6) || __sparc__ || __hppa__ || __riscv || __sh__ # define wnan x7f #elif (__mips__ && __mips_isa_rev >= 6) || __arm__ || 
__aarch64__ || __alpha__ || __loongarch__ # define wnan 0 #else # define wnan x80 #endif -#if __mips__ || __arm__ || __ppc__ || __sparc__ || __hppa__ || __aarch64__ || __s390__ || __riscv || __loongarch__ +#if __mips__ || __arm__ || __ppc__ || __sparc__ || __hppa__ || __aarch64__ || __s390__ || __riscv || __loongarch__ || __sh__ # define wpinf x7f #elif __alpha__ /* (at least) bug compatible with gcc 4.2.3 -ieee */ @@ -49,12 +49,12 @@ T##op##r##t##r0##f0##f1##l: \ b##op##r##t bT##op##r##t##r0##f0##f1##l %f0 %f1 \ calli @abort \ bT##op##r##t##r0##f0##f1##l: \ - movi##t %f1 li \ + movi##t %f0 li \ op##i##t %r0 %f0 ri \ bnei T##op##i##t##r0##f0##f1##l %r0 0 \ calli @abort \ T##op##i##t##r0##f0##f1##l: \ - movi##t %f1 li \ + movi##t %f0 li \ b##op##i##t bT##op##i##t##r0##f0##f1##l %f0 ri \ calli @abort \ bT##op##i##t##r0##f0##f1##l: \ @@ -64,7 +64,7 @@ bT##op##i##t##r0##f0##f1##l: \ beqi F##op##r##t##r0##f0##f1##l %r0 1 \ calli @abort \ F##op##r##t##r0##f0##f1##l: \ - movi##t %f1 li \ + movi##t %f0 li \ op##i##t %r0 %f0 ri \ beqi F##op##i##t##r0##f0##f1##l %r0 1 \ calli @abort \ diff --git a/deps/lightning/check/ldstxbai.ok b/deps/lightning/check/ldstxbai.ok new file mode 100644 index 000000000..9766475a4 --- /dev/null +++ b/deps/lightning/check/ldstxbai.ok @@ -0,0 +1 @@ +ok diff --git a/deps/lightning/check/ldstxbai.tst b/deps/lightning/check/ldstxbai.tst new file mode 100644 index 000000000..f23dd689f --- /dev/null +++ b/deps/lightning/check/ldstxbai.tst @@ -0,0 +1,444 @@ +.data 256 + +/* + #define offs(field) (offsetof(data_t, field) - offsetof(data_t, si8)) + */ +#define nF32 -36 // offs(nf32) +#define nF64 -32 // offs(nf64) +#define nSI64 -24 // offs(nsi64) +#define nUI32 -16 // offs(nui32) +#define nSI32 -12 // offs(nsi32) +#define nUI16 -6 // offs(nui16) +#define nSI16 -4 // offs(nsi16) +#define nUI8 -2 // offs(nui8) +#define nSI8 -1 // offs(nsi8) +#define SI8 0 // offs(si8) +#define UI8 1 // offs(ui8) +#define SI16 2 // offs(si18) +#define UI16 4 // offs(ui16) 
+#define SI32 8 // offs(si32) +#define UI32 12 // offs(ui32) +#define SI64 16 // offs(si64) +#define F64 24 // offs(f64) +#define F32 32 // offs(f32) + +/* + typedef struct { + int32_t _pad0; + float32_t nf32; + float64_t nf64; + int64_t nsi64; + uint32_t nui32; + int32_t nsi32; + short _pad1; + uint16_t nui16; + int16_t nsi16; + uint8_t nui8; + int8_t nsi8; + int8_t si8; + uint8_t ui8; + int16_t si16; + uint16_t ui16; + int16_t _pad2; + int32_t si32; + uint32_t ui32; + int64_t si64; + float64_t f64; + float32_t f32; + int32_t _pad3; + } data_t; + data_t data; + */ + +data: +.size 4 +minus_thirty_six: // nF32 +.size 4 +minus_thirty_two: // nF64 +.size 8 +minus_twenty_four: // nSI64 +.size 8 +minus_sixteen: // nUI32 +.size 4 +minus_twelve: // nSI32 +.size 4 +.size 2 // pad +minus_six: // nUI16 +.size 2 +minus_four: // nSI16 +.size 2 +minus_two: // nUI8 +.size 1 +minus_one: +.size 1 // nSI8 +zero: // SI8 +.size 1 +one: // UI8 +.size 1 +two: // SI16 +.size 2 +four: // UI16 +.size 2 +.size 2 // pad +eight: // SI32 +.size 4 +twelve: // UI32 +.size 4 +sixteen: // SI64 +.size 8 +twenty_four: // F64 +.size 8 +thirty_two: // F32 +.size 4 +thirty_six: +.align 8 +/* + data_t buffer; + */ +buffer: +.size 80 + +ok: +.c "ok" + +.code + jmpi main + +/* + void reset(void) { + memset(data, -1, sizeof(data)); + data.nf32 = nF32; + data.nf64 = nF64; + #if __WORDSIZE == 64 + data.nsi64 = nSI64; + data.nui32 = nUI32; + #endif + data.nsi32 = nSI32; + data.nui16 = nUI16; + data.nsi16 = nSI16; + data.nui8 = nUI8; + data.nsi8 = nSI8; + data.si8 = SI8; + data.ui8 = UI8; + data.si16 = SI16; + data.ui16 = UI16; + data.si32 = SI32; + #if __WORDSIZE == 64 + data.ui32 = UI32; + data.si64 = SI64; + #endif + data.f64 = F64; + data.f32 = F32; + } + */ +reset: + prolog + movi %v0 data + prepare + pushargr %v0 + pushargi -1 + pushargi 80 + finishi @memset + addi %v0 %v0 4 + movi_f %f0 nF32 + str_f %v0 %f0 + addi %v0 %v0 $(nF64 - nF32) + movi_d %f0 nF64 + str_d %v0 %f0 + addi %v0 %v0 $(nSI64 - nF64) + 
movi %r0 nSI64 +#if __WORDSIZE == 64 + str_l %v0 %r0 +#endif + addi %v0 %v0 $(nUI32 - nSI64) + movi %r0 nUI32 +#if __WORDSIZE == 64 + str_i %v0 %r0 +#endif + addi %v0 %v0 $(nSI32 - nUI32) + movi %r0 nSI32 + str_i %v0 %r0 + addi %v0 %v0 $(nUI16 - nSI32) + movi %r0 nUI16 + str_s %v0 %r0 + addi %v0 %v0 $(nSI16 - nUI16) + movi %r0 nSI16 + str_s %v0 %r0 + addi %v0 %v0 $(nUI8 - nSI16) + movi %r0 nUI8 + str_c %v0 %r0 + addi %v0 %v0 $(nSI8 - nUI8) + movi %r0 nSI8 + str_c %v0 %r0 + addi %v0 %v0 $(SI8 - nSI8) + movi %r0 SI8 + str_c %v0 %r0 + addi %v0 %v0 $(UI8 - SI8) + movi %r0 UI8 + str_c %v0 %r0 + addi %v0 %v0 $(SI16 - UI8) + movi %r0 SI16 + str_s %v0 %r0 + addi %v0 %v0 $(UI16 - SI16) + movi %r0 UI16 + str_s %v0 %r0 + addi %v0 %v0 $(SI32 - UI16) + movi %r0 SI32 + str_i %v0 %r0 + addi %v0 %v0 $(UI32 - SI32) + movi %r0 UI32 +#if __WORDSIZE == 64 + str_i %v0 %r0 +#endif + addi %v0 %v0 $(SI64 - UI32) + movi %r0 SI64 +#if __WORDSIZE == 64 + str_l %v0 %r0 +#endif + addi %v0 %v0 $(F64 - SI64) + movi_d %f0 F64 + str_d %v0 %f0 + addi %v0 %v0 $(F32 - F64) + movi_f %f0 F32 + str_f %v0 %f0 + ret + epilog + +#if __WORDSIZE == 64 +# define IF32(expr) /**/ +# define IF64(expr) expr +#else +# define IF32(expr) expr +# define IF64(expr) /**/ +#endif + +/* + union { + int8_t *i8; + uint8_t *u8; + int16_t *i16; + uint16_t *u16; + int32_t *i32; + uint32_t *u32; + int64_t *i64; + float32_t *f32; + float64_t *f64; + } u; + reset(); + u.i8 = (char *)data + offsetof(data_t, si8); + if (*--u.i8 != nSI8) goto fail; + if (*--u.u8 != nUI8) goto fail; + if (*--u.i16 != nSI16) goto fail; + if (*--u.u16 != nUI16) goto fail; + --u.nsi16; + if (*--u.i32 != nSI32) goto fail; +#if __WORDSIZE == 64 + if (*--u.u32 != nUI32) goto fail; + if (*--u.i64 != nSI64) goto fail; +#else + u.i8 -= 12; +#endif + if (*--u.f64 != nF64) goto fail; + if (*--u.f32 != nF32) goto fail; + u.i8 = (char *)data + offsetof(data_t, si8); + if (*u.i8++ != SI8) goto fail; + if (*u.u8++ != UI8) goto fail; + if (*u.i16++ != SI16) goto 
fail; + if (*u.u16++ != UI16) goto fail; + ++u.i16; + if (*u.i32++ != SI32) goto fail; +#if __WORDSIZE == 64 + if (*u.u32++ != UI32) goto fail; + if (*u.i64++ != SI64) goto fail; +#else + u.i8 += 12; +#endif + if (*u.f64++ != F64) goto fail; + if (*u.f32++ != F32) goto fail; + goto done; +fail: + abort(); +done: + memset(buffer, -1, 80); + u.i8 = (char *)buffer + offsetof(data_t, si8); + *--u.i8 = nSI8; + *--u.u8 = nUI8; + *--u.i16 = nSI16; + *--u.u16 = nUI16; + --u.i16; + *--u.i32 = nSI32; +#if __WORDSIZE == 64 + *--u.u32 = nUI32; + *--u.i64 = nSI64; +#else + u.i8 -= 12; +#endif + *--u.f64 = nF64; + *--u.f32 = nF32; + u.i8 = (char *)buffer + offsetof(data_t, si8); + u.i8++ = SI8; + u.u8++ = UI8; + u.i16++ = SI16; + u.u16++ = UI16; + ++u.i16; + u.i32++ = SI32; +#if __WORDSIZE == 64 + u.u32++ = UI32; + u.i64++ = SI64; +#else + u.i8 += 12; +#endif + u.f64++ = F64; + u.f32++ = F32; + if (memcp(buffer, data, sizeof(data_t))) + abort(); + */ +#define TEST(R0, F0, R1) \ + calli reset \ + movi %R1 zero \ + ldxbi_c %R0 %R1 $(nSI8 - SI8) \ + bnei fail##R0##F0##R1 %R0 nSI8 \ + ldxbi_uc %R0 %R1 $(nUI8 - nSI8) \ + extr_c %R0 %R0 \ + bnei fail##R0##F0##R1 %R0 nUI8 \ + ldxbi_s %R0 %R1 $(nSI16 - nUI8) \ + bnei fail##R0##F0##R1 %R0 nSI16 \ + ldxbi_us %R0 %R1 $(nUI16 - nSI16) \ + extr_s %R0 %R0 \ + bnei fail##R0##F0##R1 %R0 nUI16 \ + ldxbi_i %R0 %R1 $(nSI32 - nUI16) \ + bnei fail##R0##F0##R1 %R0 nSI32 \ + IF64(ldxbi_ui %R0 %R1 $(nUI32 - nSI32)) \ + IF64(extr_i %R0 %R0) \ + IF64(bnei fail##R0##F0##R1 %R0 nUI32) \ + IF32(addi %R1 %R1 $(nUI32 - nSI32)) \ + IF64(ldxbi_l %R0 %R1 $(nSI64 - nUI32)) \ + IF64(bnei fail##R0##F0##R1 %R0 nSI64) \ + IF32(addi %R1 %R1 $(nSI64 - nUI32)) \ + ldxbi_d %F0 %R1 $(nF64 - nSI64) \ + bnei_d fail##R0##F0##R1 %F0 nF64 \ + ldxbi_f %F0 %R1 $(nF32 - nF64) \ + bnei_f fail##R0##F0##R1 %F0 nF32 \ + movi %R1 zero \ + ldxai_c %R0 %R1 $(UI8 - SI8) \ + bnei fail##R0##F0##R1 %R0 SI8 \ + ldxai_uc %R0 %R1 $(SI16 - UI8) \ + bnei fail##R0##F0##R1 %R0 UI8 \ + ldxai_s %R0 
%R1 $(UI16 - SI16) \ + bnei fail##R0##F0##R1 %R0 SI16 \ + ldxai_us %R0 %R1 $(SI32 - UI16) \ + bnei fail##R0##F0##R1 %R0 UI16 \ + ldxai_i %R0 %R1 $(UI32 - SI32) \ + bnei fail##R0##F0##R1 %R0 SI32 \ + IF64(ldxai_ui %R0 %R1 $(SI64 - UI32)) \ + IF64(bnei fail##R0##F0##R1 %R0 UI32) \ + IF32(addi %R1 %R1 $(SI64 - UI32)) \ + IF64(ldxai_l %R0 %R1 $(F64 - SI64)) \ + IF64(bnei fail##R0##F0##R1 %R0 SI64) \ + IF32(addi %R1 %R1 $(F64 - SI64)) \ + ldxai_d %F0 %R1 $(F32 - F64) \ + bnei_d fail##R0##F0##R1 %F0 F64 \ + ldxai_f %F0 %R1 $(36 - F32) \ + bnei_f fail##R0##F0##R1 %F0 F32 \ + jmpi done##R0##F0##R1 \ +fail##R0##F0##R1: \ + calli @abort \ +done##R0##F0##R1: \ + prepare \ + pushargi buffer \ + pushargi -1 \ + pushargi 80 \ + finishi @memset \ + movi %R1 buffer \ + addi %R1 %R1 40 \ + movi %R0 nSI8 \ + stxbi_c $(nSI8 - SI8) %R1 %R0 \ + movi %R0 nUI8 \ + extr_uc %R0 %R0 \ + stxbi_c $(nUI8 - nSI8) %R1 %R0 \ + movi %R0 nSI16 \ + stxbi_s $(nSI16 - nUI8) %R1 %R0 \ + movi %R0 nUI16 \ + extr_us %R0 %R0 \ + stxbi_s $(nUI16 - nSI16) %R1 %R0 \ + movi %R0 nSI32 \ + stxbi_i $(nSI32 - nUI16) %R1 %R0 \ + IF64(movi %R0 nUI32) \ + IF64(stxbi_i $(nUI32 - nSI32) %R1 %R0) \ + IF32(addi %R1 %R1 $(nUI32 - nSI32)) \ + IF64(movi %R0 nSI64) \ + IF64(stxbi_l $(nSI64 - nUI32) %R1 %R0) \ + IF32(addi %R1 %R1 $(nSI64 - nUI32)) \ + movi_d %F0 nF64 \ + stxbi_d $(nF64 - nSI64) %R1 %F0 \ + movi_f %F0 nF32 \ + stxbi_f $(nF32 - nF64) %R1 %F0 \ + movi %R1 buffer \ + addi %R1 %R1 40 \ + movi %R0 SI8 \ + stxai_c $(UI8 - SI8) %R1 %R0 \ + movi %R0 UI8 \ + stxai_c $(SI16 - UI8) %R1 %R0 \ + movi %R0 SI16 \ + stxai_s $(UI16 - SI16) %R1 %R0 \ + movi %R0 UI16 \ + stxai_s $(SI32 - UI16) %R1 %R0 \ + movi %R0 SI32 \ + stxai_i $(UI32 - SI32) %R1 %R0 \ + IF64(movi %R0 UI32) \ + IF64(stxai_i $(SI64 - UI32) %R1 %R0) \ + IF32(addi %R1 %R1 $(SI64 - UI32)) \ + IF64(movi %R0 SI64) \ + IF64(stxai_l $(F64 - SI64) %R1 %R0) \ + IF32(addi %R1 %R1 $(F64 - SI64)) \ + movi_d %F0 F64 \ + stxai_d $(F32 - F64) %R1 %F0 \ + movi_f %F0 F32 \ + 
stxai_f $(36 - F32) %R1 %F0 \ + prepare \ + pushargi data \ + pushargi buffer \ + pushargi 80 \ + finishi @memcmp \ + retval %R0 \ + beqi done2##R0##F0##R1 %R0 0 \ + calli @abort \ +done2##R0##F0##R1: + +main: + prolog + TEST(r0, f0, r1) + TEST(r0, f0, r2) + TEST(r0, f0, v0) + TEST(r0, f0, v1) + TEST(r0, f0, v2) + TEST(r1, f1, r0) + TEST(r1, f1, r2) + TEST(r1, f1, v0) + TEST(r1, f1, v1) + TEST(r1, f1, v2) + TEST(r2, f2, r0) + TEST(r2, f2, r1) + TEST(r2, f2, v0) + TEST(r2, f2, v1) + TEST(r2, f2, v2) + TEST(v0, f3, r0) + TEST(v0, f3, r1) + TEST(v0, f3, r2) + TEST(v0, f3, v1) + TEST(v0, f3, v2) + TEST(v1, f4, r0) + TEST(v1, f4, r1) + TEST(v1, f4, r2) + TEST(v1, f4, v0) + TEST(v1, f4, v2) + TEST(v2, f5, r0) + TEST(v2, f5, r1) + TEST(v2, f5, r2) + TEST(v2, f5, v0) + TEST(v2, f5, v1) + prepare + pushargi ok + finishi @puts + ret + epilog diff --git a/deps/lightning/check/ldstxbar.ok b/deps/lightning/check/ldstxbar.ok new file mode 100644 index 000000000..9766475a4 --- /dev/null +++ b/deps/lightning/check/ldstxbar.ok @@ -0,0 +1 @@ +ok diff --git a/deps/lightning/check/ldstxbar.tst b/deps/lightning/check/ldstxbar.tst new file mode 100644 index 000000000..cd69bdf72 --- /dev/null +++ b/deps/lightning/check/ldstxbar.tst @@ -0,0 +1,480 @@ +.data 256 + +/* + #define offs(field) (offsetof(data_t, field) - offsetof(data_t, si8)) + */ +#define nF32 -36 // offs(nf32) +#define nF64 -32 // offs(nf64) +#define nSI64 -24 // offs(nsi64) +#define nUI32 -16 // offs(nui32) +#define nSI32 -12 // offs(nsi32) +#define nUI16 -6 // offs(nui16) +#define nSI16 -4 // offs(nsi16) +#define nUI8 -2 // offs(nui8) +#define nSI8 -1 // offs(nsi8) +#define SI8 0 // offs(si8) +#define UI8 1 // offs(ui8) +#define SI16 2 // offs(si18) +#define UI16 4 // offs(ui16) +#define SI32 8 // offs(si32) +#define UI32 12 // offs(ui32) +#define SI64 16 // offs(si64) +#define F64 24 // offs(f64) +#define F32 32 // offs(f32) + +/* + typedef struct { + int32_t _pad0; + float32_t nf32; + float64_t nf64; + int64_t nsi64; + 
uint32_t nui32; + int32_t nsi32; + short _pad1; + uint16_t nui16; + int16_t nsi16; + uint8_t nui8; + int8_t nsi8; + int8_t si8; + uint8_t ui8; + int16_t si16; + uint16_t ui16; + int16_t _pad2; + int32_t si32; + uint32_t ui32; + int64_t si64; + float64_t f64; + float32_t f32; + int32_t _pad3; + } data_t; + data_t data; + */ + +data: +.size 4 +minus_thirty_six: // nF32 +.size 4 +minus_thirty_two: // nF64 +.size 8 +minus_twenty_four: // nSI64 +.size 8 +minus_sixteen: // nUI32 +.size 4 +minus_twelve: // nSI32 +.size 4 +.size 2 // pad +minus_six: // nUI16 +.size 2 +minus_four: // nSI16 +.size 2 +minus_two: // nUI8 +.size 1 +minus_one: +.size 1 // nSI8 +zero: // SI8 +.size 1 +one: // UI8 +.size 1 +two: // SI16 +.size 2 +four: // UI16 +.size 2 +.size 2 // pad +eight: // SI32 +.size 4 +twelve: // UI32 +.size 4 +sixteen: // SI64 +.size 8 +twenty_four: // F64 +.size 8 +thirty_two: // F32 +.size 4 +thirty_six: +.align 8 +/* + data_t buffer; + */ +buffer: +.size 80 + +ok: +.c "ok" + +.code + jmpi main + +/* + void reset(void) { + memset(data, -1, sizeof(data)); + data.nf32 = nF32; + data.nf64 = nF64; + #if __WORDSIZE == 64 + data.nsi64 = nSI64; + data.nui32 = nUI32; + #endif + data.nsi32 = nSI32; + data.nui16 = nUI16; + data.nsi16 = nSI16; + data.nui8 = nUI8; + data.nsi8 = nSI8; + data.si8 = SI8; + data.ui8 = UI8; + data.si16 = SI16; + data.ui16 = UI16; + data.si32 = SI32; + #if __WORDSIZE == 64 + data.ui32 = UI32; + data.si64 = SI64; + #endif + data.f64 = F64; + data.f32 = F32; + } + */ +reset: + prolog + movi %v0 data + prepare + pushargr %v0 + pushargi -1 + pushargi 80 + finishi @memset + addi %v0 %v0 4 + movi_f %f0 nF32 + str_f %v0 %f0 + addi %v0 %v0 $(nF64 - nF32) + movi_d %f0 nF64 + str_d %v0 %f0 + addi %v0 %v0 $(nSI64 - nF64) + movi %r0 nSI64 +#if __WORDSIZE == 64 + str_l %v0 %r0 +#endif + addi %v0 %v0 $(nUI32 - nSI64) + movi %r0 nUI32 +#if __WORDSIZE == 64 + str_i %v0 %r0 +#endif + addi %v0 %v0 $(nSI32 - nUI32) + movi %r0 nSI32 + str_i %v0 %r0 + addi %v0 %v0 $(nUI16 - 
nSI32) + movi %r0 nUI16 + str_s %v0 %r0 + addi %v0 %v0 $(nSI16 - nUI16) + movi %r0 nSI16 + str_s %v0 %r0 + addi %v0 %v0 $(nUI8 - nSI16) + movi %r0 nUI8 + str_c %v0 %r0 + addi %v0 %v0 $(nSI8 - nUI8) + movi %r0 nSI8 + str_c %v0 %r0 + addi %v0 %v0 $(SI8 - nSI8) + movi %r0 SI8 + str_c %v0 %r0 + addi %v0 %v0 $(UI8 - SI8) + movi %r0 UI8 + str_c %v0 %r0 + addi %v0 %v0 $(SI16 - UI8) + movi %r0 SI16 + str_s %v0 %r0 + addi %v0 %v0 $(UI16 - SI16) + movi %r0 UI16 + str_s %v0 %r0 + addi %v0 %v0 $(SI32 - UI16) + movi %r0 SI32 + str_i %v0 %r0 + addi %v0 %v0 $(UI32 - SI32) + movi %r0 UI32 +#if __WORDSIZE == 64 + str_i %v0 %r0 +#endif + addi %v0 %v0 $(SI64 - UI32) + movi %r0 SI64 +#if __WORDSIZE == 64 + str_l %v0 %r0 +#endif + addi %v0 %v0 $(F64 - SI64) + movi_d %f0 F64 + str_d %v0 %f0 + addi %v0 %v0 $(F32 - F64) + movi_f %f0 F32 + str_f %v0 %f0 + ret + epilog + +#if __WORDSIZE == 64 +# define IF32(expr) /**/ +# define IF64(expr) expr +#else +# define IF32(expr) expr +# define IF64(expr) /**/ +#endif + +/* + union { + int8_t *i8; + uint8_t *u8; + int16_t *i16; + uint16_t *u16; + int32_t *i32; + uint32_t *u32; + int64_t *i64; + float32_t *f32; + float64_t *f64; + } u; + reset(); + u.i8 = (char *)data + offsetof(data_t, si8); + if (*--u.i8 != nSI8) goto fail; + if (*--u.u8 != nUI8) goto fail; + if (*--u.i16 != nSI16) goto fail; + if (*--u.u16 != nUI16) goto fail; + --u.nsi16; + if (*--u.i32 != nSI32) goto fail; +#if __WORDSIZE == 64 + if (*--u.u32 != nUI32) goto fail; + if (*--u.i64 != nSI64) goto fail; +#else + u.i8 -= 12; +#endif + if (*--u.f64 != nF64) goto fail; + if (*--u.f32 != nF32) goto fail; + u.i8 = (char *)data + offsetof(data_t, si8); + if (*u.i8++ != SI8) goto fail; + if (*u.u8++ != UI8) goto fail; + if (*u.i16++ != SI16) goto fail; + if (*u.u16++ != UI16) goto fail; + ++u.i16; + if (*u.i32++ != SI32) goto fail; +#if __WORDSIZE == 64 + if (*u.u32++ != UI32) goto fail; + if (*u.i64++ != SI64) goto fail; +#else + u.i8 += 12; +#endif + if (*u.f64++ != F64) goto fail; + if 
(*u.f32++ != F32) goto fail; + goto done; +fail: + abort(); +done: + memset(buffer, -1, 80); + u.i8 = (char *)buffer + offsetof(data_t, si8); + *--u.i8 = nSI8; + *--u.u8 = nUI8; + *--u.i16 = nSI16; + *--u.u16 = nUI16; + --u.i16; + *--u.i32 = nSI32; +#if __WORDSIZE == 64 + *--u.u32 = nUI32; + *--u.i64 = nSI64; +#else + u.i8 -= 12; +#endif + *--u.f64 = nF64; + *--u.f32 = nF32; + u.i8 = (char *)buffer + offsetof(data_t, si8); + u.i8++ = SI8; + u.u8++ = UI8; + u.i16++ = SI16; + u.u16++ = UI16; + ++u.i16; + u.i32++ = SI32; +#if __WORDSIZE == 64 + u.u32++ = UI32; + u.i64++ = SI64; +#else + u.i8 += 12; +#endif + u.f64++ = F64; + u.f32++ = F32; + if (memcp(buffer, data, sizeof(data_t))) + abort(); + */ +#define TEST(R0, F0, R1, R2) \ + calli reset \ + movi %R1 zero \ + movi %R2 $(nSI8 - SI8) \ + ldxbr_c %R0 %R1 %R2 \ + bnei fail##R0##F0##R1 %R0 nSI8 \ + movi %R2 $(nUI8 - nSI8) \ + ldxbr_uc %R0 %R1 %R2 \ + extr_c %R0 %R0 \ + bnei fail##R0##F0##R1 %R0 nUI8 \ + movi %R2 $(nSI16 - nUI8) \ + ldxbr_s %R0 %R1 %R2 \ + bnei fail##R0##F0##R1 %R0 nSI16 \ + movi %R2 $(nUI16 - nSI16) \ + ldxbr_us %R0 %R1 %R2 \ + extr_s %R0 %R0 \ + bnei fail##R0##F0##R1 %R0 nUI16 \ + movi %R2 $(nSI32 - nUI16) \ + ldxbr_i %R0 %R1 %R2 \ + bnei fail##R0##F0##R1 %R0 nSI32 \ + IF64(movi %R2 $(nUI32 - nSI32)) \ + IF64(ldxbr_ui %R0 %R1 %R2) \ + IF64(extr_i %R0 %R0) \ + IF64(bnei fail##R0##F0##R1 %R0 nUI32) \ + IF32(addi %R1 %R1 $(nUI32 - nSI32)) \ + IF64(movi %R2 $(nSI64 - nUI32)) \ + IF64(ldxbr_l %R0 %R1 %R2) \ + IF64(bnei fail##R0##F0##R1 %R0 nSI64) \ + IF32(addi %R1 %R1 $(nSI64 - nUI32)) \ + movi %R2 $(nF64 - nSI64) \ + ldxbr_d %F0 %R1 %R2 \ + bnei_d fail##R0##F0##R1 %F0 nF64 \ + movi %R2 $(nF32 - nF64) \ + ldxbr_f %F0 %R1 %R2 \ + bnei_f fail##R0##F0##R1 %F0 nF32 \ + movi %R1 zero \ + movi %R2 $(UI8 - SI8) \ + ldxar_c %R0 %R1 %R2 \ + bnei fail##R0##F0##R1 %R0 SI8 \ + movi %R2 $(SI16 - UI8) \ + ldxar_uc %R0 %R1 %R2 \ + bnei fail##R0##F0##R1 %R0 UI8 \ + movi %R2 $(UI16 - SI16) \ + ldxar_s %R0 %R1 %R2 \ + bnei 
fail##R0##F0##R1 %R0 SI16 \ + movi %R2 $(SI32 - UI16) \ + ldxar_us %R0 %R1 %R2 \ + bnei fail##R0##F0##R1 %R0 UI16 \ + movi %R2 $(UI32 - SI32) \ + ldxar_i %R0 %R1 %R2 \ + bnei fail##R0##F0##R1 %R0 SI32 \ + IF64(movi %R2 $(SI64 - UI32)) \ + IF64(ldxar_ui %R0 %R1 %R2) \ + IF64(bnei fail##R0##F0##R1 %R0 UI32) \ + IF32(addi %R1 %R1 $(SI64 - UI32)) \ + IF64(movi %R2 $(F64 - SI64)) \ + IF64(ldxar_l %R0 %R1 %R2) \ + IF64(bnei fail##R0##F0##R1 %R0 SI64) \ + IF32(addi %R1 %R1 $(F64 - SI64)) \ + movi %R2 $(F32 - F64) \ + ldxar_d %F0 %R1 %R2 \ + bnei_d fail##R0##F0##R1 %F0 F64 \ + movi %R2 $(36 - F32) \ + ldxar_f %F0 %R1 %R2 \ + bnei_f fail##R0##F0##R1 %F0 F32 \ + jmpi done##R0##F0##R1 \ +fail##R0##F0##R1: \ + calli @abort \ +done##R0##F0##R1: \ + prepare \ + pushargi buffer \ + pushargi -1 \ + pushargi 80 \ + finishi @memset \ + movi %R1 buffer \ + addi %R1 %R1 40 \ + movi %R0 nSI8 \ + movi %R2 $(nSI8 - SI8) \ + stxbr_c %R2 %R1 %R0 \ + movi %R0 nUI8 \ + extr_uc %R0 %R0 \ + movi %R2 $(nUI8 - nSI8) \ + stxbr_c %R2 %R1 %R0 \ + movi %R0 nSI16 \ + movi %R2 $(nSI16 - nUI8) \ + stxbr_s %R2 %R1 %R0 \ + movi %R0 nUI16 \ + extr_us %R0 %R0 \ + movi %R2 $(nUI16 - nSI16) \ + stxbr_s %R2 %R1 %R0 \ + movi %R0 nSI32 \ + movi %R2 $(nSI32 - nUI16) \ + stxbr_i %R2 %R1 %R0 \ + IF64(movi %R0 nUI32) \ + IF64(movi %R2 $(nUI32 - nSI32)) \ + IF64(stxbr_i %R2 %R1 %R0) \ + IF32(addi %R1 %R1 $(nUI32 - nSI32)) \ + IF64(movi %R0 nSI64) \ + IF64(movi %R2 $(nSI64 - nUI32)) \ + IF64(stxbr_l %R2 %R1 %R0) \ + IF32(addi %R1 %R1 $(nSI64 - nUI32)) \ + movi_d %F0 nF64 \ + movi %R2 $(nF64 - nSI64) \ + stxbr_d %R2 %R1 %F0 \ + movi_f %F0 nF32 \ + movi %R2 $(nF32 - nF64) \ + stxbr_f %R2 %R1 %F0 \ + movi %R1 buffer \ + addi %R1 %R1 40 \ + movi %R0 SI8 \ + movi %R2 $(UI8 - SI8) \ + stxar_c %R2 %R1 %R0 \ + movi %R0 UI8 \ + movi %R2 $(SI16 - UI8) \ + stxar_c %R2 %R1 %R0 \ + movi %R0 SI16 \ + movi %R2 $(UI16 - SI16) \ + stxar_s %R2 %R1 %R0 \ + movi %R0 UI16 \ + movi %R2 $(SI32 - UI16) \ + stxar_s %R2 %R1 %R0 \ + movi %R0 
SI32 \ + movi %R2 $(UI32 - SI32) \ + stxar_i %R2 %R1 %R0 \ + IF64(movi %R0 UI32) \ + IF64(movi %R2 $(SI64 - UI32)) \ + IF64(stxar_i %R2 %R1 %R0) \ + IF32(addi %R1 %R1 $(SI64 - UI32)) \ + IF64(movi %R0 SI64) \ + IF64(movi %R2 $(F64 - SI64)) \ + IF64(stxar_l %R2 %R1 %R0) \ + IF32(addi %R1 %R1 $(F64 - SI64)) \ + movi_d %F0 F64 \ + movi %R2 $(F32 - F64) \ + stxar_d %R2 %R1 %F0 \ + movi_f %F0 F32 \ + movi %R2 $(36 - F32) \ + stxar_f %R2 %R1 %F0 \ + prepare \ + pushargi data \ + pushargi buffer \ + pushargi 80 \ + finishi @memcmp \ + retval %R0 \ + beqi done2##R0##F0##R1 %R0 0 \ + calli @abort \ +done2##R0##F0##R1: + +main: + prolog + TEST(r0, f0, r1, r2) + TEST(r0, f0, r2, v0) + TEST(r0, f0, v0, v1) + TEST(r0, f0, v1, v2) + TEST(r0, f0, v2, r1) + TEST(r1, f1, r0, r2) + TEST(r1, f1, r2, v0) + TEST(r1, f1, v0, v1) + TEST(r1, f1, v1, v2) + TEST(r1, f1, v2, r0) + TEST(r2, f2, r0, r1) + TEST(r2, f2, r1, v0) + TEST(r2, f2, v0, v1) + TEST(r2, f2, v1, v2) + TEST(r2, f2, v2, r0) + TEST(v0, f3, r0, r1) + TEST(v0, f3, r1, r2) + TEST(v0, f3, r2, v1) + TEST(v0, f3, v1, v2) + TEST(v0, f3, v2, r0) + TEST(v1, f4, r0, r1) + TEST(v1, f4, r1, r2) + TEST(v1, f4, r2, v0) + TEST(v1, f4, v0, v2) + TEST(v1, f4, v2, r0) + TEST(v2, f5, r0, r1) + TEST(v2, f5, r1, r2) + TEST(v2, f5, r2, v0) + TEST(v2, f5, v0, v1) + TEST(v2, f5, v1, r0) + prepare + pushargi ok + finishi @puts + ret + epilog diff --git a/deps/lightning/check/lightning.c b/deps/lightning/check/lightning.c index 9bb5c5bf3..d485b5c01 100644 --- a/deps/lightning/check/lightning.c +++ b/deps/lightning/check/lightning.c @@ -398,7 +398,30 @@ static void ldxr_l(void); static void ldxi_l(void); static void ldxr(void); static void ldxi(void); static void unldr(void); static void unldi(void); static void unldr_u(void); static void unldi_u(void); -static void str_c(void); static void sti_c(void); +static void ldxbr_c(void); static void ldxbi_c(void); +static void ldxar_c(void); static void ldxai_c(void); +static void ldxbr_uc(void); static void 
ldxbi_uc(void); +static void ldxar_uc(void); static void ldxai_uc(void); +static void ldxbr_s(void); static void ldxbi_s(void); +static void ldxar_s(void); static void ldxai_s(void); +static void ldxbr_us(void); static void ldxbi_us(void); +static void ldxar_us(void); static void ldxai_us(void); +static void ldxbr_i(void); static void ldxbi_i(void); +static void ldxar_i(void); static void ldxai_i(void); +#if __WORDSIZE == 64 +static void ldxbr_ui(void); static void ldxbi_ui(void); +static void ldxar_ui(void); static void ldxai_ui(void); +static void ldxbr_l(void); static void ldxbi_l(void); +static void ldxar_l(void); static void ldxai_l(void); +#endif +static void ldxbr(void); static void ldxbi(void); +static void ldxar(void); static void ldxai(void); +static void ldxbr_f(void); static void ldxbi_f(void); +static void ldxar_f(void); static void ldxai_f(void); +static void ldxbr_d(void); static void ldxbi_d(void); +static void ldxar_d(void); static void ldxai_d(void); +static void str_c(void); +static void sti_c(void); static void str_s(void); static void sti_s(void); static void str_i(void); static void sti_i(void); #if __WORDSIZE == 64 @@ -413,6 +436,22 @@ static void stxr_l(void); static void stxi_l(void); #endif static void stxr(void); static void stxi(void); static void unstr(void); static void unsti(void); +static void stxbr_c(void); static void stxbi_c(void); +static void stxar_c(void); static void stxai_c(void); +static void stxbr_s(void); static void stxbi_s(void); +static void stxar_s(void); static void stxai_s(void); +static void stxbr_i(void); static void stxbi_i(void); +static void stxar_i(void); static void stxai_i(void); +#if __WORDSIZE == 64 +static void stxbr_l(void); static void stxbi_l(void); +static void stxar_l(void); static void stxai_l(void); +#endif +static void stxbr_f(void); static void stxbi_f(void); +static void stxar_f(void); static void stxai_f(void); +static void stxbr_d(void); static void stxbi_d(void); +static void stxar_d(void); 
static void stxai_d(void); +static void stxbr(void); static void stxbi(void); +static void stxar(void); static void stxai(void); static void bltr(void); static void blti(void); static void bltr_u(void); static void blti_u(void); static void bler(void); static void blei(void); @@ -802,6 +841,28 @@ static instr_t instr_vector[] = { entry(ldxr), entry(ldxi), entry(unldr), entry(unldi), entry(unldr_u), entry(unldi_u), + entry(ldxbr_c), entry(ldxbi_c), + entry(ldxar_c), entry(ldxai_c), + entry(ldxbr_uc), entry(ldxbi_uc), + entry(ldxar_uc), entry(ldxai_uc), + entry(ldxbr_s), entry(ldxbi_s), + entry(ldxar_s), entry(ldxai_s), + entry(ldxbr_us), entry(ldxbi_us), + entry(ldxar_us), entry(ldxai_us), + entry(ldxbr_i), entry(ldxbi_i), + entry(ldxar_i), entry(ldxai_i), +#if __WORDSIZE == 64 + entry(ldxbr_ui), entry(ldxbi_ui), + entry(ldxar_ui), entry(ldxai_ui), + entry(ldxbr_l), entry(ldxbi_l), + entry(ldxar_l), entry(ldxai_l), +#endif + entry(ldxbr_f), entry(ldxbi_f), + entry(ldxar_f), entry(ldxai_f), + entry(ldxbr_d), entry(ldxbi_d), + entry(ldxar_d), entry(ldxai_d), + entry(ldxbr), entry(ldxbi), + entry(ldxar), entry(ldxai), entry(str_c), entry(sti_c), entry(str_s), entry(sti_s), entry(str_i), entry(sti_i), @@ -817,6 +878,22 @@ static instr_t instr_vector[] = { #endif entry(stxr), entry(stxi), entry(unstr), entry(unsti), + entry(stxbr_c), entry(stxbi_c), + entry(stxar_c), entry(stxai_c), + entry(stxbr_s), entry(stxbi_s), + entry(stxar_s), entry(stxai_s), + entry(stxbr_i), entry(stxbi_i), + entry(stxar_i), entry(stxai_i), +#if __WORDSIZE == 64 + entry(stxbr_l), entry(stxbi_l), + entry(stxar_l), entry(stxai_l), +#endif + entry(stxbr_f), entry(stxbi_f), + entry(stxar_f), entry(stxai_f), + entry(stxbr_d), entry(stxbi_d), + entry(stxar_d), entry(stxai_d), + entry(stxbr), entry(stxbi), + entry(stxar), entry(stxai), entry(bltr), entry(blti), entry(bltr_u), entry(blti_u), entry(bler), entry(blei), @@ -1866,6 +1943,28 @@ entry_ir_ir_ir(ldxr_l) entry_ir_ir_im(ldxi_l) 
entry_ir_ir_ir(ldxr) entry_ir_ir_im(ldxi) entry_ir_ir_im(unldr) entry_ir_im_im(unldi) entry_ir_ir_im(unldr_u) entry_ir_im_im(unldi_u) +entry_ir_ir_ir(ldxbr_c) entry_ir_ir_im(ldxbi_c) +entry_ir_ir_ir(ldxar_c) entry_ir_ir_im(ldxai_c) +entry_ir_ir_ir(ldxbr_uc) entry_ir_ir_im(ldxbi_uc) +entry_ir_ir_ir(ldxar_uc) entry_ir_ir_im(ldxai_uc) +entry_ir_ir_ir(ldxbr_s) entry_ir_ir_im(ldxbi_s) +entry_ir_ir_ir(ldxar_s) entry_ir_ir_im(ldxai_s) +entry_ir_ir_ir(ldxbr_us) entry_ir_ir_im(ldxbi_us) +entry_ir_ir_ir(ldxar_us) entry_ir_ir_im(ldxai_us) +entry_ir_ir_ir(ldxbr_i) entry_ir_ir_im(ldxbi_i) +entry_ir_ir_ir(ldxar_i) entry_ir_ir_im(ldxai_i) +#if __WORDSIZE == 64 +entry_ir_ir_ir(ldxbr_ui) entry_ir_ir_im(ldxbi_ui) +entry_ir_ir_ir(ldxar_ui) entry_ir_ir_im(ldxai_ui) +entry_ir_ir_ir(ldxbr_l) entry_ir_ir_im(ldxbi_l) +entry_ir_ir_ir(ldxar_l) entry_ir_ir_im(ldxai_l) +#endif +entry_ir_ir_ir(ldxbr) entry_ir_ir_im(ldxbi) +entry_ir_ir_ir(ldxar) entry_ir_ir_im(ldxai) +entry_fr_ir_ir(ldxbr_f) entry_fr_ir_im(ldxbi_f) +entry_fr_ir_ir(ldxar_f) entry_fr_ir_im(ldxai_f) +entry_fr_ir_ir(ldxbr_d) entry_fr_ir_im(ldxbi_d) +entry_fr_ir_ir(ldxar_d) entry_fr_ir_im(ldxai_d) entry_ir_ir(str_c) entry_pm_ir(sti_c) entry_ir_ir(str_s) entry_pm_ir(sti_s) entry_ir_ir(str_i) entry_pm_ir(sti_i) @@ -1881,6 +1980,22 @@ entry_ir_ir_ir(stxr_l) entry_im_ir_ir(stxi_l) #endif entry_ir_ir_ir(stxr) entry_im_ir_ir(stxi) entry_ir_ir_im(unstr) entry_im_ir_im(unsti) +entry_ir_ir_ir(stxbr_c) entry_im_ir_ir(stxbi_c) +entry_ir_ir_ir(stxar_c) entry_im_ir_ir(stxai_c) +entry_ir_ir_ir(stxbr_s) entry_im_ir_ir(stxbi_s) +entry_ir_ir_ir(stxar_s) entry_im_ir_ir(stxai_s) +entry_ir_ir_ir(stxbr_i) entry_im_ir_ir(stxbi_i) +entry_ir_ir_ir(stxar_i) entry_im_ir_ir(stxai_i) +#if __WORDSIZE == 64 +entry_ir_ir_ir(stxbr_l) entry_im_ir_ir(stxbi_l) +entry_ir_ir_ir(stxar_l) entry_im_ir_ir(stxai_l) +#endif +entry_ir_ir_ir(stxbr) entry_im_ir_ir(stxbi) +entry_ir_ir_ir(stxar) entry_im_ir_ir(stxai) +entry_ir_ir_fr(stxbr_f) entry_im_ir_fr(stxbi_f) 
+entry_ir_ir_fr(stxar_f) entry_im_ir_fr(stxai_f) +entry_ir_ir_fr(stxbr_d) entry_im_ir_fr(stxbi_d) +entry_ir_ir_fr(stxar_d) entry_im_ir_fr(stxai_d) entry_lb_ir_ir(bltr) entry_lb_ir_im(blti) entry_lb_ir_ir(bltr_u) entry_lb_ir_im(blti_u) entry_lb_ir_ir(bler) entry_lb_ir_im(blei) @@ -4744,6 +4859,11 @@ main(int argc, char *argv[]) opt_short += snprintf(cmdline + opt_short, sizeof(cmdline) - opt_short, " -D__loongarch__=1"); +#endif +#if defined(__sh__) + opt_short += snprintf(cmdline + opt_short, + sizeof(cmdline) - opt_short, + " -D__sh__=1"); #endif if ((parser.fp = popen(cmdline, "r")) == NULL) error("cannot execute %s", cmdline); diff --git a/deps/lightning/configure.ac b/deps/lightning/configure.ac index 76457b45e..31594ad87 100644 --- a/deps/lightning/configure.ac +++ b/deps/lightning/configure.ac @@ -15,7 +15,7 @@ dnl License for more details. dnl AC_PREREQ([2.71]) -AC_INIT([GNU lightning],[2.2.2],[pcpa@gnu.org],[lightning]) +AC_INIT([GNU lightning],[2.2.3],[pcpa@gnu.org],[lightning]) AC_CONFIG_AUX_DIR([build-aux]) AC_CANONICAL_TARGET AC_CONFIG_SRCDIR([Makefile.am]) @@ -222,6 +222,7 @@ case "$target_cpu" in alpha*) cpu=alpha ;; riscv*) cpu=riscv ;; loongarch*) cpu=loongarch ;; + sh*) cpu=sh ;; *) ;; esac AM_CONDITIONAL(cpu_arm, [test cpu-$cpu = cpu-arm]) @@ -236,6 +237,7 @@ AM_CONDITIONAL(cpu_s390, [test cpu-$cpu = cpu-s390]) AM_CONDITIONAL(cpu_alpha, [test cpu-$cpu = cpu-alpha]) AM_CONDITIONAL(cpu_riscv, [test cpu-$cpu = cpu-riscv]) AM_CONDITIONAL(cpu_loongarch, [test cpu-$cpu = cpu-loongarch]) +AM_CONDITIONAL(cpu_sh, [test cpu-$cpu = cpu-sh]) # Test x87 if both, x87 and sse2 available ac_cv_test_x86_x87= diff --git a/deps/lightning/doc/body.texi b/deps/lightning/doc/body.texi index f71b77cba..59b3f4d95 100644 --- a/deps/lightning/doc/body.texi +++ b/deps/lightning/doc/body.texi @@ -554,6 +554,10 @@ ldr _c _uc _s _us _i _ui _l _f _d O1 = *O2 ldi _c _uc _s _us _i _ui _l _f _d O1 = *O2 ldxr _c _uc _s _us _i _ui _l _f _d O1 = *(O2+O3) ldxi _c _uc _s _us _i _ui _l 
_f _d O1 = *(O2+O3) +ldxbr _c _uc _s _us _i _ui _l _f _d O2 += O3, O1 = *O2 +ldxbi _c _uc _s _us _i _ui _l _f _d O2 += O3, O1 = *O2 +ldxar _c _uc _s _us _i _ui _l _f _d O1 = *O2, O2 += O3 +ldxai _c _uc _s _us _i _ui _l _f _d O1 = *O2, O2 += O3 @end example @item Store operations @@ -565,6 +569,10 @@ str _c _s _i _l _f _d *O1 = O2 sti _c _s _i _l _f _d *O1 = O2 stxr _c _s _i _l _f _d *(O1+O2) = O3 stxi _c _s _i _l _f _d *(O1+O2) = O3 +stxbr _c _s _i _l _f _d O2 += O1, *O2 = O3 +stxbi _c _s _i _l _f _d O2 += O1, *O2 = O3 +stxar _c _s _i _l _f _d *O2 = O3, O2 += O1 +stxai _c _s _i _l _f _d *O2 = O3, O2 += O1 @end example Note that the unsigned type modifier is not available, as the store only writes to the 1, 2, 4 or 8 sized memory address. diff --git a/deps/lightning/gnulib b/deps/lightning/gnulib new file mode 160000 index 000000000..e54b645fc --- /dev/null +++ b/deps/lightning/gnulib @@ -0,0 +1 @@ +Subproject commit e54b645fc6b8422562327443bda575c65d931fbd diff --git a/deps/lightning/include/lightning.h.in b/deps/lightning/include/lightning.h.in index 4ab4a0a97..8fb270a11 100644 --- a/deps/lightning/include/lightning.h.in +++ b/deps/lightning/include/lightning.h.in @@ -154,6 +154,8 @@ typedef jit_int32_t jit_fpr_t; # include #elif defined(__loongarch__) # include +#elif defined(__sh__) +# include #endif #define jit_flag_node 0x0001 /* patch node not absolute */ @@ -1211,6 +1213,119 @@ typedef enum { #define jit_hmuli_u(u,v,w) jit_new_node_www(jit_code_hmuli_u,u,v,w) jit_code_hmulr_u, jit_code_hmuli_u, +#define jit_ldxbr_c(u,v,w) jit_new_node_www(jit_code_ldxbr_c,u,v,w) +#define jit_ldxbi_c(u,v,w) jit_new_node_www(jit_code_ldxbi_c,u,v,w) + jit_code_ldxbr_c, jit_code_ldxbi_c, +#define jit_ldxar_c(u,v,w) jit_new_node_www(jit_code_ldxar_c,u,v,w) +#define jit_ldxai_c(u,v,w) jit_new_node_www(jit_code_ldxai_c,u,v,w) + jit_code_ldxar_c, jit_code_ldxai_c, +#define jit_ldxbr_uc(u,v,w) jit_new_node_www(jit_code_ldxbr_uc,u,v,w) +#define jit_ldxbi_uc(u,v,w) 
jit_new_node_www(jit_code_ldxbi_uc,u,v,w) + jit_code_ldxbr_uc, jit_code_ldxbi_uc, +#define jit_ldxar_uc(u,v,w) jit_new_node_www(jit_code_ldxar_uc,u,v,w) +#define jit_ldxai_uc(u,v,w) jit_new_node_www(jit_code_ldxai_uc,u,v,w) + jit_code_ldxar_uc, jit_code_ldxai_uc, +#define jit_ldxbr_s(u,v,w) jit_new_node_www(jit_code_ldxbr_s,u,v,w) +#define jit_ldxbi_s(u,v,w) jit_new_node_www(jit_code_ldxbi_s,u,v,w) + jit_code_ldxbr_s, jit_code_ldxbi_s, +#define jit_ldxar_s(u,v,w) jit_new_node_www(jit_code_ldxar_s,u,v,w) +#define jit_ldxai_s(u,v,w) jit_new_node_www(jit_code_ldxai_s,u,v,w) + jit_code_ldxar_s, jit_code_ldxai_s, +#define jit_ldxbr_us(u,v,w) jit_new_node_www(jit_code_ldxbr_us,u,v,w) +#define jit_ldxbi_us(u,v,w) jit_new_node_www(jit_code_ldxbi_us,u,v,w) + jit_code_ldxbr_us, jit_code_ldxbi_us, +#define jit_ldxar_us(u,v,w) jit_new_node_www(jit_code_ldxar_us,u,v,w) +#define jit_ldxai_us(u,v,w) jit_new_node_www(jit_code_ldxai_us,u,v,w) + jit_code_ldxar_us, jit_code_ldxai_us, +#define jit_ldxbr_i(u,v,w) jit_new_node_www(jit_code_ldxbr_i,u,v,w) +#define jit_ldxbi_i(u,v,w) jit_new_node_www(jit_code_ldxbi_i,u,v,w) + jit_code_ldxbr_i, jit_code_ldxbi_i, +#define jit_ldxar_i(u,v,w) jit_new_node_www(jit_code_ldxar_i,u,v,w) +#define jit_ldxai_i(u,v,w) jit_new_node_www(jit_code_ldxai_i,u,v,w) + jit_code_ldxar_i, jit_code_ldxai_i, +#if __WORDSIZE == 32 +# define jit_ldxbr(u,v,w) jit_ldxbr_i(u,v,w) +# define jit_ldxbi(u,v,w) jit_ldxbi_i(u,v,w) +# define jit_ldxar(u,v,w) jit_ldxar_i(u,v,w) +# define jit_ldxai(u,v,w) jit_ldxai_i(u,v,w) +#else +# define jit_ldxbr(u,v,w) jit_ldxbr_l(u,v,w) +# define jit_ldxbi(u,v,w) jit_ldxbi_l(u,v,w) +# define jit_ldxar(u,v,w) jit_ldxar_l(u,v,w) +# define jit_ldxai(u,v,w) jit_ldxai_l(u,v,w) +# define jit_ldxbr_ui(u,v,w) jit_new_node_www(jit_code_ldxbr_ui,u,v,w) +# define jit_ldxbi_ui(u,v,w) jit_new_node_www(jit_code_ldxbi_ui,u,v,w) +# define jit_ldxar_ui(u,v,w) jit_new_node_www(jit_code_ldxar_ui,u,v,w) +# define jit_ldxai_ui(u,v,w) 
jit_new_node_www(jit_code_ldxai_ui,u,v,w) +# define jit_ldxbr_l(u,v,w) jit_new_node_www(jit_code_ldxbr_l,u,v,w) +# define jit_ldxbi_l(u,v,w) jit_new_node_www(jit_code_ldxbi_l,u,v,w) +# define jit_ldxar_l(u,v,w) jit_new_node_www(jit_code_ldxar_l,u,v,w) +# define jit_ldxai_l(u,v,w) jit_new_node_www(jit_code_ldxai_l,u,v,w) +#endif + jit_code_ldxbr_ui, jit_code_ldxbi_ui, + jit_code_ldxar_ui, jit_code_ldxai_ui, + jit_code_ldxbr_l, jit_code_ldxbi_l, + jit_code_ldxar_l, jit_code_ldxai_l, +# define jit_ldxbr_f(u,v,w) jit_new_node_www(jit_code_ldxbr_f,u,v,w) +# define jit_ldxbi_f(u,v,w) jit_new_node_www(jit_code_ldxbi_f,u,v,w) +# define jit_ldxar_f(u,v,w) jit_new_node_www(jit_code_ldxar_f,u,v,w) +# define jit_ldxai_f(u,v,w) jit_new_node_www(jit_code_ldxai_f,u,v,w) + jit_code_ldxbr_f, jit_code_ldxbi_f, + jit_code_ldxar_f, jit_code_ldxai_f, +# define jit_ldxbr_d(u,v,w) jit_new_node_www(jit_code_ldxbr_d,u,v,w) +# define jit_ldxbi_d(u,v,w) jit_new_node_www(jit_code_ldxbi_d,u,v,w) +# define jit_ldxar_d(u,v,w) jit_new_node_www(jit_code_ldxar_d,u,v,w) +# define jit_ldxai_d(u,v,w) jit_new_node_www(jit_code_ldxai_d,u,v,w) + jit_code_ldxbr_d, jit_code_ldxbi_d, + jit_code_ldxar_d, jit_code_ldxai_d, +#define jit_stxbr_c(u,v,w) jit_new_node_www(jit_code_stxbr_c,u,v,w) +#define jit_stxbi_c(u,v,w) jit_new_node_www(jit_code_stxbi_c,u,v,w) +#define jit_stxar_c(u,v,w) jit_new_node_www(jit_code_stxar_c,u,v,w) +#define jit_stxai_c(u,v,w) jit_new_node_www(jit_code_stxai_c,u,v,w) + jit_code_stxbr_c, jit_code_stxbi_c, + jit_code_stxar_c, jit_code_stxai_c, +#define jit_stxbr_s(u,v,w) jit_new_node_www(jit_code_stxbr_s,u,v,w) +#define jit_stxbi_s(u,v,w) jit_new_node_www(jit_code_stxbi_s,u,v,w) +#define jit_stxar_s(u,v,w) jit_new_node_www(jit_code_stxar_s,u,v,w) +#define jit_stxai_s(u,v,w) jit_new_node_www(jit_code_stxai_s,u,v,w) + jit_code_stxbr_s, jit_code_stxbi_s, + jit_code_stxar_s, jit_code_stxai_s, +#define jit_stxbr_i(u,v,w) jit_new_node_www(jit_code_stxbr_i,u,v,w) +#define jit_stxbi_i(u,v,w) 
jit_new_node_www(jit_code_stxbi_i,u,v,w) +#define jit_stxar_i(u,v,w) jit_new_node_www(jit_code_stxar_i,u,v,w) +#define jit_stxai_i(u,v,w) jit_new_node_www(jit_code_stxai_i,u,v,w) + jit_code_stxbr_i, jit_code_stxbi_i, + jit_code_stxar_i, jit_code_stxai_i, +#if __WORDSIZE == 32 +# define jit_stxbr(u,v,w) jit_stxbr_i(u,v,w) +# define jit_stxbi(u,v,w) jit_stxbi_i(u,v,w) +# define jit_stxar(u,v,w) jit_stxar_i(u,v,w) +# define jit_stxai(u,v,w) jit_stxai_i(u,v,w) +#else +# define jit_stxbr(u,v,w) jit_stxbr_l(u,v,w) +# define jit_stxbi(u,v,w) jit_stxbi_l(u,v,w) +# define jit_stxar(u,v,w) jit_stxar_l(u,v,w) +# define jit_stxai(u,v,w) jit_stxai_l(u,v,w) +# define jit_stxbr_l(u,v,w) jit_new_node_www(jit_code_stxbr_l,u,v,w) +# define jit_stxbi_l(u,v,w) jit_new_node_www(jit_code_stxbi_l,u,v,w) +# define jit_stxar_l(u,v,w) jit_new_node_www(jit_code_stxar_l,u,v,w) +# define jit_stxai_l(u,v,w) jit_new_node_www(jit_code_stxai_l,u,v,w) +#endif + jit_code_stxbr_l, jit_code_stxbi_l, + jit_code_stxar_l, jit_code_stxai_l, +# define jit_stxbr_f(u,v,w) jit_new_node_www(jit_code_stxbr_f,u,v,w) +# define jit_stxbi_f(u,v,w) jit_new_node_www(jit_code_stxbi_f,u,v,w) +# define jit_stxar_f(u,v,w) jit_new_node_www(jit_code_stxar_f,u,v,w) +# define jit_stxai_f(u,v,w) jit_new_node_www(jit_code_stxai_f,u,v,w) + jit_code_stxbr_f, jit_code_stxbi_f, + jit_code_stxar_f, jit_code_stxai_f, +# define jit_stxbr_d(u,v,w) jit_new_node_www(jit_code_stxbr_d,u,v,w) +# define jit_stxbi_d(u,v,w) jit_new_node_www(jit_code_stxbi_d,u,v,w) +# define jit_stxar_d(u,v,w) jit_new_node_www(jit_code_stxar_d,u,v,w) +# define jit_stxai_d(u,v,w) jit_new_node_www(jit_code_stxai_d,u,v,w) + jit_code_stxbr_d, jit_code_stxbi_d, + jit_code_stxar_d, jit_code_stxai_d, + jit_code_last_code } jit_code_t; diff --git a/deps/lightning/include/lightning/Makefile.am b/deps/lightning/include/lightning/Makefile.am index e21bbaa99..6d3944a60 100644 --- a/deps/lightning/include/lightning/Makefile.am +++ 
b/deps/lightning/include/lightning/Makefile.am @@ -67,3 +67,7 @@ if cpu_loongarch lightning_include_HEADERS = \ jit_loongarch.h endif +if cpu_sh +lightning_include_HEADERS = \ + jit_sh.h +endif diff --git a/deps/lightning/include/lightning/jit_arm.h b/deps/lightning/include/lightning/jit_arm.h index 558f55353..b42ae0736 100644 --- a/deps/lightning/include/lightning/jit_arm.h +++ b/deps/lightning/include/lightning/jit_arm.h @@ -29,6 +29,7 @@ #define jit_swf_p() (jit_cpu.vfp == 0) #define jit_hardfp_p() jit_cpu.abi #define jit_ldrt_strt_p() jit_cpu.ldrt_strt +#define jit_post_index_p() jit_cpu.post_index #define JIT_FP _R11 typedef enum { @@ -125,6 +126,13 @@ typedef struct { * is in arm mode, or the reverse, what may cause a crash upon return * of that function if generating jit for a relative jump. */ + /* Apparently a bug in qemu 8.1.3, and possibly other versions, that treats + * ldrT Rt, [Rn, #+-]! and ldrT Rt, [Rn], #+/- + * identically, as pre-indexed, although the second form should adjust + * Rn only after the load. + * The syntax for offsetting only is ldrT Rt{, [Rn, #+/-}] + */ + jit_uint32_t post_index : 1; jit_uint32_t exchange : 1; /* By default assume cannot load unaligned data. 
* A3.2.1 diff --git a/deps/lightning/include/lightning/jit_ppc.h b/deps/lightning/include/lightning/jit_ppc.h index 460c491ac..e9274b639 100644 --- a/deps/lightning/include/lightning/jit_ppc.h +++ b/deps/lightning/include/lightning/jit_ppc.h @@ -82,4 +82,13 @@ typedef enum { #define JIT_NOREG _NOREG } jit_reg_t; +typedef struct { + jit_uint32_t popcntb : 1; +} jit_cpu_t; + +/* + * Initialization + */ +extern jit_cpu_t jit_cpu; + #endif /* _jit_ppc_h */ diff --git a/deps/lightning/include/lightning/jit_private.h b/deps/lightning/include/lightning/jit_private.h index a730d73e7..d350bab45 100644 --- a/deps/lightning/include/lightning/jit_private.h +++ b/deps/lightning/include/lightning/jit_private.h @@ -177,6 +177,13 @@ typedef jit_uint64_t jit_regset_t; # define JIT_RET _A0 # define JIT_FRET _FA0 typedef jit_uint64_t jit_regset_t; +#elif defined(__sh__) +# define JIT_RA0 _R4 +# define JIT_FA0 _XF4 +# define JIT_SP _R15 +# define JIT_RET _R0 +# define JIT_FRET _XF0 +typedef jit_uint32_t jit_regset_t; #endif #define jit_data(u,v,w) _jit_data(_jit,u,v,w) @@ -350,17 +357,19 @@ extern jit_node_t *_jit_data(jit_state_t*, const void*, #define jit_cc_a0_cnd 0x00000100 /* arg1 is a conditinally set register */ #define jit_cc_a1_reg 0x00000200 /* arg1 is a register */ #define jit_cc_a1_chg 0x00000400 /* arg1 is modified */ -#define jit_cc_a1_int 0x00000800 /* arg1 is immediate word */ -#define jit_cc_a1_flt 0x00001000 /* arg1 is immediate float */ -#define jit_cc_a1_dbl 0x00002000 /* arg1 is immediate double */ -#define jit_cc_a1_arg 0x00004000 /* arg1 is an argument node */ -#define jit_cc_a1_rlh 0x00008000 /* arg1 is a register pair */ -#define jit_cc_a2_reg 0x00010000 /* arg2 is a register */ -#define jit_cc_a2_chg 0x00020000 /* arg2 is modified */ -#define jit_cc_a2_int 0x00100000 /* arg2 is immediate word */ -#define jit_cc_a2_flt 0x00200000 /* arg2 is immediate float */ -#define jit_cc_a2_dbl 0x00400000 /* arg2 is immediate double */ -#define jit_cc_a2_rlh 0x00800000 /* 
arg2 is a register pair */ +#define jit_cc_a1_dep 0x00000800 /* arg1 is incremented + * cannot set jit_cc_a1_chg */ +#define jit_cc_a1_int 0x00001000 /* arg1 is immediate word */ +#define jit_cc_a1_flt 0x00002000 /* arg1 is immediate float */ +#define jit_cc_a1_dbl 0x00004000 /* arg1 is immediate double */ +#define jit_cc_a1_arg 0x00008000 /* arg1 is an argument node */ +#define jit_cc_a1_rlh 0x00010000 /* arg1 is a register pair */ +#define jit_cc_a2_reg 0x00020000 /* arg2 is a register */ +#define jit_cc_a2_chg 0x00040000 /* arg2 is modified */ +#define jit_cc_a2_int 0x00080000 /* arg2 is immediate word */ +#define jit_cc_a2_flt 0x00100000 /* arg2 is immediate float */ +#define jit_cc_a2_dbl 0x00200000 /* arg2 is immediate double */ +#define jit_cc_a2_rlh 0x00400000 /* arg2 is a register pair */ #if __ia64__ || (__sparc__ && __WORDSIZE == 64) extern void @@ -445,7 +454,7 @@ typedef struct jit_value jit_value_t; typedef struct jit_compiler jit_compiler_t; typedef struct jit_function jit_function_t; typedef struct jit_register jit_register_t; -#if __arm__ +#if __arm__ || __sh__ # if DISASSEMBLER typedef struct jit_data_info jit_data_info_t; # endif @@ -520,7 +529,7 @@ typedef struct { jit_node_t *node; } jit_patch_t; -#if __arm__ && DISASSEMBLER +#if (__arm__ || __sh__) && DISASSEMBLER struct jit_data_info { jit_uword_t code; /* pointer in code buffer */ jit_word_t length; /* length of constant vector */ @@ -746,6 +755,25 @@ struct jit_compiler { jit_word_t length; /* length of instrs/values vector */ } vector; } consts; +#elif defined(__sh__) +# if DISASSEMBLER + struct { + jit_data_info_t *ptr; + jit_word_t offset; + jit_word_t length; + } data_info; /* constant pools information */ +# endif + jit_bool_t mode_d; + jit_bool_t no_flag; + jit_bool_t uses_fpu; + struct { + jit_uint8_t *data; /* pointer to code */ + jit_word_t size; /* size data */ + jit_word_t offset; /* pending patches */ + jit_word_t length; /* number of pending constants */ + jit_int32_t 
values[1024]; /* pending constants */ + jit_word_t patches[2048]; + } consts; #endif #if GET_JIT_SIZE /* Temporary storage to calculate instructions length */ diff --git a/deps/lightning/include/lightning/jit_sh.h b/deps/lightning/include/lightning/jit_sh.h new file mode 100644 index 000000000..25ba582ee --- /dev/null +++ b/deps/lightning/include/lightning/jit_sh.h @@ -0,0 +1,83 @@ +/* + * Copyright (C) 2020 Free Software Foundation, Inc. + * + * This file is part of GNU lightning. + * + * GNU lightning is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU lightning is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * License for more details. + * + * Authors: + * Paul Cercueil + */ + +#ifndef _jit_sh_h +#define _jit_sh_h + +#define JIT_HASH_CONSTS 0 +#define JIT_NUM_OPERANDS 2 + +typedef enum { +#define jit_r(i) (JIT_R0 + (i)) +#define jit_r_num() 3 +#define jit_v(i) (JIT_V0 + (i)) +#define jit_v_num() 6 +#define jit_f(i) (JIT_F0 - (i) * 2) +#ifdef __SH_FPU_ANY__ +# define jit_f_num() 8 +#else +# define jit_f_num() 0 +#endif + _R0, + + /* caller-saved temporary registers */ +#define JIT_R0 _R1 +#define JIT_R1 _R2 +#define JIT_R2 _R3 + _R1, _R2, _R3, + + /* argument registers */ + _R4, _R5, _R6, _R7, + + /* callee-saved registers */ +#define JIT_V0 _R8 +#define JIT_V1 _R9 +#define JIT_V2 _R10 +#define JIT_V3 _R11 +#define JIT_V4 _R12 +#define JIT_V5 _R13 + _R8, _R9, _R10, _R11, _R12, _R13, + +#define JIT_FP _R14 + _R14, + _R15, + + _GBR, + + /* floating-point registers */ +#define JIT_F0 _F14 +#define JIT_F1 _F12 +#define JIT_F2 _F10 +#define JIT_F3 _F8 +#define JIT_F4 _F6 +#define JIT_F5 _F4 +#define JIT_F6 _F2 +#define 
JIT_F7 _F0 + _F0, _F1, _F2, _F3, _F4, _F5, _F6, _F7, + _F8, _F9, _F10, _F11, _F12, _F13, _F14, _F15, + + /* Banked floating-point registers */ + _XF0, _XF1, _XF2, _XF3, _XF4, _XF5, _XF6, _XF7, + _XF8, _XF9, _XF10, _XF11, _XF12, _XF13, _XF14, _XF15, + +#define JIT_NOREG _NOREG + _NOREG, +} jit_reg_t; + +#endif /* _jit_sh_h */ diff --git a/deps/lightning/lib/Makefile.am b/deps/lightning/lib/Makefile.am index 04b5f9249..23437096d 100644 --- a/deps/lightning/lib/Makefile.am +++ b/deps/lightning/lib/Makefile.am @@ -17,7 +17,7 @@ AM_CFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include \ -D_GNU_SOURCE $(LIGHTNING_CFLAGS) liblightning_LTLIBRARIES = liblightning.la -liblightning_la_LDFLAGS = -version-info 2:1:0 +liblightning_la_LDFLAGS = -version-info 2:2:0 AM_CPPFLAGS = if get_jit_size @@ -83,6 +83,9 @@ EXTRA_DIST = \ jit_s390-cpu.c \ jit_s390-fpu.c \ jit_s390-sz.c \ + jit_sh.c \ + jit_sh-cpu.c \ + jit_sh-sz.c \ jit_sparc.c \ jit_sparc-cpu.c \ jit_sparc-fpu.c \ diff --git a/deps/lightning/lib/jit_aarch64-cpu.c b/deps/lightning/lib/jit_aarch64-cpu.c index b0bc26fcb..6a5805fa2 100644 --- a/deps/lightning/lib/jit_aarch64-cpu.c +++ b/deps/lightning/lib/jit_aarch64-cpu.c @@ -210,6 +210,8 @@ typedef union { jit_int32_t w; # undef ui } instr_t; +# define s9_p(d) ((d) >= -256 && (d) <= 255) +# define u12_p(d) ((d) >= 0 && (d) <= 4095) # define s26_p(d) ((d) >= -33554432 && (d) <= 33554431) # define ii(i) *_jit->pc.ui++ = i # define ldr(r0,r1) ldr_l(r0,r1) @@ -322,18 +324,40 @@ typedef union { # define A64_LDRWI 0xb9400000 # define A64_LDRSWI 0xb9800000 # define A64_STRB 0x38206800 +# define A64_STRB_B 0x38000c00 +# define A64_STRB_A 0x38000400 # define A64_LDRB 0x38606800 -# define A64_LDRSB 0x38e06800 +# define A64_LDRB_B 0x38400c00 +# define A64_LDRB_A 0x38400400 +# define A64_LDRSB 0x38a06800 +# define A64_LDRSB_B 0x38800c00 +# define A64_LDRSB_A 0x38800400 # define A64_STR 0xf8206800 +# define A64_STR_B 0xf8000c00 +# define A64_STR_A 0xf8000400 # define A64_LDR 
0xf8606800 +# define A64_LDR_B 0xf8400c00 +# define A64_LDR_A 0xf8400400 # define A64_LDAXR 0xc85ffc00 # define A64_STLXR 0xc800fc00 # define A64_STRH 0x78206800 +# define A64_STRH_B 0x78000c00 +# define A64_STRH_A 0x78000400 # define A64_LDRH 0x78606800 +# define A64_LDRH_B 0x78400c00 +# define A64_LDRH_A 0x78400400 # define A64_LDRSH 0x78a06800 +# define A64_LDRSH_B 0x78800c00 +# define A64_LDRSH_A 0x78800400 # define A64_STRW 0xb8206800 +# define A64_STRW_B 0xb8000c00 +# define A64_STRW_A 0xb8000400 # define A64_LDRW 0xb8606800 +# define A64_LDRW_B 0xb8400c00 +# define A64_LDRW_A 0xb8400400 # define A64_LDRSW 0xb8a06800 +# define A64_LDRSW_B 0xb8800c00 +# define A64_LDRSW_A 0xb8800400 # define A64_STURB 0x38000000 # define A64_LDURB 0x38400000 # define A64_LDURSB 0x38800000 @@ -448,38 +472,60 @@ typedef union { # define LDRSB(Rt,Rn,Rm) oxxx(A64_LDRSB,Rt,Rn,Rm) # define LDRSBI(Rt,Rn,Imm12) oxxi(A64_LDRSBI,Rt,Rn,Imm12) # define LDURSB(Rt,Rn,Imm9) oxx9(A64_LDURSB,Rt,Rn,Imm9) +# define LDRSB_B(Rt,Rn,Imm9) oxxs9(A64_LDRSB_B,Rt,Rn,Imm9) +# define LDRSB_A(Rt,Rn,Imm9) oxxs9(A64_LDRSB_A,Rt,Rn,Imm9) # define LDRB(Rt,Rn,Rm) oxxx(A64_LDRB,Rt,Rn,Rm) # define LDRBI(Rt,Rn,Imm12) oxxi(A64_LDRBI,Rt,Rn,Imm12) # define LDURB(Rt,Rn,Imm9) oxx9(A64_LDURB,Rt,Rn,Imm9) +# define LDRB_B(Rt,Rn,Imm9) oxxs9(A64_LDRB_B,Rt,Rn,Imm9) +# define LDRB_A(Rt,Rn,Imm9) oxxs9(A64_LDRB_A,Rt,Rn,Imm9) # define LDRSH(Rt,Rn,Rm) oxxx(A64_LDRSH,Rt,Rn,Rm) # define LDRSHI(Rt,Rn,Imm12) oxxi(A64_LDRSHI,Rt,Rn,Imm12) # define LDURSH(Rt,Rn,Imm9) oxx9(A64_LDURSH,Rt,Rn,Imm9) +# define LDRSH_B(Rt,Rn,Imm9) oxxs9(A64_LDRSH_B,Rt,Rn,Imm9) +# define LDRSH_A(Rt,Rn,Imm9) oxxs9(A64_LDRSH_A,Rt,Rn,Imm9) # define LDRH(Rt,Rn,Rm) oxxx(A64_LDRH,Rt,Rn,Rm) # define LDRHI(Rt,Rn,Imm12) oxxi(A64_LDRHI,Rt,Rn,Imm12) # define LDURH(Rt,Rn,Imm9) oxx9(A64_LDURH,Rt,Rn,Imm9) +# define LDRH_B(Rt,Rn,Imm9) oxxs9(A64_LDRH_B,Rt,Rn,Imm9) +# define LDRH_A(Rt,Rn,Imm9) oxxs9(A64_LDRH_A,Rt,Rn,Imm9) # define LDRSW(Rt,Rn,Rm) oxxx(A64_LDRSW,Rt,Rn,Rm) # 
define LDRSWI(Rt,Rn,Imm12) oxxi(A64_LDRSWI,Rt,Rn,Imm12) # define LDURSW(Rt,Rn,Imm9) oxx9(A64_LDURSW,Rt,Rn,Imm9) +# define LDRSW_B(Rt,Rn,Imm9) oxxs9(A64_LDRSW_B,Rt,Rn,Imm9) +# define LDRSW_A(Rt,Rn,Imm9) oxxs9(A64_LDRSW_A,Rt,Rn,Imm9) # define LDRW(Rt,Rn,Rm) oxxx(A64_LDRW,Rt,Rn,Rm) # define LDRWI(Rt,Rn,Imm12) oxxi(A64_LDRWI,Rt,Rn,Imm12) # define LDURW(Rt,Rn,Imm9) oxx9(A64_LDURW,Rt,Rn,Imm9) +# define LDRW_B(Rt,Rn,Imm9) oxxs9(A64_LDRW_B,Rt,Rn,Imm9) +# define LDRW_A(Rt,Rn,Imm9) oxxs9(A64_LDRW_A,Rt,Rn,Imm9) # define LDR(Rt,Rn,Rm) oxxx(A64_LDR,Rt,Rn,Rm) # define LDRI(Rt,Rn,Imm12) oxxi(A64_LDRI,Rt,Rn,Imm12) # define LDUR(Rt,Rn,Imm9) oxx9(A64_LDUR,Rt,Rn,Imm9) +# define LDR_B(Rt,Rn,Imm9) oxxs9(A64_LDR_B,Rt,Rn,Imm9) +# define LDR_A(Rt,Rn,Imm9) oxxs9(A64_LDR_A,Rt,Rn,Imm9) # define LDAXR(Rt,Rn) o_xx(A64_LDAXR,Rt,Rn) # define STLXR(Rs,Rt,Rn) oxxx(A64_STLXR,Rs,Rn,Rt) # define STRB(Rt,Rn,Rm) oxxx(A64_STRB,Rt,Rn,Rm) # define STRBI(Rt,Rn,Imm12) oxxi(A64_STRBI,Rt,Rn,Imm12) # define STURB(Rt,Rn,Imm9) oxx9(A64_STURB,Rt,Rn,Imm9) +# define STRB_B(Rt,Rn,Imm9) oxxs9(A64_STRB_B,Rt,Rn,Imm9) +# define STRB_A(Rt,Rn,Imm9) oxxs9(A64_STRB_A,Rt,Rn,Imm9) # define STRH(Rt,Rn,Rm) oxxx(A64_STRH,Rt,Rn,Rm) # define STRHI(Rt,Rn,Imm12) oxxi(A64_STRHI,Rt,Rn,Imm12) # define STURH(Rt,Rn,Imm9) oxx9(A64_STURH,Rt,Rn,Imm9) +# define STRH_B(Rt,Rn,Imm9) oxxs9(A64_STRH_B,Rt,Rn,Imm9) +# define STRH_A(Rt,Rn,Imm9) oxxs9(A64_STRH_A,Rt,Rn,Imm9) # define STRW(Rt,Rn,Rm) oxxx(A64_STRW,Rt,Rn,Rm) # define STRWI(Rt,Rn,Imm12) oxxi(A64_STRWI,Rt,Rn,Imm12) # define STURW(Rt,Rn,Imm9) oxx9(A64_STURW,Rt,Rn,Imm9) +# define STRW_B(Rt,Rn,Imm9) oxxs9(A64_STRW_B,Rt,Rn,Imm9) +# define STRW_A(Rt,Rn,Imm9) oxxs9(A64_STRW_A,Rt,Rn,Imm9) # define STR(Rt,Rn,Rm) oxxx(A64_STR,Rt,Rn,Rm) # define STRI(Rt,Rn,Imm12) oxxi(A64_STRI,Rt,Rn,Imm12) # define STUR(Rt,Rn,Imm9) oxx9(A64_STUR,Rt,Rn,Imm9) +# define STR_B(Rt,Rn,Imm9) oxxs9(A64_STR_B,Rt,Rn,Imm9) +# define STR_A(Rt,Rn,Imm9) oxxs9(A64_STR_A,Rt,Rn,Imm9) # define LDPI(Rt,Rt2,Rn,Simm7) 
oxxx7(A64_LDP|XS,Rt,Rt2,Rn,Simm7) # define STPI(Rt,Rt2,Rn,Simm7) oxxx7(A64_STP|XS,Rt,Rt2,Rn,Simm7) # define LDPI_PRE(Rt,Rt2,Rn,Simm7) oxxx7(A64_LDP_PRE|XS,Rt,Rt2,Rn,Simm7) @@ -502,6 +548,8 @@ static void _oxxx(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); static void _oxxi(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); # define oxx9(Op,Rd,Rn,Imm9) _oxx9(_jit,Op,Rd,Rn,Imm9) static void _oxx9(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +# define oxxs9(Op,Rd,Rn,Imm9) _oxxs9(_jit,Op,Rd,Rn,Imm9) +static void _oxxs9(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); # define ox19(Op,Rd,Simm19) _ox19(_jit,Op,Rd,Simm19) static void _ox19(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define oc19(Op,Cc,Simm19) _oc19(_jit,Op,Cc,Simm19) @@ -658,48 +706,41 @@ static void _xori(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); # define ldr_c(r0,r1) LDRSBI(r0,r1,0) # define ldi_c(r0,i0) _ldi_c(_jit,r0,i0) static void _ldi_c(jit_state_t*,jit_int32_t,jit_word_t); -# define ldr_uc(r0,r1) _ldr_uc(_jit,r0,r1) -static void _ldr_uc(jit_state_t*,jit_int32_t,jit_int32_t); +# define ldr_uc(r0,r1) LDRBI(r0, r1, 0) # define ldi_uc(r0,i0) _ldi_uc(_jit,r0,i0) static void _ldi_uc(jit_state_t*,jit_int32_t,jit_word_t); # define ldr_s(r0,r1) LDRSHI(r0,r1,0) # define ldi_s(r0,i0) _ldi_s(_jit,r0,i0) static void _ldi_s(jit_state_t*,jit_int32_t,jit_word_t); -# define ldr_us(r0,r1) _ldr_us(_jit,r0,r1) -static void _ldr_us(jit_state_t*,jit_int32_t,jit_int32_t); +# define ldr_us(r0,r1) LDRHI(r0, r1, 0) # define ldi_us(r0,i0) _ldi_us(_jit,r0,i0) static void _ldi_us(jit_state_t*,jit_int32_t,jit_word_t); # define ldr_i(r0,r1) LDRSWI(r0,r1,0) # define ldi_i(r0,i0) _ldi_i(_jit,r0,i0) static void _ldi_i(jit_state_t*,jit_int32_t,jit_word_t); -# define ldr_ui(r0,r1) _ldr_ui(_jit,r0,r1) -static void _ldr_ui(jit_state_t*,jit_int32_t,jit_int32_t); +# define ldr_ui(r0,r1) LDRWI(r0, r1, 0) # define ldi_ui(r0,i0) _ldi_ui(_jit,r0,i0) static void 
_ldi_ui(jit_state_t*,jit_int32_t,jit_word_t); # define ldr_l(r0,r1) LDRI(r0,r1,0) static void _ldr_l(jit_state_t*,jit_int32_t,jit_int32_t); # define ldi_l(r0,i0) _ldi_l(_jit,r0,i0) static void _ldi_l(jit_state_t*,jit_int32_t,jit_word_t); -# define ldxr_c(r0,r1,r2) _ldxr_c(_jit,r0,r1,r2) -static void _ldxr_c(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define ldxr_c(r0,r1,r2) LDRSB(r0, r1, r2) # define ldxi_c(r0,r1,i0) _ldxi_c(_jit,r0,r1,i0) static void _ldxi_c(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); -# define ldxr_uc(r0,r1,r2) _ldxr_uc(_jit,r0,r1,r2) -static void _ldxr_uc(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define ldxr_uc(r0,r1,r2) LDRB(r0, r1, r2) # define ldxi_uc(r0,r1,i0) _ldxi_uc(_jit,r0,r1,i0) static void _ldxi_uc(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); # define ldxr_s(r0,r1,r2) LDRSH(r0,r1,r2) # define ldxi_s(r0,r1,i0) _ldxi_s(_jit,r0,r1,i0) static void _ldxi_s(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); -# define ldxr_us(r0,r1,r2) _ldxr_us(_jit,r0,r1,r2) -static void _ldxr_us(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define ldxr_us(r0,r1,r2) LDRH(r0, r1, r2) # define ldxi_us(r0,r1,i0) _ldxi_us(_jit,r0,r1,i0) static void _ldxi_us(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); # define ldxr_i(r0,r1,r2) LDRSW(r0,r1,r2) # define ldxi_i(r0,r1,i0) _ldxi_i(_jit,r0,r1,i0) static void _ldxi_i(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); -# define ldxr_ui(r0,r1,r2) _ldxr_ui(_jit,r0,r1,r2) -static void _ldxr_ui(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define ldxr_ui(r0,r1,r2) LDRW(r0, r1, r2) # define ldxi_ui(r0,r1,i0) _ldxi_ui(_jit,r0,r1,i0) static void _ldxi_ui(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); # define ldxr_l(r0,r1,r2) LDR(r0,r1,r2) @@ -709,6 +750,48 @@ static void _ldxi_l(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); # define unldi(r0, i0, i1) generic_unldi(r0, i0, i1) # define unldr_u(r0, r1, i0) generic_unldr_u(r0, r1, i0) # define unldi_u(r0, i0, i1) 
generic_unldi_u(r0, i0, i1) +# define ldxbr_c(r0, r1, r2) generic_ldxbr_c(r0, r1, r2) +# define ldxbi_c(r0, r1, i0) _ldxbi_c(_jit, r0, r1, i0) +static void _ldxbi_c(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define ldxbr_uc(r0, r1, r2) generic_ldxbr_uc(r0, r1, r2) +# define ldxbi_uc(r0, r1, i0) _ldxbi_uc(_jit, r0, r1, i0) +static void _ldxbi_uc(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define ldxbr_s(r0, r1, r2) generic_ldxbr_s(r0, r1, r2) +# define ldxbi_s(r0, r1, i0) _ldxbi_s(_jit, r0, r1, i0) +static void _ldxbi_s(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define ldxbr_us(r0, r1, r2) generic_ldxbr_us(r0, r1, r2) +# define ldxbi_us(r0, r1, i0) _ldxbi_us(_jit, r0, r1, i0) +static void _ldxbi_us(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define ldxbr_i(r0, r1, r2) generic_ldxbr_i(r0, r1, r2) +# define ldxbi_i(r0, r1, i0) _ldxbi_i(_jit, r0, r1, i0) +static void _ldxbi_i(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define ldxbr_ui(r0, r1, r2) generic_ldxbr_ui(r0, r1, r2) +# define ldxbi_ui(r0, r1, i0) _ldxbi_ui(_jit, r0, r1, i0) +static void _ldxbi_ui(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define ldxbr_l(r0, r1, r2) generic_ldxbr_l(r0, r1, r2) +# define ldxbi_l(r0, r1, i0) _ldxbi_l(_jit, r0, r1, i0) +static void _ldxbi_l(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define ldxar_c(r0, r1, r2) generic_ldxar_c(r0, r1, r2) +# define ldxai_c(r0, r1, i0) _ldxai_c(_jit, r0, r1, i0) +static void _ldxai_c(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define ldxar_uc(r0, r1, r2) generic_ldxar_uc(r0, r1, r2) +# define ldxai_uc(r0, r1, i0) _ldxai_uc(_jit, r0, r1, i0) +static void _ldxai_uc(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define ldxar_s(r0, r1, r2) generic_ldxar_s(r0, r1, r2) +# define ldxai_s(r0, r1, i0) _ldxai_s(_jit, r0, r1, i0) +static void _ldxai_s(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define ldxar_us(r0, r1, r2) generic_ldxar_us(r0, r1, r2) +# define 
ldxai_us(r0, r1, i0) _ldxai_us(_jit, r0, r1, i0) +static void _ldxai_us(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define ldxar_i(r0, r1, r2) generic_ldxar_i(r0, r1, r2) +# define ldxai_i(r0, r1, i0) _ldxai_i(_jit, r0, r1, i0) +static void _ldxai_i(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define ldxar_ui(r0, r1, r2) generic_ldxar_ui(r0, r1, r2) +# define ldxai_ui(r0, r1, i0) _ldxai_ui(_jit, r0, r1, i0) +static void _ldxai_ui(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define ldxar_l(r0, r1, r2) generic_ldxar_l(r0, r1, r2) +# define ldxai_l(r0, r1, i0) _ldxai_l(_jit, r0, r1, i0) +static void _ldxai_l(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); # define str_c(r0,r1) STRBI(r1,r0,0) # define sti_c(i0,r0) _sti_c(_jit,i0,r0) static void _sti_c(jit_state_t*,jit_word_t,jit_int32_t); @@ -735,6 +818,30 @@ static void _stxi_i(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); static void _stxi_l(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); # define unstr(r0, r1, i0) generic_unstr(r0, r1, i0) # define unsti(i0, r0, i1) generic_unsti(i0, r0, i1) +# define stxbr_c(r0,r1,r2) generic_stxbr_c(r0,r1,r2) +# define stxbi_c(i0,r0,r1) _stxbi_c(_jit,i0,r0,r1) +static void _stxbi_c(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); +# define stxbr_s(r0,r1,r2) generic_stxbr_s(r0,r1,r2) +# define stxbi_s(i0,r0,r1) _stxbi_s(_jit,i0,r0,r1) +static void _stxbi_s(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); +# define stxbr_i(r0,r1,r2) generic_stxbr_i(r0,r1,r2) +# define stxbi_i(i0,r0,r1) _stxbi_i(_jit,i0,r0,r1) +static void _stxbi_i(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); +# define stxbr_l(r0,r1,r2) generic_stxbr_l(r0,r1,r2) +# define stxbi_l(i0,r0,r1) _stxbi_l(_jit,i0,r0,r1) +static void _stxbi_l(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); +# define stxar_c(r0,r1,r2) generic_stxar_c(r0,r1,r2) +# define stxai_c(i0,r0,r1) _stxai_c(_jit,i0,r0,r1) +static void _stxai_c(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); +# define 
stxar_s(r0,r1,r2) generic_stxar_s(r0,r1,r2) +# define stxai_s(i0,r0,r1) _stxai_s(_jit,i0,r0,r1) +static void _stxai_s(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); +# define stxar_i(r0,r1,r2) generic_stxar_i(r0,r1,r2) +# define stxai_i(i0,r0,r1) _stxai_i(_jit,i0,r0,r1) +static void _stxai_i(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); +# define stxar_l(r0,r1,r2) generic_stxar_l(r0,r1,r2) +# define stxai_l(i0,r0,r1) _stxai_l(_jit,i0,r0,r1) +static void _stxai_l(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); # define bswapr_us(r0,r1) _bswapr_us(_jit,r0,r1) static void _bswapr_us(jit_state_t*,jit_int32_t,jit_int32_t); # define bswapr_ui(r0,r1) _bswapr_ui(_jit,r0,r1) @@ -937,6 +1044,22 @@ _oxx9(jit_state_t *_jit, jit_int32_t Op, ii(i.w); } +static void +_oxxs9(jit_state_t *_jit, jit_int32_t Op, + jit_int32_t Rd, jit_int32_t Rn, jit_int32_t Imm9) +{ + instr_t i; + assert(!(Rd & ~0x1f)); + assert(!(Rn & ~0x1f)); + assert(s9_p(Imm9)); + assert(!(Op & ~0xffe00c00)); + i.w = Op; + i.Rd.b = Rd; + i.Rn.b = Rn; + i.imm9.b = Imm9; + ii(i.w); +} + static void _ox19(jit_state_t *_jit, jit_int32_t Op, jit_int32_t Rd, jit_int32_t Simm19) { @@ -1837,15 +1960,6 @@ _ldi_c(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) jit_unget_reg(reg); } -static void -_ldr_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) -{ - LDRBI(r0, r1, 0); -#if 0 - extr_uc(r0, r0); -#endif -} - static void _ldi_uc(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) { @@ -1866,15 +1980,6 @@ _ldi_s(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) jit_unget_reg(reg); } -static void -_ldr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) -{ - LDRHI(r0, r1, 0); -#if 0 - extr_us(r0, r0); -#endif -} - static void _ldi_us(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) { @@ -1895,15 +2000,6 @@ _ldi_i(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) jit_unget_reg(reg); } -static void -_ldr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) -{ - LDRWI(r0, r1, 0); -#if 0 - extr_ui(r0, r0); 
-#endif -} - static void _ldi_ui(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) { @@ -1924,13 +2020,6 @@ _ldi_l(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) jit_unget_reg(reg); } -static void -_ldxr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) -{ - LDRSB(r0, r1, r2); - extr_c(r0, r0); -} - static void _ldxi_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { @@ -1945,16 +2034,6 @@ _ldxi_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) LDRSB(r0, r1, rn(reg)); jit_unget_reg(reg); } - extr_c(r0, r0); -} - -static void -_ldxr_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) -{ - LDRB(r0, r1, r2); -#if 0 - extr_uc(r0, r0); -#endif } static void @@ -1971,9 +2050,6 @@ _ldxi_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) ldr_uc(r0, rn(reg)); jit_unget_reg(reg); } -#if 0 - extr_uc(r0, r0); -#endif } static void @@ -1992,15 +2068,6 @@ _ldxi_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) } } -static void -_ldxr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) -{ - LDRH(r0, r1, r2); -#if 0 - extr_us(r0, r0); -#endif -} - static void _ldxi_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { @@ -2015,9 +2082,6 @@ _ldxi_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) LDRH(r0, r1, rn(reg)); jit_unget_reg(reg); } -#if 0 - extr_us(r0, r0); -#endif } static void @@ -2036,15 +2100,6 @@ _ldxi_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) } } -static void -_ldxr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) -{ - LDRW(r0, r1, r2); -#if 0 - extr_ui(r0, r0); -#endif -} - static void _ldxi_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { @@ -2059,9 +2114,6 @@ _ldxi_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) LDRW(r0, r1, rn(reg)); jit_unget_reg(reg); } -#if 0 - extr_ui(r0, r0); -#endif } static void @@ 
-2080,6 +2132,104 @@ _ldxi_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) } } +static void +_ldxbi_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + if (s9_p(i0)) LDRSB_B(r0, r1, i0); + else generic_ldxbi_c(r0, r1, i0); +} + +static void +_ldxai_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + if (s9_p(i0)) LDRSB_A(r0, r1, i0); + else generic_ldxai_c(r0, r1, i0); +} + +static void +_ldxbi_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + if (s9_p(i0)) LDRB_B(r0, r1, i0); + else generic_ldxbi_uc(r0, r1, i0); +} + +static void +_ldxai_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + if (s9_p(i0)) LDRB_A(r0, r1, i0); + else generic_ldxai_uc(r0, r1, i0); +} + +static void +_ldxbi_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + if (s9_p(i0)) LDRSH_B(r0, r1, i0); + else generic_ldxbi_s(r0, r1, i0); +} + +static void +_ldxai_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + if (s9_p(i0)) LDRSH_A(r0, r1, i0); + else generic_ldxai_s(r0, r1, i0); +} + +static void +_ldxbi_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + if (s9_p(i0)) LDRH_B(r0, r1, i0); + else generic_ldxbi_us(r0, r1, i0); +} + +static void +_ldxai_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + if (s9_p(i0)) LDRH_A(r0, r1, i0); + else generic_ldxai_us(r0, r1, i0); +} + +static void +_ldxbi_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + if (s9_p(i0)) LDRSW_B(r0, r1, i0); + else generic_ldxbi_i(r0, r1, i0); +} + +static void +_ldxai_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + if (s9_p(i0)) LDRSW_A(r0, r1, i0); + else generic_ldxai_i(r0, r1, i0); +} + +static void +_ldxbi_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + if (s9_p(i0)) LDRW_B(r0, r1, i0); + else generic_ldxbi_ui(r0, r1, i0); +} + +static void 
+_ldxai_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + if (s9_p(i0)) LDRW_A(r0, r1, i0); + else generic_ldxai_ui(r0, r1, i0); +} + +static void +_ldxbi_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + if (s9_p(i0)) LDR_B(r0, r1, i0); + else generic_ldxbi_l(r0, r1, i0); +} + +static void +_ldxai_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + if (s9_p(i0)) LDR_A(r0, r1, i0); + else generic_ldxai_l(r0, r1, i0); +} + static void _sti_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0) { @@ -2184,6 +2334,62 @@ _stxi_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) } } +static void +_stxbi_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + if (s9_p(i0)) STRB_B(r1, r0, i0); + else generic_stxbi_c(r0, r1, i0); +} + +static void +_stxai_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + if (s9_p(i0)) STRB_A(r1, r0, i0); + else generic_stxai_c(r0, r1, i0); +} + +static void +_stxbi_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + if (s9_p(i0)) STRH_B(r1, r0, i0); + else generic_stxbi_s(r0, r1, i0); +} + +static void +_stxai_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + if (s9_p(i0)) STRH_A(r1, r0, i0); + else generic_stxai_s(r0, r1, i0); +} + +static void +_stxbi_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + if (s9_p(i0)) STRW_B(r1, r0, i0); + else generic_stxbi_i(r0, r1, i0); +} + +static void +_stxai_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + if (s9_p(i0)) STRW_A(r1, r0, i0); + else generic_stxai_i(r0, r1, i0); +} + +static void +_stxbi_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + if (s9_p(i0)) STR_B(r1, r0, i0); + else generic_stxbi_l(r0, r1, i0); +} + +static void +_stxai_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + if (s9_p(i0)) STR_A(r1, r0, i0); + else 
generic_stxai_l(r0, r1, i0); +} + static void _casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3, jit_word_t i0) diff --git a/deps/lightning/lib/jit_aarch64-fpu.c b/deps/lightning/lib/jit_aarch64-fpu.c index 51f40ce36..9e8c9cad1 100644 --- a/deps/lightning/lib/jit_aarch64-fpu.c +++ b/deps/lightning/lib/jit_aarch64-fpu.c @@ -41,6 +41,61 @@ # define A64_FDIV 0x1e201800 # define A64_FADD 0x1e202800 # define A64_FSUB 0x1e203800 +# define A64_FLDSTR 0x3c206800 +# define A64_FLDSTU 0x3d000000 +# define A64_FLDST 0x3c000000 +# define A64_FLDST_A 0x3c000400 +# define A64_FLDST_B 0x3c000c00 +# define FLDRB(Rt,Rn,Rm) vldstr(0,A64_FLDSTR,1,Rm,Rn,Rt) +# define FLDRH(Rt,Rn,Rm) vldstr(1,A64_FLDSTR,1,Rm,Rn,Rt) +# define FLDRS(Rt,Rn,Rm) vldstr(2,A64_FLDSTR,1,Rm,Rn,Rt) +# define FLDRD(Rt,Rn,Rm) vldstr(3,A64_FLDSTR,1,Rm,Rn,Rt) +# define FLDRQ(Rt,Rn,Rm) vldstr(0,A64_FLDSTR,3,Rm,Rn,Rt) +# define FSTRB(Rt,Rn,Rm) vldstr(0,A64_FLDSTR,0,Rm,Rn,Rt) +# define FSTRH(Rt,Rn,Rm) vldstr(1,A64_FLDSTR,0,Rm,Rn,Rt) +# define FSTRS(Rt,Rn,Rm) vldstr(2,A64_FLDSTR,0,Rm,Rn,Rt) +# define FSTRD(Rt,Rn,Rm) vldstr(3,A64_FLDSTR,0,Rm,Rn,Rt) +# define FSTRQ(Rt,Rn,Rm) vldstr(0,A64_FLDSTR,2,Rm,Rn,Rt) +# define FLDRBI(Rt,Rn,Imm9) vldst(0,A64_FLDST,1,Imm9,Rn,Rt) +# define FLDRHI(Rt,Rn,Imm9) vldst(1,A64_FLDST,1,Imm9,Rn,Rt) +# define FLDRSI(Rt,Rn,Imm9) vldst(2,A64_FLDST,1,Imm9,Rn,Rt) +# define FLDRDI(Rt,Rn,Imm9) vldst(3,A64_FLDST,1,Imm9,Rn,Rt) +# define FLDRQI(Rt,Rn,Imm9) vldst(0,A64_FLDST,3,Imm9,Rn,Rt) +# define FLDRB_B(Rt,Rn,Imm9) vldst(0,A64_FLDST_B,1,Imm9,Rn,Rt) +# define FLDRH_B(Rt,Rn,Imm9) vldst(1,A64_FLDST_B,1,Imm9,Rn,Rt) +# define FLDRS_B(Rt,Rn,Imm9) vldst(2,A64_FLDST_B,1,Imm9,Rn,Rt) +# define FLDRD_B(Rt,Rn,Imm9) vldst(3,A64_FLDST_B,1,Imm9,Rn,Rt) +# define FLDRQ_B(Rt,Rn,Imm9) vldst(0,A64_FLDST_B,3,Imm9,Rn,Rt) +# define FLDRB_A(Rt,Rn,Imm9) vldst(0,A64_FLDST_A,1,Imm9,Rn,Rt) +# define FLDRH_A(Rt,Rn,Imm9) vldst(1,A64_FLDST_A,1,Imm9,Rn,Rt) +# define FLDRS_A(Rt,Rn,Imm9) 
vldst(2,A64_FLDST_A,1,Imm9,Rn,Rt) +# define FLDRD_A(Rt,Rn,Imm9) vldst(3,A64_FLDST_A,1,Imm9,Rn,Rt) +# define FLDRQ_A(Rt,Rn,Imm9) vldst(0,A64_FLDST_A,3,Imm9,Rn,Rt) +# define FSTRBI(Rt,Rn,Imm9) vldst(0,A64_FLDST,0,Imm9,Rn,Rt) +# define FSTRHI(Rt,Rn,Imm9) vldst(1,A64_FLDST,0,Imm9,Rn,Rt) +# define FSTRSI(Rt,Rn,Imm9) vldst(2,A64_FLDST,0,Imm9,Rn,Rt) +# define FSTRDI(Rt,Rn,Imm9) vldst(3,A64_FLDST,0,Imm9,Rn,Rt) +# define FSTRQI(Rt,Rn,Imm9) vldst(0,A64_FLDST,2,Imm9,Rn,Rt) +# define FSTRB_B(Rt,Rn,Imm9) vldst(0,A64_FLDST_B,0,Imm9,Rn,Rt) +# define FSTRH_B(Rt,Rn,Imm9) vldst(1,A64_FLDST_B,0,Imm9,Rn,Rt) +# define FSTRS_B(Rt,Rn,Imm9) vldst(2,A64_FLDST_B,0,Imm9,Rn,Rt) +# define FSTRD_B(Rt,Rn,Imm9) vldst(3,A64_FLDST_B,0,Imm9,Rn,Rt) +# define FSTRQ_B(Rt,Rn,Imm9) vldst(0,A64_FLDST_B,2,Imm9,Rn,Rt) +# define FSTRB_A(Rt,Rn,Imm9) vldst(0,A64_FLDST_A,0,Imm9,Rn,Rt) +# define FSTRH_A(Rt,Rn,Imm9) vldst(1,A64_FLDST_A,0,Imm9,Rn,Rt) +# define FSTRS_A(Rt,Rn,Imm9) vldst(2,A64_FLDST_A,0,Imm9,Rn,Rt) +# define FSTRD_A(Rt,Rn,Imm9) vldst(3,A64_FLDST_A,0,Imm9,Rn,Rt) +# define FSTRQ_A(Rt,Rn,Imm9) vldst(0,A64_FLDST_A,2,Imm9,Rn,Rt) +# define FLDRBU(Rt,Rn,Imm12) vldstu(0,A64_FLDSTU,1,Imm12,Rn,Rt) +# define FLDRHU(Rt,Rn,Imm12) vldstu(1,A64_FLDSTU,1,Imm12,Rn,Rt) +# define FLDRSU(Rt,Rn,Imm12) vldstu(2,A64_FLDSTU,1,Imm12,Rn,Rt) +# define FLDRDU(Rt,Rn,Imm12) vldstu(3,A64_FLDSTU,1,Imm12,Rn,Rt) +# define FLDRQU(Rt,Rn,Imm12) vldstu(0,A64_FLDSTU,3,Imm12,Rn,Rt) +# define FSTRBU(Rt,Rn,Imm12) vldstu(0,A64_FLDSTU,0,Imm12,Rn,Rt) +# define FSTRHU(Rt,Rn,Imm12) vldstu(1,A64_FLDSTU,0,Imm12,Rn,Rt) +# define FSTRSU(Rt,Rn,Imm12) vldstu(2,A64_FLDSTU,0,Imm12,Rn,Rt) +# define FSTRDU(Rt,Rn,Imm12) vldstu(3,A64_FLDSTU,0,Imm12,Rn,Rt) +# define FSTRQU(Rt,Rn,Imm12) vldstu(0,A64_FLDSTU,2,Imm12,Rn,Rt) # define CNT(Rd,Rn) vqo_vv(0,A64_CNT,Rn,Rd) # define ADDV(Rd,Rn) vqo_vv(0,A64_ADDV,Rn,Rd) # define FCMPES(Rn,Rm) os_vv(A64_FCMPE,0,Rn,Rm) @@ -101,6 +156,15 @@ static void _os_vv(jit_state_t*,jit_int32_t, # define vqo_vv(Q,Op,Rn,Rd) 
_vqo_vv(_jit,Q,Op,Rn,Rd) static void _vqo_vv(jit_state_t*,jit_int32_t, jit_int32_t,jit_int32_t,jit_int32_t); +# define vldstr(size,Op,opc,Rm,Rn,Rt) _vldstr(_jit,size,Op,opc,Rm,Rn,Rt) +static void _vldstr(jit_state_t*,jit_int32_t,jit_int32_t, + jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +# define vldst(size,Op,opc,Imm9,Rn,Rt) _vldst(_jit,size,Op,opc,Imm9,Rn,Rt) +static void _vldst(jit_state_t*,jit_int32_t,jit_int32_t, + jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +# define vldstu(size,Op,opc,Imm12,Rn,Rt) _vldstu(_jit,size,Op,opc,Imm12,Rn,Rt) +static void _vldstu(jit_state_t*,jit_int32_t,jit_int32_t, + jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); # define popcntr(r0,r1) _popcntr(_jit,r0,r1); static void _popcntr(jit_state_t*,jit_int32_t,jit_int32_t); # define truncr_f_i(r0,r1) _truncr_f_i(_jit,r0,r1) @@ -140,6 +204,12 @@ static void _ldi_f(jit_state_t*,jit_int32_t,jit_word_t); static void _ldxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define ldxi_f(r0,r1,i0) _ldxi_f(_jit,r0,r1,i0) static void _ldxi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define ldxbr_f(r0,r1,r2) generic_ldxbr_f(r0,r1,r2) +# define ldxbi_f(r0,r1,i0) _ldxbi_f(_jit,r0,r1,i0) +static void _ldxbi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define ldxar_f(r0,r1,r2) generic_ldxar_f(r0,r1,r2) +# define ldxai_f(r0,r1,i0) _ldxai_f(_jit,r0,r1,i0) +static void _ldxai_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); # define unldr_x(r0, r1, i0) generic_unldr_x(r0, r1, i0) # define unldi_x(r0, i0, i1) generic_unldi_x(r0, i0, i1) # define str_f(r0,r1) _str_f(_jit,r0,r1) @@ -150,6 +220,12 @@ static void _sti_f(jit_state_t*,jit_word_t,jit_int32_t); static void _stxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define stxi_f(i0,r0,r1) _stxi_f(_jit,i0,r0,r1) static void _stxi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); +# define stxbr_f(r0,r1,r2) generic_stxbr_f(r0,r1,r2) +# define stxbi_f(i0,r0,r1) _stxbi_f(_jit,i0,r0,r1) +static void 
_stxbi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); +# define stxar_f(r0,r1,r2) generic_stxar_f(r0,r1,r2) +# define stxai_f(i0,r0,r1) _stxai_f(_jit,i0,r0,r1) +static void _stxai_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); # define unstr_x(r0, r1, i0) generic_unstr_x(r0, r1, i0) # define unsti_x(i0, r0, i1) generic_unsti_x(i0, r0, i1) # define movr_f(r0,r1) _movr_f(_jit,r0,r1) @@ -267,6 +343,12 @@ static void _ldi_d(jit_state_t*,jit_int32_t,jit_word_t); static void _ldxr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define ldxi_d(r0,r1,i0) _ldxi_d(_jit,r0,r1,i0) static void _ldxi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define ldxbr_d(r0,r1,r2) generic_ldxbr_d(r0,r1,r2) +# define ldxbi_d(r0,r1,i0) _ldxbi_d(_jit,r0,r1,i0) +static void _ldxbi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define ldxar_d(r0,r1,r2) generic_ldxar_d(r0,r1,r2) +# define ldxai_d(r0,r1,i0) _ldxai_d(_jit,r0,r1,i0) +static void _ldxai_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); # define str_d(r0,r1) _str_d(_jit,r0,r1) static void _str_d(jit_state_t*,jit_int32_t,jit_int32_t); # define sti_d(i0,r0) _sti_d(_jit,i0,r0) @@ -275,6 +357,12 @@ static void _sti_d(jit_state_t*,jit_word_t,jit_int32_t); static void _stxr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define stxi_d(i0,r0,r1) _stxi_d(_jit,i0,r0,r1) static void _stxi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); +# define stxbr_d(r0,r1,r2) generic_stxbr_d(r0,r1,r2) +# define stxbi_d(i0,r0,r1) _stxbi_d(_jit,i0,r0,r1) +static void _stxbi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); +# define stxar_d(r0,r1,r2) generic_stxar_d(r0,r1,r2) +# define stxai_d(i0,r0,r1) _stxai_d(_jit,i0,r0,r1) +static void _stxai_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); # define movr_d(r0,r1) _movr_d(_jit,r0,r1) static void _movr_d(jit_state_t*,jit_int32_t,jit_int32_t); # define movi_d(r0,i0) _movi_d(_jit,r0,i0) @@ -450,6 +538,69 @@ _vqo_vv(jit_state_t *_jit, jit_int32_t Q, ii(i.w); 
} +static void +_vldstr(jit_state_t *_jit, jit_int32_t ldst_size, + jit_int32_t Op, jit_int32_t opc, jit_int32_t Rm, + jit_int32_t Rn, jit_int32_t Rt) +{ + instr_t i; + assert(!(Rm & ~0x1f)); + assert(!(Rn & ~0x1f)); + assert(!(Rt & ~0x1f)); + assert(!(opc & ~0x3)); + assert(!(ldst_size & ~0x3)); + assert(!(Op & ~0x3f20fc00)); + i.w = Op; + i.ldst_size.b = ldst_size; + i.opc.b = opc; + i.Rm.b = Rm; + i.Rn.b = Rn; + i.Rt.b = Rt; + ii(i.w); +} + +static void +_vldst(jit_state_t *_jit, jit_int32_t ldst_size, + jit_int32_t Op, jit_int32_t opc, jit_int32_t Imm9, + jit_int32_t Rn, jit_int32_t Rt) +{ + instr_t i; + assert(!(Rn & ~0x1f)); + assert(!(Rt & ~0x1f)); + assert(!(opc & ~0x3)); + assert(s9_p(Imm9)); + assert(!(ldst_size & ~0x3)); + assert(!(Op & ~0x3f200c00)); + i.w = Op; + i.ldst_size.b = ldst_size; + i.opc.b = opc; + i.imm9.b = Imm9; + i.Rn.b = Rn; + i.Rt.b = Rt; + ii(i.w); +} + +static void +_vldstu(jit_state_t *_jit, jit_int32_t ldst_size, + jit_int32_t Op, jit_int32_t opc, jit_int32_t Imm12, + jit_int32_t Rn, jit_int32_t Rt) +{ + instr_t i; + assert(!(Rn & ~0x1f)); + assert(!(Rt & ~0x1f)); + assert(!(opc & ~0x3)); + assert(u12_p(Imm12)); + assert(!(ldst_size & ~0x3)); + assert(!(Op & ~0x3f000000)); + i.w = Op; + i.ldst_size.b = ldst_size; + i.opc.b = opc; + i.imm12.b = Imm12; + i.Rn.b = Rn; + i.Rt.b = Rt; + ii(i.w); +} + #define fopi(name) \ static void \ _##name##i_f(jit_state_t *_jit, \ @@ -532,11 +683,15 @@ fopi(div) static void _ldr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { +#if 1 + FLDRSU(r0, r1, 0); +#else jit_int32_t reg; reg = jit_get_reg(jit_class_gpr); ldr_i(rn(reg), r1); FMOVSW(r0, rn(reg)); jit_unget_reg(reg); +#endif } static void @@ -544,39 +699,107 @@ _ldi_f(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) { jit_int32_t reg; reg = jit_get_reg(jit_class_gpr); +#if 1 + movi(rn(reg), i0); + ldr_f(r0, rn(reg)); +#else ldi_i(rn(reg), i0); FMOVSW(r0, rn(reg)); +#endif jit_unget_reg(reg); } static void _ldxr_f(jit_state_t *_jit, 
jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { +#if 1 + FLDRS(r0, r1, r2); +#else jit_int32_t reg; reg = jit_get_reg(jit_class_gpr); ldxr_i(rn(reg), r1, r2); FMOVSW(r0, rn(reg)); jit_unget_reg(reg); +#endif } static void _ldxi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { jit_int32_t reg; +#if 1 + if (s9_p(i0)) + FLDRSI(r0, r1, i0); + else if (u12_p(i0)) + FLDRSU(r0, r1, i0); + else { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + ldxr_f(r0, r1, rn(reg)); + jit_unget_reg(reg); + } +#else reg = jit_get_reg(jit_class_gpr); ldxi_i(rn(reg), r1, i0); FMOVSW(r0, rn(reg)); jit_unget_reg(reg); +#endif +} + +static void +_ldxbi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + jit_int32_t reg; +#if 1 + if (s9_p(i0)) + FLDRS_B(r0, r1, i0); + else { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + ldxbr_f(r0, r1, rn(reg)); + jit_unget_reg(reg); + } +#else + reg = jit_get_reg(jit_class_gpr); + ldxbi_i(rn(reg), r1, i0); + FMOVSW(r0, rn(reg)); + jit_unget_reg(reg); +#endif +} + +static void +_ldxai_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + jit_int32_t reg; +#if 1 + if (s9_p(i0)) + FLDRS_A(r0, r1, i0); + else { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + ldxar_f(r0, r1, rn(reg)); + jit_unget_reg(reg); + } +#else + reg = jit_get_reg(jit_class_gpr); + ldxai_i(rn(reg), r1, i0); + FMOVSW(r0, rn(reg)); + jit_unget_reg(reg); +#endif } static void _str_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { +#if 1 + FSTRSU(r1, r0, 0); +#else jit_int32_t reg; reg = jit_get_reg(jit_class_gpr); FMOVWS(rn(reg), r1); str_i(r0, rn(reg)); jit_unget_reg(reg); +#endif } static void @@ -584,29 +807,93 @@ _sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0) { jit_int32_t reg; reg = jit_get_reg(jit_class_gpr); +#if 1 + movi(rn(reg), i0); + str_f(rn(reg), r0); +#else FMOVWS(rn(reg), r0); sti_i(i0, rn(reg)); +#endif jit_unget_reg(reg); } static void _stxr_f(jit_state_t *_jit, 
jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { +#if 1 + FSTRS(r2, r1, r0); +#else jit_int32_t reg; reg = jit_get_reg(jit_class_gpr); FMOVWS(rn(reg), r2); stxr_i(r0, r1, rn(reg)); jit_unget_reg(reg); +#endif } static void _stxi_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { jit_int32_t reg; +#if 1 + if (s9_p(i0)) + FSTRSI(r1, r0, i0); + else if (u12_p(i0)) + FSTRSU(r1, r0, i0); + else { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + stxr_f(rn(reg), r0, r1); + jit_unget_reg(reg); + } +#else reg = jit_get_reg(jit_class_gpr); FMOVWS(rn(reg), r1); stxi_i(i0, r0, rn(reg)); jit_unget_reg(reg); +#endif +} + +static void +_stxbi_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + jit_int32_t reg; +#if 1 + if (s9_p(i0)) + FSTRS_B(r1, r0, i0); + else { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + stxbr_f(rn(reg), r0, r1); + jit_unget_reg(reg); + } +#else + reg = jit_get_reg(jit_class_gpr); + FMOVWS(rn(reg), r1); + stxbi_i(i0, r0, rn(reg)); + jit_unget_reg(reg); +#endif +} + +static void +_stxai_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + jit_int32_t reg; +#if 1 + if (s9_p(i0)) + FSTRS_A(r1, r0, i0); + else { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + stxar_f(rn(reg), r0, r1); + jit_unget_reg(reg); + } +#else + reg = jit_get_reg(jit_class_gpr); + FMOVWS(rn(reg), r1); + stxai_i(i0, r0, rn(reg)); + jit_unget_reg(reg); +#endif } static void @@ -759,11 +1046,15 @@ dopi(div) static void _ldr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { +#if 1 + FLDRDU(r0, r1, 0); +#else jit_int32_t reg; reg = jit_get_reg(jit_class_gpr); ldr_l(rn(reg), r1); FMOVDX(r0, rn(reg)); jit_unget_reg(reg); +#endif } static void @@ -771,39 +1062,107 @@ _ldi_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) { jit_int32_t reg; reg = jit_get_reg(jit_class_gpr); +#if 1 + movi(rn(reg), i0); + ldr_d(r0, rn(reg)); +#else ldi_l(rn(reg), i0); FMOVDX(r0, rn(reg)); +#endif jit_unget_reg(reg); 
} static void _ldxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { +#if 1 + FLDRD(r0, r1, r2); +#else jit_int32_t reg; reg = jit_get_reg(jit_class_gpr); ldxr_l(rn(reg), r1, r2); FMOVDX(r0, rn(reg)); jit_unget_reg(reg); +#endif } static void _ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { jit_int32_t reg; +#if 1 + if (s9_p(i0)) + FLDRDI(r0, r1, i0); + else if (u12_p(i0)) + FLDRDU(r0, r1, i0); + else { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + ldxr_d(r0, r1, rn(reg)); + jit_unget_reg(reg); + } +#else reg = jit_get_reg(jit_class_gpr); ldxi_l(rn(reg), r1, i0); FMOVDX(r0, rn(reg)); jit_unget_reg(reg); +#endif +} + +static void +_ldxbi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + jit_int32_t reg; +#if 1 + if (s9_p(i0)) + FLDRD_B(r0, r1, i0); + else { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + ldxbr_d(r0, r1, rn(reg)); + jit_unget_reg(reg); + } +#else + reg = jit_get_reg(jit_class_gpr); + ldxbi_l(rn(reg), r1, i0); + FMOVDX(r0, rn(reg)); + jit_unget_reg(reg); +#endif +} + +static void +_ldxai_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + jit_int32_t reg; +#if 1 + if (s9_p(i0)) + FLDRD_A(r0, r1, i0); + else { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + ldxar_d(r0, r1, rn(reg)); + jit_unget_reg(reg); + } +#else + reg = jit_get_reg(jit_class_gpr); + ldxai_l(rn(reg), r1, i0); + FMOVDX(r0, rn(reg)); + jit_unget_reg(reg); +#endif } static void _str_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { +#if 1 + FSTRDU(r1, r0, 0); +#else jit_int32_t reg; reg = jit_get_reg(jit_class_gpr); FMOVXD(rn(reg), r1); str_l(r0, rn(reg)); jit_unget_reg(reg); +#endif } static void @@ -811,29 +1170,93 @@ _sti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0) { jit_int32_t reg; reg = jit_get_reg(jit_class_gpr); +#if 1 + movi(rn(reg), i0); + str_d(rn(reg), r0); +#else FMOVXD(rn(reg), r0); sti_l(i0, rn(reg)); +#endif jit_unget_reg(reg); } 
static void _stxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { +#if 1 + FSTRD(r2, r1, r0); +#else jit_int32_t reg; reg = jit_get_reg(jit_class_gpr); FMOVXD(rn(reg), r2); stxr_l(r0, r1, rn(reg)); jit_unget_reg(reg); +#endif } static void _stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { jit_int32_t reg; +#if 1 + if (s9_p(i0)) + FSTRDI(r1, r0, i0); + else if (u12_p(i0)) + FSTRDU(r1, r0, i0); + else { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + stxr_d(rn(reg), r0, r1); + jit_unget_reg(reg); + } +#else reg = jit_get_reg(jit_class_gpr); FMOVXD(rn(reg), r1); stxi_l(i0, r0, rn(reg)); jit_unget_reg(reg); +#endif +} + +static void +_stxbi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + jit_int32_t reg; +#if 1 + if (s9_p(i0)) + FSTRD_B(r1, r0, i0); + else { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + stxbr_d(rn(reg), r0, r1); + jit_unget_reg(reg); + } +#else + reg = jit_get_reg(jit_class_gpr); + FMOVXD(rn(reg), r1); + stxbi_l(i0, r0, rn(reg)); + jit_unget_reg(reg); +#endif +} + +static void +_stxai_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + jit_int32_t reg; +#if 1 + if (s9_p(i0)) + FSTRD_A(r1, r0, i0); + else { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + stxar_d(rn(reg), r0, r1); + jit_unget_reg(reg); + } +#else + reg = jit_get_reg(jit_class_gpr); + FMOVXD(rn(reg), r1); + stxai_l(i0, r0, rn(reg)); + jit_unget_reg(reg); +#endif } static void diff --git a/deps/lightning/lib/jit_aarch64-sz.c b/deps/lightning/lib/jit_aarch64-sz.c index 435bbe95c..904770265 100644 --- a/deps/lightning/lib/jit_aarch64-sz.c +++ b/deps/lightning/lib/jit_aarch64-sz.c @@ -1,7 +1,7 @@ #if __WORDSIZE == 64 # if PACKED_STACK -#define JIT_INSTR_MAX 96 +#define JIT_INSTR_MAX 64 0, /* data */ 0, /* live */ 4, /* align */ @@ -11,7 +11,7 @@ 0, /* #name */ 0, /* #note */ 0, /* label */ - 96, /* prolog */ + 64, /* prolog */ 0, /* ellipsis */ 0, /* va_push */ 0, 
/* allocai */ @@ -43,7 +43,7 @@ 0, /* putargi_l */ 4, /* va_start */ 8, /* va_arg */ - 12, /* va_arg_d */ + 8, /* va_arg_d */ 0, /* va_end */ 4, /* addr */ 20, /* addi */ @@ -117,7 +117,7 @@ 8, /* movnr */ 8, /* movzr */ 28, /* casr */ - 36, /* casi */ + 40, /* casi */ 4, /* extr_c */ 4, /* exti_c */ 4, /* extr_uc */ @@ -156,8 +156,8 @@ 16, /* ldi_ui */ 4, /* ldr_l */ 16, /* ldi_l */ - 8, /* ldxr_c */ - 20, /* ldxi_c */ + 4, /* ldxr_c */ + 16, /* ldxi_c */ 4, /* ldxr_uc */ 20, /* ldxi_uc */ 4, /* ldxr_s */ @@ -269,7 +269,7 @@ 0, /* retval_i */ 0, /* retval_ui */ 0, /* retval_l */ - 96, /* epilog */ + 64, /* epilog */ 0, /* arg_f */ 0, /* getarg_f */ 0, /* putargr_f */ @@ -323,14 +323,14 @@ 4, /* extr_d_f */ 4, /* movr_f */ 8, /* movi_f */ - 8, /* ldr_f */ - 20, /* ldi_f */ - 8, /* ldxr_f */ - 24, /* ldxi_f */ - 8, /* str_f */ - 20, /* sti_f */ - 8, /* stxr_f */ - 24, /* stxi_f */ + 4, /* ldr_f */ + 16, /* ldi_f */ + 4, /* ldxr_f */ + 16, /* ldxi_f */ + 4, /* str_f */ + 16, /* sti_f */ + 4, /* stxr_f */ + 16, /* stxi_f */ 8, /* bltr_f */ 16, /* blti_f */ 8, /* bler_f */ @@ -416,21 +416,21 @@ 4, /* extr_d */ 4, /* extr_f_d */ 4, /* movr_d */ - 12, /* movi_d */ - 8, /* ldr_d */ - 20, /* ldi_d */ - 8, /* ldxr_d */ - 24, /* ldxi_d */ - 8, /* str_d */ - 20, /* sti_d */ - 8, /* stxr_d */ - 24, /* stxi_d */ + 16, /* movi_d */ + 4, /* ldr_d */ + 16, /* ldi_d */ + 4, /* ldxr_d */ + 16, /* ldxi_d */ + 4, /* str_d */ + 16, /* sti_d */ + 4, /* stxr_d */ + 16, /* stxi_d */ 8, /* bltr_d */ 16, /* blti_d */ 8, /* bler_d */ 16, /* blei_d */ 8, /* beqr_d */ - 20, /* beqi_d */ + 24, /* beqi_d */ 8, /* bger_d */ 16, /* bgei_d */ 8, /* bgtr_d */ @@ -496,9 +496,9 @@ 8, /* qlshi */ 52, /* qlshr_u */ 8, /* qlshi_u */ - 52, /* qrshr */ + 44, /* qrshr */ 8, /* qrshi */ - 52, /* qrshr_u */ + 48, /* qrshr_u */ 8, /* qrshi_u */ 24, /* unldr */ 44, /* unldi */ @@ -506,10 +506,10 @@ 44, /* unldi_u */ 20, /* unstr */ 56, /* unsti */ - 8, /* unldr_x */ - 20, /* unldi_x */ - 8, /* unstr_x */ - 20, 
/* unsti_x */ + 4, /* unldr_x */ + 16, /* unldi_x */ + 4, /* unstr_x */ + 16, /* unsti_x */ 4, /* fmar_f */ 0, /* fmai_f */ 4, /* fmsr_f */ @@ -530,9 +530,69 @@ 16, /* hmuli */ 4, /* hmulr_u */ 16, /* hmuli_u */ + 8, /* ldxbr_c */ + 4, /* ldxbi_c */ + 8, /* ldxar_c */ + 4, /* ldxai_c */ + 8, /* ldxbr_uc */ + 4, /* ldxbi_uc */ + 8, /* ldxar_uc */ + 4, /* ldxai_uc */ + 8, /* ldxbr_s */ + 4, /* ldxbi_s */ + 8, /* ldxar_s */ + 4, /* ldxai_s */ + 8, /* ldxbr_us */ + 4, /* ldxbi_us */ + 8, /* ldxar_us */ + 4, /* ldxai_us */ + 8, /* ldxbr_i */ + 4, /* ldxbi_i */ + 8, /* ldxar_i */ + 4, /* ldxai_i */ + 8, /* ldxbr_ui */ + 4, /* ldxbi_ui */ + 8, /* ldxar_ui */ + 4, /* ldxai_ui */ + 8, /* ldxbr_l */ + 4, /* ldxbi_l */ + 8, /* ldxar_l */ + 4, /* ldxai_l */ + 8, /* ldxbr_f */ + 4, /* ldxbi_f */ + 8, /* ldxar_f */ + 4, /* ldxai_f */ + 8, /* ldxbr_d */ + 4, /* ldxbi_d */ + 8, /* ldxar_d */ + 4, /* ldxai_d */ + 8, /* stxbr_c */ + 4, /* stxbi_c */ + 8, /* stxar_c */ + 4, /* stxai_c */ + 8, /* stxbr_s */ + 4, /* stxbi_s */ + 8, /* stxar_s */ + 4, /* stxai_s */ + 8, /* stxbr_i */ + 4, /* stxbi_i */ + 8, /* stxar_i */ + 4, /* stxai_i */ + 8, /* stxbr_l */ + 4, /* stxbi_l */ + 8, /* stxar_l */ + 4, /* stxai_l */ + 8, /* stxbr_f */ + 4, /* stxbi_f */ + 8, /* stxar_f */ + 4, /* stxai_f */ + 8, /* stxbr_d */ + 4, /* stxbi_d */ + 8, /* stxar_d */ + 4, /* stxai_d */ # else /* PACKED_STACK */ -#define JIT_INSTR_MAX 120 +#define JIT_INSTR_MAX 84 0, /* data */ 0, /* live */ 12, /* align */ @@ -542,7 +602,7 @@ 0, /* #name */ 0, /* #note */ 0, /* label */ - 120, /* prolog */ + 84, /* prolog */ 0, /* ellipsis */ 0, /* va_push */ 0, /* allocai */ @@ -574,7 +634,7 @@ 0, /* putargi_l */ 44, /* va_start */ 48, /* va_arg */ - 56, /* va_arg_d */ + 48, /* va_arg_d */ 0, /* va_end */ 4, /* addr */ 20, /* addi */ @@ -687,8 +747,8 @@ 16, /* ldi_ui */ 4, /* ldr_l */ 16, /* ldi_l */ - 8, /* ldxr_c */ - 20, /* ldxi_c */ + 4, /* ldxr_c */ + 16, /* ldxi_c */ 4, /* ldxr_uc */ 20, /* ldxi_uc */ 4, /* ldxr_s */ 
@@ -800,7 +860,7 @@ 0, /* retval_i */ 0, /* retval_ui */ 0, /* retval_l */ - 96, /* epilog */ + 64, /* epilog */ 0, /* arg_f */ 0, /* getarg_f */ 0, /* putargr_f */ @@ -854,14 +914,14 @@ 4, /* extr_d_f */ 4, /* movr_f */ 8, /* movi_f */ - 8, /* ldr_f */ - 20, /* ldi_f */ - 8, /* ldxr_f */ - 24, /* ldxi_f */ - 8, /* str_f */ - 20, /* sti_f */ - 8, /* stxr_f */ - 24, /* stxi_f */ + 4, /* ldr_f */ + 16, /* ldi_f */ + 4, /* ldxr_f */ + 16, /* ldxi_f */ + 4, /* str_f */ + 16, /* sti_f */ + 4, /* stxr_f */ + 16, /* stxi_f */ 8, /* bltr_f */ 16, /* blti_f */ 8, /* bler_f */ @@ -947,21 +1007,21 @@ 4, /* extr_d */ 4, /* extr_f_d */ 4, /* movr_d */ - 12, /* movi_d */ - 8, /* ldr_d */ - 20, /* ldi_d */ - 8, /* ldxr_d */ - 24, /* ldxi_d */ - 8, /* str_d */ - 20, /* sti_d */ - 8, /* stxr_d */ - 24, /* stxi_d */ + 16, /* movi_d */ + 4, /* ldr_d */ + 16, /* ldi_d */ + 4, /* ldxr_d */ + 16, /* ldxi_d */ + 4, /* str_d */ + 16, /* sti_d */ + 4, /* stxr_d */ + 16, /* stxi_d */ 8, /* bltr_d */ 16, /* blti_d */ 8, /* bler_d */ 16, /* blei_d */ 8, /* beqr_d */ - 20, /* beqi_d */ + 24, /* beqi_d */ 8, /* bger_d */ 16, /* bgei_d */ 8, /* bgtr_d */ @@ -1027,9 +1087,9 @@ 8, /* qlshi */ 52, /* qlshr_u */ 8, /* qlshi_u */ - 52, /* qrshr */ + 44, /* qrshr */ 8, /* qrshi */ - 52, /* qrshr_u */ + 48, /* qrshr_u */ 8, /* qrshi_u */ 24, /* unldr */ 44, /* unldi */ @@ -1037,10 +1097,10 @@ 44, /* unldi_u */ 20, /* unstr */ 56, /* unsti */ - 8, /* unldr_x */ - 20, /* unldi_x */ - 8, /* unstr_x */ - 20, /* unsti_x */ + 4, /* unldr_x */ + 16, /* unldi_x */ + 4, /* unstr_x */ + 16, /* unsti_x */ 4, /* fmar_f */ 0, /* fmai_f */ 4, /* fmsr_f */ @@ -1061,5 +1121,65 @@ 16, /* hmuli */ 4, /* hmulr_u */ 16, /* hmuli_u */ + 8, /* ldxbr_c */ + 4, /* ldxbi_c */ + 8, /* ldxar_c */ + 4, /* ldxai_c */ + 8, /* ldxbr_uc */ + 4, /* ldxbi_uc */ + 8, /* ldxar_uc */ + 4, /* ldxai_uc */ + 8, /* ldxbr_s */ + 4, /* ldxbi_s */ + 8, /* ldxar_s */ + 4, /* ldxai_s */ + 8, /* ldxbr_us */ + 4, /* ldxbi_us */ + 8, /* ldxar_us */ + 
4, /* ldxai_us */ + 8, /* ldxbr_i */ + 4, /* ldxbi_i */ + 8, /* ldxar_i */ + 4, /* ldxai_i */ + 8, /* ldxbr_ui */ + 4, /* ldxbi_ui */ + 8, /* ldxar_ui */ + 4, /* ldxai_ui */ + 8, /* ldxbr_l */ + 4, /* ldxbi_l */ + 8, /* ldxar_l */ + 4, /* ldxai_l */ + 8, /* ldxbr_f */ + 4, /* ldxbi_f */ + 8, /* ldxar_f */ + 4, /* ldxai_f */ + 8, /* ldxbr_d */ + 4, /* ldxbi_d */ + 8, /* ldxar_d */ + 4, /* ldxai_d */ + 8, /* stxbr_c */ + 4, /* stxbi_c */ + 8, /* stxar_c */ + 4, /* stxai_c */ + 8, /* stxbr_s */ + 4, /* stxbi_s */ + 8, /* stxar_s */ + 4, /* stxai_s */ + 8, /* stxbr_i */ + 4, /* stxbi_i */ + 8, /* stxar_i */ + 4, /* stxai_i */ + 8, /* stxbr_l */ + 4, /* stxbi_l */ + 8, /* stxar_l */ + 4, /* stxai_l */ + 8, /* stxbr_f */ + 4, /* stxbi_f */ + 8, /* stxar_f */ + 4, /* stxai_f */ + 8, /* stxbr_d */ + 4, /* stxbi_d */ + 8, /* stxar_d */ + 4, /* stxai_d */ # endif #endif /* __WORDSIZE */ diff --git a/deps/lightning/lib/jit_aarch64.c b/deps/lightning/lib/jit_aarch64.c index bc7880098..0c5a40c50 100644 --- a/deps/lightning/lib/jit_aarch64.c +++ b/deps/lightning/lib/jit_aarch64.c @@ -1292,6 +1292,24 @@ _emit_code(jit_state_t *_jit) name##r##type(rn(node->u.w), \ rn(node->v.w), rn(node->w.w)); \ break +#define case_rrx(name, type) \ + case jit_code_##name##i##type: \ + name##i##type(rn(node->u.w), rn(node->v.w), node->w.w); \ + break +#define case_rrX(name, type) \ + case jit_code_##name##r##type: \ + name##r##type(rn(node->u.w), \ + rn(node->v.w), rn(node->w.w)); \ + break +#define case_xrr(name, type) \ + case jit_code_##name##i##type: \ + name##i##type(node->u.w, rn(node->v.w), rn(node->w.w)); \ + break +#define case_Xrr(name, type) \ + case jit_code_##name##r##type: \ + name##r##type(rn(node->u.w), \ + rn(node->v.w), rn(node->w.w)); \ + break #define case_rrrr(name, type) \ case jit_code_##name##r##type: \ name##r##type(rn(node->u.q.l), rn(node->u.q.h), \ @@ -1522,6 +1540,24 @@ _emit_code(jit_state_t *_jit) case jit_code_unldi_u: unldi_u(rn(node->u.w), node->v.w, node->w.w); 
break; + case_rrx(ldxb, _c); case_rrX(ldxb, _c); + case_rrx(ldxa, _c); case_rrX(ldxa, _c); + case_rrx(ldxb, _uc); case_rrX(ldxb, _uc); + case_rrx(ldxa, _uc); case_rrX(ldxa, _uc); + case_rrx(ldxb, _s); case_rrX(ldxb, _s); + case_rrx(ldxa, _s); case_rrX(ldxa, _s); + case_rrx(ldxb, _us); case_rrX(ldxb, _us); + case_rrx(ldxa, _us); case_rrX(ldxa, _us); + case_rrx(ldxb, _i); case_rrX(ldxb, _i); + case_rrx(ldxa, _i); case_rrX(ldxa, _i); + case_rrx(ldxb, _ui); case_rrX(ldxb, _ui); + case_rrx(ldxa, _ui); case_rrX(ldxa, _ui); + case_rrx(ldxb, _l); case_rrX(ldxb, _l); + case_rrx(ldxa, _l); case_rrX(ldxa, _l); + case_rrx(ldxb, _f); case_rrX(ldxb, _f); + case_rrx(ldxa, _f); case_rrX(ldxa, _f); + case_rrx(ldxb, _d); case_rrX(ldxb, _d); + case_rrx(ldxa, _d); case_rrX(ldxa, _d); case_rr(st, _c); case_wr(st, _c); case_rr(st, _s); @@ -1544,6 +1580,18 @@ _emit_code(jit_state_t *_jit) case jit_code_unsti: unsti(node->u.w, rn(node->v.w), node->w.w); break; + case_xrr(stxb, _c); case_Xrr(stxb, _c); + case_xrr(stxa, _c); case_Xrr(stxa, _c); + case_xrr(stxb, _s); case_Xrr(stxb, _s); + case_xrr(stxa, _s); case_Xrr(stxa, _s); + case_xrr(stxb, _i); case_Xrr(stxb, _i); + case_xrr(stxa, _i); case_Xrr(stxa, _i); + case_xrr(stxb, _l); case_rrX(stxb, _l); + case_xrr(stxa, _l); case_rrX(stxa, _l); + case_xrr(stxb, _f); case_rrX(stxb, _f); + case_xrr(stxa, _f); case_rrX(stxa, _f); + case_xrr(stxb, _d); case_rrX(stxb, _d); + case_xrr(stxa, _d); case_rrX(stxa, _d); case_rr(hton, _us); case_rr(hton, _ui); case_rr(hton, _ul); @@ -2119,6 +2167,10 @@ _emit_code(jit_state_t *_jit) #undef case_brr #undef case_wrr #undef case_rrw +#undef case_xrr +#undef case_Xrr +#undef case_rrx +#undef case_rrX #undef case_rrr #undef case_wr #undef case_rw diff --git a/deps/lightning/lib/jit_alpha-sz.c b/deps/lightning/lib/jit_alpha-sz.c index fd39c0dd2..a5a886b86 100644 --- a/deps/lightning/lib/jit_alpha-sz.c +++ b/deps/lightning/lib/jit_alpha-sz.c @@ -494,7 +494,7 @@ 8, /* qlshi */ 40, /* qlshr_u */ 8, /* qlshi_u */ - 
40, /* qrshr */ + 44, /* qrshr */ 8, /* qrshi */ 40, /* qrshr_u */ 8, /* qrshi_u */ @@ -524,8 +524,68 @@ 0, /* fnmai_d */ 20, /* fnmsr_d */ 0, /* fnmsi_d */ - 36, /* hmulr */ - 60, /* hmuli */ + 32, /* hmulr */ + 56, /* hmuli */ 4, /* hmulr_u */ - 28, /* hmuli_u */ + 28, /* hmuli_u */ + 12, /* ldxbr_c */ + 12, /* ldxbi_c */ + 12, /* ldxar_c */ + 12, /* ldxai_c */ + 8, /* ldxbr_uc */ + 8, /* ldxbi_uc */ + 8, /* ldxar_uc */ + 8, /* ldxai_uc */ + 12, /* ldxbr_s */ + 12, /* ldxbi_s */ + 12, /* ldxar_s */ + 12, /* ldxai_s */ + 8, /* ldxbr_us */ + 8, /* ldxbi_us */ + 8, /* ldxar_us */ + 8, /* ldxai_us */ + 8, /* ldxbr_i */ + 8, /* ldxbi_i */ + 8, /* ldxar_i */ + 8, /* ldxai_i */ + 12, /* ldxbr_ui */ + 12, /* ldxbi_ui */ + 12, /* ldxar_ui */ + 12, /* ldxai_ui */ + 8, /* ldxbr_l */ + 8, /* ldxbi_l */ + 8, /* ldxar_l */ + 8, /* ldxai_l */ + 8, /* ldxbr_f */ + 8, /* ldxbi_f */ + 8, /* ldxar_f */ + 8, /* ldxai_f */ + 8, /* ldxbr_d */ + 8, /* ldxbi_d */ + 8, /* ldxar_d */ + 8, /* ldxai_d */ + 8, /* stxbr_c */ + 8, /* stxbi_c */ + 8, /* stxar_c */ + 8, /* stxai_c */ + 8, /* stxbr_s */ + 8, /* stxbi_s */ + 8, /* stxar_s */ + 8, /* stxai_s */ + 8, /* stxbr_i */ + 8, /* stxbi_i */ + 8, /* stxar_i */ + 8, /* stxai_i */ + 8, /* stxbr_l */ + 8, /* stxbi_l */ + 8, /* stxar_l */ + 8, /* stxai_l */ + 8, /* stxbr_f */ + 8, /* stxbi_f */ + 8, /* stxar_f */ + 8, /* stxai_f */ + 8, /* stxbr_d */ + 8, /* stxbi_d */ + 8, /* stxar_d */ + 8, /* stxai_d */ #endif /* __WORDSIZE */ diff --git a/deps/lightning/lib/jit_alpha.c b/deps/lightning/lib/jit_alpha.c index 69bf397fa..a67421dc1 100644 --- a/deps/lightning/lib/jit_alpha.c +++ b/deps/lightning/lib/jit_alpha.c @@ -917,6 +917,26 @@ _emit_code(jit_state_t *_jit) rn(node->v.q.h), rn(node->w.w)); \ case jit_code_##name##i##type: \ break; +#define case_rrx(name, type) \ + case jit_code_##name##i##type: \ + generic_##name##i##type(rn(node->u.w), \ + rn(node->v.w), node->w.w); \ + break +#define case_rrX(name, type) \ + case jit_code_##name##r##type: 
\ + generic_##name##r##type(rn(node->u.w), \ + rn(node->v.w), rn(node->w.w)); \ + break +#define case_xrr(name, type) \ + case jit_code_##name##i##type: \ + generic_##name##i##type(node->u.w, rn(node->v.w), \ + rn(node->w.w)); \ + break +#define case_Xrr(name, type) \ + case jit_code_##name##r##type: \ + generic_##name##r##type(rn(node->u.w), rn(node->v.w), \ + rn(node->w.w)); \ + break #define case_rrrr(name, type) \ case jit_code_##name##r##type: \ name##r##type(rn(node->u.q.l), rn(node->u.q.h), \ @@ -1111,6 +1131,24 @@ _emit_code(jit_state_t *_jit) case jit_code_unldi_u: unldi_u(rn(node->u.w), node->v.w, node->w.w); break; + case_rrx(ldxb, _c); case_rrX(ldxb, _c); + case_rrx(ldxa, _c); case_rrX(ldxa, _c); + case_rrx(ldxb, _uc); case_rrX(ldxb, _uc); + case_rrx(ldxa, _uc); case_rrX(ldxa, _uc); + case_rrx(ldxb, _s); case_rrX(ldxb, _s); + case_rrx(ldxa, _s); case_rrX(ldxa, _s); + case_rrx(ldxb, _us); case_rrX(ldxb, _us); + case_rrx(ldxa, _us); case_rrX(ldxa, _us); + case_rrx(ldxb, _i); case_rrX(ldxb, _i); + case_rrx(ldxa, _i); case_rrX(ldxa, _i); + case_rrx(ldxb, _ui); case_rrX(ldxb, _ui); + case_rrx(ldxa, _ui); case_rrX(ldxa, _ui); + case_rrx(ldxb, _l); case_rrX(ldxb, _l); + case_rrx(ldxa, _l); case_rrX(ldxa, _l); + case_rrx(ldxb, _f); case_rrX(ldxb, _f); + case_rrx(ldxa, _f); case_rrX(ldxa, _f); + case_rrx(ldxb, _d); case_rrX(ldxb, _d); + case_rrx(ldxa, _d); case_rrX(ldxa, _d); case_rr(st, _c); case_wr(st, _c); case_rr(st, _s); @@ -1133,6 +1171,18 @@ _emit_code(jit_state_t *_jit) case jit_code_unsti: unsti(node->u.w, rn(node->v.w), node->w.w); break; + case_xrr(stxb, _c); case_Xrr(stxb, _c); + case_xrr(stxa, _c); case_Xrr(stxa, _c); + case_xrr(stxb, _s); case_Xrr(stxb, _s); + case_xrr(stxa, _s); case_Xrr(stxa, _s); + case_xrr(stxb, _i); case_Xrr(stxb, _i); + case_xrr(stxa, _i); case_Xrr(stxa, _i); + case_xrr(stxb, _l); case_rrX(stxb, _l); + case_xrr(stxa, _l); case_rrX(stxa, _l); + case_xrr(stxb, _f); case_rrX(stxb, _f); + case_xrr(stxa, _f); case_rrX(stxa, _f); + 
case_xrr(stxb, _d); case_rrX(stxb, _d); + case_xrr(stxa, _d); case_rrX(stxa, _d); case_rr(hton, _us); case_rr(hton, _ui); case_rr(hton, _ul); @@ -1720,6 +1770,10 @@ _emit_code(jit_state_t *_jit) #undef case_rrrr #undef case_rrf #undef case_rrw +#undef case_xrr +#undef case_Xrr +#undef case_rrx +#undef case_rrX #undef case_rrr #undef case_wr #undef case_rw diff --git a/deps/lightning/lib/jit_arm-cpu.c b/deps/lightning/lib/jit_arm-cpu.c index 149db9abb..73004ce20 100644 --- a/deps/lightning/lib/jit_arm-cpu.c +++ b/deps/lightning/lib/jit_arm-cpu.c @@ -269,7 +269,9 @@ extern unsigned __aeabi_uidivmod(unsigned, unsigned); # define ARM_BLI 0x0b000000 # define THUMB2_BLI 0xf000d000 /* ldr/str */ -# define ARM_P 0x00800000 /* positive offset */ +# define ARM_U 0x00800000 /* positive offset */ +# define ARM_P 0x01000000 /* index */ +# define ARM_W 0x00200000 /* writeback */ # define THUMB2_P 0x00000400 # define THUMB2_U 0x00000200 # define THUMB2_W 0x00000100 @@ -338,9 +340,9 @@ extern unsigned __aeabi_uidivmod(unsigned, unsigned); /* ldm/stm */ # define ARM_M 0x08000000 # define ARM_M_L 0x00100000 /* load; store if not set */ -# define ARM_M_I 0x00800000 /* inc; dec if not set */ -# define ARM_M_B 0x01000000 /* before; after if not set */ -# define ARM_M_U 0x00200000 /* update Rn */ +# define ARM_M_U 0x00800000 /* inc; dec if not set */ +# define ARM_M_P 0x01000000 /* before; after if not set */ +# define ARM_M_W 0x00200000 /* update Rn */ # define THUMB2_LDM_W 0x00200000 # define THUMB2_LDM_P 0x00008000 # define THUMB2_LDM_M 0x00004000 @@ -717,81 +719,131 @@ static void _corrlw(jit_state_t*,int,int,int,int,int,int); # define CC_BLI(cc,im) cb(cc,ARM_BLI,im) # define BLI(im) CC_BLI(ARM_CC_AL,im) # define T2_BLI(im) tb(THUMB2_BLI,im) -# define CC_LDRSB(cc,rt,rn,rm) corrr(cc,ARM_LDRSB|ARM_P,rn,rt,rm) +# define CC_LDRSB(cc,rt,rn,rm) corrr(cc,ARM_LDRSB|ARM_U,rn,rt,rm) # define LDRSB(rt,rn,rm) CC_LDRSB(ARM_CC_AL,rt,rn,rm) +# define LDRSB_B(rt,rn,rm) 
corrr(ARM_CC_AL,ARM_LDRSB|ARM_P|ARM_U|ARM_W,rn,rt,rm) +# define LDRSB_A(rt,rn,rm) corrr(ARM_CC_AL,ARM_LDRSB|ARM_U|ARM_W,rn,rt,rm) # define T1_LDRSB(rt,rn,rm) is(THUMB_LDRSB|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt)) # define T2_LDRSB(rt,rn,rm) torxr(THUMB2_LDRSB,rn,rt,rm) # define CC_LDRSBN(cc,rt,rn,rm) corrr(cc,ARM_LDRSB,rn,rt,rm) # define LDRSBN(rt,rn,rm) CC_LDRSBN(ARM_CC_AL,rt,rn,rm) -# define CC_LDRSBI(cc,rt,rn,im) corri8(cc,ARM_LDRSBI|ARM_P,rn,rt,im) +# define CC_LDRSBI(cc,rt,rn,im) corri8(cc,ARM_LDRSBI|ARM_U,rn,rt,im) # define LDRSBI(rt,rn,im) CC_LDRSBI(ARM_CC_AL,rt,rn,im) +# define LDRSBI_B(rt,rn,im) corri8(ARM_CC_AL,ARM_LDRSBI|ARM_P|ARM_U|ARM_W,rn,rt,im) +# define LDRSBI_A(rt,rn,im) corri8(ARM_CC_AL,ARM_LDRSBI|ARM_U|ARM_W,rn,rt,im) # define T2_LDRSBI(rt,rn,im) torri8(THUMB2_LDRSBI|THUMB2_U,rn,rt,im) # define T2_LDRSBWI(rt,rn,im) torri12(THUMB2_LDRSBWI,rn,rt,im) +# define T2_LDRSBI_B(rt,rn,im) torri8(THUMB2_LDRSBI|THUMB2_P|THUMB2_U|THUMB2_W,rn,rt,im) +# define T2_LDRSBI_A(rt,rn,im) torri8(THUMB2_LDRSBI|THUMB2_U|THUMB2_W,rn,rt,im) # define CC_LDRSBIN(cc,rt,rn,im) corri8(cc,ARM_LDRSBI,rn,rt,im) # define LDRSBIN(rt,rn,im) CC_LDRSBIN(ARM_CC_AL,rt,rn,im) +# define LDRSBIN_B(rt,rn,im) corri8(ARM_CC_AL,ARM_LDRSBI|ARM_P|ARM_W,rn,rt,im) +# define LDRSBIN_A(rt,rn,im) corri8(ARM_CC_AL,ARM_LDRSBI|ARM_W,rn,rt,im) # define T2_LDRSBIN(rt,rn,im) torri8(THUMB2_LDRSBI,rn,rt,im) -# define CC_LDRB(cc,rt,rn,rm) corrr(cc,ARM_LDRB|ARM_P,rn,rt,rm) +# define T2_LDRSBIN_B(rt,rn,im) torri8(THUMB2_LDRSBI|THUMB2_P|THUMB2_W,rn,rt,im) +# define T2_LDRSBIN_A(rt,rn,im) torri8(THUMB2_LDRSBI|THUMB2_W,rn,rt,im) +# define CC_LDRB(cc,rt,rn,rm) corrr(cc,ARM_LDRB|ARM_U,rn,rt,rm) # define LDRB(rt,rn,rm) CC_LDRB(ARM_CC_AL,rt,rn,rm) +# define LDRB_B(rt,rn,rm) corrr(ARM_CC_AL,ARM_LDRB|ARM_U|ARM_P|ARM_W,rn,rt,rm) +# define LDRB_A(rt,rn,rm) corrr(ARM_CC_AL,ARM_LDRB|ARM_U|ARM_W,rn,rt,rm) # define T1_LDRB(rt,rn,rm) is(THUMB_LDRB|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt)) # define T2_LDRB(rt,rn,rm) 
torxr(THUMB2_LDRB,rn,rt,rm) # define CC_LDRBN(cc,rt,rn,rm) corrr(cc,ARM_LDRB,rn,rt,rm) # define LDRBN(rt,rn,rm) CC_LDRBN(ARM_CC_AL,rt,rn,rm) -# define CC_LDRBI(cc,rt,rn,im) corri(cc,ARM_LDRBI|ARM_P,rn,rt,im) +# define CC_LDRBI(cc,rt,rn,im) corri(cc,ARM_LDRBI|ARM_U,rn,rt,im) # define LDRBI(rt,rn,im) CC_LDRBI(ARM_CC_AL,rt,rn,im) +# define LDRBI_B(rt,rn,im) corri(ARM_CC_AL,ARM_LDRBI|ARM_P|ARM_U|ARM_W,rn,rt,im) +# define LDRBI_A(rt,rn,im) corri(ARM_CC_AL,ARM_LDRBI|ARM_U|ARM_W,rn,rt,im) # define T1_LDRBI(rt,rn,im) is(THUMB_LDRBI|(_u5(im)<<6)|(_u3(rn)<<3)|_u3(rt)) # define T2_LDRBI(rt,rn,im) torri8(THUMB2_LDRBI|THUMB2_U,rn,rt,im) # define T2_LDRBWI(rt,rn,im) torri12(THUMB2_LDRBWI,rn,rt,im) +# define T2_LDRBI_B(rt,rn,im) torri8(THUMB2_LDRBI|THUMB2_P|THUMB2_U|THUMB2_W,rn,rt,im) +# define T2_LDRBI_A(rt,rn,im) torri8(THUMB2_LDRBI|THUMB2_U|THUMB2_W,rn,rt,im) # define CC_LDRBIN(cc,rt,rn,im) corri(cc,ARM_LDRBI,rn,rt,im) # define LDRBIN(rt,rn,im) CC_LDRBIN(ARM_CC_AL,rt,rn,im) +# define LDRBIN_B(rt,rn,im) corri(ARM_CC_AL,ARM_LDRBI|ARM_P|ARM_W,rn,rt,im) +# define LDRBIN_A(rt,rn,im) corri(ARM_CC_AL,ARM_LDRBI|ARM_W,rn,rt,im) # define T2_LDRBIN(rt,rn,im) torri8(THUMB2_LDRBI,rn,rt,im) -# define CC_LDRSH(cc,rt,rn,rm) corrr(cc,ARM_LDRSH|ARM_P,rn,rt,rm) +# define T2_LDRBIN_B(rt,rn,im) torri8(THUMB2_LDRBI|THUMB2_P|THUMB2_W,rn,rt,im) +# define T2_LDRBIN_A(rt,rn,im) torri8(THUMB2_LDRBI|THUMB2_W,rn,rt,im) +# define CC_LDRSH(cc,rt,rn,rm) corrr(cc,ARM_LDRSH|ARM_U,rn,rt,rm) # define LDRSH(rt,rn,rm) CC_LDRSH(ARM_CC_AL,rt,rn,rm) +# define LDRSH_B(rt,rn,rm) corrr(ARM_CC_AL,ARM_LDRSH|ARM_U|ARM_P|ARM_W,rn,rt,rm) +# define LDRSH_A(rt,rn,rm) corrr(ARM_CC_AL,ARM_LDRSH|ARM_U|ARM_W,rn,rt,rm) # define T1_LDRSH(rt,rn,rm) is(THUMB_LDRSH|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt)) # define T2_LDRSH(rt,rn,rm) torxr(THUMB2_LDRSH,rn,rt,rm) # define CC_LDRSHN(cc,rt,rn,rm) corrr(cc,ARM_LDRSH,rn,rt,rm) # define LDRSHN(rt,rn,rm) CC_LDRSHN(ARM_CC_AL,rt,rn,rm) -# define CC_LDRSHI(cc,rt,rn,im) 
corri8(cc,ARM_LDRSHI|ARM_P,rn,rt,im) +# define CC_LDRSHI(cc,rt,rn,im) corri8(cc,ARM_LDRSHI|ARM_U,rn,rt,im) # define LDRSHI(rt,rn,im) CC_LDRSHI(ARM_CC_AL,rt,rn,im) +# define LDRSHI_B(rt,rn,im) corri8(ARM_CC_AL,ARM_LDRSHI|ARM_P|ARM_U|ARM_W,rn,rt,im) +# define LDRSHI_A(rt,rn,im) corri8(ARM_CC_AL,ARM_LDRSHI|ARM_U|ARM_W,rn,rt,im) # define T2_LDRSHI(rt,rn,im) torri8(THUMB2_LDRSHI|THUMB2_U,rn,rt,im) # define T2_LDRSHWI(rt,rn,im) torri12(THUMB2_LDRSHWI,rn,rt,im) +# define T2_LDRSHI_B(rt,rn,im) torri8(THUMB2_LDRSHI|THUMB2_P|THUMB2_U|THUMB2_W,rn,rt,im) +# define T2_LDRSHI_A(rt,rn,im) torri8(THUMB2_LDRSHI|THUMB2_U|THUMB2_W,rn,rt,im) # define CC_LDRSHIN(cc,rt,rn,im) corri8(cc,ARM_LDRSHI,rn,rt,im) # define LDRSHIN(rt,rn,im) CC_LDRSHIN(ARM_CC_AL,rt,rn,im) +# define LDRSHIN_B(rt,rn,im) corri8(ARM_CC_AL,ARM_LDRSHI|ARM_P|ARM_W,rn,rt,im) +# define LDRSHIN_A(rt,rn,im) corri8(ARM_CC_AL,ARM_LDRSHI|ARM_W,rn,rt,im) # define T2_LDRSHIN(rt,rn,im) torri8(THUMB2_LDRSHI,rn,rt,im) -# define CC_LDRH(cc,rt,rn,rm) corrr(cc,ARM_LDRH|ARM_P,rn,rt,rm) +# define T2_LDRSHIN_B(rt,rn,im) torri8(THUMB2_LDRSHI|THUMB2_P|THUMB2_W,rn,rt,im) +# define T2_LDRSHIN_A(rt,rn,im) torri8(THUMB2_LDRSHI|THUMB2_W,rn,rt,im) +# define CC_LDRH(cc,rt,rn,rm) corrr(cc,ARM_LDRH|ARM_U,rn,rt,rm) # define LDRH(rt,rn,rm) CC_LDRH(ARM_CC_AL,rt,rn,rm) +# define LDRH_B(rt,rn,rm) corrr(ARM_CC_AL,ARM_LDRH|ARM_U|ARM_P|ARM_W,rn,rt,rm) +# define LDRH_A(rt,rn,rm) corrr(ARM_CC_AL,ARM_LDRH|ARM_U|ARM_W,rn,rt,rm) # define T1_LDRH(rt,rn,rm) is(THUMB_LDRH|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt)) # define T2_LDRH(rt,rn,rm) torxr(THUMB2_LDRH,rn,rt,rm) # define CC_LDRHN(cc,rt,rn,rm) corrr(cc,ARM_LDRH,rn,rt,rm) # define LDRHN(rt,rn,rm) CC_LDRHN(ARM_CC_AL,rt,rn,rm) -# define CC_LDRHI(cc,rt,rn,im) corri8(cc,ARM_LDRHI|ARM_P,rn,rt,im) +# define CC_LDRHI(cc,rt,rn,im) corri8(cc,ARM_LDRHI|ARM_U,rn,rt,im) # define LDRHI(rt,rn,im) CC_LDRHI(ARM_CC_AL,rt,rn,im) +# define LDRHI_B(rt,rn,im) corri8(ARM_CC_AL,ARM_LDRHI|ARM_P|ARM_U|ARM_W,rn,rt,im) +# define
LDRHI_A(rt,rn,im) corri8(ARM_CC_AL,ARM_LDRHI|ARM_U|ARM_W,rn,rt,im) # define T1_LDRHI(rt,rn,im) is(THUMB_LDRHI|(_u5(im)<<6)|(_u3(rn)<<3)|_u3(rt)) # define T2_LDRHI(rt,rn,im) torri8(THUMB2_LDRHI|THUMB2_U,rn,rt,im) # define T2_LDRHWI(rt,rn,im) torri12(THUMB2_LDRHWI,rn,rt,im) +# define T2_LDRHI_B(rt,rn,im) torri8(THUMB2_LDRHI|THUMB2_P|THUMB2_U|THUMB2_W,rn,rt,im) +# define T2_LDRHI_A(rt,rn,im) torri8(THUMB2_LDRHI|THUMB2_U|THUMB2_W,rn,rt,im) # define CC_LDRHIN(cc,rt,rn,im) corri8(cc,ARM_LDRHI,rn,rt,im) # define LDRHIN(rt,rn,im) CC_LDRHIN(ARM_CC_AL,rt,rn,im) +# define LDRHIN_B(rt,rn,im) corri8(ARM_CC_AL,ARM_LDRHI|ARM_P|ARM_W,rn,rt,im) +# define LDRHIN_A(rt,rn,im) corri8(ARM_CC_AL,ARM_LDRHI|ARM_W,rn,rt,im) # define T2_LDRHIN(rt,rn,im) torri8(THUMB2_LDRHI,rn,rt,im) -# define CC_LDR(cc,rt,rn,rm) corrr(cc,ARM_LDR|ARM_P,rn,rt,rm) +# define T2_LDRHIN_B(rt,rn,im) torri8(THUMB2_LDRHI|THUMB2_P|THUMB2_W,rn,rt,im) +# define T2_LDRHIN_A(rt,rn,im) torri8(THUMB2_LDRHI|THUMB2_W,rn,rt,im) +# define CC_LDR(cc,rt,rn,rm) corrr(cc,ARM_LDR|ARM_U,rn,rt,rm) # define LDR(rt,rn,rm) CC_LDR(ARM_CC_AL,rt,rn,rm) +# define LDR_B(rt,rn,rm) corrr(ARM_CC_AL,ARM_LDR|ARM_U|ARM_P|ARM_W,rn,rt,rm) +# define LDR_A(rt,rn,rm) corrr(ARM_CC_AL,ARM_LDR|ARM_U|ARM_W,rn,rt,rm) # define T1_LDR(rt,rn,rm) is(THUMB_LDR|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt)) # define T2_LDR(rt,rn,rm) torxr(THUMB2_LDR,rn,rt,rm) # define CC_LDRN(cc,rt,rn,rm) corrr(cc,ARM_LDR,rn,rt,rm) # define LDRN(rt,rn,rm) CC_LDRN(ARM_CC_AL,rt,rn,rm) -# define CC_LDRI(cc,rt,rn,im) corri(cc,ARM_LDRI|ARM_P,rn,rt,im) +# define CC_LDRI(cc,rt,rn,im) corri(cc,ARM_LDRI|ARM_U,rn,rt,im) # define LDRI(rt,rn,im) CC_LDRI(ARM_CC_AL,rt,rn,im) +# define LDRI_B(rt,rn,im) corri(ARM_CC_AL,ARM_LDRI|ARM_P|ARM_U|ARM_W,rn,rt,im) +# define LDRI_A(rt,rn,im) corri(ARM_CC_AL,ARM_LDRI|ARM_U|ARM_W,rn,rt,im) # define T1_LDRI(rt,rn,im) is(THUMB_LDRI|(_u5(im)<<6)|(_u3(rn)<<3)|_u3(rt)) # define T1_LDRISP(rt,im) is(THUMB_LDRISP|(_u3(rt)<<8)|_u8(im)) # define T2_LDRI(rt,rn,im) 
torri8(THUMB2_LDRI|THUMB2_U,rn,rt,im) # define T2_LDRWI(rt,rn,im) torri12(THUMB2_LDRWI,rn,rt,im) +# define T2_LDRI_B(rt,rn,im) torri8(THUMB2_LDRI|THUMB2_P|THUMB2_U|THUMB2_W,rn,rt,im) +# define T2_LDRI_A(rt,rn,im) torri8(THUMB2_LDRI|THUMB2_U|THUMB2_W,rn,rt,im) # define CC_LDRIN(cc,rt,rn,im) corri(cc,ARM_LDRI,rn,rt,im) # define LDRIN(rt,rn,im) CC_LDRIN(ARM_CC_AL,rt,rn,im) +# define LDRIN_B(rt,rn,im) corri(ARM_CC_AL,ARM_LDRI|ARM_P|ARM_W,rn,rt,im) +# define LDRIN_A(rt,rn,im) corri(ARM_CC_AL,ARM_LDRI|ARM_W,rn,rt,im) # define T2_LDRIN(rt,rn,im) torri8(THUMB2_LDRI,rn,rt,im) -# define CC_LDRD(cc,rt,rn,rm) corrr(cc,ARM_LDRD|ARM_P,rn,rt,rm) +# define T2_LDRIN_B(rt,rn,im) torri8(THUMB2_LDRI|THUMB2_P|THUMB2_W,rn,rt,im) +# define T2_LDRIN_A(rt,rn,im) torri8(THUMB2_LDRI|THUMB2_W,rn,rt,im) +# define CC_LDRD(cc,rt,rn,rm) corrr(cc,ARM_LDRD|ARM_U,rn,rt,rm) # define LDRD(rt,rn,rm) CC_LDRD(ARM_CC_AL,rt,rn,rm) -# define T2_LDRDI(rt,rt2,rn,im) torrri8(THUMB2_LDRDI|ARM_P,rn,rt,rt2,im) +# define T2_LDRDI(rt,rt2,rn,im) torrri8(THUMB2_LDRDI|ARM_U,rn,rt,rt2,im) # define CC_LDRDN(cc,rt,rn,rm) corrr(cc,ARM_LDRD,rn,rt,rm) # define LDRDN(rd,rn,rm) CC_LDRDN(ARM_CC_AL,rt,rn,rm) -# define CC_LDRDI(cc,rt,rn,im) corri8(cc,ARM_LDRDI|ARM_P,rn,rt,im) +# define CC_LDRDI(cc,rt,rn,im) corri8(cc,ARM_LDRDI|ARM_U,rn,rt,im) # define LDRDI(rt,rn,im) CC_LDRDI(ARM_CC_AL,rt,rn,im) # define CC_LDRDIN(cc,rt,rn,im) corri8(cc,ARM_LDRDI,rn,rt,im) # define LDRDIN(rt,rn,im) CC_LDRDIN(ARM_CC_AL,rt,rn,im) @@ -799,103 +851,133 @@ static void _corrlw(jit_state_t*,int,int,int,int,int,int); # define CC_LDREX(cc,rt,rn) corrrr(cc,ARM_LDREX,rn,rt,0xf,0xf) # define LDREX(rt,rn) CC_LDREX(ARM_CC_AL,rt,rn) # define T2_LDREX(rt,rn,im) torrri8(THUMB2_LDREX,rn,rt,0xf,im) -# define CC_STRB(cc,rt,rn,rm) corrr(cc,ARM_STRB|ARM_P,rn,rt,rm) +# define CC_STRB(cc,rt,rn,rm) corrr(cc,ARM_STRB|ARM_U,rn,rt,rm) # define STRB(rt,rn,rm) CC_STRB(ARM_CC_AL,rt,rn,rm) +# define STRB_B(rt,rn,rm) corrr(ARM_CC_AL,ARM_STRB|ARM_U|ARM_P|ARM_W,rn,rt,rm) +# 
define STRB_A(rt,rn,rm) corrr(ARM_CC_AL,ARM_STRB|ARM_U|ARM_W,rn,rt,rm) # define T1_STRB(rt,rn,rm) is(THUMB_STRB|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt)) # define T2_STRB(rt,rn,rm) torxr(THUMB2_STRB,rn,rt,rm) # define CC_STRBN(cc,rt,rn,rm) corrr(cc,ARM_STRB,rn,rt,rm) # define STRBN(rt,rn,rm) CC_STRBN(ARM_CC_AL,rt,rn,rm) -# define CC_STRBI(cc,rt,rn,im) corri(cc,ARM_STRBI|ARM_P,rn,rt,im) +# define CC_STRBI(cc,rt,rn,im) corri(cc,ARM_STRBI|ARM_U,rn,rt,im) # define STRBI(rt,rn,im) CC_STRBI(ARM_CC_AL,rt,rn,im) +# define STRBI_B(rt,rn,im) corri(ARM_CC_AL,ARM_STRBI|ARM_P|ARM_U|ARM_W,rn,rt,im) +# define STRBI_A(rt,rn,im) corri(ARM_CC_AL,ARM_STRBI|ARM_U|ARM_W,rn,rt,im) # define T1_STRBI(rt,rn,im) is(THUMB_STRBI|(_u5(im)<<6)|(_u3(rn)<<3)|_u3(rt)) # define T2_STRBI(rt,rn,im) torri8(THUMB2_STRBI|THUMB2_U,rn,rt,im) # define T2_STRBWI(rt,rn,im) torri12(THUMB2_STRBWI,rn,rt,im) +# define T2_STRBI_B(rt,rn,im) torri8(THUMB2_STRBI|THUMB2_U|THUMB2_P|THUMB2_W,rn,rt,im) +# define T2_STRBI_A(rt,rn,im) torri8(THUMB2_STRBI|THUMB2_U|THUMB2_W,rn,rt,im) # define CC_STRBIN(cc,rt,rn,im) corri(cc,ARM_STRBI,rn,rt,im) # define STRBIN(rt,rn,im) CC_STRBIN(ARM_CC_AL,rt,rn,im) +# define STRBIN_B(rt,rn,im) corri(ARM_CC_AL,ARM_STRBI|ARM_P|ARM_W,rn,rt,im) +# define STRBIN_A(rt,rn,im) corri(ARM_CC_AL,ARM_STRBI|ARM_W,rn,rt,im) # define T2_STRBIN(rt,rn,im) torri8(THUMB2_STRBI,rn,rt,im) -# define CC_STRH(cc,rt,rn,rm) corrr(cc,ARM_STRH|ARM_P,rn,rt,rm) +# define T2_STRBIN_B(rt,rn,im) torri8(THUMB2_STRBI|THUMB2_P|THUMB2_W,rn,rt,im) +# define T2_STRBIN_A(rt,rn,im) torri8(THUMB2_STRBI|THUMB2_W,rn,rt,im) +# define CC_STRH(cc,rt,rn,rm) corrr(cc,ARM_STRH|ARM_U,rn,rt,rm) # define STRH(rt,rn,rm) CC_STRH(ARM_CC_AL,rt,rn,rm) +# define STRH_B(rt,rn,rm) corrr(ARM_CC_AL,ARM_STRH|ARM_U|ARM_P|ARM_W,rn,rt,rm) +# define STRH_A(rt,rn,rm) corrr(ARM_CC_AL,ARM_STRH|ARM_U|ARM_W,rn,rt,rm) # define T1_STRH(rt,rn,rm) is(THUMB_STRH|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt)) # define T2_STRH(rt,rn,rm) torxr(THUMB2_STRH,rn,rt,rm) # define 
CC_STRHN(cc,rt,rn,rm) corrr(cc,ARM_STRH,rn,rt,rm) # define STRHN(rt,rn,rm) CC_STRHN(ARM_CC_AL,rt,rn,rm) -# define CC_STRHI(cc,rt,rn,im) corri8(cc,ARM_STRHI|ARM_P,rn,rt,im) +# define CC_STRHI(cc,rt,rn,im) corri8(cc,ARM_STRHI|ARM_U,rn,rt,im) # define STRHI(rt,rn,im) CC_STRHI(ARM_CC_AL,rt,rn,im) +# define STRHI_B(rt,rn,im) corri8(ARM_CC_AL,ARM_STRHI|ARM_P|ARM_U|ARM_W,rn,rt,im) +# define STRHI_A(rt,rn,im) corri8(ARM_CC_AL,ARM_STRHI|ARM_U|ARM_W,rn,rt,im) # define T1_STRHI(rt,rn,im) is(THUMB_STRHI|(_u5(im)<<6)|(_u3(rn)<<3)|_u3(rt)) # define T2_STRHI(rt,rn,im) torri8(THUMB2_STRHI|THUMB2_U,rn,rt,im) # define T2_STRHWI(rt,rn,im) torri12(THUMB2_STRHWI,rn,rt,im) +# define T2_STRHI_B(rt,rn,im) torri8(THUMB2_STRHI|THUMB2_U|THUMB2_P|THUMB2_W,rn,rt,im) +# define T2_STRHI_A(rt,rn,im) torri8(THUMB2_STRHI|THUMB2_U|THUMB2_W,rn,rt,im) # define CC_STRHIN(cc,rt,rn,im) corri8(cc,ARM_STRHI,rn,rt,im) # define STRHIN(rt,rn,im) CC_STRHIN(ARM_CC_AL,rt,rn,im) +# define STRHIN_B(rt,rn,im) corri8(ARM_CC_AL,ARM_STRHI|ARM_P|ARM_W,rn,rt,im) +# define STRHIN_A(rt,rn,im) corri8(ARM_CC_AL,ARM_STRHI|ARM_W,rn,rt,im) # define T2_STRHIN(rt,rn,im) torri8(THUMB2_STRHI,rn,rt,im) -# define CC_STR(cc,rt,rn,rm) corrr(cc,ARM_STR|ARM_P,rn,rt,rm) +# define T2_STRHIN_B(rt,rn,im) torri8(THUMB2_STRHI|THUMB2_P|THUMB2_W,rn,rt,im) +# define T2_STRHIN_A(rt,rn,im) torri8(THUMB2_STRHI|THUMB2_W,rn,rt,im) +# define CC_STR(cc,rt,rn,rm) corrr(cc,ARM_STR|ARM_U,rn,rt,rm) # define STR(rt,rn,rm) CC_STR(ARM_CC_AL,rt,rn,rm) +# define STR_B(rt,rn,rm) corrr(ARM_CC_AL,ARM_STR|ARM_U|ARM_P|ARM_W,rn,rt,rm) +# define STR_A(rt,rn,rm) corrr(ARM_CC_AL,ARM_STR|ARM_U|ARM_W,rn,rt,rm) # define T1_STR(rt,rn,rm) is(THUMB_STR|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt)) # define T2_STR(rt,rn,rm) torxr(THUMB2_STR,rn,rt,rm) # define CC_STRN(cc,rt,rn,rm) corrr(cc,ARM_STR,rn,rt,rm) # define STRN(rt,rn,rm) CC_STRN(ARM_CC_AL,rt,rn,rm) -# define CC_STRI(cc,rt,rn,im) corri(cc,ARM_STRI|ARM_P,rn,rt,im) +# define CC_STRI(cc,rt,rn,im) corri(cc,ARM_STRI|ARM_U,rn,rt,im) #
define STRI(rt,rn,im) CC_STRI(ARM_CC_AL,rt,rn,im) +# define STRI_B(rt,rn,im) corri(ARM_CC_AL,ARM_STRI|ARM_P|ARM_U|ARM_W,rn,rt,im) +# define STRI_A(rt,rn,im) corri(ARM_CC_AL,ARM_STRI|ARM_U|ARM_W,rn,rt,im) # define T1_STRI(rt,rn,im) is(THUMB_STRI|(_u5(im)<<6)|(_u3(rn)<<3)|_u3(rt)) # define T1_STRISP(rt,im) is(THUMB_STRISP|(_u3(rt)<<8)|(_u8(im))) # define T2_STRI(rt,rn,im) torri8(THUMB2_STRI|THUMB2_U,rn,rt,im) # define T2_STRWI(rt,rn,im) torri12(THUMB2_STRWI,rn,rt,im) +# define T2_STRI_B(rt,rn,im) torri8(THUMB2_STRI|THUMB2_U|THUMB2_P|THUMB2_W,rn,rt,im) +# define T2_STRI_A(rt,rn,im) torri8(THUMB2_STRI|THUMB2_U|THUMB2_W,rn,rt,im) # define CC_STRIN(cc,rt,rn,im) corri(cc,ARM_STRI,rn,rt,im) # define STRIN(rt,rn,im) CC_STRIN(ARM_CC_AL,rt,rn,im) +# define STRIN_B(rt,rn,im) corri(ARM_CC_AL,ARM_STRI|ARM_P|ARM_W,rn,rt,im) +# define STRIN_A(rt,rn,im) corri(ARM_CC_AL,ARM_STRI|ARM_W,rn,rt,im) # define T2_STRIN(rt,rn,im) torri8(THUMB2_STRI,rn,rt,im) -# define CC_STRD(cc,rt,rn,rm) corrr(cc,ARM_STRD|ARM_P,rn,rt,rm) +# define T2_STRIN_B(rt,rn,im) torri8(THUMB2_STRI|THUMB2_P|THUMB2_W,rn,rt,im) +# define T2_STRIN_A(rt,rn,im) torri8(THUMB2_STRI|THUMB2_W,rn,rt,im) +# define CC_STRD(cc,rt,rn,rm) corrr(cc,ARM_STRD|ARM_U,rn,rt,rm) # define STRD(rt,rn,rm) CC_STRD(ARM_CC_AL,rt,rn,rm) # define CC_STRDN(cc,rt,rn,rm) corrr(cc,ARM_STRD,rn,rt,rm) # define STRDN(rt,rn,rm) CC_STRDN(ARM_CC_AL,rt,rn,rm) -# define CC_STRDI(cc,rt,rn,im) corri8(cc,ARM_STRDI|ARM_P,rn,rt,im) +# define CC_STRDI(cc,rt,rn,im) corri8(cc,ARM_STRDI|ARM_U,rn,rt,im) # define STRDI(rt,rn,im) CC_STRDI(ARM_CC_AL,rt,rn,im) -# define T2_STRDI(rt,rt2,rn,im) torrri8(THUMB2_STRDI|ARM_P,rn,rt,rt2,im) +# define T2_STRDI(rt,rt2,rn,im) torrri8(THUMB2_STRDI|ARM_U,rn,rt,rt2,im) # define CC_STRDIN(cc,rt,rn,im) corri8(cc,ARM_STRDI,rn,rt,im) # define STRDIN(rt,rn,im) CC_STRDIN(ARM_CC_AL,rt,rn,im) # define T2_STRDIN(rt,rt2,rn,im) torrri8(THUMB2_STRDI,rn,rt,rt2,im) # define CC_STREX(cc,rd,rt,rn) corrrr(cc,ARM_STREX,rn,rd,0xf,rt) # define 
STREX(rd,rt,rn) CC_STREX(ARM_CC_AL,rd,rt,rn) # define T2_STREX(rd,rt,rn,im) torrri8(THUMB2_STREX,rn,rt,rd,im) -# define CC_LDMIA(cc,rn,im) corl(cc,ARM_M|ARM_M_L|ARM_M_I,rn,im) +# define CC_LDMIA(cc,rn,im) corl(cc,ARM_M|ARM_M_L|ARM_M_U,rn,im) # define LDMIA(rn,im) CC_LDMIA(ARM_CC_AL,rn,im) # define CC_LDM(cc,rn,im) CC_LDMIA(cc,rn,im) # define LDM(rn,im) LDMIA(rn,im) # define T1_LDMIA(rn,im) is(THUMB_LDMIA|(_u3(rn)<<8)|im) # define T2_LDMIA(rn,im) torl(THUMB2_LDMIA,rn,im) -# define CC_LDMIA_U(cc,rn,im) corl(cc,ARM_M|ARM_M_L|ARM_M_I|ARM_M_U,rn,im) +# define CC_LDMIA_U(cc,rn,im) corl(cc,ARM_M|ARM_M_L|ARM_M_U|ARM_M_W,rn,im) # define LDMIA_U(rn,im) CC_LDMIA_U(ARM_CC_AL,rn,im) # define LDM_U(r0,i0) LDMIA_U(r0,i0) -# define CC_LDMIB(cc,rn,im) corl(cc,ARM_M|ARM_M_L|ARM_M_I|ARM_M_B,rn,im) +# define CC_LDMIB(cc,rn,im) corl(cc,ARM_M|ARM_M_L|ARM_M_U|ARM_M_P,rn,im) # define LDMIB(rn,im) CC_LDMIB(ARM_CC_AL,rn,im) -# define CC_LDMIB_U(cc,rn,im) corl(cc,ARM_M|ARM_M_L|ARM_M_I|ARM_M_B|ARM_M_U,rn,im) +# define CC_LDMIB_U(cc,rn,im) corl(cc,ARM_M|ARM_M_L|ARM_M_U|ARM_M_P|ARM_M_W,rn,im) # define LDMIB_U(rn,im) CC_LDMIB_U(ARM_CC_AL,rn,im) # define CC_LDMDA(cc,rn,im) corl(cc,ARM_M|ARM_M_L,rn,im) # define LDMDA(rn,im) CC_LDMDA(ARM_CC_AL,rn,im) -# define CC_LDMDA_U(cc,rn,im) corl(cc,ARM_M|ARM_M_L|ARM_M_U,rn,im) +# define CC_LDMDA_U(cc,rn,im) corl(cc,ARM_M|ARM_M_L|ARM_M_W,rn,im) # define LDMDA_U(rn,im) CC_LDMDA_U(ARM_CC_AL,rn,im) -# define CC_LDMDB(cc,rn,im) corl(cc,ARM_M|ARM_M_L|ARM_M_B,rn,im) +# define CC_LDMDB(cc,rn,im) corl(cc,ARM_M|ARM_M_L|ARM_M_P,rn,im) # define LDMDB(rn,im) CC_LDMDB(ARM_CC_AL,rn,im) # define T2_LDMDB(rn,im) torl(THUMB2_LDMDB,rn,im) -# define CC_LDMDB_U(cc,rn,im) corl(cc,ARM_M|ARM_M_L|ARM_M_B|ARM_M_U,rn,im) +# define CC_LDMDB_U(cc,rn,im) corl(cc,ARM_M|ARM_M_L|ARM_M_P|ARM_M_W,rn,im) # define LDMDB_U(rn,im) CC_LDMDB_U(ARM_CC_AL,rn,im) -# define CC_STMIA(cc,rn,im) corl(cc,ARM_M|ARM_M_I,rn,im) +# define CC_STMIA(cc,rn,im) corl(cc,ARM_M|ARM_M_U,rn,im) # define STMIA(rn,im) 
CC_STMIA(ARM_CC_AL,rn,im) # define CC_STM(cc,rn,im) CC_STMIA(cc,rn,im) # define STM(rn,im) STMIA(rn,im) -# define CC_STMIA_U(cc,rn,im) corl(cc,ARM_M|ARM_M_I|ARM_M_U,rn,im) +# define CC_STMIA_U(cc,rn,im) corl(cc,ARM_M|ARM_M_U|ARM_M_W,rn,im) # define STMIA_U(rn,im) CC_STMIA_U(ARM_CC_AL,rn,im) # define CC_STM_U(cc,rn,im) CC_STMIA_U(cc,rn,im) # define STM_U(rn,im) STMIA_U(rn,im) -# define CC_STMIB(cc,rn,im) corl(cc,ARM_M|ARM_M_I|ARM_M_B,rn,im) +# define CC_STMIB(cc,rn,im) corl(cc,ARM_M|ARM_M_U|ARM_M_P,rn,im) # define STMIB(rn,im) CC_STMIB(ARM_CC_AL,rn,im) -# define CC_STMIB_U(cc,rn,im) corl(cc,ARM_M|ARM_M_I|ARM_M_B|ARM_M_U,rn,im) +# define CC_STMIB_U(cc,rn,im) corl(cc,ARM_M|ARM_M_U|ARM_M_P|ARM_M_W,rn,im) # define STMIB_U(rn,im) CC_STMIB_U(ARM_CC_AL,rn,im) # define CC_STMDA(cc,rn,im) corl(cc,ARM_M,rn,im) # define STMDA(rn,im) CC_STMDA(ARM_CC_AL,rn,im) -# define CC_STMDA_U(cc,rn,im) corl(cc,ARM_M|ARM_M_U,rn,im) +# define CC_STMDA_U(cc,rn,im) corl(cc,ARM_M|ARM_M_W,rn,im) # define STMDA_U(rn,im) CC_STMDA_U(ARM_CC_AL,rn,im) -# define CC_STMDB(cc,rn,im) corl(cc,ARM_M|ARM_M_B,rn,im) +# define CC_STMDB(cc,rn,im) corl(cc,ARM_M|ARM_M_P,rn,im) # define STMDB(rn,im) CC_STMDB(ARM_CC_AL,rn,im) -# define CC_STMDB_U(cc,rn,im) corl(cc,ARM_M|ARM_M_B|ARM_M_U,rn,im) +# define CC_STMDB_U(cc,rn,im) corl(cc,ARM_M|ARM_M_P|ARM_M_W,rn,im) # define STMDB_U(rn,im) CC_STMDB_U(ARM_CC_AL,rn,im) # define CC_PUSH(cc,im) CC_STMDB_U(cc,_SP_REGNO,im) # define PUSH(im) STMDB_U(_SP_REGNO,im) @@ -1199,6 +1281,46 @@ static void _unldi(jit_state_t*, jit_int32_t, jit_word_t, jit_word_t); static void _unldr_u(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); # define unldi_u(r0, i0, i1) _unldi_u(_jit, r0, i0, i1) static void _unldi_u(jit_state_t*, jit_int32_t, jit_word_t, jit_word_t); +# define ldxbr_c(r0, r1, r2) _ldxbr_c(_jit,r0, r1, r2) +static void _ldxbr_c(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define ldxbi_c(r0, r1, i0) _ldxbi_c(_jit, r0, r1, i0) +static void _ldxbi_c(jit_state_t*, 
jit_int32_t, jit_int32_t, jit_word_t); +# define ldxbr_uc(r0, r1, r2) _ldxbr_uc(_jit,r0, r1, r2) +static void _ldxbr_uc(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define ldxbi_uc(r0, r1, i0) _ldxbi_uc(_jit, r0, r1, i0) +static void _ldxbi_uc(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define ldxbr_s(r0, r1, r2) _ldxbr_s(_jit,r0, r1, r2) +static void _ldxbr_s(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define ldxbi_s(r0, r1, i0) _ldxbi_s(_jit, r0, r1, i0) +static void _ldxbi_s(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define ldxbr_us(r0, r1, r2) _ldxbr_us(_jit,r0, r1, r2) +static void _ldxbr_us(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define ldxbi_us(r0, r1, i0) _ldxbi_us(_jit, r0, r1, i0) +static void _ldxbi_us(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define ldxbr_i(r0, r1, r2) _ldxbr_i(_jit,r0, r1, r2) +static void _ldxbr_i(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define ldxbi_i(r0, r1, i0) _ldxbi_i(_jit, r0, r1, i0) +static void _ldxbi_i(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define ldxar_c(r0, r1, i0) _ldxar_c(_jit, r0, r1, i0) +static void _ldxar_c(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define ldxai_c(r0, r1, i0) _ldxai_c(_jit, r0, r1, i0) +static void _ldxai_c(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define ldxar_uc(r0, r1, i0) _ldxar_uc(_jit, r0, r1, i0) +static void _ldxar_uc(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define ldxai_uc(r0, r1, i0) _ldxai_uc(_jit, r0, r1, i0) +static void _ldxai_uc(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define ldxar_s(r0, r1, i0) _ldxar_s(_jit, r0, r1, i0) +static void _ldxar_s(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define ldxai_s(r0, r1, i0) _ldxai_s(_jit, r0, r1, i0) +static void _ldxai_s(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define ldxar_us(r0, r1, i0) _ldxar_us(_jit, r0, r1, i0) +static void 
_ldxar_us(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define ldxai_us(r0, r1, i0) _ldxai_us(_jit, r0, r1, i0) +static void _ldxai_us(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define ldxar_i(r0, r1, i0) _ldxar_i(_jit, r0, r1, i0) +static void _ldxar_i(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define ldxai_i(r0, r1, i0) _ldxai_i(_jit, r0, r1, i0) +static void _ldxai_i(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); # define str_c(r0,r1) _str_c(_jit,r0,r1) static void _str_c(jit_state_t*,jit_int32_t,jit_int32_t); # define sti_c(i0,r0) _sti_c(_jit,i0,r0) @@ -1227,6 +1349,30 @@ static void _stxi_i(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); static void _unstr(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); #define unsti(i0, r0, i1) _unsti(_jit, i0, r0, i1) static void _unsti(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t); +# define stxbr_c(r0, r1, r2) _stxbr_c(_jit, r0, r1, r2) +static void _stxbr_c(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define stxbi_c(i0, r0, r1) _stxbi_c(_jit, i0, r0, r1) +static void _stxbi_c(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +# define stxbr_s(r0, r1, r2) _stxbr_s(_jit, r0, r1, r2) +static void _stxbr_s(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define stxbi_s(i0, r0, r1) _stxbi_s(_jit, i0, r0, r1) +static void _stxbi_s(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +# define stxbr_i(r0, r1, r2) _stxbr_i(_jit, r0, r1, r2) +static void _stxbr_i(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define stxbi_i(i0, r0, r1) _stxbi_i(_jit, i0, r0, r1) +static void _stxbi_i(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +# define stxar_c(r0, r1, r2) _stxar_c(_jit, r0, r1, r2) +static void _stxar_c(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define stxai_c(i0, r0, r1) _stxai_c(_jit, i0, r0, r1) +static void _stxai_c(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +# define stxar_s(r0, r1, r2) _stxar_s(_jit, r0, 
r1, r2) +static void _stxar_s(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define stxai_s(i0, r0, r1) _stxai_s(_jit, i0, r0, r1) +static void _stxai_s(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +# define stxar_i(r0, r1, r2) _stxar_i(_jit, r0, r1, r2) +static void _stxar_i(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define stxai_i(i0, r0, r1) _stxai_i(_jit, i0, r0, r1) +static void _stxai_i(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); # define bswapr_us(r0,r1) _bswapr_us(_jit,r0,r1) static void _bswapr_us(jit_state_t*,jit_int32_t,jit_int32_t); # define bswapr_ui(r0,r1) _bswapr_ui(_jit,r0,r1) @@ -3825,6 +3971,304 @@ _unldi_u(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0, jit_word_t i1) generic_unldi_u(r0, i0, i1); } +static void +_ldxbr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + if (!jit_thumb_p()) + LDRSB_B(r0, r1, r2); + else + generic_ldxbr_c(r0, r1, r2); +} + +static void +_ldxbi_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + if (i0 >= -255 && i0 <= 255) { + if (jit_thumb_p()) { + if (i0 >= 0) + T2_LDRSBI_B(r0, r1, i0); + else + T2_LDRSBIN_B(r0, r1, -i0); + } + else { + if (i0 >= 0) + LDRSBI_B(r0, r1, i0); + else + LDRSBIN_B(r0, r1, -i0); + } + } + else + generic_ldxbi_c(r0, r1, i0); +} + +static void +_ldxbr_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + if (!jit_thumb_p()) + LDRB_B(r0, r1, r2); + else + generic_ldxbr_uc(r0, r1, r2); +} + +static void +_ldxbi_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + if (jit_thumb_p() && i0 >= -255 && i0 <= 255) { + if (i0 >= 0) + T2_LDRBI_B(r0, r1, i0); + else + T2_LDRBIN_B(r0, r1, -i0); + } + else if (!jit_thumb_p() && i0 >= -4095 && i0 <= 4095) { + if (i0 >= 0) + LDRBI_B(r0, r1, i0); + else + LDRBIN_B(r0, r1, -i0); + } + else + generic_ldxbi_uc(r0, r1, i0); +} + +static void +_ldxbr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + if 
(!jit_thumb_p()) + LDRSH_B(r0, r1, r2); + else + generic_ldxbr_s(r0, r1, r2); +} + +static void +_ldxbi_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + if (i0 >= -255 && i0 <= 255) { + if (jit_thumb_p()) { + if (i0 >= 0) + T2_LDRSHI_B(r0, r1, i0); + else + T2_LDRSHIN_B(r0, r1, -i0); + } + else { + if (i0 >= 0) + LDRSHI_B(r0, r1, i0); + else + LDRSHIN_B(r0, r1, -i0); + } + } + else + generic_ldxbi_s(r0, r1, i0); +} + +static void +_ldxbr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + if (!jit_thumb_p()) + LDRH_B(r0, r1, r2); + else + generic_ldxbr_us(r0, r1, r2); +} + +static void +_ldxbi_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + if (i0 >= -255 && i0 <= 255) { + if (jit_thumb_p()) { + if (i0 >= 0) + T2_LDRHI_B(r0, r1, i0); + else + T2_LDRHIN_B(r0, r1, -i0); + } + else { + if (i0 >= 0) + LDRHI_B(r0, r1, i0); + else + LDRHIN_B(r0, r1, -i0); + } + } + else + generic_ldxbi_us(r0, r1, i0); +} + +static void +_ldxbr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + if (!jit_thumb_p()) + LDR_B(r0, r1, r2); + else + generic_ldxbr_i(r0, r1, r2); +} + +static void +_ldxbi_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + if (jit_thumb_p() && i0 >= -255 && i0 <= 255) { + if (i0 >= 0) + T2_LDRI_B(r0, r1, i0); + else + T2_LDRIN_B(r0, r1, -i0); + } + else if (!jit_thumb_p() && i0 >= -4095 && i0 <= 4095) { + if (i0 >= 0) + LDRI_B(r0, r1, i0); + else + LDRIN_B(r0, r1, -i0); + } + else + generic_ldxbi_i(r0, r1, i0); +} + +static void +_ldxar_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + if (jit_post_index_p() && !jit_thumb_p()) + LDRSB_A(r0, r1, r2); + else + generic_ldxar_c(r0, r1, r2); +} + +static void +_ldxai_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + if (jit_post_index_p() && i0 >= -255 && i0 <= 255) { + if (jit_thumb_p()) { + if (i0 >= 0) + T2_LDRSBI_A(r0, r1, i0); + else + T2_LDRSBIN_A(r0, 
r1, -i0); + } + else { + if (i0 >= 0) + LDRSBI_A(r0, r1, i0); + else + LDRSBIN_A(r0, r1, -i0); + } + } + else + generic_ldxai_c(r0, r1, i0); +} + +static void +_ldxar_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + if (jit_post_index_p() && !jit_thumb_p()) + LDRB_A(r0, r1, r2); + else + generic_ldxar_uc(r0, r1, r2); +} + +static void +_ldxai_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + if (jit_post_index_p() && + jit_thumb_p() && i0 >= -255 && i0 <= 255) { + if (jit_thumb_p()) { + if (i0 >= 0) + T2_LDRBI_A(r0, r1, i0); + else + T2_LDRBIN_A(r0, r1, -i0); + } + } + else if (jit_post_index_p() && + !jit_thumb_p() && i0 >= -4095 && i0 <= 4095) { + if (i0 >= 0) + LDRBI_A(r0, r1, i0); + else + LDRBIN_A(r0, r1, -i0); + } + else + generic_ldxai_uc(r0, r1, i0); +} + +static void +_ldxar_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + if (jit_post_index_p() && !jit_thumb_p()) + LDRSH_A(r0, r1, r2); + else + generic_ldxar_s(r0, r1, r2); +} + +static void +_ldxai_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + if (jit_post_index_p() && i0 >= -255 && i0 <= 255) { + if (jit_thumb_p()) { + if (i0 >= 0) + T2_LDRSHI_A(r0, r1, i0); + else + T2_LDRSHIN_A(r0, r1, -i0); + } + else { + if (i0 >= 0) + LDRSHI_A(r0, r1, i0); + else + LDRSHIN_A(r0, r1, -i0); + } + } + else + generic_ldxai_s(r0, r1, i0); +} + +static void +_ldxar_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + if (jit_post_index_p() && !jit_thumb_p()) + LDRH_A(r0, r1, r2); + else + generic_ldxar_us(r0, r1, r2); +} + +static void +_ldxai_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + if (jit_post_index_p() && i0 >= -255 && i0 <= 255) { + if (jit_thumb_p()) { + if (i0 >= 0) + T2_LDRHI_A(r0, r1, i0); + else + T2_LDRHIN_A(r0, r1, -i0); + } + else { + if (i0 >= 0) + LDRHI_A(r0, r1, i0); + else + LDRHIN_A(r0, r1, -i0); + } + } + else + generic_ldxai_us(r0, r1, i0); +} + 
+static void +_ldxar_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + if (jit_post_index_p() && !jit_thumb_p()) + LDR_A(r0, r1, r2); + else + generic_ldxar_i(r0, r1, r2); +} + +static void +_ldxai_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + if (jit_post_index_p() && + jit_thumb_p() && i0 >= -255 && i0 <= 255) { + if (i0 >= 0) + T2_LDRI_A(r0, r1, i0); + else + T2_LDRIN_A(r0, r1, -i0); + } + else if (jit_post_index_p() && + !jit_thumb_p() && i0 >= -4095 && i0 <= 4095) { + if (i0 >= 0) + LDRI_A(r0, r1, i0); + else + LDRIN_A(r0, r1, -i0); + } + else + generic_ldxai_i(r0, r1, i0); +} + static void _str_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { @@ -4062,6 +4506,182 @@ _unsti(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) generic_unsti(i0, r0, i1); } +static void +_stxbr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + if (!jit_thumb_p()) + STRB_B(r2, r1, r0); + else + generic_stxbr_c(r0, r1, r2); +} + +static void +_stxbi_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + if (jit_thumb_p() && i0 >= -255 && i0 <= 255) { + if (i0 >= 0) + T2_STRBI_B(r1, r0, i0); + else + T2_STRBIN_B(r1, r0, -i0); + } + else if (!jit_thumb_p() && i0 >= -4095 && i0 <= 4095) { + if (i0 >= 0) + STRBI_B(r1, r0, i0); + else + STRBIN_B(r1, r0, -i0); + } + else + generic_stxbi_c(i0, r0, r1); +} + +static void +_stxbr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + if (!jit_thumb_p()) + STRH_B(r2, r1, r0); + else + generic_stxbr_s(r0, r1, r2); +} + +static void +_stxbi_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + if (i0 >= -255 && i0 <= 255) { + if (jit_thumb_p()) { + if (i0 >= 0) + T2_STRHI_B(r1, r0, i0); + else + T2_STRHIN_B(r1, r0, -i0); + } + else { + if (i0 >= 0) + STRHI_B(r1, r0, i0); + else + STRHIN_B(r1, r0, -i0); + } + } + else + generic_stxbi_s(i0, r0, r1); +} + +static void +_stxbr_i(jit_state_t *_jit, 
jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + if (!jit_thumb_p()) + STR_B(r2, r1, r0); + else + generic_stxbr_i(r0, r1, r2); +} + +static void +_stxbi_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + if (jit_thumb_p() && i0 >= -255 && i0 <= 255) { + if (i0 >= 0) + T2_STRI_B(r1, r0, i0); + else + T2_STRIN_B(r1, r0, -i0); + } + else if (!jit_thumb_p() && i0 >= -4095 && i0 <= 4095) { + if (i0 >= 0) + STRI_B(r1, r0, i0); + else + STRIN_B(r1, r0, -i0); + } + else + generic_stxbi_i(i0, r0, r1); +} + +static void +_stxar_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + if (jit_post_index_p() && !jit_thumb_p()) + STRB_A(r2, r1, r0); + else + generic_stxar_c(r0, r1, r2); +} + +static void +_stxai_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + if (jit_post_index_p() && + jit_thumb_p() && i0 >= -255 && i0 <= 255) { + if (i0 >= 0) + T2_STRBI_A(r1, r0, i0); + else + T2_STRBIN_A(r1, r0, -i0); + } + else if (jit_post_index_p() && + !jit_thumb_p() && i0 >= -4095 && i0 <= 4095) { + if (i0 >= 0) + STRBI_A(r1, r0, i0); + else + STRBIN_A(r1, r0, -i0); + } + else + generic_stxai_c(i0, r0, r1); +} + +static void +_stxar_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + if (jit_post_index_p() && !jit_thumb_p()) + STRH_A(r2, r1, r0); + else + generic_stxar_s(r0, r1, r2); +} + +static void +_stxai_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ /* Store halfword r1 at r0 with post-increment i0; out-of-range offsets use the generic fallback. */ + if (jit_post_index_p() && + jit_thumb_p() && i0 >= -255 && i0 <= 255) { + if (i0 >= 0) + T2_STRHI_A(r1, r0, i0); + else + T2_STRHIN_A(r1, r0, -i0); + } + else if (jit_post_index_p() && + !jit_thumb_p() && i0 >= -255 && i0 <= 255) { /* halfword stores only have an 8-bit immediate in ARM mode too (same limit as _stxbi_s) */ + if (i0 >= 0) + STRHI_A(r1, r0, i0); + else + STRHIN_A(r1, r0, -i0); + } + else + generic_stxai_s(i0, r0, r1); +} + +static void +_stxar_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + if (jit_post_index_p() && !jit_thumb_p()) + STR_A(r2, r1, r0); + else + 
generic_stxar_i(r0, r1, r2); +} + +static void +_stxai_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + if (jit_post_index_p() && + jit_thumb_p() && i0 >= -255 && i0 <= 255) { + if (i0 >= 0) + T2_STRI_A(r1, r0, i0); + else + T2_STRIN_A(r1, r0, -i0); + } + else if (jit_post_index_p() && + !jit_thumb_p() && i0 >= -4095 && i0 <= 4095) { + if (i0 >= 0) + STRI_A(r1, r0, i0); + else + STRIN_A(r1, r0, -i0); + } + else + generic_stxai_i(i0, r0, r1); +} + static void _bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { @@ -4548,11 +5168,11 @@ _patch_at(jit_state_t *_jit, assert((thumb.i & 0x0f700000) == ARM_LDRI); d = label - (instr + 8); if (d < 0) { - thumb.i &= ~ARM_P; + thumb.i &= ~ARM_U; d = -d; } else - thumb.i |= ARM_P; + thumb.i |= ARM_U; assert(!(d & 0xfffff000)); u.i[0] = (thumb.i & 0xfffff000) | d; } diff --git a/deps/lightning/lib/jit_arm-sz.c b/deps/lightning/lib/jit_arm-sz.c index 7ec5e9e89..cbbfd5988 100644 --- a/deps/lightning/lib/jit_arm-sz.c +++ b/deps/lightning/lib/jit_arm-sz.c @@ -530,6 +530,66 @@ 12, /* hmuli */ 4, /* hmulr_u */ 8, /* hmuli_u */ + 8, /* ldxbr_c */ + 4, /* ldxbi_c */ + 8, /* ldxar_c */ + 8, /* ldxai_c */ + 8, /* ldxbr_uc */ + 4, /* ldxbi_uc */ + 8, /* ldxar_uc */ + 8, /* ldxai_uc */ + 8, /* ldxbr_s */ + 4, /* ldxbi_s */ + 8, /* ldxar_s */ + 8, /* ldxai_s */ + 8, /* ldxbr_us */ + 4, /* ldxbi_us */ + 8, /* ldxar_us */ + 8, /* ldxai_us */ + 8, /* ldxbr_i */ + 4, /* ldxbi_i */ + 8, /* ldxar_i */ + 8, /* ldxai_i */ + 0, /* ldxbr_ui */ + 0, /* ldxbi_ui */ + 0, /* ldxar_ui */ + 0, /* ldxai_ui */ + 0, /* ldxbr_l */ + 0, /* ldxbi_l */ + 0, /* ldxar_l */ + 0, /* ldxai_l */ + 12, /* ldxbr_f */ + 12, /* ldxbi_f */ + 12, /* ldxar_f */ + 12, /* ldxai_f */ + 20, /* ldxbr_d */ + 20, /* ldxbi_d */ + 20, /* ldxar_d */ + 20, /* ldxai_d */ + 8, /* stxbr_c */ + 4, /* stxbi_c */ + 8, /* stxar_c */ + 8, /* stxai_c */ + 8, /* stxbr_s */ + 4, /* stxbi_s */ + 8, /* stxar_s */ + 8, /* stxai_s */ + 8, /* stxbr_i */ + 4, /* stxbi_i 
*/ + 8, /* stxar_i */ + 8, /* stxai_i */ + 0, /* stxbr_l */ + 0, /* stxbi_l */ + 0, /* stxar_l */ + 0, /* stxai_l */ + 12, /* stxbr_f */ + 12, /* stxbi_f */ + 12, /* stxar_f */ + 12, /* stxai_f */ + 20, /* stxbr_d */ + 20, /* stxbi_d */ + 20, /* stxar_d */ + 20, /* stxai_d */ #endif /* __ARM_PCS_VFP */ #endif /* __WORDSIZE */ @@ -1064,5 +1124,65 @@ 12, /* hmuli */ 4, /* hmulr_u */ 8, /* hmuli_u */ + 8, /* ldxbr_c */ + 4, /* ldxbi_c */ + 8, /* ldxar_c */ + 8, /* ldxai_c */ + 8, /* ldxbr_uc */ + 4, /* ldxbi_uc */ + 8, /* ldxar_uc */ + 8, /* ldxai_uc */ + 8, /* ldxbr_s */ + 4, /* ldxbi_s */ + 8, /* ldxar_s */ + 8, /* ldxai_s */ + 8, /* ldxbr_us */ + 4, /* ldxbi_us */ + 8, /* ldxar_us */ + 8, /* ldxai_us */ + 8, /* ldxbr_i */ + 4, /* ldxbi_i */ + 8, /* ldxar_i */ + 8, /* ldxai_i */ + 0, /* ldxbr_ui */ + 0, /* ldxbi_ui */ + 0, /* ldxar_ui */ + 0, /* ldxai_ui */ + 0, /* ldxbr_l */ + 0, /* ldxbi_l */ + 0, /* ldxar_l */ + 0, /* ldxai_l */ + 8, /* ldxbr_f */ + 8, /* ldxbi_f */ + 8, /* ldxar_f */ + 8, /* ldxai_f */ + 8, /* ldxbr_d */ + 8, /* ldxbi_d */ + 8, /* ldxar_d */ + 8, /* ldxai_d */ + 8, /* stxbr_c */ + 4, /* stxbi_c */ + 8, /* stxar_c */ + 8, /* stxai_c */ + 8, /* stxbr_s */ + 4, /* stxbi_s */ + 8, /* stxar_s */ + 8, /* stxai_s */ + 8, /* stxbr_i */ + 4, /* stxbi_i */ + 8, /* stxar_i */ + 8, /* stxai_i */ + 0, /* stxbr_l */ + 0, /* stxbi_l */ + 0, /* stxar_l */ + 0, /* stxai_l */ + 8, /* stxbr_f */ + 8, /* stxbi_f */ + 8, /* stxar_f */ + 8, /* stxai_f */ + 8, /* stxbr_d */ + 8, /* stxbi_d */ + 8, /* stxar_d */ + 8, /* stxai_d */ #endif /* __ARM_PCS_VFP */ #endif /* __WORDSIZE */ diff --git a/deps/lightning/lib/jit_arm-vfp.c b/deps/lightning/lib/jit_arm-vfp.c index adf6a3b66..d48eb4510 100644 --- a/deps/lightning/lib/jit_arm-vfp.c +++ b/deps/lightning/lib/jit_arm-vfp.c @@ -129,7 +129,8 @@ # define ARM_VMVNI 0x02800030 # define ARM_VLDR 0x0d100a00 # define ARM_VSTR 0x0d000a00 -# define ARM_VM 0x0c000a00 +# define ARM_VM_T1A1 0x0c000b00 +# define ARM_VM_T2A2 0x0c000a00 
# define ARM_VMOV_ADV_U 0x00800000 /* zero extend */ # define ARM_VMOV_ADV_8 0x00400000 # define ARM_VMOV_ADV_16 0x00000020 @@ -292,39 +293,7 @@ static void _cc_vorsl(jit_state_t*,int,int,int,int,int); # define VCVTR_S32_F64(r0,r1) CC_VCVTR_S32_F64(ARM_CC_AL,r0,r1) # define CC_VCVTR_U32_F64(cc,r0,r1) cc_vo_ss(cc,ARM_VCVTR_U32_F64,r0,r1) # define VCVTR_U32_F64(r0,r1) CC_VCVTR_U32_F64(ARM_CC_AL,r0,r1) -# define CC_VLDMIA_F32(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_L|ARM_M_I,r0,r1,i0) -# define VLDMIA_F32(r0,r1,i0) CC_VLDMIA_F32(ARM_CC_AL,r0,r1,i0) -# define CC_VLDMIA_F64(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_L|ARM_M_I|ARM_V_F64,r0,r1,i0) -# define VLDMIA_F64(r0,r1,i0) CC_VLDMIA_F64(ARM_CC_AL,r0,r1,i0) -# define CC_VSTMIA_F32(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_I,r0,r1,i0) -# define VSTMIA_F32(r0,r1,i0) CC_VSTMIA_F32(ARM_CC_AL,r0,r1,i0) -# define CC_VSTMIA_F64(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_I|ARM_V_F64,r0,r1,i0) -# define VSTMIA_F64(r0,r1,i0) CC_VSTMIA_F64(ARM_CC_AL,r0,r1,i0) -# define CC_VLDMIA_U_F32(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_L|ARM_M_I|ARM_M_U,r0,r1,i0) -# define VLDMIA_U_F32(r0,r1,i0) CC_VLDMIA_U_F32(ARM_CC_AL,r0,r1,i0) -# define CC_VLDMIA_U_F64(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_L|ARM_M_I|ARM_M_U|ARM_V_F64,r0,r1,i0) -# define VLDMIA_U_F64(r0,r1,i0) CC_VLDMIA_U_F64(ARM_CC_AL,r0,r1,i0) -# define CC_VSTMIA_U_F32(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_I|ARM_M_U,r0,r1,i0) -# define VSTMIA_U_F32(r0,r1,i0) CC_VSTMIA_U_F32(ARM_CC_AL,r0,r1,i0) -# define CC_VSTMIA_U_F64(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_I|ARM_M_U|ARM_V_F64,r0,r1,i0) -# define VSTMIA_U_F64(r0,r1,i0) CC_VSTMIA_U_F64(ARM_CC_AL,r0,r1,i0) -# define CC_VLDMDB_U_F32(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_L|ARM_M_B|ARM_M_U,r0,r1,i0) -# define VLDMDB_U_F32(r0,r1,i0) CC_VLDMDB_U_F32(ARM_CC_AL,r0,r1,i0) -# define CC_VLDMDB_U_F64(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_L|ARM_M_B|ARM_M_U|ARM_V_F64,r0,r1,i0) -# define VLDMDB_U_F64(r0,r1,i0) CC_VLDMDB_U_F64(ARM_CC_AL,r0,r1,i0) -# define 
CC_VSTMDB_U_F32(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_B|ARM_M_U,r0,r1,i0) -# define VSTMDB_U_F32(r0,r1,i0) CC_VSTMDB_U_F32(ARM_CC_AL,r0,r1,i0) -# define CC_VSTMDB_U_F64(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_B|ARM_M_U|ARM_V_F64,r0,r1,i0) -# define VSTMDB_U_F64(r0,r1,i0) CC_VSTMDB_U_F64(ARM_CC_AL,r0,r1,i0) -# define CC_VPUSH_F32(cc,r0,i0) CC_VSTMDB_U_F32(cc,_SP_REGNO,r0,i0) -# define VPUSH_F32(r0,i0) CC_VPUSH_F32(ARM_CC_AL,r0,i0) -# define CC_VPUSH_F64(cc,r0,i0) CC_VSTMDB_U_F64(cc,_SP_REGNO,r0,i0) -# define VPUSH_F64(r0,i0) CC_VPUSH_F64(ARM_CC_AL,r0,i0) -# define CC_VPOP_F32(cc,r0,i0) CC_VLDMIA_U_F32(cc,_SP_REGNO,r0,i0) -# define VPOP_F32(r0,i0) CC_VPOP_F32(ARM_CC_AL,r0,i0) -# define CC_VPOP_F64(cc,r0,i0) CC_VLDMIA_U_F64(cc,_SP_REGNO,r0,i0) -# define VPOP_F64(r0,i0) CC_VPOP_F64(ARM_CC_AL,r0,i0) -# define CC_VMOV_A_S8(cc,r0,r1) cc_vorv_(cc,ARM_VMOV_A_D|ARM_VMOV_ADV_8,r0,r1) +# define CC_VMOV_A_S8(cc,r0,r1) cc_vorv_(cc,ARM_VMOV_A_D|ARM_VMOV_ADV_8,r0,r1) # define VMOV_A_S8(r0,r1) CC_VMOV_A_S8(ARM_CC_AL,r0,r1) # define CC_VMOV_A_U8(cc,r0,r1) cc_vorv_(cc,ARM_VMOV_A_D|ARM_VMOV_ADV_8|ARM_VMOV_ADV_U,r0,r1) # define VMOV_A_U8(r0,r1) CC_VMOV_A_U8(ARM_CC_AL,r0,r1) @@ -336,6 +305,14 @@ static void _cc_vorsl(jit_state_t*,int,int,int,int,int); # define VMOV_A_S32(r0,r1) CC_VMOV_A_S32(ARM_CC_AL,r0,r1) # define CC_VMOV_A_U32(cc,r0,r1) cc_vori_(cc,ARM_VMOV_A_D|ARM_VMOV_ADV_U,r0,r1) # define VMOV_A_U32(r0,r1) CC_VMOV_A_U32(ARM_CC_AL,r0,r1) +# define CC_VLDMIA_F32(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM_T2A2|ARM_M_L|ARM_M_U|ARM_M_W,r0,r1,i0) +# define VLDMIA_F32(r0,r1,i0) CC_VLDMIA_F32(ARM_CC_AL,r0,r1,i0) +# define CC_VSTMIA_F32(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM_T2A2|ARM_M_U|ARM_M_W,r0,r1,i0) +# define VSTMIA_F32(r0,r1,i0) CC_VSTMIA_F32(ARM_CC_AL,r0,r1,i0) +# define CC_VLDMIA_F64(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM_T1A1|ARM_M_L|ARM_M_U|ARM_M_W,r0,r1,(i0)<<1) +# define VLDMIA_F64(r0,r1,i0) CC_VLDMIA_F64(ARM_CC_AL,r0,r1,i0) +# define CC_VSTMIA_F64(cc,r0,r1,i0) 
cc_vorsl(cc,ARM_VM_T1A1|ARM_M_U|ARM_M_W,r0,r1,(i0)<<1) +# define VSTMIA_F64(r0,r1,i0) CC_VSTMIA_F64(ARM_CC_AL,r0,r1,i0) # define CC_VMOV_V_I8(cc,r0,r1) cc_vorv_(cc,ARM_VMOV_D_A|ARM_VMOV_ADV_8,r1,r0) # define VMOV_V_I8(r0,r1) CC_VMOV_V_I8(ARM_CC_AL,r0,r1) # define CC_VMOV_V_I16(cc,r0,r1) cc_vorv_(cc,ARM_VMOV_D_A|ARM_VMOV_ADV_16,r1,r0) @@ -475,19 +452,19 @@ static void _cc_vorsl(jit_state_t*,int,int,int,int,int); /* index is multipled by four */ # define CC_VLDRN_F32(cc,r0,r1,i0) cc_vldst(cc,ARM_VLDR,r0,r1,i0) # define VLDRN_F32(r0,r1,i0) CC_VLDRN_F32(ARM_CC_AL,r0,r1,i0) -# define CC_VLDR_F32(cc,r0,r1,i0) cc_vldst(cc,ARM_VLDR|ARM_P,r0,r1,i0) +# define CC_VLDR_F32(cc,r0,r1,i0) cc_vldst(cc,ARM_VLDR|ARM_U,r0,r1,i0) # define VLDR_F32(r0,r1,i0) CC_VLDR_F32(ARM_CC_AL,r0,r1,i0) # define CC_VLDRN_F64(cc,r0,r1,i0) cc_vldst(cc,ARM_VLDR|ARM_V_F64,r0,r1,i0) # define VLDRN_F64(r0,r1,i0) CC_VLDRN_F64(ARM_CC_AL,r0,r1,i0) -# define CC_VLDR_F64(cc,r0,r1,i0) cc_vldst(cc,ARM_VLDR|ARM_V_F64|ARM_P,r0,r1,i0) +# define CC_VLDR_F64(cc,r0,r1,i0) cc_vldst(cc,ARM_VLDR|ARM_V_F64|ARM_U,r0,r1,i0) # define VLDR_F64(r0,r1,i0) CC_VLDR_F64(ARM_CC_AL,r0,r1,i0) # define CC_VSTRN_F32(cc,r0,r1,i0) cc_vldst(cc,ARM_VSTR,r0,r1,i0) # define VSTRN_F32(r0,r1,i0) CC_VSTRN_F32(ARM_CC_AL,r0,r1,i0) -# define CC_VSTR_F32(cc,r0,r1,i0) cc_vldst(cc,ARM_VSTR|ARM_P,r0,r1,i0) +# define CC_VSTR_F32(cc,r0,r1,i0) cc_vldst(cc,ARM_VSTR|ARM_U,r0,r1,i0) # define VSTR_F32(r0,r1,i0) CC_VSTR_F32(ARM_CC_AL,r0,r1,i0) # define CC_VSTRN_F64(cc,r0,r1,i0) cc_vldst(cc,ARM_VSTR|ARM_V_F64,r0,r1,i0) # define VSTRN_F64(r0,r1,i0) CC_VSTRN_F64(ARM_CC_AL,r0,r1,i0) -# define CC_VSTR_F64(cc,r0,r1,i0) cc_vldst(cc,ARM_VSTR|ARM_V_F64|ARM_P,r0,r1,i0) +# define CC_VSTR_F64(cc,r0,r1,i0) cc_vldst(cc,ARM_VSTR|ARM_V_F64|ARM_U,r0,r1,i0) # define VSTR_F64(r0,r1,i0) CC_VSTR_F64(ARM_CC_AL,r0,r1,i0) # define vfp_popcntr(r0,r1) _vfp_popcntr(_jit,r0,r1) static void _vfp_popcntr(jit_state_t*,jit_int32_t,jit_int32_t); @@ -852,6 +829,10 @@ static void 
_vfp_ldxi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); static void _vfp_unldr_x(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); # define vfp_unldi_x(r0, i0, i1) _vfp_unldi_x(_jit, r0, i0, i1) static void _vfp_unldi_x(jit_state_t*, jit_int32_t, jit_word_t, jit_word_t); +# define vfp_ldxai_f(r0, r1, i0) _vfp_ldxai_f(_jit, r0, r1, i0) +static void _vfp_ldxai_f(jit_state_t*, jit_int32_t, jit_word_t, jit_word_t); +# define vfp_ldxai_d(r0, r1, i0) _vfp_ldxai_d(_jit, r0, r1, i0) +static void _vfp_ldxai_d(jit_state_t*, jit_int32_t, jit_word_t, jit_word_t); # define vfp_str_f(r0,r1) VSTR_F32(r1,r0,0) # define vfp_str_d(r0,r1) VSTR_F64(r1,r0,0) # define vfp_sti_f(i0,r0) _vfp_sti_f(_jit,i0,r0) @@ -870,6 +851,10 @@ static void _vfp_stxi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); static void _vfp_unstr_x(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); #define vfp_unsti_x(i0, r0, i1) _vfp_unsti_x(_jit, i0, r0, i1) static void _vfp_unsti_x(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t); +# define vfp_stxai_f(i0, r0, r1) _vfp_stxai_f(_jit, i0, r0, r1) +static void _vfp_stxai_f(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +# define vfp_stxai_d(i0, r0, r1) _vfp_stxai_d(_jit, i0, r0, r1) +static void _vfp_stxai_d(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); # define vfp_vaarg_d(r0, r1) _vfp_vaarg_d(_jit, r0, r1) static void _vfp_vaarg_d(jit_state_t*, jit_int32_t, jit_int32_t); #endif @@ -1273,11 +1258,9 @@ _cc_vorsl(jit_state_t *_jit, int cc, int o, int r0, int r1, int i0) jit_thumb_t thumb; assert(!(cc & 0x0fffffff)); assert(!(o & 0xf00ff0ff)); - /* save i0 double precision registers */ - if (o & ARM_V_F64) i0 <<= 1; - /* if (r1 & 1) cc & ARM_V_F64 must be false */ - if (r1 & 1) o |= ARM_V_D; r1 = vfp_regno(r1); - assert(i0 && !(i0 & 1) && r1 + i0 <= 32); + assert(!(r1 & 1)); + r1 = vfp_regno(r1); + assert(i0 && r1 + i0 <= 32); thumb.i = cc|o|(_u4(r0)<<16)|(_u4(r1)<<12)|_u8(i0); if (jit_thumb_p()) iss(thumb.s[0], thumb.s[1]); @@ -2615,6 
+2598,30 @@ _vfp_unldi_x(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0, jit_word_t i1) } } +static void +_vfp_ldxai_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ /* Post-increment float load: read r0 from the address in r1, then advance r1 by i0. */ + assert(jit_fpr_p(r0)); + if (i0 == 4) + VLDMIA_F32(r1, r0, 1); /* single-register VLDMIA with write-back advances r1 by 4 */ + else { + vfp_ldr_f(r0, r1); /* access first ... */ + addi(r1, r1, i0); /* ... then bump the base, matching the VLDMIA path */ + } +} + +static void +_vfp_ldxai_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ /* Post-increment double load: read r0 from the address in r1, then advance r1 by i0. */ + assert(jit_fpr_p(r0)); + if (i0 == 8) + VLDMIA_F64(r1, r0, 1); /* single-register VLDMIA with write-back advances r1 by 8 */ + else { + vfp_ldr_d(r0, r1); /* access first ... */ + addi(r1, r1, i0); /* ... then bump the base, matching the VLDMIA path */ + } +} + static void _vfp_sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0) { @@ -2848,6 +2855,30 @@ _vfp_unsti_x(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) } } +static void +_vfp_stxai_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ /* Post-increment float store: write r1 to the address in r0, then advance r0 by i0. */ + assert(jit_fpr_p(r1)); + if (i0 == 4) + VSTMIA_F32(r0, r1, 1); /* single-register VSTMIA with write-back advances r0 by 4 */ + else { + vfp_str_f(r0, r1); /* access first ... */ + addi(r0, r0, i0); /* ... then bump the base, matching the VSTMIA path */ + } +} + +static void +_vfp_stxai_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ /* Post-increment double store: write r1 to the address in r0, then advance r0 by i0. */ + assert(jit_fpr_p(r1)); + if (i0 == 8) + VSTMIA_F64(r0, r1, 1); /* single-register VSTMIA with write-back advances r0 by 8 */ + else { + vfp_str_d(r0, r1); /* access first ... */ + addi(r0, r0, i0); /* ... then bump the base, matching the VSTMIA path */ + } +} + static void _vfp_vaarg_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { diff --git a/deps/lightning/lib/jit_arm.c b/deps/lightning/lib/jit_arm.c index 25aa7cbb6..d2bee7634 100644 --- a/deps/lightning/lib/jit_arm.c +++ b/deps/lightning/lib/jit_arm.c @@ -1352,6 +1352,24 @@ _emit_code(jit_state_t *_jit) rn(node->v.q.h), rn(node->w.w));\ case jit_code_##name##i##type: \ break +#define case_rrx(name, type) \ + case jit_code_##name##i##type: \ + name##i##type(rn(node->u.w), rn(node->v.w), node->w.w); \ + break +#define case_rrX(name, type) \ + case jit_code_##name##r##type: \ + name##r##type(rn(node->u.w), \ + rn(node->v.w), rn(node->w.w)); \ + break +#define case_xrr(name, type) \ + case jit_code_##name##i##type: \ + name##i##type(node->u.w, rn(node->v.w), rn(node->w.w)); \ + break +#define case_Xrr(name, type) \ + case 
jit_code_##name##r##type: \ + name##r##type(rn(node->u.w), rn(node->v.w), \ + rn(node->w.w)); \ + break #define case_rrrr(name, type) \ case jit_code_##name##r##type: \ name##r##type(rn(node->u.q.l), rn(node->u.q.h), \ @@ -1667,6 +1685,68 @@ _emit_code(jit_state_t *_jit) case jit_code_unldi_u: unldi_u(rn(node->u.w), node->v.w, node->w.w); break; + case_rrx(ldxb, _c); case_rrX(ldxb, _c); + case_rrx(ldxa, _c); case_rrX(ldxa, _c); + case_rrx(ldxb, _uc); case_rrX(ldxb, _uc); + case_rrx(ldxa, _uc); case_rrX(ldxa, _uc); + case_rrx(ldxb, _s); case_rrX(ldxb, _s); + case_rrx(ldxa, _s); case_rrX(ldxa, _s); + case_rrx(ldxb, _us); case_rrX(ldxb, _us); + case_rrx(ldxa, _us); case_rrX(ldxa, _us); + case_rrx(ldxb, _i); case_rrX(ldxb, _i); + case_rrx(ldxa, _i); case_rrX(ldxa, _i); + case jit_code_ldxbr_f: + addr(rn(node->v.w), rn(node->v.w), rn(node->w.w)); + goto L_ldxbi_f; + case jit_code_ldxbi_f: + addi(rn(node->v.w), rn(node->v.w), node->w.w); + L_ldxbi_f: + if (jit_swf_p()) + swf_ldr_f(rn(node->u.w), rn(node->v.w)); + else + vfp_ldr_f(rn(node->u.w), rn(node->v.w)); + break; + case jit_code_ldxar_f: + if (jit_swf_p()) + swf_ldr_f(rn(node->u.w), rn(node->v.w)); + else + vfp_ldr_f(rn(node->u.w), rn(node->v.w)); + addr(rn(node->v.w), rn(node->v.w), rn(node->w.w)); + break; + case jit_code_ldxai_f: + if (jit_swf_p()) { + swf_ldr_f(rn(node->u.w), rn(node->v.w)); + addi(rn(node->v.w), rn(node->v.w), node->w.w); + } + else + vfp_ldxai_f(rn(node->u.w), rn(node->v.w), node->w.w); + break; + case jit_code_ldxbr_d: + addr(rn(node->v.w), rn(node->v.w), rn(node->w.w)); + goto L_ldxbi_d; + case jit_code_ldxbi_d: + addi(rn(node->v.w), rn(node->v.w), node->w.w); + L_ldxbi_d: + if (jit_swf_p()) + swf_ldr_d(rn(node->u.w), rn(node->v.w)); + else + vfp_ldr_d(rn(node->u.w), rn(node->v.w)); + break; + case jit_code_ldxar_d: + if (jit_swf_p()) + swf_ldr_d(rn(node->u.w), rn(node->v.w)); + else + vfp_ldr_d(rn(node->u.w), rn(node->v.w)); + addr(rn(node->v.w), rn(node->v.w), rn(node->w.w)); + break; + 
case jit_code_ldxai_d: + if (jit_swf_p()) { + swf_ldr_d(rn(node->u.w), rn(node->v.w)); + addi(rn(node->v.w), rn(node->v.w), node->w.w); + } + else + vfp_ldxai_d(rn(node->u.w), rn(node->v.w), node->w.w); + break; case_rr(st, _c); case_wr(st, _c); case_rr(st, _s); @@ -1685,6 +1765,64 @@ _emit_code(jit_state_t *_jit) case jit_code_unsti: unsti(node->u.w, rn(node->v.w), node->w.w); break; + case_xrr(stxb, _c); case_Xrr(stxb, _c); + case_xrr(stxa, _c); case_Xrr(stxa, _c); + case_xrr(stxb, _s); case_Xrr(stxb, _s); + case_xrr(stxa, _s); case_Xrr(stxa, _s); + case_xrr(stxb, _i); case_Xrr(stxb, _i); + case_xrr(stxa, _i); case_Xrr(stxa, _i); + case jit_code_stxbr_f: + addr(rn(node->v.w), rn(node->v.w), rn(node->u.w)); + goto L_stxbi_f; + case jit_code_stxbi_f: + addi(rn(node->v.w), rn(node->v.w), node->u.w); + L_stxbi_f: + if (jit_swf_p()) + swf_str_f(rn(node->v.w), rn(node->w.w)); + else + vfp_str_f(rn(node->v.w), rn(node->w.w)); + break; + case jit_code_stxar_f: + if (jit_swf_p()) + swf_str_f(rn(node->v.w), rn(node->w.w)); + else + vfp_str_f(rn(node->v.w), rn(node->w.w)); + addr(rn(node->v.w), rn(node->v.w), rn(node->u.w)); + break; + case jit_code_stxai_f: + if (jit_swf_p()) { + swf_str_f(rn(node->v.w), rn(node->w.w)); + addi(rn(node->v.w), rn(node->v.w), node->u.w); + } + else + vfp_stxai_f(node->u.w, rn(node->v.w), rn(node->w.w)); + break; + case jit_code_stxbr_d: + addr(rn(node->v.w), rn(node->v.w), rn(node->u.w)); + goto L_stxbi_d; + case jit_code_stxbi_d: + addi(rn(node->v.w), rn(node->v.w), node->u.w); + L_stxbi_d: + if (jit_swf_p()) + swf_str_d(rn(node->v.w), rn(node->w.w)); + else + vfp_str_d(rn(node->v.w), rn(node->w.w)); + break; + case jit_code_stxar_d: + if (jit_swf_p()) + swf_str_d(rn(node->v.w), rn(node->w.w)); + else + vfp_str_d(rn(node->v.w), rn(node->w.w)); + addr(rn(node->v.w), rn(node->v.w), rn(node->u.w)); + break; + case jit_code_stxai_d: + if (jit_swf_p()) { + swf_str_d(rn(node->v.w), rn(node->w.w)); + addi(rn(node->v.w), rn(node->v.w), node->u.w); + 
} + else + vfp_stxai_d(node->u.w, rn(node->v.w), rn(node->w.w)); + break; case_rr(hton, _us); case_rr(hton, _ui); case_rr(bswap, _us); @@ -2327,6 +2465,10 @@ _emit_code(jit_state_t *_jit) #undef case_vvw #undef case_rrw #undef case_vvv +#undef case_rrx +#undef case_rrX +#undef case_xrr +#undef case_Xrr #undef case_rrr #undef case_wv #undef case_wr @@ -2353,7 +2495,7 @@ _emit_code(jit_state_t *_jit) * FIXME can this cause issues in the preprocessor prefetch * or something else? should not, as the constants are after * an unconditional jump */ - if (value & ARM_P) value = value & 0x00000fff; + if (value & ARM_U) value = value & 0x00000fff; else value = -(value & 0x00000fff); word = word + 8 + value; } diff --git a/deps/lightning/lib/jit_disasm.c b/deps/lightning/lib/jit_disasm.c index 90d90b0f1..7866f2e5b 100644 --- a/deps/lightning/lib/jit_disasm.c +++ b/deps/lightning/lib/jit_disasm.c @@ -112,6 +112,12 @@ jit_init_debug(const char *progname, FILE *stream) # if defined(__s390__) || defined(__s390x__) disasm_info.disassembler_options = "zarch"; # endif +# if defined(__sh__) + disasm_info.arch = bfd_arch_sh; + disasm_info.mach = bfd_mach_sh4; + disasm_info.endian = disasm_info.display_endian = BFD_ENDIAN_LITTLE; +# endif + disasm_info.print_address_func = disasm_print_address; # if BINUTILS_2_29 diff --git a/deps/lightning/lib/jit_fallback.c b/deps/lightning/lib/jit_fallback.c index 3a471eb2b..ce7bffd2a 100644 --- a/deps/lightning/lib/jit_fallback.c +++ b/deps/lightning/lib/jit_fallback.c @@ -302,6 +302,10 @@ static void _fallback_unsti_x(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t); # define fallback_patch_bmsi(inst, lbl) \ patch_at(inst, lbl) # endif +# if __WORDSIZE == 32 +# define fallback_divi_u(r0,r1,i0) _fallback_divi_u(_jit,r0,r1,i0) +static void _fallback_divi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# endif #endif #if CODE @@ -4241,4 +4245,31 @@ _fallback_unsti_x(jit_state_t *_jit, jit_unget_reg(t0); } # endif + +# if __WORDSIZE == 32 +static 
void _fallback_divi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + jit_int32_t t0; + unsigned int p, m; + + if (i0 == 1) { + movr(r0, r1); + } else if (i0 >= 0x80000001) { + gei_u(r0, r1, i0); + } else { + p = 31 - __builtin_clz(i0) + !!(i0 & (i0 - 1)); + m = (unsigned int)(((0x1ull << (32 + p)) + i0 - 1) / (unsigned long long)i0); + + t0 = fallback_jit_get_reg(jit_class_gpr); + + hmuli_u(rn(t0), r1, m); + subr(r0, r1, rn(t0)); + rshi_u(r0, r0, 1); + addr(r0, r0, rn(t0)); + rshi_u(r0, r0, p - 1); + + jit_unget_reg(t0); + } +} +# endif #endif diff --git a/deps/lightning/lib/jit_hppa-sz.c b/deps/lightning/lib/jit_hppa-sz.c index 7a23ebda1..e621cb783 100644 --- a/deps/lightning/lib/jit_hppa-sz.c +++ b/deps/lightning/lib/jit_hppa-sz.c @@ -528,4 +528,64 @@ 40, /* hmuli */ 48, /* hmulr_u */ 56, /* hmuli_u */ + 12, /* ldxbr_c */ + 12, /* ldxbi_c */ + 12, /* ldxar_c */ + 12, /* ldxai_c */ + 8, /* ldxbr_uc */ + 8, /* ldxbi_uc */ + 8, /* ldxar_uc */ + 8, /* ldxai_uc */ + 12, /* ldxbr_s */ + 12, /* ldxbi_s */ + 12, /* ldxar_s */ + 12, /* ldxai_s */ + 8, /* ldxbr_us */ + 8, /* ldxbi_us */ + 8, /* ldxar_us */ + 8, /* ldxai_us */ + 8, /* ldxbr_i */ + 8, /* ldxbi_i */ + 8, /* ldxar_i */ + 8, /* ldxai_i */ + 0, /* ldxbr_ui */ + 0, /* ldxbi_ui */ + 0, /* ldxar_ui */ + 0, /* ldxai_ui */ + 0, /* ldxbr_l */ + 0, /* ldxbi_l */ + 0, /* ldxar_l */ + 0, /* ldxai_l */ + 8, /* ldxbr_f */ + 8, /* ldxbi_f */ + 8, /* ldxar_f */ + 8, /* ldxai_f */ + 8, /* ldxbr_d */ + 8, /* ldxbi_d */ + 8, /* ldxar_d */ + 8, /* ldxai_d */ + 8, /* stxbr_c */ + 8, /* stxbi_c */ + 8, /* stxar_c */ + 8, /* stxai_c */ + 8, /* stxbr_s */ + 8, /* stxbi_s */ + 8, /* stxar_s */ + 8, /* stxai_s */ + 8, /* stxbr_i */ + 8, /* stxbi_i */ + 8, /* stxar_i */ + 8, /* stxai_i */ + 0, /* stxbr_l */ + 0, /* stxbi_l */ + 0, /* stxar_l */ + 0, /* stxai_l */ + 8, /* stxbr_f */ + 8, /* stxbi_f */ + 8, /* stxar_f */ + 8, /* stxai_f */ + 8, /* stxbr_d */ + 8, /* stxbi_d */ + 8, /* stxar_d */ + 8, /* stxai_d */ 
#endif /* __WORDSIZE */ diff --git a/deps/lightning/lib/jit_hppa.c b/deps/lightning/lib/jit_hppa.c index 6330bf68b..60f7786b1 100644 --- a/deps/lightning/lib/jit_hppa.c +++ b/deps/lightning/lib/jit_hppa.c @@ -912,6 +912,26 @@ _emit_code(jit_state_t *_jit) rn(node->v.q.h), rn(node->w.w)); \ case jit_code_##name##i##type: \ break; +#define case_rrx(name, type) \ + case jit_code_##name##i##type: \ + generic_##name##i##type(rn(node->u.w), \ + rn(node->v.w), node->w.w); \ + break +#define case_rrX(name, type) \ + case jit_code_##name##r##type: \ + generic_##name##r##type(rn(node->u.w), \ + rn(node->v.w), rn(node->w.w)); \ + break +#define case_xrr(name, type) \ + case jit_code_##name##i##type: \ + generic_##name##i##type(node->u.w, rn(node->v.w), \ + rn(node->w.w)); \ + break +#define case_Xrr(name, type) \ + case jit_code_##name##r##type: \ + generic_##name##r##type(rn(node->u.w), rn(node->v.w), \ + rn(node->w.w)); \ + break #define case_rrrr(name, type) \ case jit_code_##name##r##type: \ name##r##type(rn(node->u.q.l), rn(node->u.q.h), \ @@ -1195,6 +1215,20 @@ _emit_code(jit_state_t *_jit) case jit_code_unldi_u: unldi_u(rn(node->u.w), node->v.w, node->w.w); break; + case_rrx(ldxb, _c); case_rrX(ldxb, _c); + case_rrx(ldxa, _c); case_rrX(ldxa, _c); + case_rrx(ldxb, _uc); case_rrX(ldxb, _uc); + case_rrx(ldxa, _uc); case_rrX(ldxa, _uc); + case_rrx(ldxb, _s); case_rrX(ldxb, _s); + case_rrx(ldxa, _s); case_rrX(ldxa, _s); + case_rrx(ldxb, _us); case_rrX(ldxb, _us); + case_rrx(ldxa, _us); case_rrX(ldxa, _us); + case_rrx(ldxb, _i); case_rrX(ldxb, _i); + case_rrx(ldxa, _i); case_rrX(ldxa, _i); + case_rrx(ldxb, _f); case_rrX(ldxb, _f); + case_rrx(ldxa, _f); case_rrX(ldxa, _f); + case_rrx(ldxb, _d); case_rrX(ldxb, _d); + case_rrx(ldxa, _d); case_rrX(ldxa, _d); case_rr(st, _c); case_wr(st, _c); case_rr(st, _s); @@ -1215,6 +1249,16 @@ _emit_code(jit_state_t *_jit) case jit_code_unsti: unsti(node->u.w, rn(node->v.w), node->w.w); break; + case_xrr(stxb, _c); case_Xrr(stxb, _c); + 
case_xrr(stxa, _c); case_Xrr(stxa, _c); + case_xrr(stxb, _s); case_Xrr(stxb, _s); + case_xrr(stxa, _s); case_Xrr(stxa, _s); + case_xrr(stxb, _i); case_Xrr(stxb, _i); + case_xrr(stxa, _i); case_Xrr(stxa, _i); + case_xrr(stxb, _f); case_rrX(stxb, _f); + case_xrr(stxa, _f); case_rrX(stxa, _f); + case_xrr(stxb, _d); case_rrX(stxb, _d); + case_xrr(stxa, _d); case_rrX(stxa, _d); case_brr(blt,); case_brw(blt,); case_brr(blt, _u); @@ -1688,6 +1732,10 @@ _emit_code(jit_state_t *_jit) #undef case_rrrw #undef case_rrw #undef case_rrrr +#undef case_xrr +#undef case_Xrr +#undef case_rrx +#undef case_rrX #undef case_rrr #undef case_wr #undef case_rw diff --git a/deps/lightning/lib/jit_ia64-sz.c b/deps/lightning/lib/jit_ia64-sz.c index 9033334b6..5a397b58c 100644 --- a/deps/lightning/lib/jit_ia64-sz.c +++ b/deps/lightning/lib/jit_ia64-sz.c @@ -1,5 +1,5 @@ #if __WORDSIZE == 64 -#define JIT_INSTR_MAX 144 +#define JIT_INSTR_MAX 208 0, /* data */ 0, /* live */ 32, /* align */ @@ -502,12 +502,12 @@ 48, /* unldi */ 96, /* unldr_u */ 48, /* unldi_u */ - 128, /* unstr */ - 96, /* unsti */ + 192, /* unstr */ + 64, /* unsti */ 80, /* unldr_x */ 48, /* unldi_x */ - 144, /* unstr_x */ - 112, /* unsti_x */ + 208, /* unstr_x */ + 64, /* unsti_x */ 16, /* fmar_f */ 0, /* fmai_f */ 16, /* fmsr_f */ @@ -524,8 +524,68 @@ 0, /* fnmai_d */ 16, /* fnmsr_d */ 0, /* fnmsi_d */ - 32, /* hmulr */ - 32, /* hmuli */ - 32, /* hmulr_u */ - 32, /* hmuli_u */ + 32, /* hmulr */ + 32, /* hmuli */ + 32, /* hmulr_u */ + 32, /* hmuli_u */ + 16, /* ldxbr_c */ + 16, /* ldxbi_c */ + 16, /* ldxar_c */ + 16, /* ldxai_c */ + 16, /* ldxbr_uc */ + 16, /* ldxbi_uc */ + 16, /* ldxar_uc */ + 16, /* ldxai_uc */ + 16, /* ldxbr_s */ + 16, /* ldxbi_s */ + 16, /* ldxar_s */ + 16, /* ldxai_s */ + 16, /* ldxbr_us */ + 16, /* ldxbi_us */ + 16, /* ldxar_us */ + 16, /* ldxai_us */ + 16, /* ldxbr_i */ + 16, /* ldxbi_i */ + 16, /* ldxar_i */ + 16, /* ldxai_i */ + 16, /* ldxbr_ui */ + 16, /* ldxbi_ui */ + 16, /* ldxar_ui */ + 16, /* 
ldxai_ui */ + 16, /* ldxbr_l */ + 16, /* ldxbi_l */ + 16, /* ldxar_l */ + 16, /* ldxai_l */ + 16, /* ldxbr_f */ + 16, /* ldxbi_f */ + 16, /* ldxar_f */ + 16, /* ldxai_f */ + 16, /* ldxbr_d */ + 16, /* ldxbi_d */ + 16, /* ldxar_d */ + 16, /* ldxai_d */ + 16, /* stxbr_c */ + 16, /* stxbi_c */ + 16, /* stxar_c */ + 16, /* stxai_c */ + 16, /* stxbr_s */ + 16, /* stxbi_s */ + 16, /* stxar_s */ + 16, /* stxai_s */ + 16, /* stxbr_i */ + 16, /* stxbi_i */ + 16, /* stxar_i */ + 16, /* stxai_i */ + 16, /* stxbr_l */ + 16, /* stxbi_l */ + 16, /* stxar_l */ + 16, /* stxai_l */ + 16, /* stxbr_f */ + 16, /* stxbi_f */ + 16, /* stxar_f */ + 16, /* stxai_f */ + 16, /* stxbr_d */ + 16, /* stxbi_d */ + 16, /* stxar_d */ + 16, /* stxai_d */ #endif /* __WORDSIZE */ diff --git a/deps/lightning/lib/jit_ia64.c b/deps/lightning/lib/jit_ia64.c index f689231e1..a711a8b4c 100644 --- a/deps/lightning/lib/jit_ia64.c +++ b/deps/lightning/lib/jit_ia64.c @@ -1043,6 +1043,26 @@ _emit_code(jit_state_t *_jit) name##r##type(rn(node->u.w), \ rn(node->v.w), rn(node->w.w)); \ break +#define case_rrx(name, type) \ + case jit_code_##name##i##type: \ + generic_##name##i##type(rn(node->u.w), \ + rn(node->v.w), node->w.w); \ + break +#define case_rrX(name, type) \ + case jit_code_##name##r##type: \ + generic_##name##r##type(rn(node->u.w), \ + rn(node->v.w), rn(node->w.w)); \ + break +#define case_xrr(name, type) \ + case jit_code_##name##i##type: \ + generic_##name##i##type(node->u.w, rn(node->v.w), \ + rn(node->w.w)); \ + break +#define case_Xrr(name, type) \ + case jit_code_##name##r##type: \ + generic_##name##r##type(rn(node->u.w), rn(node->v.w), \ + rn(node->w.w)); \ + break #define case_rrrr(name, type) \ case jit_code_##name##r##type: \ name##r##type(rn(node->u.q.l), rn(node->u.q.h), \ @@ -1344,6 +1364,24 @@ _emit_code(jit_state_t *_jit) case jit_code_unldi_u: unldi_u(rn(node->u.w), node->v.w, node->w.w); break; + case_rrx(ldxb, _c); case_rrX(ldxb, _c); + case_rrx(ldxa, _c); case_rrX(ldxa, _c); + 
case_rrx(ldxb, _uc); case_rrX(ldxb, _uc); + case_rrx(ldxa, _uc); case_rrX(ldxa, _uc); + case_rrx(ldxb, _s); case_rrX(ldxb, _s); + case_rrx(ldxa, _s); case_rrX(ldxa, _s); + case_rrx(ldxb, _us); case_rrX(ldxb, _us); + case_rrx(ldxa, _us); case_rrX(ldxa, _us); + case_rrx(ldxb, _i); case_rrX(ldxb, _i); + case_rrx(ldxa, _i); case_rrX(ldxa, _i); + case_rrx(ldxb, _ui); case_rrX(ldxb, _ui); + case_rrx(ldxa, _ui); case_rrX(ldxa, _ui); + case_rrx(ldxb, _l); case_rrX(ldxb, _l); + case_rrx(ldxa, _l); case_rrX(ldxa, _l); + case_rrx(ldxb, _f); case_rrX(ldxb, _f); + case_rrx(ldxa, _f); case_rrX(ldxa, _f); + case_rrx(ldxb, _d); case_rrX(ldxb, _d); + case_rrx(ldxa, _d); case_rrX(ldxa, _d); case_rr(st, _c); case_wr(st, _c); case_rr(st, _s); @@ -1366,6 +1404,18 @@ _emit_code(jit_state_t *_jit) case jit_code_unsti: unsti(node->u.w, rn(node->v.w), node->w.w); break; + case_xrr(stxb, _c); case_Xrr(stxb, _c); + case_xrr(stxa, _c); case_Xrr(stxa, _c); + case_xrr(stxb, _s); case_Xrr(stxb, _s); + case_xrr(stxa, _s); case_Xrr(stxa, _s); + case_xrr(stxb, _i); case_Xrr(stxb, _i); + case_xrr(stxa, _i); case_Xrr(stxa, _i); + case_xrr(stxb, _l); case_rrX(stxb, _l); + case_xrr(stxa, _l); case_rrX(stxa, _l); + case_xrr(stxb, _f); case_rrX(stxb, _f); + case_xrr(stxa, _f); case_rrX(stxa, _f); + case_xrr(stxb, _d); case_rrX(stxb, _d); + case_xrr(stxa, _d); case_rrX(stxa, _d); case_brr(blt,); case_brw(blt,); case_brr(blt, _u); @@ -1897,6 +1947,10 @@ _emit_code(jit_state_t *_jit) #undef case_rrf #undef case_rrw #undef case_rrrr +#undef case_xrr +#undef case_Xrr +#undef case_rrx +#undef case_rrX #undef case_rrr #undef case_wr #undef case_rw diff --git a/deps/lightning/lib/jit_loongarch-sz.c b/deps/lightning/lib/jit_loongarch-sz.c index 18e73aa20..6eaf39760 100644 --- a/deps/lightning/lib/jit_loongarch-sz.c +++ b/deps/lightning/lib/jit_loongarch-sz.c @@ -528,4 +528,64 @@ 20, /* hmuli */ 4, /* hmulr_u */ 20, /* hmuli_u */ + 8, /* ldxbr_c */ + 8, /* ldxbi_c */ + 8, /* ldxar_c */ + 8, /* ldxai_c */ + 8, /* 
ldxbr_uc */ + 8, /* ldxbi_uc */ + 8, /* ldxar_uc */ + 8, /* ldxai_uc */ + 8, /* ldxbr_s */ + 8, /* ldxbi_s */ + 8, /* ldxar_s */ + 8, /* ldxai_s */ + 8, /* ldxbr_us */ + 8, /* ldxbi_us */ + 8, /* ldxar_us */ + 8, /* ldxai_us */ + 8, /* ldxbr_i */ + 8, /* ldxbi_i */ + 8, /* ldxar_i */ + 8, /* ldxai_i */ + 8, /* ldxbr_ui */ + 8, /* ldxbi_ui */ + 8, /* ldxar_ui */ + 8, /* ldxai_ui */ + 8, /* ldxbr_l */ + 8, /* ldxbi_l */ + 8, /* ldxar_l */ + 8, /* ldxai_l */ + 8, /* ldxbr_f */ + 8, /* ldxbi_f */ + 8, /* ldxar_f */ + 8, /* ldxai_f */ + 8, /* ldxbr_d */ + 8, /* ldxbi_d */ + 8, /* ldxar_d */ + 8, /* ldxai_d */ + 8, /* stxbr_c */ + 8, /* stxbi_c */ + 8, /* stxar_c */ + 8, /* stxai_c */ + 8, /* stxbr_s */ + 8, /* stxbi_s */ + 8, /* stxar_s */ + 8, /* stxai_s */ + 8, /* stxbr_i */ + 8, /* stxbi_i */ + 8, /* stxar_i */ + 8, /* stxai_i */ + 8, /* stxbr_l */ + 8, /* stxbi_l */ + 8, /* stxar_l */ + 8, /* stxai_l */ + 8, /* stxbr_f */ + 8, /* stxbi_f */ + 8, /* stxar_f */ + 8, /* stxai_f */ + 8, /* stxbr_d */ + 8, /* stxbi_d */ + 8, /* stxar_d */ + 8, /* stxai_d */ #endif /* __WORDSIZE */ diff --git a/deps/lightning/lib/jit_loongarch.c b/deps/lightning/lib/jit_loongarch.c index cd38c4e56..7b4981985 100644 --- a/deps/lightning/lib/jit_loongarch.c +++ b/deps/lightning/lib/jit_loongarch.c @@ -963,6 +963,26 @@ _emit_code(jit_state_t *_jit) name##r##type(rn(node->u.w), \ rn(node->v.w), rn(node->w.w)); \ break +#define case_rrx(name, type) \ + case jit_code_##name##i##type: \ + generic_##name##i##type(rn(node->u.w), \ + rn(node->v.w), node->w.w); \ + break +#define case_rrX(name, type) \ + case jit_code_##name##r##type: \ + generic_##name##r##type(rn(node->u.w), \ + rn(node->v.w), rn(node->w.w)); \ + break +#define case_xrr(name, type) \ + case jit_code_##name##i##type: \ + generic_##name##i##type(node->u.w, rn(node->v.w), \ + rn(node->w.w)); \ + break +#define case_Xrr(name, type) \ + case jit_code_##name##r##type: \ + generic_##name##r##type(rn(node->u.w), rn(node->v.w), \ + 
rn(node->w.w)); \ + break #define case_rrrr(name, type) \ case jit_code_##name##r##type: \ name##r##type(rn(node->u.q.l), rn(node->u.q.h), \ @@ -1203,6 +1223,24 @@ _emit_code(jit_state_t *_jit) case jit_code_unldi_u: unldi_u(rn(node->u.w), node->v.w, node->w.w); break; + case_rrx(ldxb, _c); case_rrX(ldxb, _c); + case_rrx(ldxa, _c); case_rrX(ldxa, _c); + case_rrx(ldxb, _uc); case_rrX(ldxb, _uc); + case_rrx(ldxa, _uc); case_rrX(ldxa, _uc); + case_rrx(ldxb, _s); case_rrX(ldxb, _s); + case_rrx(ldxa, _s); case_rrX(ldxa, _s); + case_rrx(ldxb, _us); case_rrX(ldxb, _us); + case_rrx(ldxa, _us); case_rrX(ldxa, _us); + case_rrx(ldxb, _i); case_rrX(ldxb, _i); + case_rrx(ldxa, _i); case_rrX(ldxa, _i); + case_rrx(ldxb, _ui); case_rrX(ldxb, _ui); + case_rrx(ldxa, _ui); case_rrX(ldxa, _ui); + case_rrx(ldxb, _l); case_rrX(ldxb, _l); + case_rrx(ldxa, _l); case_rrX(ldxa, _l); + case_rrx(ldxb, _f); case_rrX(ldxb, _f); + case_rrx(ldxa, _f); case_rrX(ldxa, _f); + case_rrx(ldxb, _d); case_rrX(ldxb, _d); + case_rrx(ldxa, _d); case_rrX(ldxa, _d); case_rr(st, _c); case_wr(st, _c); case_rr(st, _s); @@ -1225,6 +1263,18 @@ _emit_code(jit_state_t *_jit) case jit_code_unsti: unsti(node->u.w, rn(node->v.w), node->w.w); break; + case_xrr(stxb, _c); case_Xrr(stxb, _c); + case_xrr(stxa, _c); case_Xrr(stxa, _c); + case_xrr(stxb, _s); case_Xrr(stxb, _s); + case_xrr(stxa, _s); case_Xrr(stxa, _s); + case_xrr(stxb, _i); case_Xrr(stxb, _i); + case_xrr(stxa, _i); case_Xrr(stxa, _i); + case_xrr(stxb, _l); case_rrX(stxb, _l); + case_xrr(stxa, _l); case_rrX(stxa, _l); + case_xrr(stxb, _f); case_rrX(stxb, _f); + case_xrr(stxa, _f); case_rrX(stxa, _f); + case_xrr(stxb, _d); case_rrX(stxb, _d); + case_xrr(stxa, _d); case_rrX(stxa, _d); case_rr(hton, _us); case_rr(hton, _ui); case_rr(hton, _ul); @@ -1813,6 +1863,10 @@ _emit_code(jit_state_t *_jit) #undef case_brr #undef case_wrr #undef case_rrw +#undef case_xrr +#undef case_Xrr +#undef case_rrx +#undef case_rrX #undef case_rrr #undef case_wr #undef case_rw diff 
--git a/deps/lightning/lib/jit_mips-fpu.c b/deps/lightning/lib/jit_mips-fpu.c index 61db30e34..8ad97e91e 100644 --- a/deps/lightning/lib/jit_mips-fpu.c +++ b/deps/lightning/lib/jit_mips-fpu.c @@ -349,7 +349,7 @@ static void _movi_f(jit_state_t*,jit_int32_t,jit_float32_t*); # define movi64(r0, i0) _movi64(_jit, r0, i0) static void _movi64(jit_state_t*,jit_int32_t,jit_int64_t); # define movi_d_w(r0, i0) _movi_d_w(_jit, r0, i0) -static void _movi_d_w(jit_state_t*,jit_int32_t,jit_int64_t); +static void _movi_d_w(jit_state_t*,jit_int32_t,jit_float64_t); # elif __WORDSIZE == 64 # define movi64(r0, i0) movi(r0, i0) # endif @@ -1152,7 +1152,7 @@ _movi64(jit_state_t *_jit, jit_int32_t r0, jit_int64_t i0) } static void -_movi_d_w(jit_state_t *_jit, jit_int32_t r0, jit_int64_t i0) +_movi_d_w(jit_state_t *_jit, jit_int32_t r0, jit_float64_t i0) { union { jit_int64_t l; diff --git a/deps/lightning/lib/jit_mips-sz.c b/deps/lightning/lib/jit_mips-sz.c index 156fc955c..cde9cb795 100644 --- a/deps/lightning/lib/jit_mips-sz.c +++ b/deps/lightning/lib/jit_mips-sz.c @@ -529,6 +529,66 @@ 16, /* hmuli */ 8, /* hmulr_u */ 16, /* hmuli_u */ + 8, /* ldxbr_c */ + 8, /* ldxbi_c */ + 8, /* ldxar_c */ + 8, /* ldxai_c */ + 8, /* ldxbr_uc */ + 8, /* ldxbi_uc */ + 8, /* ldxar_uc */ + 8, /* ldxai_uc */ + 8, /* ldxbr_s */ + 8, /* ldxbi_s */ + 8, /* ldxar_s */ + 8, /* ldxai_s */ + 8, /* ldxbr_us */ + 8, /* ldxbi_us */ + 8, /* ldxar_us */ + 8, /* ldxai_us */ + 8, /* ldxbr_i */ + 8, /* ldxbi_i */ + 8, /* ldxar_i */ + 8, /* ldxai_i */ + 0, /* ldxbr_ui */ + 0, /* ldxbi_ui */ + 0, /* ldxar_ui */ + 0, /* ldxai_ui */ + 0, /* ldxbr_l */ + 0, /* ldxbi_l */ + 0, /* ldxar_l */ + 0, /* ldxai_l */ + 8, /* ldxbr_f */ + 8, /* ldxbi_f */ + 8, /* ldxar_f */ + 8, /* ldxai_f */ + 12, /* ldxbr_d */ + 12, /* ldxbi_d */ + 12, /* ldxar_d */ + 12, /* ldxai_d */ + 8, /* stxbr_c */ + 8, /* stxbi_c */ + 8, /* stxar_c */ + 8, /* stxai_c */ + 8, /* stxbr_s */ + 8, /* stxbi_s */ + 8, /* stxar_s */ + 8, /* stxai_s */ + 8, /* 
stxbr_i */ + 8, /* stxbi_i */ + 8, /* stxar_i */ + 8, /* stxai_i */ + 0, /* stxbr_l */ + 0, /* stxbi_l */ + 0, /* stxar_l */ + 0, /* stxai_l */ + 8, /* stxbr_f */ + 8, /* stxbi_f */ + 8, /* stxar_f */ + 8, /* stxai_f */ + 12, /* stxbr_d */ + 12, /* stxbi_d */ + 12, /* stxar_d */ + 12, /* stxai_d */ #endif /* __WORDSIZE */ #if __WORDSIZE == 64 @@ -1061,4 +1121,64 @@ 28, /* hmuli */ 8, /* hmulr_u */ 28, /* hmuli_u */ + 8, /* ldxbr_c */ + 8, /* ldxbi_c */ + 8, /* ldxar_c */ + 8, /* ldxai_c */ + 8, /* ldxbr_uc */ + 8, /* ldxbi_uc */ + 8, /* ldxar_uc */ + 8, /* ldxai_uc */ + 8, /* ldxbr_s */ + 8, /* ldxbi_s */ + 8, /* ldxar_s */ + 8, /* ldxai_s */ + 8, /* ldxbr_us */ + 8, /* ldxbi_us */ + 8, /* ldxar_us */ + 8, /* ldxai_us */ + 8, /* ldxbr_i */ + 8, /* ldxbi_i */ + 8, /* ldxar_i */ + 8, /* ldxai_i */ + 8, /* ldxbr_ui */ + 8, /* ldxbi_ui */ + 8, /* ldxar_ui */ + 8, /* ldxai_ui */ + 8, /* ldxbr_l */ + 8, /* ldxbi_l */ + 8, /* ldxar_l */ + 8, /* ldxai_l */ + 8, /* ldxbr_f */ + 8, /* ldxbi_f */ + 8, /* ldxar_f */ + 8, /* ldxai_f */ + 8, /* ldxbr_d */ + 8, /* ldxbi_d */ + 8, /* ldxar_d */ + 8, /* ldxai_d */ + 8, /* stxbr_c */ + 8, /* stxbi_c */ + 8, /* stxar_c */ + 8, /* stxai_c */ + 8, /* stxbr_s */ + 8, /* stxbi_s */ + 8, /* stxar_s */ + 8, /* stxai_s */ + 8, /* stxbr_i */ + 8, /* stxbi_i */ + 8, /* stxar_i */ + 8, /* stxai_i */ + 8, /* stxbr_l */ + 8, /* stxbi_l */ + 8, /* stxar_l */ + 8, /* stxai_l */ + 8, /* stxbr_f */ + 8, /* stxbi_f */ + 8, /* stxar_f */ + 8, /* stxai_f */ + 8, /* stxbr_d */ + 8, /* stxbi_d */ + 8, /* stxar_d */ + 8, /* stxai_d */ #endif /* __WORDSIZE */ diff --git a/deps/lightning/lib/jit_mips.c b/deps/lightning/lib/jit_mips.c index 1fec10920..6eb41a08f 100644 --- a/deps/lightning/lib/jit_mips.c +++ b/deps/lightning/lib/jit_mips.c @@ -1424,6 +1424,26 @@ _emit_code(jit_state_t *_jit) case jit_code_##name##i##type: \ name##i##type(rn(node->u.w), rn(node->v.w), node->w.w); \ break +#define case_rrx(name, type) \ + case jit_code_##name##i##type: \ + 
generic_##name##i##type(rn(node->u.w), \ + rn(node->v.w), node->w.w); \ + break +#define case_rrX(name, type) \ + case jit_code_##name##r##type: \ + generic_##name##r##type(rn(node->u.w), \ + rn(node->v.w), rn(node->w.w)); \ + break +#define case_xrr(name, type) \ + case jit_code_##name##i##type: \ + generic_##name##i##type(node->u.w, rn(node->v.w), \ + rn(node->w.w)); \ + break +#define case_Xrr(name, type) \ + case jit_code_##name##r##type: \ + generic_##name##r##type(rn(node->u.w), rn(node->v.w), \ + rn(node->w.w)); \ + break #define case_rrrr(name, type) \ case jit_code_##name##r##type: \ name##r##type(rn(node->u.q.l), rn(node->u.q.h), \ @@ -1645,6 +1665,26 @@ _emit_code(jit_state_t *_jit) case jit_code_unldi_u: unldi_u(rn(node->u.w), node->v.w, node->w.w); break; + case_rrx(ldxb, _c); case_rrX(ldxb, _c); + case_rrx(ldxa, _c); case_rrX(ldxa, _c); + case_rrx(ldxb, _uc); case_rrX(ldxb, _uc); + case_rrx(ldxa, _uc); case_rrX(ldxa, _uc); + case_rrx(ldxb, _s); case_rrX(ldxb, _s); + case_rrx(ldxa, _s); case_rrX(ldxa, _s); + case_rrx(ldxb, _us); case_rrX(ldxb, _us); + case_rrx(ldxa, _us); case_rrX(ldxa, _us); + case_rrx(ldxb, _i); case_rrX(ldxb, _i); + case_rrx(ldxa, _i); case_rrX(ldxa, _i); +#if __WORDSIZE == 64 + case_rrx(ldxb, _ui); case_rrX(ldxb, _ui); + case_rrx(ldxa, _ui); case_rrX(ldxa, _ui); + case_rrx(ldxb, _l); case_rrX(ldxb, _l); + case_rrx(ldxa, _l); case_rrX(ldxa, _l); +#endif + case_rrx(ldxb, _f); case_rrX(ldxb, _f); + case_rrx(ldxa, _f); case_rrX(ldxa, _f); + case_rrx(ldxb, _d); case_rrX(ldxb, _d); + case_rrx(ldxa, _d); case_rrX(ldxa, _d); case_rr(st, _c); case_wr(st, _c); case_rr(st, _s); @@ -1671,6 +1711,21 @@ _emit_code(jit_state_t *_jit) case jit_code_unsti: unsti(node->u.w, rn(node->v.w), node->w.w); break; + + case_xrr(stxb, _c); case_Xrr(stxb, _c); + case_xrr(stxa, _c); case_Xrr(stxa, _c); + case_xrr(stxb, _s); case_Xrr(stxb, _s); + case_xrr(stxa, _s); case_Xrr(stxa, _s); + case_xrr(stxb, _i); case_Xrr(stxb, _i); + case_xrr(stxa, _i); 
case_Xrr(stxa, _i); +#if __WORDSIZE == 64 + case_xrr(stxb, _l); case_rrX(stxb, _l); + case_xrr(stxa, _l); case_rrX(stxa, _l); +#endif + case_xrr(stxb, _f); case_rrX(stxb, _f); + case_xrr(stxa, _f); case_rrX(stxa, _f); + case_xrr(stxb, _d); case_rrX(stxb, _d); + case_xrr(stxa, _d); case_rrX(stxa, _d); case_rr(hton, _us); case_rr(hton, _ui); #if __WORDSIZE == 64 @@ -2129,8 +2184,7 @@ _emit_code(jit_state_t *_jit) case jit_code_movi_w_d: movi_w_d(rn(node->u.w), node->v.w); break; -#endif -#if __WORDSIZE == 32 +#else case jit_code_movr_ww_d: movr_ww_d(rn(node->u.w), rn(node->v.w), rn(node->w.w)); break; @@ -2320,6 +2374,10 @@ _emit_code(jit_state_t *_jit) #undef case_brr #undef case_wrr #undef case_rrf +#undef case_xrr +#undef case_Xrr +#undef case_rrx +#undef case_rrX #undef case_rrw #undef case_rrr #undef case_wr diff --git a/deps/lightning/lib/jit_names.c b/deps/lightning/lib/jit_names.c index 88bc717ba..e72e5496e 100644 --- a/deps/lightning/lib/jit_names.c +++ b/deps/lightning/lib/jit_names.c @@ -291,4 +291,34 @@ static char *code_name[] = { "fnmsr_d", "fnmsi_d", "hmulr", "hmuli", "hmulr_u", "hmuli_u", + "ldxbr_c", "ldxbi_c", + "ldxar_c", "ldxai_c", + "ldxbr_uc", "ldxbi_uc", + "ldxar_uc", "ldxai_uc", + "ldxbr_s", "ldxbi_s", + "ldxar_s", "ldxai_s", + "ldxbr_us", "ldxbi_us", + "ldxar_us", "ldxai_us", + "ldxbr_i", "ldxbi_i", + "ldxar_i", "ldxai_i", + "ldxbr_ui", "ldxbi_ui", + "ldxar_ui", "ldxai_ui", + "ldxbr_l", "ldxbi_l", + "ldxar_l", "ldxai_l", + "ldxbr_f", "ldxbi_f", + "ldxar_f", "ldxai_f", + "ldxbr_d", "ldxbi_d", + "ldxar_d", "ldxai_d", + "stxbr_c", "stxbi_c", + "stxar_c", "stxai_c", + "stxbr_s", "stxbi_s", + "stxar_s", "stxai_s", + "stxbr_i", "stxbi_i", + "stxar_i", "stxai_i", + "stxbr_l", "stxbi_l", + "stxar_l", "stxai_l", + "stxbr_f", "stxbi_f", + "stxar_f", "stxai_f", + "stxbr_d", "stxbi_d", + "stxar_d", "stxai_d", }; diff --git a/deps/lightning/lib/jit_ppc-cpu.c b/deps/lightning/lib/jit_ppc-cpu.c index 8ea8e62cd..e1829c7bc 100644 --- 
a/deps/lightning/lib/jit_ppc-cpu.c +++ b/deps/lightning/lib/jit_ppc-cpu.c @@ -301,6 +301,8 @@ static void _FXS(jit_state_t*,int,int,int,int,int,int,int); # define LWZX(d,a,b) FX(31,d,a,b,23) # define LD(d,a,s) FDs(58,d,a,s) # define LDX(d,a,b) FX(31,d,a,b,21) +# define LDU(d,a,s) FDs(58,d,a,s|1) +# define LDUX(d,a,b) FX(31,d,a,b,53) # define MCRF(d,s) FXL(19,((d)<<2),((s)<<2),0) # if DEBUG /* In case instruction is emulated, check the kernel can handle it. @@ -893,6 +895,52 @@ static void _ldxr_l(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define ldxi_l(r0,r1,i0) _ldxi_l(_jit,r0,r1,i0) static void _ldxi_l(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); # endif +# define ldxbr_c(r0,r1,r2) _ldxbr_c(_jit,r0,r1,r2) +static void _ldxbr_c(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define ldxbi_c(r0,r1,i0) _ldxbi_c(_jit,r0,r1,i0) +static void _ldxbi_c(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define ldxbr_uc(r0,r1,r2) _ldxbr_uc(_jit,r0,r1,r2) +static void _ldxbr_uc(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define ldxbi_uc(r0,r1,i0) _ldxbi_uc(_jit,r0,r1,i0) +static void _ldxbi_uc(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define ldxbr_s(r0,r1,r2) _ldxbr_s(_jit,r0,r1,r2) +static void _ldxbr_s(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define ldxbi_s(r0,r1,i0) _ldxbi_s(_jit,r0,r1,i0) +static void _ldxbi_s(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define ldxbr_us(r0,r1,r2) _ldxbr_us(_jit,r0,r1,r2) +static void _ldxbr_us(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define ldxbi_us(r0,r1,i0) _ldxbi_us(_jit,r0,r1,i0) +static void _ldxbi_us(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define ldxbr_i(r0,r1,r2) _ldxbr_i(_jit,r0,r1,r2) +static void _ldxbr_i(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define ldxbi_i(r0,r1,i0) _ldxbi_i(_jit,r0,r1,i0) +static void _ldxbi_i(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# if __WORDSIZE == 64 +# define ldxbr_ui(r0,r1,r2) 
_ldxbr_ui(_jit,r0,r1,r2) +static void _ldxbr_ui(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define ldxbi_ui(r0,r1,i0) _ldxbi_ui(_jit,r0,r1,i0) +static void _ldxbi_ui(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define ldxbr_l(r0,r1,r2) _ldxbr_l(_jit,r0,r1,r2) +static void _ldxbr_l(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define ldxbi_l(r0,r1,i0) _ldxbi_l(_jit,r0,r1,i0) +static void _ldxbi_l(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# endif +# define ldxar_c(r0,r1,r2) generic_ldxar_c(r0,r1,r2) +# define ldxai_c(r0,r1,i0) generic_ldxai_c(r0,r1,i0) +# define ldxar_uc(r0,r1,r2) generic_ldxar_uc(r0,r1,r2) +# define ldxai_uc(r0,r1,i0) generic_ldxai_uc(r0,r1,i0) +# define ldxar_s(r0,r1,r2) generic_ldxar_s(r0,r1,r2) +# define ldxai_s(r0,r1,i0) generic_ldxai_s(r0,r1,i0) +# define ldxar_us(r0,r1,r2) generic_ldxar_us(r0,r1,r2) +# define ldxai_us(r0,r1,i0) generic_ldxai_us(r0,r1,i0) +# define ldxar_i(r0,r1,r2) generic_ldxar_i(r0,r1,r2) +# define ldxai_i(r0,r1,i0) generic_ldxai_i(r0,r1,i0) +# if __WORDSIZE == 64 +# define ldxar_ui(r0,r1,r2) generic_ldxar_ui(r0,r1,r2) +# define ldxai_ui(r0,r1,i0) generic_ldxai_ui(r0,r1,i0) +# define ldxar_l(r0,r1,r2) generic_ldxar_l(r0,r1,r2) +# define ldxai_l(r0,r1,i0) generic_ldxai_l(r0,r1,i0) +# endif # define str_c(r0,r1) STBX(r1, _R0_REGNO, r0) # define sti_c(i0,r0) _sti_c(_jit,i0,r0) static void _sti_c(jit_state_t*,jit_word_t,jit_int32_t); @@ -923,6 +971,34 @@ static void _stxr_l(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define stxi_l(i0,r0,r1) _stxi_l(_jit,i0,r0,r1) static void _stxi_l(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); # endif +# define stxbr_c(r0,r1,r2) _stxbr_c(_jit,r0,r1,r2) +static void _stxbr_c(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define stxbi_c(i0,r0,r1) _stxbi_c(_jit,i0,r0,r1) +static void _stxbi_c(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); +# define stxbr_s(r0,r1,r2) _stxbr_s(_jit,r0,r1,r2) +static void 
_stxbr_s(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define stxbi_s(i0,r0,r1) _stxbi_s(_jit,i0,r0,r1) +static void _stxbi_s(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); +# define stxbr_i(r0,r1,r2) _stxbr_i(_jit,r0,r1,r2) +static void _stxbr_i(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define stxbi_i(i0,r0,r1) _stxbi_i(_jit,i0,r0,r1) +static void _stxbi_i(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); +# if __WORDSIZE == 64 +# define stxbr_l(r0,r1,r2) _stxbr_l(_jit,r0,r1,r2) +static void _stxbr_l(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define stxbi_l(i0,r0,r1) _stxbi_l(_jit,i0,r0,r1) +static void _stxbi_l(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); +# endif +# define stxar_c(r0,r1,r2) generic_stxar_c(r0,r1,r2) +# define stxai_c(r0,r1,i0) generic_stxai_c(r0,r1,i0) +# define stxar_s(r0,r1,r2) generic_stxar_s(r0,r1,r2) +# define stxai_s(r0,r1,i0) generic_stxai_s(r0,r1,i0) +# define stxar_i(r0,r1,r2) generic_stxar_i(r0,r1,r2) +# define stxai_i(r0,r1,i0) generic_stxai_i(r0,r1,i0) +# if __WORDSIZE == 64 +# define stxar_l(r0,r1,r2) generic_stxar_l(r0,r1,r2) +# define stxai_l(r0,r1,i0) generic_stxai_l(r0,r1,i0) +# endif # define jmpr(r0) _jmpr(_jit,r0) static void _jmpr(jit_state_t*,jit_int32_t); # define jmpi(i0) _jmpi(_jit,i0) @@ -1301,17 +1377,21 @@ _ctzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) static void _popcntr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { - jit_int32_t reg; - reg = jit_get_reg(jit_class_gpr); - POPCNTB(r0, r1); + if (jit_cpu.popcntb) { + jit_int32_t reg; + reg = jit_get_reg(jit_class_gpr); + POPCNTB(r0, r1); #if __WORDSIZE == 32 - movi(rn(reg), 0x01010101); + movi(rn(reg), 0x01010101); #else - movi(rn(reg), 0x0101010101010101); + movi(rn(reg), 0x0101010101010101); #endif - mullr(r0, r0, rn(reg)); - rshi_u(r0, r0, __WORDSIZE - 8); - jit_unget_reg(reg); + mullr(r0, r0, rn(reg)); + rshi_u(r0, r0, __WORDSIZE - 8); + jit_unget_reg(reg); + } + else + fallback_popcnt(r0, r1); } static void 
@@ -3340,6 +3420,182 @@ _ldxi_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) } # endif +static void +_ldxbr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + ldxbr_uc(r0, r1, r2); + extr_c(r0, r0); +} + +static void +_ldxbi_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + ldxbi_uc(r0, r1, i0); + extr_c(r0, r0); +} + +static void +_ldxbr_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + assert(r1 != _R0_REGNO); + assert(r0 != r1); + LBZUX(r0, r1, r2); +} + +static void +_ldxbi_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + jit_int32_t reg; + if (can_sign_extend_short_p(i0)) { + assert(r1 != _R0_REGNO); + assert(r0 != r1); + LBZU(r0, r1, i0); + } + else { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + ldxbr_uc(r0, r1, rn(reg)); + jit_unget_reg(reg); + } +} + +static void +_ldxbr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + assert(r1 != _R0_REGNO); + assert(r0 != r1); + LHAUX(r0, r1, r2); +} + +static void +_ldxbi_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + jit_int32_t reg; + if (can_sign_extend_short_p(i0)) { + assert(r1 != _R0_REGNO); + assert(r0 != r1); + LHAU(r0, r1, i0); + } + else { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + ldxbr_s(r0, r1, rn(reg)); + jit_unget_reg(reg); + } +} + +static void +_ldxbr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + assert(r1 != _R0_REGNO); + assert(r0 != r1); + LHZUX(r0, r1, r2); +} + +static void +_ldxbi_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + jit_int32_t reg; + if (can_sign_extend_short_p(i0)) { + assert(r1 != _R0_REGNO); + assert(r0 != r1); + LHZU(r0, r1, i0); + } + else { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + ldxbr_us(r0, r1, rn(reg)); + jit_unget_reg(reg); + } +} + +static void +_ldxbr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, 
jit_int32_t r2) +{ + assert(r1 != _R0_REGNO); + assert(r0 != r1); +# if __WORDSIZE == 32 + LWZUX(r0, r1, r2); +# else + ldxbr_ui(r0, r1, r2); + extr_i(r0, r0); +# endif +} + +static void +_ldxbi_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + jit_int32_t reg; + if (can_sign_extend_short_p(i0)) { + assert(r1 != _R0_REGNO); + assert(r0 != r1); +# if __WORDSIZE == 32 + LWZU(r0, r1, i0); +# else + ldxbi_ui(r0, r1, i0); + extr_i(r0, r0); +# endif + } + else { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + ldxbr_i(r0, r1, rn(reg)); + jit_unget_reg(reg); + } +} + +# if __WORDSIZE == 64 +static void +_ldxbr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + assert(r1 != _R0_REGNO); + assert(r0 != r1); + LWZUX(r0, r1, r2); +} + +static void +_ldxbi_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + jit_int32_t reg; + if (can_sign_extend_short_p(i0)) { + assert(r1 != _R0_REGNO); + assert(r0 != r1); + LWZU(r0, r1, i0); + } + else { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + ldxbr_ui(r0, r1, rn(reg)); + jit_unget_reg(reg); + } +} + +static void +_ldxbr_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + assert(r1 != _R0_REGNO); + assert(r0 != r1); + LDUX(r0, r1, r2); +} + +static void +_ldxbi_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + jit_int32_t reg; + if (can_sign_extend_short_p(i0)) { + assert(r1 != _R0_REGNO); + assert(r0 != r1); + LDU(r0, r1, i0); + } + else { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + ldxbr_l(r0, r1, rn(reg)); + jit_unget_reg(reg); + } +} +# endif + static void _sti_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0) { @@ -3614,6 +3870,100 @@ _stxi_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) } # endif +static void +_stxbr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + assert(r1 != _R0_REGNO); + STBUX(r2, r1, r0); +} + +static void 
+_stxbi_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + jit_int32_t reg; + if (can_sign_extend_short_p(i0)) { + assert(r0 != _R0_REGNO); /* r0 is the base (RA) operand of STBU */ + STBU(r1, r0, i0); + } + else { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + stxbr_c(rn(reg), r0, r1); /* stxbr takes (offset, base, value) */ + jit_unget_reg(reg); + } +} + +static void +_stxbr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + assert(r1 != _R0_REGNO); + STHUX(r2, r1, r0); +} + +static void +_stxbi_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + jit_int32_t reg; + if (can_sign_extend_short_p(i0)) { + assert(r0 != _R0_REGNO); /* r0 is the base (RA) operand of STHU */ + STHU(r1, r0, i0); + } + else { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + stxbr_s(rn(reg), r0, r1); /* stxbr takes (offset, base, value) */ + jit_unget_reg(reg); + } +} + +static void +_stxbr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + assert(r1 != _R0_REGNO); + STWUX(r2, r1, r0); +} + +static void +_stxbi_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + jit_int32_t reg; + if (can_sign_extend_short_p(i0)) { + assert(r0 != _R0_REGNO); /* r0 is the base (RA) operand of STWU */ + STWU(r1, r0, i0); + } + else { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + stxbr_i(rn(reg), r0, r1); /* stxbr takes (offset, base, value) */ + jit_unget_reg(reg); + } +} + +# if __WORDSIZE == 64 +static void +_stxbr_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + assert(r1 != _R0_REGNO); + STDUX(r2, r1, r0); +} + +static void +_stxbi_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + jit_int32_t reg; + if (can_sign_extend_short_p(i0)) { + assert(r0 != _R0_REGNO); /* r0 is the base (RA) operand of STDU */ + STDU(r1, r0, i0); + } + else { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + stxbr_l(rn(reg), r0, r1); /* stxbr takes (offset, base, value) */ + jit_unget_reg(reg); + } +} +# endif + static void _jmpr(jit_state_t *_jit, jit_int32_t r0) { diff --git a/deps/lightning/lib/jit_ppc-fpu.c b/deps/lightning/lib/jit_ppc-fpu.c index 605bd4fa8..8457b69bb 100644 --- a/deps/lightning/lib/jit_ppc-fpu.c +++ 
b/deps/lightning/lib/jit_ppc-fpu.c @@ -401,6 +401,18 @@ static void _ldi_f(jit_state_t*,jit_int32_t,jit_word_t); static void _ldxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define ldxi_f(r0,r1,i0) _ldxi_f(_jit,r0,r1,i0) static void _ldxi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define ldxbr_f(r0,r1,r2) _ldxbr_f(_jit,r0,r1,r2) +static void _ldxbr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define ldxbi_f(r0,r1,i0) _ldxbi_f(_jit,r0,r1,i0) +static void _ldxbi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define ldxbr_d(r0,r1,r2) _ldxbr_d(_jit,r0,r1,r2) +static void _ldxbr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define ldxbi_d(r0,r1,i0) _ldxbi_d(_jit,r0,r1,i0) +static void _ldxbi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define ldxar_f(r0,r1,r2) generic_ldxar_f(r0,r1,r2) +# define ldxai_f(r0,r1,i0) generic_ldxai_f(r0,r1,i0) +# define ldxar_d(r0,r1,r2) generic_ldxar_d(r0,r1,r2) +# define ldxai_d(r0,r1,i0) generic_ldxai_d(r0,r1,i0) # define str_f(r0,r1) STFSX(r1, _R0_REGNO, r0) # define sti_f(i0,r0) _sti_f(_jit,i0,r0) static void _sti_f(jit_state_t*,jit_word_t,jit_int32_t); @@ -422,6 +434,18 @@ static void _sti_d(jit_state_t*,jit_word_t,jit_int32_t); static void _stxr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define stxi_d(i0,r0,r1) _stxi_d(_jit,i0,r0,r1) static void _stxi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); +# define stxbr_f(r0,r1,r2) _stxbr_f(_jit,r0,r1,r2) +static void _stxbr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define stxbi_f(i0,r0,r1) _stxbi_f(_jit,i0,r0,r1) +static void _stxbi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); +# define stxbr_d(r0,r1,r2) _stxbr_d(_jit,r0,r1,r2) +static void _stxbr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define stxbi_d(i0,r0,r1) _stxbi_d(_jit,i0,r0,r1) +static void _stxbi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); +# define stxar_f(r0,r1,r2) generic_stxar_f(r0,r1,r2) +# define 
stxai_f(i0,r0,r1) generic_stxai_f(i0,r0,r1) +# define stxar_d(r0,r1,r2) generic_stxar_d(r0,r1,r2) +# define stxai_d(i0,r0,r1) generic_stxai_d(i0,r0,r1) #endif #if CODE @@ -1156,6 +1180,56 @@ _ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) } } +static void +_ldxbr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + assert(r1 != _R0_REGNO); + assert(r0 != r1); + LFSUX(r0, r1, r2); +} + +static void +_ldxbi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + jit_int32_t reg; + if (can_sign_extend_short_p(i0)) { + assert(r1 != _R0_REGNO); + assert(r0 != r1); + LFSU(r0, r1, i0); + } + else { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + ldxbr_f(r0, r1, rn(reg)); + jit_unget_reg(reg); + } +} + +static void +_ldxbr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + assert(r1 != _R0_REGNO); + assert(r0 != r1); + LFDUX(r0, r1, r2); +} + +static void +_ldxbi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + jit_int32_t reg; + if (can_sign_extend_short_p(i0)) { + assert(r1 != _R0_REGNO); + assert(r0 != r1); + LFDU(r0, r1, i0); + } + else { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + ldxbr_d(r0, r1, rn(reg)); + jit_unget_reg(reg); + } +} + static void _sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0) { @@ -1291,4 +1365,50 @@ _stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) jit_unget_reg(reg); } } + +static void +_stxbr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + assert(r1 != _R0_REGNO); + STFSUX(r2, r1, r0); +} + +static void +_stxbi_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + jit_int32_t reg; + if (can_sign_extend_short_p(i0)) { + assert(r0 != _R0_REGNO); /* r0 is the GPR base (RA); r1 is the FPR being stored */ + STFSU(r1, r0, i0); + } + else { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + stxbr_f(rn(reg), r0, r1); /* stxbr takes (offset, base, value) */ + jit_unget_reg(reg); + } +} + +static void +_stxbr_d(jit_state_t *_jit, 
jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + assert(r1 != _R0_REGNO); + STFDUX(r2, r1, r0); +} + +static void +_stxbi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + jit_int32_t reg; + if (can_sign_extend_short_p(i0)) { + assert(r0 != _R0_REGNO); /* r0 is the GPR base (RA); r1 is the FPR being stored */ + STFDU(r1, r0, i0); + } + else { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + stxbr_d(rn(reg), r0, r1); /* stxbr takes (offset, base, value) */ + jit_unget_reg(reg); + } +} #endif diff --git a/deps/lightning/lib/jit_ppc-sz.c b/deps/lightning/lib/jit_ppc-sz.c index 136f1d400..c275d28ec 100644 --- a/deps/lightning/lib/jit_ppc-sz.c +++ b/deps/lightning/lib/jit_ppc-sz.c @@ -531,6 +531,66 @@ 12, /* hmuli */ 4, /* hmulr_u */ 12, /* hmuli_u */ + 8, /* ldxbr_c */ + 8, /* ldxbi_c */ + 12, /* ldxar_c */ + 12, /* ldxai_c */ + 4, /* ldxbr_uc */ + 4, /* ldxbi_uc */ + 8, /* ldxar_uc */ + 8, /* ldxai_uc */ + 4, /* ldxbr_s */ + 4, /* ldxbi_s */ + 8, /* ldxar_s */ + 8, /* ldxai_s */ + 4, /* ldxbr_us */ + 4, /* ldxbi_us */ + 8, /* ldxar_us */ + 8, /* ldxai_us */ + 4, /* ldxbr_i */ + 4, /* ldxbi_i */ + 8, /* ldxar_i */ + 8, /* ldxai_i */ + 0, /* ldxbr_ui */ + 0, /* ldxbi_ui */ + 0, /* ldxar_ui */ + 0, /* ldxai_ui */ + 0, /* ldxbr_l */ + 0, /* ldxbi_l */ + 0, /* ldxar_l */ + 0, /* ldxai_l */ + 4, /* ldxbr_f */ + 4, /* ldxbi_f */ + 8, /* ldxar_f */ + 8, /* ldxai_f */ + 4, /* ldxbr_d */ + 4, /* ldxbi_d */ + 8, /* ldxar_d */ + 8, /* ldxai_d */ + 4, /* stxbr_c */ + 4, /* stxbi_c */ + 8, /* stxar_c */ + 8, /* stxai_c */ + 4, /* stxbr_s */ + 4, /* stxbi_s */ + 8, /* stxar_s */ + 8, /* stxai_s */ + 4, /* stxbr_i */ + 4, /* stxbi_i */ + 8, /* stxar_i */ + 8, /* stxai_i */ + 0, /* stxbr_l */ + 0, /* stxbi_l */ + 0, /* stxar_l */ + 0, /* stxai_l */ + 4, /* stxbr_f */ + 4, /* stxbi_f */ + 8, /* stxar_f */ + 8, /* stxai_f */ + 4, /* stxbr_d */ + 4, /* stxbi_d */ + 8, /* stxar_d */ + 8, /* stxai_d */ #endif /* !_CALL_SYSV */ #endif /* __BYTE_ORDER */ #endif /* __powerpc__ */ @@ -1069,6 +1129,66 @@ 12, /* hmuli */ 4, /* hmulr_u */ 12, /* hmuli_u */ 
+ 8, /* ldxbr_c */ + 8, /* ldxbi_c */ + 12, /* ldxar_c */ + 12, /* ldxai_c */ + 4, /* ldxbr_uc */ + 4, /* ldxbi_uc */ + 8, /* ldxar_uc */ + 8, /* ldxai_uc */ + 4, /* ldxbr_s */ + 4, /* ldxbi_s */ + 8, /* ldxar_s */ + 8, /* ldxai_s */ + 4, /* ldxbr_us */ + 4, /* ldxbi_us */ + 8, /* ldxar_us */ + 8, /* ldxai_us */ + 4, /* ldxbr_i */ + 4, /* ldxbi_i */ + 8, /* ldxar_i */ + 8, /* ldxai_i */ + 0, /* ldxbr_ui */ + 0, /* ldxbi_ui */ + 0, /* ldxar_ui */ + 0, /* ldxai_ui */ + 0, /* ldxbr_l */ + 0, /* ldxbi_l */ + 0, /* ldxar_l */ + 0, /* ldxai_l */ + 4, /* ldxbr_f */ + 4, /* ldxbi_f */ + 8, /* ldxar_f */ + 8, /* ldxai_f */ + 4, /* ldxbr_d */ + 4, /* ldxbi_d */ + 8, /* ldxar_d */ + 8, /* ldxai_d */ + 4, /* stxbr_c */ + 4, /* stxbi_c */ + 8, /* stxar_c */ + 8, /* stxai_c */ + 4, /* stxbr_s */ + 4, /* stxbi_s */ + 8, /* stxar_s */ + 8, /* stxai_s */ + 4, /* stxbr_i */ + 4, /* stxbi_i */ + 8, /* stxar_i */ + 8, /* stxai_i */ + 0, /* stxbr_l */ + 0, /* stxbi_l */ + 0, /* stxar_l */ + 0, /* stxai_l */ + 4, /* stxbr_f */ + 4, /* stxbi_f */ + 8, /* stxar_f */ + 8, /* stxai_f */ + 4, /* stxbr_d */ + 4, /* stxbi_d */ + 8, /* stxar_d */ + 8, /* stxai_d */ #endif /* _CALL_SYSV */ #endif /* __BYTE_ORDER */ #endif /* __powerpc__ */ @@ -1606,6 +1726,66 @@ 24, /* hmuli */ 4, /* hmulr_u */ 24, /* hmuli_u */ + 8, /* ldxbr_c */ + 8, /* ldxbi_c */ + 12, /* ldxar_c */ + 12, /* ldxai_c */ + 4, /* ldxbr_uc */ + 4, /* ldxbi_uc */ + 8, /* ldxar_uc */ + 8, /* ldxai_uc */ + 4, /* ldxbr_s */ + 4, /* ldxbi_s */ + 8, /* ldxar_s */ + 8, /* ldxai_s */ + 4, /* ldxbr_us */ + 4, /* ldxbi_us */ + 8, /* ldxar_us */ + 8, /* ldxai_us */ + 8, /* ldxbr_i */ + 8, /* ldxbi_i */ + 8, /* ldxar_i */ + 8, /* ldxai_i */ + 4, /* ldxbr_ui */ + 4, /* ldxbi_ui */ + 8, /* ldxar_ui */ + 8, /* ldxai_ui */ + 4, /* ldxbr_l */ + 4, /* ldxbi_l */ + 8, /* ldxar_l */ + 8, /* ldxai_l */ + 4, /* ldxbr_f */ + 4, /* ldxbi_f */ + 8, /* ldxar_f */ + 8, /* ldxai_f */ + 4, /* ldxbr_d */ + 4, /* ldxbi_d */ + 8, /* ldxar_d */ + 8, /* ldxai_d 
*/ + 4, /* stxbr_c */ + 4, /* stxbi_c */ + 8, /* stxar_c */ + 8, /* stxai_c */ + 4, /* stxbr_s */ + 4, /* stxbi_s */ + 8, /* stxar_s */ + 8, /* stxai_s */ + 4, /* stxbr_i */ + 4, /* stxbi_i */ + 8, /* stxar_i */ + 8, /* stxai_i */ + 4, /* stxbr_l */ + 4, /* stxbi_l */ + 8, /* stxar_l */ + 8, /* stxai_l */ + 4, /* stxbr_f */ + 4, /* stxbi_f */ + 8, /* stxar_f */ + 8, /* stxai_f */ + 4, /* stxbr_d */ + 4, /* stxbi_d */ + 8, /* stxar_d */ + 8, /* stxai_d */ #endif /* __BYTE_ORDER */ #endif /* __powerpc__ */ #endif /* __WORDSIZE */ @@ -2142,6 +2322,66 @@ 24, /* hmuli */ 4, /* hmulr_u */ 24, /* hmuli_u */ + 8, /* ldxbr_c */ + 8, /* ldxbi_c */ + 12, /* ldxar_c */ + 12, /* ldxai_c */ + 4, /* ldxbr_uc */ + 4, /* ldxbi_uc */ + 8, /* ldxar_uc */ + 8, /* ldxai_uc */ + 4, /* ldxbr_s */ + 4, /* ldxbi_s */ + 8, /* ldxar_s */ + 8, /* ldxai_s */ + 4, /* ldxbr_us */ + 4, /* ldxbi_us */ + 8, /* ldxar_us */ + 8, /* ldxai_us */ + 8, /* ldxbr_i */ + 8, /* ldxbi_i */ + 8, /* ldxar_i */ + 8, /* ldxai_i */ + 4, /* ldxbr_ui */ + 4, /* ldxbi_ui */ + 8, /* ldxar_ui */ + 8, /* ldxai_ui */ + 4, /* ldxbr_l */ + 4, /* ldxbi_l */ + 8, /* ldxar_l */ + 8, /* ldxai_l */ + 4, /* ldxbr_f */ + 4, /* ldxbi_f */ + 8, /* ldxar_f */ + 8, /* ldxai_f */ + 4, /* ldxbr_d */ + 4, /* ldxbi_d */ + 8, /* ldxar_d */ + 8, /* ldxai_d */ + 4, /* stxbr_c */ + 4, /* stxbi_c */ + 8, /* stxar_c */ + 8, /* stxai_c */ + 4, /* stxbr_s */ + 4, /* stxbi_s */ + 8, /* stxar_s */ + 8, /* stxai_s */ + 4, /* stxbr_i */ + 4, /* stxbi_i */ + 8, /* stxar_i */ + 8, /* stxai_i */ + 4, /* stxbr_l */ + 4, /* stxbi_l */ + 8, /* stxar_l */ + 8, /* stxai_l */ + 4, /* stxbr_f */ + 4, /* stxbi_f */ + 8, /* stxar_f */ + 8, /* stxai_f */ + 4, /* stxbr_d */ + 4, /* stxbi_d */ + 8, /* stxar_d */ + 8, /* stxai_d */ #endif /* __BYTE_ORDER */ #endif /* __powerpc__ */ #endif /* __WORDSIZE */ diff --git a/deps/lightning/lib/jit_ppc.c b/deps/lightning/lib/jit_ppc.c index 9f98176e9..3d506baf7 100644 --- a/deps/lightning/lib/jit_ppc.c +++ 
b/deps/lightning/lib/jit_ppc.c @@ -16,6 +16,12 @@ * Authors: * Paulo Cesar Pereira de Andrade */ +#define CHECK_POPCNTB 0 + +#if CHECK_POPCNTB +#include +#include +#endif #define jit_arg_reg_p(i) ((i) >= 0 && (i) < 8) #if !_CALL_SYSV @@ -112,6 +118,7 @@ extern void __clear_cache(void *, void *); /* * Initialization */ +jit_cpu_t jit_cpu; jit_register_t _rvs[] = { { rc(sav) | 0, "r0" }, { rc(sav) | 11, "r11" }, /* env */ @@ -187,6 +194,9 @@ jit_register_t _rvs[] = { { rc(arg) | rc(fpr) | 1, "f1" }, { _NOREG, "" }, }; +#if CHECK_POPCNTB +static sigjmp_buf jit_env; +#endif static jit_int32_t iregs[] = { _R14, _R15, _R16, _R17, _R18, _R19, _R20, _R21, _R22, @@ -200,9 +210,57 @@ static jit_int32_t fregs[] = { /* * Implementation */ +#if CHECK_POPCNTB +static void +sigill_handler(int signum) +{ + jit_cpu.popcntb = 0; + siglongjmp(jit_env, 1); +} +#endif + void jit_get_cpu(void) { +#if CHECK_POPCNTB + long r12; + struct sigaction new_action, old_action; + new_action.sa_handler = sigill_handler; + sigemptyset(&new_action.sa_mask); + new_action.sa_flags = 0; + sigaction(SIGILL, NULL, &old_action); + if (old_action.sa_handler != SIG_IGN) { + sigaction(SIGILL, &new_action, NULL); + if (!sigsetjmp(jit_env, 1)) { + jit_cpu.popcntb = 1; + /* popcntb %r12, %r12 */ + __asm__ volatile("mr %%r12, %0;" + "popcntb %%r12, %%r12;" + "mr %0, %%r12;" + : "=r" (r12), "=r" (r12)); + sigaction(SIGILL, &old_action, NULL); + } + } +#elif defined(__linux__) + FILE *fp; + char *ptr; + long vers; + char buf[128]; + + if ((fp = fopen("/proc/cpuinfo", "r")) != NULL) { + while (fgets(buf, sizeof(buf), fp)) { + if (strncmp(buf, "cpu\t\t: POWER", 12) == 0) { + vers = strtol(buf + 12, &ptr, 10); + jit_cpu.popcntb = vers > 5; + break; + } + } + fclose(fp); + } +#else + /* By default, assume it is not available */ + jit_cpu.popcntb = 0; +#endif } void @@ -1262,6 +1320,24 @@ _emit_code(jit_state_t *_jit) name##r##type(rn(node->u.w), \ rn(node->v.w), rn(node->w.w)); \ break +#define case_rrx(name, type) \ 
+ case jit_code_##name##i##type: \ + name##i##type(rn(node->u.w), rn(node->v.w), node->w.w); \ + break +#define case_rrX(name, type) \ + case jit_code_##name##r##type: \ + name##r##type(rn(node->u.w), \ + rn(node->v.w), rn(node->w.w)); \ + break +#define case_xrr(name, type) \ + case jit_code_##name##i##type: \ + name##i##type(node->u.w, rn(node->v.w), rn(node->w.w)); \ + break +#define case_Xrr(name, type) \ + case jit_code_##name##r##type: \ + name##r##type(rn(node->u.w), rn(node->v.w), \ + rn(node->w.w)); \ + break #define case_rrrr(name, type) \ case jit_code_##name##r##type: \ name##r##type(rn(node->u.q.l), rn(node->u.q.h), \ @@ -1604,6 +1680,26 @@ _emit_code(jit_state_t *_jit) case jit_code_unldi_u: unldi_u(rn(node->u.w), node->v.w, node->w.w); break; + case_rrx(ldxb, _c); case_rrX(ldxb, _c); + case_rrx(ldxa, _c); case_rrX(ldxa, _c); + case_rrx(ldxb, _uc); case_rrX(ldxb, _uc); + case_rrx(ldxa, _uc); case_rrX(ldxa, _uc); + case_rrx(ldxb, _s); case_rrX(ldxb, _s); + case_rrx(ldxa, _s); case_rrX(ldxa, _s); + case_rrx(ldxb, _us); case_rrX(ldxb, _us); + case_rrx(ldxa, _us); case_rrX(ldxa, _us); + case_rrx(ldxb, _i); case_rrX(ldxb, _i); + case_rrx(ldxa, _i); case_rrX(ldxa, _i); +#if __WORDSIZE == 64 + case_rrx(ldxb, _ui); case_rrX(ldxb, _ui); + case_rrx(ldxa, _ui); case_rrX(ldxa, _ui); + case_rrx(ldxb, _l); case_rrX(ldxb, _l); + case_rrx(ldxa, _l); case_rrX(ldxa, _l); +#endif + case_rrx(ldxb, _f); case_rrX(ldxb, _f); + case_rrx(ldxa, _f); case_rrX(ldxa, _f); + case_rrx(ldxb, _d); case_rrX(ldxb, _d); + case_rrx(ldxa, _d); case_rrX(ldxa, _d); case_rr(st, _c); case_wr(st, _c); case_rrr(stx, _c); @@ -1630,6 +1726,20 @@ _emit_code(jit_state_t *_jit) case jit_code_unsti: unsti(node->u.w, rn(node->v.w), node->w.w); break; + case_xrr(stxb, _c); case_Xrr(stxb, _c); + case_xrr(stxa, _c); case_Xrr(stxa, _c); + case_xrr(stxb, _s); case_Xrr(stxb, _s); + case_xrr(stxa, _s); case_Xrr(stxa, _s); + case_xrr(stxb, _i); case_Xrr(stxb, _i); + case_xrr(stxa, _i); case_Xrr(stxa, _i); 
+#if __WORDSIZE == 64 + case_xrr(stxb, _l); case_rrX(stxb, _l); + case_xrr(stxa, _l); case_rrX(stxa, _l); +#endif + case_xrr(stxb, _f); case_rrX(stxb, _f); + case_xrr(stxa, _f); case_rrX(stxa, _f); + case_xrr(stxb, _d); case_rrX(stxb, _d); + case_xrr(stxa, _d); case_rrX(stxa, _d); case_rr(mov, _f); case jit_code_movi_f: assert(node->flag & jit_flag_data); @@ -2167,6 +2277,10 @@ _emit_code(jit_state_t *_jit) #undef case_wrr #undef case_rrf #undef case_rrw +#undef case_xrr +#undef case_Xrr +#undef case_rrx +#undef case_rrX #undef case_rrr #undef case_wr #undef case_rw diff --git a/deps/lightning/lib/jit_riscv-sz.c b/deps/lightning/lib/jit_riscv-sz.c index c08e5bd58..5aa243ec2 100644 --- a/deps/lightning/lib/jit_riscv-sz.c +++ b/deps/lightning/lib/jit_riscv-sz.c @@ -528,4 +528,64 @@ 16, /* hmuli */ 4, /* hmulr_u */ 16, /* hmuli_u */ + 8, /* ldxbr_c */ + 8, /* ldxbi_c */ + 8, /* ldxar_c */ + 8, /* ldxai_c */ + 8, /* ldxbr_uc */ + 8, /* ldxbi_uc */ + 8, /* ldxar_uc */ + 8, /* ldxai_uc */ + 8, /* ldxbr_s */ + 8, /* ldxbi_s */ + 8, /* ldxar_s */ + 8, /* ldxai_s */ + 8, /* ldxbr_us */ + 8, /* ldxbi_us */ + 8, /* ldxar_us */ + 8, /* ldxai_us */ + 8, /* ldxbr_i */ + 8, /* ldxbi_i */ + 8, /* ldxar_i */ + 8, /* ldxai_i */ + 8, /* ldxbr_ui */ + 8, /* ldxbi_ui */ + 8, /* ldxar_ui */ + 8, /* ldxai_ui */ + 8, /* ldxbr_l */ + 8, /* ldxbi_l */ + 8, /* ldxar_l */ + 8, /* ldxai_l */ + 8, /* ldxbr_f */ + 8, /* ldxbi_f */ + 8, /* ldxar_f */ + 8, /* ldxai_f */ + 8, /* ldxbr_d */ + 8, /* ldxbi_d */ + 8, /* ldxar_d */ + 8, /* ldxai_d */ + 8, /* stxbr_c */ + 8, /* stxbi_c */ + 8, /* stxar_c */ + 8, /* stxai_c */ + 8, /* stxbr_s */ + 8, /* stxbi_s */ + 8, /* stxar_s */ + 8, /* stxai_s */ + 8, /* stxbr_i */ + 8, /* stxbi_i */ + 8, /* stxar_i */ + 8, /* stxai_i */ + 8, /* stxbr_l */ + 8, /* stxbi_l */ + 8, /* stxar_l */ + 8, /* stxai_l */ + 8, /* stxbr_f */ + 8, /* stxbi_f */ + 8, /* stxar_f */ + 8, /* stxai_f */ + 8, /* stxbr_d */ + 8, /* stxbi_d */ + 8, /* stxar_d */ + 8, /* stxai_d */ 
#endif /* __WORDSIZE */ diff --git a/deps/lightning/lib/jit_riscv.c b/deps/lightning/lib/jit_riscv.c index 27b0c5ada..c787efb9e 100644 --- a/deps/lightning/lib/jit_riscv.c +++ b/deps/lightning/lib/jit_riscv.c @@ -1015,6 +1015,26 @@ _emit_code(jit_state_t *_jit) name##r##type(rn(node->u.w), \ rn(node->v.w), rn(node->w.w)); \ break +#define case_rrx(name, type) \ + case jit_code_##name##i##type: \ + generic_##name##i##type(rn(node->u.w), \ + rn(node->v.w), node->w.w); \ + break +#define case_rrX(name, type) \ + case jit_code_##name##r##type: \ + generic_##name##r##type(rn(node->u.w), \ + rn(node->v.w), rn(node->w.w)); \ + break +#define case_xrr(name, type) \ + case jit_code_##name##i##type: \ + generic_##name##i##type(node->u.w, rn(node->v.w), \ + rn(node->w.w)); \ + break +#define case_Xrr(name, type) \ + case jit_code_##name##r##type: \ + generic_##name##r##type(rn(node->u.w), rn(node->v.w), \ + rn(node->w.w)); \ + break #define case_rrrr(name, type) \ case jit_code_##name##r##type: \ name##r##type(rn(node->u.q.l), rn(node->u.q.h), \ @@ -1263,6 +1283,24 @@ _emit_code(jit_state_t *_jit) case jit_code_unldi_u: unldi_u(rn(node->u.w), node->v.w, node->w.w); break; + case_rrx(ldxb, _c); case_rrX(ldxb, _c); + case_rrx(ldxa, _c); case_rrX(ldxa, _c); + case_rrx(ldxb, _uc); case_rrX(ldxb, _uc); + case_rrx(ldxa, _uc); case_rrX(ldxa, _uc); + case_rrx(ldxb, _s); case_rrX(ldxb, _s); + case_rrx(ldxa, _s); case_rrX(ldxa, _s); + case_rrx(ldxb, _us); case_rrX(ldxb, _us); + case_rrx(ldxa, _us); case_rrX(ldxa, _us); + case_rrx(ldxb, _i); case_rrX(ldxb, _i); + case_rrx(ldxa, _i); case_rrX(ldxa, _i); + case_rrx(ldxb, _ui); case_rrX(ldxb, _ui); + case_rrx(ldxa, _ui); case_rrX(ldxa, _ui); + case_rrx(ldxb, _l); case_rrX(ldxb, _l); + case_rrx(ldxa, _l); case_rrX(ldxa, _l); + case_rrx(ldxb, _f); case_rrX(ldxb, _f); + case_rrx(ldxa, _f); case_rrX(ldxa, _f); + case_rrx(ldxb, _d); case_rrX(ldxb, _d); + case_rrx(ldxa, _d); case_rrX(ldxa, _d); case_rr(st, _c); case_wr(st, _c); case_rr(st, _s); 
@@ -1285,6 +1323,18 @@ _emit_code(jit_state_t *_jit) case jit_code_unsti: unsti(node->u.w, rn(node->v.w), node->w.w); break; + case_xrr(stxb, _c); case_Xrr(stxb, _c); + case_xrr(stxa, _c); case_Xrr(stxa, _c); + case_xrr(stxb, _s); case_Xrr(stxb, _s); + case_xrr(stxa, _s); case_Xrr(stxa, _s); + case_xrr(stxb, _i); case_Xrr(stxb, _i); + case_xrr(stxa, _i); case_Xrr(stxa, _i); + case_xrr(stxb, _l); case_rrX(stxb, _l); + case_xrr(stxa, _l); case_rrX(stxa, _l); + case_xrr(stxb, _f); case_rrX(stxb, _f); + case_xrr(stxa, _f); case_rrX(stxa, _f); + case_xrr(stxb, _d); case_rrX(stxb, _d); + case_xrr(stxa, _d); case_rrX(stxa, _d); case_rr(hton, _us); case_rr(hton, _ui); case_rr(hton, _ul); @@ -1877,6 +1927,10 @@ _emit_code(jit_state_t *_jit) #undef case_brr #undef case_wrr #undef case_rrw +#undef case_xrr +#undef case_Xrr +#undef case_rrx +#undef case_rrX #undef case_rrr #undef case_wr #undef case_rw diff --git a/deps/lightning/lib/jit_s390-sz.c b/deps/lightning/lib/jit_s390-sz.c index b8a87e856..498a49a7e 100644 --- a/deps/lightning/lib/jit_s390-sz.c +++ b/deps/lightning/lib/jit_s390-sz.c @@ -1,5 +1,5 @@ #if __WORDSIZE == 32 -#define JIT_INSTR_MAX 630 +#define JIT_INSTR_MAX 200 0, /* data */ 0, /* live */ 4, /* align */ @@ -494,7 +494,7 @@ 12, /* qlshi */ 66, /* qlshr_u */ 12, /* qlshi_u */ - 70, /* qrshr */ + 68, /* qrshr */ 12, /* qrshi */ 66, /* qrshr_u */ 12, /* qrshi_u */ @@ -502,12 +502,12 @@ 38, /* unldi */ 86, /* unldr_u */ 38, /* unldi_u */ - 238, /* unstr */ - 100, /* unsti */ + 84, /* unstr */ + 42, /* unsti */ 200, /* unldr_x */ 86, /* unldi_x */ - 630, /* unstr_x */ - 294, /* unsti_x */ + 194, /* unstr_x */ + 102, /* unsti_x */ 8, /* fmar_f */ 0, /* fmai_f */ 8, /* fmsr_f */ @@ -524,14 +524,74 @@ 0, /* fnmai_d */ 10, /* fnmsr_d */ 0, /* fnmsi_d */ - 34, /* hmulr */ - 42, /* hmuli */ + 34, /* hmulr */ + 42, /* hmuli */ 8, /* hmulr_u */ - 16, /* hmuli_u */ + 16, /* hmuli_u */ + 8, /* ldxbr_c */ + 10, /* ldxbi_c */ + 8, /* ldxar_c */ + 10, /* ldxai_c */ + 8, /* 
ldxbr_uc */ + 10, /* ldxbi_uc */ + 8, /* ldxar_uc */ + 10, /* ldxai_uc */ + 6, /* ldxbr_s */ + 8, /* ldxbi_s */ + 6, /* ldxar_s */ + 8, /* ldxai_s */ + 8, /* ldxbr_us */ + 10, /* ldxbi_us */ + 8, /* ldxar_us */ + 10, /* ldxai_us */ + 8, /* ldxbr_i */ + 10, /* ldxbi_i */ + 8, /* ldxar_i */ + 10, /* ldxai_i */ + 0, /* ldxbr_ui */ + 0, /* ldxbi_ui */ + 0, /* ldxar_ui */ + 0, /* ldxai_ui */ + 0, /* ldxbr_l */ + 0, /* ldxbi_l */ + 0, /* ldxar_l */ + 0, /* ldxai_l */ + 6, /* ldxbr_f */ + 8, /* ldxbi_f */ + 8, /* ldxar_f */ + 8, /* ldxai_f */ + 6, /* ldxbr_d */ + 8, /* ldxbi_d */ + 6, /* ldxar_d */ + 8, /* ldxai_d */ + 6, /* stxbr_c */ + 8, /* stxbi_c */ + 6, /* stxar_c */ + 8, /* stxai_c */ + 6, /* stxbr_s */ + 8, /* stxbi_s */ + 6, /* stxar_s */ + 8, /* stxai_s */ + 6, /* stxbr_i */ + 8, /* stxbi_i */ + 6, /* stxar_i */ + 8, /* stxai_i */ + 0, /* stxbr_l */ + 0, /* stxbi_l */ + 0, /* stxar_l */ + 0, /* stxai_l */ + 6, /* stxbr_f */ + 8, /* stxbi_f */ + 6, /* stxar_f */ + 8, /* stxai_f */ + 6, /* stxbr_d */ + 8, /* stxbi_d */ + 6, /* stxar_d */ + 8, /* stxai_d */ #endif /* __WORDSIZE */ #if __WORDSIZE == 64 -#define JIT_INSTR_MAX 364 +#define JIT_INSTR_MAX 264 0, /* data */ 0, /* live */ 20, /* align */ @@ -1026,7 +1086,7 @@ 12, /* qlshi */ 74, /* qlshr_u */ 12, /* qlshi_u */ - 78, /* qrshr */ + 76, /* qrshr */ 12, /* qrshi */ 74, /* qrshr_u */ 12, /* qrshi_u */ @@ -1034,12 +1094,12 @@ 58, /* unldi */ 122, /* unldr_u */ 58, /* unldi_u */ - 296, /* unstr */ - 150, /* unsti */ + 252, /* unstr */ + 82, /* unsti */ 130, /* unldr_x */ 70, /* unldi_x */ - 364, /* unstr_x */ - 194, /* unsti_x */ + 264, /* unstr_x */ + 94, /* unsti_x */ 8, /* fmar_f */ 0, /* fmai_f */ 8, /* fmsr_f */ @@ -1056,8 +1116,68 @@ 0, /* fnmai_d */ 10, /* fnmsr_d */ 0, /* fnmsi_d */ - 44, /* hmulr */ - 60, /* hmuli */ - 12, /* hmulr_u */ - 28, /* hmuli_u */ + 44, /* hmulr */ + 60, /* hmuli */ + 12, /* hmulr_u */ + 28, /* hmuli_u */ + 10, /* ldxbr_c */ + 10, /* ldxbi_c */ + 10, /* ldxar_c */ + 10, /* 
ldxai_c */ + 10, /* ldxbr_uc */ + 10, /* ldxbi_uc */ + 10, /* ldxar_uc */ + 10, /* ldxai_uc */ + 10, /* ldxbr_s */ + 10, /* ldxbi_s */ + 10, /* ldxar_s */ + 10, /* ldxai_s */ + 10, /* ldxbr_us */ + 10, /* ldxbi_us */ + 10, /* ldxar_us */ + 10, /* ldxai_us */ + 10, /* ldxbr_i */ + 10, /* ldxbi_i */ + 10, /* ldxar_i */ + 10, /* ldxai_i */ + 10, /* ldxbr_ui */ + 10, /* ldxbi_ui */ + 10, /* ldxar_ui */ + 10, /* ldxai_ui */ + 10, /* ldxbr_l */ + 10, /* ldxbi_l */ + 10, /* ldxar_l */ + 10, /* ldxai_l */ + 8, /* ldxbr_f */ + 8, /* ldxbi_f */ + 8, /* ldxar_f */ + 8, /* ldxai_f */ + 8, /* ldxbr_d */ + 8, /* ldxbi_d */ + 8, /* ldxar_d */ + 8, /* ldxai_d */ + 8, /* stxbr_c */ + 8, /* stxbi_c */ + 8, /* stxar_c */ + 8, /* stxai_c */ + 8, /* stxbr_s */ + 8, /* stxbi_s */ + 8, /* stxar_s */ + 8, /* stxai_s */ + 8, /* stxbr_i */ + 8, /* stxbi_i */ + 8, /* stxar_i */ + 8, /* stxai_i */ + 10, /* stxbr_l */ + 10, /* stxbi_l */ + 10, /* stxar_l */ + 10, /* stxai_l */ + 8, /* stxbr_f */ + 8, /* stxbi_f */ + 8, /* stxar_f */ + 8, /* stxai_f */ + 8, /* stxbr_d */ + 8, /* stxbi_d */ + 8, /* stxar_d */ + 8, /* stxai_d */ #endif /* __WORDSIZE */ diff --git a/deps/lightning/lib/jit_s390.c b/deps/lightning/lib/jit_s390.c index 851d0d0b5..9a8373f08 100644 --- a/deps/lightning/lib/jit_s390.c +++ b/deps/lightning/lib/jit_s390.c @@ -973,6 +973,26 @@ _emit_code(jit_state_t *_jit) name##r##type(rn(node->u.w), \ rn(node->v.w), rn(node->w.w)); \ break +#define case_rrx(name, type) \ + case jit_code_##name##i##type: \ + generic_##name##i##type(rn(node->u.w), \ + rn(node->v.w), node->w.w); \ + break +#define case_rrX(name, type) \ + case jit_code_##name##r##type: \ + generic_##name##r##type(rn(node->u.w), \ + rn(node->v.w), rn(node->w.w)); \ + break +#define case_xrr(name, type) \ + case jit_code_##name##i##type: \ + generic_##name##i##type(node->u.w, rn(node->v.w), \ + rn(node->w.w)); \ + break +#define case_Xrr(name, type) \ + case jit_code_##name##r##type: \ + generic_##name##r##type(rn(node->u.w), 
rn(node->v.w), \ + rn(node->w.w)); \ + break #define case_rrrr(name, type) \ case jit_code_##name##r##type: \ name##r##type(rn(node->u.q.l), rn(node->u.q.h), \ @@ -1231,6 +1251,26 @@ _emit_code(jit_state_t *_jit) case jit_code_unldi_u: unldi_u(rn(node->u.w), node->v.w, node->w.w); break; + case_rrx(ldxb, _c); case_rrX(ldxb, _c); + case_rrx(ldxa, _c); case_rrX(ldxa, _c); + case_rrx(ldxb, _uc); case_rrX(ldxb, _uc); + case_rrx(ldxa, _uc); case_rrX(ldxa, _uc); + case_rrx(ldxb, _s); case_rrX(ldxb, _s); + case_rrx(ldxa, _s); case_rrX(ldxa, _s); + case_rrx(ldxb, _us); case_rrX(ldxb, _us); + case_rrx(ldxa, _us); case_rrX(ldxa, _us); + case_rrx(ldxb, _i); case_rrX(ldxb, _i); + case_rrx(ldxa, _i); case_rrX(ldxa, _i); +#if __WORDSIZE == 64 + case_rrx(ldxb, _ui); case_rrX(ldxb, _ui); + case_rrx(ldxa, _ui); case_rrX(ldxa, _ui); + case_rrx(ldxb, _l); case_rrX(ldxb, _l); + case_rrx(ldxa, _l); case_rrX(ldxa, _l); +#endif + case_rrx(ldxb, _f); case_rrX(ldxb, _f); + case_rrx(ldxa, _f); case_rrX(ldxa, _f); + case_rrx(ldxb, _d); case_rrX(ldxb, _d); + case_rrx(ldxa, _d); case_rrX(ldxa, _d); case_rr(st, _c); case_wr(st, _c); case_rr(st, _s); @@ -1259,6 +1299,20 @@ _emit_code(jit_state_t *_jit) case jit_code_unsti: unsti(node->u.w, rn(node->v.w), node->w.w); break; + case_xrr(stxb, _c); case_Xrr(stxb, _c); + case_xrr(stxa, _c); case_Xrr(stxa, _c); + case_xrr(stxb, _s); case_Xrr(stxb, _s); + case_xrr(stxa, _s); case_Xrr(stxa, _s); + case_xrr(stxb, _i); case_Xrr(stxb, _i); + case_xrr(stxa, _i); case_Xrr(stxa, _i); +#if __WORDSIZE == 64 + case_xrr(stxb, _l); case_rrX(stxb, _l); + case_xrr(stxa, _l); case_rrX(stxa, _l); +#endif + case_xrr(stxb, _f); case_rrX(stxb, _f); + case_xrr(stxa, _f); case_rrX(stxa, _f); + case_xrr(stxb, _d); case_rrX(stxb, _d); + case_xrr(stxa, _d); case_rrX(stxa, _d); case_rr(hton, _us); case_rr(hton, _ui); #if __WORDSIZE == 64 @@ -1864,6 +1918,10 @@ _emit_code(jit_state_t *_jit) #undef case_brr #undef case_wrr #undef case_rrw +#undef case_xrr +#undef case_Xrr 
+#undef case_rrx +#undef case_rrX #undef case_rrr #undef case_wr #undef case_rw diff --git a/deps/lightning/lib/jit_sh-cpu.c b/deps/lightning/lib/jit_sh-cpu.c new file mode 100644 index 000000000..cfb8b2665 --- /dev/null +++ b/deps/lightning/lib/jit_sh-cpu.c @@ -0,0 +1,3209 @@ +/* + * Copyright (C) 2022 Free Software Foundation, Inc. + * + * This file is part of GNU lightning. + * + * GNU lightning is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU lightning is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * License for more details. + * + * Authors: + * Paul Cercueil + */ + +#if PROTO + +# ifdef __SH4_SINGLE__ +# define SH_DEFAULT_FPU_MODE 0 +# else +# define SH_DEFAULT_FPU_MODE 1 +# endif + +# ifndef SH_HAS_FPU +# ifdef __SH_FPU_ANY__ +# define SH_HAS_FPU 1 +# else +# define SH_HAS_FPU 0 +# endif +# endif + +# ifdef __SH4_SINGLE_ONLY__ +# define SH_SINGLE_ONLY 1 +# else +# define SH_SINGLE_ONLY 0 +# endif + + +struct jit_instr_ni { +#if __BYTE_ORDER == __LITTLE_ENDIAN + jit_uint16_t i :8; + jit_uint16_t n :4; + jit_uint16_t c :4; +#else + jit_uint16_t c :4; + jit_uint16_t n :4; + jit_uint16_t i :8; +#endif +}; + +struct jit_instr_nmd { +#if __BYTE_ORDER == __LITTLE_ENDIAN + jit_uint16_t d :4; + jit_uint16_t m :4; + jit_uint16_t n :4; + jit_uint16_t c :4; +#else + jit_uint16_t c :4; + jit_uint16_t n :4; + jit_uint16_t m :4; + jit_uint16_t d :4; +#endif +}; + +struct jit_instr_md { +#if __BYTE_ORDER == __LITTLE_ENDIAN + jit_uint16_t d :4; + jit_uint16_t m :4; + jit_uint16_t c :8; +#else + jit_uint16_t c :8; + jit_uint16_t m :4; + jit_uint16_t d :4; +#endif +}; + +struct jit_instr_d { +#if __BYTE_ORDER == 
__LITTLE_ENDIAN + jit_uint16_t d :12; + jit_uint16_t c :4; +#else + jit_uint16_t c :4; + jit_uint16_t d :12; +#endif +}; + +typedef union { + struct jit_instr_ni ni; + struct jit_instr_nmd nmd; + struct jit_instr_md md; + struct jit_instr_d d; + jit_uint16_t op; +} jit_instr_t; + +static void _cni(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +static void +_cnmd(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t,jit_uint16_t); +static void _cmd(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +static void _cd(jit_state_t*,jit_uint16_t,jit_uint16_t); + +# define STRB(rn, rm) _cnmd(_jit, 0x0, rn, rm, 0x4) +# define STRW(rn, rm) _cnmd(_jit, 0x0, rn, rm, 0x5) +# define STRL(rn, rm) _cnmd(_jit, 0x0, rn, rm, 0x6) +# define MULL(rn, rm) _cnmd(_jit, 0x0, rn, rm, 0x7) +# define LDRB(rn, rm) _cnmd(_jit, 0x0, rn, rm, 0xc) +# define LDRW(rn, rm) _cnmd(_jit, 0x0, rn, rm, 0xd) +# define LDRL(rn, rm) _cnmd(_jit, 0x0, rn, rm, 0xe) +# define BSRF(rn) _cni(_jit, 0x0, rn, 0x03) +# define STCGBR(rn) _cni(_jit, 0x0, rn, 0x12) +# define STSH(rn) _cni(_jit, 0x0, rn, 0x0a) +# define STSL(rn) _cni(_jit, 0x0, rn, 0x1a) +# define BRAF(rn) _cni(_jit, 0x0, rn, 0x23) +# define MOVT(rn) _cni(_jit, 0x0, rn, 0x29) + +# define STSPR(rn) _cni(_jit, 0x0, rn, 0x2a) +# define STSUL(rn) _cni(_jit, 0x0, rn, 0x5a) +# define STSFP(rn) _cni(_jit, 0x0, rn, 0x6a) + +# define STDL(rn, rm, imm) _cnmd(_jit, 0x1, rn, rm, imm) + +# define STB(rn, rm) _cnmd(_jit, 0x2, rn, rm, 0x0) +# define STW(rn, rm) _cnmd(_jit, 0x2, rn, rm, 0x1) +# define STL(rn, rm) _cnmd(_jit, 0x2, rn, rm, 0x2) +# define STBU(rn, rm) _cnmd(_jit, 0x2, rn, rm, 0x4) +# define STWU(rn, rm) _cnmd(_jit, 0x2, rn, rm, 0x5) +# define STLU(rn, rm) _cnmd(_jit, 0x2, rn, rm, 0x6) +# define DIV0S(rn, rm) _cnmd(_jit, 0x2, rn, rm, 0x7) +# define TST(rn, rm) _cnmd(_jit, 0x2, rn, rm, 0x8) +# define AND(rn, rm) _cnmd(_jit, 0x2, rn, rm, 0x9) +# define XOR(rn, rm) _cnmd(_jit, 0x2, rn, rm, 0xa) +# define OR(rn, rm) _cnmd(_jit, 0x2, rn, rm, 0xb) + +# 
define CMPEQ(rn, rm) _cnmd(_jit, 0x3, rn, rm, 0x0) +# define CMPHS(rn, rm) _cnmd(_jit, 0x3, rn, rm, 0x2) +# define CMPGE(rn, rm) _cnmd(_jit, 0x3, rn, rm, 0x3) +# define DIV1(rn, rm) _cnmd(_jit, 0x3, rn, rm, 0x4) +# define DMULU(rn, rm) _cnmd(_jit, 0x3, rn, rm, 0x5) +# define CMPHI(rn, rm) _cnmd(_jit, 0x3, rn, rm, 0x6) +# define CMPGT(rn, rm) _cnmd(_jit, 0x3, rn, rm, 0x7) +# define SUB(rn, rm) _cnmd(_jit, 0x3, rn, rm, 0x8) +# define SUBC(rn, rm) _cnmd(_jit, 0x3, rn, rm, 0xa) +# define SUBV(rn, rm) _cnmd(_jit, 0x3, rn, rm, 0xb) +# define ADD(rn, rm) _cnmd(_jit, 0x3, rn, rm, 0xc) +# define ADDC(rn, rm) _cnmd(_jit, 0x3, rn, rm, 0xe) +# define ADDV(rn, rm) _cnmd(_jit, 0x3, rn, rm, 0xf) +# define DMULS(rn, rm) _cnmd(_jit, 0x3, rn, rm, 0xd) + +# define SHLL(rn) _cni(_jit, 0x4, rn, 0x00) +# define SHLR(rn) _cni(_jit, 0x4, rn, 0x01) +# define ROTL(rn) _cni(_jit, 0x4, rn, 0x04) +# define ROTR(rn) _cni(_jit, 0x4, rn, 0x05) +# define SHLL2(rn) _cni(_jit, 0x4, rn, 0x08) +# define SHLR2(rn) _cni(_jit, 0x4, rn, 0x09) +# define JSR(rn) _cni(_jit, 0x4, rn, 0x0b) +# define DT(rn) _cni(_jit, 0x4, rn, 0x10) +# define CMPPZ(rn) _cni(_jit, 0x4, rn, 0x11) +# define CMPPL(rn) _cni(_jit, 0x4, rn, 0x15) +# define SHLL8(rn) _cni(_jit, 0x4, rn, 0x18) +# define SHLR8(rn) _cni(_jit, 0x4, rn, 0x19) +# define TAS(rn) _cni(_jit, 0x4, rn, 0x1b) +# define LDCGBR(rm) _cni(_jit, 0x4, rm, 0x1e) +# define SHAL(rn) _cni(_jit, 0x4, rn, 0x20) +# define SHAR(rn) _cni(_jit, 0x4, rn, 0x21) +# define ROTCL(rn) _cni(_jit, 0x4, rn, 0x24) +# define ROTCR(rn) _cni(_jit, 0x4, rn, 0x25) +# define SHLL16(rn) _cni(_jit, 0x4, rn, 0x28) +# define SHLR16(rn) _cni(_jit, 0x4, rn, 0x29) +# define LDSPR(rn) _cni(_jit, 0x4, rn, 0x2a) +# define JMP(rn) _cni(_jit, 0x4, rn, 0x2b) +# define LDS(rn) _cni(_jit, 0x4, rn, 0x5a) +# define LDSFP(rn) _cni(_jit, 0x4, rn, 0x6a) +# define SHAD(rn, rm) _cnmd(_jit, 0x4, rn, rm, 0xc) +# define SHLD(rn, rm) _cnmd(_jit, 0x4, rn, rm, 0xd) + +# define LDDL(rn, rm, imm) _cnmd(_jit, 0x5, rn, rm, 
imm) + +# define LDB(rn, rm) _cnmd(_jit, 0x6, rn, rm, 0x0) +# define LDW(rn, rm) _cnmd(_jit, 0x6, rn, rm, 0x1) +# define LDL(rn, rm) _cnmd(_jit, 0x6, rn, rm, 0x2) +# define MOV(rn, rm) _cnmd(_jit, 0x6, rn, rm, 0x3) +# define LDBU(rn, rm) _cnmd(_jit, 0x6, rn, rm, 0x4) +# define LDWU(rn, rm) _cnmd(_jit, 0x6, rn, rm, 0x5) +# define LDLU(rn, rm) _cnmd(_jit, 0x6, rn, rm, 0x6) +# define NOT(rn, rm) _cnmd(_jit, 0x6, rn, rm, 0x7) +# define SWAPB(rn, rm) _cnmd(_jit, 0x6, rn, rm, 0x8) +# define SWAPW(rn, rm) _cnmd(_jit, 0x6, rn, rm, 0x9) +# define NEGC(rn, rm) _cnmd(_jit, 0x6, rn, rm, 0xa) +# define NEG(rn, rm) _cnmd(_jit, 0x6, rn, rm, 0xb) +# define EXTUB(rn, rm) _cnmd(_jit, 0x6, rn, rm, 0xc) +# define EXTUW(rn, rm) _cnmd(_jit, 0x6, rn, rm, 0xd) +# define EXTSB(rn, rm) _cnmd(_jit, 0x6, rn, rm, 0xe) +# define EXTSW(rn, rm) _cnmd(_jit, 0x6, rn, rm, 0xf) + +# define ADDI(rn, imm) _cni(_jit, 0x7, rn, imm) + +# define LDDB(rm, imm) _cnmd(_jit, 0x8, 0x4, rm, imm) +# define LDDW(rm, imm) _cnmd(_jit, 0x8, 0x5, rm, imm) +# define CMPEQI(imm) _cni(_jit, 0x8, 0x8, imm) +# define BT(imm) _cni(_jit, 0x8, 0x9, imm) +# define BF(imm) _cni(_jit, 0x8, 0xb, imm) +# define BTS(imm) _cni(_jit, 0x8, 0xd, imm) +# define BFS(imm) _cni(_jit, 0x8, 0xf, imm) + +# define LDPW(rn, imm) _cni(_jit, 0x9, rn, imm) + +# define BRA(imm) _cd(_jit, 0xa, imm) + +# define BSR(imm) _cd(_jit, 0xb, imm) + +# define GBRSTB(imm) _cni(_jit, 0xc, 0x0, imm) +# define GBRSTW(imm) _cni(_jit, 0xc, 0x1, imm) +# define GBRSTL(imm) _cni(_jit, 0xc, 0x2, imm) +# define GBRLDB(imm) _cni(_jit, 0xc, 0x4, imm) +# define GBRLDW(imm) _cni(_jit, 0xc, 0x5, imm) +# define GBRLDL(imm) _cni(_jit, 0xc, 0x6, imm) +# define MOVA(imm) _cni(_jit, 0xc, 0x7, imm) +# define TSTI(imm) _cni(_jit, 0xc, 0x8, imm) +# define ANDI(imm) _cni(_jit, 0xc, 0x9, imm) +# define XORI(imm) _cni(_jit, 0xc, 0xa, imm) +# define ORI(imm) _cni(_jit, 0xc, 0xb, imm) + +# define LDPL(rn, imm) _cni(_jit, 0xd, rn, imm) + +# define MOVI(rn, imm) _cni(_jit, 0xe, rn, imm) + 
+# define FADD(rn, rm) _cnmd(_jit, 0xf, rn, rm, 0x0) +# define FSUB(rn, rm) _cnmd(_jit, 0xf, rn, rm, 0x1) +# define FMUL(rn, rm) _cnmd(_jit, 0xf, rn, rm, 0x2) +# define FDIV(rn, rm) _cnmd(_jit, 0xf, rn, rm, 0x3) +# define FCMPEQ(rn,rm) _cnmd(_jit, 0xf, rn, rm, 0x4) +# define FCMPGT(rn,rm) _cnmd(_jit, 0xf, rn, rm, 0x5) +# define LDXF(rn, rm) _cnmd(_jit, 0xf, rn, rm, 0x6) +# define STXF(rn, rm) _cnmd(_jit, 0xf, rn, rm, 0x7) +# define LDF(rn, rm) _cnmd(_jit, 0xf, rn, rm, 0x8) +# define LDFS(rn, rm) _cnmd(_jit, 0xf, rn, rm, 0x9) +# define STF(rn, rm) _cnmd(_jit, 0xf, rn, rm, 0xa) +# define STFS(rn, rm) _cnmd(_jit, 0xf, rn, rm, 0xb) +# define FMOV(rn, rm) _cnmd(_jit, 0xf, rn, rm, 0xc) +# define FMAC(rn, rm) _cnmd(_jit, 0xf, rn, rm, 0xe) +# define FSTS(rn) _cni(_jit, 0xf, rn, 0x0d) +# define FLDS(rn) _cni(_jit, 0xf, rn, 0x1d) +# define FLOAT(rn) _cni(_jit, 0xf, rn, 0x2d) +# define FTRC(rn) _cni(_jit, 0xf, rn, 0x3d) +# define FNEG(rn) _cni(_jit, 0xf, rn, 0x4d) +# define FABS(rn) _cni(_jit, 0xf, rn, 0x5d) +# define FSQRT(rn) _cni(_jit, 0xf, rn, 0x6d) +# define FLDI0(rn) _cni(_jit, 0xf, rn, 0x8d) +# define FLDI1(rn) _cni(_jit, 0xf, rn, 0x9d) +# define FCNVSD(rn) _cni(_jit, 0xf, rn, 0xad) +# define FCNVDS(rn) _cni(_jit, 0xf, rn, 0xbd) + +# define FMOVXX(rn, rm) FMOV((rn) | 1, (rm) | 1) +# define FMOVDX(rn, rm) FMOV((rn) | 0, (rm) | 1) +# define FMOVXD(rn, rm) FMOV((rn) | 1, (rm) | 0) + +# define CLRT() ii(0x8) +# define NOP() ii(0x9) +# define RTS() ii(0xb) +# define SETT() ii(0x18) +# define DIV0U() ii(0x19) +# define FSCHG() ii(0xf3fd) +# define FRCHG() ii(0xfbfd) + +# define ii(i) *_jit->pc.us++ = i + +# define stack_framesize ((JIT_V_NUM + 2) * 4) + +# define PR_FLAG (1 << 19) +# define SZ_FLAG (1 << 20) +# define FR_FLAG (1 << 21) + +static void _nop(jit_state_t*,jit_word_t); +# define nop(i0) _nop(_jit,i0) +static void _movr(jit_state_t*,jit_uint16_t,jit_uint16_t); +# define movr(r0,r1) _movr(_jit,r0,r1) +static void _movi(jit_state_t*,jit_uint16_t,jit_word_t); +# 
define movi(r0,i0) _movi(_jit,r0,i0) +static void _movnr(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t,jit_bool_t); +# define movnr(r0,r1,r2) _movnr(_jit,r0,r1,r2,1) +# define movzr(r0,r1,r2) _movnr(_jit,r0,r1,r2,0) +# define casx(r0,r1,r2,r3,i0) _casx(_jit,r0,r1,r2,r3,i0) +static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t, + jit_int32_t,jit_int32_t,jit_word_t); +# define casr(r0,r1,r2,r3) casx(r0,r1,r2,r3,0) +# define casi(r0,i0,r1,r2) casx(r0,_NOREG,r1,r2,i0) +static void _addr(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# define addr(r0,r1,r2) _addr(_jit,r0,r1,r2) +static void _addcr(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# define addcr(r0,r1,r2) _addcr(_jit,r0,r1,r2) +static void _addxr(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# define addxr(r0,r1,r2) _addxr(_jit,r0,r1,r2) +static void _addi(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# define addi(r0,r1,i0) _addi(_jit,r0,r1,i0) +static void _addci(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# define addci(r0,r1,i0) _addci(_jit,r0,r1,i0) +static void _addxi(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# define addxi(r0,r1,i0) _addxi(_jit,r0,r1,i0) +static void _subr(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# define subr(r0,r1,r2) _subr(_jit,r0,r1,r2) +static void _subcr(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# define subcr(r0,r1,r2) _subcr(_jit,r0,r1,r2) +static void _subxr(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# define subxr(r0,r1,r2) _subxr(_jit,r0,r1,r2) +static void _subi(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# define subi(r0,r1,i0) _subi(_jit,r0,r1,i0) +static void _subci(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# define subci(r0,r1,i0) _subci(_jit,r0,r1,i0) +static void _subxi(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# define subxi(r0,r1,i0) _subxi(_jit,r0,r1,i0) +static void _rsbi(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# 
define rsbi(r0,r1,i0) _rsbi(_jit,r0,r1,i0) +static void _mulr(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# define mulr(r0,r1,r2) _mulr(_jit,r0,r1,r2) +static void _hmulr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define hmulr(r0,r1,r2) _hmulr(_jit,r0,r1,r2) +static void _hmuli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define hmuli(r0,r1,i0) _hmuli(_jit,r0,r1,i0) +static void _hmulr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define hmulr_u(r0,r1,r2) _hmulr_u(_jit,r0,r1,r2) +static void _hmuli_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define hmuli_u(r0,r1,i0) _hmuli_u(_jit,r0,r1,i0) +static void _qmulr(jit_state_t*,jit_uint16_t,jit_uint16_t, + jit_uint16_t,jit_uint16_t); +# define qmulr(r0,r1,r2,r3) _qmulr(_jit,r0,r1,r2,r3) +static void _qmulr_u(jit_state_t*,jit_uint16_t,jit_uint16_t, + jit_uint16_t,jit_uint16_t); +# define qmulr_u(r0,r1,r2,r3) _qmulr_u(_jit,r0,r1,r2,r3) +static void _muli(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# define muli(r0,r1,i0) _muli(_jit,r0,r1,i0) +static void _qmuli(jit_state_t*,jit_uint16_t,jit_uint16_t, + jit_uint16_t,jit_word_t); +# define qmuli(r0,r1,r2,i0) _qmuli(_jit,r0,r1,r2,i0) +static void _qmuli_u(jit_state_t*,jit_uint16_t,jit_uint16_t, + jit_uint16_t,jit_word_t); +# define qmuli_u(r0,r1,r2,i0) _qmuli_u(_jit,r0,r1,r2,i0) +static void _divr(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# define divr(r0,r1,r2) _divr(_jit,r0,r1,r2) +static void _divr_u(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# define divr_u(r0,r1,r2) _divr_u(_jit,r0,r1,r2) +static void _qdivr(jit_state_t*,jit_uint16_t,jit_uint16_t, + jit_uint16_t,jit_uint16_t); +# define qdivr(r0,r1,r2,r3) _qdivr(_jit,r0,r1,r2,r3) +static void _qdivr_u(jit_state_t*,jit_uint16_t,jit_uint16_t, + jit_uint16_t,jit_uint16_t); +# define qdivr_u(r0,r1,r2,r3) _qdivr_u(_jit,r0,r1,r2,r3) +static void _divi(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# define divi(r0,r1,i0) 
_divi(_jit,r0,r1,i0) +# define divi_u(r0,r1,i0) fallback_divi_u(r0,r1,i0) +static void _qdivi(jit_state_t*,jit_uint16_t,jit_uint16_t, + jit_uint16_t,jit_word_t); +# define qdivi(r0,r1,r2,i0) _qdivi(_jit,r0,r1,r2,i0) +static void _qdivi_u(jit_state_t*,jit_uint16_t,jit_uint16_t, + jit_uint16_t,jit_word_t); +# define qdivi_u(r0,r1,r2,i0) _qdivi_u(_jit,r0,r1,r2,i0) +static void _remr(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# define remr(r0,r1,r2) _remr(_jit,r0,r1,r2) +static void _remr_u(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# define remr_u(r0,r1,r2) _remr_u(_jit,r0,r1,r2) +static void _remi(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# define remi(r0,r1,i0) _remi(_jit,r0,r1,i0) +static void _remi_u(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# define remi_u(r0,r1,i0) _remi_u(_jit,r0,r1,i0) +# define bswapr_us(r0,r1) _bswapr_us(_jit,r0,r1) +static void _bswapr_us(jit_state_t*,jit_uint16_t,jit_uint16_t); +# define bswapr_ui(r0,r1) _bswapr_ui(_jit,r0,r1) +static void _bswapr_ui(jit_state_t*,jit_uint16_t,jit_uint16_t); +#define extr(r0,r1,i0,i1) fallback_ext(r0,r1,i0,i1) +#define extr_u(r0,r1,i0,i1) fallback_ext_u(r0,r1,i0,i1) +#define depr(r0,r1,i0,i1) fallback_dep(r0,r1,i0,i1) +# define extr_c(r0, r1) EXTSB(r0,r1) +# define extr_s(r0,r1) EXTSW(r0,r1) +# define extr_uc(r0,r1) EXTUB(r0,r1) +# define extr_us(r0,r1) EXTUW(r0,r1) +static void _lrotr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define lrotr(r0,r1,r2) _lrotr(_jit,r0,r1,r2) +static void _rrotr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define rrotr(r0,r1,r2) _rrotr(_jit,r0,r1,r2) +static void _rroti(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define rroti(r0,r1,i0) _rroti(_jit,r0,r1,i0) +# define lroti(r0,r1,i0) rroti(r0,r1,__WORDSIZE-i0) +static void _andr(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# define andr(r0,r1,r2) _andr(_jit,r0,r1,r2) +static void _andi(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# 
define andi(r0,r1,i0) _andi(_jit,r0,r1,i0) +static void _orr(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# define orr(r0,r1,r2) _orr(_jit,r0,r1,r2) +static void _ori(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# define ori(r0,r1,i0) _ori(_jit,r0,r1,i0) +static void _xorr(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# define xorr(r0,r1,r2) _xorr(_jit,r0,r1,r2) +static void _xori(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# define xori(r0,r1,i0) _xori(_jit,r0,r1,i0) +# define comr(r0,r1) NOT(r0,r1) +# define negr(r0,r1) NEG(r0,r1) +static void _clor(jit_state_t*, jit_int32_t, jit_int32_t); +# define clor(r0,r1) _clor(_jit,r0,r1) +static void _clzr(jit_state_t*, jit_int32_t, jit_int32_t); +# define clzr(r0,r1) _clzr(_jit,r0,r1) +static void _ctor(jit_state_t*, jit_int32_t, jit_int32_t); +# define ctor(r0,r1) _ctor(_jit,r0,r1) +static void _ctzr(jit_state_t*, jit_int32_t, jit_int32_t); +# define ctzr(r0,r1) _ctzr(_jit,r0,r1) +static void _rbitr(jit_state_t*, jit_int32_t, jit_int32_t); +# define rbitr(r0, r1) _rbitr(_jit, r0, r1) +static void _popcntr(jit_state_t*, jit_int32_t, jit_int32_t); +# define popcntr(r0, r1) _popcntr(_jit, r0, r1) +static void _gtr(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# define gtr(r0,r1,r2) _gtr(_jit,r0,r1,r2) +static void _ger(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# define ger(r0,r1,r2) _ger(_jit,r0,r1,r2) +static void _gtr_u(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# define gtr_u(r0,r1,r2) _gtr_u(_jit,r0,r1,r2) +static void _ger_u(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# define ger_u(r0,r1,r2) _ger_u(_jit,r0,r1,r2) +# define ltr(r0,r1,r2) gtr(r0,r2,r1) +# define ltr_u(r0,r1,r2) gtr_u(r0,r2,r1) +# define ler(r0,r1,r2) ger(r0,r2,r1) +# define ler_u(r0,r1,r2) ger_u(r0,r2,r1) +static void _eqr(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# define eqr(r0,r1,r2) _eqr(_jit,r0,r1,r2) +static void 
_ner(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# define ner(r0,r1,r2) _ner(_jit,r0,r1,r2) +static void _eqi(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# define eqi(r0,r1,i0) _eqi(_jit,r0,r1,i0) +static void _nei(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# define nei(r0,r1,i0) _nei(_jit,r0,r1,i0) +static void _gti(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# define gti(r0,r1,i0) _gti(_jit,r0,r1,i0) +static void _gei(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# define gei(r0,r1,i0) _gei(_jit,r0,r1,i0) +static void _gti_u(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# define gti_u(r0,r1,i0) _gti_u(_jit,r0,r1,i0) +static void _gei_u(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# define gei_u(r0,r1,i0) _gei_u(_jit,r0,r1,i0) +static void _lti(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# define lti(r0,r1,i0) _lti(_jit,r0,r1,i0) +static void _lei(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# define lei(r0,r1,i0) _lei(_jit,r0,r1,i0) +static void _lti_u(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# define lti_u(r0,r1,i0) _lti_u(_jit,r0,r1,i0) +static void _lei_u(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# define lei_u(r0,r1,i0) _lei_u(_jit,r0,r1,i0) +static void _lshr(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# define lshr(r0,r1,r2) _lshr(_jit,r0,r1,r2) +static void _rshr(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# define rshr(r0,r1,r2) _rshr(_jit,r0,r1,r2) +static void _rshr_u(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# define rshr_u(r0,r1,r2) _rshr_u(_jit,r0,r1,r2) +static void _lshi(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# define lshi(r0,r1,i0) _lshi(_jit,r0,r1,i0) +static void _rshi(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# define rshi(r0,r1,i0) _rshi(_jit,r0,r1,i0) +static void _rshi_u(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# define rshi_u(r0,r1,i0) _rshi_u(_jit,r0,r1,i0) +# define 
qlshr(r0,r1,r2,r3) _qlshr(_jit,r0,r1,r2,r3) +static void +_qlshr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +# define qlshr_u(r0, r1, r2, r3) _qlshr_u(_jit,r0,r1,r2,r3) +static void +_qlshr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +# define qlshi(r0, r1, r2, i0) xlshi(1, r0, r1, r2, i0) +# define qlshi_u(r0, r1, r2, i0) xlshi(0, r0, r1, r2, i0) +# define xlshi(s, r0, r1, r2, i0) _xlshi(_jit, s, r0, r1, r2, i0) +static void +_xlshi(jit_state_t*,jit_bool_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_word_t); +# define qrshr(r0, r1, r2, r3) _qrshr(_jit,r0,r1,r2,r3) +static void +_qrshr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +# define qrshr_u(r0, r1, r2, r3) _qrshr_u(_jit,r0,r1,r2,r3) +static void +_qrshr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +# define qrshi(r0, r1, r2, i0) xrshi(1, r0, r1, r2, i0) +# define qrshi_u(r0, r1, r2, i0) xrshi(0, r0, r1, r2, i0) +# define xrshi(s, r0, r1, r2, i0) _xrshi(_jit, s, r0, r1, r2, i0) +static void +_xrshi(jit_state_t*,jit_bool_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_word_t); +# define ldr_c(r0,r1) LDB(r0,r1) +# define ldr_s(r0,r1) LDW(r0,r1) +# define ldr_i(r0,r1) LDL(r0,r1) +static void _ldr_uc(jit_state_t*,jit_uint16_t,jit_uint16_t); +# define ldr_uc(r0,r1) _ldr_uc(_jit,r0,r1) +static void _ldr_us(jit_state_t*,jit_uint16_t,jit_uint16_t); +# define ldr_us(r0,r1) _ldr_us(_jit,r0,r1) +static void _ldi_c(jit_state_t*,jit_uint16_t,jit_word_t); +# define ldi_c(r0,i0) _ldi_c(_jit,r0,i0) +static void _ldi_s(jit_state_t*,jit_uint16_t,jit_word_t); +# define ldi_s(r0,i0) _ldi_s(_jit,r0,i0) +static void _ldi_i(jit_state_t*,jit_uint16_t,jit_word_t); +# define ldi_i(r0,i0) _ldi_i(_jit,r0,i0) +static void _ldi_uc(jit_state_t*,jit_uint16_t,jit_word_t); +# define ldi_uc(r0,i0) _ldi_uc(_jit,r0,i0) +static void _ldi_us(jit_state_t*,jit_uint16_t,jit_word_t); +# define ldi_us(r0,i0) _ldi_us(_jit,r0,i0) +static void 
_ldxr_c(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# define ldxr_c(r0,r1,r2) _ldxr_c(_jit,r0,r1,r2) +static void _ldxr_s(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# define ldxr_s(r0,r1,r2) _ldxr_s(_jit,r0,r1,r2) +static void _ldxr_i(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# define ldxr_i(r0,r1,r2) _ldxr_i(_jit,r0,r1,r2) +static void _ldxr_uc(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# define ldxr_uc(r0,r1,r2) _ldxr_uc(_jit,r0,r1,r2) +static void _ldxr_us(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# define ldxr_us(r0,r1,r2) _ldxr_us(_jit,r0,r1,r2) +static void _ldxi_c(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# define ldxi_c(r0,r1,i0) _ldxi_c(_jit,r0,r1,i0) +static void _ldxi_s(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# define ldxi_s(r0,r1,i0) _ldxi_s(_jit,r0,r1,i0) +static void _ldxi_i(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# define ldxi_i(r0,r1,i0) _ldxi_i(_jit,r0,r1,i0) +static void _ldxi_uc(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# define ldxi_uc(r0,r1,i0) _ldxi_uc(_jit,r0,r1,i0) +static void _ldxi_us(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# define ldxi_us(r0,r1,i0) _ldxi_us(_jit,r0,r1,i0) +# define ldxbi_c(r0,r1,i0) generic_ldxbi_c(r0,r1,i0) +# define ldxbi_uc(r0,r1,i0) generic_ldxbi_uc(r0,r1,i0) +# define ldxbi_s(r0,r1,i0) generic_ldxbi_s(r0,r1,i0) +# define ldxbi_us(r0,r1,i0) generic_ldxbi_us(r0,r1,i0) +# define ldxbi_i(r0,r1,i0) generic_ldxbi_i(r0,r1,i0) +static void _ldxai_c(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# define ldxai_c(r0,r1,i0) _ldxai_c(_jit,r0,r1,i0) +static void _ldxai_uc(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# define ldxai_uc(r0,r1,i0) _ldxai_uc(_jit,r0,r1,i0) +static void _ldxai_s(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# define ldxai_s(r0,r1,i0) _ldxai_s(_jit,r0,r1,i0) +static void _ldxai_us(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# define ldxai_us(r0,r1,i0) 
_ldxai_us(_jit,r0,r1,i0) +static void _ldxai_i(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# define ldxai_i(r0,r1,i0) _ldxai_i(_jit,r0,r1,i0) +# define unldr(r0, r1, i0) fallback_unldr(r0, r1, i0) +# define unldi(r0, i0, i1) fallback_unldi(r0, i0, i1) +# define unldr_u(r0, r1, i0) fallback_unldr_u(r0, r1, i0) +# define unldi_u(r0, i0, i1) fallback_unldi_u(r0, i0, i1) +# define str_c(r0,r1) STB(r0,r1) +# define str_s(r0,r1) STW(r0,r1) +# define str_i(r0,r1) STL(r0,r1) +static void _sti_c(jit_state_t*,jit_word_t,jit_uint16_t); +# define sti_c(i0,r0) _sti_c(_jit,i0,r0) +static void _sti_s(jit_state_t*,jit_word_t,jit_uint16_t); +# define sti_s(i0,r0) _sti_s(_jit,i0,r0) +static void _sti_i(jit_state_t*,jit_word_t,jit_uint16_t); +# define sti_i(i0,r0) _sti_i(_jit,i0,r0) +static void _stxr_c(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# define stxr_c(r0,r1,r2) _stxr_c(_jit,r0,r1,r2) +static void _stxr_s(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# define stxr_s(r0,r1,r2) _stxr_s(_jit,r0,r1,r2) +static void _stxr_i(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# define stxr_i(r0,r1,r2) _stxr_i(_jit,r0,r1,r2) +static void _stxi_c(jit_state_t*,jit_word_t,jit_uint16_t,jit_uint16_t); +# define stxi_c(i0,r0,r1) _stxi_c(_jit,i0,r0,r1) +static void _stxi_s(jit_state_t*,jit_word_t,jit_uint16_t,jit_uint16_t); +# define stxi_s(i0,r0,r1) _stxi_s(_jit,i0,r0,r1) +static void _stxi_i(jit_state_t*,jit_word_t,jit_uint16_t,jit_uint16_t); +# define stxi_i(i0,r0,r1) _stxi_i(_jit,i0,r0,r1) +static void _stxbi_c(jit_state_t*,jit_word_t,jit_uint16_t,jit_uint16_t); +# define stxbi_c(i0,r0,r1) _stxbi_c(_jit,i0,r0,r1) +static void _stxbi_s(jit_state_t*,jit_word_t,jit_uint16_t,jit_uint16_t); +# define stxbi_s(i0,r0,r1) _stxbi_s(_jit,i0,r0,r1) +static void _stxbi_i(jit_state_t*,jit_word_t,jit_uint16_t,jit_uint16_t); +# define stxbi_i(i0,r0,r1) _stxbi_i(_jit,i0,r0,r1) +# define stxai_c(i0,r0,r1) generic_stxai_c(i0,r0,r1) +# define stxai_s(i0,r0,r1) 
generic_stxai_s(i0,r0,r1) +# define stxai_i(i0,r0,r1) generic_stxai_i(i0,r0,r1) +# define unstr(r0, r1, i0) fallback_unstr(r0, r1, i0) +# define unsti(i0, r0, i1) fallback_unsti(i0, r0, i1) +static jit_word_t _bger(jit_state_t*,jit_word_t,jit_uint16_t, + jit_uint16_t,jit_bool_t,jit_bool_t); +# define bltr(i0,r0,r1) bltr_p(i0,r0,r1,0) +# define bler(i0,r0,r1) bler_p(i0,r0,r1,0) +# define bgtr(i0,r0,r1) bgtr_p(i0,r0,r1,0) +# define bger(i0,r0,r1) bger_p(i0,r0,r1,0) +# define bltr_p(i0,r0,r1,p) _bger(_jit,i0,r0,r1,0,p) +# define bler_p(i0,r0,r1,p) _bger(_jit,i0,r1,r0,1,p) +# define bgtr_p(i0,r0,r1,p) _bger(_jit,i0,r1,r0,0,p) +# define bger_p(i0,r0,r1,p) _bger(_jit,i0,r0,r1,1,p) +static jit_word_t _bger_u(jit_state_t*,jit_word_t,jit_uint16_t, + jit_uint16_t,jit_bool_t,jit_bool_t); +# define bltr_u(i0,r0,r1) bltr_u_p(i0,r0,r1,0) +# define bler_u(i0,r0,r1) bler_u_p(i0,r0,r1,0) +# define bgtr_u(i0,r0,r1) bgtr_u_p(i0,r0,r1,0) +# define bger_u(i0,r0,r1) bger_u_p(i0,r0,r1,0) +# define bltr_u_p(i0,r0,r1,p) _bger_u(_jit,i0,r0,r1,0,p) +# define bler_u_p(i0,r0,r1,p) _bger_u(_jit,i0,r1,r0,1,p) +# define bgtr_u_p(i0,r0,r1,p) _bger_u(_jit,i0,r1,r0,0,p) +# define bger_u_p(i0,r0,r1,p) _bger_u(_jit,i0,r0,r1,1,p) +static jit_word_t _beqr(jit_state_t*,jit_word_t,jit_uint16_t, + jit_uint16_t,jit_bool_t); +# define beqr(i0,r0,r1) beqr_p(i0,r0,r1,0) +# define beqr_p(i0,r0,r1,p) _beqr(_jit,i0,r0,r1,p) +static jit_word_t _bner(jit_state_t*,jit_word_t,jit_uint16_t, + jit_uint16_t,jit_bool_t); +# define bner(i0,r0,r1) bner_p(i0,r0,r1,0) +# define bner_p(i0,r0,r1,p) _bner(_jit,i0,r0,r1,p) +static jit_word_t _bmsr(jit_state_t*,jit_word_t,jit_uint16_t, + jit_uint16_t,jit_bool_t); +# define bmsr(i0,r0,r1) bmsr_p(i0,r0,r1,0) +# define bmsr_p(i0,r0,r1,p) _bmsr(_jit,i0,r0,r1,p) +static jit_word_t _bmcr(jit_state_t*,jit_word_t,jit_uint16_t, + jit_uint16_t,jit_bool_t); +# define bmcr(i0,r0,r1) bmcr_p(i0,r0,r1,0) +# define bmcr_p(i0,r0,r1,p) _bmcr(_jit,i0,r0,r1,p) +static jit_word_t 
_boaddr(jit_state_t*,jit_word_t,jit_uint16_t, + jit_uint16_t,jit_bool_t,jit_bool_t); +# define boaddr(i0,r0,r1) boaddr_p(i0,r0,r1,0) +# define bxaddr(i0,r0,r1) bxaddr_p(i0,r0,r1,0) +# define boaddr_p(i0,r0,r1,p) _boaddr(_jit,i0,r0,r1,1,p) +# define bxaddr_p(i0,r0,r1,p) _boaddr(_jit,i0,r0,r1,0,p) +static jit_word_t _boaddr_u(jit_state_t*,jit_word_t,jit_uint16_t, + jit_uint16_t,jit_bool_t,jit_bool_t); +# define boaddr_u(i0,r0,r1) boaddr_u_p(i0,r0,r1,0) +# define bxaddr_u(i0,r0,r1) bxaddr_u_p(i0,r0,r1,0) +# define boaddr_u_p(i0,r0,r1,p) _boaddr_u(_jit,i0,r0,r1,1,p) +# define bxaddr_u_p(i0,r0,r1,p) _boaddr_u(_jit,i0,r0,r1,0,p) +static jit_word_t _bosubr(jit_state_t*,jit_word_t,jit_uint16_t, + jit_uint16_t,jit_bool_t,jit_bool_t); +# define bosubr(i0,r0,r1) bosubr_p(i0,r0,r1,0) +# define bxsubr(i0,r0,r1) bxsubr_p(i0,r0,r1,0) +# define bosubr_p(i0,r0,r1,p) _bosubr(_jit,i0,r0,r1,1,p) +# define bxsubr_p(i0,r0,r1,p) _bosubr(_jit,i0,r0,r1,0,p) +static jit_word_t _bosubr_u(jit_state_t*,jit_word_t,jit_uint16_t, + jit_uint16_t,jit_bool_t,jit_bool_t); +# define bosubr_u(i0,r0,r1) bosubr_u_p(i0,r0,r1,0) +# define bxsubr_u(i0,r0,r1) bxsubr_u_p(i0,r0,r1,0) +# define bosubr_u_p(i0,r0,r1,p) _bosubr_u(_jit,i0,r0,r1,1,p) +# define bxsubr_u_p(i0,r0,r1,p) _bosubr_u(_jit,i0,r0,r1,0,p) +static jit_word_t _bgti(jit_state_t*,jit_word_t,jit_uint16_t, + jit_word_t,jit_bool_t,jit_bool_t); +# define blei(i0,r0,i1) blei_p(i0,r0,i1,0) +# define bgti(i0,r0,i1) bgti_p(i0,r0,i1,0) +# define blei_p(i0,r0,i1,p) _bgti(_jit,i0,r0,i1,0,p) +# define bgti_p(i0,r0,i1,p) _bgti(_jit,i0,r0,i1,1,p) +static jit_word_t _bgei(jit_state_t*,jit_word_t,jit_uint16_t, + jit_word_t,jit_bool_t,jit_bool_t); +# define blti(i0,r0,i1) blti_p(i0,r0,i1,0) +# define bgei(i0,r0,i1) bgei_p(i0,r0,i1,0) +# define blti_p(i0,r0,i1,p) _bgei(_jit,i0,r0,i1,0,p) +# define bgei_p(i0,r0,i1,p) _bgei(_jit,i0,r0,i1,1,p) +static jit_word_t _bgti_u(jit_state_t*,jit_word_t,jit_uint16_t, + jit_word_t,jit_bool_t,jit_bool_t); +# define 
blei_u(i0,r0,i1) blei_u_p(i0,r0,i1,0) +# define bgti_u(i0,r0,i1) bgti_u_p(i0,r0,i1,0) +# define blei_u_p(i0,r0,i1,p) _bgti_u(_jit,i0,r0,i1,0,p) +# define bgti_u_p(i0,r0,i1,p) _bgti_u(_jit,i0,r0,i1,1,p) +static jit_word_t _bgei_u(jit_state_t*,jit_word_t,jit_uint16_t, + jit_word_t,jit_bool_t,jit_bool_t); +# define blti_u(i0,r0,i1) blti_u_p(i0,r0,i1,0) +# define bgei_u(i0,r0,i1) bgei_u_p(i0,r0,i1,0) +# define blti_u_p(i0,r0,i1,p) _bgei_u(_jit,i0,r0,i1,0,p) +# define bgei_u_p(i0,r0,i1,p) _bgei_u(_jit,i0,r0,i1,1,p) +static jit_word_t _beqi(jit_state_t*,jit_word_t,jit_uint16_t, + jit_word_t,jit_bool_t,jit_bool_t); +# define beqi(i0,r0,i1) beqi_p(i0,r0,i1,0) +# define bnei(i0,r0,i1) bnei_p(i0,r0,i1,0) +# define beqi_p(i0,r0,i1,p) _beqi(_jit,i0,r0,i1,1,p) +# define bnei_p(i0,r0,i1,p) _beqi(_jit,i0,r0,i1,0,p) +static jit_word_t _bmsi(jit_state_t*,jit_word_t,jit_uint16_t, + jit_word_t,jit_bool_t,jit_bool_t); +# define bmsi(i0,r0,i1) bmsi_p(i0,r0,i1,0) +# define bmci(i0,r0,i1) bmci_p(i0,r0,i1,0) +# define bmsi_p(i0,r0,i1,p) _bmsi(_jit,i0,r0,i1,0,p) +# define bmci_p(i0,r0,i1,p) _bmsi(_jit,i0,r0,i1,1,p) +static jit_word_t _boaddi(jit_state_t*,jit_word_t,jit_uint16_t, + jit_word_t,jit_bool_t,jit_bool_t); +# define boaddi(i0,r0,i1) boaddi_p(i0,r0,i1,0) +# define bxaddi(i0,r0,i1) bxaddi_p(i0,r0,i1,0) +# define boaddi_p(i0,r0,i1,p) _boaddi(_jit,i0,r0,i1,1,p) +# define bxaddi_p(i0,r0,i1,p) _boaddi(_jit,i0,r0,i1,0,p) +static jit_word_t _boaddi_u(jit_state_t*,jit_word_t,jit_uint16_t, + jit_word_t,jit_bool_t,jit_bool_t); +# define boaddi_u(i0,r0,i1) boaddi_u_p(i0,r0,i1,0) +# define bxaddi_u(i0,r0,i1) bxaddi_u_p(i0,r0,i1,0) +# define boaddi_u_p(i0,r0,i1,p) _boaddi_u(_jit,i0,r0,i1,1,p) +# define bxaddi_u_p(i0,r0,i1,p) _boaddi_u(_jit,i0,r0,i1,0,p) +static jit_word_t _bosubi(jit_state_t*,jit_word_t,jit_uint16_t, + jit_word_t,jit_bool_t,jit_bool_t); +# define bosubi(i0,r0,i1) bosubi_p(i0,r0,i1,0) +# define bxsubi(i0,r0,i1) bxsubi_p(i0,r0,i1,0) +# define bosubi_p(i0,r0,i1,p) 
_bosubi(_jit,i0,r0,i1,1,p) +# define bxsubi_p(i0,r0,i1,p) _bosubi(_jit,i0,r0,i1,0,p) +static jit_word_t _bosubi_u(jit_state_t*,jit_word_t,jit_uint16_t, + jit_word_t,jit_bool_t,jit_bool_t); +# define bosubi_u(i0,r0,i1) bosubi_u_p(i0,r0,i1,0) +# define bxsubi_u(i0,r0,i1) bxsubi_u_p(i0,r0,i1,0) +# define bosubi_u_p(i0,r0,i1,p) _bosubi_u(_jit,i0,r0,i1,1,p) +# define bxsubi_u_p(i0,r0,i1,p) _bosubi_u(_jit,i0,r0,i1,0,p) +static void _jmpr(jit_state_t*,jit_int16_t); +# define jmpr(r0) _jmpr(_jit,r0) +static jit_word_t _jmpi(jit_state_t*,jit_word_t,jit_bool_t); +# define jmpi(i0) _jmpi(_jit,i0,0) +static void _callr(jit_state_t*,jit_int16_t); +# define callr(r0) _callr(_jit,r0) +static void _calli(jit_state_t*,jit_word_t); +# define calli(i0) _calli(_jit,i0) + +static jit_word_t _movi_p(jit_state_t*,jit_uint16_t,jit_word_t); +# define movi_p(r0,i0) _movi_p(_jit,r0,i0) +static jit_word_t _jmpi_p(jit_state_t*,jit_word_t); +# define jmpi_p(i0) _jmpi_p(_jit,i0) +static jit_word_t _calli_p(jit_state_t*,jit_word_t); +# define calli_p(i0) _calli_p(_jit,i0) +static void _patch_abs(jit_state_t*,jit_word_t,jit_word_t); +# define patch_abs(instr,label) _patch_abs(_jit,instr,label) +static void _patch_at(jit_state_t*,jit_word_t,jit_word_t); +# define patch_at(jump,label) _patch_at(_jit,jump,label) +static void _prolog(jit_state_t*,jit_node_t*); +# define prolog(node) _prolog(_jit,node) +static void _epilog(jit_state_t*,jit_node_t*); +# define epilog(node) _epilog(_jit,node) +static void _vastart(jit_state_t*, jit_int32_t); +# define vastart(r0) _vastart(_jit, r0) +static void _vaarg(jit_state_t*, jit_int32_t, jit_int32_t); +# define vaarg(r0, r1) _vaarg(_jit, r0, r1) + +# define ldr(r0,r1) ldr_i(r0,r1) +# define ldi(r0,i0) ldi_i(r0,i0) +# define ldxr(r0,r1,r2) ldxr_i(r0,r1,r2) +# define ldxi(r0,r1,i0) ldxi_i(r0,r1,i0) +# define str(r0,r1) str_i(r0,r1) +# define sti(i0,r0) sti_i(i0,r0) +# define stxr(r0,r1,r2) stxr_i(r0,r1,r2) +# define stxi(i0,r0,r1) stxi_i(i0,r0,r1) + +# define 
is_low_mask(im) (((im) & 1) ? (__builtin_popcountl((im) + 1) <= 1) : 0) +# define is_middle_mask(im) ((im) ? (__builtin_popcountl((im) + (1 << __builtin_ctzl(im))) <= 1) : 0) +# define is_high_mask(im) ((im) ? (__builtin_popcountl((im) + (1 << __builtin_ctzl(im))) == 0) : 0) +# define masked_bits_count(im) __builtin_popcountl(im) +# define unmasked_bits_count(im) (__WORDSIZE - masked_bits_count(im)) + +# if defined(__SH3__) || defined(__SH4__) || defined(__SH4_NOFPU__) || defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) +# define jit_sh34_p() 1 +# else +# define jit_sh34_p() 0 +# endif + +static void _maybe_emit_frchg(jit_state_t *_jit); +# define maybe_emit_frchg() _maybe_emit_frchg(_jit) +static void _maybe_emit_fschg(jit_state_t *_jit); +# define maybe_emit_fschg() _maybe_emit_fschg(_jit) +#endif /* PROTO */ + +#if CODE +static void +_cni(jit_state_t *_jit, jit_uint16_t c, jit_uint16_t n, jit_uint16_t i) +{ + jit_instr_t op; + + op.ni = (struct jit_instr_ni){ .c = c, .n = n, .i = i }; + + ii(op.op); +} + +static void +_cnmd(jit_state_t *_jit, jit_uint16_t c, jit_uint16_t n, + jit_uint16_t m, jit_uint16_t d) +{ + jit_instr_t op; + + op.nmd = (struct jit_instr_nmd){ .c = c, .n = n, .m = m, .d = d }; + + ii(op.op); +} + +static void +_cmd(jit_state_t *_jit, jit_uint16_t c, jit_uint16_t m, jit_uint16_t d) +{ + jit_instr_t op; + + op.md = (struct jit_instr_md){ .c = c, .m = m, .d = d }; + + ii(op.op); +} + +static void +_cd(jit_state_t *_jit, jit_uint16_t c, jit_uint16_t d) +{ + jit_instr_t op; + + op.d = (struct jit_instr_d){ .c = c, .d = d }; + + ii(op.op); +} + +static void +_nop(jit_state_t *_jit, jit_word_t i0) +{ + for (; i0 > 0; i0 -= 2) + NOP(); + assert(i0 == 0); +} + +static void +_movr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1) +{ + if (r0 != r1) { + if (r1 == _GBR) + STCGBR(r0); + else if (r0 == _GBR) + LDCGBR(r1); + else + MOV(r0, r1); + } +} + +static void +movi_loop(jit_state_t *_jit, jit_uint16_t r0, jit_word_t i0) +{ + jit_word_t 
tmp; + + if (i0 >= -128 && i0 < 128) { + MOVI(r0, i0); + } else { + tmp = (i0 >> 8) + !!(i0 & 0x80); + if (tmp & 0xff) { + movi_loop(_jit, r0, tmp); + if (tmp != 0) + SHLL8(r0); + } else { + tmp = (i0 >> 16) + !!(i0 & 0x80); + movi_loop(_jit, r0, tmp); + if (tmp != 0) + SHLL16(r0); + } + if (i0 & 0xff) + ADDI(r0, i0 & 0xff); + } +} + +static jit_word_t +movi_loop_cnt(jit_word_t i0) +{ + jit_word_t tmp, cnt = 0; + + if (i0 >= -128 && i0 < 128) { + cnt = 1; + } else { + tmp = (i0 >> 8) + !!(i0 & 0x80); + if (tmp & 0xff) { + cnt += !!tmp + movi_loop_cnt(tmp); + } else { + tmp = (i0 >> 16) + !!(i0 & 0x80); + cnt += !!tmp + movi_loop_cnt(tmp); + } + cnt += !!(i0 & 0xff); + } + + return cnt; +} + +static void +_movi(jit_state_t *_jit, jit_uint16_t r0, jit_word_t i0) +{ + jit_word_t w = _jit->pc.w & ~3; + + if (i0 >= -128 && i0 < 128) { + MOVI(r0, i0); + } else if (!(i0 & 0x1) && i0 >= -256 && i0 < 256) { + MOVI(r0, i0 >> 1); + SHLL(r0); + } else if (!(i0 & 0x3) && i0 >= -512 && i0 < 512) { + MOVI(r0, i0 >> 2); + SHLL2(r0); + } else if (i0 >= w && i0 <= w + 0x3ff && !((i0 - w) & 0x3)) { + MOVA((i0 - w) >> 2); + movr(r0, _R0); + } else if (is_low_mask(i0)) { + MOVI(r0, -1); + rshi_u(r0, r0, unmasked_bits_count(i0)); + } else if (is_high_mask(i0)) { + MOVI(r0, -1); + lshi(r0, r0, unmasked_bits_count(i0)); + } else if (movi_loop_cnt(i0) < 4) { + movi_loop(_jit, r0, i0); + } else { + load_const(0, r0, i0); + } +} + +static void +emit_branch_opcode(jit_state_t *_jit, jit_word_t i0, jit_word_t w, + int t_set, int force_patchable) +{ + jit_int32_t disp = (i0 - w >> 1) - 2; + jit_uint16_t reg; + + if (!force_patchable && i0 == 0) { + /* Positive displacement - we don't know the target yet. */ + if (t_set) + BT(0); + else + BF(0); + + /* Leave space after the BF/BT in case we need to add a + * BRA opcode. 
*/ + w = _jit->code.length - (_jit->pc.uc - _jit->code.ptr); + if (w > 254) { + NOP(); + NOP(); + } + } else if (!force_patchable && disp >= -128) { + if (t_set) + BT(disp); + else + BF(disp); + } else { + reg = jit_get_reg(jit_class_gpr); + + if (force_patchable) + movi_p(rn(reg), i0); + else + movi(rn(reg), i0); + if (t_set) + BF(0); + else + BT(0); + JMP(rn(reg)); + NOP(); + + jit_unget_reg(reg); + } +} + +static void _maybe_emit_frchg(jit_state_t *_jit) +{ + jit_instr_t *instr = (jit_instr_t *)(_jit->pc.w - 2); + + if (_jitc->no_flag && instr->op == 0xfbfd) + _jit->pc.us--; + else + FRCHG(); +} + +static void _maybe_emit_fschg(jit_state_t *_jit) +{ + jit_instr_t *instr = (jit_instr_t *)(_jit->pc.w - 2); + + if (_jitc->no_flag && instr->op == 0xf3fd) + _jit->pc.us--; + else + FSCHG(); +} + +static void maybe_emit_tst(jit_state_t *_jit, jit_uint16_t r0, jit_bool_t *set) +{ + jit_instr_t *instr = (jit_instr_t *)(_jit->pc.w - 2); + + /* If the previous opcode is a MOVT(r0), we can skip the TST opcode, + * but we need to invert the branch condition. 
*/ + if (_jitc->no_flag && instr->op == (0x29 | (r0 << 8))) + *set ^= 1; + else + TST(r0, r0); +} + +static void _movnr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, + jit_uint16_t r2, jit_bool_t set) +{ + maybe_emit_tst(_jit, r2, &set); + + emit_branch_opcode(_jit, 4, 0, set, 0); + movr(r0, r1); +} + +static char atomic_byte; + +static void +_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_int32_t r3, jit_word_t i0) +{ + jit_int32_t r1_reg, iscasi, addr_reg; + + if ((iscasi = (r1 == _NOREG))) { + r1_reg = jit_get_reg(jit_class_gpr); + r1 = rn(r1_reg); + movi(r1, i0); + } + + addr_reg = jit_get_reg(jit_class_gpr); + movi(rn(addr_reg), (uintptr_t)&atomic_byte); + + TAS(rn(addr_reg)); + BF(-3); + + LDL(r0, r1); + CMPEQ(r0, r2); + MOVT(r0); + + BF(0); + STL(r1, r3); + + MOVI(_R0, 0); + STB(rn(addr_reg), _R0); + + jit_unget_reg(addr_reg); + if (iscasi) + jit_unget_reg(r1_reg); +} + +static void +_addr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2) +{ + if (r0 == r2) { + ADD(r0, r1); + } else { + movr(r0, r1); + ADD(r0, r2); + } +} + +static void +_addcr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2) +{ + CLRT(); + addxr(r0, r1, r2); +} + +static void +_addxr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2) +{ + if (r0 == r2) { + ADDC(r0, r1); + } else { + movr(r0, r1); + ADDC(r0, r2); + } +} + +static void +_addi(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0) +{ + if (i0 >= -128 && i0 < 127) { + movr(r0, r1); + ADDI(r0, i0); + } else if (r0 != r1) { + movi(r0, i0); + addr(r0, r1, r0); + } else { + assert(r1 != _R0); + + movi(_R0, i0); + addr(r0, r1, _R0); + } +} + +static void +_addci(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0) +{ + CLRT(); + addxi(r0, r1, i0); +} + +static void +_addxi(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0) +{ + assert(r0 != _R0 && r1 != _R0); + + movi(_R0, i0); + addxr(r0, 
r1, _R0); +} + +static void +_subr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2) +{ + if (r1 == r2) { + movi(r0, 0); + } else if (r0 == r2) { + NEG(r0, r2); + ADD(r0, r1); + } else { + movr(r0, r1); + SUB(r0, r2); + } +} + +static void +_subcr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2) +{ + CLRT(); + subxr(r0, r1, r2); +} + +static void +_subxr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2) +{ + jit_uint32_t reg; + + if (r0 != r2) { + movr(r0, r1); + SUBC(r0, r2); + } else { + reg = jit_get_reg(jit_class_gpr); + + movr(rn(reg), r0); + movr(r0, r1); + SUBC(r0, rn(reg)); + + jit_unget_reg(reg); + } +} + +static void +_subi(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0) +{ + addi(r0, r1, -i0); +} + +static void +_subci(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0) +{ + assert(r0 != _R0 && r1 != _R0); + + movi(_R0, i0); + subcr(r0, r1, _R0); +} + +static void +_subxi(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0) +{ + assert(r0 != _R0 && r1 != _R0); + + movi(_R0, i0); + subxr(r0, r1, _R0); +} + +static void +_rsbi(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0) +{ + if ((jit_uword_t)((i0 >> 7) + 1) < 2) { + negr(r0, r1); + ADDI(r0, i0); + } else if (r0 != r1) { + assert(r0 != _R0 && r1 != _R0); + + movi(r0, i0); + subr(r0, r0, r1); + } else { + assert(r0 != _R0); + + movi(_R0, i0); + subr(r0, _R0, r1); + } +} + +static void +_mulr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2) +{ + MULL(r1, r2); + STSL(r0); +} + +static void +_hmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + DMULS(r1, r2); + STSH(r0); +} + +static void +_hmuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + movi(_R0, i0); + hmulr(r0, r1, _R0); +} + +static void +_hmulr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + DMULU(r1, r2); + STSH(r0); +} + 
+static void +_hmuli_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + movi(_R0, i0); + hmulr_u(r0, r1, _R0); +} + +static void +_qmulr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, + jit_uint16_t r2, jit_uint16_t r3) +{ + DMULS(r2, r3); + STSL(r0); + STSH(r1); +} + +static void +_qmulr_u(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, + jit_uint16_t r2, jit_uint16_t r3) +{ + DMULU(r2, r3); + STSL(r0); + STSH(r1); +} + +static void +_muli(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0) +{ + assert(r1 != _R0); + + movi(_R0, i0); + mulr(r0, r1, _R0); +} + +static void +_qmuli(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, + jit_uint16_t r2, jit_word_t i0) +{ + assert(r2 != _R0); + + movi(_R0, i0); + qmulr(r0, r1, r2, _R0); +} + +static void +_qmuli_u(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, + jit_uint16_t r2, jit_word_t i0) +{ + assert(r2 != _R0); + + movi(_R0, i0); + qmulr_u(r0, r1, r2, _R0); +} + +static void +_divr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2) +{ + jit_uint32_t reg, reg2; + jit_uint16_t divisor; + + assert(r1 != _R0 && r2 != _R0); + + if (r1 == r2) { + MOVI(r0, 1); + } else { + reg = jit_get_reg(jit_class_gpr); + + if (r0 == r2) { + reg2 = jit_get_reg(jit_class_gpr); + movr(rn(reg2), r2); + divisor = rn(reg2); + } else { + divisor = r2; + } + + movr(r0, r1); + MOVI(_R0, 0); + + CMPGT(_R0, r0); + SUBC(rn(reg), rn(reg)); + SUBC(r0, _R0); + + MOVI(_R0, -2); + DIV0S(rn(reg), divisor); + + ROTCL(r0); + DIV1(rn(reg), divisor); + ROTCL(_R0); + XORI(1); + BTS(-6); + TSTI(1); + + ROTCL(r0); + MOVI(_R0, 0); + ADDC(r0, _R0); + + jit_unget_reg(reg); + if (r0 == r2) + jit_unget_reg(reg2); + } +} + +static void +_divr_u(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2) +{ + jit_uint32_t reg, reg2; + jit_uint16_t divisor; + + assert(r1 != _R0 && r2 != _R0); + + if (r1 == r2) { + MOVI(r0, 1); + } else { + reg = jit_get_reg(jit_class_gpr); + + if 
(r0 == r2) { + reg2 = jit_get_reg(jit_class_gpr); + movr(rn(reg2), r2); + divisor = rn(reg2); + } else { + divisor = r2; + } + + movr(r0, r1); + MOVI(rn(reg), 0); + MOVI(_R0, -2); + DIV0U(); + + ROTCL(r0); + DIV1(rn(reg), divisor); + ROTCL(_R0); + XORI(1); + BTS(-6); + TSTI(1); + + ROTCL(r0); + + jit_unget_reg(reg); + if (r0 == r2) + jit_unget_reg(reg2); + } +} + +static void +_qdivr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, + jit_uint16_t r2, jit_uint16_t r3) +{ + jit_uint32_t reg; + + assert(r2 != _R0 && r3 != _R0); + + if (r0 != r2 && r0 != r3) { + divr(r0, r2, r3); + mulr(_R0, r0, r3); + subr(r1, r2, _R0); + } else { + reg = jit_get_reg(jit_class_gpr); + + divr(rn(reg), r2, r3); + mulr(_R0, rn(reg), r3); + subr(r1, r2, _R0); + movr(r0, rn(reg)); + + jit_unget_reg(reg); + } +} + +static void +_qdivr_u(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, + jit_uint16_t r2, jit_uint16_t r3) +{ + jit_uint32_t reg; + + assert(r2 != _R0 && r3 != _R0); + + if (r0 != r2 && r0 != r3) { + divr_u(r0, r2, r3); + mulr(_R0, r0, r3); + subr(r1, r2, _R0); + } else { + reg = jit_get_reg(jit_class_gpr); + + divr_u(rn(reg), r2, r3); + mulr(_R0, rn(reg), r3); + subr(r1, r2, _R0); + movr(r0, rn(reg)); + + jit_unget_reg(reg); + } +} + +static void +_divi(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0) +{ + jit_uint32_t reg = jit_get_reg(jit_class_gpr); + + movi(rn(reg), i0); + divr(r0, r1, rn(reg)); + + jit_unget_reg(reg); +} + +static void +_qdivi(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, + jit_uint16_t r2, jit_word_t i0) +{ + jit_uint32_t reg = jit_get_reg(jit_class_gpr); + + movi(rn(reg), i0); + qdivr(r0, r1, r2, rn(reg)); + + jit_unget_reg(reg); +} + +static void +_qdivi_u(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, + jit_uint16_t r2, jit_word_t i0) +{ + if (r0 != r2 && r1 != r2) { + fallback_divi_u(r0, r2, i0); + muli(r1, r0, i0); + subr(r1, r2, r1); + } else { + jit_uint32_t reg = jit_get_reg(jit_class_gpr); + + 
fallback_divi_u(rn(reg), r2, i0); + muli(_R0, rn(reg), i0); + subr(r1, r2, _R0); + + jit_unget_reg(reg); + } +} + +static void +_remr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2) +{ + jit_uint32_t reg = jit_get_reg(jit_class_gpr); + + assert(r1 != _R0 && r2 != _R0); + + qdivr(rn(reg), r0, r1, r2); + + jit_unget_reg(reg); +} + +static void +_remr_u(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2) +{ + jit_uint32_t reg = jit_get_reg(jit_class_gpr); + + assert(r1 != _R0 && r2 != _R0); + + qdivr_u(rn(reg), r0, r1, r2); + + jit_unget_reg(reg); +} + +static void +_remi(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0) +{ + jit_uint32_t reg = jit_get_reg(jit_class_gpr); + + movi(rn(reg), i0); + remr(r0, r1, rn(reg)); + + jit_unget_reg(reg); +} + +static void +_remi_u(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0) +{ + jit_uint32_t reg = jit_get_reg(jit_class_gpr); + + qdivi_u(rn(reg), r0, r1, i0); + + jit_unget_reg(reg); +} + +static void +_bswapr_us(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1) +{ + EXTUW(r0, r1); + SWAPB(r0, r0); +} + +static void +_bswapr_ui(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1) +{ + SWAPB(r0, r1); + SWAPW(r0, r0); + SWAPB(r0, r0); +} + +static void +_lrotr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + assert(r0 != _R0 && r1 != _R0); + + movr(_R0, r2); + movr(r0, r1); + + ROTL(r0); + TST(_R0, _R0); + BFS(-4); + ADDI(_R0, -1); + + ROTR(r0); +} + +static void +_rrotr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + assert(r0 != _R0 && r1 != _R0); + + movr(_R0, r2); + movr(r0, r1); + + ROTR(r0); + TST(_R0, _R0); + BFS(-4); + ADDI(_R0, -1); + + ROTL(r0); +} + +static void +_rroti(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + unsigned int i; + + assert(i0 >= 0 && i0 <= __WORDSIZE - 1); + assert(r0 != _R0); + + movr(r0, r1); + + if (i0 < 6) { + for (i = 0; i < i0; i++) + ROTR(r0); 
+ } else if (__WORDSIZE - i0 < 6) { + for (i = 0; i < __WORDSIZE - i0; i++) + ROTL(r0); + } else { + movi(_R0, i0); + rrotr(r0, r0, _R0); + } +} + +static void +_andr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2) +{ + if (r0 == r2) { + AND(r0, r1); + } else { + movr(r0, r1); + AND(r0, r2); + } +} + +static void +_andi(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0) +{ + if (i0 == 0xff) { + extr_uc(r0, r1); + } else if (i0 == 0xffff) { + extr_us(r0, r1); + } else if (i0 == 0xffff0000) { + SWAPW(r0, r1); + SHLL16(r0); + } else if (r0 != r1) { + movi(r0, i0); + AND(r0, r1); + } else { + assert(r0 != _R0); + + movi(_R0, i0); + AND(r0, _R0); + } +} + +static void +_orr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2) +{ + if (r0 == r2) { + OR(r0, r1); + } else { + movr(r0, r1); + OR(r0, r2); + } +} + +static void +_ori(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0) +{ + if (r0 != r1) { + movi(r0, i0); + OR(r0, r1); + } else { + assert(r0 != _R0); + + movi(_R0, i0); + OR(r0, _R0); + } +} + +static void +_xorr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2) +{ + if (r0 == r2) { + XOR(r0, r1); + } else { + movr(r0, r1); + XOR(r0, r2); + } +} + +static void +_xori(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0) +{ + if (r0 == _R0 && !(i0 & ~0xff)) { + movr(r0, r1); + XORI(i0); + } else if (r0 != r1) { + movi(r0, i0); + XOR(r0, r1); + } else { + assert(r0 != _R0); + + movi(_R0, i0); + XOR(r0, _R0); + } +} + +static void _clor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + movr(_R0, r1); + movi(r0, -1); + + SHLL(_R0); + BTS(-3); + ADDI(r0, 1); +} + +static void _clzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + movr(_R0, r1); + movi(r0, -1); + + SETT(); + ROTCL(_R0); + BFS(-3); + ADDI(r0, 1); +} + +static void _ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + movr(_R0, r1); + movi(r0, -1); + + SHLR(_R0); + BTS(-3); + 
ADDI(r0, 1); +} + +static void _ctzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + movr(_R0, r1); + movi(r0, -1); + + SETT(); + ROTCR(_R0); + BFS(-3); + ADDI(r0, 1); +} + +static void +_rbitr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + movr(_R0, r1); + + SETT(); + ROTCR(_R0); + ROTCL(r0); + CMPEQI(1); + emit_branch_opcode(_jit, -6, 0, 0, 0); +} + +static void +_popcntr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + assert(r0 != _R0); + + movr(_R0, r1); + movi(r0, 0); + + SHLR(_R0); + NEGC(r0, r0); + TST(_R0, _R0); + BFS(-5); + NEG(r0, r0); +} + +static void +_gtr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2) +{ + CMPGT(r1, r2); + MOVT(r0); +} + +static void +_gtr_u(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2) +{ + CMPHI(r1, r2); + MOVT(r0); +} + +static void +_ger(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2) +{ + CMPGE(r1, r2); + MOVT(r0); +} + +static void +_ger_u(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2) +{ + CMPHS(r1, r2); + MOVT(r0); +} + +static void +_eqr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2) +{ + CMPEQ(r1, r2); + MOVT(r0); +} + +static void +_ner(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2) +{ + assert(r1 != _R0 && r2 != _R0); + + MOVI(_R0, -1); + CMPEQ(r1, r2); + NEGC(r0, _R0); +} + +static void +_eqi(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0) +{ + if (i0 == 0) { + TST(r1, r1); + } else if (i0 >= -128 && i0 < 128) { + assert(r1 != _R0); + + movr(_R0, r1); + CMPEQI(i0); + } else { + assert(r1 != _R0); + + movi(_R0, i0); + CMPEQ(r1, _R0); + } + MOVT(r0); +} + +static void +_nei(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0) +{ + assert(r0 != _R0 && r1 != _R0); + + if (i0 == 0) { + TST(r1, r1); + } else if (i0 >= -128 && i0 < 128) { + movr(_R0, r1); + CMPEQI(i0); + } else { + movi(_R0, i0); + CMPEQ(r1, _R0); + } + + MOVI(_R0, 
-1); + NEGC(r0, _R0); +} + +static void +_gti(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0) +{ + if (i0 == 0) { + CMPPL(r1); + } else { + assert(r1 != _R0); + + movi(_R0, i0); + CMPGT(r1, _R0); + } + MOVT(r0); +} + +static void +_gei(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0) +{ + if (i0 == 0) { + CMPPZ(r1); + } else { + assert(r1 != _R0); + + movi(_R0, i0); + CMPGE(r1, _R0); + } + MOVT(r0); +} + +static void +_gti_u(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0) +{ + assert(r1 != _R0); + + movi(_R0, i0); + CMPHI(r1, _R0); + MOVT(r0); +} + +static void +_gei_u(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0) +{ + assert(r1 != _R0); + + movi(_R0, i0); + CMPHS(r1, _R0); + MOVT(r0); +} + +static void +_lti(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0) +{ + assert(r1 != _R0); + + if (i0 == 0) { + movr(r0, r1); + ROTCL(r0); + MOVT(r0); + } else { + movi(_R0, i0); + CMPGT(_R0, r1); + MOVT(r0); + } +} + +static void +_lei(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0) +{ + assert(r1 != _R0); + + movi(_R0, i0); + CMPGE(_R0, r1); + MOVT(r0); +} + +static void +_lti_u(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0) +{ + assert(r1 != _R0); + + movi(_R0, i0); + CMPHI(_R0, r1); + MOVT(r0); +} + +static void +_lei_u(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0) +{ + assert(r1 != _R0); + + movi(_R0, i0); + CMPHS(_R0, r1); + MOVT(r0); +} + +static void +emit_shllr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1) +{ + if (jit_sh34_p()) + SHLD(r0, r1); + else { + movr(_R0, r1); + + TST(_R0, _R0); + BTS(2); + DT(_R0); + BFS(-3); + SHLL(r0); + } +} + +static void +_lshr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2) +{ + if (r0 == r2) { + assert(r1 != _R0); + + movr(_R0, r2); + movr(r0, r1); + emit_shllr(_jit, r0, _R0); + } else { + movr(r0, r1); + emit_shllr(_jit, r0, r2); + } +} + 
+static void +_rshr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2) +{ + assert(r0 != _R0 && r1 != _R0); + + if (jit_sh34_p()) { + negr(_R0, r2); + movr(r0, r1); + SHAD(r0, _R0); + } else { + movr(_R0, r2); + movr(r0, r1); + + TST(_R0, _R0); + BTS(2); + DT(_R0); + BFS(-3); + SHAR(r0); + } +} + +static void +_rshr_u(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2) +{ + assert(r0 != _R0 && r1 != _R0); + + if (jit_sh34_p()) { + negr(_R0, r2); + movr(r0, r1); + SHLD(r0, _R0); + } else { + movr(_R0, r2); + movr(r0, r1); + + TST(_R0, _R0); + BTS(2); + DT(_R0); + BFS(-3); + SHLR(r0); + } +} + +static void +_lshi(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0) +{ + jit_uint32_t reg, mask = 0x00838387; + + movr(r0, r1); + + if (i0 == 0) + return; + + if (i0 == 4) { + SHLL2(r0); + SHLL2(r0); + } else if (mask & (1 << (i0 - 1))) { + if (i0 & 0x10) + SHLL16(r0); + if (i0 & 0x8) + SHLL8(r0); + if (i0 & 0x2) + SHLL2(r0); + if (i0 & 0x1) + SHLL(r0); + } else { + reg = r0 != _R0 ? _R0 : jit_get_reg(jit_class_gpr); + + movi(rn(reg), i0); + lshr(r0, r0, rn(reg)); + + if (r0 == _R0) + jit_unget_reg(reg); + } +} + +static void +_rshi(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0) +{ + jit_uint32_t reg; + + reg = r0 != _R0 ? _R0 : jit_get_reg(jit_class_gpr); + + movr(r0, r1); + if (jit_sh34_p()) { + movi(rn(reg), -i0); + SHAD(r0, rn(reg)); + } else { + assert(i0 > 0); + movi(rn(reg), i0); + DT(rn(reg)); + BFS(-3); + SHAR(r0); + } + + if (r0 == _R0) + jit_unget_reg(reg); +} + +static void +_rshi_u(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0) +{ + jit_uint32_t reg, mask = 0x00838387; + + movr(r0, r1); + + if (i0 == 0) + return; + + if (i0 == 4) { + SHLR2(r0); + SHLR2(r0); + } else if (mask & (1 << (i0 - 1))) { + if (i0 & 0x10) + SHLR16(r0); + if (i0 & 0x8) + SHLR8(r0); + if (i0 & 0x2) + SHLR2(r0); + if (i0 & 0x1) + SHLR(r0); + } else { + reg = r0 != _R0 ? 
_R0 : jit_get_reg(jit_class_gpr); + + if (jit_sh34_p()) { + movi(rn(reg), -i0); + SHLD(r0, rn(reg)); + } else { + movi(rn(reg), i0); + DT(rn(reg)); + BFS(-3); + SHLR(r0); + } + + if (r0 == _R0) + jit_unget_reg(reg); + } +} + +static void +_qlshr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_int32_t r3) +{ + assert(r0 != r1); + movr(_R0, r3); + movr(r0, r2); + CMPEQI(32); + movr(r1, r2); + BF(0); + XOR(r0, r0); + SHAD(r0, _R0); + ADDI(_R0, -__WORDSIZE); + SHAD(r1, _R0); +} + +static void +_qlshr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_int32_t r3) +{ + assert(r0 != r1); + movr(_R0, r3); + movr(r0, r2); + CMPEQI(32); + movr(r1, r2); + BF(0); + XOR(r0, r0); + SHLD(r0, _R0); + ADDI(_R0, -__WORDSIZE); + SHLD(r1, _R0); +} + +static void +_xlshi(jit_state_t *_jit, jit_bool_t sign, + jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_word_t i0) +{ + if (i0 == 0) { + movr(r0, r2); + if (sign) + rshi(r1, r2, __WORDSIZE - 1); + else + movi(r1, 0); + } + else if (i0 == __WORDSIZE) { + movr(r1, r2); + movi(r0, 0); + } + else { + assert((jit_uword_t)i0 <= __WORDSIZE); + if (sign) + rshi(r1, r2, __WORDSIZE - i0); + else + rshi_u(r1, r2, __WORDSIZE - i0); + lshi(r0, r2, i0); + } +} + +static void +_qrshr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_int32_t r3) +{ + assert(r0 != r1); + NEG(_R0, r3); + movr(r1, r2); + CMPEQI(0); + movr(r0, r2); + BF(0); + MOV(r1, _R0); + SHAD(r0, _R0); + ADDI(_R0, __WORDSIZE); + SHAD(r1, _R0); +} + +static void +_qrshr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_int32_t r3) +{ + assert(r0 != r1); + NEG(_R0, r3); + movr(r1, r2); + CMPEQI(0); + movr(r0, r2); + BF(0); + MOV(r1, _R0); + SHLD(r0, _R0); + ADDI(_R0, __WORDSIZE); + SHLD(r1, _R0); +} + +static void +_xrshi(jit_state_t *_jit, jit_bool_t sign, + jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_word_t i0) +{ + if (i0 == 0) { + movr(r0, r2); + movi(r1, 0); + } + else if (i0 
== __WORDSIZE) { + movr(r1, r2); + if (sign) + rshi(r0, r2, __WORDSIZE - 1); + else + movi(r0, 0); + } + else { + assert((jit_uword_t)i0 <= __WORDSIZE); + lshi(r1, r2, __WORDSIZE - i0); + if (sign) + rshi(r0, r2, i0); + else + rshi_u(r0, r2, i0); + } +} + +static void _ldr_uc(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1) +{ + ldr_c(r0, r1); + extr_uc(r0, r0); +} + +static void _ldr_us(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1) +{ + ldr_s(r0, r1); + extr_us(r0, r0); +} + +static void _ldi_c(jit_state_t *_jit, jit_uint16_t r0, jit_word_t i0) +{ + movi(_R0, i0); + ldr_c(r0, _R0); +} + +static void _ldi_s(jit_state_t *_jit, jit_uint16_t r0, jit_word_t i0) +{ + movi(_R0, i0); + ldr_s(r0, _R0); +} + +static void _ldi_i(jit_state_t *_jit, jit_uint16_t r0, jit_word_t i0) +{ + movi(_R0, i0); + ldr_i(r0, _R0); +} + +static void _ldi_uc(jit_state_t *_jit, jit_uint16_t r0, jit_word_t i0) +{ + movi(_R0, i0); + ldr_uc(r0, _R0); +} + +static void _ldi_us(jit_state_t *_jit, jit_uint16_t r0, jit_word_t i0) +{ + movi(_R0, i0); + ldr_us(r0, _R0); +} + +static void +_ldxr_c(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2) +{ + assert(r1 != _R0); + + movr(_R0, r2); + LDRB(r0, r1); +} + +static void +_ldxr_s(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2) +{ + assert(r1 != _R0); + + movr(_R0, r2); + LDRW(r0, r1); +} + +static void +_ldxr_i(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2) +{ + assert(r1 != _R0); + + movr(_R0, r2); + LDRL(r0, r1); +} + +static void +_ldxr_uc(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2) +{ + ldxr_c(r0, r1, r2); + extr_uc(r0, r0); +} + +static void +_ldxr_us(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2) +{ + ldxr_s(r0, r1, r2); + extr_us(r0, r0); +} + +static void +_ldxi_c(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0) +{ + assert(r1 != _R0); + + if (r1 == _GBR) { + if (i0 >= 0 && i0 <= 0xff) { + GBRLDB(i0); 
+ movr(r0, _R0); + } else { + movr(r0, r1); + ldxi_c(r0, r0, i0); + } + } else if (i0 >= 0 && i0 <= 0xf) { + LDDB(r1, i0); + movr(r0, _R0); + } else { + movi(_R0, i0); + ldxr_c(r0, r1, _R0); + } +} + +static void +_ldxi_s(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0) +{ + assert(r1 != _R0); + + if (r1 == _GBR) { + if (i0 >= 0 && i0 <= 0x1ff && !(i0 & 0x1)) { + GBRLDW(i0 >> 1); + movr(r0, _R0); + } else { + movr(r0, r1); + ldxi_s(r0, r0, i0); + } + } else if (i0 >= 0 && i0 <= 0x1f && !(i0 & 0x1)) { + LDDW(r1, i0 >> 1); + movr(r0, _R0); + } else { + movi(_R0, i0); + ldxr_s(r0, r1, _R0); + } +} + +static void +_ldxi_i(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0) +{ + assert(r1 != _R0); + + if (r1 == _GBR) { + if (i0 >= 0 && i0 <= 0x3ff && !(i0 & 0x3)) { + GBRLDL(i0 >> 2); + movr(r0, _R0); + } else { + movr(r0, r1); + ldxi_i(r0, r0, i0); + } + } else if (i0 >= 0 && i0 <= 0x3f && !(i0 & 0x3)) { + LDDL(r0, r1, i0 >> 2); + } else { + movi(_R0, i0); + ldxr_i(r0, r1, _R0); + } +} + +static void +_ldxi_uc(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0) +{ + assert(r1 != _R0); + + ldxi_c(_R0, r1, i0); + extr_uc(r0, _R0); +} + +static void +_ldxi_us(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0) +{ + assert(r1 != _R0); + + ldxi_s(_R0, r1, i0); + extr_us(r0, _R0); +} + +static void +_ldxai_c(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0) +{ + if (i0 == 1) + LDBU(r0, r1); + else + generic_ldxai_c(r0, r1, i0); +} + +static void +_ldxai_uc(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0) +{ + if (i0 == 1) + LDBU(r0, r1); + else + generic_ldxai_c(r0, r1, i0); + extr_uc(r0, r0); +} + +static void +_ldxai_s(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0) +{ + if (i0 == 2) + LDWU(r0, r1); + else + generic_ldxai_s(r0, r1, i0); +} + +static void +_ldxai_us(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0) +{ + if (i0 == 2) + 
LDWU(r0, r1); + else + generic_ldxai_s(r0, r1, i0); + extr_us(r0, r0); +} + +static void +_ldxai_i(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0) +{ + if (i0 == 4) + LDLU(r0, r1); + else + generic_ldxai_i(r0, r1, i0); +} + +static void _sti_c(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0) +{ + assert(r0 != _R0); + + movi(_R0, i0); + str_c(_R0, r0); +} + +static void _sti_s(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0) +{ + assert(r0 != _R0); + + movi(_R0, i0); + str_s(_R0, r0); +} + +static void _sti_i(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0) +{ + assert(r0 != _R0); + + movi(_R0, i0); + str_i(_R0, r0); +} + +static void +_stxr_c(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2) +{ + assert(r1 != _R0 && r2 != _R0); + + movr(_R0, r0); + STRB(r1, r2); +} + +static void +_stxr_s(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2) +{ + assert(r1 != _R0 && r2 != _R0); + + movr(_R0, r0); + STRW(r1, r2); +} + +static void +_stxr_i(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2) +{ + assert(r1 != _R0 && r2 != _R0); + + movr(_R0, r0); + STRL(r1, r2); +} + +static void +_stxi_c(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0, jit_uint16_t r1) +{ + jit_uint32_t reg; + + if (r0 == _GBR) { + if (i0 >= 0 && i0 <= 0xff) { + movr(_R0, r1); + GBRSTB(i0); + } else { + reg = jit_get_reg(jit_class_gpr); + movr(rn(reg), r0); + stxi_c(i0, rn(reg), r1); + jit_unget_reg(reg); + } + } else { + assert(r0 != _R0 && r1 != _R0); + + movi(_R0, i0); + stxr_c(_R0, r0, r1); + } +} + +static void +_stxi_s(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0, jit_uint16_t r1) +{ + jit_uint32_t reg; + + if (r0 == _GBR) { + if (i0 >= 0 && i0 <= 0x1ff && !(i0 & 0x1)) { + movr(_R0, r1); + GBRSTW(i0 >> 1); + } else { + reg = jit_get_reg(jit_class_gpr); + movr(rn(reg), r0); + stxi_s(i0, rn(reg), r1); + jit_unget_reg(reg); + } + } else { + assert(r0 != _R0 && r1 != _R0); + + movi(_R0, i0); + stxr_s(_R0, r0, 
r1); + } +} + +static void +_stxi_i(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0, jit_uint16_t r1) +{ + jit_uint32_t reg; + + if (r0 == _GBR) { + if (i0 >= 0 && i0 <= 0x3ff && !(i0 & 0x3)) { + movr(_R0, r1); + GBRSTL(i0 >> 2); + } else { + reg = jit_get_reg(jit_class_gpr); + movr(rn(reg), r0); + stxi_i(i0, rn(reg), r1); + jit_unget_reg(reg); + } + } else if (i0 >= 0 && i0 <= 0x3f && !(i0 & 3)) { + STDL(r0, r1, i0 >> 2); + } else { + assert(r0 != _R0 && r1 != _R0); + + movi(_R0, i0); + stxr_i(_R0, r0, r1); + } +} + +static void +_stxbi_c(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0, jit_uint16_t r1) +{ + if (i0 == -1) + STBU(r0, r1); + else + generic_stxbi_c(i0, r0, r1); +} + +static void +_stxbi_s(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0, jit_uint16_t r1) +{ + if (i0 == -2) + STWU(r0, r1); + else + generic_stxbi_s(i0, r0, r1); +} + +static void +_stxbi_i(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0, jit_uint16_t r1) +{ + if (i0 == -4) + STLU(r0, r1); + else + generic_stxbi_i(i0, r0, r1); +} + +static jit_word_t +_bger(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0, + jit_uint16_t r1, jit_bool_t t, jit_bool_t p) +{ + jit_word_t w; + + set_fmode(_jit, SH_DEFAULT_FPU_MODE); + + CMPGE(r0, r1); + w = _jit->pc.w; + emit_branch_opcode(_jit, i0, w, t, p); + + return (w); +} + +static jit_word_t +_bger_u(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0, + jit_uint16_t r1, jit_bool_t t, jit_bool_t p) +{ + jit_word_t w; + + set_fmode(_jit, SH_DEFAULT_FPU_MODE); + + CMPHS(r0, r1); + w = _jit->pc.w; + emit_branch_opcode(_jit, i0, w, t, p); + + return (w); +} + +static jit_word_t +_beqr(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0, + jit_uint16_t r1, jit_bool_t p) +{ + jit_word_t w; + + set_fmode(_jit, SH_DEFAULT_FPU_MODE); + + if (r0 == r1) { + if (p) + w = jmpi_p(i0); + else + w = _jmpi(_jit, i0, i0 == 0); + } else { + CMPEQ(r0, r1); + w = _jit->pc.w; + emit_branch_opcode(_jit, i0, w, 1, p); + } + + return (w); +} + +static jit_word_t 
+_bner(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0, + jit_uint16_t r1, jit_bool_t p) +{ + jit_word_t w; + + set_fmode(_jit, SH_DEFAULT_FPU_MODE); + + CMPEQ(r0, r1); + w = _jit->pc.w; + emit_branch_opcode(_jit, i0, w, 0, p); + + return (w); +} + +static jit_word_t +_bmsr(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0, + jit_uint16_t r1, jit_bool_t p) +{ + jit_bool_t set = 0; + jit_word_t w; + + set_fmode(_jit, SH_DEFAULT_FPU_MODE); + + if (r0 != r1) + TST(r0, r1); + else + maybe_emit_tst(_jit, r0, &set); + + w = _jit->pc.w; + emit_branch_opcode(_jit, i0, w, set, p); + + return (w); +} + +static jit_word_t +_bmcr(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0, + jit_uint16_t r1, jit_bool_t p) +{ + jit_bool_t set = 1; + jit_word_t w; + + set_fmode(_jit, SH_DEFAULT_FPU_MODE); + + if (r0 != r1) + TST(r0, r1); + else + maybe_emit_tst(_jit, r0, &set); + + w = _jit->pc.w; + emit_branch_opcode(_jit, i0, w, set, p); + + return (w); +} + +static jit_word_t +_bgti(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0, + jit_word_t i1, jit_bool_t set, jit_bool_t p) +{ + jit_word_t w; + + set_fmode(_jit, SH_DEFAULT_FPU_MODE); + + if (i1 == 0) { + CMPPL(r0); + } else { + assert(r0 != _R0); + + movi(_R0, i1); + CMPGT(r0, _R0); + } + w = _jit->pc.w; + emit_branch_opcode(_jit, i0, w, set, p); + + return (w); +} + +static jit_word_t +_bgei(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0, + jit_word_t i1, jit_bool_t set, jit_bool_t p) +{ + jit_word_t w; + + set_fmode(_jit, SH_DEFAULT_FPU_MODE); + + if (i1 == 0) { + CMPPZ(r0); + } else { + assert(r0 != _R0); + + movi(_R0, i1); + CMPGE(r0, _R0); + } + w = _jit->pc.w; + emit_branch_opcode(_jit, i0, w, set, p); + + return (w); +} + +static jit_word_t +_bgti_u(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0, + jit_word_t i1, jit_bool_t set, jit_bool_t p) +{ + jit_word_t w; + + set_fmode(_jit, SH_DEFAULT_FPU_MODE); + + if (i1 == 0) { + maybe_emit_tst(_jit, r0, &set); + } else { + assert(r0 != _R0); + + movi(_R0, i1); + CMPHI(r0, 
_R0); + } + w = _jit->pc.w; + emit_branch_opcode(_jit, i0, w, set, p); + + return (w); +} + +static jit_word_t +_bgei_u(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0, + jit_word_t i1, jit_bool_t set, jit_bool_t p) +{ + jit_word_t w; + + assert(r0 != _R0); + + set_fmode(_jit, SH_DEFAULT_FPU_MODE); + + movi(_R0, i1); + CMPHS(r0, _R0); + w = _jit->pc.w; + emit_branch_opcode(_jit, i0, w, set, p); + + return (w); +} + +static jit_word_t _beqi(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0, + jit_word_t i1, jit_bool_t set, jit_bool_t p) +{ + jit_word_t w; + + set_fmode(_jit, SH_DEFAULT_FPU_MODE); + + if (i1 == 0) { + maybe_emit_tst(_jit, r0, &set); + } else if (i1 >= -128 && i1 < 128) { + movr(_R0, r0); + CMPEQI(i1); + } else { + assert(r0 != _R0); + + movi(_R0, i1); + CMPEQ(_R0, r0); + } + w = _jit->pc.w; + emit_branch_opcode(_jit, i0, w, set, p); + + return (w); +} + +static jit_word_t _bmsi(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0, + jit_word_t i1, jit_bool_t set, jit_bool_t p) +{ + jit_word_t w; + + assert(r0 != _R0); + + set_fmode(_jit, SH_DEFAULT_FPU_MODE); + + movi(_R0, i1); + TST(_R0, r0); + w = _jit->pc.w; + emit_branch_opcode(_jit, i0, w, set, p); + + return (w); +} + +static jit_word_t _boaddr(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0, + jit_uint16_t r1, jit_bool_t set, jit_bool_t p) +{ + jit_word_t w; + + set_fmode(_jit, SH_DEFAULT_FPU_MODE); + + ADDV(r0, r1); + + w = _jit->pc.w; + emit_branch_opcode(_jit, i0, w, set, p); + + return (w); +} + +static jit_word_t _boaddr_u(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0, + jit_uint16_t r1, jit_bool_t set, jit_bool_t p) +{ + jit_word_t w; + + set_fmode(_jit, SH_DEFAULT_FPU_MODE); + + CLRT(); + ADDC(r0, r1); + + w = _jit->pc.w; + emit_branch_opcode(_jit, i0, w, set, p); + + return (w); +} + +static jit_word_t _boaddi(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0, + jit_word_t i1, jit_bool_t set, jit_bool_t p) +{ + jit_word_t w; + + set_fmode(_jit, SH_DEFAULT_FPU_MODE); + + assert(r0 
!= _R0); + + movi(_R0, i1); + w = _boaddr(_jit, i0, r0, _R0, set, p); + + return (w); +} + +static jit_word_t _boaddi_u(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0, + jit_word_t i1, jit_bool_t set, jit_bool_t p) +{ + jit_word_t w; + + set_fmode(_jit, SH_DEFAULT_FPU_MODE); + + assert(r0 != _R0); + + movi(_R0, i1); + w = _boaddr_u(_jit, i0, r0, _R0, set, p); + + return (w); +} + +static jit_word_t _bosubr(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0, + jit_uint16_t r1, jit_bool_t set, jit_bool_t p) +{ + jit_word_t w; + + set_fmode(_jit, SH_DEFAULT_FPU_MODE); + + assert(r0 != _R0); + + NEG(_R0, r1); + ADDV(r0, _R0); + + w = _jit->pc.w; + emit_branch_opcode(_jit, i0, w, set, p); + + return (w); +} + +static jit_word_t _bosubr_u(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0, + jit_uint16_t r1, jit_bool_t set, jit_bool_t p) +{ + jit_word_t w; + + set_fmode(_jit, SH_DEFAULT_FPU_MODE); + + CLRT(); + SUBC(r0, r1); + + w = _jit->pc.w; + emit_branch_opcode(_jit, i0, w, set, p); + + return (w); +} + +static jit_word_t _bosubi(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0, + jit_word_t i1, jit_bool_t set, jit_bool_t p) +{ + jit_word_t w; + + assert(r0 != _R0); + + movi(_R0, i1); + w = _bosubr(_jit, i0, r0, _R0, set, p); + + return (w); +} + +static jit_word_t _bosubi_u(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0, + jit_word_t i1, jit_bool_t set, jit_bool_t p) +{ + jit_word_t w; + + assert(r0 != _R0); + + movi(_R0, i1); + w = _bosubr_u(_jit, i0, r0, _R0, set, p); + + return (w); +} + +static void +_jmpr(jit_state_t *_jit, jit_int16_t r0) +{ + set_fmode(_jit, SH_DEFAULT_FPU_MODE); + JMP(r0); + NOP(); +} + +static jit_word_t +_jmpi(jit_state_t *_jit, jit_word_t i0, jit_bool_t force) +{ + jit_uint16_t reg; + jit_int32_t disp; + jit_word_t w; + + set_fmode(_jit, SH_DEFAULT_FPU_MODE); + + w = _jit->pc.w; + disp = (i0 - w >> 1) - 2; + + if (force || (disp >= -2048 && disp <= 2046)) { + BRA(disp); + NOP(); + } else if (0) { + /* TODO: BRAF */ + reg = 
jit_get_reg(jit_class_gpr); + + movi_p(rn(reg), disp - 7); + BRAF(rn(reg)); + NOP(); + + jit_unget_reg(reg); + } else { + reg = jit_get_reg(jit_class_gpr); + + movi(rn(reg), i0); + jmpr(rn(reg)); + + jit_unget_reg(reg); + } + + return (w); +} + +static void +_callr(jit_state_t *_jit, jit_int16_t r0) +{ + reset_fpu(_jit, r0 == _R0); + + JSR(r0); + NOP(); + + reset_fpu(_jit, 1); +} + +static void +_calli(jit_state_t *_jit, jit_word_t i0) +{ + jit_int32_t disp; + jit_uint16_t reg; + jit_word_t w; + + reset_fpu(_jit, 0); + + w = _jit->pc.w; + disp = (i0 - w >> 1) - 2; + + if (disp >= -2048 && disp <= 2046) { + BSR(disp); + } else { + movi(_R0, i0); + JSR(_R0); + } + + NOP(); + reset_fpu(_jit, 1); +} + +static jit_word_t +_movi_p(jit_state_t *_jit, jit_uint16_t r0, jit_word_t i0) +{ + jit_word_t w = _jit->pc.w; + + load_const(1, r0, 0); + + return (w); +} + +static jit_word_t +_jmpi_p(jit_state_t *_jit, jit_word_t i0) +{ + jit_uint16_t reg; + jit_word_t w; + + set_fmode(_jit, SH_DEFAULT_FPU_MODE); + + reg = jit_get_reg(jit_class_gpr); + w = movi_p(rn(reg), i0); + jmpr(rn(reg)); + jit_unget_reg(reg); + + return (w); +} + +static jit_word_t +_calli_p(jit_state_t *_jit, jit_word_t i0) +{ + jit_uint16_t reg; + jit_word_t w; + + reset_fpu(_jit, 0); + + reg = jit_get_reg(jit_class_gpr); + w = movi_p(rn(reg), i0); + JSR(rn(reg)); + NOP(); + jit_unget_reg(reg); + + reset_fpu(_jit, 1); + + return (w); +} + +static void +_vastart(jit_state_t *_jit, jit_int32_t r0) +{ + jit_int32_t reg; + + assert(_jitc->function->self.call & jit_call_varargs); + + /* Return jit_va_list_t in the register argument */ + addi(r0, JIT_FP, _jitc->function->vaoff); + reg = jit_get_reg(jit_class_gpr); + + /* Align pointer to 8 bytes with +4 bytes offset (so that the + * double values are aligned to 8 bytes */ + andi(r0, r0, -8); + addi(r0, r0, 4); + + /* Initialize the gpr begin/end pointers */ + addi(rn(reg), r0, sizeof(jit_va_list_t) + + _jitc->function->vagp * sizeof(jit_uint32_t)); + 
stxi(offsetof(jit_va_list_t, bgpr), r0, rn(reg)); + + addi(rn(reg), rn(reg), NUM_WORD_ARGS * sizeof(jit_word_t) + - _jitc->function->vagp * sizeof(jit_uint32_t)); + stxi(offsetof(jit_va_list_t, egpr), r0, rn(reg)); + + /* Initialize the fpr begin/end pointers */ + if (_jitc->function->vafp) + addi(rn(reg), rn(reg), _jitc->function->vafp * sizeof(jit_float32_t)); + + stxi(offsetof(jit_va_list_t, bfpr), r0, rn(reg)); + addi(rn(reg), rn(reg), NUM_FLOAT_ARGS * sizeof(jit_float32_t) + - _jitc->function->vafp * sizeof(jit_float32_t)); + stxi(offsetof(jit_va_list_t, efpr), r0, rn(reg)); + + /* Initialize the stack pointer to the first stack argument */ + addi(rn(reg), JIT_FP, _jitc->function->self.size); + stxi(offsetof(jit_va_list_t, over), r0, rn(reg)); + + jit_unget_reg(reg); +} + +static void +_vaarg(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + jit_int32_t rg0, rg1; + jit_word_t ge_code; + + assert(_jitc->function->self.call & jit_call_varargs); + + rg0 = jit_get_reg(jit_class_gpr); + rg1 = jit_get_reg(jit_class_gpr); + + /* Load begin/end gpr pointers */ + ldxi(rn(rg1), r1, offsetof(jit_va_list_t, egpr)); + movi(_R0, offsetof(jit_va_list_t, bgpr)); + ldxr(rn(rg0), r1, _R0); + + /* Check that we didn't reach the end gpr pointer. */ + CMPHS(rn(rg0), rn(rg1)); + + ge_code = _jit->pc.w; + BF(0); + + /* If we did, load the stack pointer instead. 
*/ + movi(_R0, offsetof(jit_va_list_t, over)); + ldxr(rn(rg0), r1, _R0); + + patch_at(ge_code, _jit->pc.w); + + /* All good, we can now load the actual value */ + ldxai_i(r0, rn(rg0), sizeof(jit_uint32_t)); + + /* Update the pointer (gpr or stack) to the next word */ + stxr(_R0, r1, rn(rg0)); + + jit_unget_reg(rg0); + jit_unget_reg(rg1); +} + +static void +_patch_abs(jit_state_t *_jit, jit_word_t instr, jit_word_t label) +{ + jit_instr_t *ptr = (jit_instr_t *)instr; + + ptr[0].ni.i = (label >> 24) & 0xff; + ptr[2].ni.i = (label >> 16) & 0xff; + ptr[4].ni.i = (label >> 8) & 0xff; + ptr[6].ni.i = (label >> 0) & 0xff; +} + +static void +_patch_at(jit_state_t *_jit, jit_word_t instr, jit_word_t label) +{ + jit_instr_t *ptr = (jit_instr_t *)instr; + jit_int32_t disp; + + switch (ptr->nmd.c) { + case 0xe: + patch_abs(instr, label); + break; + case 0xc: + disp = ((label - (instr & ~0x3)) >> 2) - 1; + assert(disp >= 0 && disp <= 255); + ptr->ni.i = disp; + break; + case 0xa: + disp = ((label - instr) >> 1) - 2; + assert(disp >= -2048 && disp <= 2046); + ptr->d.d = disp; + break; + case 0x8: + switch (ptr->ni.n) { + case 0x9: + case 0xb: + case 0xd: + case 0xf: + disp = ((label - instr) >> 1) - 2; + if (disp >= -128 && disp <= 127) { + ptr->ni.i = disp; + } else { + /* Invert bit 1: BT(S) <-> BF(S) */ + ptr->ni.n ^= 1 << 1; + + /* Opcode 2 is now a BRA opcode */ + ptr[1].d = (struct jit_instr_d){ .c = 0xa, .d = disp - 1 }; + } + break; + default: + assert(!"unhandled branch opcode"); + } + break; + case 0xd: + if (ptr->op & 0xff) { + /* TODO: Fix the mess. patch_at() gets called with 'instr' pointing + * to the mov.l opcode and 'label' being the value that should be + * loaded into the register. So we read the address at which the mov.l + * points to, and write the label there. 
*/ + *(jit_uint32_t *)((instr & ~0x3) + 4 + (ptr->op & 0xff) * 4) = label; + } else { + disp = ((label - instr) >> 2) - 1 + !!(instr & 0x3); + ptr->op = (ptr->op & 0xff00) | disp; + } + break; + default: + assert(!"unhandled branch opcode"); /* '!' is required: a bare string literal is always true, so the assert could never fire */ + } +} + +static void +_prolog(jit_state_t *_jit, jit_node_t *node) +{ + jit_uint16_t reg, regno, offs; + + if (_jitc->function->define_frame || _jitc->function->assume_frame) { + jit_int32_t frame = -_jitc->function->frame; + assert(_jitc->function->self.aoff >= frame); + if (_jitc->function->assume_frame) + return; + _jitc->function->self.aoff = frame; + } + + if (_jitc->function->allocar) + _jitc->function->self.aoff &= -8; + _jitc->function->stack = ((_jitc->function->self.alen - + /* align stack at 8 bytes */ + _jitc->function->self.aoff) + 7) & -8; + + ADDI(JIT_SP, -stack_framesize); + STDL(JIT_SP, JIT_FP, JIT_V_NUM + 1); + + STSPR(_R0); + STDL(JIT_SP, _R0, JIT_V_NUM); + + for (regno = 0; regno < JIT_V_NUM; regno++) + if (jit_regset_tstbit(&_jitc->function->regset, JIT_V(regno))) + STDL(JIT_SP, JIT_V(regno), regno); + + movr(JIT_FP, JIT_SP); + + if (_jitc->function->stack) + subi(JIT_SP, JIT_SP, _jitc->function->stack); + if (_jitc->function->allocar) { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), _jitc->function->self.aoff); + stxi_i(_jitc->function->aoffoff, JIT_FP, rn(reg)); + jit_unget_reg(reg); + } + + if (_jitc->function->self.call & jit_call_varargs) { + /* Align to 8 bytes with +4 bytes offset (so that the double + * values are aligned to 8 bytes */ + andi(JIT_R0, JIT_FP, -8); + addi(JIT_R0, JIT_R0, 4); + + for (regno = _jitc->function->vagp; jit_arg_reg_p(regno); regno++) { + stxi(_jitc->function->vaoff + + sizeof(jit_va_list_t) + + regno * sizeof(jit_word_t), + JIT_R0, rn(_R4 + regno)); + } + + for (regno = _jitc->function->vafp; jit_arg_f_reg_p(regno); regno++) { + stxi_f(_jitc->function->vaoff + + sizeof(jit_va_list_t) + + NUM_WORD_ARGS * sizeof(jit_word_t) + + regno * sizeof(jit_float32_t), + JIT_R0,
rn(_F4 + (regno ^ fpr_args_inverted()))); + } + } + + reset_fpu(_jit, 0); +} + +static void +_epilog(jit_state_t *_jit, jit_node_t *node) +{ + unsigned int i; + + if (_jitc->function->assume_frame) + return; + + reset_fpu(_jit, 1); + + movr(JIT_SP, JIT_FP); + + for (i = JIT_V_NUM; i > 0; i--) + if (jit_regset_tstbit(&_jitc->function->regset, JIT_V(i - 1))) + LDDL(JIT_V(i - 1), JIT_SP, i - 1); + + LDDL(JIT_FP, JIT_SP, JIT_V_NUM); + LDSPR(JIT_FP); + + LDDL(JIT_FP, JIT_SP, JIT_V_NUM + 1); + RTS(); + ADDI(JIT_SP, stack_framesize); +} +#endif /* CODE */ diff --git a/deps/lightning/lib/jit_sh-fpu.c b/deps/lightning/lib/jit_sh-fpu.c new file mode 100644 index 000000000..e440a6483 --- /dev/null +++ b/deps/lightning/lib/jit_sh-fpu.c @@ -0,0 +1,2394 @@ +/* + * Copyright (C) 2022 Free Software Foundation, Inc. + * + * This file is part of GNU lightning. + * + * GNU lightning is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU lightning is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * License for more details. 
+ * + * Authors: + * Paul Cercueil + */ + +#if PROTO +static void set_fmode(jit_state_t *_jit, jit_bool_t is_double); +static void set_fmode_no_r0(jit_state_t *_jit, jit_bool_t is_double); +static void reset_fpu(jit_state_t *_jit, jit_bool_t no_r0); + +static void _extr_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_bool_t); +# define extr_f(r0,r1) _extr_f(_jit,r0,r1,0) +# define extr_d(r0,r1) _extr_f(_jit,r0,r1,1) +static void _truncr_f_i(jit_state_t*,jit_int16_t,jit_int16_t,jit_bool_t); +# define truncr_f_i(r0,r1) _truncr_f_i(_jit,r0,r1,0) +# define truncr_d_i(r0,r1) _truncr_f_i(_jit,r0,r1,1) +static void _fmar_f(jit_state_t*,jit_uint16_t,jit_uint16_t, + jit_uint16_t,jit_uint16_t); +# define fmar_f(r0, r1, r2, r3) _fmar_f(_jit, r0, r1, r2, r3) +static void _fmar_d(jit_state_t*,jit_uint16_t,jit_uint16_t, + jit_uint16_t,jit_uint16_t); +# define fmar_d(r0, r1, r2, r3) _fmar_d(_jit, r0, r1, r2, r3) +static void _fmsr_f(jit_state_t*,jit_uint16_t,jit_uint16_t, + jit_uint16_t,jit_uint16_t); +# define fmsr_f(r0, r1, r2, r3) _fmsr_f(_jit, r0, r1, r2, r3) +static void _fmsr_d(jit_state_t*,jit_uint16_t,jit_uint16_t, + jit_uint16_t,jit_uint16_t); +# define fmsr_d(r0, r1, r2, r3) _fmsr_d(_jit, r0, r1, r2, r3) +static void _fnmar_f(jit_state_t*,jit_uint16_t,jit_uint16_t, + jit_uint16_t,jit_uint16_t); +# define fnmar_f(r0, r1, r2, r3) _fnmar_f(_jit, r0, r1, r2, r3) +static void _fnmar_d(jit_state_t*,jit_uint16_t,jit_uint16_t, + jit_uint16_t,jit_uint16_t); +# define fnmar_d(r0, r1, r2, r3) _fnmar_d(_jit, r0, r1, r2, r3) +static void _fnmsr_f(jit_state_t*,jit_uint16_t,jit_uint16_t, + jit_uint16_t,jit_uint16_t); +# define fnmsr_f(r0, r1, r2, r3) _fnmsr_f(_jit, r0, r1, r2, r3) +static void _fnmsr_d(jit_state_t*,jit_uint16_t,jit_uint16_t, + jit_uint16_t,jit_uint16_t); +# define fnmsr_d(r0, r1, r2, r3) _fnmsr_d(_jit, r0, r1, r2, r3) +static void _movr_f(jit_state_t*,jit_uint16_t,jit_uint16_t); +# define movr_f(r0,r1) _movr_f(_jit,r0,r1) +static void 
_movr_d(jit_state_t*,jit_uint16_t,jit_uint16_t); +# define movr_d(r0,r1) _movr_d(_jit,r0,r1) +static void _movi_f(jit_state_t*,jit_uint16_t,jit_float32_t); +# define movi_f(r0,i0) _movi_f(_jit,r0,i0) +static void _movi_d(jit_state_t*,jit_uint16_t,jit_float64_t); +# define movi_d(r0,i0) _movi_d(_jit,r0,i0) +static void _ltr_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_int16_t,jit_bool_t); +# define ltr_f(r0,r1,r2) _ltr_f(_jit,r0,r1,r2,0) +# define ltr_d(r0,r1,r2) _ltr_f(_jit,r0,r1,r2,1) +static void _lti_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_float32_t); +# define lti_f(r0,r1,i0) _lti_f(_jit,r0,r1,i0) +static void _lti_d(jit_state_t*,jit_int16_t,jit_int16_t,jit_float64_t); +# define lti_d(r0,r1,i0) _lti_d(_jit,r0,r1,i0) +static void _ler_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_int16_t,jit_bool_t); +# define ler_f(r0,r1,r2) _ler_f(_jit,r0,r1,r2,0) +# define ler_d(r0,r1,r2) _ler_f(_jit,r0,r1,r2,1) +static void _lei_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_float32_t); +# define lei_f(r0,r1,i0) _lei_f(_jit,r0,r1,i0) +static void _lei_d(jit_state_t*,jit_int16_t,jit_int16_t,jit_float64_t); +# define lei_d(r0,r1,i0) _lei_d(_jit,r0,r1,i0) +static void _eqr_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_int16_t,jit_bool_t); +# define eqr_f(r0,r1,r2) _eqr_f(_jit,r0,r1,r2,0) +# define eqr_d(r0,r1,r2) _eqr_f(_jit,r0,r1,r2,1) +static void _eqi_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_float32_t); +# define eqi_f(r0,r1,i0) _eqi_f(_jit,r0,r1,i0) +static void _eqi_d(jit_state_t*,jit_int16_t,jit_int16_t,jit_float64_t); +# define eqi_d(r0,r1,i0) _eqi_d(_jit,r0,r1,i0) +# define ger_f(r0,r1,r2) ler_f(r0,r2,r1) +# define ger_d(r0,r1,r2) ler_d(r0,r2,r1) +static void _gei_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_float32_t); +# define gei_f(r0,r1,i0) _gei_f(_jit,r0,r1,i0) +static void _gei_d(jit_state_t*,jit_int16_t,jit_int16_t,jit_float64_t); +# define gei_d(r0,r1,i0) _gei_d(_jit,r0,r1,i0) +# define gtr_f(r0,r1,r2) ltr_f(r0,r2,r1) +# define gtr_d(r0,r1,r2) ltr_d(r0,r2,r1) +static 
void _gti_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_float32_t); +# define gti_f(r0,r1,i0) _gti_f(_jit,r0,r1,i0) +static void _gti_d(jit_state_t*,jit_int16_t,jit_int16_t,jit_float64_t); +# define gti_d(r0,r1,i0) _gti_d(_jit,r0,r1,i0) +static void _ner_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_int16_t,jit_bool_t); +# define ner_f(r0,r1,r2) _ner_f(_jit,r0,r1,r2,0) +# define ner_d(r0,r1,r2) _ner_f(_jit,r0,r1,r2,1) +static void _nei_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_float32_t); +# define nei_f(r0,r1,i0) _nei_f(_jit,r0,r1,i0) +static void _nei_d(jit_state_t*,jit_int16_t,jit_int16_t,jit_float64_t); +# define nei_d(r0,r1,i0) _nei_d(_jit,r0,r1,i0) +static void _unltr_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_int16_t,jit_bool_t); +# define unltr_f(r0,r1,r2) _unltr_f(_jit,r0,r1,r2,0) +# define unltr_d(r0,r1,r2) _unltr_f(_jit,r0,r1,r2,1) +static void _unlti_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_float32_t); +# define unlti_f(r0,r1,i0) _unlti_f(_jit,r0,r1,i0) +static void _unlti_d(jit_state_t*,jit_int16_t,jit_int16_t,jit_float64_t); +# define unlti_d(r0,r1,i0) _unlti_d(_jit,r0,r1,i0) +static void _unler_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_int16_t, + jit_bool_t); +# define unler_f(r0,r1,r2) _unler_f(_jit,r0,r1,r2,0) +# define unler_d(r0,r1,r2) _unler_f(_jit,r0,r1,r2,1) +static void _unlei_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_float32_t); +# define unlei_f(r0,r1,i0) _unlei_f(_jit,r0,r1,i0) +static void _unlei_d(jit_state_t*,jit_int16_t,jit_int16_t,jit_float64_t); +# define unlei_d(r0,r1,i0) _unlei_d(_jit,r0,r1,i0) +# define ungtr_f(r0,r1,r2) unltr_f(r0,r2,r1) +# define ungtr_d(r0,r1,r2) unltr_d(r0,r2,r1) +static void _ungti_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_float32_t); +# define ungti_f(r0,r1,i0) _ungti_f(_jit,r0,r1,i0) +static void _ungti_d(jit_state_t*,jit_int16_t,jit_int16_t,jit_float64_t); +# define ungti_d(r0,r1,i0) _ungti_d(_jit,r0,r1,i0) +# define unger_f(r0,r1,r2) _unler_f(_jit,r0,r2,r1,0) +# define unger_d(r0,r1,r2) 
_unler_f(_jit,r0,r2,r1,1) +static void _ungei_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_float32_t); +# define ungei_f(r0,r1,i0) _ungei_f(_jit,r0,r1,i0) +static void _ungei_d(jit_state_t*,jit_int16_t,jit_int16_t,jit_float64_t); +# define ungei_d(r0,r1,i0) _ungei_d(_jit,r0,r1,i0) +static void _uneqr_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_int16_t, + jit_bool_t); +# define uneqr_f(r0,r1,r2) _uneqr_f(_jit,r0,r1,r2,0) +# define uneqr_d(r0,r1,r2) _uneqr_f(_jit,r0,r1,r2,1) +static void _uneqi_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_float32_t); +# define uneqi_f(r0,r1,i0) _uneqi_f(_jit,r0,r1,i0) +static void _uneqi_d(jit_state_t*,jit_int16_t,jit_int16_t,jit_float64_t); +# define uneqi_d(r0,r1,i0) _uneqi_d(_jit,r0,r1,i0) +static void _ltgtr_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_int16_t,jit_bool_t); +# define ltgtr_f(r0,r1,r2) _ltgtr_f(_jit,r0,r1,r2,0) +# define ltgtr_d(r0,r1,r2) _ltgtr_f(_jit,r0,r1,r2,1) +static void _ltgti_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_float32_t); +# define ltgti_f(r0,r1,i0) _ltgti_f(_jit,r0,r1,i0) +static void _ltgti_d(jit_state_t*,jit_int16_t,jit_int16_t,jit_float64_t); +# define ltgti_d(r0,r1,i0) _ltgti_d(_jit,r0,r1,i0) +static void _ordr_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_int16_t,jit_bool_t); +# define ordr_f(r0,r1,r2) _ordr_f(_jit,r0,r1,r2,0) +# define ordr_d(r0,r1,r2) _ordr_f(_jit,r0,r1,r2,1) +static void _ordi_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_float32_t); +# define ordi_f(r0,r1,i0) _ordi_f(_jit,r0,r1,i0) +static void _ordi_d(jit_state_t*,jit_int16_t,jit_int16_t,jit_float64_t); +# define ordi_d(r0,r1,i0) _ordi_d(_jit,r0,r1,i0) +static void _unordr_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_int16_t,jit_bool_t); +# define unordr_f(r0,r1,r2) _unordr_f(_jit,r0,r1,r2,0) +# define unordr_d(r0,r1,r2) _unordr_f(_jit,r0,r1,r2,1) +static void _unordi_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_float32_t); +# define unordi_f(r0,r1,i0) _unordi_f(_jit,r0,r1,i0) +static void 
_unordi_d(jit_state_t*,jit_int16_t,jit_int16_t,jit_float64_t); +# define unordi_d(r0,r1,i0) _unordi_d(_jit,r0,r1,i0) +static void _addr_f(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t,jit_bool_t); +# define addr_f(r0,r1,r2) _addr_f(_jit,r0,r1,r2,0) +# define addr_d(r0,r1,r2) _addr_f(_jit,r0,r1,r2,1) +static void _addi_f(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_float32_t); +# define addi_f(r0,r1,i0) _addi_f(_jit,r0,r1,i0) +static void _addi_d(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_float64_t); +# define addi_d(r0,r1,i0) _addi_d(_jit,r0,r1,i0) +static void _subr_f(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# define subr_f(r0,r1,r2) _subr_f(_jit,r0,r1,r2) +static void _subr_d(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# define subr_d(r0,r1,r2) _subr_d(_jit,r0,r1,r2) +static void _subi_f(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_float32_t); +# define subi_f(r0,r1,i0) _subi_f(_jit,r0,r1,i0) +static void _subi_d(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_float64_t); +# define subi_d(r0,r1,i0) _subi_d(_jit,r0,r1,i0) +static void _negr_f(jit_state_t*,jit_uint16_t,jit_uint16_t); +# define negr_f(r0,r1) _negr_f(_jit,r0,r1) +static void _negr_d(jit_state_t*,jit_uint16_t,jit_uint16_t); +# define negr_d(r0,r1) _negr_d(_jit,r0,r1) +# define rsbr_f(r0,r1,r2) subr_f(r0,r2,r1) +static void _rsbi_f(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_float32_t); +# define rsbi_f(r0,r1,i0) _rsbi_f(_jit,r0,r1,i0) +# define rsbr_d(r0,r1,r2) subr_d(r0,r2,r1) +static void _rsbi_d(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_float64_t); +# define rsbi_d(r0,r1,i0) _rsbi_d(_jit,r0,r1,i0) +static void _mulr_f(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# define mulr_f(r0,r1,r2) _mulr_f(_jit,r0,r1,r2) +static void _muli_f(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_float32_t); +# define muli_f(r0,r1,i0) _muli_f(_jit,r0,r1,i0) +static void _mulr_d(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# define mulr_d(r0,r1,r2) _mulr_d(_jit,r0,r1,r2) +static 
void _muli_d(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_float64_t); +# define muli_d(r0,r1,i0) _muli_d(_jit,r0,r1,i0) +static void _divr_f(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# define divr_f(r0,r1,r2) _divr_f(_jit,r0,r1,r2) +static void _divi_f(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_float32_t); +# define divi_f(r0,r1,i0) _divi_f(_jit,r0,r1,i0) +static void _divr_d(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# define divr_d(r0,r1,r2) _divr_d(_jit,r0,r1,r2) +static void _divi_d(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_float64_t); +# define divi_d(r0,r1,i0) _divi_d(_jit,r0,r1,i0) +static void _movr_w_f(jit_state_t*,jit_uint16_t,jit_int16_t); +#define movr_w_f(r0,r1) _movr_w_f(_jit,r0,r1) +static void _movr_f_w(jit_state_t*,jit_uint16_t,jit_int16_t); +#define movr_f_w(r0,r1) _movr_f_w(_jit,r0,r1) +static void _movi_w_f(jit_state_t*,jit_int16_t,jit_word_t); +# define movi_w_f(r0,i0) _movi_w_f(_jit,r0,i0) +static void _movr_ww_d(jit_state_t*,jit_uint16_t,jit_int16_t, jit_int16_t); +# define movr_ww_d(r0,r1,r2) _movr_ww_d(_jit,r0,r1,r2) +static void _movr_d_ww(jit_state_t*,jit_uint16_t,jit_int16_t, jit_int16_t); +# define movr_d_ww(r0,r1,r2) _movr_d_ww(_jit,r0,r1,r2) +static void _movi_ww_d(jit_state_t*,jit_int16_t,jit_word_t, jit_word_t); +# define movi_ww_d(r0,i0,i1) _movi_ww_d(_jit,r0,i0,i1) +static void _absr_f(jit_state_t*,jit_uint16_t,jit_uint16_t); +# define absr_f(r0,r1) _absr_f(_jit,r0,r1) +static void _absr_d(jit_state_t*,jit_uint16_t,jit_uint16_t); +# define absr_d(r0,r1) _absr_d(_jit,r0,r1) +static void _sqrtr_f(jit_state_t*,jit_uint16_t,jit_uint16_t); +# define sqrtr_f(r0,r1) _sqrtr_f(_jit,r0,r1) +static void _sqrtr_d(jit_state_t*,jit_uint16_t,jit_uint16_t); +# define sqrtr_d(r0,r1) _sqrtr_d(_jit,r0,r1) +static void _extr_d_f(jit_state_t*,jit_uint16_t,jit_uint16_t); +# define extr_d_f(r0,r1) _extr_d_f(_jit,r0,r1) +static void _extr_f_d(jit_state_t*,jit_uint16_t,jit_uint16_t); +# define extr_f_d(r0,r1) _extr_f_d(_jit,r0,r1) 
+# define ldr_f(r0,r1) LDF(r0,r1) +static void _ldr_d(jit_state_t*,jit_uint16_t,jit_uint16_t); +# define ldr_d(r0,r1) _ldr_d(_jit,r0,r1) +static void _ldi_f(jit_state_t*,jit_uint16_t,jit_word_t); +# define ldi_f(r0,i0) _ldi_f(_jit,r0,i0) +static void _ldi_d(jit_state_t*,jit_uint16_t,jit_word_t); +# define ldi_d(r0,i0) _ldi_d(_jit,r0,i0) +static void _ldxr_f(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# define ldxr_f(r0,r1,r2) _ldxr_f(_jit,r0,r1,r2) +static void _ldxr_d(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# define ldxr_d(r0,r1,r2) _ldxr_d(_jit,r0,r1,r2) +static void _ldxi_f(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# define ldxi_f(r0,r1,i0) _ldxi_f(_jit,r0,r1,i0) +static void _ldxi_d(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t); +# define ldxi_d(r0,r1,i0) _ldxi_d(_jit,r0,r1,i0) +# define unldr_x(r0,r1,i0) fallback_unldr_x(r0,r1,i0) +# define unldi_x(r0,i0,i1) fallback_unldi_x(r0,i0,i1) +# define str_f(r0,r1) STF(r0,r1) +static void _str_d(jit_state_t*,jit_uint16_t,jit_uint16_t); +# define str_d(r0,r1) _str_d(_jit,r0,r1) +static void _sti_f(jit_state_t*,jit_word_t,jit_uint16_t); +# define sti_f(i0,r0) _sti_f(_jit,i0,r0) +static void _sti_d(jit_state_t*,jit_word_t,jit_uint16_t); +# define sti_d(i0,r0) _sti_d(_jit,i0,r0) +static void _stxr_f(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# define stxr_f(r0,r1,r2) _stxr_f(_jit,r0,r1,r2) +static void _stxr_d(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t); +# define stxr_d(r0,r1,r2) _stxr_d(_jit,r0,r1,r2) +static void _stxi_f(jit_state_t*,jit_word_t,jit_uint16_t,jit_uint16_t); +# define stxi_f(i0,r0,r1) _stxi_f(_jit,i0,r0,r1) +static void _stxi_d(jit_state_t*,jit_word_t,jit_uint16_t,jit_uint16_t); +# define stxi_d(i0,r0,r1) _stxi_d(_jit,i0,r0,r1) +# define unstr_x(r0,r1,i0) fallback_unstr_x(r0,r1,i0) +# define unsti_x(i0,r0,i1) fallback_unsti_x(i0,r0,i1) +static jit_word_t _beqr_f(jit_state_t*,jit_word_t,jit_uint16_t,jit_uint16_t, + 
jit_bool_t,jit_bool_t,jit_bool_t); +# define beqr_f(i0,r0,r1) beqr_f_p(i0,r0,r1,0) +# define bner_f(i0,r0,r1) bner_f_p(i0,r0,r1,0) +# define beqr_d(i0,r0,r1) beqr_d_p(i0,r0,r1,0) /* double compare: dispatch to the _d_p form, not _f_p */ +# define bner_d(i0,r0,r1) bner_d_p(i0,r0,r1,0) /* double compare: dispatch to the _d_p form, not _f_p */ +# define beqr_f_p(i0,r0,r1,p) _beqr_f(_jit,i0,r0,r1,0,1,p) +# define bner_f_p(i0,r0,r1,p) _beqr_f(_jit,i0,r0,r1,0,0,p) +# define beqr_d_p(i0,r0,r1,p) _beqr_f(_jit,i0,r0,r1,1,1,p) +# define bner_d_p(i0,r0,r1,p) _beqr_f(_jit,i0,r0,r1,1,0,p) +static jit_word_t _beqi_f(jit_state_t*,jit_word_t,jit_uint16_t, + jit_float32_t,jit_bool_t,jit_bool_t); +# define beqi_f(i0,r0,i1) beqi_f_p(i0,r0,i1,0) +# define bnei_f(i0,r0,i1) bnei_f_p(i0,r0,i1,0) +# define beqi_f_p(i0,r0,i1,p) _beqi_f(_jit,i0,r0,i1,1,p) +# define bnei_f_p(i0,r0,i1,p) _beqi_f(_jit,i0,r0,i1,0,p) +static jit_word_t _beqi_d(jit_state_t*,jit_word_t,jit_uint16_t, + jit_float64_t,jit_bool_t,jit_bool_t); +# define beqi_d(i0,r0,i1) beqi_d_p(i0,r0,i1,0) +# define bnei_d(i0,r0,i1) bnei_d_p(i0,r0,i1,0) +# define beqi_d_p(i0,r0,i1,p) _beqi_d(_jit,i0,r0,i1,1,p) +# define bnei_d_p(i0,r0,i1,p) _beqi_d(_jit,i0,r0,i1,0,p) +static jit_word_t +_blti_f(jit_state_t*,jit_word_t,jit_int16_t,jit_float32_t,jit_bool_t); +# define blti_f(i0,r0,i1) blti_f_p(i0,r0,i1,0) +# define blti_f_p(i0,r0,i1,p) _blti_f(_jit,i0,r0,i1,p) +static jit_word_t +_blti_d(jit_state_t*,jit_word_t,jit_int16_t,jit_float64_t,jit_bool_t); +# define blti_d(i0,r0,i1) blti_d_p(i0,r0,i1,0) +# define blti_d_p(i0,r0,i1,p) _blti_d(_jit,i0,r0,i1,p) +static jit_word_t _bgtr_f(jit_state_t*,jit_word_t,jit_int16_t,jit_int16_t, + jit_bool_t,jit_bool_t,jit_bool_t); +# define bgtr_f(i0,r0,r1) bgtr_f_p(i0,r0,r1,0) +# define bgtr_d(i0,r0,r1) bgtr_d_p(i0,r0,r1,0) +# define bltr_f(i0,r0,r1) bltr_f_p(i0,r0,r1,0) /* no swap here: bltr_f_p already swaps the operands */ +# define bltr_d(i0,r0,r1) bltr_d_p(i0,r0,r1,0) /* no swap here: bltr_d_p already swaps the operands */ +# define bgtr_f_p(i0,r0,r1,p) _bgtr_f(_jit,i0,r0,r1,0,1,p) +# define bgtr_d_p(i0,r0,r1,p) _bgtr_f(_jit,i0,r0,r1,1,1,p) +# define bltr_f_p(i0,r0,r1,p) _bgtr_f(_jit,i0,r1,r0,0,1,p) +# define
bltr_d_p(i0,r0,r1,p) _bgtr_f(_jit,i0,r1,r0,1,1,p) +static jit_word_t +_bgti_f(jit_state_t*,jit_word_t,jit_int16_t,jit_float32_t,jit_bool_t); +# define bgti_f(i0,r0,i1) bgti_f_p(i0,r0,i1,0) +# define bgti_f_p(i0,r0,i1,p) _bgti_f(_jit,i0,r0,i1,p) +static jit_word_t +_bgti_d(jit_state_t*,jit_word_t,jit_int16_t,jit_float64_t,jit_bool_t); +# define bgti_d(i0,r0,i1) bgti_d_p(i0,r0,i1,0) +# define bgti_d_p(i0,r0,i1,p) _bgti_d(_jit,i0,r0,i1,p) +static jit_word_t _bler_f(jit_state_t*,jit_word_t,jit_int16_t,jit_int16_t, + jit_bool_t,jit_bool_t,jit_bool_t); +# define bler_f(i0,r0,r1) bler_f_p(i0,r0,r1,0) +# define bler_d(i0,r0,r1) bler_d_p(i0,r0,r1,0) +# define bler_f_p(i0,r0,r1,p) _bler_f(_jit,i0,r0,r1,0,0,p) +# define bler_d_p(i0,r0,r1,p) _bler_f(_jit,i0,r0,r1,1,0,p) +static jit_word_t +_blei_f(jit_state_t*,jit_word_t,jit_int16_t,jit_float32_t,jit_bool_t); +# define blei_f(i0,r0,i1) blei_f_p(i0,r0,i1,0) +# define blei_f_p(i0,r0,i1,p) _blei_f(_jit,i0,r0,i1,p) +static jit_word_t +_blei_d(jit_state_t*,jit_word_t,jit_int16_t,jit_float64_t,jit_bool_t); +# define blei_d(i0,r0,i1) blei_d_p(i0,r0,i1,0) +# define blei_d_p(i0,r0,i1,p) _blei_d(_jit,i0,r0,i1,p) +# define bger_f(i0,r0,r1) bger_f_p(i0,r0,r1,0) /* no swap here: bger_f_p already swaps the operands */ +# define bger_d(i0,r0,r1) bger_d_p(i0,r0,r1,0) /* no swap here: bger_d_p already swaps the operands */ +# define bger_f_p(i0,r0,r1,p) bler_f_p(i0,r1,r0,p) +# define bger_d_p(i0,r0,r1,p) bler_d_p(i0,r1,r0,p) +static jit_word_t +_bgei_f(jit_state_t*,jit_word_t,jit_int16_t,jit_float32_t,jit_bool_t); +# define bgei_f(i0,r0,i1) bgei_f_p(i0,r0,i1,0) +# define bgei_f_p(i0,r0,i1,p) _bgei_f(_jit,i0,r0,i1,p) +static jit_word_t +_bgei_d(jit_state_t*,jit_word_t,jit_int16_t,jit_float64_t,jit_bool_t); +# define bgei_d(i0,r0,i1) bgei_d_p(i0,r0,i1,0) +# define bgei_d_p(i0,r0,i1,p) _bgei_d(_jit,i0,r0,i1,p) +# define bunltr_f(i0,r0,r1) bunltr_f_p(i0,r0,r1,0) /* no swap here: bunltr_f_p already swaps the operands */ +# define bunltr_d(i0,r0,r1) bunltr_d_p(i0,r0,r1,0) /* no swap here: bunltr_d_p already swaps the operands */ +# define bunltr_f_p(i0,r0,r1,p) _bler_f(_jit,i0,r1,r0,0,1,p) +# define bunltr_d_p(i0,r0,r1,p) _bler_f(_jit,i0,r1,r0,1,1,p) +static jit_word_t
+_bunlti_f(jit_state_t*,jit_word_t,jit_int16_t,jit_float32_t,jit_bool_t); +# define bunlti_f(i0,r0,i1) bunlti_f_p(i0,r0,i1,0) +# define bunlti_f_p(i0,r0,i1,p) _bunlti_f(_jit,i0,r0,i1,p) +static jit_word_t +_bunlti_d(jit_state_t*,jit_word_t,jit_int16_t,jit_float64_t,jit_bool_t); +# define bunlti_d(i0,r0,i1) bunlti_d_p(i0,r0,i1,0) +# define bunlti_d_p(i0,r0,i1,p) _bunlti_d(_jit,i0,r0,i1,p) +# define bunler_f(i0,r0,r1) bunler_f_p(i0,r0,r1,0) +# define bunler_d(i0,r0,r1) bunler_d_p(i0,r0,r1,0) +# define bunler_f_p(i0,r0,r1,p) _bgtr_f(_jit,i0,r0,r1,0,0,p) +# define bunler_d_p(i0,r0,r1,p) _bgtr_f(_jit,i0,r0,r1,1,0,p) +static jit_word_t +_bunlei_f(jit_state_t*,jit_word_t,jit_int16_t,jit_float32_t,jit_bool_t); +# define bunlei_f(i0,r0,i1) bunlei_f_p(i0,r0,i1,0) +# define bunlei_f_p(i0,r0,i1,p) _bunlei_f(_jit,i0,r0,i1,p) +static jit_word_t +_bunlei_d(jit_state_t*,jit_word_t,jit_int16_t,jit_float64_t,jit_bool_t); +# define bunlei_d(i0,r0,i1) bunlei_d_p(i0,r0,i1,0) +# define bunlei_d_p(i0,r0,i1,p) _bunlei_d(_jit,i0,r0,i1,p) +# define bungtr_f(i0,r0,r1) bungtr_f_p(i0,r0,r1,0) +# define bungtr_d(i0,r0,r1) bungtr_d_p(i0,r0,r1,0) +# define bungtr_f_p(i0,r0,r1,p) _bler_f(_jit,i0,r0,r1,0,1,p) +# define bungtr_d_p(i0,r0,r1,p) _bler_f(_jit,i0,r0,r1,1,1,p) +static jit_word_t +_bungti_f(jit_state_t*,jit_word_t,jit_int16_t,jit_float32_t,jit_bool_t); +# define bungti_f(i0,r0,i1) bungti_f_p(i0,r0,i1,0) +# define bungti_f_p(i0,r0,i1,p) _bungti_f(_jit,i0,r0,i1,p) +static jit_word_t +_bungti_d(jit_state_t*,jit_word_t,jit_int16_t,jit_float64_t,jit_bool_t); +# define bungti_d(i0,r0,i1) bungti_d_p(i0,r0,i1,0) +# define bungti_d_p(i0,r0,i1,p) _bungti_d(_jit,i0,r0,i1,p) +# define bunger_f(i0,r0,r1) bunger_f_p(i0,r0,r1,0) /* no swap here: bunger_f_p already swaps the operands */ +# define bunger_d(i0,r0,r1) bunger_d_p(i0,r0,r1,0) /* no swap here: bunger_d_p already swaps the operands */ +# define bunger_f_p(i0,r0,r1,p) _bgtr_f(_jit,i0,r1,r0,0,0,p) +# define bunger_d_p(i0,r0,r1,p) _bgtr_f(_jit,i0,r1,r0,1,0,p) +static jit_word_t +_bungei_f(jit_state_t*,jit_word_t,jit_int16_t,jit_float32_t,jit_bool_t); +# define
bungei_f(i0,r0,i1) bungei_f_p(i0,r0,i1,0) +# define bungei_f_p(i0,r0,i1,p) _bungei_f(_jit,i0,r0,i1,p) +static jit_word_t +_bungei_d(jit_state_t*,jit_word_t,jit_int16_t,jit_float64_t,jit_bool_t); +# define bungei_d(i0,r0,i1) bungei_d_p(i0,r0,i1,0) +# define bungei_d_p(i0,r0,i1,p) _bungei_d(_jit,i0,r0,i1,p) +static jit_word_t _buneqr_f(jit_state_t*,jit_word_t,jit_int16_t, + jit_int16_t,jit_bool_t,jit_bool_t); +# define buneqr_f(i0,r0,r1) buneqr_f_p(i0,r1,r0,0) +# define buneqr_d(i0,r0,r1) buneqr_d_p(i0,r1,r0,0) +# define buneqr_f_p(i0,r0,r1,p) _buneqr_f(_jit,i0,r1,r0,0,p) +# define buneqr_d_p(i0,r0,r1,p) _buneqr_f(_jit,i0,r1,r0,1,p) +static jit_word_t +_buneqi_f(jit_state_t*,jit_word_t,jit_int16_t,jit_float32_t,jit_bool_t); +# define buneqi_f(i0,r0,i1) buneqi_f_p(i0,r0,i1,0) +# define buneqi_f_p(i0,r0,i1,p) _buneqi_f(_jit,i0,r0,i1,p) +static jit_word_t +_buneqi_d(jit_state_t*,jit_word_t,jit_int16_t,jit_float64_t,jit_bool_t); +# define buneqi_d(i0,r0,i1) buneqi_d_p(i0,r0,i1,0) +# define buneqi_d_p(i0,r0,i1,p) _buneqi_d(_jit,i0,r0,i1,p) +static jit_word_t _bltgtr_f(jit_state_t*,jit_word_t,jit_int16_t, + jit_int16_t,jit_bool_t,jit_bool_t); +# define bltgtr_f(i0,r0,r1) bltgtr_f_p(i0,r1,r0,0) +# define bltgtr_d(i0,r0,r1) bltgtr_d_p(i0,r1,r0,0) +# define bltgtr_f_p(i0,r0,r1,p) _bltgtr_f(_jit,i0,r1,r0,0,p) +# define bltgtr_d_p(i0,r0,r1,p) _bltgtr_f(_jit,i0,r1,r0,1,p) +static jit_word_t +_bltgti_f(jit_state_t*,jit_word_t,jit_int16_t,jit_float32_t,jit_bool_t); +# define bltgti_f(i0,r0,i1) bltgti_f_p(i0,r0,i1,0) +# define bltgti_f_p(i0,r0,i1,p) _bltgti_f(_jit,i0,r0,i1,p) +static jit_word_t +_bltgti_d(jit_state_t*,jit_word_t,jit_int16_t,jit_float64_t,jit_bool_t); +# define bltgti_d(i0,r0,i1) bltgti_d_p(i0,r0,i1,0) +# define bltgti_d_p(i0,r0,i1,p) _bltgti_d(_jit,i0,r0,i1,p) +static jit_word_t _bordr_f(jit_state_t*,jit_word_t,jit_int16_t,jit_int16_t, + jit_bool_t,jit_bool_t,jit_bool_t); +# define bordr_f(i0,r0,r1) bordr_f_p(i0,r0,r1,0) +# define bordr_d(i0,r0,r1) 
bordr_d_p(i0,r0,r1,0) +# define bordr_f_p(i0,r0,r1,p) _bordr_f(_jit,i0,r0,r1,0,1,p) +# define bordr_d_p(i0,r0,r1,p) _bordr_f(_jit,i0,r0,r1,1,1,p) +static jit_word_t +_bordi_f(jit_state_t*,jit_word_t,jit_int16_t,jit_float32_t,jit_bool_t); +# define bordi_f(i0,r0,i1) bordi_f_p(i0,r0,i1,0) +# define bordi_f_p(i0,r0,i1,p) _bordi_f(_jit,i0,r0,i1,p) +static jit_word_t +_bordi_d(jit_state_t*,jit_word_t,jit_int16_t,jit_float64_t,jit_bool_t); +# define bordi_d(i0,r0,i1) bordi_d_p(i0,r0,i1,0) +# define bordi_d_p(i0,r0,i1,p) _bordi_d(_jit,i0,r0,i1,p) +# define bunordr_f(i0,r0,r1) bunordr_f_p(i0,r0,r1,0) +# define bunordr_d(i0,r0,r1) bunordr_d_p(i0,r0,r1,0) +# define bunordr_f_p(i0,r0,r1,p) _bordr_f(_jit,i0,r0,r1,0,0,p) +# define bunordr_d_p(i0,r0,r1,p) _bordr_f(_jit,i0,r0,r1,1,0,p) +static jit_word_t +_bunordi_f(jit_state_t*,jit_word_t,jit_int16_t,jit_float32_t,jit_bool_t); +# define bunordi_f(i0,r0,i1) bunordi_f_p(i0,r0,i1,0) +# define bunordi_f_p(i0,r0,i1,p) _bunordi_f(_jit,i0,r0,i1,p) +static jit_word_t +_bunordi_d(jit_state_t*,jit_word_t,jit_int16_t,jit_float64_t,jit_bool_t); +# define bunordi_d(i0,r0,i1) bunordi_d_p(i0,r0,i1,0) +# define bunordi_d_p(i0,r0,i1,p) _bunordi_d(_jit,i0,r0,i1,p) +# define ldxbi_f(r0,r1,i0) generic_ldxbi_f(r0,r1,i0) +# define ldxbi_d(r0,r1,i0) generic_ldxbi_d(r0,r1,i0) +static void +_ldxai_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_word_t); +# define ldxai_f(r0,r1,i0) _ldxai_f(_jit,r0,r1,i0) +static void +_ldxai_d(jit_state_t*,jit_int16_t,jit_int16_t,jit_word_t); +# define ldxai_d(r0,r1,i0) _ldxai_d(_jit,r0,r1,i0) +static void +_stxbi_f(jit_state_t*,jit_word_t,jit_int16_t,jit_int16_t); +# define stxbi_f(i0,r0,r1) _stxbi_f(_jit,i0,r0,r1) +static void +_stxbi_d(jit_state_t*,jit_word_t,jit_int16_t,jit_int16_t); +# define stxbi_d(i0,r0,r1) _stxbi_d(_jit,i0,r0,r1) +# define stxai_f(i0,r0,r1) generic_stxai_f(i0,r0,r1) +# define stxai_d(i0,r0,r1) generic_stxai_d(i0,r0,r1) +static void _vaarg_d(jit_state_t*,jit_int32_t,jit_int32_t); +# define 
vaarg_d(r0, r1) _vaarg_d(_jit, r0, r1) +#endif /* PROTO */ + +#if CODE +static void set_fmode_mask(jit_state_t *_jit, jit_uint32_t mask, jit_bool_t no_r0) +{ + jit_uint16_t reg, reg2; + + if (SH_HAS_FPU && _jitc->uses_fpu) { + if (no_r0) { + reg = jit_get_reg(jit_class_gpr); + reg2 = jit_get_reg(jit_class_gpr); + + movi(rn(reg2), mask); + STSFP(rn(reg)); + xorr(rn(reg), rn(reg), rn(reg2)); + LDSFP(rn(reg)); + + jit_unget_reg(reg); + jit_unget_reg(reg2); + } else { + STSFP(_R0); + SWAPW(_R0, _R0); + XORI(mask >> 16); + SWAPW(_R0, _R0); + LDSFP(_R0); + } + } +} + +static void set_fmode(jit_state_t *_jit, jit_bool_t is_double) +{ + if (SH_HAS_FPU && !SH_SINGLE_ONLY && _jitc->uses_fpu && _jitc->mode_d != is_double) { + set_fmode_mask(_jit, PR_FLAG, 0); + _jitc->mode_d = is_double; + } +} + +static void reset_fpu(jit_state_t *_jit, jit_bool_t no_r0) +{ + if (SH_HAS_FPU && _jitc->uses_fpu) { + if (_jitc->mode_d != SH_DEFAULT_FPU_MODE) + set_fmode_mask(_jit, PR_FLAG | FR_FLAG, no_r0); + else if (SH_DEFAULT_FPU_MODE) + set_fmode_mask(_jit, FR_FLAG, no_r0); + else + maybe_emit_frchg(); + + _jitc->mode_d = SH_DEFAULT_FPU_MODE; + } +} + +static void set_fmode_no_r0(jit_state_t *_jit, jit_bool_t is_double) +{ + if (SH_HAS_FPU && _jitc->uses_fpu && !SH_SINGLE_ONLY && _jitc->mode_d != is_double) { + set_fmode_mask(_jit, PR_FLAG, 1); + _jitc->mode_d = is_double; + } +} + +static void _extr_f(jit_state_t *_jit, jit_int16_t r0, + jit_int16_t r1, jit_bool_t is_double) +{ + set_fmode(_jit, is_double); + + LDS(r1); + FLOAT(r0); +} + +static void _truncr_f_i(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, + jit_bool_t is_double) +{ + set_fmode(_jit, is_double); + + FTRC(r1); + STSUL(r0); +} + +static void _fmar_f(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, + jit_uint16_t r2, jit_uint16_t r3) +{ + jit_uint16_t reg; + + set_fmode(_jit, 0); + + reg = jit_get_reg(_F0 | jit_class_fpr | jit_class_named | jit_class_chk); + + if (reg == JIT_NOREG) { + reg = 
jit_get_reg(jit_class_fpr); + mulr_f(rn(reg), r1, r2); + addr_f(r0, rn(reg), r3); + } else if (r0 == r2) { + movr_f(rn(reg), r2); + movr_f(r0, r3); + /* If r1 aliases r0 it was just overwritten with r3; its old value equals r2, which is preserved in the F0 copy. */ + if (r1 == r0) + FMAC(r0, rn(reg)); + else + FMAC(r0, r1); + } else { + movr_f(rn(reg), r1); + movr_f(r0, r3); + FMAC(r0, r2); + } + + jit_unget_reg(reg); +} + +/* r0 = r1 * r2 + r3 in double precision, built from mulr_d and addr_d; a temporary is used when r0 aliases r3 so the addend survives the multiply. */ +static void _fmar_d(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, + jit_uint16_t r2, jit_uint16_t r3) +{ + jit_uint16_t reg; + + if (r0 == r3) { + reg = jit_get_reg(jit_class_fpr); + + mulr_d(rn(reg), r1, r2); + addr_d(r0, rn(reg), r3); + + jit_unget_reg(reg); + } else { + mulr_d(r0, r1, r2); + addr_d(r0, r0, r3); + } +} + +/* r0 = r1 * r2 - r3 in single precision. Uses FMAC through F0 when F0 can be claimed, otherwise falls back to mulr_f + subr_f through a scratch register. */ +static void _fmsr_f(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, + jit_uint16_t r2, jit_uint16_t r3) +{ + jit_uint16_t reg; + + set_fmode(_jit, 0); + + reg = jit_get_reg(_F0 | jit_class_fpr | jit_class_named | jit_class_chk); + + if (reg == JIT_NOREG) { + reg = jit_get_reg(jit_class_fpr); + mulr_f(rn(reg), r1, r2); + subr_f(r0, rn(reg), r3); + } else if (r0 == r2) { + movr_f(rn(reg), r2); + movr_f(r0, r3); + FNEG(r0); + /* If r1 aliases r0 it now holds -r3; its old value equals r2, which is preserved in the F0 copy. */ + if (r1 == r0) + FMAC(r0, rn(reg)); + else + FMAC(r0, r1); + } else { + movr_f(rn(reg), r1); + movr_f(r0, r3); + FNEG(r0); + FMAC(r0, r2); + } + + jit_unget_reg(reg); +} + +/* r0 = r1 * r2 - r3 in double precision, built from mulr_d and subr_d; a temporary is used when r0 aliases r3 so the subtrahend survives the multiply. */ +static void _fmsr_d(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, + jit_uint16_t r2, jit_uint16_t r3) +{ + jit_uint16_t reg; + + if (r0 == r3) { + reg = jit_get_reg(jit_class_fpr); + + mulr_d(rn(reg), r1, r2); + subr_d(r0, rn(reg), r3); + + jit_unget_reg(reg); + } else { + mulr_d(r0, r1, r2); + subr_d(r0, r0, r3); + } +} + +/* r0 = r3 - r1 * r2 in single precision. Uses FMAC with a negated multiplicand in F0 when F0 can be claimed, otherwise negates the result of fmsr_f. */ +static void _fnmsr_f(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, + jit_uint16_t r2, jit_uint16_t r3) +{ + jit_uint16_t reg; + + set_fmode(_jit, 0); + + reg = jit_get_reg(_F0 | jit_class_fpr | jit_class_named | jit_class_chk); + + if (reg == JIT_NOREG) { + fmsr_f(r0, r1, r2, r3); + negr_f(r0, r0); + } else { + if (r0 == r2 && r1 == r0) { + /* r0, r1 and r2 are the same register: keep the operand in F0, compute F0*F0 - r3 and negate the result. */ + movr_f(rn(reg), r2); + movr_f(r0, r3); + FNEG(r0); + FMAC(r0, rn(reg)); + FNEG(r0); + } else if (r0 == r2) { + movr_f(rn(reg), r2); + FNEG(rn(reg)); + movr_f(r0, r3); + FMAC(r0, r1); + } else { + movr_f(rn(reg), r1); + FNEG(rn(reg)); + movr_f(r0, r3); + FMAC(r0, r2); + } + + 
jit_unget_reg(reg); + } +} + +static void _fnmsr_d(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, + jit_uint16_t r2, jit_uint16_t r3) +{ + fmsr_d(r0, r1, r2, r3); + negr_d(r0, r0); +} + +static void _fnmar_f(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, + jit_uint16_t r2, jit_uint16_t r3) +{ + fmar_f(r0, r1, r2, r3); + negr_f(r0, r0); +} + +static void _fnmar_d(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, + jit_uint16_t r2, jit_uint16_t r3) +{ + fmar_d(r0, r1, r2, r3); + negr_d(r0, r0); +} + +static void _movr_f(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1) +{ + if (r0 != r1) { + if (r0 >= _XF0 || r1 >= _XF0) { + set_fmode(_jit, 0); + + if (r0 >= _XF0 && r1 >= _XF0) { + maybe_emit_frchg(); + FMOV(r0 - _XF0, r1 - _XF0); + FRCHG(); + } else if (r0 >= _XF0) { + FLDS(r1); + FRCHG(); + FSTS(r0 - _XF0); + FRCHG(); + } else { + maybe_emit_frchg(); + FLDS(r1 - _XF0); + FRCHG(); + FSTS(r0); + } + } else { + FMOV(r0, r1); + } + } +} + +static void _movr_d(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1) +{ + if (r0 != r1) { + if (SH_SINGLE_ONLY) { + movr_f(r0, r1); + } else if (r0 >= _XF0 || r1 >= _XF0) { + set_fmode(_jit, 0); + maybe_emit_fschg(); + + if (r0 >= _XF0 && r1 >= _XF0) + FMOVXX(r0 - _XF0, r1 - _XF0); + else if (r0 >= _XF0) + FMOVXD(r0 - _XF0, r1); + else + FMOVDX(r0, r1 - _XF0); + + FSCHG(); + } else { + FMOV(r0, r1); + FMOV(r0 + 1, r1 + 1); + } + } +} + +/* Load the single-precision constant i0 into r0 (a bank register when r0 >= _XF0). +-0.0 and +-1.0 use FLDI0/FLDI1 plus FNEG; any other value goes through load_const_f. */ +static void _movi_f(jit_state_t *_jit, jit_uint16_t r0, jit_float32_t i0) +{ + jit_bool_t is_bank = r0 >= _XF0; + union { + jit_float32_t f; + jit_uint32_t w; + } bits; + + /* 0.0f == -0.0f compares true, so the sign of a zero has to be read from the bit pattern. */ + bits.f = i0; + + set_fmode(_jit, 0); + + if (is_bank) { + maybe_emit_frchg(); + r0 -= _XF0; + } + + if (i0 == 0.0f) { + FLDI0(r0); + /* negative zero: load +0.0 and flip the sign */ + if (bits.w >> 31) + FNEG(r0); + } else if (i0 == 1.0f) { + FLDI1(r0); + } else if (i0 == -1.0f) { + FLDI1(r0); + FNEG(r0); + } else { + load_const_f(0, r0, i0); + } + + if (is_bank) + FRCHG(); +} + +static void _movi_d(jit_state_t *_jit, jit_uint16_t r0, jit_float64_t i0) +{ + union fl64 { + struct { + jit_uint32_t hi; + 
jit_uint32_t lo; + }; + jit_float64_t f; + }; + + if (SH_SINGLE_ONLY) { + movi_f(r0, (jit_float32_t)i0); + } else if (r0 >= _XF0) { + set_fmode(_jit, 0); + maybe_emit_frchg(); + + movi_w_f(r0 + 1 - _XF0, ((union fl64)i0).hi); + movi_w_f(r0 - _XF0, ((union fl64)i0).lo); + + FRCHG(); + } else { + movi_w_f(r0 + 1, ((union fl64)i0).hi); + movi_w_f(r0, ((union fl64)i0).lo); + } +} + +static void _ltr_f(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, + jit_int16_t r2, jit_bool_t is_double) +{ + set_fmode(_jit, is_double); + + FCMPGT(r2, r1); + MOVT(r0); +} + +static void +_lti_f(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float32_t i0) +{ + jit_uint16_t reg; + + reg = jit_get_reg(jit_class_fpr); + movi_f(rn(reg), i0); + + ltr_f(r0, r1, rn(reg)); + + jit_unget_reg(reg); +} + +static void +_lti_d(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float64_t i0) +{ + jit_uint16_t reg; + + reg = jit_get_reg(jit_class_fpr); + movi_d(rn(reg), i0); + + ltr_d(r0, r1, rn(reg)); + + jit_unget_reg(reg); +} + +static void _ler_f(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, + jit_int16_t r2, jit_bool_t is_double) +{ + jit_uint16_t reg; + + reg = jit_get_reg(jit_class_fpr); + + set_fmode(_jit, is_double); + + MOVI(_R0, 0); + FCMPEQ(r1, r1); + BF(5); + FCMPEQ(r2, r2); + BF(3); + + FCMPGT(r1, r2); + MOVT(_R0); + BRA(13 + is_double); + XORI(1); + + if (is_double) + movr_w_f(rn(reg), _R0); + else + FLDI0(rn(reg)); + FCMPGT(rn(reg), r1); + MOVT(_R0); + FCMPGT(r1, rn(reg)); + ROTL(_R0); + TST(_R0, _R0); + BT(5); + + FCMPGT(rn(reg), r2); + MOVT(_R0); + FCMPGT(r2, rn(reg)); + ROTL(_R0); + TST(_R0, _R0); + BF(-18 - is_double); + + movr(r0, _R0); + + jit_unget_reg(reg); +} + +static void +_lei_f(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float32_t i0) +{ + jit_uint16_t reg = jit_get_reg(jit_class_fpr); + + movi_f(rn(reg), i0); + ler_f(r0, r1, rn(reg)); + + jit_unget_reg(reg); +} + +static void +_lei_d(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, 
jit_float64_t i0) +{ + jit_uint16_t reg = jit_get_reg(jit_class_fpr); + + movi_d(rn(reg), i0); + ler_d(r0, r1, rn(reg)); + + jit_unget_reg(reg); +} + +static void _eqr_f(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, + jit_int16_t r2, jit_bool_t is_double) +{ + set_fmode(_jit, is_double); + + FCMPEQ(r1, r2); + MOVT(r0); +} + +static void +_eqi_f(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float32_t i0) +{ + jit_uint16_t reg = jit_get_reg(jit_class_fpr); + + movi_f(rn(reg), i0); + eqr_f(r0, r1, rn(reg)); + + jit_unget_reg(reg); +} + +static void +_eqi_d(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float64_t i0) +{ + jit_uint16_t reg = jit_get_reg(jit_class_fpr); + + movi_d(rn(reg), i0); + eqr_d(r0, r1, rn(reg)); + + jit_unget_reg(reg); +} + +static void +_gei_f(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float32_t i0) +{ + jit_uint16_t reg = jit_get_reg(jit_class_fpr); + + movi_f(rn(reg), i0); + ger_f(r0, r1, rn(reg)); + + jit_unget_reg(reg); +} + +static void +_gei_d(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float64_t i0) +{ + jit_uint16_t reg = jit_get_reg(jit_class_fpr); + + movi_d(rn(reg), i0); + ger_d(r0, r1, rn(reg)); + + jit_unget_reg(reg); +} + +static void +_gti_f(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float32_t i0) +{ + jit_uint16_t reg = jit_get_reg(jit_class_fpr); + + movi_f(rn(reg), i0); + gtr_f(r0, r1, rn(reg)); + + jit_unget_reg(reg); +} + +static void +_gti_d(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float64_t i0) +{ + jit_uint16_t reg = jit_get_reg(jit_class_fpr); + + movi_d(rn(reg), i0); + gtr_d(r0, r1, rn(reg)); + + jit_unget_reg(reg); +} + +static void +_ner_f(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_int16_t r2, + jit_bool_t is_double) +{ + _eqr_f(_jit, _R0, r1, r2, is_double); + XORI(1); + movr(r0, _R0); +} + +static void +_nei_f(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float32_t i0) +{ + jit_uint16_t reg = jit_get_reg(jit_class_fpr); + + 
movi_f(rn(reg), i0); + ner_f(r0, r1, rn(reg)); + + jit_unget_reg(reg); +} + +static void +_nei_d(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float64_t i0) +{ + jit_uint16_t reg = jit_get_reg(jit_class_fpr); + + movi_d(rn(reg), i0); + ner_d(r0, r1, rn(reg)); + + jit_unget_reg(reg); +} + +static void +_unltr_f(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_int16_t r2, + jit_bool_t is_double) +{ + _ler_f(_jit, _R0, r2, r1, is_double); + XORI(1); + movr(r0, _R0); +} + +static void +_unlti_f(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float32_t i0) +{ + jit_uint16_t reg = jit_get_reg(jit_class_fpr); + + movi_f(rn(reg), i0); + unltr_f(r0, r1, rn(reg)); + + jit_unget_reg(reg); +} + +static void +_unlti_d(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float64_t i0) +{ + jit_uint16_t reg = jit_get_reg(jit_class_fpr); + + movi_d(rn(reg), i0); + unltr_d(r0, r1, rn(reg)); + + jit_unget_reg(reg); +} + +static void +_unler_f(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_int16_t r2, + jit_bool_t is_double) +{ + _ltr_f(_jit, _R0, r2, r1, is_double); + XORI(1); + movr(r0, _R0); +} + +static void +_unlei_f(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float32_t i0) +{ + jit_uint16_t reg = jit_get_reg(jit_class_fpr); + + movi_f(rn(reg), i0); + unler_f(r0, r1, rn(reg)); + + jit_unget_reg(reg); +} + +static void +_unlei_d(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float64_t i0) +{ + jit_uint16_t reg = jit_get_reg(jit_class_fpr); + + movi_d(rn(reg), i0); + unler_d(r0, r1, rn(reg)); + + jit_unget_reg(reg); +} + +static void +_ungti_f(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float32_t i0) +{ + jit_uint16_t reg = jit_get_reg(jit_class_fpr); + + movi_f(rn(reg), i0); + ungtr_f(r0, r1, rn(reg)); + + jit_unget_reg(reg); +} + +static void +_ungti_d(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float64_t i0) +{ + jit_uint16_t reg = jit_get_reg(jit_class_fpr); + + movi_d(rn(reg), i0); + ungtr_d(r0, r1, 
rn(reg)); + + jit_unget_reg(reg); +} + +static void +_ungei_f(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float32_t i0) +{ + jit_uint16_t reg = jit_get_reg(jit_class_fpr); + + movi_f(rn(reg), i0); + unger_f(r0, r1, rn(reg)); + + jit_unget_reg(reg); +} + +static void +_ungei_d(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float64_t i0) +{ + jit_uint16_t reg = jit_get_reg(jit_class_fpr); + + movi_d(rn(reg), i0); + unger_d(r0, r1, rn(reg)); + + jit_unget_reg(reg); +} + +static void +_uneqr_f(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_int16_t r2, + jit_bool_t is_double) +{ + jit_uint16_t reg = jit_get_reg(jit_class_gpr); + + _unler_f(_jit, rn(reg), r2, r1, is_double); + _unler_f(_jit, r0, r1, r2, is_double); + andr(r0, r0, rn(reg)); + + jit_unget_reg(reg); +} + +static void +_uneqi_f(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float32_t i0) +{ + jit_uint16_t reg = jit_get_reg(jit_class_fpr); + + movi_f(rn(reg), i0); + uneqr_f(r0, r1, rn(reg)); + + jit_unget_reg(reg); +} + +static void +_uneqi_d(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float64_t i0) +{ + jit_uint16_t reg = jit_get_reg(jit_class_fpr); + + movi_d(rn(reg), i0); + uneqr_d(r0, r1, rn(reg)); + + jit_unget_reg(reg); +} + +static void +_ltgtr_f(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_int16_t r2, + jit_bool_t is_double) +{ + _uneqr_f(_jit, r0, r1, r2, is_double); + xori(r0, r0, 1); +} + +static void +_ltgti_f(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float32_t i0) +{ + jit_uint16_t reg = jit_get_reg(jit_class_fpr); + + movi_f(rn(reg), i0); + ltgtr_f(r0, r1, rn(reg)); + + jit_unget_reg(reg); +} + +static void +_ltgti_d(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float64_t i0) +{ + jit_uint16_t reg = jit_get_reg(jit_class_fpr); + + movi_d(rn(reg), i0); + ltgtr_d(r0, r1, rn(reg)); + + jit_unget_reg(reg); +} + +static void +_ordr_f(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_int16_t r2, + jit_bool_t is_double) 
+{ + jit_uint16_t reg = jit_get_reg(jit_class_gpr); + + _eqr_f(_jit, rn(reg), r1, r1, is_double); + _eqr_f(_jit, r0, r2, r2, is_double); + andr(r0, r0, rn(reg)); + + jit_unget_reg(reg); +} + +static void +_ordi_f(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float32_t i0) +{ + jit_uint16_t reg = jit_get_reg(jit_class_fpr); + + movi_f(rn(reg), i0); + ordr_f(r0, r1, rn(reg)); + + jit_unget_reg(reg); +} + +static void +_ordi_d(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float64_t i0) +{ + jit_uint16_t reg = jit_get_reg(jit_class_fpr); + + movi_d(rn(reg), i0); + ordr_d(r0, r1, rn(reg)); + + jit_unget_reg(reg); +} + +static void +_unordr_f(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_int16_t r2, + jit_bool_t is_double) +{ + jit_uint16_t reg = jit_get_reg(jit_class_gpr); + + _ner_f(_jit, rn(reg), r1, r1, is_double); + _ner_f(_jit, r0, r2, r2, is_double); + orr(r0, r0, rn(reg)); + + jit_unget_reg(reg); +} + +static void +_unordi_f(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float32_t i0) +{ + jit_uint16_t reg = jit_get_reg(jit_class_fpr); + + movi_f(rn(reg), i0); + unordr_f(r0, r1, rn(reg)); + + jit_unget_reg(reg); +} + +static void +_unordi_d(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float64_t i0) +{ + jit_uint16_t reg = jit_get_reg(jit_class_fpr); + + movi_d(rn(reg), i0); + unordr_d(r0, r1, rn(reg)); + + jit_unget_reg(reg); +} + +static void +_addr_f(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, + jit_uint16_t r2, jit_bool_t is_double) +{ + set_fmode(_jit, is_double); + + if (r0 == r2) { + FADD(r0, r1); + } else { + if (is_double) + movr_d(r0, r1); + else + movr_f(r0, r1); + FADD(r0, r2); + } +} + +static void +_addi_f(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_float32_t i0) +{ + jit_uint16_t reg; + + set_fmode(_jit, 0); + + if (r0 == r1) { + reg = jit_get_reg(jit_class_fpr); + + movi_f(rn(reg), i0); + FADD(r0, rn(reg)); + + jit_unget_reg(reg); + } else { + movi_f(r0, i0); + FADD(r0, r1); + } +} 
+ +static void _addi_d(jit_state_t *_jit, jit_uint16_t r0, + jit_uint16_t r1, jit_float64_t i0) +{ + jit_uint16_t reg; + + set_fmode(_jit, 1); + + if (r0 == r1) { + reg = jit_get_reg(jit_class_fpr); + + movi_d(rn(reg), i0); + FADD(r0, rn(reg)); + + jit_unget_reg(reg); + } else { + movi_d(r0, i0); + FADD(r0, r1); + } +} + +/* r0 = r1 - r2 in single precision. When only r0 and r2 alias, r2 is negated in place and r1 added; every other case copies r1 into r0 and subtracts r2, so r1 == r2 performs a real x - x (NaN for NaN or infinite x) rather than loading a constant zero. */ +static void +_subr_f(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2) +{ + set_fmode(_jit, 0); + + if (r0 == r2 && r0 != r1) { + FNEG(r0); + FADD(r0, r1); + } else { + /* movr_f is a no-op when r0 == r1 */ + movr_f(r0, r1); + FSUB(r0, r2); + } +} + +/* r0 = r1 - r2 in double precision; same aliasing rules as _subr_f. */ +static void +_subr_d(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2) +{ + set_fmode(_jit, 1); + + if (r0 == r2 && r0 != r1) { + FNEG(r0); + FADD(r0, r1); + } else { + /* movr_d is a no-op when r0 == r1 */ + movr_d(r0, r1); + FSUB(r0, r2); + } +} + +static void +_subi_f(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_float32_t i0) +{ + jit_uint16_t reg; + + set_fmode(_jit, 0); + + if (r0 == r1) { + reg = jit_get_reg(jit_class_fpr); + + movi_f(rn(reg), i0); + FSUB(r0, rn(reg)); + + jit_unget_reg(reg); + } else { + movi_f(r0, -i0); + FADD(r0, r1); + } +} + +static void +_subi_d(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_float64_t i0) +{ + jit_uint16_t reg; + + set_fmode(_jit, 1); + + if (r0 == r1) { + reg = jit_get_reg(jit_class_fpr); + + movi_d(rn(reg), i0); + FSUB(r0, rn(reg)); + + jit_unget_reg(reg); + } else { + movi_d(r0, -i0); + FADD(r0, r1); + } +} + +static void +_rsbi_f(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_float32_t i0) +{ + jit_uint16_t reg; + + set_fmode(_jit, 0); + + if (r0 == r1) { + reg = jit_get_reg(jit_class_fpr); + + movi_f(rn(reg), i0); + subr_f(r0, rn(reg), r0); + + jit_unget_reg(reg); + } else { + movi_f(r0, i0); + FSUB(r0, r1); + } +} + +static void +_rsbi_d(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_float64_t i0) +{ + jit_uint16_t reg; + + set_fmode(_jit, 
1); + + if (r0 == r1) { + reg = jit_get_reg(jit_class_fpr); + + movi_d(rn(reg), i0); + subr_d(r0, rn(reg), r0); + + jit_unget_reg(reg); + } else { + movi_d(r0, i0); + FSUB(r0, r1); + } +} + +static void +_mulr_f(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2) +{ + set_fmode(_jit, 0); + + if (r0 == r2) { + FMUL(r0, r1); + } else { + movr_f(r0, r1); + FMUL(r0, r2); + } +} + +static void +_muli_f(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_float32_t i0) +{ + jit_uint16_t reg; + + if (r0 == r1) { + reg = jit_get_reg(jit_class_fpr); + + movi_f(rn(reg), i0); + mulr_f(r0, r1, rn(reg)); + + jit_unget_reg(reg); + } else { + movi_f(r0, i0); + mulr_f(r0, r0, r1); + } +} + +static void +_mulr_d(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2) +{ + set_fmode(_jit, 1); + + if (r0 == r2) { + FMUL(r0, r1); + } else { + movr_d(r0, r1); + FMUL(r0, r2); + } +} + +static void +_muli_d(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_float64_t i0) +{ + jit_uint16_t reg; + + if (r0 == r1) { + reg = jit_get_reg(jit_class_fpr); + + movi_d(rn(reg), i0); + mulr_d(r0, r1, rn(reg)); + + jit_unget_reg(reg); + } else { + movi_d(r0, i0); + mulr_d(r0, r0, r1); + } +} + +static void +_divr_f(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2) +{ + jit_uint16_t reg; + + set_fmode(_jit, 0); + + if (r0 == r2) { + reg = jit_get_reg(jit_class_fpr); + + movr_f(rn(reg), r2); + movr_f(r0, r1); + FDIV(r0, rn(reg)); + + jit_unget_reg(reg); + } else { + movr_f(r0, r1); + FDIV(r0, r2); + } +} + +static void +_divi_f(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_float32_t i0) +{ + jit_uint16_t reg; + + reg = jit_get_reg(jit_class_fpr); + + movi_f(rn(reg), i0); + divr_f(r0, r1, rn(reg)); + + jit_unget_reg(reg); +} + +static void +_divr_d(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2) +{ + jit_uint16_t reg; + + set_fmode(_jit, 1); + + if (r0 == r2) { + reg = jit_get_reg(jit_class_fpr); + + 
movr_d(rn(reg), r2); + movr_d(r0, r1); + FDIV(r0, rn(reg)); + + jit_unget_reg(reg); + } else { + movr_d(r0, r1); + FDIV(r0, r2); + } +} + +static void +_divi_d(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_float64_t i0) +{ + jit_uint16_t reg; + + reg = jit_get_reg(jit_class_fpr); + + movi_d(rn(reg), i0); + divr_d(r0, r1, rn(reg)); + + jit_unget_reg(reg); +} + +static void _absr_f(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1) +{ + set_fmode(_jit, 0); + + movr_f(r0, r1); + FABS(r0); +} + +static void _absr_d(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1) +{ + set_fmode(_jit, 1); + + movr_d(r0, r1); + FABS(r0); +} + +static void _sqrtr_f(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1) +{ + set_fmode(_jit, 0); + + movr_f(r0, r1); + FSQRT(r0); +} + +static void _sqrtr_d(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1) +{ + set_fmode(_jit, 1); + + movr_d(r0, r1); + FSQRT(r0); +} + +static void _negr_f(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1) +{ + set_fmode(_jit, 0); + + movr_f(r0, r1); + FNEG(r0); +} + +static void _negr_d(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1) +{ + set_fmode(_jit, 1); + + movr_d(r0, r1); + FNEG(r0); +} + +static void _extr_d_f(jit_state_t *_jit,jit_uint16_t r0, jit_uint16_t r1) +{ + if (SH_SINGLE_ONLY) { + movr_f(r0, r1); + } else { + set_fmode(_jit, 1); + FCNVDS(r1); + set_fmode(_jit, 0); + FSTS(r0); + } +} + +static void _extr_f_d(jit_state_t *_jit,jit_uint16_t r0, jit_uint16_t r1) +{ + if (SH_SINGLE_ONLY) { + movr_f(r0, r1); + } else { + set_fmode(_jit, 0); + FLDS(r1); + set_fmode(_jit, 1); + FCNVSD(r0); + } +} + +static void _ldr_d(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1) +{ + if (SH_SINGLE_ONLY) { + ldr_f(r0, r1); + } else { + movr(_R0, r1); + LDFS(r0 + 1, _R0); + LDF(r0, _R0); + } +} + +static void _ldi_f(jit_state_t *_jit, jit_uint16_t r0, jit_word_t i0) +{ + movi(_R0, i0); + ldr_f(r0, _R0); +} + +static void _ldi_d(jit_state_t *_jit, jit_uint16_t r0, jit_word_t i0) +{ + 
movi(_R0, i0); + ldr_d(r0, _R0); +} + +static void _ldxr_f(jit_state_t *_jit, jit_uint16_t r0, + jit_uint16_t r1, jit_uint16_t r2) +{ + movr(_R0, r2); + LDXF(r0, r1); +} + +static void _ldxr_d(jit_state_t *_jit, jit_uint16_t r0, + jit_uint16_t r1, jit_uint16_t r2) +{ + if (SH_SINGLE_ONLY) { + ldxr_f(r0, r1, r2); + } else { + addr(_R0, r1, r2); + ldr_d(r0, _R0); + } +} + +static void _ldxi_f(jit_state_t *_jit, jit_uint16_t r0, + jit_uint16_t r1, jit_word_t i0) +{ + movi(_R0, i0); + ldxr_f(r0, r1, _R0); +} + +static void _ldxi_d(jit_state_t *_jit, jit_uint16_t r0, + jit_uint16_t r1, jit_word_t i0) +{ + movi(_R0, i0); + ldxr_d(r0, r1, _R0); +} + +static void _str_d(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1) +{ + if (SH_SINGLE_ONLY) { + str_f(r0, r1); + } else { + STF(r0, r1 + 1); + movi(_R0, 4); + STXF(r0, r1); + } +} + +static void _sti_f(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0) +{ + movi(_R0, i0); + STF(_R0, r0); +} + +static void _sti_d(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0) +{ + if (SH_SINGLE_ONLY) { + sti_f(i0, r0); + } else { + movi(_R0, i0 + 8); + STFS(_R0, r0); + STFS(_R0, r0 + 1); + } +} + +static void _stxr_f(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, + jit_uint16_t r2) +{ + movr(_R0, r0); + STXF(r1, r2); +} + +static void _stxr_d(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, + jit_uint16_t r2) +{ + if (SH_SINGLE_ONLY) { + stxr_f(r0, r1, r2); + } else { + movr(_R0, r0); + STXF(r1, r2 + 1); + addi(_R0, _R0, 4); + STXF(r1, r2); + } +} + +static void _stxi_f(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0, + jit_uint16_t r1) +{ + movi(_R0, i0); + stxr_f(_R0, r0, r1); +} + +static void _stxi_d(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0, + jit_uint16_t r1) +{ + movi(_R0, i0); + stxr_d(_R0, r0, r1); +} + +static jit_word_t _beqr_f(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0, + jit_uint16_t r1, jit_bool_t is_double, + jit_bool_t set, jit_bool_t p) +{ + jit_word_t w; + + set_fmode(_jit, is_double); + + 
FCMPEQ(r0, r1); + + set_fmode(_jit, SH_DEFAULT_FPU_MODE); + + w = _jit->pc.w; + emit_branch_opcode(_jit, i0, w, set, p); + + return (w); +} + +static jit_word_t _beqi_f(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0, + jit_float32_t i1, jit_bool_t set, jit_bool_t p) +{ + jit_word_t w; + jit_uint16_t reg; + + set_fmode(_jit, 0); + + reg = jit_get_reg(jit_class_fpr); + movi_f(rn(reg), i1); + + FCMPEQ(r0, rn(reg)); + jit_unget_reg(reg); + + set_fmode(_jit, SH_DEFAULT_FPU_MODE); + + w = _jit->pc.w; + emit_branch_opcode(_jit, i0, w, set, p); + + return (w); +} + +static jit_word_t _beqi_d(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0, + jit_float64_t i1, jit_bool_t set, jit_bool_t p) +{ + jit_word_t w; + jit_uint16_t reg; + + set_fmode(_jit, 1); + + reg = jit_get_reg(jit_class_fpr); + movi_d(rn(reg), i1); + + FCMPEQ(r0, rn(reg)); + jit_unget_reg(reg); + + set_fmode(_jit, SH_DEFAULT_FPU_MODE); + + w = _jit->pc.w; + emit_branch_opcode(_jit, i0, w, set, p); + + return (w); +} + +static jit_word_t _bgtr_f(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0, + jit_int16_t r1, jit_bool_t is_double, + jit_bool_t set, jit_bool_t p) +{ + jit_word_t w; + + set_fmode(_jit, is_double); + + FCMPGT(r0, r1); + + set_fmode(_jit, SH_DEFAULT_FPU_MODE); + + w = _jit->pc.w; + emit_branch_opcode(_jit, i0, w, set, p); + + return (w); +} + +static jit_word_t +_blti_f(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0, + jit_float32_t i1, jit_bool_t p) +{ + jit_uint16_t reg; + jit_word_t w; + + reg = jit_get_reg(jit_class_fpr); + + movi_f(rn(reg), i1); + w = bltr_f_p(i0, r0, rn(reg), p); + + jit_unget_reg(reg); + + return w; +} + +static jit_word_t +_blti_d(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0, + jit_float64_t i1, jit_bool_t p) +{ + jit_uint16_t reg; + jit_word_t w; + + reg = jit_get_reg(jit_class_fpr); + + movi_d(rn(reg), i1); + w = bltr_d_p(i0, r0, rn(reg), p); + + jit_unget_reg(reg); + + return w; +} + +static jit_word_t +_bgti_f(jit_state_t *_jit, jit_word_t i0, jit_int16_t 
r0, + jit_float32_t i1, jit_bool_t p) +{ + jit_uint16_t reg; + jit_word_t w; + + reg = jit_get_reg(jit_class_fpr); + + movi_f(rn(reg), i1); + w = bgtr_f_p(i0, r0, rn(reg), p); + + jit_unget_reg(reg); + + return w; +} + +static jit_word_t +_bgti_d(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0, + jit_float64_t i1, jit_bool_t p) +{ + jit_uint16_t reg; + jit_word_t w; + + reg = jit_get_reg(jit_class_fpr); + + movi_d(rn(reg), i1); + w = bgtr_d_p(i0, r0, rn(reg), p); + + jit_unget_reg(reg); + + return w; +} + +static jit_word_t _bler_f(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0, + jit_int16_t r1, jit_bool_t is_double, + jit_bool_t set, jit_bool_t p) +{ + jit_word_t w; + + set_fmode(_jit, is_double); + + FCMPGT(r1, r0); + MOVT(_R0); + FCMPEQ(r0, r1); + ROTCL(_R0); + TSTI(3); + + set_fmode(_jit, SH_DEFAULT_FPU_MODE); + + w = _jit->pc.w; + emit_branch_opcode(_jit, i0, w, set, p); + + return (w); +} + +static jit_word_t +_blei_f(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0, + jit_float32_t i1, jit_bool_t p) +{ + jit_uint16_t reg; + jit_word_t w; + + reg = jit_get_reg(jit_class_fpr); + + movi_f(rn(reg), i1); + w = bler_f_p(i0, r0, rn(reg), p); + + jit_unget_reg(reg); + + return w; +} + +static jit_word_t +_blei_d(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0, + jit_float64_t i1, jit_bool_t p) +{ + jit_uint16_t reg; + jit_word_t w; + + reg = jit_get_reg(jit_class_fpr); + + movi_d(rn(reg), i1); + w = bler_d_p(i0, r0, rn(reg), p); + + jit_unget_reg(reg); + + return w; +} + +static jit_word_t +_bgei_f(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0, + jit_float32_t i1, jit_bool_t p) +{ + jit_uint16_t reg; + jit_word_t w; + + reg = jit_get_reg(jit_class_fpr); + + movi_f(rn(reg), i1); + w = bger_f_p(i0, r0, rn(reg), p); + + jit_unget_reg(reg); + + return w; +} + +static jit_word_t +_bgei_d(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0, + jit_float64_t i1, jit_bool_t p) +{ + jit_uint16_t reg; + jit_word_t w; + + reg = jit_get_reg(jit_class_fpr); + + 
movi_d(rn(reg), i1); + w = bger_d_p(i0, r0, rn(reg), p); + + jit_unget_reg(reg); + + return w; +} + +static jit_word_t _buneqr_f(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0, + jit_int16_t r1, jit_bool_t is_double, jit_bool_t p) +{ + jit_word_t w; + + _uneqr_f(_jit, _R0, r0, r1, is_double); + TST(_R0, _R0); + + set_fmode(_jit, SH_DEFAULT_FPU_MODE); + + w = _jit->pc.w; + emit_branch_opcode(_jit, i0, w, 0, p); + + return (w); +} + +static jit_word_t _bltgtr_f(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0, + jit_int16_t r1, jit_bool_t is_double, jit_bool_t p) +{ + jit_word_t w; + + _ltgtr_f(_jit, _R0, r0, r1, is_double); + TST(_R0, _R0); + + set_fmode(_jit, SH_DEFAULT_FPU_MODE); + + w = _jit->pc.w; + emit_branch_opcode(_jit, i0, w, 0, p); + + return (w); +} + +static jit_word_t _bordr_f(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0, + jit_int16_t r1, jit_bool_t is_double, + jit_bool_t set, jit_bool_t p) +{ + jit_word_t w; + + _ordr_f(_jit, _R0, r0, r1, is_double); + TST(_R0, _R0); + + set_fmode(_jit, SH_DEFAULT_FPU_MODE); + + w = _jit->pc.w; + emit_branch_opcode(_jit, i0, w, !set, p); + + return (w); +} + +static jit_word_t +_bunlti_f(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0, + jit_float32_t i1, jit_bool_t p) +{ + jit_uint16_t reg; + jit_word_t w; + + reg = jit_get_reg(jit_class_fpr); + + movi_f(rn(reg), i1); + w = bunltr_f_p(i0, r0, rn(reg), p); + + jit_unget_reg(reg); + + return w; +} + +static jit_word_t +_bunlti_d(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0, + jit_float64_t i1, jit_bool_t p) +{ + jit_uint16_t reg; + jit_word_t w; + + reg = jit_get_reg(jit_class_fpr); + + movi_d(rn(reg), i1); + w = bunltr_d_p(i0, r0, rn(reg), p); + + jit_unget_reg(reg); + + return w; +} + +static jit_word_t +_bunlei_f(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0, + jit_float32_t i1, jit_bool_t p) +{ + jit_uint16_t reg; + jit_word_t w; + + reg = jit_get_reg(jit_class_fpr); + + movi_f(rn(reg), i1); + w = bunler_f_p(i0, r0, rn(reg), p); + + 
jit_unget_reg(reg); + + return w; +} + +static jit_word_t +_bunlei_d(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0, + jit_float64_t i1, jit_bool_t p) +{ + jit_uint16_t reg; + jit_word_t w; + + reg = jit_get_reg(jit_class_fpr); + + movi_d(rn(reg), i1); + w = bunler_d_p(i0, r0, rn(reg), p); + + jit_unget_reg(reg); + + return w; +} + +static jit_word_t +_bungti_f(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0, + jit_float32_t i1, jit_bool_t p) +{ + jit_uint16_t reg; + jit_word_t w; + + reg = jit_get_reg(jit_class_fpr); + + movi_f(rn(reg), i1); + w = bungtr_f_p(i0, r0, rn(reg), p); + + jit_unget_reg(reg); + + return w; +} + +static jit_word_t +_bungti_d(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0, + jit_float64_t i1, jit_bool_t p) +{ + jit_uint16_t reg; + jit_word_t w; + + reg = jit_get_reg(jit_class_fpr); + + movi_d(rn(reg), i1); + w = bungtr_d_p(i0, r0, rn(reg), p); + + jit_unget_reg(reg); + + return w; +} + +static jit_word_t +_bungei_f(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0, + jit_float32_t i1, jit_bool_t p) +{ + jit_uint16_t reg; + jit_word_t w; + + reg = jit_get_reg(jit_class_fpr); + + movi_f(rn(reg), i1); + w = bunger_f_p(i0, r0, rn(reg), p); + + jit_unget_reg(reg); + + return w; +} + +static jit_word_t +_bungei_d(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0, + jit_float64_t i1, jit_bool_t p) +{ + jit_uint16_t reg; + jit_word_t w; + + reg = jit_get_reg(jit_class_fpr); + + movi_d(rn(reg), i1); + w = bunger_d_p(i0, r0, rn(reg), p); + + jit_unget_reg(reg); + + return w; +} + +static jit_word_t +_buneqi_f(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0, + jit_float32_t i1, jit_bool_t p) +{ + jit_uint16_t reg; + jit_word_t w; + + reg = jit_get_reg(jit_class_fpr); + + movi_f(rn(reg), i1); + w = buneqr_f_p(i0, r0, rn(reg), p); + + jit_unget_reg(reg); + + return w; +} + +static jit_word_t +_buneqi_d(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0, + jit_float64_t i1, jit_bool_t p) +{ + jit_uint16_t reg; + jit_word_t w; + + reg = 
jit_get_reg(jit_class_fpr); + + movi_d(rn(reg), i1); + w = buneqr_d_p(i0, r0, rn(reg), p); + + jit_unget_reg(reg); + + return w; +} + +static jit_word_t +_bltgti_f(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0, + jit_float32_t i1, jit_bool_t p) +{ + jit_uint16_t reg; + jit_word_t w; + + reg = jit_get_reg(jit_class_fpr); + + movi_f(rn(reg), i1); + w = bltgtr_f_p(i0, r0, rn(reg), p); + + jit_unget_reg(reg); + + return w; +} + +static jit_word_t +_bltgti_d(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0, + jit_float64_t i1, jit_bool_t p) +{ + jit_uint16_t reg; + jit_word_t w; + + reg = jit_get_reg(jit_class_fpr); + + movi_d(rn(reg), i1); + w = bltgtr_d_p(i0, r0, rn(reg), p); + + jit_unget_reg(reg); + + return w; +} + +static jit_word_t +_bordi_f(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0, + jit_float32_t i1, jit_bool_t p) +{ + jit_uint16_t reg; + jit_word_t w; + + reg = jit_get_reg(jit_class_fpr); + + movi_f(rn(reg), i1); + w = bordr_f_p(i0, r0, rn(reg), p); + + jit_unget_reg(reg); + + return w; +} + +static jit_word_t +_bordi_d(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0, + jit_float64_t i1, jit_bool_t p) +{ + jit_uint16_t reg; + jit_word_t w; + + reg = jit_get_reg(jit_class_fpr); + + movi_d(rn(reg), i1); + w = bordr_d_p(i0, r0, rn(reg), p); + + jit_unget_reg(reg); + + return w; +} + +static jit_word_t +_bunordi_f(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0, + jit_float32_t i1, jit_bool_t p) +{ + jit_uint16_t reg; + jit_word_t w; + + reg = jit_get_reg(jit_class_fpr); + + movi_f(rn(reg), i1); + w = bunordr_f_p(i0, r0, rn(reg), p); + + jit_unget_reg(reg); + + return w; +} + +static jit_word_t +_bunordi_d(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0, + jit_float64_t i1, jit_bool_t p) +{ + jit_uint16_t reg; + jit_word_t w; + + reg = jit_get_reg(jit_class_fpr); + + movi_d(rn(reg), i1); + w = bunordr_d_p(i0, r0, rn(reg), p); + + jit_unget_reg(reg); + + return w; +} + +static void +_ldxai_f(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, 
jit_word_t i0) +{ + if (i0 == 4) /* increment of 4: a single LDFS (presumably a post-increment load; confirm against the LDFS macro) */ + LDFS(r0, r1); + else + generic_ldxai_f(r0, r1, i0); +} + +static void +_ldxai_d(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_word_t i0) +{ + if (SH_SINGLE_ONLY) { + ldxai_f(r0, r1, i0); + } else if (i0 == 8) { /* increment of 8: two LDFS, into r0 + 1 first and then r0 */ + LDFS(r0 + 1, r1); + LDFS(r0, r1); + } else { + generic_ldxai_d(r0, r1, i0); + } +} + +static void +_stxbi_f(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0, jit_int16_t r1) +{ + if (i0 == -4) /* offset of -4: a single STFS (presumably a pre-decrement store; confirm against the STFS macro) */ + STFS(r0, r1); + else + generic_stxbi_f(i0, r0, r1); +} + +static void +_stxbi_d(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0, jit_int16_t r1) +{ + if (SH_SINGLE_ONLY) { + stxbi_f(i0, r0, r1); + } else if (i0 == -8) { /* offset of -8: two STFS, from r1 first and then r1 + 1 */ + STFS(r0, r1); + STFS(r0, r1 + 1); + } else { + generic_stxbi_d(i0, r0, r1); + } +} + +static void _movr_w_f(jit_state_t *_jit, jit_uint16_t r0, jit_int16_t r1) +{ + LDS(r1); + FSTS(r0); +} + +static void _movr_f_w(jit_state_t *_jit, jit_uint16_t r0, jit_int16_t r1) +{ + FLDS(r1); + STSUL(r0); +} + +static void _movi_w_f(jit_state_t *_jit, jit_int16_t r0, jit_word_t i0) +{ + movi(_R0, i0); /* the immediate is staged in R0, then moved with the register form */ + movr_w_f(r0, _R0); +} + +static void _movr_ww_d(jit_state_t *_jit, jit_uint16_t r0, jit_int16_t r1, jit_int16_t r2) +{ + /* TODO: single-only */ + movr_w_f(r0 + 1, r1); + movr_w_f(r0, r2); +} + +static void _movr_d_ww(jit_state_t *_jit, jit_uint16_t r0, jit_int16_t r1, jit_int16_t r2) +{ + /* TODO: single-only */ + movr_f_w(r0, r2 + 1); + movr_f_w(r1, r2); +} + +static void _movi_ww_d(jit_state_t *_jit, jit_int16_t r0, jit_word_t i0, jit_word_t i1) +{ + /* TODO: single-only */ + movi_w_f(r0, i1); + movi_w_f(r0 + 1, i0); +} + +static void +_vaarg_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + jit_int32_t rg0, rg1; + jit_word_t ge_code; + + assert(_jitc->function->self.call & jit_call_varargs); + + rg0 = jit_get_reg(jit_class_gpr); + rg1 = jit_get_reg(jit_class_gpr); + + /* Load the begin/end fpr pointers of the va_list at r1 (bfpr -> rg0, efpr -> rg1) */ + ldxi(rn(rg1), r1, offsetof(jit_va_list_t, efpr)); + movi(_R0, offsetof(jit_va_list_t, bfpr)); + 
ldxr(rn(rg0), r1, _R0); + + /* Check that we didn't reach the end fpr pointer. */ + CMPHS(rn(rg0), rn(rg1)); + + ge_code = _jit->pc.w; + BF(0); + + /* If we did, load the overflow (stack) pointer instead; R0 now holds the offset of 'over'. */ + movi(_R0, offsetof(jit_va_list_t, over)); + ldxr(rn(rg0), r1, _R0); + + patch_at(ge_code, _jit->pc.w); + + /* All good, we can now load the actual value */ + ldxai_d(r0, rn(rg0), sizeof(jit_float64_t)); + + /* Write the advanced pointer back to the field it was read from (bfpr or over); this relies on R0 still holding that field's offset */ + stxr(_R0, r1, rn(rg0)); + + jit_unget_reg(rg0); + jit_unget_reg(rg1); +} + +#endif /* CODE */ diff --git a/deps/lightning/lib/jit_sh-sz.c b/deps/lightning/lib/jit_sh-sz.c new file mode 100644 index 000000000..0b02cbf61 --- /dev/null +++ b/deps/lightning/lib/jit_sh-sz.c @@ -0,0 +1,598 @@ +#define JIT_INSTR_MAX 116 + 0, /* data */ + 0, /* live */ + 4, /* align */ + 0, /* save */ + 0, /* load */ + 4, /* skip */ + 0, /* #name */ + 0, /* #note */ + 0, /* label */ + 40, /* prolog */ + 0, /* ellipsis */ + 0, /* va_push */ + 0, /* allocai */ + 0, /* allocar */ + 0, /* arg_c */ + 0, /* arg_s */ + 0, /* arg_i */ + 0, /* arg_l */ + 0, /* getarg_c */ + 0, /* getarg_uc */ + 0, /* getarg_s */ + 0, /* getarg_us */ + 0, /* getarg_i */ + 0, /* getarg_ui */ + 0, /* getarg_l */ + 0, /* putargr_c */ + 0, /* putargi_c */ + 0, /* putargr_uc */ + 0, /* putargi_uc */ + 0, /* putargr_s */ + 0, /* putargi_s */ + 0, /* putargr_us */ + 0, /* putargi_us */ + 0, /* putargr_i */ + 0, /* putargi_i */ + 0, /* putargr_ui */ + 0, /* putargi_ui */ + 0, /* putargr_l */ + 0, /* putargi_l */ + 4, /* va_start */ + 8, /* va_arg */ + 8, /* va_arg_d */ + 0, /* va_end */ + 4, /* addr */ + 10, /* addi */ + 6, /* addcr */ + 12, /* addci */ + 4, /* addxr */ + 10, /* addxi */ + 4, /* subr */ + 10, /* subi */ + 8, /* subcr */ + 14, /* subci */ + 6, /* subxr */ + 12, /* subxi */ + 10, /* rsbi */ + 4, /* mulr */ + 10, /* muli */ + 6, /* qmulr */ + 12, /* qmuli */ + 6, /* qmulr_u */ + 12, /* qmuli_u */ + 34, /* divr */ + 40, /* divi */ + 24, /* divr_u */ + 30, 
/* divi_u */ + 44, /* qdivr */ + 50, /* qdivi */ + 34, /* qdivr_u */ + 40, /* qdivi_u */ + 44, /* remr */ + 50, /* remi */ + 34, /* remr_u */ + 40, /* remi_u */ + 4, /* andr */ + 10, /* andi */ + 4, /* orr */ + 10, /* ori */ + 4, /* xorr */ + 10, /* xori */ +# if defined(__SH3__) || defined(__SH4__) || defined(__SH4_NOFPU__) || defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) + 6, /* lshr */ + 12, /* lshi */ + 6, /* rshr */ + 12, /* rshi */ + 6, /* rshr_u */ + 12, /* rshi_u */ +#else + 16, /* lshr */ + 22, /* lshi */ + 14, /* rshr */ + 20, /* rshi */ + 14, /* rshr_u */ + 20, /* rshi_u */ +#endif + 2, /* negr */ + 4, /* negi */ + 2, /* comr */ + 4, /* comi */ + 4, /* ltr */ + 4, /* lti */ + 4, /* ltr_u */ + 4, /* lti_u */ + 4, /* ler */ + 10, /* lei */ + 4, /* ler_u */ + 10, /* lei_u */ + 4, /* eqr */ + 10, /* eqi */ + 4, /* ger */ + 10, /* gei */ + 4, /* ger_u */ + 10, /* gei_u */ + 4, /* gtr */ + 10, /* gti */ + 4, /* gtr_u */ + 10, /* gti_u */ + 6, /* ner */ + 12, /* nei */ + 2, /* movr */ + 6, /* movi */ + 6, /* movnr */ + 6, /* movzr */ + 24, /* casr */ + 30, /* casi */ + 2, /* extr_c */ + 4, /* exti_c */ + 2, /* extr_uc */ + 4, /* exti_uc */ + 2, /* extr_s */ + 4, /* exti_s */ + 2, /* extr_us */ + 4, /* exti_us */ + 4, /* extr_i */ + 0, /* exti_i */ + 8, /* extr_ui */ + 0, /* exti_ui */ + 4, /* bswapr_us */ + 4, /* bswapi_us */ + 6, /* bswapr_ui */ + 8, /* bswapi_ui */ + 0, /* bswapr_ul */ + 0, /* bswapi_ul */ + 4, /* htonr_us */ + 4, /* htoni_us */ + 6, /* htonr_ui */ + 8, /* htoni_ui */ + 0, /* htonr_ul */ + 0, /* htoni_ul */ + 2, /* ldr_c */ + 12, /* ldi_c */ + 4, /* ldr_uc */ + 12, /* ldi_uc */ + 2, /* ldr_s */ + 12, /* ldi_s */ + 4, /* ldr_us */ + 12, /* ldi_us */ + 2, /* ldr_i */ + 12, /* ldi_i */ + 2, /* ldr_ui */ + 12, /* ldi_ui */ + 0, /* ldr_l */ + 0, /* ldi_l */ + 4, /* ldxr_c */ + 16, /* ldxi_c */ + 6, /* ldxr_uc */ + 16, /* ldxi_uc */ + 4, /* ldxr_s */ + 16, /* ldxi_s */ + 6, /* ldxr_us */ + 16, /* ldxi_us */ + 4, /* ldxr_i */ + 16, /* 
ldxi_i */ + 4, /* ldxr_ui */ + 16, /* ldxi_ui */ + 0, /* ldxr_l */ + 0, /* ldxi_l */ + 2, /* str_c */ + 12, /* sti_c */ + 2, /* str_s */ + 12, /* sti_s */ + 2, /* str_i */ + 12, /* sti_i */ + 0, /* str_l */ + 0, /* sti_l */ + 4, /* stxr_c */ + 16, /* stxi_c */ + 4, /* stxr_s */ + 16, /* stxi_s */ + 4, /* stxr_i */ + 16, /* stxi_i */ + 0, /* stxr_l */ + 0, /* stxi_l */ + 6, /* bltr */ + 8, /* blti */ + 6, /* bltr_u */ + 8, /* blti_u */ + 6, /* bler */ + 8, /* blei */ + 6, /* bler_u */ + 8, /* blei_u */ + 6, /* beqr */ + 28, /* beqi */ + 6, /* bger */ + 8, /* bgei */ + 6, /* bger_u */ + 8, /* bgei_u */ + 6, /* bgtr */ + 8, /* bgti */ + 6, /* bgtr_u */ + 8, /* bgti_u */ + 6, /* bner */ + 20, /* bnei */ + 6, /* bmsr */ + 12, /* bmsi */ + 6, /* bmcr */ + 12, /* bmci */ + 8, /* boaddr */ + 36, /* boaddi */ + 8, /* boaddr_u */ + 20, /* boaddi_u */ + 8, /* bxaddr */ + 36, /* bxaddi */ + 8, /* bxaddr_u */ + 20, /* bxaddi_u */ + 10, /* bosubr */ + 36, /* bosubi */ + 8, /* bosubr_u */ + 20, /* bosubi_u */ + 10, /* bxsubr */ + 36, /* bxsubi */ + 8, /* bxsubr_u */ + 20, /* bxsubi_u */ + 4, /* jmpr */ + 10, /* jmpi */ + 4, /* callr */ + 10, /* calli */ + 0, /* prepare */ + 0, /* pushargr_c */ + 0, /* pushargi_c */ + 0, /* pushargr_uc */ + 0, /* pushargi_uc */ + 0, /* pushargr_s */ + 0, /* pushargi_s */ + 0, /* pushargr_us */ + 0, /* pushargi_us */ + 0, /* pushargr_i */ + 0, /* pushargi_i */ + 0, /* pushargr_ui */ + 0, /* pushargi_ui */ + 0, /* pushargr_l */ + 0, /* pushargi_l */ + 0, /* finishr */ + 0, /* finishi */ + 0, /* ret */ + 0, /* retr_c */ + 0, /* reti_c */ + 0, /* retr_uc */ + 0, /* reti_uc */ + 0, /* retr_s */ + 0, /* reti_s */ + 0, /* retr_us */ + 0, /* reti_us */ + 0, /* retr_i */ + 0, /* reti_i */ + 0, /* retr_ui */ + 0, /* reti_ui */ + 0, /* retr_l */ + 0, /* reti_l */ + 0, /* retval_c */ + 0, /* retval_uc */ + 0, /* retval_s */ + 0, /* retval_us */ + 0, /* retval_i */ + 0, /* retval_ui */ + 0, /* retval_l */ + 22, /* epilog */ + 0, /* arg_f */ + 0, /* getarg_f */ 
+ 0, /* putargr_f */ + 0, /* putargi_f */ + 4, /* addr_f */ + 12, /* addi_f */ + 4, /* subr_f */ + 12, /* subi_f */ + 12, /* rsbi_f */ + 4, /* mulr_f */ + 12, /* muli_f */ + 4, /* divr_f */ + 12, /* divi_f */ + 4, /* negr_f */ + 0, /* negi_f */ + 4, /* absr_f */ + 0, /* absi_f */ + 4, /* sqrtr_f */ + 0, /* sqrti_f */ + 4, /* ltr_f */ + 12, /* lti_f */ + 4, /* ler_f */ + 12, /* lei_f */ + 4, /* eqr_f */ + 12, /* eqi_f */ + 4, /* ger_f */ + 12, /* gei_f */ + 4, /* gtr_f */ + 12, /* gti_f */ + 8, /* ner_f */ + 16, /* nei_f */ + 28, /* unltr_f */ + 36, /* unlti_f */ + 28, /* unler_f */ + 36, /* unlei_f */ + 28, /* uneqr_f */ + 36, /* uneqi_f */ + 28, /* unger_f */ + 36, /* ungei_f */ + 28, /* ungtr_f */ + 36, /* ungti_f */ + 40, /* ltgtr_f */ + 48, /* ltgti_f */ + 28, /* ordr_f */ + 36, /* ordi_f */ + 20, /* unordr_f */ + 28, /* unordi_f */ + 4, /* truncr_f_i */ + 4, /* truncr_f_l */ + 4, /* extr_f */ + 4, /* extr_d_f */ + 4, /* movr_f */ + 8, /* movi_f */ + 4, /* ldr_f */ + 12, /* ldi_f */ + 8, /* ldxr_f */ + 16, /* ldxi_f */ + 4, /* str_f */ + 12, /* sti_f */ + 8, /* stxr_f */ + 16, /* stxi_f */ + 8, /* bltr_f */ + 16, /* blti_f */ + 8, /* bler_f */ + 16, /* blei_f */ + 8, /* beqr_f */ + 16, /* beqi_f */ + 8, /* bger_f */ + 16, /* bgei_f */ + 8, /* bgtr_f */ + 16, /* bgti_f */ + 8, /* bner_f */ + 16, /* bnei_f */ + 32, /* bunltr_f */ + 40, /* bunlti_f */ + 32, /* bunler_f */ + 40, /* bunlei_f */ + 32, /* buneqr_f */ + 40, /* buneqi_f */ + 32, /* bunger_f */ + 40, /* bungei_f */ + 32, /* bungtr_f */ + 40, /* bungti_f */ + 44, /* bltgtr_f */ + 52, /* bltgti_f */ + 32, /* bordr_f */ + 40, /* bordi_f */ + 24, /* bunordr_f */ + 32, /* bunordi_f */ + 0, /* pushargr_f */ + 0, /* pushargi_f */ + 0, /* retr_f */ + 0, /* reti_f */ + 0, /* retval_f */ + 0, /* arg_d */ + 0, /* getarg_d */ + 0, /* putargr_d */ + 0, /* putargi_d */ + 4, /* addr_d */ + 24, /* addi_d */ + 4, /* subr_d */ + 24, /* subi_d */ + 24, /* rsbi_d */ + 4, /* mulr_d */ + 24, /* muli_d */ + 4, /* divr_d */ + 
24, /* divi_d */ + 4, /* negr_d */ + 0, /* negi_d */ + 4, /* absr_d */ + 0, /* absi_d */ + 4, /* sqrtr_d */ + 0, /* sqrti_d */ + 4, /* ltr_d */ + 24, /* lti_d */ + 4, /* ler_d */ + 24, /* lei_d */ + 4, /* eqr_d */ + 24, /* eqi_d */ + 4, /* ger_d */ + 24, /* gei_d */ + 4, /* gtr_d */ + 24, /* gti_d */ + 8, /* ner_d */ + 28, /* nei_d */ + 28, /* unltr_d */ + 48, /* unlti_d */ + 28, /* unler_d */ + 48, /* unlei_d */ + 28, /* uneqr_d */ + 48, /* uneqi_d */ + 28, /* unger_d */ + 48, /* ungei_d */ + 28, /* ungtr_d */ + 48, /* ungti_d */ + 40, /* ltgtr_d */ + 60, /* ltgti_d */ + 28, /* ordr_d */ + 48, /* ordi_d */ + 20, /* unordr_d */ + 40, /* unordi_d */ + 4, /* truncr_d_i */ + 4, /* truncr_d_l */ + 4, /* extr_d */ + 4, /* extr_f_d */ + 4, /* movr_d */ + 20, /* movi_d */ + 4, /* ldr_d */ + 12, /* ldi_d */ + 8, /* ldxr_d */ + 16, /* ldxi_d */ + 4, /* str_d */ + 12, /* sti_d */ + 8, /* stxr_d */ + 16, /* stxi_d */ + 8, /* bltr_d */ + 28, /* blti_d */ + 8, /* bler_d */ + 28, /* blei_d */ + 8, /* beqr_d */ + 28, /* beqi_d */ + 8, /* bger_d */ + 28, /* bgei_d */ + 8, /* bgtr_d */ + 28, /* bgti_d */ + 8, /* bner_d */ + 28, /* bnei_d */ + 32, /* bunltr_d */ + 52, /* bunlti_d */ + 32, /* bunler_d */ + 52, /* bunlei_d */ + 32, /* buneqr_d */ + 52, /* buneqi_d */ + 32, /* bunger_d */ + 52, /* bungei_d */ + 32, /* bungtr_d */ + 52, /* bungti_d */ + 44, /* bltgtr_d */ + 64, /* bltgti_d */ + 32, /* bordr_d */ + 52, /* bordi_d */ + 24, /* bunordr_d */ + 44, /* bunordi_d */ + 0, /* pushargr_d */ + 0, /* pushargi_d */ + 0, /* retr_d */ + 0, /* reti_d */ + 0, /* retval_d */ + 4, /* movr_w_f */ + 8, /* movi_w_f */ + 0, /* movr_ww_d */ + 16, /* movi_ww_d */ + 4, /* movr_w_d */ + 0, /* movi_w_d */ + 0, /* movr_f_w */ + 4, /* movi_f_w */ + 0, /* movr_d_ww */ + 0, /* movi_d_ww */ + 4, /* movr_d_w */ + 16, /* movi_d_w */ + 10, /* clor */ + 6, /* cloi */ + 12, /* clzr */ + 6, /* clzi */ + 10, /* ctor */ + 6, /* ctoi */ + 12, /* ctzr */ + 6, /* ctzi */ + 14, /* rbitr */ + 6, /* rbiti */ + 14, /* 
popcntr */ + 6, /* popcnti */ + 14, /* lrotr */ + 14, /* lroti */ + 14, /* rrotr */ + 14, /* rroti */ + 8, /* extr */ + 6, /* exti */ + 4, /* extr_u */ + 6, /* exti_u */ + 4, /* depr */ + 10, /* depi */ + 18, /* qlshr */ + 8, /* qlshi */ + 18, /* qlshr_u */ + 8, /* qlshi_u */ + 18, /* qrshr */ + 8, /* qrshi */ + 18, /* qrshr_u */ + 8, /* qrshi_u */ + 16, /* unldr */ + 20, /* unldi */ + 16, /* unldr_u */ + 20, /* unldi_u */ + 44, /* unstr */ + 28, /* unsti */ + 32, /* unldr_x */ + 40, /* unldi_x */ + 28, /* unstr_x */ + 40, /* unsti_x */ + 4, /* fmar_f */ + 0, /* fmai_f */ + 4, /* fmsr_f */ + 0, /* fmsi_f */ + 4, /* fmar_d */ + 0, /* fmai_d */ + 4, /* fmsr_d */ + 0, /* fmsi_d */ + 4, /* fnmar_f */ + 0, /* fnmai_f */ + 4, /* fnmsr_f */ + 0, /* fnmsi_f */ + 4, /* fnmar_d */ + 0, /* fnmai_d */ + 4, /* fnmsr_d */ + 0, /* fnmsi_d */ + 8, /* hmulr */ + 16, /* hmuli */ + 8, /* hmulr_u */ + 16, /* hmuli_u */ + 8, /* ldxbr_c */ + 18, /* ldxbi_c */ + 12, /* ldxar_c */ + 18, /* ldxai_c */ + 4, /* ldxbr_uc */ + 18, /* ldxbi_uc */ + 8, /* ldxar_uc */ + 18, /* ldxai_uc */ + 4, /* ldxbr_s */ + 18, /* ldxbi_s */ + 8, /* ldxar_s */ + 18, /* ldxai_s */ + 4, /* ldxbr_us */ + 18, /* ldxbi_us */ + 8, /* ldxar_us */ + 18, /* ldxai_us */ + 4, /* ldxbr_i */ + 18, /* ldxbi_i */ + 8, /* ldxar_i */ + 18, /* ldxai_i */ + 0, /* ldxbr_ui */ + 0, /* ldxbi_ui */ + 0, /* ldxar_ui */ + 0, /* ldxai_ui */ + 0, /* ldxbr_l */ + 0, /* ldxbi_l */ + 0, /* ldxar_l */ + 0, /* ldxai_l */ + 4, /* ldxbr_f */ + 18, /* ldxbi_f */ + 8, /* ldxar_f */ + 18, /* ldxai_f */ + 4, /* ldxbr_d */ + 18, /* ldxbi_d */ + 8, /* ldxar_d */ + 18, /* ldxai_d */ + 4, /* stxbr_c */ + 18, /* stxbi_c */ + 8, /* stxar_c */ + 18, /* stxai_c */ + 4, /* stxbr_s */ + 18, /* stxbi_s */ + 8, /* stxar_s */ + 18, /* stxai_s */ + 4, /* stxbr_i */ + 18, /* stxbi_i */ + 8, /* stxar_i */ + 18, /* stxai_i */ + 0, /* stxbr_l */ + 0, /* stxbi_l */ + 0, /* stxar_l */ + 0, /* stxai_l */ + 4, /* stxbr_f */ + 18, /* stxbi_f */ + 8, /* stxar_f */ + 18, 
/* stxai_f */ + 4, /* stxbr_d */ + 18, /* stxbi_d */ + 8, /* stxar_d */ + 18, /* stxai_d */ diff --git a/deps/lightning/lib/jit_sh.c b/deps/lightning/lib/jit_sh.c new file mode 100644 index 000000000..9806f1472 --- /dev/null +++ b/deps/lightning/lib/jit_sh.c @@ -0,0 +1,2215 @@ +/* + * Copyright (C) 2022 Free Software Foundation, Inc. + * + * This file is part of GNU lightning. + * + * GNU lightning is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU lightning is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * License for more details. + * + * Authors: + * Paul Cercueil + */ + +# define NUM_WORD_ARGS 4 +# define NUM_FLOAT_ARGS 8 +# define STACK_SLOT 4 +# define STACK_SHIFT 2 + +#define jit_arg_reg_p(i) ((i) >= 0 && (i) < NUM_WORD_ARGS) +#define jit_arg_f_reg_p(i) ((i) >= 0 && (i) < NUM_FLOAT_ARGS) + +#define fpr_args_inverted() (__BYTE_ORDER == __LITTLE_ENDIAN && !SH_SINGLE_ONLY) + +#if __BYTE_ORDER == __LITTLE_ENDIAN +# define C_DISP 0 +# define S_DISP 0 +# define I_DISP 0 +#else +# define C_DISP STACK_SLOT - sizeof(jit_int8_t) +# define S_DISP STACK_SLOT - sizeof(jit_int16_t) +# define I_DISP STACK_SLOT - sizeof(jit_int32_t) +#endif + +#define jit_make_arg(node,code) _jit_make_arg(_jit,node,code) +static jit_node_t *_jit_make_arg(jit_state_t*,jit_node_t*,jit_code_t); +#define jit_make_arg_f(node) _jit_make_arg_f(_jit,node) +static jit_node_t *_jit_make_arg_f(jit_state_t*,jit_node_t*); +#define jit_make_arg_d(node) _jit_make_arg_d(_jit,node) +static jit_node_t *_jit_make_arg_d(jit_state_t*,jit_node_t*); +#define load_const(uniq,r0,i0) _load_const(_jit,uniq,r0,i0) +static void 
_load_const(jit_state_t*,jit_bool_t,jit_int32_t,jit_word_t); +#define load_const_f(uniq,r0,i0) _load_const_f(_jit,uniq,r0,i0) +static void _load_const_f(jit_state_t*,jit_bool_t,jit_int32_t,jit_float32_t); +#define flush_consts(force) _flush_consts(_jit,force) +static void _flush_consts(jit_state_t*,jit_bool_t); +#define invalidate_consts() _invalidate_consts(_jit) +static void _invalidate_consts(jit_state_t*); +#define patch(instr, node) _patch(_jit, instr, node) +static void _patch(jit_state_t*,jit_word_t,jit_node_t*); + +#define PROTO 1 +# include "jit_rewind.c" +# include "jit_sh-cpu.c" +# include "jit_sh-fpu.c" +# include "jit_fallback.c" +#undef PROTO + +jit_register_t _rvs[] = { + { 0x0, "r0" }, + { rc(gpr) | 0x1, "r1" }, + { rc(gpr) | 0x2, "r2" }, + { rc(gpr) | 0x3, "r3" }, + { rc(arg) | rc(gpr) | 0x4, "r4" }, + { rc(arg) | rc(gpr) | 0x5, "r5" }, + { rc(arg) | rc(gpr) | 0x6, "r6" }, + { rc(arg) | rc(gpr) | 0x7, "r7" }, + { rc(sav) | rc(gpr) | 0x8, "r8" }, + { rc(sav) | rc(gpr) | 0x9, "r9" }, + { rc(sav) | rc(gpr) | 0xa, "r10" }, + { rc(sav) | rc(gpr) | 0xb, "r11" }, + { rc(sav) | rc(gpr) | 0xc, "r12" }, + { rc(sav) | rc(gpr) | 0xd, "r13" }, + { rc(sav) | 0xe, "r14" }, + { rc(sav) | 0xf, "r15" }, + { 0x10, "gbr" }, + + /* Only use half of the floating-point registers. + * This makes it much easier to switch between + * float and double processing. 
*/ + { rc(fpr) | 0x0, "$f0" }, + { 0x1, "$f1" }, + { rc(fpr) | 0x2, "$f2" }, + { 0x3, "$f3" }, + { rc(fpr) | 0x4, "$f4" }, + { 0x5, "$f5" }, + { rc(fpr) | 0x6, "$f6" }, + { 0x7, "$f7" }, + { rc(fpr) | 0x8, "$f8" }, + { 0x9, "$f9" }, + { rc(fpr) | 0xa, "$f10" }, + { 0xb, "$f11" }, + { rc(fpr) | 0xc, "$f12" }, + { 0xd, "$f13" }, + { rc(fpr) | 0xe, "$f14" }, + { 0xf, "$f15" }, + + { _XF0, "$xf0" }, + { _XF1, "$xf1" }, + { _XF2, "$xf2" }, + { _XF3, "$xf3" }, + { _XF4, "$xf4" }, + { _XF5, "$xf5" }, + { _XF6, "$xf6" }, + { _XF7, "$xf7" }, + { _XF8, "$xf8" }, + { _XF9, "$xf9" }, + { _XF10, "$xf10" }, + { _XF11, "$xf11" }, + { rc(sav) | _XF12, "$xf12" }, + { rc(sav) | _XF13, "$xf13" }, + { rc(sav) | _XF14, "$xf14" }, + { rc(sav) | _XF15, "$xf15" }, +}; + +typedef struct jit_va_list { + jit_pointer_t bgpr; + jit_pointer_t egpr; + jit_pointer_t bfpr; + jit_pointer_t efpr; + jit_pointer_t over; +} jit_va_list_t; + +static jit_bool_t jit_uses_fpu(jit_code_t code) +{ + switch (code) { + case jit_code_retr_f: + case jit_code_retr_d: + case jit_code_pushargr_f: + case jit_code_pushargr_d: + case jit_code_reti_f: + case jit_code_pushargi_f: + case jit_code_reti_d: + case jit_code_pushargi_d: + case jit_code_arg_f: + case jit_code_arg_d: + case jit_code_retval_f: + case jit_code_retval_d: + case jit_code_getarg_f: + case jit_code_getarg_d: + case jit_code_putargr_f: + case jit_code_putargr_d: + case jit_code_putargi_f: + case jit_code_putargi_d: + case jit_code_ldi_f: + case jit_code_ldi_d: + case jit_code_movi_w_f: + case jit_code_movi_w_d: + case jit_code_movi_ww_d: + case jit_code_movi_f: + case jit_code_movi_f_w: + case jit_code_negi_f: + case jit_code_absi_f: + case jit_code_sqrti_f: + case jit_code_movi_d: + case jit_code_movi_d_w: + case jit_code_negi_d: + case jit_code_absi_d: + case jit_code_sqrti_d: + case jit_code_truncr_f_i: + case jit_code_truncr_f_l: + case jit_code_truncr_d_i: + case jit_code_truncr_d_l: + case jit_code_negr_f: + case jit_code_absr_f: + case 
jit_code_sqrtr_f: + case jit_code_movr_f: + case jit_code_extr_f: + case jit_code_extr_d_f: + case jit_code_ldr_f: + case jit_code_negr_d: + case jit_code_absr_d: + case jit_code_sqrtr_d: + case jit_code_movr_d: + case jit_code_extr_d: + case jit_code_extr_f_d: + case jit_code_ldr_d: + case jit_code_movr_w_f: + case jit_code_movr_f_w: + case jit_code_movr_w_d: + case jit_code_movr_d_w: + case jit_code_va_arg_d: + case jit_code_ldxi_f: + case jit_code_ldxi_d: + case jit_code_addi_f: + case jit_code_subi_f: + case jit_code_rsbi_f: + case jit_code_muli_f: + case jit_code_divi_f: + case jit_code_lti_f: + case jit_code_lei_f: + case jit_code_eqi_f: + case jit_code_gei_f: + case jit_code_gti_f: + case jit_code_nei_f: + case jit_code_unlti_f: + case jit_code_unlei_f: + case jit_code_uneqi_f: + case jit_code_ungei_f: + case jit_code_ungti_f: + case jit_code_ltgti_f: + case jit_code_ordi_f: + case jit_code_unordi_f: + case jit_code_addi_d: + case jit_code_subi_d: + case jit_code_rsbi_d: + case jit_code_muli_d: + case jit_code_divi_d: + case jit_code_lti_d: + case jit_code_lei_d: + case jit_code_eqi_d: + case jit_code_gei_d: + case jit_code_gti_d: + case jit_code_nei_d: + case jit_code_unlti_d: + case jit_code_unlei_d: + case jit_code_uneqi_d: + case jit_code_ungei_d: + case jit_code_ungti_d: + case jit_code_ltgti_d: + case jit_code_ordi_d: + case jit_code_unordi_d: + case jit_code_addr_f: + case jit_code_subr_f: + case jit_code_mulr_f: + case jit_code_divr_f: + case jit_code_ltr_f: + case jit_code_ler_f: + case jit_code_eqr_f: + case jit_code_ger_f: + case jit_code_gtr_f: + case jit_code_ner_f: + case jit_code_unltr_f: + case jit_code_unler_f: + case jit_code_uneqr_f: + case jit_code_unger_f: + case jit_code_ungtr_f: + case jit_code_ltgtr_f: + case jit_code_ordr_f: + case jit_code_unordr_f: + case jit_code_ldxr_f: + case jit_code_addr_d: + case jit_code_subr_d: + case jit_code_mulr_d: + case jit_code_divr_d: + case jit_code_ltr_d: + case jit_code_ler_d: + case 
jit_code_eqr_d: + case jit_code_ger_d: + case jit_code_gtr_d: + case jit_code_ner_d: + case jit_code_unltr_d: + case jit_code_unler_d: + case jit_code_uneqr_d: + case jit_code_unger_d: + case jit_code_ungtr_d: + case jit_code_ltgtr_d: + case jit_code_ordr_d: + case jit_code_unordr_d: + case jit_code_ldxr_d: + case jit_code_movr_ww_d: + case jit_code_sti_f: + case jit_code_sti_d: + case jit_code_blti_f: + case jit_code_blei_f: + case jit_code_beqi_f: + case jit_code_bgei_f: + case jit_code_bgti_f: + case jit_code_bnei_f: + case jit_code_bunlti_f: + case jit_code_bunlei_f: + case jit_code_buneqi_f: + case jit_code_bungei_f: + case jit_code_bungti_f: + case jit_code_bltgti_f: + case jit_code_bordi_f: + case jit_code_bunordi_f: + case jit_code_blti_d: + case jit_code_blei_d: + case jit_code_beqi_d: + case jit_code_bgei_d: + case jit_code_bgti_d: + case jit_code_bnei_d: + case jit_code_bunlti_d: + case jit_code_bunlei_d: + case jit_code_buneqi_d: + case jit_code_bungei_d: + case jit_code_bungti_d: + case jit_code_bltgti_d: + case jit_code_bordi_d: + case jit_code_bunordi_d: + case jit_code_str_f: + case jit_code_str_d: + case jit_code_stxi_f: + case jit_code_stxi_d: + case jit_code_bltr_f: + case jit_code_bler_f: + case jit_code_beqr_f: + case jit_code_bger_f: + case jit_code_bgtr_f: + case jit_code_bner_f: + case jit_code_bunltr_f: + case jit_code_bunler_f: + case jit_code_buneqr_f: + case jit_code_bunger_f: + case jit_code_bungtr_f: + case jit_code_bltgtr_f: + case jit_code_bordr_f: + case jit_code_bunordr_f: + case jit_code_bltr_d: + case jit_code_bler_d: + case jit_code_beqr_d: + case jit_code_bger_d: + case jit_code_bgtr_d: + case jit_code_bner_d: + case jit_code_bunltr_d: + case jit_code_bunler_d: + case jit_code_buneqr_d: + case jit_code_bunger_d: + case jit_code_bungtr_d: + case jit_code_bltgtr_d: + case jit_code_bordr_d: + case jit_code_bunordr_d: + case jit_code_stxr_f: + case jit_code_stxr_d: + case jit_code_fmar_f: + case jit_code_fmar_d: + case 
jit_code_fmsr_f: + case jit_code_fmsr_d: + case jit_code_fnmar_f: + case jit_code_fnmar_d: + case jit_code_fnmsr_f: + case jit_code_fnmsr_d: + case jit_code_fmai_f: + case jit_code_fmsi_f: + case jit_code_fnmai_f: + case jit_code_fnmsi_f: + case jit_code_fmai_d: + case jit_code_fmsi_d: + case jit_code_fnmai_d: + case jit_code_fnmsi_d: + case jit_code_ldxbi_f: + case jit_code_ldxai_f: + case jit_code_ldxbi_d: + case jit_code_ldxai_d: + case jit_code_ldxbr_f: + case jit_code_ldxar_f: + case jit_code_ldxbr_d: + case jit_code_ldxar_d: + case jit_code_stxbi_f: + case jit_code_stxai_f: + case jit_code_stxbi_d: + case jit_code_stxai_d: + case jit_code_stxbr_f: + case jit_code_stxar_f: + case jit_code_stxbr_d: + case jit_code_stxar_d: + return 1; + default: + return 0; + } +} + +void +jit_get_cpu(void) +{ +} + +void +_jit_init(jit_state_t *_jit) +{ + _jitc->reglen = jit_size(_rvs) - 1; +} + +void +_jit_prolog(jit_state_t *_jit) +{ + jit_int32_t offset; + + if (_jitc->function) + jit_epilog(); + + assert(jit_regset_cmp_ui(&_jitc->regarg, 0) == 0); + + jit_regset_set_ui(&_jitc->regsav, 0); + offset = _jitc->functions.offset; + + if (offset >= _jitc->functions.length) { + jit_realloc((jit_pointer_t *)&_jitc->functions.ptr, + _jitc->functions.length * sizeof(jit_function_t), + (_jitc->functions.length + 16) * sizeof(jit_function_t)); + _jitc->functions.length += 16; + } + + _jitc->function = _jitc->functions.ptr + _jitc->functions.offset++; + _jitc->function->self.size = stack_framesize; + _jitc->function->self.argi = _jitc->function->self.argf = + _jitc->function->self.aoff = _jitc->function->self.alen = 0; + _jitc->function->self.call = jit_call_default; + + jit_alloc((jit_pointer_t *)&_jitc->function->regoff, + _jitc->reglen * sizeof(jit_int32_t)); + + /* _no_link here does not mean the jit_link() call can be removed + * by rewriting as: + * _jitc->function->prolog = jit_new_node(jit_code_prolog); + */ + _jitc->function->prolog = jit_new_node_no_link(jit_code_prolog); + 
jit_link(_jitc->function->prolog); + + _jitc->function->prolog->w.w = offset; + _jitc->function->epilog = jit_new_node_no_link(jit_code_epilog); + /* u: label value + * v: offset in blocks vector + * w: offset in functions vector + */ + _jitc->function->epilog->w.w = offset; + + jit_regset_new(&_jitc->function->regset); +} + +jit_int32_t +_jit_allocai(jit_state_t *_jit, jit_int32_t length) +{ + assert(_jitc->function); + switch (length) { /* clear low bits of aoff according to the requested size */ + case 0: case 1: break; + case 2: _jitc->function->self.aoff &= -2; break; /* multiple of 2 */ + case 3: case 4: _jitc->function->self.aoff &= -4; break; /* multiple of 4 */ + default: _jitc->function->self.aoff &= -8; break; /* multiple of 8 */ + } + _jitc->function->self.aoff -= length; /* the new block sits below the aligned offset */ + if (!_jitc->realize) { + jit_inc_synth_ww(allocai, _jitc->function->self.aoff, length); + jit_dec_synth(); + } + return (_jitc->function->self.aoff); +} + +void +_jit_allocar(jit_state_t *_jit, jit_int32_t u, jit_int32_t v) +{ + jit_int32_t reg; + assert(_jitc->function); + jit_inc_synth_ww(allocar, u, v); + if (!_jitc->function->allocar) { /* first allocar in this function: reserve an int32 slot (aoffoff) */ + _jitc->function->aoffoff = jit_allocai(sizeof(jit_int32_t)); + _jitc->function->allocar = 1; + } + reg = jit_get_reg(jit_class_gpr); + jit_negr(reg, v); /* reg = -v */ + jit_andi(reg, reg, -8); /* low three bits cleared */ + jit_ldxi_i(u, JIT_FP, _jitc->function->aoffoff); /* u = value currently stored in the aoffoff slot */ + jit_addr(u, u, reg); + jit_addr(JIT_SP, JIT_SP, reg); /* SP is adjusted by the same amount */ + jit_stxi_i(_jitc->function->aoffoff, JIT_FP, u); /* store the updated value back in the slot */ + jit_unget_reg(reg); + jit_dec_synth(); +} + +void +_jit_ret(jit_state_t *_jit) +{ + jit_node_t *instr; + assert(_jitc->function); + jit_inc_synth(ret); + /* jump to epilog */ + instr = jit_jmpi(); + jit_patch_at(instr, _jitc->function->epilog); + jit_dec_synth(); +} + +void +_jit_retr(jit_state_t *_jit, jit_int32_t u, jit_code_t code) +{ + jit_code_inc_synth_w(code, u); + jit_movr(JIT_RET, u); /* copy the value into JIT_RET, then take the common return path */ + jit_live(JIT_RET); + jit_ret(); + jit_dec_synth(); +} + +void +_jit_reti(jit_state_t *_jit, jit_word_t u, jit_code_t code) +{ + jit_code_inc_synth_w(code, u); + jit_movi(JIT_RET, u); /* load the immediate into JIT_RET, then take the common return path */ + jit_ret(); + jit_dec_synth(); +} + +void 
+_jit_epilog(jit_state_t *_jit) +{ + assert(_jitc->function); + assert(_jitc->function->epilog->next == NULL); + jit_link(_jitc->function->epilog); + _jitc->function = NULL; +} + +void +_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v, jit_code_t code) +{ + assert_putarg_type(code, v->code); + jit_code_inc_synth_wp(code, u, v); + if (jit_arg_reg_p(v->u.w)) + jit_movr(JIT_RA0 + v->u.w, u); + else + jit_stxi(v->u.w, JIT_FP, u); + jit_dec_synth(); +} + +void +_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v, jit_code_t code) +{ + jit_int32_t regno; + assert_putarg_type(code, v->code); + jit_code_inc_synth_wp(code, u, v); + if (jit_arg_reg_p(v->u.w)) + jit_movi(JIT_RA0 + v->u.w, u); + else { + regno = jit_get_reg(jit_class_gpr); + jit_movi(regno, u); + jit_stxi(v->u.w, JIT_FP, regno); + jit_unget_reg(regno); + } + jit_dec_synth(); +} + +jit_bool_t +_jit_regarg_p(jit_state_t *_jit, jit_node_t *node, jit_int32_t regno) +{ + jit_int32_t spec; + + spec = jit_class(_rvs[regno].spec); + if (spec & jit_class_arg) { + if (spec & jit_class_gpr) { + regno = JIT_RA0 + regno; + if (regno >= 0 && regno < node->v.w) + return (1); + } + else if (spec & jit_class_fpr) { + regno = JIT_FA0 + regno; + if (regno >= 0 && regno < node->w.w) + return (1); + } + } + + return (0); +} + +jit_pointer_t +_emit_code(jit_state_t *_jit) +{ + jit_node_t *node; + jit_node_t *temp; + jit_word_t word; + jit_word_t value; + jit_int32_t offset; + struct { + jit_node_t *node; + jit_uint8_t *data; + jit_word_t word; +#if DEVEL_DISASSEMBLER + jit_word_t prevw; +#endif +#if DISASSEMBLER + jit_int32_t info_offset; +#endif + jit_int32_t const_offset; + jit_int32_t patch_offset; + } undo; +#if DEVEL_DISASSEMBLER + jit_word_t prevw; +#endif + + _jitc->function = NULL; + _jitc->no_flag = 0; + _jitc->mode_d = SH_DEFAULT_FPU_MODE; + _jitc->uses_fpu = 0; + + jit_reglive_setup(); + + _jitc->consts.data = NULL; + _jitc->consts.offset = _jitc->consts.length = 0; + + undo.word = 0; + undo.node = 
NULL; + undo.data = NULL; +#if DISASSEMBLER + undo.info_offset = +#endif + undo.const_offset = undo.patch_offset = 0; +#define case_rr(name, type) \ + case jit_code_##name##r##type: \ + name##r##type(rn(node->u.w), rn(node->v.w)); \ + break +#define case_rw(name, type) \ + case jit_code_##name##i##type: \ + name##i##type(rn(node->u.w), node->v.w); \ + break +#define case_wr(name, type) \ + case jit_code_##name##i##type: \ + name##i##type(node->u.w, rn(node->v.w)); \ + break +#define case_rrr(name, type) \ + case jit_code_##name##r##type: \ + name##r##type(rn(node->u.w), \ + rn(node->v.w), rn(node->w.w)); \ + break +#define case_rrrr(name, type) \ + case jit_code_##name##r##type: \ + name##r##type(rn(node->u.q.l), rn(node->u.q.h), \ + rn(node->v.w), rn(node->w.w)); \ + break +#define case_rrw(name, type) \ + case jit_code_##name##i##type: \ + name##i##type(rn(node->u.w), rn(node->v.w), node->w.w); \ + break +#define case_rrx(name, type) \ + case jit_code_##name##i##type: \ + name##i##type(rn(node->u.w), \ + rn(node->v.w), node->w.w); \ + break +#define case_rrX(name, type) \ + case jit_code_##name##r##type: \ + generic_##name##r##type(rn(node->u.w), \ + rn(node->v.w), rn(node->w.w)); \ + break +#define case_xrr(name, type) \ + case jit_code_##name##i##type: \ + name##i##type(node->u.w, rn(node->v.w), \ + rn(node->w.w)); \ + break +#define case_Xrr(name, type) \ + case jit_code_##name##r##type: \ + generic_##name##r##type(rn(node->u.w), rn(node->v.w), \ + rn(node->w.w)); \ + break +#define case_rrrw(name, type) \ + case jit_code_##name##i##type: \ + name##i##type(rn(node->u.q.l), rn(node->u.q.h), \ + rn(node->v.w), node->w.w); \ + break +#define case_rqr(name, type) \ + case jit_code_##name##r##type: \ + name##r##type(rn(node->u.w), rn(node->v.q.l), \ + rn(node->v.q.h), rn(node->w.w)); \ + case jit_code_##name##i##type: \ + break; +#define case_rrf(name) \ + case jit_code_##name##i_f: \ + name##i_f(rn(node->u.w), rn(node->v.w), node->w.f); \ + break +#define 
case_rrd(name) \ + case jit_code_##name##i_d: \ + name##i_d(rn(node->u.w), rn(node->v.w), node->w.d); \ + break +#define case_wrr(name, type) \ + case jit_code_##name##i##type: \ + name##i##type(node->u.w, rn(node->v.w), rn(node->w.w)); \ + break +#define case_brr(name, type) \ + case jit_code_##name##r##type: \ + temp = node->u.n; \ + assert(temp->code == jit_code_label || \ + temp->code == jit_code_epilog); \ + if (temp->flag & jit_flag_patch) \ + name##r##type(temp->u.w, rn(node->v.w), \ + rn(node->w.w)); \ + else { \ + word = _jit->code.length \ + - (_jit->pc.uc - _jit->code.ptr); \ + if (word < 4094) { \ + word = name##r##type(0, rn(node->v.w), \ + rn(node->w.w)); \ + } else { \ + word = name##r##type##_p(_jit->pc.w, \ + rn(node->v.w), \ + rn(node->w.w), 1); \ + } \ + patch(word, node); \ + } \ + break +#define case_brw(name, type) \ + case jit_code_##name##i##type: \ + temp = node->u.n; \ + assert(temp->code == jit_code_label || \ + temp->code == jit_code_epilog); \ + if (temp->flag & jit_flag_patch) \ + name##i##type(temp->u.w, \ + rn(node->v.w), node->w.w); \ + else { \ + word = _jit->code.length \ + - (_jit->pc.uc - _jit->code.ptr); \ + if (word < 4094) { \ + word = name##i##type(0, rn(node->v.w), \ + node->w.w); \ + } else { \ + word = name##i##type##_p(_jit->pc.w, \ + rn(node->v.w), \ + node->w.w, 1); \ + } \ + patch(word, node); \ + } \ + break; +#define case_brf(name) \ + case jit_code_##name##i_f: \ + temp = node->u.n; \ + assert(temp->code == jit_code_label || \ + temp->code == jit_code_epilog); \ + if (temp->flag & jit_flag_patch) \ + name##i_f(temp->u.w, rn(node->v.w), node->w.f); \ + else { \ + word = name##i_f_p(_jit->pc.w, rn(node->v.w), \ + node->w.f, 1); \ + patch(word, node); \ + } \ + break +#define case_brd(name) \ + case jit_code_##name##i_d: \ + temp = node->u.n; \ + assert(temp->code == jit_code_label || \ + temp->code == jit_code_epilog); \ + if (temp->flag & jit_flag_patch) \ + name##i_d(temp->u.w, rn(node->v.w), node->w.d); \ + else { 
\ + word = name##i_d_p(_jit->pc.w, rn(node->v.w), \ + node->w.d, 1); \ + patch(word, node); \ + } \ + break +#if DEVEL_DISASSEMBLER + prevw = _jit->pc.w; +#endif + if (SH_HAS_FPU) { + for (node = _jitc->head; node && !_jitc->uses_fpu; node = node->next) + _jitc->uses_fpu = jit_uses_fpu(node->code); + } + + for (node = _jitc->head; node; node = node->next) { + if (_jit->pc.uc >= _jitc->code.end) + return (NULL); + +#if DEVEL_DISASSEMBLER + node->offset = (jit_uword_t)_jit->pc.w - (jit_uword_t)prevw; + prevw = _jit->pc.w; +#endif + value = jit_classify(node->code); + jit_regarg_set(node, value); + switch (node->code) { + case jit_code_align: + /* Must align to a power of two */ + assert(!(node->u.w & (node->u.w - 1))); + if ((word = _jit->pc.w & (node->u.w - 1))) + nop(node->u.w - word); + break; + case jit_code_skip: + nop((node->u.w + 3) & ~3); + break; + case jit_code_note: case jit_code_name: + node->u.w = _jit->pc.w; + break; + case jit_code_label: + /* remember label is defined */ + node->flag |= jit_flag_patch; + /* Reset FPU mode */ + set_fmode_no_r0(_jit, SH_DEFAULT_FPU_MODE); + node->u.w = _jit->pc.w; + break; + case_rrr(add,); + case_rrw(add,); + case_rrr(addc,); + case_rrw(addc,); + case_rrr(addx,); + case_rrw(addx,); + case_rrr(sub,); + case_rrw(sub,); + case_rrr(subc,); + case_rrw(subc,); + case_rrr(subx,); + case_rrw(subx,); + case_rrw(rsb,); + case_rrr(mul,); + case_rrw(mul,); + case_rrr(hmul,); + case_rrw(hmul,); + case_rrr(hmul, _u); + case_rrw(hmul, _u); + case_rrrr(qmul,); + case_rrrw(qmul,); + case_rrrr(qmul, _u); + case_rrrw(qmul, _u); + case_rrr(div,); + case_rrw(div,); + case_rrr(div, _u); + case_rrw(div, _u); + case_rrrr(qdiv,); + case_rrrw(qdiv,); + case_rrrr(qdiv, _u); + case_rrrw(qdiv, _u); + case_rrr(rem,); + case_rrw(rem,); + case_rrr(rem, _u); + case_rrw(rem, _u); + case_rrr(lsh,); + case_rrw(lsh,); + case_rrrr(qlsh,); + case_rrrw(qlsh,); + case_rrrr(qlsh, _u); + case_rrrw(qlsh, _u); + case_rrr(rsh,); + case_rrw(rsh,); + case_rrr(rsh, 
_u); + case_rrw(rsh, _u); + case_rrrr(qrsh,); + case_rrrw(qrsh,); + case_rrrr(qrsh, _u); + case_rrrw(qrsh, _u); + case_rr(neg,); + case_rr(com,); + case_rr(clo,); + case_rr(clz,); + case_rr(cto,); + case_rr(ctz,); + case_rr(rbit,); + case_rr(popcnt,); + case_rrr(lrot,); + case_rrw(lrot,); + case_rrr(rrot,); + case_rrw(rrot,); + case_rrr(and,); + case_rrw(and,); + case_rrr(or,); + case_rrw(or,); + case_rrr(xor,); + case_rrw(xor,); + case_rr(trunc, _f_i); + case_rr(trunc, _d_i); + case_rr(ld, _c); + case_rw(ld, _c); + case_rr(ld, _uc); + case_rw(ld, _uc); + case_rr(ld, _s); + case_rw(ld, _s); + case_rr(ld, _us); + case_rw(ld, _us); + case_rr(ld, _i); + case_rw(ld, _i); + case_rrr(ldx, _c); + case_rrw(ldx, _c); + case_rrr(ldx, _uc); + case_rrw(ldx, _uc); + case_rrr(ldx, _s); + case_rrw(ldx, _s); + case_rrr(ldx, _us); + case_rrw(ldx, _us); + case_rrr(ldx, _i); + case_rrw(ldx, _i); + case jit_code_unldr: + unldr(rn(node->u.w), rn(node->v.w), node->w.w); + break; + case jit_code_unldi: + unldi(rn(node->u.w), node->v.w, node->w.w); + break; + case jit_code_unldr_u: + unldr_u(rn(node->u.w), rn(node->v.w), node->w.w); + break; + case jit_code_unldi_u: + unldi_u(rn(node->u.w), node->v.w, node->w.w); + break; + case_rrx(ldxb, _c); case_rrX(ldxb, _c); + case_rrx(ldxa, _c); case_rrX(ldxa, _c); + case_rrx(ldxb, _uc); case_rrX(ldxb, _uc); + case_rrx(ldxa, _uc); case_rrX(ldxa, _uc); + case_rrx(ldxb, _s); case_rrX(ldxb, _s); + case_rrx(ldxa, _s); case_rrX(ldxa, _s); + case_rrx(ldxb, _us); case_rrX(ldxb, _us); + case_rrx(ldxa, _us); case_rrX(ldxa, _us); + case_rrx(ldxb, _i); case_rrX(ldxb, _i); + case_rrx(ldxa, _i); case_rrX(ldxa, _i); + case_rrx(ldxb, _f); case_rrX(ldxb, _f); + case_rrx(ldxa, _f); case_rrX(ldxa, _f); + case_rrx(ldxb, _d); case_rrX(ldxb, _d); + case_rrx(ldxa, _d); case_rrX(ldxa, _d); + case_rr(st, _c); + case_wr(st, _c); + case_rr(st, _s); + case_wr(st, _s); + case_rr(st, _i); + case_wr(st, _i); + case_rrr(stx, _c); + case_wrr(stx, _c); + case_rrr(stx, _s); + 
case_wrr(stx, _s); + case_rrr(stx, _i); + case_wrr(stx, _i); + case jit_code_unstr: + unstr(rn(node->u.w), rn(node->v.w), node->w.w); + break; + case jit_code_unsti: + unsti(node->u.w, rn(node->v.w), node->w.w); + break; + case_xrr(stxb, _c); case_Xrr(stxb, _c); + case_xrr(stxa, _c); case_Xrr(stxa, _c); + case_xrr(stxb, _s); case_Xrr(stxb, _s); + case_xrr(stxa, _s); case_Xrr(stxa, _s); + case_xrr(stxb, _i); case_Xrr(stxb, _i); + case_xrr(stxa, _i); case_Xrr(stxa, _i); + case_xrr(stxb, _f); case_rrX(stxb, _f); + case_xrr(stxa, _f); case_rrX(stxa, _f); + case_xrr(stxb, _d); case_rrX(stxb, _d); + case_xrr(stxa, _d); case_rrX(stxa, _d); + case_rr(hton, _us); + case_rr(hton, _ui); + case_rr(bswap, _us); + case_rr(bswap, _ui); + case jit_code_extr: + extr(rn(node->u.w), rn(node->v.w), node->w.q.l, node->w.q.h); + break; + case jit_code_extr_u: + extr_u(rn(node->u.w), rn(node->v.w), node->w.q.l, node->w.q.h); + break; + case jit_code_depr: + depr(rn(node->u.w), rn(node->v.w), node->w.q.l, node->w.q.h); + break; + case jit_code_depi: + depi(rn(node->u.w), node->v.w, node->w.q.l, node->w.q.h); + break; + case_rr(ext, _c); + case_rr(ext, _uc); + case_rr(ext, _s); + case_rr(ext, _us); + case_rrr(movn,); + case_rrr(movz,); + case_rr(mov,); + case jit_code_movi: + if (node->flag & jit_flag_node) { + temp = node->v.n; + if (temp->code == jit_code_data || + (temp->code == jit_code_label && + (temp->flag & jit_flag_patch))) + movi(rn(node->u.w), temp->u.w); + else { + assert(temp->code == jit_code_label || + temp->code == jit_code_epilog); + word = movi_p(rn(node->u.w), temp->u.w); + patch(word, node); + } + } + else + movi(rn(node->u.w), node->v.w); + break; + case_rrr(lt,); + case_rrw(lt,); + case_rrr(lt, _u); + case_rrw(lt, _u); + case_rrr(le,); + case_rrw(le,); + case_rrr(le, _u); + case_rrw(le, _u); + case_rrr(eq,); + case_rrw(eq,); + case_rrr(ge,); + case_rrw(ge,); + case_rrr(ge, _u); + case_rrw(ge, _u); + case_rrr(gt,); + case_rrw(gt,); + case_rrr(gt, _u); + case_rrw(gt, 
_u); + case_rrr(ne,); + case_rrw(ne,); + case_brr(blt,); + case_brw(blt,); + case_brr(blt, _u); + case_brw(blt, _u); + case_brr(ble,); + case_brw(ble,); + case_brr(ble, _u); + case_brw(ble, _u); + case_brr(beq,); + case_brw(beq,); + case_brr(bge,); + case_brw(bge,); + case_brr(bge, _u); + case_brw(bge, _u); + case_brr(bgt,); + case_brw(bgt,); + case_brr(bgt, _u); + case_brw(bgt, _u); + case_brr(bne,); + case_brw(bne,); + case_brr(boadd,); + case_brw(boadd,); + case_brr(boadd, _u); + case_brw(boadd, _u); + case_brr(bxadd,); + case_brw(bxadd,); + case_brr(bxadd, _u); + case_brw(bxadd, _u); + case_brr(bosub,); + case_brw(bosub,); + case_brr(bosub, _u); + case_brw(bosub, _u); + case_brr(bxsub,); + case_brw(bxsub,); + case_brr(bxsub, _u); + case_brw(bxsub, _u); + case_brr(bms,); + case_brw(bms,); + case_brr(bmc,); + case_brw(bmc,); + case_rrr(add, _f); + case_rrf(add); + case_rrr(sub, _f); + case_rrf(sub); + case_rrf(rsb); + case_rrr(mul, _f); + case_rrf(mul); + case_rrr(div, _f); + case_rrf(div); + case_rr(abs, _f); + case_rr(neg, _f); + case_rr(sqrt, _f); + case_rqr(fma, _f); + case_rqr(fms, _f); + case_rqr(fnma, _f); + case_rqr(fnms, _f); + case_rr(ext, _f); + case_rr(ld, _f); + case_rw(ld, _f); + case_rrr(ldx, _f); + case_rrw(ldx, _f); + case jit_code_unldr_x: + unldr_x(rn(node->u.w), rn(node->v.w), node->w.w); + break; + case jit_code_unldi_x: + unldi_x(rn(node->u.w), node->v.w, node->w.w); + break; + case_rr(st, _f); + case_wr(st, _f); + case_rrr(stx, _f); + case_wrr(stx, _f); + case jit_code_unstr_x: + unstr_x(rn(node->u.w), rn(node->v.w), node->w.w); + break; + case jit_code_unsti_x: + unsti_x(node->u.w, rn(node->v.w), node->w.w); + break; + case_rr(mov, _f); + case jit_code_movi_f: + movi_f(rn(node->u.w), node->v.f); + break; + case_rr(ext, _d_f); + case_rrr(lt, _f); + case_rrf(lt); + case_rrr(le, _f); + case_rrf(le); + case_rrr(eq, _f); + case_rrf(eq); + case_rrr(ge, _f); + case_rrf(ge); + case_rrr(gt, _f); + case_rrf(gt); + case_rrr(ne, _f); + case_rrf(ne); + 
case_rrr(unlt, _f); + case_rrf(unlt); + case_rrr(unle, _f); + case_rrf(unle); + case_rrr(uneq, _f); + case_rrf(uneq); + case_rrr(unge, _f); + case_rrf(unge); + case_rrr(ungt, _f); + case_rrf(ungt); + case_rrr(ltgt, _f); + case_rrf(ltgt); + case_rrr(ord, _f); + case_rrf(ord); + case_rrr(unord, _f); + case_rrf(unord); + case_brr(blt, _f); + case_brf(blt); + case_brr(ble, _f); + case_brf(ble); + case_brr(beq, _f); + case_brf(beq); + case_brr(bge, _f); + case_brf(bge); + case_brr(bgt, _f); + case_brf(bgt); + case_brr(bne, _f); + case_brf(bne); + case_brr(bunlt, _f); + case_brf(bunlt); + case_brr(bunle, _f); + case_brf(bunle); + case_brr(buneq, _f); + case_brf(buneq); + case_brr(bunge, _f); + case_brf(bunge); + case_brr(bungt, _f); + case_brf(bungt); + case_brr(bltgt, _f); + case_brf(bltgt); + case_brr(bord, _f); + case_brf(bord); + case_brr(bunord, _f); + case_brf(bunord); + case_rrr(add, _d); + case_rrd(add); + case_rrr(sub, _d); + case_rrd(sub); + case_rrd(rsb); + case_rrr(mul, _d); + case_rrd(mul); + case_rrr(div, _d); + case_rrd(div); + case_rr(abs, _d); + case_rr(neg, _d); + case_rr(sqrt, _d); + case_rqr(fma, _d); + case_rqr(fms, _d); + case_rqr(fnma, _d); + case_rqr(fnms, _d); + case_rr(ext, _d); + case_rr(ld, _d); + case_rw(ld, _d); + case_rrr(ldx, _d); + case_rrw(ldx, _d); + case_rr(st, _d); + case_wr(st, _d); + case_rrr(stx, _d); + case_wrr(stx, _d); + case_rr(mov, _d); + case jit_code_movi_d: + movi_d(rn(node->u.w), node->v.d); + break; + case_rr(ext, _f_d); + case_rrr(lt, _d); + case_rrd(lt); + case_rrr(le, _d); + case_rrd(le); + case_rrr(eq, _d); + case_rrd(eq); + case_rrr(ge, _d); + case_rrd(ge); + case_rrr(gt, _d); + case_rrd(gt); + case_rrr(ne, _d); + case_rrd(ne); + case_rrr(unlt, _d); + case_rrd(unlt); + case_rrr(unle, _d); + case_rrd(unle); + case_rrr(uneq, _d); + case_rrd(uneq); + case_rrr(unge, _d); + case_rrd(unge); + case_rrr(ungt, _d); + case_rrd(ungt); + case_rrr(ltgt, _d); + case_rrd(ltgt); + case_rrr(ord, _d); + case_rrd(ord); + 
case_rrr(unord, _d); + case_rrd(unord); + case_brr(blt, _d); + case_brd(blt); + case_brr(ble, _d); + case_brd(ble); + case_brr(beq, _d); + case_brd(beq); + case_brr(bge, _d); + case_brd(bge); + case_brr(bgt, _d); + case_brd(bgt); + case_brr(bne, _d); + case_brd(bne); + case_brr(bunlt, _d); + case_brd(bunlt); + case_brr(bunle, _d); + case_brd(bunle); + case_brr(buneq, _d); + case_brd(buneq); + case_brr(bunge, _d); + case_brd(bunge); + case_brr(bungt, _d); + case_brd(bungt); + case_brr(bltgt, _d); + case_brd(bltgt); + case_brr(bord, _d); + case_brd(bord); + case_brr(bunord, _d); + case_brd(bunord); + case jit_code_jmpr: + jmpr(rn(node->u.w)); + flush_consts(0); + break; + case jit_code_jmpi: + if (node->flag & jit_flag_node) { + temp = node->u.n; + assert(temp->code == jit_code_label || + temp->code == jit_code_epilog); + if (temp->flag & jit_flag_patch) + jmpi(temp->u.w); + else { + word = jmpi_p(_jit->pc.w); + patch(word, node); + } + } + else + jmpi(node->u.w); + flush_consts(0); + break; + case jit_code_callr: + callr(rn(node->u.w)); + break; + case jit_code_calli: + if (node->flag & jit_flag_node) { + temp = node->u.n; + assert(temp->code == jit_code_label || + temp->code == jit_code_epilog); + if (temp->flag & jit_flag_patch) + calli(temp->u.w); + else { + word = calli_p(_jit->pc.w); + patch(word, node); + } + } + else + calli(node->u.w); + break; + case jit_code_prolog: + _jitc->function = _jitc->functions.ptr + node->w.w; + undo.node = node; + undo.word = _jit->pc.w; +#if DEVEL_DISASSEMBLER + undo.prevw = prevw; +#endif + undo.data = _jitc->consts.data; + undo.const_offset = _jitc->consts.offset; + undo.patch_offset = _jitc->patches.offset; +#if DISASSEMBLER + if (_jitc->data_info.ptr) + undo.info_offset = _jitc->data_info.offset; +#endif + restart_function: + _jitc->again = 0; + prolog(node); + break; + case jit_code_epilog: + assert(_jitc->function == _jitc->functions.ptr + node->w.w); + if (_jitc->again) { + for (temp = undo.node->next; + temp != node; 
temp = temp->next) { + if (temp->code == jit_code_label || + temp->code == jit_code_epilog) + temp->flag &= ~jit_flag_patch; + } + temp->flag &= ~jit_flag_patch; + node = undo.node; + _jit->pc.w = undo.word; +#if DEVEL_DISASSEMBLER + prevw = undo.prevw; +#endif + invalidate_consts(); + _jitc->consts.data = undo.data; + _jitc->consts.offset = undo.const_offset; + _jitc->patches.offset = undo.patch_offset; +#if DISASSEMBLER + if (_jitc->data_info.ptr) + _jitc->data_info.offset = undo.info_offset; +#endif + goto restart_function; + } + /* remember label is defined */ + node->flag |= jit_flag_patch; + node->u.w = _jit->pc.w; + epilog(node); + _jitc->function = NULL; + flush_consts(0); + break; + case jit_code_movr_w_f: + movr_w_f(rn(node->u.w), rn(node->v.w)); + break; + case jit_code_movr_f_w: + movr_f_w(rn(node->u.w), rn(node->v.w)); + break; + case jit_code_movi_f_w: + movi_f_w(rn(node->u.w), node->v.f); + break; + case jit_code_movi_w_f: + movi_w_f(rn(node->u.w), node->v.w); + break; + case jit_code_movr_ww_d: + movr_ww_d(rn(node->u.w), rn(node->v.w), rn(node->w.w)); + break; + case jit_code_movr_d_ww: + movr_d_ww(rn(node->u.w), rn(node->v.w), rn(node->w.w)); + break; + case jit_code_movi_d_ww: + movi_d_ww(rn(node->u.w), rn(node->v.w), node->w.d); + break; + case jit_code_movi_ww_d: + movi_ww_d(rn(node->u.w), node->v.w, node->w.w); + break; + case jit_code_va_start: + vastart(rn(node->u.w)); + break; + case jit_code_va_arg: + vaarg(rn(node->u.w), rn(node->v.w)); + break; + case jit_code_va_arg_d: + vaarg_d(rn(node->u.w), rn(node->v.w)); + break; + case jit_code_live: case jit_code_ellipsis: + case jit_code_va_push: + case jit_code_allocai: case jit_code_allocar: + case jit_code_arg_c: case jit_code_arg_s: + case jit_code_arg_i: + case jit_code_arg_f: case jit_code_arg_d: + case jit_code_va_end: + case jit_code_ret: + case jit_code_retr_c: case jit_code_reti_c: + case jit_code_retr_uc: case jit_code_reti_uc: + case jit_code_retr_s: case jit_code_reti_s: + case 
jit_code_retr_us: case jit_code_reti_us: + case jit_code_retr_i: case jit_code_reti_i: + case jit_code_retr_f: case jit_code_reti_f: + case jit_code_retr_d: case jit_code_reti_d: + case jit_code_getarg_c: case jit_code_getarg_uc: + case jit_code_getarg_s: case jit_code_getarg_us: + case jit_code_getarg_i: case jit_code_getarg_ui: + case jit_code_getarg_l: + case jit_code_getarg_f: case jit_code_getarg_d: + case jit_code_putargr_c: case jit_code_putargi_c: + case jit_code_putargr_uc: case jit_code_putargi_uc: + case jit_code_putargr_s: case jit_code_putargi_s: + case jit_code_putargr_us: case jit_code_putargi_us: + case jit_code_putargr_i: case jit_code_putargi_i: + case jit_code_putargr_f: case jit_code_putargi_f: + case jit_code_putargr_d: case jit_code_putargi_d: + case jit_code_pushargr_c: case jit_code_pushargi_c: + case jit_code_pushargr_uc: case jit_code_pushargi_uc: + case jit_code_pushargr_s: case jit_code_pushargi_s: + case jit_code_pushargr_us: case jit_code_pushargi_us: + case jit_code_pushargr_i: case jit_code_pushargi_i: + case jit_code_pushargr_f: case jit_code_pushargi_f: + case jit_code_pushargr_d: case jit_code_pushargi_d: + case jit_code_retval_c: case jit_code_retval_uc: + case jit_code_retval_s: case jit_code_retval_us: + case jit_code_retval_i: + case jit_code_retval_f: case jit_code_retval_d: + case jit_code_prepare: + case jit_code_finishr: case jit_code_finishi: + break; + case jit_code_casr: + casr(rn(node->u.w), rn(node->v.w), + rn(node->w.q.l), rn(node->w.q.h)); + break; + case jit_code_casi: + casi(rn(node->u.w), node->v.w, + rn(node->w.q.l), rn(node->w.q.h)); + break; + case jit_code_negi_f: case jit_code_absi_f: + case jit_code_sqrti_f: case jit_code_negi_d: + case jit_code_absi_d: case jit_code_sqrti_d: + break; + case jit_code_negi: + negi(rn(node->u.w), node->v.w); + break; + case jit_code_comi: + comi(rn(node->u.w), node->v.w); + break; + case jit_code_exti_c: + exti_c(rn(node->u.w), node->v.w); + break; + case jit_code_exti_uc: + 
exti_uc(rn(node->u.w), node->v.w); + break; + case jit_code_exti_s: + exti_s(rn(node->u.w), node->v.w); + break; + case jit_code_exti_us: + exti_us(rn(node->u.w), node->v.w); + break; + case jit_code_bswapi_us: + bswapi_us(rn(node->u.w), node->v.w); + break; + case jit_code_bswapi_ui: + bswapi_ui(rn(node->u.w), node->v.w); + break; + case jit_code_htoni_us: + htoni_us(rn(node->u.w), node->v.w); + break; + case jit_code_htoni_ui: + htoni_ui(rn(node->u.w), node->v.w); + break; + case jit_code_cloi: + cloi(rn(node->u.w), node->v.w); + break; + case jit_code_clzi: + clzi(rn(node->u.w), node->v.w); + break; + case jit_code_ctoi: + ctoi(rn(node->u.w), node->v.w); + break; + case jit_code_ctzi: + ctzi(rn(node->u.w), node->v.w); + break; + case jit_code_rbiti: + rbiti(rn(node->u.w), node->v.w); + break; + case jit_code_popcnti: + popcnti(rn(node->u.w), node->v.w); + break; + case jit_code_exti: + exti(rn(node->u.w), node->v.w, node->w.q.l, node->w.q.h); + break; + case jit_code_exti_u: + exti_u(rn(node->u.w), node->v.w, node->w.q.l, node->w.q.h); + break; + default: + printf("ABORT MISSION (%i)\n", node->code); + abort(); + } + jit_regarg_clr(node, value); + assert(_jitc->regarg == 0); + assert(_jitc->synth == 0); + /* update register live state */ + jit_reglive(node); + + _jitc->no_flag = !(node->flag & jit_flag_patch); + + if (_jitc->consts.length && + (jit_uword_t)_jit->pc.uc - (jit_uword_t)_jitc->consts.patches[0] >= 900) { + /* Maximum displacement for mov.l is +1020 bytes. If we're already +900 bytes + * since the first mov.l, force a flush. 
*/ + + if (node->next && + node->next->code != jit_code_jmpi && + node->next->code != jit_code_jmpr && + node->next->code != jit_code_epilog) { + /* insert a jump, flush constants and continue */ + word = _jit->pc.w; + BRA(0); + NOP(); + flush_consts(1); + patch_at(word, _jit->pc.w); + } + } + } +#undef case_brw +#undef case_brr +#undef case_wrr +#undef case_rrw +#undef case_rrr +#undef case_wr +#undef case_rw +#undef case_rr + + flush_consts(1); + + for (offset = 0; offset < _jitc->patches.offset; offset++) { + node = _jitc->patches.ptr[offset].node; + word = _jitc->patches.ptr[offset].inst; + value = node->code == jit_code_movi ? node->v.n->u.w : node->u.n->u.w; + patch_at(word, value); + } + + jit_flush(_jit->code.ptr, _jit->pc.uc); + + return (_jit->code.ptr); +} + +#define CODE 1 +# include "jit_rewind.c" +# include "jit_sh-cpu.c" +# include "jit_sh-fpu.c" +# include "jit_fallback.c" +#undef CODE + +void +jit_flush(void *fptr, void *tptr) +{ +#if defined(__linux__) + jit_uword_t i, f, t, s; + + s = sysconf(_SC_PAGE_SIZE); + f = (jit_uword_t)fptr & -s; + t = (((jit_uword_t)tptr) + s - 1) & -s; + for (i = f; i < t; i += s) + __clear_cache((void *)i, (void *)(i + s)); +#endif +} + +void +_emit_ldxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + ldxi(rn(r0), rn(r1), i0); +} + +void +_emit_stxi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + stxi(i0, rn(r0), rn(r1)); +} + +void +_emit_ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + /* No FPU support */ +} + +void +_emit_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + /* No FPU support */ +} + +static void +_load_const(jit_state_t *_jit, jit_bool_t uniq, jit_int32_t r0, jit_word_t i0) +{ + jit_word_t w; + jit_word_t d; + jit_word_t base; + jit_int32_t *data; + jit_int32_t size; + jit_int32_t offset; + + _jitc->consts.patches[_jitc->consts.offset++] = _jit->pc.w; + /* positive forward offset */ + LDPL(r0, 0); + 
+ if (!uniq) { + /* search already requested values */ + for (offset = 0; offset < _jitc->consts.length; offset++) { + if (_jitc->consts.values[offset] == i0) { + _jitc->consts.patches[_jitc->consts.offset++] = offset; + return; + } + } + } + +#if DEBUG + /* cannot run out of space because of limited range + * but assert anyway to catch logic errors */ + assert(_jitc->consts.length < 1024); + assert(_jitc->consts.offset < 2048); +#endif + _jitc->consts.patches[_jitc->consts.offset++] = _jitc->consts.length; + _jitc->consts.values[_jitc->consts.length++] = i0; +} + +static void +_load_const_f(jit_state_t *_jit, jit_bool_t uniq, jit_int32_t r0, jit_float32_t f0) +{ + jit_word_t w; + jit_word_t d; + jit_word_t base; + jit_int32_t *data; + jit_int32_t size; + jit_int32_t offset; + union fl32 { + jit_int32_t i; + jit_float32_t f; + }; + jit_uint32_t i0 = ((union fl32)f0).i; + + _jitc->consts.patches[_jitc->consts.offset++] = _jit->pc.w; + /* positive forward offset */ + MOVA(0); + LDF(r0, _R0); + + if (!uniq) { + /* search already requested values */ + for (offset = 0; offset < _jitc->consts.length; offset++) { + if (_jitc->consts.values[offset] == i0) { + _jitc->consts.patches[_jitc->consts.offset++] = offset; + return; + } + } + } + +#if DEBUG + /* cannot run out of space because of limited range + * but assert anyway to catch logic errors */ + assert(_jitc->consts.length < 1024); + assert(_jitc->consts.offset < 2048); +#endif + _jitc->consts.patches[_jitc->consts.offset++] = _jitc->consts.length; + _jitc->consts.values[_jitc->consts.length++] = i0; +} + +static void +_flush_consts(jit_state_t *_jit, jit_bool_t force) +{ + jit_word_t word; + jit_int32_t offset; + + /* if no forward constants */ + if (!_jitc->consts.length) + return; + + word = _jit->code.length - (_jit->pc.uc - _jit->code.ptr) + - (_jitc->consts.length << 1); + if (!force && word < 1024) + return; + + /* Align to 32 bits */ + if (_jit->pc.w & 0x3) + NOP(); + + word = _jit->pc.w; + _jitc->consts.data = 
_jit->pc.uc; + _jitc->consts.size = _jitc->consts.length << 2; + /* FIXME check will not overrun, otherwise, need to reallocate + * code buffer and start over */ + jit_memcpy(_jitc->consts.data, _jitc->consts.values, _jitc->consts.size); + _jit->pc.w += _jitc->consts.size; + +#if DISASSEMBLER + if (_jitc->data_info.ptr) { + if (_jitc->data_info.offset >= _jitc->data_info.length) { + jit_realloc((jit_pointer_t *)&_jitc->data_info.ptr, + _jitc->data_info.length * sizeof(jit_data_info_t), + (_jitc->data_info.length + 1024) * + sizeof(jit_data_info_t)); + _jitc->data_info.length += 1024; + } + _jitc->data_info.ptr[_jitc->data_info.offset].code = word; + _jitc->data_info.ptr[_jitc->data_info.offset].length = _jitc->consts.size; + ++_jitc->data_info.offset; + } +#endif + + for (offset = 0; offset < _jitc->consts.offset; offset += 2) + patch_at(_jitc->consts.patches[offset], + word + (_jitc->consts.patches[offset + 1] << 2)); + _jitc->consts.length = _jitc->consts.offset = 0; +} + +/* to be called if needing to start over a function */ +static void +_invalidate_consts(jit_state_t *_jit) +{ + /* if no forward constants */ + if (_jitc->consts.length) + _jitc->consts.length = _jitc->consts.offset = 0; +} + +static void +_patch(jit_state_t *_jit, jit_word_t instr, jit_node_t *node) +{ + jit_int32_t flag; + + assert(node->flag & jit_flag_node); + if (node->code == jit_code_movi) + flag = node->v.n->flag; + else + flag = node->u.n->flag; + assert(!(flag & jit_flag_patch)); + if (_jitc->patches.offset >= _jitc->patches.length) { + jit_realloc((jit_pointer_t *)&_jitc->patches.ptr, + _jitc->patches.length * sizeof(jit_patch_t), + (_jitc->patches.length + 1024) * sizeof(jit_patch_t)); + _jitc->patches.length += 1024; + } + _jitc->patches.ptr[_jitc->patches.offset].inst = instr; + _jitc->patches.ptr[_jitc->patches.offset].node = node; + ++_jitc->patches.offset; +} + +static jit_node_t * +_jit_make_arg(jit_state_t *_jit, jit_node_t *node, jit_code_t code) +{ + jit_int32_t offset; + + 
if (jit_arg_reg_p(_jitc->function->self.argi)) + offset = _jitc->function->self.argi++; + else { + offset = _jitc->function->self.size; + _jitc->function->self.size += STACK_SLOT; + } + + if (node == (jit_node_t *)0) + node = jit_new_node(code); + else + link_node(node); + + node->u.w = offset; + node->v.w = ++_jitc->function->self.argn; + jit_link_prolog(); + + return (node); +} + +static jit_node_t * +_jit_make_arg_f(jit_state_t *_jit, jit_node_t *node) +{ + jit_int32_t offset; + + if (jit_arg_f_reg_p(_jitc->function->self.argf)) { + offset = _jitc->function->self.argf++; + } + else { + offset = _jitc->function->self.size; + _jitc->function->self.size += STACK_SLOT; + } + if (node == (jit_node_t *)0) + node = jit_new_node(jit_code_arg_f); + else + link_node(node); + node->u.w = offset; + node->v.w = ++_jitc->function->self.argn; + jit_link_prolog(); + return (node); +} + +static jit_node_t * +_jit_make_arg_d(jit_state_t *_jit, jit_node_t *node) +{ + jit_int32_t offset; + if (jit_arg_f_reg_p(_jitc->function->self.argf)) { + offset = (_jitc->function->self.argf + 1) & ~1; + _jitc->function->self.argf = offset + 2; + } + else { + offset = _jitc->function->self.size; + _jitc->function->self.size += STACK_SLOT * 2; + } + if (node == (jit_node_t *)0) + node = jit_new_node(jit_code_arg_d); + else + link_node(node); + node->u.w = offset; + node->v.w = ++_jitc->function->self.argn; + jit_link_prolog(); + return (node); +} + +jit_node_t * +_jit_arg(jit_state_t *_jit, jit_code_t code) +{ + assert(_jitc->function); + assert(!(_jitc->function->self.call & jit_call_varargs)); + return (jit_make_arg((jit_node_t*)0, code)); +} + +void +_jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) +{ + assert_arg_type(v->code, jit_code_arg_c); + jit_inc_synth_wp(getarg_c, u, v); + if (jit_arg_reg_p(v->u.w)) + jit_extr_c(u, JIT_RA0 + v->u.w); + else + jit_ldxi_c(u, JIT_FP, v->u.w + C_DISP); + jit_dec_synth(); +} + +void +_jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t 
*v) +{ + assert_arg_type(v->code, jit_code_arg_c); + jit_inc_synth_wp(getarg_uc, u, v); + if (jit_arg_reg_p(v->u.w)) + jit_extr_uc(u, JIT_RA0 + v->u.w); + else + jit_ldxi_uc(u, JIT_FP, v->u.w + C_DISP); + jit_dec_synth(); +} + +void +_jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) +{ + assert_arg_type(v->code, jit_code_arg_s); + jit_inc_synth_wp(getarg_s, u, v); + if (jit_arg_reg_p(v->u.w)) + jit_extr_s(u, JIT_RA0 + v->u.w); + else + jit_ldxi_s(u, JIT_FP, v->u.w + S_DISP); + jit_dec_synth(); +} + +void +_jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) +{ + assert_arg_type(v->code, jit_code_arg_s); + jit_inc_synth_wp(getarg_us, u, v); + if (jit_arg_reg_p(v->u.w)) + jit_extr_us(u, JIT_RA0 + v->u.w); + else + jit_ldxi_us(u, JIT_FP, v->u.w + S_DISP); + jit_dec_synth(); +} + +void +_jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) +{ + assert_arg_type(v->code, jit_code_arg_i); + jit_inc_synth_wp(getarg_i, u, v); + if (jit_arg_reg_p(v->u.w)) { + jit_movr(u, JIT_RA0 + v->u.w); + } + else + jit_ldxi_i(u, JIT_FP, v->u.w + I_DISP); + jit_dec_synth(); +} + +void +_jit_pushargr(jit_state_t *_jit, jit_int32_t u, jit_code_t code) +{ + jit_code_inc_synth_w(code, u); + jit_link_prepare(); + assert(_jitc->function); + if (jit_arg_reg_p(_jitc->function->call.argi)) { + jit_movr(JIT_RA0 + _jitc->function->call.argi, u); + ++_jitc->function->call.argi; + } + else { + jit_stxi(_jitc->function->call.size, JIT_SP, u); + _jitc->function->call.size += STACK_SLOT; + } + jit_dec_synth(); +} + +void +_jit_pushargi(jit_state_t *_jit, jit_word_t u, jit_code_t code) +{ + jit_int32_t regno; + assert(_jitc->function); + jit_code_inc_synth_w(code, u); + jit_link_prepare(); + if (jit_arg_reg_p(_jitc->function->call.argi)) { + jit_movi(JIT_RA0 + _jitc->function->call.argi, u); + ++_jitc->function->call.argi; + } + else { + regno = jit_get_reg(jit_class_gpr); + jit_movi(regno, u); + jit_stxi(_jitc->function->call.size, JIT_SP, regno); + 
_jitc->function->call.size += STACK_SLOT; + jit_unget_reg(regno); + } + jit_dec_synth(); +} + +void +_jit_finishr(jit_state_t *_jit, jit_int32_t r0) +{ + jit_node_t *call; + assert(_jitc->function); + jit_inc_synth_w(finishr, r0); + if (_jitc->function->self.alen < _jitc->function->call.size) + _jitc->function->self.alen = _jitc->function->call.size; + call = jit_callr(r0); + call->v.w = _jitc->function->self.argi; + call->w.w = _jitc->function->self.argf; + _jitc->function->call.argi = _jitc->function->call.argf = + _jitc->function->call.size = 0; + _jitc->prepare = 0; + jit_dec_synth(); +} + +jit_node_t * +_jit_finishi(jit_state_t *_jit, jit_pointer_t i0) +{ + jit_node_t *call; + assert(_jitc->function); + jit_inc_synth_w(finishi, (jit_word_t)i0); + if (_jitc->function->self.alen < _jitc->function->call.size) + _jitc->function->self.alen = _jitc->function->call.size; + call = jit_calli(i0); + call->v.w = _jitc->function->call.argi; + call->w.w = _jitc->function->call.argf; + _jitc->function->call.argi = _jitc->function->call.argf = + _jitc->function->call.size = 0; + _jitc->prepare = 0; + jit_dec_synth(); + return (call); +} + +void +_jit_retval_c(jit_state_t *_jit, jit_int32_t r0) +{ + jit_extr_c(r0, JIT_RET); +} + +void +_jit_retval_uc(jit_state_t *_jit, jit_int32_t r0) +{ + jit_extr_uc(r0, JIT_RET); +} + +void +_jit_retval_s(jit_state_t *_jit, jit_int32_t r0) +{ + jit_extr_s(r0, JIT_RET); +} + +void +_jit_retval_us(jit_state_t *_jit, jit_int32_t r0) +{ + jit_extr_us(r0, JIT_RET); +} + +void +_jit_retval_i(jit_state_t *_jit, jit_int32_t r0) +{ + jit_movr(r0, JIT_RET); +} + +void +_jit_ellipsis(jit_state_t *_jit) +{ + jit_inc_synth(ellipsis); + if (_jitc->prepare) { + jit_link_prepare(); + assert(!(_jitc->function->call.call & jit_call_varargs)); + _jitc->function->call.call |= jit_call_varargs; + } + else { + jit_link_prolog(); + assert(!(_jitc->function->self.call & jit_call_varargs)); + _jitc->function->self.call |= jit_call_varargs; + _jitc->function->vagp = 
_jitc->function->self.argi; + _jitc->function->vafp = _jitc->function->self.argf; + _jitc->function->vaoff = jit_allocai(sizeof(jit_va_list_t) + /* +1 to ensure 8-byte alignment */ + + (NUM_WORD_ARGS + NUM_FLOAT_ARGS + 1) * 4); + } + jit_dec_synth(); +} + +void +_jit_va_push(jit_state_t *_jit, jit_int32_t u) +{ + jit_int32_t i, reg; + jit_inc_synth_w(va_push, u); + + reg = jit_get_reg(jit_class_gpr); + + for (i = 0; i < 5; i++) { + jit_ldxi(reg, u, i * 4); + jit_stxi(_jitc->function->call.size + i * 4, JIT_SP, reg); + } + + jit_unget_reg(reg); + + _jitc->function->call.size += 5 * 4; + + jit_dec_synth(); +} + +jit_bool_t +_jit_arg_register_p(jit_state_t *_jit, jit_node_t *u) +{ + if (u->code >= jit_code_arg_c && u->code <= jit_code_arg) + return (jit_arg_reg_p(u->u.w)); + + assert(u->code == jit_code_arg_f || u->code == jit_code_arg_d); + return (jit_arg_f_reg_p(u->u.w)); +} + +jit_node_t * +_jit_arg_f(jit_state_t *_jit) +{ + assert(_jitc->function); + return (jit_make_arg_f((jit_node_t*)0)); +} + +jit_node_t * +_jit_arg_d(jit_state_t *_jit) +{ + assert(_jitc->function); + return (jit_make_arg_d((jit_node_t*)0)); +} + +void +_jit_retval_f(jit_state_t *_jit, jit_int32_t r0) +{ + jit_movr_f(r0, JIT_FRET); +} + +void +_jit_retval_d(jit_state_t *_jit, jit_int32_t r0) +{ + jit_movr_d(r0, JIT_FRET); +} + +void +_jit_retr_f(jit_state_t *_jit, jit_int32_t u) +{ + jit_inc_synth_w(retr_f, u); + jit_movr_f(JIT_FRET, u); + jit_live(JIT_FRET); + jit_ret(); + jit_dec_synth(); +} + +void +_jit_reti_f(jit_state_t *_jit, jit_float32_t u) +{ + jit_inc_synth_w(reti_f, u); + jit_movi_f(JIT_FRET, u); + jit_live(JIT_FRET); + jit_ret(); + jit_dec_synth(); +} + +void +_jit_retr_d(jit_state_t *_jit, jit_int32_t u) +{ + jit_inc_synth_w(retr_d, u); + jit_movr_d(JIT_FRET, u); + jit_live(JIT_FRET); + jit_ret(); + jit_dec_synth(); +} + +void +_jit_reti_d(jit_state_t *_jit, jit_float64_t u) +{ + jit_inc_synth_w(reti_d, u); + jit_movi_d(JIT_FRET, u); + jit_live(JIT_FRET); + jit_ret(); + 
jit_dec_synth(); +} + +void +_jit_pushargr_f(jit_state_t *_jit, jit_int32_t u) +{ + jit_int32_t regno; + + jit_inc_synth_w(pushargr_f, u); + jit_link_prepare(); + assert(_jitc->function); + if (jit_arg_f_reg_p(_jitc->function->call.argf)) { + jit_movr_f(JIT_FA0 + (_jitc->function->call.argf ^ fpr_args_inverted()), u); + ++_jitc->function->call.argf; + } + else { + jit_stxi_f(_jitc->function->call.size, JIT_SP, u); + _jitc->function->call.size += STACK_SLOT; + } + jit_dec_synth(); +} + +void +_jit_pushargi_f(jit_state_t *_jit, jit_float32_t u) +{ + jit_int32_t regno; + + jit_inc_synth_w(pushargi_f, u); + jit_link_prepare(); + assert(_jitc->function); + if (jit_arg_f_reg_p(_jitc->function->call.argf)) { + jit_movi_f(JIT_FA0 + (_jitc->function->call.argf ^ fpr_args_inverted()), u); + ++_jitc->function->call.argf; + } + else { + regno = jit_get_reg(jit_class_fpr); + jit_movi_f(regno, u); + jit_stxi_f(_jitc->function->call.size, JIT_SP, regno); + _jitc->function->call.size += STACK_SLOT; + jit_unget_reg(regno); + } + jit_dec_synth(); +} + +void +_jit_pushargr_d(jit_state_t *_jit, jit_int32_t u) +{ + jit_int32_t regno; + jit_inc_synth_w(pushargr_d, u); + jit_link_prepare(); + assert(_jitc->function); + + regno = (_jitc->function->call.argf + 1) & ~1; + if (jit_arg_f_reg_p(regno)) { + jit_movr_d(JIT_FA0 + regno, u); + _jitc->function->call.argf = regno + 2; + } + else { + jit_stxi_d(_jitc->function->call.size, JIT_SP, u); + _jitc->function->call.size += STACK_SLOT * 2; + } + jit_dec_synth(); +} + +void +_jit_pushargi_d(jit_state_t *_jit, jit_float64_t u) +{ + jit_int32_t regno; + + jit_inc_synth_w(pushargi_d, u); + jit_link_prepare(); + assert(_jitc->function); + + regno = (_jitc->function->call.argf + 1) & ~1; + if (jit_arg_f_reg_p(regno)) { + jit_movi_d(JIT_FA0 + regno, u); + _jitc->function->call.argf = regno + 2; + } + else { + regno = jit_get_reg(jit_class_fpr); + jit_movi_d(regno, u); + jit_stxi_d(_jitc->function->call.size, JIT_SP, regno); + 
_jitc->function->call.size += STACK_SLOT * 2; + jit_unget_reg(regno); + } + jit_dec_synth(); +} + +void +_jit_putargr_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) +{ + assert(v->code == jit_code_arg_f); + jit_inc_synth_wp(putargr, u, v); + if (jit_arg_f_reg_p(v->u.w)) + jit_movr_f(JIT_FA0 + (v->u.w ^ fpr_args_inverted()), u); + else + jit_stxi_f(v->u.w, JIT_FP, u); + jit_dec_synth(); +} + +void +_jit_putargi_f(jit_state_t *_jit, jit_float32_t u, jit_node_t *v) +{ + jit_int32_t regno; + + assert(v->code == jit_code_arg_f); + jit_inc_synth_wp(putargi, u, v); + if (jit_arg_f_reg_p(v->u.w)) { + jit_movi_f(JIT_FA0 + (v->u.w ^ fpr_args_inverted()), u); + } else { + regno = jit_get_reg(jit_class_fpr); + + jit_movi_f(regno, u); + jit_stxi_f(v->u.w, JIT_FP, regno); + + jit_unget_reg(regno); + } + jit_dec_synth(); +} + +void +_jit_putargr_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) +{ + assert(v->code == jit_code_arg_d); + jit_inc_synth_wp(putargr, u, v); + if (jit_arg_f_reg_p(v->u.w)) + jit_movr_d(JIT_FA0 + v->u.w, u); + else + jit_stxi_d(v->u.w, JIT_FP, u); + jit_dec_synth(); +} + +void +_jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v) +{ + jit_int32_t regno; + + assert(v->code == jit_code_arg_d); + jit_inc_synth_wp(putargi, u, v); + if (jit_arg_f_reg_p(v->u.w)) { + jit_movi_d(JIT_FA0 + v->u.w, u); + } else { + regno = jit_get_reg(jit_class_fpr); + + jit_movi_d(regno, u); + jit_stxi_d(v->u.w, JIT_FP, regno); + + jit_unget_reg(regno); + } + jit_dec_synth(); +} + +void +_jit_getarg_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) +{ + assert(v->code == jit_code_arg_d); + jit_inc_synth_wp(getarg_d, u, v); + + if (jit_arg_f_reg_p(v->u.w)) + jit_movr_d(u, JIT_FA0 + v->u.w); + else + jit_ldxi_d(u, JIT_FP, v->u.w); + + jit_dec_synth(); +} + +void +_jit_getarg_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) +{ + assert(v->code == jit_code_arg_f); + jit_inc_synth_wp(getarg_f, u, v); + + if (jit_arg_f_reg_p(v->u.w)) + jit_movr_f(u, JIT_FA0 + 
(v->u.w ^ fpr_args_inverted())); + else + jit_ldxi_f(u, JIT_FP, v->u.w); + + jit_dec_synth(); +} diff --git a/deps/lightning/lib/jit_size.c b/deps/lightning/lib/jit_size.c index 143a5d9d7..24a5c9566 100644 --- a/deps/lightning/lib/jit_size.c +++ b/deps/lightning/lib/jit_size.c @@ -54,6 +54,8 @@ static jit_int16_t _szs[jit_code_last_code] = { # include "jit_riscv-sz.c" # elif defined(__loongarch__) # include "jit_loongarch-sz.c" +# elif defined(__sh__) +# include "jit_sh-sz.c" # endif #endif }; diff --git a/deps/lightning/lib/jit_sparc-sz.c b/deps/lightning/lib/jit_sparc-sz.c index 8a4ce7bff..0fd494d43 100644 --- a/deps/lightning/lib/jit_sparc-sz.c +++ b/deps/lightning/lib/jit_sparc-sz.c @@ -528,6 +528,66 @@ 16, /* hmuli */ 8, /* hmulr_u */ 16, /* hmuli_u */ + 8, /* ldxbr_c */ + 8, /* ldxbi_c */ + 8, /* ldxar_c */ + 8, /* ldxai_c */ + 8, /* ldxbr_uc */ + 8, /* ldxbi_uc */ + 8, /* ldxar_uc */ + 8, /* ldxai_uc */ + 8, /* ldxbr_s */ + 8, /* ldxbi_s */ + 8, /* ldxar_s */ + 8, /* ldxai_s */ + 8, /* ldxbr_us */ + 8, /* ldxbi_us */ + 8, /* ldxar_us */ + 8, /* ldxai_us */ + 8, /* ldxbr_i */ + 8, /* ldxbi_i */ + 8, /* ldxar_i */ + 8, /* ldxai_i */ + 0, /* ldxbr_ui */ + 0, /* ldxbi_ui */ + 0, /* ldxar_ui */ + 0, /* ldxai_ui */ + 0, /* ldxbr_l */ + 0, /* ldxbi_l */ + 0, /* ldxar_l */ + 0, /* ldxai_l */ + 8, /* ldxbr_f */ + 8, /* ldxbi_f */ + 8, /* ldxar_f */ + 8, /* ldxai_f */ + 8, /* ldxbr_d */ + 8, /* ldxbi_d */ + 8, /* ldxar_d */ + 8, /* ldxai_d */ + 8, /* stxbr_c */ + 8, /* stxbi_c */ + 8, /* stxar_c */ + 8, /* stxai_c */ + 8, /* stxbr_s */ + 8, /* stxbi_s */ + 8, /* stxar_s */ + 8, /* stxai_s */ + 8, /* stxbr_i */ + 8, /* stxbi_i */ + 8, /* stxar_i */ + 8, /* stxai_i */ + 0, /* stxbr_l */ + 0, /* stxbi_l */ + 0, /* stxar_l */ + 0, /* stxai_l */ + 8, /* stxbr_f */ + 8, /* stxbi_f */ + 8, /* stxar_f */ + 8, /* stxai_f */ + 8, /* stxbr_d */ + 8, /* stxbi_d */ + 8, /* stxar_d */ + 8, /* stxai_d */ #endif /* __WORDSIZE */ #if __WORDSIZE == 64 @@ -1060,4 +1120,64 @@ 60, /* 
hmuli */ 44, /* hmulr_u */ 60, /* hmuli_u */ + 8, /* ldxbr_c */ + 8, /* ldxbi_c */ + 8, /* ldxar_c */ + 8, /* ldxai_c */ + 8, /* ldxbr_uc */ + 8, /* ldxbi_uc */ + 8, /* ldxar_uc */ + 8, /* ldxai_uc */ + 8, /* ldxbr_s */ + 8, /* ldxbi_s */ + 8, /* ldxar_s */ + 8, /* ldxai_s */ + 8, /* ldxbr_us */ + 8, /* ldxbi_us */ + 8, /* ldxar_us */ + 8, /* ldxai_us */ + 8, /* ldxbr_i */ + 8, /* ldxbi_i */ + 8, /* ldxar_i */ + 8, /* ldxai_i */ + 8, /* ldxbr_ui */ + 8, /* ldxbi_ui */ + 8, /* ldxar_ui */ + 8, /* ldxai_ui */ + 8, /* ldxbr_l */ + 8, /* ldxbi_l */ + 8, /* ldxar_l */ + 8, /* ldxai_l */ + 12, /* ldxbr_f */ + 12, /* ldxbi_f */ + 12, /* ldxar_f */ + 12, /* ldxai_f */ + 8, /* ldxbr_d */ + 8, /* ldxbi_d */ + 8, /* ldxar_d */ + 8, /* ldxai_d */ + 8, /* stxbr_c */ + 8, /* stxbi_c */ + 8, /* stxar_c */ + 8, /* stxai_c */ + 8, /* stxbr_s */ + 8, /* stxbi_s */ + 8, /* stxar_s */ + 8, /* stxai_s */ + 8, /* stxbr_i */ + 8, /* stxbi_i */ + 8, /* stxar_i */ + 8, /* stxai_i */ + 8, /* stxbr_l */ + 8, /* stxbi_l */ + 8, /* stxar_l */ + 8, /* stxai_l */ + 12, /* stxbr_f */ + 12, /* stxbi_f */ + 12, /* stxar_f */ + 12, /* stxai_f */ + 8, /* stxbr_d */ + 8, /* stxbi_d */ + 8, /* stxar_d */ + 8, /* stxai_d */ #endif /* __WORDSIZE */ diff --git a/deps/lightning/lib/jit_sparc.c b/deps/lightning/lib/jit_sparc.c index bd8756d20..f9a20f6b7 100644 --- a/deps/lightning/lib/jit_sparc.c +++ b/deps/lightning/lib/jit_sparc.c @@ -1287,6 +1287,26 @@ _emit_code(jit_state_t *_jit) name##r##type(rn(node->u.w), \ rn(node->v.w), rn(node->w.w)); \ break +#define case_rrx(name, type) \ + case jit_code_##name##i##type: \ + generic_##name##i##type(rn(node->u.w), \ + rn(node->v.w), node->w.w); \ + break +#define case_rrX(name, type) \ + case jit_code_##name##r##type: \ + generic_##name##r##type(rn(node->u.w), \ + rn(node->v.w), rn(node->w.w)); \ + break +#define case_xrr(name, type) \ + case jit_code_##name##i##type: \ + generic_##name##i##type(node->u.w, rn(node->v.w), \ + rn(node->w.w)); \ + break +#define 
case_Xrr(name, type) \ + case jit_code_##name##r##type: \ + generic_##name##r##type(rn(node->u.w), rn(node->v.w), \ + rn(node->w.w)); \ + break #define case_rrrr(name, type) \ case jit_code_##name##r##type: \ name##r##type(rn(node->u.q.l), rn(node->u.q.h), \ @@ -1539,6 +1559,26 @@ _emit_code(jit_state_t *_jit) case jit_code_unldi_u: unldi_u(rn(node->u.w), node->v.w, node->w.w); break; + case_rrx(ldxb, _c); case_rrX(ldxb, _c); + case_rrx(ldxa, _c); case_rrX(ldxa, _c); + case_rrx(ldxb, _uc); case_rrX(ldxb, _uc); + case_rrx(ldxa, _uc); case_rrX(ldxa, _uc); + case_rrx(ldxb, _s); case_rrX(ldxb, _s); + case_rrx(ldxa, _s); case_rrX(ldxa, _s); + case_rrx(ldxb, _us); case_rrX(ldxb, _us); + case_rrx(ldxa, _us); case_rrX(ldxa, _us); + case_rrx(ldxb, _i); case_rrX(ldxb, _i); + case_rrx(ldxa, _i); case_rrX(ldxa, _i); +#if __WORDSIZE == 64 + case_rrx(ldxb, _ui); case_rrX(ldxb, _ui); + case_rrx(ldxa, _ui); case_rrX(ldxa, _ui); + case_rrx(ldxb, _l); case_rrX(ldxb, _l); + case_rrx(ldxa, _l); case_rrX(ldxa, _l); +#endif + case_rrx(ldxb, _f); case_rrX(ldxb, _f); + case_rrx(ldxa, _f); case_rrX(ldxa, _f); + case_rrx(ldxb, _d); case_rrX(ldxb, _d); + case_rrx(ldxa, _d); case_rrX(ldxa, _d); case_rr(st, _c); case_wr(st, _c); case_rr(st, _s); @@ -1567,6 +1607,20 @@ _emit_code(jit_state_t *_jit) case jit_code_unsti: unsti(node->u.w, rn(node->v.w), node->w.w); break; + case_xrr(stxb, _c); case_Xrr(stxb, _c); + case_xrr(stxa, _c); case_Xrr(stxa, _c); + case_xrr(stxb, _s); case_Xrr(stxb, _s); + case_xrr(stxa, _s); case_Xrr(stxa, _s); + case_xrr(stxb, _i); case_Xrr(stxb, _i); + case_xrr(stxa, _i); case_Xrr(stxa, _i); +#if __WORDSIZE == 64 + case_xrr(stxb, _l); case_rrX(stxb, _l); + case_xrr(stxa, _l); case_rrX(stxa, _l); +#endif + case_xrr(stxb, _f); case_rrX(stxb, _f); + case_xrr(stxa, _f); case_rrX(stxa, _f); + case_xrr(stxb, _d); case_rrX(stxb, _d); + case_xrr(stxa, _d); case_rrX(stxa, _d); case_rr(hton, _us); case_rr(hton, _ui); #if __WORDSIZE == 64 @@ -2186,6 +2240,10 @@ 
_emit_code(jit_state_t *_jit) #undef case_rrrw #undef case_rrw #undef case_rrrr +#undef case_rrx +#undef case_rrX +#undef case_xrr +#undef case_Xrr #undef case_rrr #undef case_rf #undef case_wr diff --git a/deps/lightning/lib/jit_x86-cpu.c b/deps/lightning/lib/jit_x86-cpu.c index 76f90ec87..6957adf36 100644 --- a/deps/lightning/lib/jit_x86-cpu.c +++ b/deps/lightning/lib/jit_x86-cpu.c @@ -570,6 +570,45 @@ static void _ldxr_l(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); static void _ldxi_l(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); # endif # endif +# define ldxbr_c(r0, r1, r2) generic_ldxbr_c(r0, r1, r2) +# define ldxbi_c(r0, r1, i0) generic_ldxbi_c(r0, r1, i0) +# define ldxbr_uc(r0, r1, r2) generic_ldxbr_uc(r0, r1, r2) +# define ldxbi_uc(r0, r1, i0) generic_ldxbi_uc(r0, r1, i0) +# define ldxbr_s(r0, r1, r2) generic_ldxbr_s(r0, r1, r2) +# define ldxbi_s(r0, r1, i0) generic_ldxbi_s(r0, r1, i0) +# define ldxbr_us(r0, r1, r2) generic_ldxbr_us(r0, r1, r2) +# define ldxbi_us(r0, r1, i0) generic_ldxbi_us(r0, r1, i0) +# define ldxbr_i(r0, r1, r2) generic_ldxbr_i(r0, r1, r2) +# define ldxbi_i(r0, r1, i0) generic_ldxbi_i(r0, r1, i0) +# if __X64 && !__X64_32 +# define ldxbr_ui(r0, r1, i0) generic_ldxbr_ui(r0, r1, i0) +# define ldxbi_ui(r0, r1, i0) generic_ldxbi_ui(r0, r1, i0) +# define ldxbr_l(r0, r1, r2) generic_ldxbr_l(r0, r1, r2) +# define ldxbi_l(r0, r1, i0) generic_ldxbi_l(r0, r1, i0) +# endif +# define ldxar_c(r0, r1, r2) generic_ldxar_c(r0, r1, r2) +# define ldxai_c(r0, r1, i0) _ldxai_c(_jit,r0, r1, i0) +static void _ldxai_c(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define ldxar_uc(r0, r1, r2) generic_ldxar_uc(r0, r1, r2) +# define ldxai_uc(r0, r1, i0) _ldxai_uc(_jit, r0, r1, i0) +static void _ldxai_uc(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define ldxar_s(r0, r1, r2) generic_ldxar_s(r0, r1, r2) +# define ldxai_s(r0, r1, i0) _ldxai_s(_jit, r0, r1, i0) +static void _ldxai_s(jit_state_t*, jit_int32_t, jit_int32_t, 
jit_word_t); +# define ldxar_us(r0, r1, r2) generic_ldxar_us(r0, r1, r2) +# define ldxai_us(r0, r1, i0) _ldxai_us(_jit, r0, r1, i0) +static void _ldxai_us(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define ldxar_i(r0, r1, r2) generic_ldxar_i(r0, r1, r2) +# define ldxai_i(r0, r1, i0) _ldxai_i(_jit, r0, r1, i0) +static void _ldxai_i(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# if __X64 && !__X64_32 +# define ldxar_ui(r0, r1, i0) generic_ldxar_ui(r0, r1, i0) +# define ldxai_ui(r0, r1, i0) _ldxai_ui(_jit, r0, r1, i0) +static void _ldxai_ui(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define ldxar_l(r0, r1, r2) generic_ldxar_l(r0, r1, r2) +# define ldxai_l(r0, r1, i0) _ldxai_l(_jit, r0, r1, i0) +static void _ldxai_l(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# endif # define unldr(r0, r1, i0) generic_unldr(r0, r1, i0) # define unldi(r0, i0, i1) generic_unldi(r0, i0, i1) # define unldr_u(r0, r1, i0) generic_unldr_u(r0, r1, i0) @@ -610,8 +649,34 @@ static void _stxr_l(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); # define stxi_l(i0, r0, r1) _stxi_l(_jit, i0, r0, r1) static void _stxi_l(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); # endif -#define unstr(r0, r1, i0) generic_unstr(r0, r1, i0) -#define unsti(i0, r0, i1) generic_unsti(i0, r0, i1) +# define stxbr_c(r0, r1, r2) generic_stxbr_c(r0, r1, r2) +# define stxbi_c(i0, r0, r1) generic_stxbi_c(i0, r0, r1) +# define stxbr_s(r0, r1, r2) generic_stxbr_s(r0, r1, r2) +# define stxbi_s(i0, r0, r1) generic_stxbi_s(i0, r0, r1) +# define stxbr_i(r0, r1, r2) generic_stxbr_i(r0, r1, r2) +# define stxbi_i(i0, r0, r1) generic_stxbi_i(i0, r0, r1) +# if __X64 && !__X64_32 +# define stxbr_l(r0, r1, r2) generic_stxbr_l(r0, r1, r2) +# define stxbi_l(i0, r0, r1) generic_stxbi_l(i0, r0, r1) +# endif + +# define stxar_c(r0, r1, r2) generic_stxar_c(r0, r1, r2) +# define stxai_c(i0, r0, r1) _stxai_c(_jit, i0, r0, r1) +static void _stxai_c(jit_state_t*, jit_word_t, jit_int32_t, 
jit_int32_t); +# define stxar_s(r0, r1, r2) generic_stxar_s(r0, r1, r2) +# define stxai_s(i0, r0, r1) _stxai_s(_jit, i0, r0, r1) +static void _stxai_s(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +# define stxar_i(r0, r1, r2) generic_stxar_i(r0, r1, r2) +# define stxai_i(i0, r0, r1) _stxai_i(_jit, i0, r0, r1) +static void _stxai_i(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +# if __X64 && !__X64_32 +# define stxar_l(r0, r1, r2) generic_stxar_l(r0, r1, r2) +# define stxai_l(i0, r0, r1) _stxai_l(_jit, i0, r0, r1) +static void _stxai_l(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +# endif + +# define unstr(r0, r1, i0) generic_unstr(r0, r1, i0) +# define unsti(i0, r0, i1) generic_unsti(i0, r0, i1) # define jcc(code, i0) _jcc(_jit, code, i0) # define jo(i0) jcc(X86_CC_O, i0) # define jno(i0) jcc(X86_CC_NO, i0) @@ -806,6 +871,9 @@ static void _patch_at(jit_state_t*, jit_word_t, jit_word_t); # endif # endif # define jit_cmov_p() jit_cpu.cmov +# define is_low_mask(im) (((im) & 1) ? (__builtin_popcountl((im) + 1) <= 1) : 0) +# define is_high_mask(im) ((im) ? 
(__builtin_popcountl((im) + (1 << __builtin_ctzl(im))) == 0) : 0) +# define unmasked_bits_count(im) (__WORDSIZE - __builtin_popcountl(im)) #endif #if CODE @@ -1881,15 +1949,20 @@ _andi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) ixorr(r0, r0); else if (i0 == -1) movr(r0, r1); + else if (r0 == r1 && can_sign_extend_int_p(i0)) + iandi(r0, i0); + else if (is_low_mask(i0)) { + lshi(r0, r1, unmasked_bits_count(i0)); + rshi_u(r0, r0, unmasked_bits_count(i0)); + } else if (is_high_mask(i0)) { + rshi_u(r0, r1, unmasked_bits_count(i0)); + lshi(r0, r0, unmasked_bits_count(i0)); + } else if (r0 == r1) { - if (can_sign_extend_int_p(i0)) - iandi(r0, i0); - else { reg = jit_get_reg(jit_class_gpr); movi(rn(reg), i0); iandr(r0, rn(reg)); jit_unget_reg(reg); - } } else { movi(r0, i0); @@ -3721,6 +3794,104 @@ _ldxi_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) # endif #endif +static void +_ldxai_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + /* Assume DF = 0 */ + if (r0 == _RAX_REGNO && r1 == _RSI_REGNO && i0 == 1) { + /* lods %rsi, %al */ + ic(0xac); + extr_c(r0, r0); + } + else + generic_ldxai_uc(r0, r1, i0); +} + +static void +_ldxai_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + /* Assume DF = 0 */ + if (r0 == _RAX_REGNO && r1 == _RSI_REGNO && i0 == 1) { + /* lods %rsi, %al */ + ic(0xac); + extr_uc(r0, r0); + } + else + generic_ldxai_uc(r0, r1, i0); +} + +static void +_ldxai_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + /* Assume DF = 0 */ + if (r0 == _RAX_REGNO && r1 == _RSI_REGNO && i0 == 2) { + /* lods %rsi, %ax */ + ic(0x66); + ic(0xad); + extr_s(r0, r0); + } + else + generic_ldxai_us(r0, r1, i0); +} + +static void +_ldxai_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + /* Assume DF = 0 */ + if (r0 == _RAX_REGNO && r1 == _RSI_REGNO && i0 == 2) { + /* lods %rsi, %ax */ + ic(0x66); + ic(0xad); + extr_us(r0, r0); + } + else + 
generic_ldxai_us(r0, r1, i0); +} + +static void +_ldxai_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + /* Assume DF = 0 */ + if (r0 == _RAX_REGNO && r1 == _RSI_REGNO && i0 == 4) { + /* lods %rsi, %eax */ + ic(0xad); +# if __X64 && !__X64_32 + extr_i(r0, r0); +# endif + } + else + generic_ldxai_i(r0, r1, i0); +} + +# if __X64 && !__X64_32 +static void +_ldxai_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + /* Assume DF = 0 */ + if (r0 == _RAX_REGNO && r1 == _RSI_REGNO && i0 == 4) { + /* lods %rsi, %eax */ + ic(0xad); + extr_ui(r0, r0); + } + else + generic_ldxai_ui(r0, r1, i0); +} + + +static void +_ldxai_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + /* Assume DF = 0 */ + if (r0 == _RAX_REGNO && r1 == _RSI_REGNO && i0 == 8) { + /* lods %rsi, %rax */ + ic(0x48); /* rex.w */ + ic(0xad); + } + else + generic_ldxai_l(r0, r1, i0); +} +# endif + static void _str_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { @@ -4045,6 +4216,57 @@ _stxi_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) } #endif +static void +_stxai_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + /* Assume DF = 0 */ + if (r0 == _RDI_REGNO && r1 == _RAX_REGNO && i0 == 1) + /* stos %al, %rdi */ + ic(0xaa); + else + generic_stxai_c(i0, r0, r1); +} + +static void +_stxai_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + /* Assume DF = 0 */ + if (r0 == _RDI_REGNO && r1 == _RAX_REGNO && i0 == 2) { + /* stos %ax, %rdi */ + ic(0x66); + ic(0xab); + } + else + generic_stxai_s(i0, r0, r1); +} + +static void +_stxai_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + /* Assume DF = 0 */ + if (r0 == _RDI_REGNO && r1 == _RAX_REGNO && i0 == 4) + /* stos %eax, %rdi */ + ic(0xab); + else + generic_stxai_i(i0, r0, r1); +} + +#if __X64 && !__X64_32 +static void +_stxai_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + /* Assume DF 
= 0 */ + if (r0 == _RDI_REGNO && r1 == _RAX_REGNO && i0 == 8) { + /* rex.w */ + ic(0x48); + /* stos %rax, %rdi */ + ic(0xab); + } + else + generic_stxai_l(i0, r0, r1); +} +#endif + static jit_word_t _jccs(jit_state_t *_jit, jit_int32_t code, jit_word_t i0) { diff --git a/deps/lightning/lib/jit_x86-sz.c b/deps/lightning/lib/jit_x86-sz.c index 99bb62552..3f91fbd70 100644 --- a/deps/lightning/lib/jit_x86-sz.c +++ b/deps/lightning/lib/jit_x86-sz.c @@ -1,6 +1,6 @@ #if __X32 -#define JIT_INSTR_MAX 63 +#define JIT_INSTR_MAX 66 0, /* data */ 0, /* live */ 3, /* align */ @@ -423,7 +423,7 @@ 6, /* str_d */ 10, /* sti_d */ 7, /* stxr_d */ - 8, /* stxi_d */ + 9, /* stxi_d */ 10, /* bltr_d */ 28, /* blti_d */ 10, /* bler_d */ @@ -495,7 +495,7 @@ 12, /* qlshi */ 60, /* qlshr_u */ 12, /* qlshi_u */ - 59, /* qrshr */ + 66, /* qrshr */ 12, /* qrshi */ 56, /* qrshr_u */ 12, /* qrshi_u */ @@ -525,10 +525,70 @@ 0, /* fnmai_d */ 27, /* fnmsr_d */ 0, /* fnmsi_d */ - 18, /* hmulr */ - 23, /* hmuli */ - 18, /* hmulr_u */ - 23, /* hmuli_u */ + 18, /* hmulr */ + 23, /* hmuli */ + 18, /* hmulr_u */ + 23, /* hmuli_u */ + 5, /* ldxbr_c */ + 6, /* ldxbi_c */ + 5, /* ldxar_c */ + 6, /* ldxai_c */ + 5, /* ldxbr_uc */ + 6, /* ldxbi_uc */ + 5, /* ldxar_uc */ + 6, /* ldxai_uc */ + 5, /* ldxbr_s */ + 6, /* ldxbi_s */ + 5, /* ldxar_s */ + 6, /* ldxai_s */ + 5, /* ldxbr_us */ + 6, /* ldxbi_us */ + 5, /* ldxar_us */ + 6, /* ldxai_us */ + 4, /* ldxbr_i */ + 5, /* ldxbi_i */ + 4, /* ldxar_i */ + 5, /* ldxai_i */ + 0, /* ldxbr_ui */ + 0, /* ldxbi_ui */ + 0, /* ldxar_ui */ + 0, /* ldxai_ui */ + 0, /* ldxbr_l */ + 0, /* ldxbi_l */ + 0, /* ldxar_l */ + 0, /* ldxai_l */ + 6, /* ldxbr_f */ + 7, /* ldxbi_f */ + 6, /* ldxar_f */ + 7, /* ldxai_f */ + 6, /* ldxbr_d */ + 7, /* ldxbi_d */ + 6, /* ldxar_d */ + 7, /* ldxai_d */ + 6, /* stxbr_c */ + 7, /* stxbi_c */ + 6, /* stxar_c */ + 7, /* stxai_c */ + 5, /* stxbr_s */ + 6, /* stxbi_s */ + 5, /* stxar_s */ + 6, /* stxai_s */ + 4, /* stxbr_i */ + 5, /* stxbi_i */ + 4, 
/* stxar_i */ + 5, /* stxai_i */ + 0, /* stxbr_l */ + 0, /* stxbi_l */ + 0, /* stxar_l */ + 0, /* stxai_l */ + 8, /* stxbr_f */ + 9, /* stxbi_f */ + 8, /* stxar_f */ + 9, /* stxai_f */ + 8, /* stxbr_d */ + 9, /* stxbi_d */ + 8, /* stxar_d */ + 9, /* stxai_d */ #endif /* __X32 */ #if __X64 @@ -1028,7 +1088,7 @@ 15, /* qlshi */ 54, /* qlshr_u */ 15, /* qlshi_u */ - 53, /* qrshr */ + 62, /* qrshr */ 15, /* qrshi */ 49, /* qrshr_u */ 15, /* qrshi_u */ @@ -1058,10 +1118,40 @@ 0, /* fnmai_d */ 30, /* fnmsr_d */ 0, /* fnmsi_d */ - 17, /* hmulr */ - 27, /* hmuli */ - 17, /* hmulr_u */ - 27, /* hmuli_u */ + 17, /* hmulr */ + 27, /* hmuli */ + 17, /* hmulr_u */ + 27, /* hmuli_u */ + 8, /* ldxbi_c */ + 8, /* ldxai_c */ + 8, /* ldxbi_uc */ + 8, /* ldxai_uc */ + 8, /* ldxbi_s */ + 8, /* ldxai_s */ + 8, /* ldxbi_us */ + 8, /* ldxai_us */ + 7, /* ldxbi_i */ + 7, /* ldxai_i */ + 7, /* ldxbi_ui */ + 7, /* ldxai_ui */ + 7, /* ldxbi_l */ + 7, /* ldxai_l */ + 9, /* ldxbi_f */ + 9, /* ldxai_f */ + 9, /* ldxbi_d */ + 9, /* ldxai_d */ + 10, /* stxbi_c */ + 10, /* stxai_c */ + 8, /* stxbi_s */ + 8, /* stxai_s */ + 7, /* stxbi_i */ + 7, /* stxai_i */ + 7, /* stxbi_l */ + 7, /* stxai_l */ + 9, /* stxbi_f */ + 9, /* stxai_f */ + 9, /* stxbi_d */ + 9, /* stxai_d */ #else # if __X64_32 @@ -1406,7 +1496,7 @@ 10, /* bgtr_f */ 20, /* bgti_f */ 13, /* bner_f */ - 23, /* bnei_f */ + 24, /* bnei_f */ 10, /* bunltr_f */ 20, /* bunlti_f */ 10, /* bunler_f */ @@ -1488,7 +1578,7 @@ 6, /* str_d */ 11, /* sti_d */ 10, /* stxr_d */ - 9, /* stxi_d */ + 10, /* stxi_d */ 11, /* bltr_d */ 30, /* blti_d */ 11, /* bler_d */ @@ -1500,7 +1590,7 @@ 11, /* bgtr_d */ 30, /* bgti_d */ 14, /* bner_d */ - 33, /* bnei_d */ + 37, /* bnei_d */ 11, /* bunltr_d */ 30, /* bunlti_d */ 11, /* bunler_d */ @@ -1560,7 +1650,7 @@ 15, /* qlshi */ 52, /* qlshr_u */ 15, /* qlshi_u */ - 51, /* qrshr */ + 60, /* qrshr */ 15, /* qrshi */ 47, /* qrshr_u */ 15, /* qrshi_u */ @@ -1590,10 +1680,40 @@ 0, /* fnmai_d */ 31, /* fnmsr_d */ 0, /* 
fnmsi_d */ - 15, /* hmulr */ - 21, /* hmuli */ - 15, /* hmulr_u */ - 21, /* hmuli_u */ + 15, /* hmulr */ + 21, /* hmuli */ + 15, /* hmulr_u */ + 21, /* hmuli_u */ + 9, /* ldxbi_c */ + 9, /* ldxai_c */ + 9, /* ldxbi_uc */ + 9, /* ldxai_uc */ + 9, /* ldxbi_s */ + 9, /* ldxai_s */ + 9, /* ldxbi_us */ + 9, /* ldxai_us */ + 8, /* ldxbi_i */ + 8, /* ldxai_i */ + 0, /* ldxbi_ui */ + 0, /* ldxai_ui */ + 0, /* ldxbi_l */ + 0, /* ldxai_l */ + 10, /* ldxbi_f */ + 10, /* ldxai_f */ + 10, /* ldxbi_d */ + 10, /* ldxai_d */ + 11, /* stxbi_c */ + 11, /* stxai_c */ + 9, /* stxbi_s */ + 9, /* stxai_s */ + 8, /* stxbi_i */ + 8, /* stxai_i */ + 0, /* stxbi_l */ + 0, /* stxai_l */ + 10, /* stxbi_f */ + 10, /* stxai_f */ + 10, /* stxbi_d */ + 10, /* stxai_d */ #else #define JIT_INSTR_MAX 112 @@ -2125,6 +2245,66 @@ 27, /* hmuli */ 17, /* hmulr_u */ 27, /* hmuli_u */ + 8, /* ldxbr_c */ + 9, /* ldxbi_c */ + 8, /* ldxar_c */ + 9, /* ldxai_c */ + 8, /* ldxbr_uc */ + 9, /* ldxbi_uc */ + 8, /* ldxar_uc */ + 9, /* ldxai_uc */ + 8, /* ldxbr_s */ + 9, /* ldxbi_s */ + 8, /* ldxar_s */ + 9, /* ldxai_s */ + 8, /* ldxbr_us */ + 9, /* ldxbi_us */ + 8, /* ldxar_us */ + 9, /* ldxai_us */ + 7, /* ldxbr_i */ + 8, /* ldxbi_i */ + 7, /* ldxar_i */ + 8, /* ldxai_i */ + 7, /* ldxbr_ui */ + 8, /* ldxbi_ui */ + 7, /* ldxar_ui */ + 8, /* ldxai_ui */ + 7, /* ldxbr_l */ + 8, /* ldxbi_l */ + 7, /* ldxar_l */ + 8, /* ldxai_l */ + 9, /* ldxbr_f */ + 10, /* ldxbi_f */ + 9, /* ldxar_f */ + 10, /* ldxai_f */ + 9, /* ldxbr_d */ + 10, /* ldxbi_d */ + 9, /* ldxar_d */ + 10, /* ldxai_d */ + 7, /* stxbr_c */ + 8, /* stxbi_c */ + 7, /* stxar_c */ + 8, /* stxai_c */ + 8, /* stxbr_s */ + 9, /* stxbi_s */ + 8, /* stxar_s */ + 9, /* stxai_s */ + 7, /* stxbr_i */ + 8, /* stxbi_i */ + 7, /* stxar_i */ + 8, /* stxai_i */ + 7, /* stxbr_l */ + 8, /* stxbi_l */ + 7, /* stxar_l */ + 8, /* stxai_l */ + 9, /* stxbr_f */ + 10, /* stxbi_f */ + 9, /* stxar_f */ + 10, /* stxai_f */ + 9, /* stxbr_d */ + 10, /* stxbi_d */ + 9, /* stxar_d */ + 
10, /* stxai_d */ #endif /* __CYGWIN__ || _WIN32 */ # endif /* __X64_32 */ #endif /* __X64 */ diff --git a/deps/lightning/lib/jit_x86.c b/deps/lightning/lib/jit_x86.c index dd4fccd8b..58bf9ae2d 100644 --- a/deps/lightning/lib/jit_x86.c +++ b/deps/lightning/lib/jit_x86.c @@ -1600,6 +1600,24 @@ _emit_code(jit_state_t *_jit) case jit_code_##name##i##type: \ name##i##type(rn(node->u.w), rn(node->v.w), node->w.w); \ break +#define case_rrx(name, type) \ + case jit_code_##name##i##type: \ + name##i##type(rn(node->u.w), rn(node->v.w), node->w.w); \ + break +#define case_rrX(name, type) \ + case jit_code_##name##r##type: \ + name##r##type(rn(node->u.w), \ + rn(node->v.w), rn(node->w.w)); \ + break +#define case_xrr(name, type) \ + case jit_code_##name##i##type: \ + name##i##type(node->u.w, rn(node->v.w), rn(node->w.w)); \ + break +#define case_Xrr(name, type) \ + case jit_code_##name##r##type: \ + name##r##type(rn(node->u.w), rn(node->v.w), \ + rn(node->w.w)); \ + break #define case_rrrw(name, type) \ case jit_code_##name##i##type: \ name##i##type(rn(node->u.q.l), rn(node->u.q.h), \ @@ -1985,6 +2003,66 @@ _emit_code(jit_state_t *_jit) case jit_code_unldi_u: unldi_u(rn(node->u.w), node->v.w, node->w.w); break; + case_rrx(ldxb, _c); case_rrX(ldxb, _c); + case_rrx(ldxa, _c); case_rrX(ldxa, _c); + case_rrx(ldxb, _uc); case_rrX(ldxb, _uc); + case_rrx(ldxa, _uc); case_rrX(ldxa, _uc); + case_rrx(ldxb, _s); case_rrX(ldxb, _s); + case_rrx(ldxa, _s); case_rrX(ldxa, _s); + case_rrx(ldxb, _us); case_rrX(ldxb, _us); + case_rrx(ldxa, _us); case_rrX(ldxa, _us); + case_rrx(ldxb, _i); case_rrX(ldxb, _i); + case_rrx(ldxa, _i); case_rrX(ldxa, _i); +#if __WORDSIZE == 64 + case_rrx(ldxb, _ui); case_rrX(ldxb, _ui); + case_rrx(ldxa, _ui); case_rrX(ldxa, _ui); + case_rrx(ldxb, _l); case_rrX(ldxb, _l); + case_rrx(ldxa, _l); case_rrX(ldxa, _l); +#endif + case jit_code_ldxbr_f: + addr(rn(node->v.w), rn(node->v.w), rn(node->w.w)); + goto L_ldxbi_f; + case jit_code_ldxbi_f: + addi(rn(node->v.w), 
rn(node->v.w), node->w.w); + L_ldxbi_f: + if (jit_x87_reg_p(node->u.w)) + x87_ldr_f(rn(node->u.w), rn(node->v.w)); + else + sse_ldr_f(rn(node->u.w), rn(node->v.w)); + break; + case jit_code_ldxar_f: + case jit_code_ldxai_f: + if (jit_x87_reg_p(node->u.w)) + x87_ldr_f(rn(node->u.w), rn(node->v.w)); + else + sse_ldr_f(rn(node->u.w), rn(node->v.w)); + if (node->code == jit_code_ldxai_f) + addi(rn(node->v.w), rn(node->v.w), node->w.w); + else + addr(rn(node->v.w), rn(node->v.w), rn(node->w.w)); + break; + case jit_code_ldxbr_d: + addr(rn(node->v.w), rn(node->v.w), rn(node->w.w)); + goto L_ldxbi_d; + case jit_code_ldxbi_d: + addi(rn(node->v.w), rn(node->v.w), node->w.w); + L_ldxbi_d: + if (jit_x87_reg_p(node->u.w)) + x87_ldr_d(rn(node->u.w), rn(node->v.w)); + else + sse_ldr_d(rn(node->u.w), rn(node->v.w)); + break; + case jit_code_ldxar_d: + case jit_code_ldxai_d: + if (jit_x87_reg_p(node->u.w)) + x87_ldr_d(rn(node->u.w), rn(node->v.w)); + else + sse_ldr_d(rn(node->u.w), rn(node->v.w)); + if (node->code == jit_code_ldxai_d) + addi(rn(node->v.w), rn(node->v.w), node->w.w); + else + addr(rn(node->v.w), rn(node->v.w), rn(node->w.w)); + break; case_rr(st, _c); case_wr(st, _c); case_rr(st, _s); @@ -2011,6 +2089,60 @@ _emit_code(jit_state_t *_jit) case jit_code_unsti: unsti(node->u.w, rn(node->v.w), node->w.w); break; + case_xrr(stxb, _c); case_Xrr(stxb, _c); + case_xrr(stxa, _c); case_Xrr(stxa, _c); + case_xrr(stxb, _s); case_Xrr(stxb, _s); + case_xrr(stxa, _s); case_Xrr(stxa, _s); + case_xrr(stxb, _i); case_Xrr(stxb, _i); + case_xrr(stxa, _i); case_Xrr(stxa, _i); +#if __WORDSIZE == 64 + case_xrr(stxb, _l); case_rrX(stxb, _l); + case_xrr(stxa, _l); case_rrX(stxa, _l); +#endif + case jit_code_stxbr_f: + addr(rn(node->v.w), rn(node->v.w), rn(node->u.w)); + goto L_stxbi_f; + case jit_code_stxbi_f: + addi(rn(node->v.w), rn(node->v.w), node->u.w); + L_stxbi_f: + if (jit_x87_reg_p(node->w.w)) + x87_str_f(rn(node->v.w), rn(node->w.w)); + else + sse_str_f(rn(node->v.w), 
rn(node->w.w)); + break; + case jit_code_stxar_f: + case jit_code_stxai_f: + if (jit_x87_reg_p(node->w.w)) + x87_str_f(rn(node->v.w), rn(node->w.w)); + else + sse_str_f(rn(node->v.w), rn(node->w.w)); + if (node->code == jit_code_stxai_f) + addi(rn(node->v.w), rn(node->v.w), node->u.w); + else + addr(rn(node->v.w), rn(node->v.w), rn(node->u.w)); + break; + case jit_code_stxbr_d: + addr(rn(node->v.w), rn(node->v.w), rn(node->u.w)); + goto L_stxbr_d; + case jit_code_stxbi_d: + addi(rn(node->v.w), rn(node->v.w), node->u.w); + L_stxbr_d: + if (jit_x87_reg_p(node->w.w)) + x87_str_d(rn(node->v.w), rn(node->w.w)); + else + sse_str_d(rn(node->v.w), rn(node->w.w)); + break; + case jit_code_stxar_d: + case jit_code_stxai_d: + if (jit_x87_reg_p(node->w.w)) + x87_str_d(rn(node->v.w), rn(node->w.w)); + else + sse_str_d(rn(node->v.w), rn(node->w.w)); + if (node->code == jit_code_stxai_d) + addi(rn(node->v.w), rn(node->v.w), node->u.w); + else + addr(rn(node->v.w), rn(node->v.w), rn(node->u.w)); + break; case_brr(blt,); case_brw(blt,); case_brr(blt, _u); @@ -2651,6 +2783,10 @@ _emit_code(jit_state_t *_jit) #undef case_wrr #undef case_frw #undef case_rrf +#undef case_xrr +#undef case_Xrr +#undef case_rrx +#undef case_rrX #undef case_rrw #undef case_frr #undef case_rrr diff --git a/deps/lightning/lib/lightning.c b/deps/lightning/lib/lightning.c index 646d9db5b..643c5f17e 100644 --- a/deps/lightning/lib/lightning.c +++ b/deps/lightning/lib/lightning.c @@ -1737,6 +1737,46 @@ _jit_classify(jit_state_t *_jit, jit_code_t code) mask = jit_cc_a0_reg|jit_cc_a0_chg| jit_cc_a1_reg|jit_cc_a1_rlh|jit_cc_a2_dbl; break; + case jit_code_ldxbi_c: case jit_code_ldxai_c: + case jit_code_ldxbi_uc: case jit_code_ldxai_uc: + case jit_code_ldxbi_s: case jit_code_ldxai_s: + case jit_code_ldxbi_us: case jit_code_ldxai_us: + case jit_code_ldxbi_i: case jit_code_ldxai_i: + case jit_code_ldxbi_ui: case jit_code_ldxai_ui: + case jit_code_ldxbi_l: case jit_code_ldxai_l: + case jit_code_ldxbi_f: case 
jit_code_ldxai_f: + case jit_code_ldxbi_d: case jit_code_ldxai_d: + mask = jit_cc_a0_reg|jit_cc_a0_chg| + jit_cc_a1_reg|jit_cc_a1_dep|jit_cc_a2_int; + break; + case jit_code_ldxbr_c: case jit_code_ldxar_c: + case jit_code_ldxbr_uc: case jit_code_ldxar_uc: + case jit_code_ldxbr_s: case jit_code_ldxar_s: + case jit_code_ldxbr_us: case jit_code_ldxar_us: + case jit_code_ldxbr_i: case jit_code_ldxar_i: + case jit_code_ldxbr_ui: case jit_code_ldxar_ui: + case jit_code_ldxbr_l: case jit_code_ldxar_l: + case jit_code_ldxbr_f: case jit_code_ldxar_f: + case jit_code_ldxbr_d: case jit_code_ldxar_d: + mask = jit_cc_a0_reg|jit_cc_a0_chg| + jit_cc_a1_reg|jit_cc_a1_dep|jit_cc_a2_reg; + break; + case jit_code_stxbi_c: case jit_code_stxai_c: + case jit_code_stxbi_s: case jit_code_stxai_s: + case jit_code_stxbi_i: case jit_code_stxai_i: + case jit_code_stxbi_l: case jit_code_stxai_l: + case jit_code_stxbi_f: case jit_code_stxai_f: + case jit_code_stxbi_d: case jit_code_stxai_d: + mask = jit_cc_a0_int|jit_cc_a1_reg|jit_cc_a1_dep|jit_cc_a2_reg; + break; + case jit_code_stxbr_c: case jit_code_stxar_c: + case jit_code_stxbr_s: case jit_code_stxar_s: + case jit_code_stxbr_i: case jit_code_stxar_i: + case jit_code_stxbr_l: case jit_code_stxar_l: + case jit_code_stxbr_f: case jit_code_stxar_f: + case jit_code_stxbr_d: case jit_code_stxar_d: + mask = jit_cc_a0_reg|jit_cc_a1_reg|jit_cc_a1_dep|jit_cc_a2_reg; + break; default: abort(); } @@ -1764,8 +1804,8 @@ _jit_patch_abs(jit_state_t *_jit, jit_node_t *instr, jit_pointer_t address) default: #ifndef NDEBUG mask = jit_classify(instr->code); -#endif assert((mask & (jit_cc_a0_reg|jit_cc_a0_jmp)) == jit_cc_a0_jmp); +#endif instr->u.p = address; } } @@ -1794,8 +1834,8 @@ _jit_patch_at(jit_state_t *_jit, jit_node_t *instr, jit_node_t *label) default: #ifndef NDEBUG mask = jit_classify(instr->code); -#endif assert((mask & (jit_cc_a0_reg|jit_cc_a0_jmp)) == jit_cc_a0_jmp); +#endif assert(label->code == jit_code_label); instr->u.n = label; break; @@ 
-2580,15 +2620,18 @@ _jit_emit(jit_state_t *_jit) # endif # ifndef NDEBUG result = - mprotect(_jit->code.ptr, _jit->code.protect, PROT_READ | PROT_EXEC); # endif + mprotect(_jit->code.ptr, _jit->code.protect, PROT_READ | PROT_EXEC); assert(result == 0); } #endif /* HAVE_MMAP */ return (_jit->code.ptr); + +#if HAVE_MMAP fail: return (NULL); +#endif /* HAVE_MMAP */ } void @@ -4291,12 +4334,12 @@ static void _htoni_ul(jit_state_t*, jit_int32_t, jit_word_t); #endif # define movi_f_w(r0, i0) _movi_f_w(_jit, r0, i0) static void _movi_f_w(jit_state_t*, jit_int32_t, jit_float32_t); -#if __WORDSIZE == 32 && !(defined(__mips__) && NEW_ABI) -# define movi_d_ww(r0, r1, i0) _movi_d_ww(_jit, r0, r1, i0) -static void _movi_d_ww(jit_state_t*, jit_int32_t, jit_int32_t, jit_float64_t); -#else +#if __WORDSIZE == 64 # define movi_d_w(r0, i0) _movi_d_w(_jit, r0, i0) static void _movi_d_w(jit_state_t*, jit_int32_t, jit_float64_t); +#elif !(defined(__mips__) && NEW_ABI) +# define movi_d_ww(r0, r1, i0) _movi_d_ww(_jit, r0, r1, i0) +static void _movi_d_ww(jit_state_t*, jit_int32_t, jit_int32_t, jit_float64_t); #endif #define cloi(r0, i0) _cloi(_jit, r0, i0) static void _cloi(jit_state_t*, jit_int32_t, jit_word_t); @@ -4324,6 +4367,118 @@ static void _generic_unldr_u(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); #define generic_unldi_u(r0, i0, i1) _generic_unldi_u(_jit, r0, i0, i1) static void _generic_unldi_u(jit_state_t*, jit_int32_t, jit_word_t, jit_word_t); +#define generic_ldxbr_c(r0, r1, r2) _generic_ldxbr_c(_jit, r0, r1, r2) +static maybe_unused +void _generic_ldxbr_c(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +#define generic_ldxbi_c(r0, r1, i0) _generic_ldxbi_c(_jit, r0, r1, i0) +static maybe_unused +void _generic_ldxbi_c(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +#define generic_ldxar_c(r0, r1, r2) _generic_ldxar_c(_jit, r0, r1, r2) +static maybe_unused +void _generic_ldxar_c(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +#define 
generic_ldxai_c(r0, r1, i0) _generic_ldxai_c(_jit, r0, r1, i0) +static maybe_unused +void _generic_ldxai_c(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +#define generic_ldxbr_uc(r0, r1, r2) _generic_ldxbr_uc(_jit, r0, r1, r2) +static maybe_unused +void _generic_ldxbr_uc(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +#define generic_ldxbi_uc(r0, r1, i0) _generic_ldxbi_uc(_jit, r0, r1, i0) +static maybe_unused +void _generic_ldxbi_uc(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +#define generic_ldxar_uc(r0, r1, r2) _generic_ldxar_uc(_jit, r0, r1, r2) +static maybe_unused +void _generic_ldxar_uc(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +#define generic_ldxai_uc(r0, r1, i0) _generic_ldxai_uc(_jit, r0, r1, i0) +static maybe_unused +void _generic_ldxai_uc(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +#define generic_ldxbr_s(r0, r1, r2) _generic_ldxbr_s(_jit, r0, r1, r2) +static maybe_unused +void _generic_ldxbr_s(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +#define generic_ldxbi_s(r0, r1, i0) _generic_ldxbi_s(_jit, r0, r1, i0) +static maybe_unused +void _generic_ldxbi_s(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +#define generic_ldxar_s(r0, r1, r2) _generic_ldxar_s(_jit, r0, r1, r2) +static maybe_unused +void _generic_ldxar_s(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +#define generic_ldxai_s(r0, r1, i0) _generic_ldxai_s(_jit, r0, r1, i0) +static maybe_unused +void _generic_ldxai_s(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +#define generic_ldxbr_us(r0, r1, r2) _generic_ldxbr_us(_jit, r0, r1, r2) +static maybe_unused +void _generic_ldxbr_us(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +#define generic_ldxbi_us(r0, r1, i0) _generic_ldxbi_us(_jit, r0, r1, i0) +static maybe_unused +void _generic_ldxbi_us(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +#define generic_ldxar_us(r0, r1, r2) _generic_ldxar_us(_jit, r0, r1, r2) +static maybe_unused +void 
_generic_ldxar_us(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +#define generic_ldxai_us(r0, r1, i0) _generic_ldxai_us(_jit, r0, r1, i0) +static maybe_unused +void _generic_ldxai_us(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +#define generic_ldxar_i(r0, r1, r2) _generic_ldxar_i(_jit, r0, r1, r2) +static maybe_unused +void _generic_ldxar_i(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +#define generic_ldxai_i(r0, r1, i0) _generic_ldxai_i(_jit, r0, r1, i0) +static maybe_unused +void _generic_ldxai_i(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +#define generic_ldxbr_i(r0, r1, r2) _generic_ldxbr_i(_jit, r0, r1, r2) +static maybe_unused +void _generic_ldxbr_i(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +#define generic_ldxbi_i(r0, r1, i0) _generic_ldxbi_i(_jit, r0, r1, i0) +static maybe_unused +void _generic_ldxbi_i(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +#if __WORDSIZE == 64 +# define generic_ldxbr_ui(r0, r1, r2) _generic_ldxbr_ui(_jit, r0, r1, r2) +static maybe_unused +void _generic_ldxbr_ui(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define generic_ldxbi_ui(r0, r1, i0) _generic_ldxbi_ui(_jit, r0, r1, i0) +static maybe_unused +void _generic_ldxbi_ui(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define generic_ldxar_ui(r0, r1, r2) _generic_ldxar_ui(_jit, r0, r1, r2) +static maybe_unused +void _generic_ldxar_ui(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define generic_ldxai_ui(r0, r1, i0) _generic_ldxai_ui(_jit, r0, r1, i0) +static maybe_unused +void _generic_ldxai_ui(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define generic_ldxbr_l(r0, r1, i0) _generic_ldxbr_l(_jit, r0, r1, i0) +static maybe_unused +void _generic_ldxbr_l(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define generic_ldxbi_l(r0, r1, i0) _generic_ldxbi_l(_jit, r0, r1, i0) +static maybe_unused +void _generic_ldxbi_l(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define 
generic_ldxar_l(r0, r1, i0) _generic_ldxar_l(_jit, r0, r1, i0) +static maybe_unused +void _generic_ldxar_l(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define generic_ldxai_l(r0, r1, i0) _generic_ldxai_l(_jit, r0, r1, i0) +static maybe_unused +void _generic_ldxai_l(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +#endif +#if !defined(__i386__) && !defined(__x86_64__) +# define generic_ldxbr_f(r0, r1, r2) _generic_ldxbr_f(_jit, r0, r1, r2) +static maybe_unused +void _generic_ldxbr_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define generic_ldxbi_f(r0, r1, i0) _generic_ldxbi_f(_jit, r0, r1, i0) +static maybe_unused +void _generic_ldxbi_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define generic_ldxar_f(r0, r1, r2) _generic_ldxar_f(_jit, r0, r1, r2) +static maybe_unused +void _generic_ldxar_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define generic_ldxai_f(r0, r1, i0) _generic_ldxai_f(_jit, r0, r1, i0) +static maybe_unused +void _generic_ldxai_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define generic_ldxbr_d(r0, r1, i0) _generic_ldxbr_d(_jit, r0, r1, i0) +static maybe_unused +void _generic_ldxbr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define generic_ldxbi_d(r0, r1, i0) _generic_ldxbi_d(_jit, r0, r1, i0) +static maybe_unused +void _generic_ldxbi_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define generic_ldxar_d(r0, r1, i0) _generic_ldxar_d(_jit, r0, r1, i0) +static maybe_unused +void _generic_ldxar_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define generic_ldxai_d(r0, r1, i0) _generic_ldxai_d(_jit, r0, r1, i0) +static maybe_unused +void _generic_ldxai_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +#endif #define generic_unstr(r0, r1, i0) _generic_unstr(_jit, r0, r1, i0) static void _generic_unstr(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); #define generic_unsti(i0, r0, i1) _generic_unsti(_jit, i0, r0, i1) @@ -4340,6 +4495,82 @@
static void _generic_unstr_x(jit_state_t*, # define generic_unsti_x(i0, r0, i1) _generic_unsti_x(_jit, i0, r0, i1) static void _generic_unsti_x(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t); #endif +#define generic_stxbr_c(r0, r1, r2) _generic_stxbr_c(_jit, r0, r1, r2) +static maybe_unused +void _generic_stxbr_c(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +#define generic_stxbi_c(i0, r0, r1) _generic_stxbi_c(_jit,i0, r0, r1) +static maybe_unused +void _generic_stxbi_c(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +#define generic_stxar_c(r0, r1, r2) _generic_stxar_c(_jit, r0, r1, r2) +static maybe_unused +void _generic_stxar_c(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +#define generic_stxai_c(i0, r0, r1) _generic_stxai_c(_jit, i0, r0, r1) +static maybe_unused +void _generic_stxai_c(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +#define generic_stxbr_s(r0, r1, r2) _generic_stxbr_s(_jit, r0, r1, r2) +static maybe_unused +void _generic_stxbr_s(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +#define generic_stxbi_s(i0, r0, r1) _generic_stxbi_s(_jit, i0, r0, r1) +static maybe_unused +void _generic_stxbi_s(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +#define generic_stxar_s(r0, r1, r2) _generic_stxar_s(_jit, r0, r1, r2) +static maybe_unused +void _generic_stxar_s(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +#define generic_stxai_s(i0, r0, r1) _generic_stxai_s(_jit, i0, r0, r1) +static maybe_unused +void _generic_stxai_s(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +#define generic_stxbr_i(r0, r1, r2) _generic_stxbr_i(_jit, r0, r1, r2) +static maybe_unused +void _generic_stxbr_i(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +#define generic_stxbi_i(i0, r0, r1) _generic_stxbi_i(_jit, i0, r0, r1) +static maybe_unused +void _generic_stxbi_i(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +#define generic_stxar_i(r0, r1, r2) _generic_stxar_i(_jit, r0, r1, r2) +static maybe_unused +void 
_generic_stxar_i(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +#define generic_stxai_i(i0, r0, r1) _generic_stxai_i(_jit, i0, r0, r1) +static maybe_unused +void _generic_stxai_i(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +#if __WORDSIZE == 64 +# define generic_stxbr_l(r0, r1, r2) _generic_stxbr_l(_jit, r0, r1, r2) +static maybe_unused +void _generic_stxbr_l(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define generic_stxbi_l(i0, r0, r1) _generic_stxbi_l(_jit, i0, r0, r1) +static maybe_unused +void _generic_stxbi_l(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +# define generic_stxar_l(r0, r1, r2) _generic_stxar_l(_jit, r0, r1, r2) +static maybe_unused +void _generic_stxar_l(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define generic_stxai_l(i0, r0, r1) _generic_stxai_l(_jit, i0, r0, r1) +static maybe_unused +void _generic_stxai_l(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +#endif +#if !defined(__i386__) && !defined(__x86_64__) +# define generic_stxbr_f(r0, r1, r2) _generic_stxbr_f(_jit, r0, r1, r2) +static maybe_unused +void _generic_stxbr_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define generic_stxbi_f(i0, r0, r1) _generic_stxbi_f(_jit, i0, r0, r1) +static maybe_unused +void _generic_stxbi_f(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +# define generic_stxar_f(r0, r1, r2) _generic_stxar_f(_jit, r0, r1, r2) +static maybe_unused +void _generic_stxar_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define generic_stxai_f(i0, r0, r1) _generic_stxai_f(_jit, i0, r0, r1) +static maybe_unused +void _generic_stxai_f(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +# define generic_stxbr_d(r0, r1, r2) _generic_stxbr_d(_jit, r0, r1, r2) +static maybe_unused +void _generic_stxbr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define generic_stxbi_d(i0, r0, r1) _generic_stxbi_d(_jit, i0, r0, r1) +static maybe_unused +void _generic_stxbi_d(jit_state_t*, jit_word_t, 
jit_int32_t, jit_int32_t); +# define generic_stxar_d(r0, r1, r2) _generic_stxar_d(_jit, r0, r1, r2) +static maybe_unused +void _generic_stxar_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define generic_stxai_d(i0, r0, r1) _generic_stxai_d(_jit, i0, r0, r1) +static maybe_unused +void _generic_stxai_d(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +#endif #define patch_alist(revert) _patch_alist(_jit, revert) static maybe_unused void _patch_alist(jit_state_t *_jit, jit_bool_t revert); @@ -4367,6 +4598,8 @@ static maybe_unused void _patch_alist(jit_state_t *_jit, jit_bool_t revert); # include "jit_riscv.c" #elif defined(__loongarch__) # include "jit_loongarch.c" +#elif defined(__sh__) +# include "jit_sh.c" #endif static maybe_unused void @@ -4575,7 +4808,23 @@ _movi_f_w(jit_state_t *_jit, jit_int32_t r0, jit_float32_t i0) movi(r0, data.i); } -#if __WORDSIZE == 32 && !(defined(__mips__) && NEW_ABI) +#if __WORDSIZE == 64 +static void +_movi_d_w(jit_state_t *_jit, jit_int32_t r0, jit_float64_t i0) +{ + union { + jit_int64_t l; + jit_float64_t d; + } data; + data.d = i0; +# if defined(__ia64__) + /* Should be used only in this case (with out0 == 120) */ + if (r0 >= 120) + r0 = _jitc->rout + (r0 - 120); +# endif + movi(r0, data.l); +} +#elif !(defined(__mips__) && NEW_ABI) static void _movi_d_ww(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_float64_t i0) { @@ -4593,23 +4842,6 @@ _movi_d_ww(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_float64_t i0) movi(r0, data.i[1]); # endif } - -#else -static void -_movi_d_w(jit_state_t *_jit, jit_int32_t r0, jit_float64_t i0) -{ - union { - jit_int64_t l; - jit_float64_t d; - } data; - data.d = i0; -# if defined(__ia64__) - /* Should be used only in this case (with out0 == 120) */ - if (r0 >= 120) - r0 = _jitc->rout + (r0 - 120); -# endif - movi(r0, data.l); -} #endif void @@ -5474,6 +5706,120 @@ _generic_unsti_x(jit_state_t *_jit, } #endif +#define def_ldxbr_T(T) \ +static void \ 
+_generic_ldxbr_##T(jit_state_t *_jit, \ + jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) \ +{ \ + addr(r1, r1, r2); \ + ldr_##T(r0, r1); \ +} +#define def_ldxbi_T(T) \ +static void \ +_generic_ldxbi_##T(jit_state_t *_jit, \ + jit_int32_t r0, jit_int32_t r1, jit_word_t i0) \ +{ \ + addi(r1, r1, i0); \ + ldr_##T(r0, r1); \ +} +def_ldxbr_T(c) def_ldxbi_T(c) +def_ldxbr_T(uc) def_ldxbi_T(uc) +def_ldxbr_T(s) def_ldxbi_T(s) +def_ldxbr_T(us) def_ldxbi_T(us) +def_ldxbi_T(i) def_ldxbr_T(i) +#if __WORDSIZE == 64 +def_ldxbr_T(ui) def_ldxbi_T(ui) +def_ldxbr_T(l) def_ldxbi_T(l) +#endif +#if !defined(__i386__) && !defined(__x86_64__) && !defined(__arm__) +def_ldxbr_T(f) def_ldxbi_T(f) +def_ldxbr_T(d) def_ldxbi_T(d) +#endif + +#define def_ldxar_T(T) \ +static void \ +_generic_ldxar_##T(jit_state_t *_jit, \ + jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) \ +{ \ + ldr_##T(r0, r1); \ + addr(r1, r1, r2); \ +} +#define def_ldxai_T(T) \ +static void \ +_generic_ldxai_##T(jit_state_t *_jit, \ + jit_int32_t r0, jit_int32_t r1, jit_word_t i0) \ +{ \ + ldr_##T(r0, r1); \ + addi(r1, r1, i0); \ +} +def_ldxar_T(c) def_ldxai_T(c) +def_ldxar_T(uc) def_ldxai_T(uc) +def_ldxar_T(s) def_ldxai_T(s) +def_ldxar_T(us) def_ldxai_T(us) +def_ldxar_T(i) def_ldxai_T(i) +#if __WORDSIZE == 64 +def_ldxar_T(ui) def_ldxai_T(ui) +def_ldxar_T(l) def_ldxai_T(l) +#endif +#if !defined(__i386__) && !defined(__x86_64__) && !defined(__arm__) +def_ldxar_T(f) def_ldxai_T(f) +def_ldxar_T(d) def_ldxai_T(d) +#endif + +#define def_stxbr_T(T) \ +static void \ +_generic_stxbr_##T(jit_state_t *_jit, \ + jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) \ +{ \ + addr(r1, r1, r0); \ + str_##T(r1, r2); \ +} +#define def_stxbi_T(T) \ +static void \ +_generic_stxbi_##T(jit_state_t *_jit, \ + jit_word_t i0, jit_int32_t r0, jit_int32_t r1) \ +{ \ + addi(r0, r0, i0); \ + str_##T(r0, r1); \ +} +def_stxbr_T(c) def_stxbi_T(c) +def_stxbr_T(s) def_stxbi_T(s) +def_stxbr_T(i) def_stxbi_T(i) +#if __WORDSIZE == 64 +def_stxbr_T(l) def_stxbi_T(l) 
+#endif +#if !defined(__i386__) && !defined(__x86_64__) && !defined(__arm__) +def_stxbr_T(f) def_stxbi_T(f) +def_stxbr_T(d) def_stxbi_T(d) +#endif + +#define def_stxar_T(T) \ +static void \ +_generic_stxar_##T(jit_state_t *_jit, \ + jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) \ +{ \ + str_##T(r1, r2); \ + addr(r1, r1, r0); \ +} +#define def_stxai_T(T) \ +static void \ +_generic_stxai_##T(jit_state_t *_jit, \ + jit_word_t i0, jit_int32_t r0, jit_int32_t r1) \ +{ \ + str_##T(r0, r1); \ + addi(r0, r0, i0); \ +} +def_stxar_T(c) def_stxai_T(c) +def_stxar_T(s) def_stxai_T(s) +def_stxar_T(i) def_stxai_T(i) +#if __WORDSIZE == 64 +def_stxar_T(l) def_stxai_T(l) +#endif +#if !defined(__i386__) && !defined(__x86_64__) && !defined(__arm__) +def_stxar_T(f) def_stxai_T(f) +def_stxar_T(d) def_stxai_T(d) +#endif + #if defined(stack_framesize) static maybe_unused void _patch_alist(jit_state_t *_jit, jit_bool_t revert) diff --git a/include/lightning/lightning.h b/include/lightning/lightning.h index b6b7bbea7..f669f68cf 100644 --- a/include/lightning/lightning.h +++ b/include/lightning/lightning.h @@ -154,6 +154,8 @@ typedef jit_int32_t jit_fpr_t; # include #elif defined(__loongarch__) # include +#elif defined(__sh__) +# include #endif #define jit_flag_node 0x0001 /* patch node not absolute */