diff --git a/src/hotspot/cpu/loongarch/assembler_loongarch.hpp b/src/hotspot/cpu/loongarch/assembler_loongarch.hpp index c466efdb9e1..dd936b41b8a 100644 --- a/src/hotspot/cpu/loongarch/assembler_loongarch.hpp +++ b/src/hotspot/cpu/loongarch/assembler_loongarch.hpp @@ -970,6 +970,10 @@ class Assembler : public AbstractAssembler { vshuf_h_op = 0b01110001011110101, vshuf_w_op = 0b01110001011110110, vshuf_d_op = 0b01110001011110111, + vslti_b_op = 0b01110010100001100, + vslti_h_op = 0b01110010100001101, + vslti_w_op = 0b01110010100001110, + vslti_d_op = 0b01110010100001111, vslti_bu_op = 0b01110010100010000, vslti_hu_op = 0b01110010100010001, vslti_wu_op = 0b01110010100010010, @@ -1146,6 +1150,10 @@ class Assembler : public AbstractAssembler { xvshuf_w_op = 0b01110101011110110, xvshuf_d_op = 0b01110101011110111, xvperm_w_op = 0b01110101011111010, + xvslti_b_op = 0b01110110100001100, + xvslti_h_op = 0b01110110100001101, + xvslti_w_op = 0b01110110100001110, + xvslti_d_op = 0b01110110100001111, xvslti_bu_op = 0b01110110100010000, xvslti_hu_op = 0b01110110100010001, xvslti_wu_op = 0b01110110100010010, @@ -1237,6 +1245,14 @@ class Assembler : public AbstractAssembler { unknow_ops14 = 0b11111111111111 }; + // 13-bit opcode, highest 13 bits: bits[31...19] + enum ops13 { + vldrepl_d_op = 0b0011000000010, + xvldrepl_d_op = 0b0011001000010, + + unknow_ops13 = 0b1111111111111 + }; + // 12-bit opcode, highest 12 bits: bits[31...20] enum ops12 { fmadd_s_op = 0b000010000001, @@ -1274,10 +1290,20 @@ class Assembler : public AbstractAssembler { xvbitsel_v_op = 0b000011010010, vshuf_b_op = 0b000011010101, xvshuf_b_op = 0b000011010110, + vldrepl_w_op = 0b001100000010, + xvldrepl_w_op = 0b001100100010, unknow_ops12 = 0b111111111111 }; + // 11-bit opcode, highest 11 bits: bits[31...21] + enum ops11 { + vldrepl_h_op = 0b00110000010, + xvldrepl_h_op = 0b00110010010, + + unknow_ops11 = 0b11111111111 + }; + // 10-bit opcode, highest 10 bits: bits[31...22] enum ops10 { bstr_w_op = 0b0000000001, @@ -1313,6 +1339,8 @@ class Assembler : public AbstractAssembler { xvst_op = 0b0010110011, ldl_w_op = 0b0010111000, ldr_w_op = 0b0010111001, + vldrepl_b_op = 0b0011000010, + xvldrepl_b_op = 0b0011001010, unknow_ops10 = 0b1111111111 }; @@ -1491,10 +1519,25 @@ class Assembler : public AbstractAssembler { // | opcode | I8 | rj | rd | static inline int insn_I8RR (int op, int imm8, int rj, int rd) { /*assert(is_simm(imm8, 8), "not a signed 8-bit int");*/ return (op<<18) | (low(imm8, 8)<<10) | (rj<<5) | rd; } + // 2RI9-type + // 31 19 18 10 9 5 4 0 + // | opcode | I9 | rj | vd | + static inline int insn_I9RR(int op, int imm9, int rj, int vd) { return (op<<19) | (low(imm9, 9)<<10) | (rj<<5) | vd; } + + // 2RI10-type + // 31 20 19 10 9 5 4 0 + // | opcode | I10 | rj | vd | + static inline int insn_I10RR(int op, int imm10, int rj, int vd) { return (op<<20) | (low(imm10, 10)<<10) | (rj<<5) | vd; } + + // 2RI11-type + // 31 21 20 10 9 5 4 0 + // | opcode | I11 | rj | vd | + static inline int insn_I11RR(int op, int imm11, int rj, int vd) { return (op<<21) | (low(imm11, 11)<<10) | (rj<<5) | vd; } + // 2RI12-type // 31 22 21 10 9 5 4 0 // | opcode | I12 | rj | rd | - static inline int insn_I12RR(int op, int imm12, int rj, int rd) { /* assert(is_simm(imm12, 12), "not a signed 12-bit int");*/ return (op<<22) | (low(imm12, 12)<<10) | (rj<<5) | rd; } + static inline int insn_I12RR(int op, int imm12, int rj, int rd) { return (op<<22) | (low(imm12, 12)<<10) | (rj<<5) | rd; } // 2RI14-type // 31 24 23 10 9 5 4 0 @@ -2898,6 +2941,15 @@ class 
Assembler : public AbstractAssembler { void xvslt_wu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_wu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } void xvslt_du(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_du_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void vslti_b(FloatRegister vd, FloatRegister vj, int si5) { ASSERT_LSX assert(is_simm(si5, 5), "not a signed 5-bit int"); emit_int32(insn_I5RR( vslti_b_op, si5, (int)vj->encoding(), (int)vd->encoding())); } + void vslti_h(FloatRegister vd, FloatRegister vj, int si5) { ASSERT_LSX assert(is_simm(si5, 5), "not a signed 5-bit int"); emit_int32(insn_I5RR( vslti_h_op, si5, (int)vj->encoding(), (int)vd->encoding())); } + void vslti_w(FloatRegister vd, FloatRegister vj, int si5) { ASSERT_LSX assert(is_simm(si5, 5), "not a signed 5-bit int"); emit_int32(insn_I5RR( vslti_w_op, si5, (int)vj->encoding(), (int)vd->encoding())); } + void vslti_d(FloatRegister vd, FloatRegister vj, int si5) { ASSERT_LSX assert(is_simm(si5, 5), "not a signed 5-bit int"); emit_int32(insn_I5RR( vslti_d_op, si5, (int)vj->encoding(), (int)vd->encoding())); } + void xvslti_b(FloatRegister xd, FloatRegister xj, int si5) { ASSERT_LASX assert(is_simm(si5, 5), "not a signed 5-bit int"); emit_int32(insn_I5RR(xvslti_b_op, si5, (int)xj->encoding(), (int)xd->encoding())); } + void xvslti_h(FloatRegister xd, FloatRegister xj, int si5) { ASSERT_LASX assert(is_simm(si5, 5), "not a signed 5-bit int"); emit_int32(insn_I5RR(xvslti_h_op, si5, (int)xj->encoding(), (int)xd->encoding())); } + void xvslti_w(FloatRegister xd, FloatRegister xj, int si5) { ASSERT_LASX assert(is_simm(si5, 5), "not a signed 5-bit int"); emit_int32(insn_I5RR(xvslti_w_op, si5, (int)xj->encoding(), (int)xd->encoding())); } + void xvslti_d(FloatRegister xd, FloatRegister xj, int si5) { ASSERT_LASX assert(is_simm(si5, 5), "not a signed 5-bit int"); emit_int32(insn_I5RR(xvslti_d_op, si5, (int)xj->encoding(), (int)xd->encoding())); } + void vslti_bu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vslti_bu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } void vslti_hu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vslti_hu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } void vslti_wu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vslti_wu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } @@ -3136,6 +3188,15 @@ class Assembler : public AbstractAssembler { void vstx(FloatRegister vd, Register rj, Register rk) { ASSERT_LSX emit_int32(insn_RRR( vstx_op, (int)rk->encoding(), (int)rj->encoding(), (int)vd->encoding())); } void xvstx(FloatRegister xd, Register rj, Register rk) { ASSERT_LASX emit_int32(insn_RRR(xvstx_op, (int)rk->encoding(), (int)rj->encoding(), (int)xd->encoding())); } + void vldrepl_d(FloatRegister vd, Register rj, int si9) { ASSERT_LSX assert(is_simm(si9, 9), "not a signed 9-bit int"); emit_int32(insn_I9RR( vldrepl_d_op, si9, (int)rj->encoding(), (int)vd->encoding()));} + void vldrepl_w(FloatRegister vd, Register rj, int si10) { ASSERT_LSX assert(is_simm(si10, 10), "not a signed 10-bit int"); emit_int32(insn_I10RR( vldrepl_w_op, si10, (int)rj->encoding(), (int)vd->encoding()));} + void vldrepl_h(FloatRegister vd, Register rj, int si11) { ASSERT_LSX assert(is_simm(si11, 11), "not a signed 11-bit int"); emit_int32(insn_I11RR( vldrepl_h_op, si11, 
(int)rj->encoding(), (int)vd->encoding()));} + void vldrepl_b(FloatRegister vd, Register rj, int si12) { ASSERT_LSX assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR( vldrepl_b_op, si12, (int)rj->encoding(), (int)vd->encoding()));} + void xvldrepl_d(FloatRegister xd, Register rj, int si9) { ASSERT_LASX assert(is_simm(si9, 9), "not a signed 9-bit int"); emit_int32(insn_I9RR(xvldrepl_d_op, si9, (int)rj->encoding(), (int)xd->encoding()));} + void xvldrepl_w(FloatRegister xd, Register rj, int si10) { ASSERT_LASX assert(is_simm(si10, 10), "not a signed 10-bit int"); emit_int32(insn_I10RR(xvldrepl_w_op, si10, (int)rj->encoding(), (int)xd->encoding()));} + void xvldrepl_h(FloatRegister xd, Register rj, int si11) { ASSERT_LASX assert(is_simm(si11, 11), "not a signed 11-bit int"); emit_int32(insn_I11RR(xvldrepl_h_op, si11, (int)rj->encoding(), (int)xd->encoding()));} + void xvldrepl_b(FloatRegister xd, Register rj, int si12) { ASSERT_LASX assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(xvldrepl_b_op, si12, (int)rj->encoding(), (int)xd->encoding()));} + #undef ASSERT_LSX #undef ASSERT_LASX diff --git a/src/hotspot/cpu/loongarch/gc/shenandoah/shenandoahBarrierSetAssembler_loongarch.cpp b/src/hotspot/cpu/loongarch/gc/shenandoah/shenandoahBarrierSetAssembler_loongarch.cpp index 62db2bc609f..d9377db5503 100644 --- a/src/hotspot/cpu/loongarch/gc/shenandoah/shenandoahBarrierSetAssembler_loongarch.cpp +++ b/src/hotspot/cpu/loongarch/gc/shenandoah/shenandoahBarrierSetAssembler_loongarch.cpp @@ -475,7 +475,7 @@ void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm, assert_different_registers(addr.base(), expected, tmp1, tmp2); assert_different_registers(addr.base(), new_val, tmp1, tmp2); - Label step4, done_succ, done_fail, done; + Label step4, done_succ, done_fail, done, is_null; // There are two ways to reach this label. Initial entry into the // cmpxchg_oop code expansion starts at step1 (which is equivalent @@ -544,13 +544,15 @@ void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm, __ move(tmp1, tmp2); if (is_narrow) { + __ beqz(tmp1, is_null); // Decode tmp1 in order to resolve its forward pointer - __ decode_heap_oop(tmp1); - } - resolve_forward_pointer(masm, tmp1); - if (is_narrow) { + __ decode_heap_oop_not_null(tmp1); + resolve_forward_pointer_not_null(masm, tmp1); // Encode tmp1 to compare against expected. 
- __ encode_heap_oop(tmp1); + __ encode_heap_oop_not_null(tmp1); + __ bind(is_null); + } else { + resolve_forward_pointer(masm, tmp1); } // Does forwarded value of fetched from-space pointer match original diff --git a/src/hotspot/cpu/loongarch/gc/shenandoah/shenandoah_loongarch_64.ad b/src/hotspot/cpu/loongarch/gc/shenandoah/shenandoah_loongarch_64.ad index 8dc0ce5a62b..2608ef13576 100644 --- a/src/hotspot/cpu/loongarch/gc/shenandoah/shenandoah_loongarch_64.ad +++ b/src/hotspot/cpu/loongarch/gc/shenandoah/shenandoah_loongarch_64.ad @@ -29,16 +29,16 @@ source_hpp %{ %} encode %{ - enc_class loongarch_enc_cmpxchg_oop_shenandoah(memory mem, mRegP oldval, mRegP newval, mRegI res) %{ + enc_class loongarch_enc_cmpxchg_oop_shenandoah(indirect mem, mRegP oldval, mRegP newval, mRegI res) %{ MacroAssembler _masm(&cbuf); - Address addr(as_Register($mem$$base), $mem$$disp); + Address addr(as_Register($mem$$base), 0); ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, addr, $oldval$$Register, $newval$$Register, /*acquire*/ false, /*is_cae*/ false, $res$$Register); %} - enc_class loongarch_enc_cmpxchg_acq_oop_shenandoah(memory mem, mRegP oldval, mRegP newval, mRegI res) %{ + enc_class loongarch_enc_cmpxchg_acq_oop_shenandoah(indirect mem, mRegP oldval, mRegP newval, mRegI res) %{ MacroAssembler _masm(&cbuf); - Address addr(as_Register($mem$$base), $mem$$disp); + Address addr(as_Register($mem$$base), 0); ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, addr, $oldval$$Register, $newval$$Register, /*acquire*/ true, /*is_cae*/ false, $res$$Register); %} @@ -64,7 +64,7 @@ instruct compareAndSwapN_shenandoah(mRegI res, indirect mem, mRegN oldval, mRegN %} ins_encode %{ - Address addr(as_Register($mem$$base), $mem$$disp); + Address addr(as_Register($mem$$base), 0); ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, addr, $oldval$$Register, $newval$$Register, /*acquire*/ false, /*is_cae*/ false, $res$$Register); %} @@ -92,7 +92,7 @@ instruct compareAndSwapNAcq_shenandoah(mRegI res, indirect mem, mRegN oldval, mR %} ins_encode %{ - Address addr(as_Register($mem$$base), $mem$$disp); + Address addr(as_Register($mem$$base), 0); ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, addr, $oldval$$Register, $newval$$Register, /*acquire*/ true, /*is_cae*/ false, $res$$Register); %} @@ -108,7 +108,7 @@ instruct compareAndExchangeN_shenandoah(mRegN res, indirect mem, mRegN oldval, m %} ins_encode %{ - Address addr(as_Register($mem$$base), $mem$$disp); + Address addr(as_Register($mem$$base), 0); ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, addr, $oldval$$Register, $newval$$Register, /*acquire*/ false, /*is_cae*/ true, $res$$Register); %} @@ -124,7 +124,7 @@ instruct compareAndExchangeP_shenandoah(mRegP res, indirect mem, mRegP oldval, m %} ins_encode %{ - Address addr(as_Register($mem$$base), $mem$$disp); + Address addr(as_Register($mem$$base), 0); ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, addr, $oldval$$Register, $newval$$Register, /*acquire*/ false, /*is_cae*/ true, $res$$Register); %} @@ -140,7 +140,7 @@ instruct compareAndExchangeNAcq_shenandoah(mRegN res, indirect mem, mRegN oldval %} ins_encode %{ - Address addr(as_Register($mem$$base), $mem$$disp); + Address addr(as_Register($mem$$base), 0); ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, addr, $oldval$$Register, $newval$$Register, /*acquire*/ true, /*is_cae*/ true, $res$$Register); %} @@ -156,7 +156,7 @@ instruct compareAndExchangePAcq_shenandoah(mRegP res, indirect mem, mRegP oldval %} ins_encode %{ - Address 
addr(as_Register($mem$$base), $mem$$disp); + Address addr(as_Register($mem$$base), 0); ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, addr, $oldval$$Register, $newval$$Register, /*acquire*/ true, /*is_cae*/ true, $res$$Register); %} @@ -172,7 +172,7 @@ instruct weakCompareAndSwapN_shenandoah(mRegI res, indirect mem, mRegN oldval, m %} ins_encode %{ // Weak is not currently supported by ShenandoahBarrierSet::cmpxchg_oop - Address addr(as_Register($mem$$base), $mem$$disp); + Address addr(as_Register($mem$$base), 0); ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, addr, $oldval$$Register, $newval$$Register, /*acquire*/ false, /*is_cae*/ false, $res$$Register); %} @@ -189,7 +189,7 @@ instruct weakCompareAndSwapP_shenandoah(mRegI res, indirect mem, mRegP oldval, m ins_encode %{ // Weak is not currently supported by ShenandoahBarrierSet::cmpxchg_oop - Address addr(as_Register($mem$$base), $mem$$disp); + Address addr(as_Register($mem$$base), 0); ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, addr, $oldval$$Register, $newval$$Register, /*acquire*/ false, /*is_cae*/ false, $res$$Register); %} @@ -206,7 +206,7 @@ instruct weakCompareAndSwapNAcq_shenandoah(mRegI res, indirect mem, mRegN oldval ins_encode %{ // Weak is not currently supported by ShenandoahBarrierSet::cmpxchg_oop - Address addr(as_Register($mem$$base), $mem$$disp); + Address addr(as_Register($mem$$base), 0); ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, addr, $oldval$$Register, $newval$$Register, /*acquire*/ true, /*is_cae*/ false, $res$$Register); %} @@ -223,7 +223,7 @@ instruct weakCompareAndSwapPAcq_shenandoah(mRegI res, indirect mem, mRegP oldval ins_encode %{ // Weak is not currently supported by ShenandoahBarrierSet::cmpxchg_oop - Address addr(as_Register($mem$$base), $mem$$disp); + Address addr(as_Register($mem$$base), 0); ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, addr, $oldval$$Register, $newval$$Register, /*acquire*/ true, /*is_cae*/ false, $res$$Register); %} diff --git a/src/hotspot/cpu/loongarch/gc/x/x_loongarch_64.ad b/src/hotspot/cpu/loongarch/gc/x/x_loongarch_64.ad index fdb66075606..c4e77f08b4f 100644 --- a/src/hotspot/cpu/loongarch/gc/x/x_loongarch_64.ad +++ b/src/hotspot/cpu/loongarch/gc/x/x_loongarch_64.ad @@ -56,12 +56,12 @@ static void x_compare_and_swap(MacroAssembler& _masm, const MachNode* node, // Weak CAS operations are thus only emitted if the barrier is elided. 
Address addr(mem); if (node->barrier_data() == XLoadBarrierElided) { - __ cmpxchg(addr, oldval, newval, tmp, false /* retold */, acquire /* barrier */, + __ cmpxchg(addr, oldval, newval, tmp, false /* retold */, acquire /* acquire */, weak /* weak */, false /* exchange */); __ move(res, tmp); } else { __ move(tmp, oldval); - __ cmpxchg(addr, tmp, newval, AT, true /* retold */, acquire /* barrier */, + __ cmpxchg(addr, tmp, newval, AT, true /* retold */, acquire /* acquire */, false /* weak */, false /* exchange */); __ move(res, AT); @@ -70,7 +70,7 @@ static void x_compare_and_swap(MacroAssembler& _masm, const MachNode* node, __ andr(AT, AT, tmp); __ beqz(AT, good); x_load_barrier_slow_path(_masm, node, addr, tmp, res /* used as tmp */); - __ cmpxchg(addr, oldval, newval, tmp, false /* retold */, acquire /* barrier */, weak /* weak */, false /* exchange */); + __ cmpxchg(addr, oldval, newval, tmp, false /* retold */, acquire /* acquire */, weak /* weak */, false /* exchange */); __ move(res, tmp); __ bind(good); } diff --git a/src/hotspot/cpu/loongarch/loongarch_64.ad b/src/hotspot/cpu/loongarch/loongarch_64.ad index 0b5c7ebe686..9a3ef92d0ce 100644 --- a/src/hotspot/cpu/loongarch/loongarch_64.ad +++ b/src/hotspot/cpu/loongarch/loongarch_64.ad @@ -981,6 +981,8 @@ const bool Matcher::match_rule_supported(int opcode) { case Op_ConvF2HF: case Op_ConvHF2F: case Op_StrInflatedCopy: + case Op_StrCompressedCopy: + case Op_EncodeISOArray: if (!UseLSX) return false; case Op_PopCountI: @@ -8789,19 +8791,25 @@ instruct count_positives(mRegP src, mRegI len, mRegI result, %} // fast char[] to byte[] compression -instruct string_compress(a4_RegP src, a5_RegP dst, mA6RegI len, mRegI result, - mRegL tmp1, mRegL tmp2, mRegL tmp3) +instruct string_compress(a2_RegP src, mRegP dst, mRegI len, mRegI result, + mRegL tmp1, mRegL tmp2, mRegL tmp3, + regF vtemp1, regF vtemp2, regF vtemp3, regF vtemp4) %{ + predicate(UseLSX); match(Set result (StrCompressedCopy src (Binary dst len))); - effect(USE_KILL src, USE_KILL dst, USE_KILL len, TEMP_DEF result, - TEMP tmp1, TEMP tmp2, TEMP tmp3); + effect(TEMP_DEF result, TEMP tmp1, TEMP tmp2, TEMP tmp3, + TEMP vtemp1, TEMP vtemp2, TEMP vtemp3, TEMP vtemp4, USE_KILL src); format %{ "String Compress $src,$dst -> $result @ string_compress " %} + ins_encode %{ __ char_array_compress($src$$Register, $dst$$Register, $len$$Register, $result$$Register, $tmp1$$Register, - $tmp2$$Register, $tmp3$$Register); + $tmp2$$Register, $tmp3$$Register, + $vtemp1$$FloatRegister, $vtemp2$$FloatRegister, + $vtemp3$$FloatRegister, $vtemp4$$FloatRegister); %} + ins_pipe( pipe_slow ); %} @@ -8872,38 +8880,46 @@ instruct array_equalsC(a4_RegP ary1, a5_RegP ary2, mRegI result, mRegL tmp0, mRe %} // encode char[] to byte[] in ISO_8859_1 -instruct encode_iso_array(a4_RegP src, a5_RegP dst, mA6RegI len, mRegI result, - mRegL tmp1, mRegL tmp2, mRegL tmp3) +instruct encode_iso_array(a2_RegP src, mRegP dst, mRegI len, mRegI result, + mRegL tmp1, mRegL tmp2, mRegL tmp3, + regF vtemp1, regF vtemp2, regF vtemp3, regF vtemp4) %{ - predicate(!((EncodeISOArrayNode*)n)->is_ascii()); + predicate(UseLSX && !((EncodeISOArrayNode*)n)->is_ascii()); match(Set result (EncodeISOArray src (Binary dst len))); - effect(USE_KILL src, USE_KILL dst, USE_KILL len, TEMP_DEF result, - TEMP tmp1, TEMP tmp2, TEMP tmp3); + effect(TEMP_DEF result, TEMP tmp1, TEMP tmp2, TEMP tmp3, + TEMP vtemp1, TEMP vtemp2, TEMP vtemp3, TEMP vtemp4, USE_KILL src); format %{ "Encode ISO array $src,$dst,$len -> $result @ encode_iso_array" %} + ins_encode 
%{ __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register, $result$$Register, $tmp1$$Register, - $tmp2$$Register, $tmp3$$Register, false); + $tmp2$$Register, $tmp3$$Register, false, + $vtemp1$$FloatRegister, $vtemp2$$FloatRegister, + $vtemp3$$FloatRegister, $vtemp4$$FloatRegister); %} ins_pipe( pipe_slow ); %} // encode char[] to byte[] in ASCII -instruct encode_ascii_array(a4_RegP src, a5_RegP dst, mA6RegI len, mRegI result, - mRegL tmp1, mRegL tmp2, mRegL tmp3) +instruct encode_ascii_array(a2_RegP src, mRegP dst, mRegI len, mRegI result, + mRegL tmp1, mRegL tmp2, mRegL tmp3, + regF vtemp1, regF vtemp2, regF vtemp3, regF vtemp4) %{ - predicate(((EncodeISOArrayNode*)n)->is_ascii()); + predicate(UseLSX && ((EncodeISOArrayNode*)n)->is_ascii()); match(Set result (EncodeISOArray src (Binary dst len))); - effect(USE_KILL src, USE_KILL dst, USE_KILL len, TEMP_DEF result, - TEMP tmp1, TEMP tmp2, TEMP tmp3); + effect(TEMP_DEF result, TEMP tmp1, TEMP tmp2, TEMP tmp3, + TEMP vtemp1, TEMP vtemp2, TEMP vtemp3, TEMP vtemp4, USE_KILL src); format %{ "Encode ASCII array $src,$dst,$len -> $result @ encode_ascii_array" %} + ins_encode %{ __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register, $result$$Register, $tmp1$$Register, - $tmp2$$Register, $tmp3$$Register, true); + $tmp2$$Register, $tmp3$$Register, true, + $vtemp1$$FloatRegister, $vtemp2$$FloatRegister, + $vtemp3$$FloatRegister, $vtemp4$$FloatRegister); %} ins_pipe( pipe_slow ); @@ -10833,6 +10849,28 @@ instruct convHF2F_reg_reg(regF dst, mRegI src, regF tmp) %{ ins_pipe(pipe_slow); %} +instruct round_float_reg(mRegI dst, regF src, mRegL tmp) +%{ + match(Set dst (RoundF src)); + effect(TEMP_DEF dst, TEMP tmp); + format %{ "round_float $dst, $src\t# @round_float_reg" %} + ins_encode %{ + __ java_round_float($dst$$Register, $src$$FloatRegister, $tmp$$Register); + %} + ins_pipe( pipe_slow ); +%} + +instruct round_double_reg(mRegL dst, regD src, mRegL tmp) +%{ + match(Set dst (RoundD src)); + effect(TEMP_DEF dst, TEMP tmp); + format %{ "round_double $dst, $src\t# @round_double_reg" %} + ins_encode %{ + __ java_round_double($dst$$Register, $src$$FloatRegister, $tmp$$Register); + %} + ins_pipe( pipe_slow ); +%} + instruct roundD(regD dst, regD src, immI rmode) %{ predicate(UseLSX); match(Set dst (RoundDoubleMode src rmode)); @@ -12676,26 +12714,6 @@ instruct safePoint_poll_tls(mRegP poll) %{ ins_pipe( pipe_serial ); %} -//----------Arithmetic Conversion Instructions--------------------------------- - -instruct roundFloat_nop(regF dst) -%{ - match(Set dst (RoundFloat dst)); - - ins_cost(0); - ins_encode(); - ins_pipe( empty ); -%} - -instruct roundDouble_nop(regD dst) -%{ - match(Set dst (RoundDouble dst)); - - ins_cost(0); - ins_encode(); - ins_pipe( empty ); -%} - //----------BSWAP Instructions------------------------------------------------- instruct bytes_reverse_int(mRegI dst, mRegIorL2I src) %{ match(Set dst (ReverseBytesI src)); @@ -14383,6 +14401,64 @@ instruct reduceVD(regD dst, regD src, vReg vsrc, vReg tmp) %{ ins_pipe( pipe_slow ); %} +// ------------------------------ Vector Round --------------------------------- + +instruct round_float_lsx(vReg dst, vReg src, vReg vtemp1, vReg vtemp2) %{ + predicate(Matcher::vector_length_in_bytes(n) <= 16); + match(Set dst (RoundVF src)); + effect(TEMP_DEF dst, TEMP vtemp1, TEMP vtemp2); + format %{ "round_float_lsx $dst, $src\t# @round_float_lsx" %} + ins_encode %{ + __ java_round_float_lsx($dst$$FloatRegister, + $src$$FloatRegister, + $vtemp1$$FloatRegister, + 
$vtemp2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct round_float_lasx(vReg dst, vReg src, vReg vtemp1, vReg vtemp2) %{ + predicate(Matcher::vector_length_in_bytes(n) > 16); + match(Set dst (RoundVF src)); + effect(TEMP_DEF dst, TEMP vtemp1, TEMP vtemp2); + format %{ "round_float_lasx $dst, $src\t# @round_float_lasx" %} + ins_encode %{ + __ java_round_float_lasx($dst$$FloatRegister, + $src$$FloatRegister, + $vtemp1$$FloatRegister, + $vtemp2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct round_double_lsx(vReg dst, vReg src, vReg vtemp1, vReg vtemp2) %{ + predicate(Matcher::vector_length_in_bytes(n) <= 16); + match(Set dst (RoundVD src)); + effect(TEMP_DEF dst, TEMP vtemp1, TEMP vtemp2); + format %{ "round_double_lsx $dst, $src\t# @round_double_lsx" %} + ins_encode %{ + __ java_round_double_lsx($dst$$FloatRegister, + $src$$FloatRegister, + $vtemp1$$FloatRegister, + $vtemp2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct round_double_lasx(vReg dst, vReg src, vReg vtemp1, vReg vtemp2) %{ + predicate(Matcher::vector_length_in_bytes(n) > 16); + match(Set dst (RoundVD src)); + effect(TEMP_DEF dst, TEMP vtemp1, TEMP vtemp2); + format %{ "round_double_lasx $dst, $src\t# @round_double_lasx" %} + ins_encode %{ + __ java_round_double_lasx($dst$$FloatRegister, + $src$$FloatRegister, + $vtemp1$$FloatRegister, + $vtemp2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + // ------------------------------ RoundDoubleModeV ---------------------------- instruct roundVD(vReg dst, vReg src, immI rmode) %{ diff --git a/src/hotspot/cpu/loongarch/macroAssembler_loongarch.cpp b/src/hotspot/cpu/loongarch/macroAssembler_loongarch.cpp index 27e8f2de13b..b7b7ece47e8 100644 --- a/src/hotspot/cpu/loongarch/macroAssembler_loongarch.cpp +++ b/src/hotspot/cpu/loongarch/macroAssembler_loongarch.cpp @@ -3538,65 +3538,13 @@ void MacroAssembler::count_positives(Register src, Register len, Register result // jtreg: TestStringIntrinsicRangeChecks.java void MacroAssembler::char_array_compress(Register src, Register dst, Register len, Register result, - Register tmp1, Register tmp2, - Register tmp3) { - Label Loop, Done, Once, Fail; - - move(result, len); - bge(R0, result, Done); - - srli_w(AT, len, 2); - andi(len, len, 3); - - li(tmp3, 0xff00ff00ff00ff00); - - bind(Loop); - beqz(AT, Once); - ld_d(tmp1, src, 0); - andr(tmp2, tmp3, tmp1); // not latin-1, stop here - bnez(tmp2, Fail); - - // 0x00a100b200c300d4 -> 0x00000000a1b2c3d4 - srli_d(tmp2, tmp1, 8); - orr(tmp2, tmp2, tmp1); // 0x00a1a1b2b2c3c3d4 - bstrpick_d(tmp1, tmp2, 47, 32); // 0x0000a1b2 - slli_d(tmp1, tmp1, 16); // 0xa1b20000 - bstrins_d(tmp1, tmp2, 15, 0); // 0xa1b2c3d4 - - st_w(tmp1, dst, 0); - addi_w(AT, AT, -1); - addi_d(dst, dst, 4); - addi_d(src, src, 8); - b(Loop); - - bind(Once); - beqz(len, Done); - ld_d(AT, src, 0); - - bstrpick_d(tmp1, AT, 15, 0); - andr(tmp2, tmp3, tmp1); - bnez(tmp2, Fail); - st_b(tmp1, dst, 0); - addi_w(len, len, -1); - - beqz(len, Done); - bstrpick_d(tmp1, AT, 31, 16); - andr(tmp2, tmp3, tmp1); - bnez(tmp2, Fail); - st_b(tmp1, dst, 1); - addi_w(len, len, -1); - - beqz(len, Done); - bstrpick_d(tmp1, AT, 47, 32); - andr(tmp2, tmp3, tmp1); - bnez(tmp2, Fail); - st_b(tmp1, dst, 2); - b(Done); - - bind(Fail); - move(result, R0); - - bind(Done); + Register tmp1, Register tmp2, Register tmp3, + FloatRegister vtemp1, FloatRegister vtemp2, + FloatRegister vtemp3, FloatRegister vtemp4) { + encode_iso_array(src, dst, len, result, tmp1, tmp2, tmp3, false, vtemp1, vtemp2, vtemp3, vtemp4); + // Adjust 
result: result == len ? len : 0, i.e. 0 unless all chars were Latin-1 + sub_w(tmp1, result, len); + masknez(result, result, tmp1); } // Inflate byte[] to char[]. len must be positive int. @@ -3690,20 +3638,51 @@ void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len void MacroAssembler::encode_iso_array(Register src, Register dst, Register len, Register result, Register tmp1, Register tmp2, - Register tmp3, bool ascii) { - Label Loop, Done, Once; + Register tmp3, bool ascii, + FloatRegister vtemp1, FloatRegister vtemp2, + FloatRegister vtemp3, FloatRegister vtemp4) { + const FloatRegister shuf_index = vtemp3; + const FloatRegister latin_mask = vtemp4; - move(result, R0); // init in case of bad value - bge(R0, len, Done); + Label Deal8, Loop8, Loop32, Done, Once; - srai_w(AT, len, 2); + move(result, R0); // init in case of bad value + bge(R0, len, Done); li(tmp3, ascii ? 0xff80ff80ff80ff80 : 0xff00ff00ff00ff00); + srai_w(AT, len, 4); + beqz(AT, Deal8); - bind(Loop); + li(tmp1, StubRoutines::la::string_compress_index()); + vld(shuf_index, tmp1, 0); + vreplgr2vr_d(latin_mask, tmp3); + + bind(Loop32); + beqz(AT, Deal8); + + vld(vtemp1, src, 0); + vld(vtemp2, src, 16); + addi_w(AT, AT, -1); + + vor_v(fscratch, vtemp1, vtemp2); + vand_v(fscratch, fscratch, latin_mask); + vseteqz_v(FCC0, fscratch); // not latin-1, apply slow path + bceqz(FCC0, Once); + + vshuf_b(fscratch, vtemp2, vtemp1, shuf_index); + + vstx(fscratch, dst, result); + addi_d(src, src, 32); + addi_w(result, result, 16); + b(Loop32); + + bind(Deal8); + bstrpick_w(AT, len, 3, 2); + + bind(Loop8); beqz(AT, Once); ld_d(tmp1, src, 0); - andr(tmp2, tmp3, tmp1); // not latin-1, stop here + andr(tmp2, tmp3, tmp1); // not latin-1, apply slow path bnez(tmp2, Once); // 0x00a100b200c300d4 -> 0x00000000a1b2c3d4 @@ -3717,12 +3696,12 @@ void MacroAssembler::encode_iso_array(Register src, Register dst, addi_w(AT, AT, -1); addi_d(src, src, 8); addi_w(result, result, 4); - b(Loop); + b(Loop8); bind(Once); beq(len, result, Done); ld_hu(tmp1, src, 0); - andr(tmp2, tmp3, tmp1); // not latin-1, stop here + andr(tmp2, tmp3, tmp1); // not latin-1, stop here bnez(tmp2, Done); stx_b(tmp1, dst, result); addi_d(src, src, 2); @@ -3732,6 +3711,144 @@ void MacroAssembler::encode_iso_array(Register src, Register dst, bind(Done); } +// Math.round employs the ties-to-positive rounding mode, +// which is not a typical conversion mode defined +// in IEEE 754-2008. For single-precision floats, +// the following algorithm can be used to effectively +// implement rounding via standard operations.
+// +// if src >= 0: +// dst = floor(src + 0.49999997f) +// else: +// dst = floor(src + 0.5f) +void MacroAssembler::java_round_float(Register dst, + FloatRegister src, + Register tmp) { + block_comment("java_round_float: { "); + li(AT, StubRoutines::la::round_float_imm()); + + movfr2gr_s(tmp, src); + bstrpick_w(tmp, tmp, 31, 31); + slli_w(tmp, tmp, 2); + fldx_s(fscratch, AT, tmp); + fadd_s(fscratch, fscratch, src); + + ftintrm_w_s(fscratch, fscratch); + movfr2gr_s(dst, fscratch); + block_comment("} java_round_float"); +} + +void MacroAssembler::java_round_float_lsx(FloatRegister dst, + FloatRegister src, + FloatRegister vtemp1, + FloatRegister vtemp2) { + block_comment("java_round_float_lsx: { "); + li(AT, StubRoutines::la::round_float_imm()); + + vldrepl_w(vtemp2, AT, 1); // repl 0.5f + vslti_w(fscratch, src, 0); // masked add + vand_v(vtemp2, fscratch, vtemp2); + vfadd_s(dst, src, vtemp2); + + vldrepl_w(vtemp1, AT, 0); // repl 0.49999997f + vnor_v(fscratch, fscratch, fscratch); // rev mask + vand_v(vtemp1, fscratch, vtemp1); + vfadd_s(dst, dst, vtemp1); + + vftintrm_w_s(dst, dst); + block_comment("} java_round_float_lsx"); +} + +void MacroAssembler::java_round_float_lasx(FloatRegister dst, + FloatRegister src, + FloatRegister vtemp1, + FloatRegister vtemp2) { + block_comment("java_round_float_lasx: { "); + li(AT, StubRoutines::la::round_float_imm()); + + xvldrepl_w(vtemp2, AT, 1); // repl 0.5f + xvslti_w(fscratch, src, 0); // masked add + xvand_v(vtemp2, fscratch, vtemp2); + xvfadd_s(dst, src, vtemp2); + + xvldrepl_w(vtemp1, AT, 0); // repl 0.49999997f + xvnor_v(fscratch, fscratch, fscratch); // rev mask + xvand_v(vtemp1, fscratch, vtemp1); + xvfadd_s(dst, dst, vtemp1); + + xvftintrm_w_s(dst, dst); + block_comment("} java_round_float_lasx"); +} + +// Math.round employs the ties-to-positive rounding mode, +// which is not a typical conversion mode defined +// in IEEE 754-2008. For double-precision floats, +// the following algorithm can be used to effectively +// implement rounding via standard operations.
+// +// if src >= 0: +// dst = floor(src + 0.49999999999999994d) +// else: +// dst = floor(src + 0.5d) +void MacroAssembler::java_round_double(Register dst, + FloatRegister src, + Register tmp) { + block_comment("java_round_double: { "); + li(AT, StubRoutines::la::round_double_imm()); + + movfr2gr_d(tmp, src); + bstrpick_d(tmp, tmp, 63, 63); + slli_d(tmp, tmp, 3); + fldx_d(fscratch, AT, tmp); + fadd_d(fscratch, fscratch, src); + + ftintrm_l_d(fscratch, fscratch); + movfr2gr_d(dst, fscratch); + block_comment("} java_round_double"); +} + +void MacroAssembler::java_round_double_lsx(FloatRegister dst, + FloatRegister src, + FloatRegister vtemp1, + FloatRegister vtemp2) { + block_comment("java_round_double_lsx: { "); + li(AT, StubRoutines::la::round_double_imm()); + + vldrepl_d(vtemp2, AT, 1); // repl 0.5d + vslti_d(fscratch, src, 0); // masked add + vand_v(vtemp2, fscratch, vtemp2); + vfadd_d(dst, src, vtemp2); + + vldrepl_d(vtemp1, AT, 0); // repl 0.49999999999999994d + vnor_v(fscratch, fscratch, fscratch); // rev mask + vand_v(vtemp1, fscratch, vtemp1); + vfadd_d(dst, dst, vtemp1); + + vftintrm_l_d(dst, dst); + block_comment("} java_round_double_lsx"); +} + +void MacroAssembler::java_round_double_lasx(FloatRegister dst, + FloatRegister src, + FloatRegister vtemp1, + FloatRegister vtemp2) { + block_comment("java_round_double_lasx: { "); + li(AT, StubRoutines::la::round_double_imm()); + + xvldrepl_d(vtemp2, AT, 1); // repl 0.5d + xvslti_d(fscratch, src, 0); // masked add + xvand_v(vtemp2, fscratch, vtemp2); + xvfadd_d(dst, src, vtemp2); + + xvldrepl_d(vtemp1, AT, 0); // repl 0.49999999999999994d + xvnor_v(fscratch, fscratch, fscratch); // rev mask + xvand_v(vtemp1, fscratch, vtemp1); + xvfadd_d(dst, dst, vtemp1); + + xvftintrm_l_d(dst, dst); + block_comment("} java_round_double_lasx"); +} + // Code for BigInteger::mulAdd intrinsic // out = c_rarg0 // in = c_rarg1 diff --git a/src/hotspot/cpu/loongarch/macroAssembler_loongarch.hpp b/src/hotspot/cpu/loongarch/macroAssembler_loongarch.hpp index 06db5471b2a..25db54ac4b9 100644 --- a/src/hotspot/cpu/loongarch/macroAssembler_loongarch.hpp +++ b/src/hotspot/cpu/loongarch/macroAssembler_loongarch.hpp @@ -315,6 +315,18 @@ class MacroAssembler: public Assembler { void sign_extend_short(Register reg) { ext_w_h(reg, reg); } void sign_extend_byte(Register reg) { ext_w_b(reg, reg); } + // java.lang.Math::round intrinsics + void java_round_float(Register dst, FloatRegister src, Register tmp); + void java_round_float_lsx(FloatRegister dst, FloatRegister src, + FloatRegister vtemp1, FloatRegister vtemp2); + void java_round_float_lasx(FloatRegister dst, FloatRegister src, + FloatRegister vtemp1, FloatRegister vtemp2); + void java_round_double(Register dst, FloatRegister src, Register tmp); + void java_round_double_lsx(FloatRegister dst, FloatRegister src, + FloatRegister vtemp1, FloatRegister vtemp2); + void java_round_double_lasx(FloatRegister dst, FloatRegister src, + FloatRegister vtemp1, FloatRegister vtemp2); + // allocation void tlab_allocate( Register obj, // result: pointer to object after successful allocation @@ -673,9 +685,11 @@ class MacroAssembler: public Assembler { Register tmp1, Register tmp2); // Code for java.lang.StringUTF16::compress intrinsic. 
- void char_array_compress(Register src, Register dst, Register len, - Register result, Register tmp1, - Register tmp2, Register tmp3); + void char_array_compress(Register src, Register dst, + Register len, Register result, + Register tmp1, Register tmp2, Register tmp3, + FloatRegister vtemp1, FloatRegister vtemp2, + FloatRegister vtemp3, FloatRegister vtemp4); // Code for java.lang.StringLatin1::inflate intrinsic. void byte_array_inflate(Register src, Register dst, Register len, @@ -687,7 +701,9 @@ class MacroAssembler: public Assembler { void encode_iso_array(Register src, Register dst, Register len, Register result, Register tmp1, Register tmp2, - Register tmp3, bool ascii); + Register tmp3, bool ascii, + FloatRegister vtemp1, FloatRegister vtemp2, + FloatRegister vtemp3, FloatRegister vtemp4); // Code for java.math.BigInteger::mulAdd intrinsic. void mul_add(Register out, Register in, Register offset, diff --git a/src/hotspot/cpu/loongarch/stubRoutines_loongarch.hpp b/src/hotspot/cpu/loongarch/stubRoutines_loongarch.hpp index 943f01e916f..2614bbd6832 100644 --- a/src/hotspot/cpu/loongarch/stubRoutines_loongarch.hpp +++ b/src/hotspot/cpu/loongarch/stubRoutines_loongarch.hpp @@ -61,6 +61,11 @@ class la { static address _jlong_fill; static address _arrayof_jlong_fill; + static julong _string_compress_index[]; + + static jfloat _round_float_imm[]; + static jdouble _round_double_imm[]; + // begin trigonometric tables block. See comments in .cpp file static juint _npio2_hw[]; static jdouble _two_over_pi[]; @@ -96,6 +101,18 @@ class la { static address arrayof_jlong_fill() { return _arrayof_jlong_fill; } + + static address string_compress_index() { + return (address) _string_compress_index; + } + + static address round_float_imm() { + return (address) _round_float_imm; + } + + static address round_double_imm() { + return (address) _round_double_imm; + } }; #endif // CPU_LOONGARCH_STUBROUTINES_LOONGARCH_64_HPP diff --git a/src/hotspot/cpu/loongarch/stubRoutines_loongarch_64.cpp b/src/hotspot/cpu/loongarch/stubRoutines_loongarch_64.cpp index 281cc99115b..0986b40627a 100644 --- a/src/hotspot/cpu/loongarch/stubRoutines_loongarch_64.cpp +++ b/src/hotspot/cpu/loongarch/stubRoutines_loongarch_64.cpp @@ -151,6 +151,10 @@ ATTRIBUTE_ALIGNED(64) jdouble StubRoutines::la::_dcos_coef[] = { -1.13596475577881948265e-11 // 0xBDA8FAE9BE8838D4 }; +ATTRIBUTE_ALIGNED(128) julong StubRoutines::la::_string_compress_index[] = { + 0x0e0c0a0806040200UL, 0x1e1c1a1816141210UL // 128-bit shuffle index +}; + // Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi. // Used in cases of very large argument. 396 hex digits is enough to support // required precision. 
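Note on the _string_compress_index table added above: in encode_iso_array, vshuf_b gathers bytes from the 32-byte concatenation of the two vld results, and the index bytes 0x00, 0x02, ..., 0x1e select the low (little-endian) byte of each of the 16 UTF-16 chars, i.e. the Latin-1 payload. A minimal host-side C++ sketch of that byte gather, under the assumption that VSHUF.B indexes the pair with the first loaded vector at positions 0..15 (emulate_vshuf_b is purely illustrative, not the ISA definition):

#include <cassert>
#include <cstdint>
#include <cstring>

// Illustrative stand-in for VSHUF.B over a 32-byte source pair.
static void emulate_vshuf_b(uint8_t dst[16], const uint8_t pair[32], const uint8_t sel[16]) {
  for (int i = 0; i < 16; i++)
    dst[i] = pair[sel[i] & 0x1f];  // 0..15 -> first vld, 16..31 -> second vld
}

int main() {
  // 16 little-endian UTF-16 chars, all Latin-1 (high bytes zero).
  uint8_t pair[32];
  for (int i = 0; i < 16; i++) { pair[2 * i] = uint8_t('a' + i); pair[2 * i + 1] = 0; }

  // The table from StubRoutines::la::_string_compress_index (little-endian words).
  const uint64_t index[2] = { 0x0e0c0a0806040200ULL, 0x1e1c1a1816141210ULL };
  uint8_t sel[16];
  memcpy(sel, index, sizeof sel);

  uint8_t out[16];
  emulate_vshuf_b(out, pair, sel);
  for (int i = 0; i < 16; i++)
    assert(out[i] == uint8_t('a' + i));  // 16 chars packed into 16 bytes
  return 0;
}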
@@ -182,3 +186,13 @@ ATTRIBUTE_ALIGNED(64) jdouble StubRoutines::la::_pio2[] = { 2.73370053816464559624e-44, // 0x36E3822280000000 2.16741683877804819444e-51, // 0x3569F31D00000000 }; + +ATTRIBUTE_ALIGNED(64) jfloat StubRoutines::la::_round_float_imm[] = { + 0.49999997f, // round positive + 0.5f, // round negative +}; + +ATTRIBUTE_ALIGNED(64) jdouble StubRoutines::la::_round_double_imm[] = { + 0.49999999999999994, // round positive + 0.5, // round negative +}; diff --git a/src/hotspot/cpu/loongarch/templateTable_loongarch_64.cpp b/src/hotspot/cpu/loongarch/templateTable_loongarch_64.cpp index fc459586dda..86b4241b20a 100644 --- a/src/hotspot/cpu/loongarch/templateTable_loongarch_64.cpp +++ b/src/hotspot/cpu/loongarch/templateTable_loongarch_64.cpp @@ -3476,7 +3476,6 @@ void TemplateTable::invokeinterface(int byte_no) { __ bind(no_such_method); // throw exception - __ pop(Rmethod); // pop return address (pushed by prepare_invoke) __ restore_bcp(); __ restore_locals(); // Pass arguments for generating a verbose error message. @@ -3490,7 +3489,6 @@ void TemplateTable::invokeinterface(int byte_no) { __ bind(no_such_interface); // throw exception - __ pop(Rmethod); // pop return address (pushed by prepare_invoke) __ restore_bcp(); __ restore_locals(); // Pass arguments for generating a verbose error message. diff --git a/src/hotspot/share/runtime/objectMonitor.cpp b/src/hotspot/share/runtime/objectMonitor.cpp index 12a0e953771..4acc5848471 100644 --- a/src/hotspot/share/runtime/objectMonitor.cpp +++ b/src/hotspot/share/runtime/objectMonitor.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2023. These + * modifications are Copyright (c) 2023, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #include "precompiled.hpp" #include "classfile/vmSymbols.hpp" #include "gc/shared/oopStorage.hpp" @@ -365,6 +371,9 @@ bool ObjectMonitor::enter(JavaThread* current) { } assert(owner_raw() != current, "invariant"); + // The load of _succ in the assertion below may be reordered ahead of the store from the earlier + // "if (_succ == current) _succ = nullptr;"; the expected order is that store first, then the assertion. + LOONGARCH64_ONLY(DEBUG_ONLY(__asm__ __volatile__ ("dbar 0x700\n");)) assert(_succ != current, "invariant"); assert(!SafepointSynchronize::is_at_safepoint(), "invariant"); assert(current->thread_state() != _thread_blocked, "invariant"); @@ -729,6 +738,7 @@ void ObjectMonitor::EnterI(JavaThread* current) { } // The Spin failed -- Enqueue and park the thread ... + LOONGARCH64_ONLY(DEBUG_ONLY(__asm__ __volatile__ ("dbar 0x700\n");)) assert(_succ != current, "invariant"); assert(owner_raw() != current, "invariant"); assert(_Responsible != current, "invariant");
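Note on the rounding constants above: for non-negative inputs the bias is 0.49999997f (the largest float below 0.5f) rather than 0.5f because, e.g., for src == 0.49999997f the single-precision sum src + 0.5f rounds up to 1.0f, so flooring it would return 1 where Math.round must return 0. For negative inputs 0.5f is the correct bias, so that e.g. -0.5f rounds to 0 under ties-to-positive. A small self-contained C++ check of the scalar algorithm on a few edge cases; round_ref is an assumed reference for Math.round(float) semantics (exact here because it is evaluated in double):

#include <cmath>
#include <cstdio>

// Reference: Java Math.round(float) rounds ties toward positive infinity.
// floor(x + 0.5) evaluated in double is exact for float inputs in int range.
static long round_ref(float x) { return (long)std::floor((double)x + 0.5); }

// The patch's algorithm: sign-selected bias, then round toward minus infinity
// (ftintrm.w.s in the generated code, std::floor here).
static long round_alg(float x) {
  float bias = (x >= 0.0f) ? 0.49999997f : 0.5f;  // StubRoutines::la::_round_float_imm
  return (long)std::floor(x + bias);
}

int main() {
  const float tests[] = { 0.49999997f, 0.5f, -0.5f, 1.5f, -1.5f, 2.5f, -2.5f, 8388609.0f };
  for (float t : tests)
    std::printf("round(%.9g): alg=%ld ref=%ld\n", (double)t, round_alg(t), round_ref(t));
  return 0;
}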