Allow virtual threads to unmount when blocked on Object.wait()
pchilano committed Oct 17, 2024
1 parent 54813cd commit 21c3353
Showing 36 changed files with 817 additions and 119 deletions.
15 changes: 11 additions & 4 deletions src/hotspot/cpu/aarch64/continuationFreezeThaw_aarch64.inline.hpp
@@ -155,10 +155,16 @@ inline void FreezeBase::relativize_interpreted_frame_metadata(const frame& f, co
// extended_sp is already relativized by TemplateInterpreterGenerator::generate_normal_entry or
// AbstractInterpreter::layout_activation

// The interpreter native wrapper code adds space in the stack equal to size_of_parameters()
// after the fixed part of the frame. For wait0 this is equal to 3 words (this + long parameter).
// We adjust by this size since otherwise the saved last sp will be less than the extended_sp.
DEBUG_ONLY(Method* m = hf.interpreter_frame_method();)
DEBUG_ONLY(int extra_space = m->is_object_wait0() ? m->size_of_parameters() : 0;)

assert((hf.fp() - hf.unextended_sp()) == (f.fp() - f.unextended_sp()), "");
assert(hf.unextended_sp() == (intptr_t*)hf.at(frame::interpreter_frame_last_sp_offset), "");
assert(hf.unextended_sp() <= (intptr_t*)hf.at(frame::interpreter_frame_initial_sp_offset), "");
assert(hf.unextended_sp() > (intptr_t*)hf.at(frame::interpreter_frame_extended_sp_offset), "");
assert(hf.unextended_sp() + extra_space > (intptr_t*)hf.at(frame::interpreter_frame_extended_sp_offset), "");
assert(hf.fp() > (intptr_t*)hf.at(frame::interpreter_frame_initial_sp_offset), "");
assert(hf.fp() <= (intptr_t*)hf.at(frame::interpreter_frame_locals_offset), "");
}
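
The extra_space above can be made concrete: wait0 takes a long timeout, so its parameter area is three words, one for the receiver and two for the long. A minimal sketch of that arithmetic (illustrative code, not part of the commit):

    // Why size_of_parameters() == 3 for Object.wait0(long):
    // a reference ('this') occupies one parameter word, a jlong occupies two.
    int wait0_param_words() {
      const int receiver_words = 1;  // 'this'
      const int jlong_words    = 2;  // the timeout argument
      return receiver_words + jlong_words;  // == 3, matching the comment above
    }

Because the native wrapper extends the stack by those three words after the fixed frame, the saved last sp (unextended_sp) may legitimately lie below extended_sp for wait0 frames; the relaxed assert accounts for exactly that.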
@@ -219,7 +225,6 @@ template<typename FKind> frame ThawBase::new_stack_frame(const frame& hf, frame&
// If caller is interpreted it already made room for the callee arguments
int overlap = caller.is_interpreted_frame() ? ContinuationHelper::InterpretedFrame::stack_argsize(hf) : 0;
const int fsize = (int)(ContinuationHelper::InterpretedFrame::frame_bottom(hf) - hf.unextended_sp() - overlap);
const int locals = hf.interpreter_frame_method()->max_locals();
intptr_t* frame_sp = caller.unextended_sp() - fsize;
intptr_t* fp = frame_sp + (hf.fp() - heap_sp);
if ((intptr_t)fp % frame::frame_alignment != 0) {
@@ -258,7 +263,7 @@ template<typename FKind> frame ThawBase::new_stack_frame(const frame& hf, frame&
// we need to recreate a "real" frame pointer, pointing into the stack
fp = frame_sp + FKind::size(hf) - frame::sender_sp_offset;
} else {
fp = FKind::stub
fp = FKind::stub || FKind::native
? frame_sp + fsize - frame::sender_sp_offset // fp always points to the address below the pushed return pc. We need the correct address.
: *(intptr_t**)(hf.sp() - frame::sender_sp_offset); // we need to re-read fp because it may be an oop and we might have fixed the frame.
}
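
The heap copy's saved fp cannot be trusted for stub frames, and with this change the same now holds for native frames, so on thaw the frame pointer is rebuilt from the frame size instead of being re-read. A hedged sketch of that choice (names follow the diff; the helper itself is illustrative):

    // Illustrative-only helper mirroring the ternary above.
    intptr_t* thawed_fp(intptr_t* frame_sp, int fsize, const frame& hf, bool stub_or_native) {
      if (stub_or_native) {
        // Rebuild fp from the frame size so it pairs correctly with the pushed return pc.
        return frame_sp + fsize - frame::sender_sp_offset;
      }
      // Otherwise re-read the saved fp: it may be an oop slot that was fixed during freeze.
      return *(intptr_t**)(hf.sp() - frame::sender_sp_offset);
    }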
@@ -329,7 +334,9 @@ inline void ThawBase::derelativize_interpreted_frame_metadata(const frame& hf, c
assert(f.at_absolute(frame::interpreter_frame_monitor_block_top_offset) <= frame::interpreter_frame_initial_sp_offset, "");

// Make sure that extended_sp is kept relativized.
assert((intptr_t*)f.at_relative(frame::interpreter_frame_extended_sp_offset) < f.unextended_sp(), "");
DEBUG_ONLY(Method* m = hf.interpreter_frame_method();)
DEBUG_ONLY(int extra_space = m->is_object_wait0() ? m->size_of_parameters() : 0;) // see comment in relativize_interpreted_frame_metadata()
assert((intptr_t*)f.at_relative(frame::interpreter_frame_extended_sp_offset) < f.unextended_sp() + extra_space, "");
}

#endif // CPU_AARCH64_CONTINUATIONFREEZETHAW_AARCH64_INLINE_HPP
3 changes: 2 additions & 1 deletion src/hotspot/cpu/aarch64/frame_aarch64.hpp
@@ -73,7 +73,8 @@
sender_sp_offset = 2,

// Interpreter frames
interpreter_frame_oop_temp_offset = 3, // for native calls only
interpreter_frame_result_handler_offset = 3, // for native calls only
interpreter_frame_oop_temp_offset = 2, // for native calls only

interpreter_frame_sender_sp_offset = -1,
// outgoing sp before a call to an invoked method
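
The oop temp slot moves down one word, freeing word 3 above fp for a new result-handler slot that survives a freeze/thaw cycle. As a sketch (offsets in words relative to fp, values as in the diff):

    // aarch64 interpreter frame, native-call slots above fp:
    //   fp + 3 : result handler  (interpreter_frame_result_handler_offset, new)
    //   fp + 2 : oop temp        (interpreter_frame_oop_temp_offset, moved down from 3)
    // A slot is addressed as Address(rfp, offset * wordSize), as in the store
    // added to generate_native_entry() further down.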
32 changes: 24 additions & 8 deletions src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp
@@ -1740,11 +1740,20 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
}

// Change state to native (we save the return address in the thread, since it might not
// be pushed on the stack when we do a stack traversal).
// We use the same pc/oopMap repeatedly when we call out
// be pushed on the stack when we do a stack traversal). It is enough that the pc()
// points into the right code segment. It does not have to be the correct return pc.
// We use the same pc/oopMap repeatedly when we call out.

Label native_return;
__ set_last_Java_frame(sp, noreg, native_return, rscratch1);
if (LockingMode != LM_LEGACY && method->is_object_wait0()) {
// For convenience we use the pc we want to resume to in case of preemption on Object.wait.
__ set_last_Java_frame(sp, noreg, native_return, rscratch1);
} else {
intptr_t the_pc = (intptr_t) __ pc();
oop_maps->add_gc_map(the_pc - start, map);

__ set_last_Java_frame(sp, noreg, __ pc(), rscratch1);
}

Label dtrace_method_entry, dtrace_method_entry_done;
if (DTraceMethodProbes) {
@@ -1847,11 +1856,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,

__ rt_call(native_func);

__ bind(native_return);

intptr_t return_pc = (intptr_t) __ pc();
oop_maps->add_gc_map(return_pc - start, map);

// Verify or restore cpu control state after JNI call
__ restore_cpu_control_state_after_jni(rscratch1, rscratch2);

@@ -1910,6 +1914,18 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
__ stlrw(rscratch1, rscratch2);
__ bind(after_transition);

if (LockingMode != LM_LEGACY && method->is_object_wait0()) {
// Check preemption for Object.wait()
__ ldr(rscratch1, Address(rthread, JavaThread::preempt_alternate_return_offset()));
__ cbz(rscratch1, native_return);
__ str(zr, Address(rthread, JavaThread::preempt_alternate_return_offset()));
__ br(rscratch1);
__ bind(native_return);

intptr_t the_pc = (intptr_t) __ pc();
oop_maps->add_gc_map(the_pc - start, map);
}

Label reguard;
Label reguard_done;
__ ldrb(rscratch1, Address(rthread, JavaThread::stack_guard_state_offset()));
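
The block added after the state transition implements a small handshake: if the thread was preempted while blocked in Object.wait(), the VM leaves an alternate return pc in the JavaThread, and the wrapper consumes it and jumps there; otherwise it falls through to native_return, whose pc carries the oopmap registered right after the label. Rendered as C++ pseudocode (a sketch only; the accessors and jump_to() helper are assumptions standing in for the generated loads, stores, and branch):

    // Sketch of the generated check, not VM source.
    void check_wait0_preemption(JavaThread* thread) {
      void* resume_pc = thread->preempt_alternate_return();  // assumed accessor for the field above
      if (resume_pc != nullptr) {
        thread->set_preempt_alternate_return(nullptr);  // consume exactly once
        jump_to(resume_pc);                             // e.g. 'br rscratch1' in the generated code
      }
      // fall through: the normal, non-preempted path at native_return
    }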
@@ -116,6 +116,7 @@ inline int StackChunkFrameStream<frame_kind>::interpreter_frame_num_oops() const
f.interpreted_frame_oop_map(&mask);
return mask.num_oops()
+ 1 // for the mirror oop
+ (f.interpreter_frame_method()->is_native() ? 1 : 0) // temp oop slot
+ pointer_delta_as_int((intptr_t*)f.interpreter_frame_monitor_begin(),
(intptr_t*)f.interpreter_frame_monitor_end())/BasicObjectLock::size();
}
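
Since a native interpreter frame can now be frozen while blocked in wait0, its oop temp slot is live inside stack chunks and must be reported to GC. An illustrative restatement of the adjusted count (not VM source):

    int frozen_interpreted_frame_oops(int mask_oops, bool is_native, int monitor_count) {
      return mask_oops            // oops from the interpreter oop map
           + 1                    // the method's mirror oop
           + (is_native ? 1 : 0)  // the temp oop slot, now live across a freeze
           + monitor_count;       // one oop per BasicObjectLock
    }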
31 changes: 27 additions & 4 deletions src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp
@@ -1348,6 +1348,8 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
// result handler is in r0
// set result handler
__ mov(result_handler, r0);
__ str(r0, Address(rfp, frame::interpreter_frame_result_handler_offset * wordSize));

// pass mirror handle if static call
{
Label L;
Expand Down Expand Up @@ -1383,9 +1385,10 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
// pass JNIEnv
__ add(c_rarg0, rthread, in_bytes(JavaThread::jni_environment_offset()));

// Set the last Java PC in the frame anchor to be the return address from
// the call to the native method: this will allow the debugger to
// generate an accurate stack trace.
// It is enough that the pc() points into the right code
// segment. It does not have to be the correct return pc.
// For convenience we use the pc we want to resume to in
// case of preemption on Object.wait.
Label native_return;
__ set_last_Java_frame(esp, rfp, native_return, rscratch1);

@@ -1406,9 +1409,13 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
__ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset()));
__ stlrw(rscratch1, rscratch2);

__ push_cont_fastpath();

// Call the native method.
__ blr(r10);
__ bind(native_return);

__ pop_cont_fastpath();

__ get_method(rmethod);
// result potentially in r0 or v0

Expand Down Expand Up @@ -1466,6 +1473,21 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
__ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset()));
__ stlrw(rscratch1, rscratch2);

if (LockingMode != LM_LEGACY) {
// Check preemption for Object.wait()
Label not_preempted;
__ ldr(rscratch1, Address(rthread, JavaThread::preempt_alternate_return_offset()));
__ cbz(rscratch1, not_preempted);
__ str(zr, Address(rthread, JavaThread::preempt_alternate_return_offset()));
__ br(rscratch1);
__ bind(native_return);
__ restore_after_resume(true /* is_native */);
__ bind(not_preempted);
} else {
// Any pc will do, so just use this one for LM_LEGACY to keep the code together.
__ bind(native_return);
}

// reset_last_Java_frame
__ reset_last_Java_frame(true);

@@ -1484,6 +1506,7 @@
{
Label no_oop;
__ adr(t, ExternalAddress(AbstractInterpreter::result_handler(T_OBJECT)));
__ ldr(result_handler, Address(rfp, frame::interpreter_frame_result_handler_offset*wordSize));
__ cmp(t, result_handler);
__ br(Assembler::NE, no_oop);
// Unbox oop result, e.g. JNIHandles::resolve result.
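
Registers are not preserved when a virtual thread is frozen and later thawed, so the interpreter now spills the result handler into the new frame slot before the native call and reloads it afterwards, just before deciding whether the result is an oop that needs unboxing. The round trip, condensed from the hunks above (aarch64 form; a sketch with the intervening code elided):

    // before the native call:
    __ str(r0, Address(rfp, frame::interpreter_frame_result_handler_offset * wordSize));
    // ... call; a freeze/thaw may occur while blocked in wait0 ...
    // after the call, before the T_OBJECT result check:
    __ ldr(result_handler, Address(rfp, frame::interpreter_frame_result_handler_offset * wordSize));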
16 changes: 12 additions & 4 deletions src/hotspot/cpu/riscv/continuationFreezeThaw_riscv.inline.hpp
@@ -153,10 +153,16 @@ inline void FreezeBase::relativize_interpreted_frame_metadata(const frame& f, co
// extended_sp is already relativized by TemplateInterpreterGenerator::generate_normal_entry or
// AbstractInterpreter::layout_activation

// The interpreter native wrapper code adds space in the stack equal to size_of_parameters()
// after the fixed part of the frame. For wait0 this is equal to 3 words (this + long parameter).
// We adjust by this size since otherwise the saved last sp will be less than the extended_sp.
DEBUG_ONLY(Method* m = hf.interpreter_frame_method();)
DEBUG_ONLY(int extra_space = m->is_object_wait0() ? m->size_of_parameters() : 0;)

assert((hf.fp() - hf.unextended_sp()) == (f.fp() - f.unextended_sp()), "");
assert(hf.unextended_sp() == (intptr_t*)hf.at(frame::interpreter_frame_last_sp_offset), "");
assert(hf.unextended_sp() <= (intptr_t*)hf.at(frame::interpreter_frame_initial_sp_offset), "");
assert(hf.unextended_sp() > (intptr_t*)hf.at(frame::interpreter_frame_extended_sp_offset), "");
assert(hf.unextended_sp() + extra_space > (intptr_t*)hf.at(frame::interpreter_frame_extended_sp_offset), "");
assert(hf.fp() > (intptr_t*)hf.at(frame::interpreter_frame_initial_sp_offset), "");
#ifdef ASSERT
if (f.interpreter_frame_method()->max_locals() > 0) {
@@ -222,7 +228,6 @@ template<typename FKind> frame ThawBase::new_stack_frame(const frame& hf, frame&
// If caller is interpreted it already made room for the callee arguments
int overlap = caller.is_interpreted_frame() ? ContinuationHelper::InterpretedFrame::stack_argsize(hf) : 0;
const int fsize = (int)(ContinuationHelper::InterpretedFrame::frame_bottom(hf) - hf.unextended_sp() - overlap);
const int locals = hf.interpreter_frame_method()->max_locals();
intptr_t* frame_sp = caller.unextended_sp() - fsize;
intptr_t* fp = frame_sp + (hf.fp() - heap_sp);
if ((intptr_t)fp % frame::frame_alignment != 0) {
@@ -261,7 +266,7 @@ template<typename FKind> frame ThawBase::new_stack_frame(const frame& hf, frame&
// we need to recreate a "real" frame pointer, pointing into the stack
fp = frame_sp + FKind::size(hf) - frame::sender_sp_offset;
} else {
fp = FKind::stub
fp = FKind::stub || FKind::native
// fp always points to the address above the pushed return pc. We need the correct address.
? frame_sp + fsize - frame::sender_sp_offset
// we need to re-read fp because it may be an oop and we might have fixed the frame.
@@ -334,8 +339,11 @@ inline void ThawBase::derelativize_interpreted_frame_metadata(const frame& hf, c
// Make sure that monitor_block_top is still relativized.
assert(f.at_absolute(frame::interpreter_frame_monitor_block_top_offset) <= frame::interpreter_frame_initial_sp_offset, "");

DEBUG_ONLY(Method* m = hf.interpreter_frame_method();)
DEBUG_ONLY(int extra_space = m->is_object_wait0() ? m->size_of_parameters() : 0;) // see comment in relativize_interpreted_frame_metadata()

// Make sure that extended_sp is kept relativized.
assert((intptr_t*)f.at_relative(frame::interpreter_frame_extended_sp_offset) < f.unextended_sp(), "");
assert((intptr_t*)f.at_relative(frame::interpreter_frame_extended_sp_offset) < f.unextended_sp() + extra_space, "");
}

#endif // CPU_RISCV_CONTINUATIONFREEZETHAW_RISCV_INLINE_HPP
3 changes: 2 additions & 1 deletion src/hotspot/cpu/riscv/frame_riscv.hpp
@@ -111,7 +111,8 @@
sender_sp_offset = 0,

// Interpreter frames
interpreter_frame_oop_temp_offset = 1, // for native calls only
interpreter_frame_result_handler_offset = 1, // for native calls only
interpreter_frame_oop_temp_offset = 0, // for native calls only

interpreter_frame_sender_sp_offset = -3,
// outgoing sp before a call to an invoked method
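
This mirrors the aarch64 change; only the numbers differ, because the fixed part of the RISC-V frame is laid out differently (sender_sp_offset is 0 here versus 2 on aarch64). In sketch form:

    // riscv interpreter frame, native-call slots relative to fp:
    //   fp + 1 : result handler  (interpreter_frame_result_handler_offset, new)
    //   fp + 0 : oop temp        (interpreter_frame_oop_temp_offset, moved down from 1)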
32 changes: 24 additions & 8 deletions src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp
@@ -1639,11 +1639,20 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
}

// Change state to native (we save the return address in the thread, since it might not
// be pushed on the stack when we do a stack traversal).
// We use the same pc/oopMap repeatedly when we call out
// be pushed on the stack when we do a stack traversal). It is enough that the pc()
// points into the right code segment. It does not have to be the correct return pc.
// We use the same pc/oopMap repeatedly when we call out.

Label native_return;
__ set_last_Java_frame(sp, noreg, native_return, t0);
if (LockingMode != LM_LEGACY && method->is_object_wait0()) {
// For convenience we use the pc we want to resume to in case of preemption on Object.wait.
__ set_last_Java_frame(sp, noreg, native_return, t0);
} else {
intptr_t the_pc = (intptr_t) __ pc();
oop_maps->add_gc_map(the_pc - start, map);

__ set_last_Java_frame(sp, noreg, __ pc(), t0);
}

Label dtrace_method_entry, dtrace_method_entry_done;
if (DTraceMethodProbes) {
@@ -1745,11 +1754,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,

__ rt_call(native_func);

__ bind(native_return);

intptr_t return_pc = (intptr_t) __ pc();
oop_maps->add_gc_map(return_pc - start, map);

// Verify or restore cpu control state after JNI call
__ restore_cpu_control_state_after_jni(t0);

@@ -1800,6 +1804,18 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
__ sw(t0, Address(t1));
__ bind(after_transition);

if (LockingMode != LM_LEGACY && method->is_object_wait0()) {
// Check preemption for Object.wait()
__ ld(t0, Address(xthread, JavaThread::preempt_alternate_return_offset()));
__ beqz(t0, native_return);
__ sd(zr, Address(xthread, JavaThread::preempt_alternate_return_offset()));
__ jr(t0);
__ bind(native_return);

intptr_t the_pc = (intptr_t) __ pc();
oop_maps->add_gc_map(the_pc - start, map);
}

Label reguard;
Label reguard_done;
__ lbu(t0, Address(xthread, JavaThread::stack_guard_state_offset()));
@@ -114,6 +114,7 @@ inline int StackChunkFrameStream<frame_kind>::interpreter_frame_num_oops() const
f.interpreted_frame_oop_map(&mask);
return mask.num_oops()
+ 1 // for the mirror oop
+ (f.interpreter_frame_method()->is_native() ? 1 : 0) // temp oop slot
+ pointer_delta_as_int((intptr_t*)f.interpreter_frame_monitor_begin(),
(intptr_t*)f.interpreter_frame_monitor_end()) / BasicObjectLock::size();
}
26 changes: 25 additions & 1 deletion src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp
@@ -1125,6 +1125,8 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
// result handler is in x10
// set result handler
__ mv(result_handler, x10);
__ sd(x10, Address(fp, frame::interpreter_frame_result_handler_offset * wordSize));

// pass mirror handle if static call
{
Label L;
Expand Down Expand Up @@ -1163,6 +1165,8 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {

// It is enough that the pc() points into the right code
// segment. It does not have to be the correct return pc.
// For convenience we use the pc we want to resume to in
// case of preemption on Object.wait.
Label native_return;
__ set_last_Java_frame(esp, fp, native_return, x30);

@@ -1184,9 +1188,13 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
__ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore);
__ sw(t0, Address(t1));

__ push_cont_fastpath();

// Call the native method.
__ jalr(x28);
__ bind(native_return);

__ pop_cont_fastpath();

__ get_method(xmethod);
// result potentially in x10 or f10

@@ -1252,6 +1260,21 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
__ mv(t0, _thread_in_Java);
__ sw(t0, Address(xthread, JavaThread::thread_state_offset()));

if (LockingMode != LM_LEGACY) {
// Check preemption for Object.wait()
Label not_preempted;
__ ld(t0, Address(xthread, JavaThread::preempt_alternate_return_offset()));
__ beqz(t0, not_preempted);
__ sd(zr, Address(xthread, JavaThread::preempt_alternate_return_offset()));
__ jr(t0);
__ bind(native_return);
__ restore_after_resume(true /* is_native */);
__ bind(not_preempted);
} else {
// Any pc will do, so just use this one for LM_LEGACY to keep the code together.
__ bind(native_return);
}

// reset_last_Java_frame
__ reset_last_Java_frame(true);

@@ -1270,6 +1293,7 @@
{
Label no_oop;
__ la(t, ExternalAddress(AbstractInterpreter::result_handler(T_OBJECT)));
__ ld(result_handler, Address(fp, frame::interpreter_frame_result_handler_offset * wordSize));
__ bne(t, result_handler, no_oop);
// Unbox oop result, e.g. JNIHandles::resolve result.
__ pop(ltos);
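
Taken together, the changes follow one pattern on both architectures. As far as can be read from this diff alone (the code that fills in preempt_alternate_return on the resume path is not shown here), the flow is roughly:

    // 1. generate_native_entry()/generate_native_wrapper() record 'native_return'
    //    as the last Java pc: the point a preempted thread should resume at.
    // 2. The thread blocks in wait0; the virtual thread can freeze (unmount).
    // 3. On resume, VM code outside this diff stores the resume pc in the
    //    JavaThread field behind preempt_alternate_return_offset().
    // 4. Back in _thread_in_Java, the generated code checks that field; if set,
    //    it clears it and branches there. The interpreter path additionally runs
    //    restore_after_resume(true /* is_native */) to rebuild frame state such
    //    as the spilled result handler.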
Some of the 36 changed files are not shown above.