From 21c335399c4c1f9a9e577c4e1cfcf7447be23244 Mon Sep 17 00:00:00 2001 From: Patricio Chilano Mateo Date: Wed, 16 Oct 2024 11:10:31 -0400 Subject: [PATCH] Allow virtual threads to unmount when blocked on Object.wait() --- .../continuationFreezeThaw_aarch64.inline.hpp | 15 +- src/hotspot/cpu/aarch64/frame_aarch64.hpp | 3 +- .../cpu/aarch64/sharedRuntime_aarch64.cpp | 32 ++- .../stackChunkFrameStream_aarch64.inline.hpp | 1 + .../templateInterpreterGenerator_aarch64.cpp | 31 ++- .../continuationFreezeThaw_riscv.inline.hpp | 16 +- src/hotspot/cpu/riscv/frame_riscv.hpp | 3 +- src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 32 ++- .../stackChunkFrameStream_riscv.inline.hpp | 1 + .../templateInterpreterGenerator_riscv.cpp | 26 ++- .../x86/continuationFreezeThaw_x86.inline.hpp | 11 +- src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp | 26 ++- .../x86/stackChunkFrameStream_x86.inline.hpp | 1 + .../x86/templateInterpreterGenerator_x86.cpp | 31 ++- src/hotspot/share/classfile/javaClasses.cpp | 27 ++- src/hotspot/share/classfile/javaClasses.hpp | 10 + src/hotspot/share/code/nmethod.cpp | 4 +- src/hotspot/share/interpreter/oopMapCache.cpp | 6 + src/hotspot/share/oops/method.cpp | 4 + src/hotspot/share/oops/method.hpp | 3 + src/hotspot/share/oops/stackChunkOop.hpp | 1 + .../share/oops/stackChunkOop.inline.hpp | 10 +- src/hotspot/share/prims/jvmtiEnvBase.cpp | 20 +- src/hotspot/share/prims/jvmtiExport.cpp | 15 ++ src/hotspot/share/prims/jvmtiExport.hpp | 1 + src/hotspot/share/prims/jvmtiTagMap.cpp | 9 +- src/hotspot/share/runtime/continuation.hpp | 5 + .../share/runtime/continuationFreezeThaw.cpp | 177 +++++++++++++--- src/hotspot/share/runtime/deoptimization.cpp | 4 + src/hotspot/share/runtime/javaThread.cpp | 2 + src/hotspot/share/runtime/javaThread.hpp | 19 ++ src/hotspot/share/runtime/objectMonitor.cpp | 194 ++++++++++++++++-- src/hotspot/share/runtime/objectMonitor.hpp | 19 +- src/hotspot/share/runtime/vframe.inline.hpp | 7 + .../share/classes/java/lang/Object.java | 28 +-- .../classes/java/lang/VirtualThread.java | 142 ++++++++++++- 36 files changed, 817 insertions(+), 119 deletions(-) diff --git a/src/hotspot/cpu/aarch64/continuationFreezeThaw_aarch64.inline.hpp b/src/hotspot/cpu/aarch64/continuationFreezeThaw_aarch64.inline.hpp index 3c5df0482c995..742d19541331f 100644 --- a/src/hotspot/cpu/aarch64/continuationFreezeThaw_aarch64.inline.hpp +++ b/src/hotspot/cpu/aarch64/continuationFreezeThaw_aarch64.inline.hpp @@ -155,10 +155,16 @@ inline void FreezeBase::relativize_interpreted_frame_metadata(const frame& f, co // extended_sp is already relativized by TemplateInterpreterGenerator::generate_normal_entry or // AbstractInterpreter::layout_activation + // The interpreter native wrapper code adds space in the stack equal to size_of_parameters() + // after the fixed part of the frame. For wait0 this is equal to 3 words (this + long parameter). + // We adjust by this size since otherwise the saved last sp will be less than the extended_sp. + DEBUG_ONLY(Method* m = hf.interpreter_frame_method();) + DEBUG_ONLY(int extra_space = m->is_object_wait0() ? 
m->size_of_parameters() : 0;) + assert((hf.fp() - hf.unextended_sp()) == (f.fp() - f.unextended_sp()), ""); assert(hf.unextended_sp() == (intptr_t*)hf.at(frame::interpreter_frame_last_sp_offset), ""); assert(hf.unextended_sp() <= (intptr_t*)hf.at(frame::interpreter_frame_initial_sp_offset), ""); - assert(hf.unextended_sp() > (intptr_t*)hf.at(frame::interpreter_frame_extended_sp_offset), ""); + assert(hf.unextended_sp() + extra_space > (intptr_t*)hf.at(frame::interpreter_frame_extended_sp_offset), ""); assert(hf.fp() > (intptr_t*)hf.at(frame::interpreter_frame_initial_sp_offset), ""); assert(hf.fp() <= (intptr_t*)hf.at(frame::interpreter_frame_locals_offset), ""); } @@ -219,7 +225,6 @@ template frame ThawBase::new_stack_frame(const frame& hf, frame& // If caller is interpreted it already made room for the callee arguments int overlap = caller.is_interpreted_frame() ? ContinuationHelper::InterpretedFrame::stack_argsize(hf) : 0; const int fsize = (int)(ContinuationHelper::InterpretedFrame::frame_bottom(hf) - hf.unextended_sp() - overlap); - const int locals = hf.interpreter_frame_method()->max_locals(); intptr_t* frame_sp = caller.unextended_sp() - fsize; intptr_t* fp = frame_sp + (hf.fp() - heap_sp); if ((intptr_t)fp % frame::frame_alignment != 0) { @@ -258,7 +263,7 @@ template frame ThawBase::new_stack_frame(const frame& hf, frame& // we need to recreate a "real" frame pointer, pointing into the stack fp = frame_sp + FKind::size(hf) - frame::sender_sp_offset; } else { - fp = FKind::stub + fp = FKind::stub || FKind::native ? frame_sp + fsize - frame::sender_sp_offset // fp always points to the address below the pushed return pc. We need correct address. : *(intptr_t**)(hf.sp() - frame::sender_sp_offset); // we need to re-read fp because it may be an oop and we might have fixed the frame. } @@ -329,7 +334,9 @@ inline void ThawBase::derelativize_interpreted_frame_metadata(const frame& hf, c assert(f.at_absolute(frame::interpreter_frame_monitor_block_top_offset) <= frame::interpreter_frame_initial_sp_offset, ""); // Make sure that extended_sp is kept relativized. - assert((intptr_t*)f.at_relative(frame::interpreter_frame_extended_sp_offset) < f.unextended_sp(), ""); + DEBUG_ONLY(Method* m = hf.interpreter_frame_method();) + DEBUG_ONLY(int extra_space = m->is_object_wait0() ? 
m->size_of_parameters() : 0;) // see comment in relativize_interpreted_frame_metadata() + assert((intptr_t*)f.at_relative(frame::interpreter_frame_extended_sp_offset) < f.unextended_sp() + extra_space, ""); } #endif // CPU_AARCH64_CONTINUATIONFREEZETHAW_AARCH64_INLINE_HPP diff --git a/src/hotspot/cpu/aarch64/frame_aarch64.hpp b/src/hotspot/cpu/aarch64/frame_aarch64.hpp index 401e2c6ae97ee..da020b4234d10 100644 --- a/src/hotspot/cpu/aarch64/frame_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/frame_aarch64.hpp @@ -73,7 +73,8 @@ sender_sp_offset = 2, // Interpreter frames - interpreter_frame_oop_temp_offset = 3, // for native calls only + interpreter_frame_result_handler_offset = 3, // for native calls only + interpreter_frame_oop_temp_offset = 2, // for native calls only interpreter_frame_sender_sp_offset = -1, // outgoing sp before a call to an invoked method diff --git a/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp b/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp index 4cb18f795437d..0b538c9cd3a20 100644 --- a/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp @@ -1740,11 +1740,20 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, } // Change state to native (we save the return address in the thread, since it might not - // be pushed on the stack when we do a stack traversal). - // We use the same pc/oopMap repeatedly when we call out + // be pushed on the stack when we do a stack traversal). It is enough that the pc() + // points into the right code segment. It does not have to be the correct return pc. + // We use the same pc/oopMap repeatedly when we call out. Label native_return; - __ set_last_Java_frame(sp, noreg, native_return, rscratch1); + if (LockingMode != LM_LEGACY && method->is_object_wait0()) { + // For convenience we use the pc we want to resume to in case of preemption on Object.wait. 
+ __ set_last_Java_frame(sp, noreg, native_return, rscratch1); + } else { + intptr_t the_pc = (intptr_t) __ pc(); + oop_maps->add_gc_map(the_pc - start, map); + + __ set_last_Java_frame(sp, noreg, __ pc(), rscratch1); + } Label dtrace_method_entry, dtrace_method_entry_done; if (DTraceMethodProbes) { @@ -1847,11 +1856,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, __ rt_call(native_func); - __ bind(native_return); - - intptr_t return_pc = (intptr_t) __ pc(); - oop_maps->add_gc_map(return_pc - start, map); - // Verify or restore cpu control state after JNI call __ restore_cpu_control_state_after_jni(rscratch1, rscratch2); @@ -1910,6 +1914,18 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, __ stlrw(rscratch1, rscratch2); __ bind(after_transition); + if (LockingMode != LM_LEGACY && method->is_object_wait0()) { + // Check preemption for Object.wait() + __ ldr(rscratch1, Address(rthread, JavaThread::preempt_alternate_return_offset())); + __ cbz(rscratch1, native_return); + __ str(zr, Address(rthread, JavaThread::preempt_alternate_return_offset())); + __ br(rscratch1); + __ bind(native_return); + + intptr_t the_pc = (intptr_t) __ pc(); + oop_maps->add_gc_map(the_pc - start, map); + } + Label reguard; Label reguard_done; __ ldrb(rscratch1, Address(rthread, JavaThread::stack_guard_state_offset())); diff --git a/src/hotspot/cpu/aarch64/stackChunkFrameStream_aarch64.inline.hpp b/src/hotspot/cpu/aarch64/stackChunkFrameStream_aarch64.inline.hpp index 7c5cf63e382c8..8a221f1377268 100644 --- a/src/hotspot/cpu/aarch64/stackChunkFrameStream_aarch64.inline.hpp +++ b/src/hotspot/cpu/aarch64/stackChunkFrameStream_aarch64.inline.hpp @@ -116,6 +116,7 @@ inline int StackChunkFrameStream::interpreter_frame_num_oops() const f.interpreted_frame_oop_map(&mask); return mask.num_oops() + 1 // for the mirror oop + + (f.interpreter_frame_method()->is_native() ? 1 : 0) // temp oop slot + pointer_delta_as_int((intptr_t*)f.interpreter_frame_monitor_begin(), (intptr_t*)f.interpreter_frame_monitor_end())/BasicObjectLock::size(); } diff --git a/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp index 56a7961606acc..794bb73bf7745 100644 --- a/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp @@ -1348,6 +1348,8 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { // result handler is in r0 // set result handler __ mov(result_handler, r0); + __ str(r0, Address(rfp, frame::interpreter_frame_result_handler_offset * wordSize)); + // pass mirror handle if static call { Label L; @@ -1383,9 +1385,10 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { // pass JNIEnv __ add(c_rarg0, rthread, in_bytes(JavaThread::jni_environment_offset())); - // Set the last Java PC in the frame anchor to be the return address from - // the call to the native method: this will allow the debugger to - // generate an accurate stack trace. + // It is enough that the pc() points into the right code + // segment. It does not have to be the correct return pc. + // For convenience we use the pc we want to resume to in + // case of preemption on Object.wait. 
Label native_return; __ set_last_Java_frame(esp, rfp, native_return, rscratch1); @@ -1406,9 +1409,13 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset())); __ stlrw(rscratch1, rscratch2); + __ push_cont_fastpath(); + // Call the native method. __ blr(r10); - __ bind(native_return); + + __ pop_cont_fastpath(); + __ get_method(rmethod); // result potentially in r0 or v0 @@ -1466,6 +1473,21 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset())); __ stlrw(rscratch1, rscratch2); + if (LockingMode != LM_LEGACY) { + // Check preemption for Object.wait() + Label not_preempted; + __ ldr(rscratch1, Address(rthread, JavaThread::preempt_alternate_return_offset())); + __ cbz(rscratch1, not_preempted); + __ str(zr, Address(rthread, JavaThread::preempt_alternate_return_offset())); + __ br(rscratch1); + __ bind(native_return); + __ restore_after_resume(true /* is_native */); + __ bind(not_preempted); + } else { + // any pc will do so just use this one for LM_LEGACY to keep code together. + __ bind(native_return); + } + // reset_last_Java_frame __ reset_last_Java_frame(true); @@ -1484,6 +1506,7 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { { Label no_oop; __ adr(t, ExternalAddress(AbstractInterpreter::result_handler(T_OBJECT))); + __ ldr(result_handler, Address(rfp, frame::interpreter_frame_result_handler_offset*wordSize)); __ cmp(t, result_handler); __ br(Assembler::NE, no_oop); // Unbox oop result, e.g. JNIHandles::resolve result. diff --git a/src/hotspot/cpu/riscv/continuationFreezeThaw_riscv.inline.hpp b/src/hotspot/cpu/riscv/continuationFreezeThaw_riscv.inline.hpp index a96d9ccf01508..75041e3d93e40 100644 --- a/src/hotspot/cpu/riscv/continuationFreezeThaw_riscv.inline.hpp +++ b/src/hotspot/cpu/riscv/continuationFreezeThaw_riscv.inline.hpp @@ -153,10 +153,16 @@ inline void FreezeBase::relativize_interpreted_frame_metadata(const frame& f, co // extended_sp is already relativized by TemplateInterpreterGenerator::generate_normal_entry or // AbstractInterpreter::layout_activation + // The interpreter native wrapper code adds space in the stack equal to size_of_parameters() + // after the fixed part of the frame. For wait0 this is equal to 3 words (this + long parameter). + // We adjust by this size since otherwise the saved last sp will be less than the extended_sp. + DEBUG_ONLY(Method* m = hf.interpreter_frame_method();) + DEBUG_ONLY(int extra_space = m->is_object_wait0() ? m->size_of_parameters() : 0;) + assert((hf.fp() - hf.unextended_sp()) == (f.fp() - f.unextended_sp()), ""); assert(hf.unextended_sp() == (intptr_t*)hf.at(frame::interpreter_frame_last_sp_offset), ""); assert(hf.unextended_sp() <= (intptr_t*)hf.at(frame::interpreter_frame_initial_sp_offset), ""); - assert(hf.unextended_sp() > (intptr_t*)hf.at(frame::interpreter_frame_extended_sp_offset), ""); + assert(hf.unextended_sp() + extra_space > (intptr_t*)hf.at(frame::interpreter_frame_extended_sp_offset), ""); assert(hf.fp() > (intptr_t*)hf.at(frame::interpreter_frame_initial_sp_offset), ""); #ifdef ASSERT if (f.interpreter_frame_method()->max_locals() > 0) { @@ -222,7 +228,6 @@ template frame ThawBase::new_stack_frame(const frame& hf, frame& // If caller is interpreted it already made room for the callee arguments int overlap = caller.is_interpreted_frame() ? 
ContinuationHelper::InterpretedFrame::stack_argsize(hf) : 0; const int fsize = (int)(ContinuationHelper::InterpretedFrame::frame_bottom(hf) - hf.unextended_sp() - overlap); - const int locals = hf.interpreter_frame_method()->max_locals(); intptr_t* frame_sp = caller.unextended_sp() - fsize; intptr_t* fp = frame_sp + (hf.fp() - heap_sp); if ((intptr_t)fp % frame::frame_alignment != 0) { @@ -261,7 +266,7 @@ template frame ThawBase::new_stack_frame(const frame& hf, frame& // we need to recreate a "real" frame pointer, pointing into the stack fp = frame_sp + FKind::size(hf) - frame::sender_sp_offset; } else { - fp = FKind::stub + fp = FKind::stub || FKind::native // fp always points to the address above the pushed return pc. We need correct address. ? frame_sp + fsize - frame::sender_sp_offset // we need to re-read fp because it may be an oop and we might have fixed the frame. @@ -334,8 +339,11 @@ inline void ThawBase::derelativize_interpreted_frame_metadata(const frame& hf, c // Make sure that monitor_block_top is still relativized. assert(f.at_absolute(frame::interpreter_frame_monitor_block_top_offset) <= frame::interpreter_frame_initial_sp_offset, ""); + DEBUG_ONLY(Method* m = hf.interpreter_frame_method();) + DEBUG_ONLY(int extra_space = m->is_object_wait0() ? m->size_of_parameters() : 0;) // see comment in relativize_interpreted_frame_metadata() + // Make sure that extended_sp is kept relativized. - assert((intptr_t*)f.at_relative(frame::interpreter_frame_extended_sp_offset) < f.unextended_sp(), ""); + assert((intptr_t*)f.at_relative(frame::interpreter_frame_extended_sp_offset) < f.unextended_sp() + extra_space, ""); } #endif // CPU_RISCV_CONTINUATIONFREEZETHAW_RISCV_INLINE_HPP diff --git a/src/hotspot/cpu/riscv/frame_riscv.hpp b/src/hotspot/cpu/riscv/frame_riscv.hpp index 3692c99193e1a..b4540c45ab8f5 100644 --- a/src/hotspot/cpu/riscv/frame_riscv.hpp +++ b/src/hotspot/cpu/riscv/frame_riscv.hpp @@ -111,7 +111,8 @@ sender_sp_offset = 0, // Interpreter frames - interpreter_frame_oop_temp_offset = 1, // for native calls only + interpreter_frame_result_handler_offset = 1, // for native calls only + interpreter_frame_oop_temp_offset = 0, // for native calls only interpreter_frame_sender_sp_offset = -3, // outgoing sp before a call to an invoked method diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp index f5cbd4f0be09f..004e8878226fe 100644 --- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp @@ -1639,11 +1639,20 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, } // Change state to native (we save the return address in the thread, since it might not - // be pushed on the stack when we do a stack traversal). - // We use the same pc/oopMap repeatedly when we call out + // be pushed on the stack when we do a stack traversal). It is enough that the pc() + // points into the right code segment. It does not have to be the correct return pc. + // We use the same pc/oopMap repeatedly when we call out. Label native_return; - __ set_last_Java_frame(sp, noreg, native_return, t0); + if (LockingMode != LM_LEGACY && method->is_object_wait0()) { + // For convenience we use the pc we want to resume to in case of preemption on Object.wait. 
+ __ set_last_Java_frame(sp, noreg, native_return, t0); + } else { + intptr_t the_pc = (intptr_t) __ pc(); + oop_maps->add_gc_map(the_pc - start, map); + + __ set_last_Java_frame(sp, noreg, __ pc(), t0); + } Label dtrace_method_entry, dtrace_method_entry_done; if (DTraceMethodProbes) { @@ -1745,11 +1754,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, __ rt_call(native_func); - __ bind(native_return); - - intptr_t return_pc = (intptr_t) __ pc(); - oop_maps->add_gc_map(return_pc - start, map); - // Verify or restore cpu control state after JNI call __ restore_cpu_control_state_after_jni(t0); @@ -1800,6 +1804,18 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, __ sw(t0, Address(t1)); __ bind(after_transition); + if (LockingMode != LM_LEGACY && method->is_object_wait0()) { + // Check preemption for Object.wait() + __ ld(t0, Address(xthread, JavaThread::preempt_alternate_return_offset())); + __ beqz(t0, native_return); + __ sd(zr, Address(xthread, JavaThread::preempt_alternate_return_offset())); + __ jr(t0); + __ bind(native_return); + + intptr_t the_pc = (intptr_t) __ pc(); + oop_maps->add_gc_map(the_pc - start, map); + } + Label reguard; Label reguard_done; __ lbu(t0, Address(xthread, JavaThread::stack_guard_state_offset())); diff --git a/src/hotspot/cpu/riscv/stackChunkFrameStream_riscv.inline.hpp b/src/hotspot/cpu/riscv/stackChunkFrameStream_riscv.inline.hpp index e226c7b7a53ab..fa8a8fb47f022 100644 --- a/src/hotspot/cpu/riscv/stackChunkFrameStream_riscv.inline.hpp +++ b/src/hotspot/cpu/riscv/stackChunkFrameStream_riscv.inline.hpp @@ -114,6 +114,7 @@ inline int StackChunkFrameStream::interpreter_frame_num_oops() const f.interpreted_frame_oop_map(&mask); return mask.num_oops() + 1 // for the mirror oop + + (f.interpreter_frame_method()->is_native() ? 1 : 0) // temp oop slot + pointer_delta_as_int((intptr_t*)f.interpreter_frame_monitor_begin(), (intptr_t*)f.interpreter_frame_monitor_end()) / BasicObjectLock::size(); } diff --git a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp index d9919e2cad5a8..538cbad633de5 100644 --- a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp +++ b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp @@ -1125,6 +1125,8 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { // result handler is in x10 // set result handler __ mv(result_handler, x10); + __ sd(x10, Address(fp, frame::interpreter_frame_result_handler_offset * wordSize)); + // pass mirror handle if static call { Label L; @@ -1163,6 +1165,8 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { // It is enough that the pc() points into the right code // segment. It does not have to be the correct return pc. + // For convenience we use the pc we want to resume to in + // case of preemption on Object.wait. Label native_return; __ set_last_Java_frame(esp, fp, native_return, x30); @@ -1184,9 +1188,13 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); __ sw(t0, Address(t1)); + __ push_cont_fastpath(); + // Call the native method. 
__ jalr(x28); - __ bind(native_return); + + __ pop_cont_fastpath(); + __ get_method(xmethod); // result potentially in x10 or f10 @@ -1252,6 +1260,21 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { __ mv(t0, _thread_in_Java); __ sw(t0, Address(xthread, JavaThread::thread_state_offset())); + if (LockingMode != LM_LEGACY) { + // Check preemption for Object.wait() + Label not_preempted; + __ ld(t0, Address(xthread, JavaThread::preempt_alternate_return_offset())); + __ beqz(t0, not_preempted); + __ sd(zr, Address(xthread, JavaThread::preempt_alternate_return_offset())); + __ jr(t0); + __ bind(native_return); + __ restore_after_resume(true /* is_native */); + __ bind(not_preempted); + } else { + // any pc will do so just use this one for LM_LEGACY to keep code together. + __ bind(native_return); + } + // reset_last_Java_frame __ reset_last_Java_frame(true); @@ -1270,6 +1293,7 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { { Label no_oop; __ la(t, ExternalAddress(AbstractInterpreter::result_handler(T_OBJECT))); + __ ld(result_handler, Address(fp, frame::interpreter_frame_result_handler_offset * wordSize)); __ bne(t, result_handler, no_oop); // Unbox oop result, e.g. JNIHandles::resolve result. __ pop(ltos); diff --git a/src/hotspot/cpu/x86/continuationFreezeThaw_x86.inline.hpp b/src/hotspot/cpu/x86/continuationFreezeThaw_x86.inline.hpp index 4769f09f8b6ce..7027b958673cc 100644 --- a/src/hotspot/cpu/x86/continuationFreezeThaw_x86.inline.hpp +++ b/src/hotspot/cpu/x86/continuationFreezeThaw_x86.inline.hpp @@ -142,7 +142,9 @@ inline void FreezeBase::relativize_interpreted_frame_metadata(const frame& f, co assert((intptr_t*)hf.at_relative(frame::interpreter_frame_last_sp_offset) == hf.unextended_sp(), ""); // Make sure that locals is already relativized. - assert((*hf.addr_at(frame::interpreter_frame_locals_offset) == frame::sender_sp_offset + f.interpreter_frame_method()->max_locals() - 1), ""); + DEBUG_ONLY(Method* m = f.interpreter_frame_method();) + DEBUG_ONLY(int max_locals = !m->is_native() ? m->max_locals() : m->size_of_parameters() + 2;) + assert((*hf.addr_at(frame::interpreter_frame_locals_offset) == frame::sender_sp_offset + max_locals - 1), ""); // Make sure that monitor_block_top is already relativized. assert(hf.at_absolute(frame::interpreter_frame_monitor_block_top_offset) <= frame::interpreter_frame_initial_sp_offset, ""); @@ -213,7 +215,6 @@ template frame ThawBase::new_stack_frame(const frame& hf, frame& // If caller is interpreted it already made room for the callee arguments int overlap = caller.is_interpreted_frame() ? 
ContinuationHelper::InterpretedFrame::stack_argsize(hf) : 0; const int fsize = (int)(ContinuationHelper::InterpretedFrame::frame_bottom(hf) - hf.unextended_sp() - overlap); - const int locals = hf.interpreter_frame_method()->max_locals(); intptr_t* frame_sp = caller.unextended_sp() - fsize; intptr_t* fp = frame_sp + (hf.fp() - heap_sp); DEBUG_ONLY(intptr_t* unextended_sp = fp + *hf.addr_at(frame::interpreter_frame_last_sp_offset);) @@ -223,7 +224,9 @@ template frame ThawBase::new_stack_frame(const frame& hf, frame& // we need to set the locals so that the caller of new_stack_frame() can call // ContinuationHelper::InterpretedFrame::frame_bottom intptr_t locals_offset = *hf.addr_at(frame::interpreter_frame_locals_offset); - assert((int)locals_offset == frame::sender_sp_offset + locals - 1, ""); + DEBUG_ONLY(Method* m = hf.interpreter_frame_method();) + DEBUG_ONLY(const int max_locals = !m->is_native() ? m->max_locals() : m->size_of_parameters() + 2;) + assert((int)locals_offset == frame::sender_sp_offset + max_locals - 1, ""); // copy relativized locals from the heap frame *f.addr_at(frame::interpreter_frame_locals_offset) = locals_offset; return f; @@ -248,7 +251,7 @@ template frame ThawBase::new_stack_frame(const frame& hf, frame& // we need to recreate a "real" frame pointer, pointing into the stack fp = frame_sp + FKind::size(hf) - frame::sender_sp_offset; } else { - fp = FKind::stub + fp = FKind::stub || FKind::native ? frame_sp + fsize - frame::sender_sp_offset // fp always points to the address below the pushed return pc. We need correct address. : *(intptr_t**)(hf.sp() - frame::sender_sp_offset); // we need to re-read fp because it may be an oop and we might have fixed the frame. } diff --git a/src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp b/src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp index 91e269463c846..ef3b1585c2685 100644 --- a/src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp +++ b/src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp @@ -2186,10 +2186,16 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, // points into the right code segment. It does not have to be the correct return pc. // We use the same pc/oopMap repeatedly when we call out - intptr_t the_pc = (intptr_t) __ pc(); - oop_maps->add_gc_map(the_pc - start, map); + Label native_return; + if (LockingMode != LM_LEGACY && method->is_object_wait0()) { + // For convenience we use the pc we want to resume to in case of preemption on Object.wait. + __ set_last_Java_frame(rsp, noreg, native_return, rscratch1); + } else { + intptr_t the_pc = (intptr_t) __ pc(); + oop_maps->add_gc_map(the_pc - start, map); - __ set_last_Java_frame(rsp, noreg, (address)the_pc, rscratch1); + __ set_last_Java_frame(rsp, noreg, __ pc(), rscratch1); + } // We have all of the arguments setup at this point. We must not touch any register // argument registers at this point (what if we save/restore them there are no oop? 
@@ -2373,6 +2379,20 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, __ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_Java); __ bind(after_transition); + if (LockingMode != LM_LEGACY && method->is_object_wait0()) { + // Check preemption for Object.wait() + __ movptr(rscratch1, Address(r15_thread, JavaThread::preempt_alternate_return_offset())); + __ cmpptr(rscratch1, NULL_WORD); + __ jccb(Assembler::equal, native_return); + __ movptr(Address(r15_thread, JavaThread::preempt_alternate_return_offset()), NULL_WORD); + __ jmp(rscratch1); + __ bind(native_return); + + intptr_t the_pc = (intptr_t) __ pc(); + oop_maps->add_gc_map(the_pc - start, map); + } + + Label reguard; Label reguard_done; __ cmpl(Address(r15_thread, JavaThread::stack_guard_state_offset()), StackOverflow::stack_guard_yellow_reserved_disabled); diff --git a/src/hotspot/cpu/x86/stackChunkFrameStream_x86.inline.hpp b/src/hotspot/cpu/x86/stackChunkFrameStream_x86.inline.hpp index d69facadbeafd..6289b903ab1e4 100644 --- a/src/hotspot/cpu/x86/stackChunkFrameStream_x86.inline.hpp +++ b/src/hotspot/cpu/x86/stackChunkFrameStream_x86.inline.hpp @@ -114,6 +114,7 @@ inline int StackChunkFrameStream::interpreter_frame_num_oops() const f.interpreted_frame_oop_map(&mask); return mask.num_oops() + 1 // for the mirror oop + + (f.interpreter_frame_method()->is_native() ? 1 : 0) // temp oop slot + pointer_delta_as_int((intptr_t*)f.interpreter_frame_monitor_begin(), (intptr_t*)f.interpreter_frame_monitor_end())/BasicObjectLock::size(); } diff --git a/src/hotspot/cpu/x86/templateInterpreterGenerator_x86.cpp b/src/hotspot/cpu/x86/templateInterpreterGenerator_x86.cpp index 2d45cdd2c320f..beb80d83e2ecd 100644 --- a/src/hotspot/cpu/x86/templateInterpreterGenerator_x86.cpp +++ b/src/hotspot/cpu/x86/templateInterpreterGenerator_x86.cpp @@ -1049,7 +1049,10 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { // It is enough that the pc() points into the right code // segment. It does not have to be the correct return pc. - __ set_last_Java_frame(rsp, rbp, (address) __ pc(), rscratch1); + // For convenience we use the pc we want to resume to in + // case of preemption on Object.wait. + Label native_return; + __ set_last_Java_frame(rsp, rbp, native_return, rscratch1); #endif // _LP64 // change thread state @@ -1069,11 +1072,15 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { __ movl(Address(thread, JavaThread::thread_state_offset()), _thread_in_native); + __ push_cont_fastpath(); + // Call the native method. 
__ call(rax); // 32: result potentially in rdx:rax or ST0 // 64: result potentially in rax or xmm0 + __ pop_cont_fastpath(); + // Verify or restore cpu control state after JNI call __ restore_cpu_control_state_after_jni(rscratch1); @@ -1097,10 +1104,10 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { Label push_double; ExternalAddress float_handler(AbstractInterpreter::result_handler(T_FLOAT)); ExternalAddress double_handler(AbstractInterpreter::result_handler(T_DOUBLE)); - __ cmpptr(Address(rbp, (frame::interpreter_frame_oop_temp_offset + 1)*wordSize), + __ cmpptr(Address(rbp, (frame::interpreter_frame_result_handler_offset)*wordSize), float_handler.addr(), noreg); __ jcc(Assembler::equal, push_double); - __ cmpptr(Address(rbp, (frame::interpreter_frame_oop_temp_offset + 1)*wordSize), + __ cmpptr(Address(rbp, (frame::interpreter_frame_result_handler_offset)*wordSize), double_handler.addr(), noreg); __ jcc(Assembler::notEqual, L); __ bind(push_double); @@ -1170,6 +1177,24 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { // change thread state __ movl(Address(thread, JavaThread::thread_state_offset()), _thread_in_Java); +#ifdef _LP64 + if (LockingMode != LM_LEGACY) { + // Check preemption for Object.wait() + Label not_preempted; + __ movptr(rscratch1, Address(r15_thread, JavaThread::preempt_alternate_return_offset())); + __ cmpptr(rscratch1, NULL_WORD); + __ jccb(Assembler::equal, not_preempted); + __ movptr(Address(r15_thread, JavaThread::preempt_alternate_return_offset()), NULL_WORD); + __ jmp(rscratch1); + __ bind(native_return); + __ restore_after_resume(true /* is_native */); + __ bind(not_preempted); + } else { + // any pc will do so just use this one for LM_LEGACY to keep code together. 
+ __ bind(native_return); + } +#endif // _LP64 + // reset_last_Java_frame __ reset_last_Java_frame(thread, true); diff --git a/src/hotspot/share/classfile/javaClasses.cpp b/src/hotspot/share/classfile/javaClasses.cpp index 9c8ae7278af7a..8d59f1a7341f0 100644 --- a/src/hotspot/share/classfile/javaClasses.cpp +++ b/src/hotspot/share/classfile/javaClasses.cpp @@ -2024,6 +2024,8 @@ int java_lang_VirtualThread::_continuation_offset; int java_lang_VirtualThread::_state_offset; int java_lang_VirtualThread::_next_offset; int java_lang_VirtualThread::_onWaitingList_offset; +int java_lang_VirtualThread::_notified_offset; +int java_lang_VirtualThread::_waitTimeout_offset; #define VTHREAD_FIELDS_DO(macro) \ macro(static_vthread_scope_offset, k, "VTHREAD_SCOPE", continuationscope_signature, true); \ @@ -2031,7 +2033,10 @@ int java_lang_VirtualThread::_onWaitingList_offset; macro(_continuation_offset, k, "cont", continuation_signature, false); \ macro(_state_offset, k, "state", int_signature, false); \ macro(_next_offset, k, "next", vthread_signature, false); \ - macro(_onWaitingList_offset, k, "onWaitingList", bool_signature, false); + macro(_onWaitingList_offset, k, "onWaitingList", bool_signature, false); \ + macro(_notified_offset, k, "notified", bool_signature, false); \ + macro(_waitTimeout_offset, k, "waitTimeout", long_signature, false); + void java_lang_VirtualThread::compute_offsets() { InstanceKlass* k = vmClasses::VirtualThread_klass(); @@ -2090,6 +2095,18 @@ bool java_lang_VirtualThread::set_onWaitingList(oop vthread, OopHandle& list_hea return false; // already on waiting list } +void java_lang_VirtualThread::set_notified(oop vthread, jboolean value) { + vthread->bool_field_put_volatile(_notified_offset, value); +} + +jlong java_lang_VirtualThread::waitTimeout(oop vthread) { + return vthread->long_field(_waitTimeout_offset); +} + +void java_lang_VirtualThread::set_waitTimeout(oop vthread, jlong value) { + vthread->long_field_put(_waitTimeout_offset, value); +} + JavaThreadStatus java_lang_VirtualThread::map_state_to_thread_status(int state) { JavaThreadStatus status = JavaThreadStatus::NEW; switch (state & ~SUSPENDED) { @@ -2104,6 +2121,8 @@ JavaThreadStatus java_lang_VirtualThread::map_state_to_thread_status(int state) case YIELDING: case YIELDED: case UNBLOCKED: + case WAITING: + case TIMED_WAITING: status = JavaThreadStatus::RUNNABLE; break; case PARKED: @@ -2118,6 +2137,12 @@ JavaThreadStatus java_lang_VirtualThread::map_state_to_thread_status(int state) case BLOCKED: status = JavaThreadStatus::BLOCKED_ON_MONITOR_ENTER; break; + case WAIT: + status = JavaThreadStatus::IN_OBJECT_WAIT; + break; + case TIMED_WAIT: + status = JavaThreadStatus::IN_OBJECT_WAIT_TIMED; + break; case TERMINATED: status = JavaThreadStatus::TERMINATED; break; diff --git a/src/hotspot/share/classfile/javaClasses.hpp b/src/hotspot/share/classfile/javaClasses.hpp index 49e7567d38c8d..bf412e181bbb7 100644 --- a/src/hotspot/share/classfile/javaClasses.hpp +++ b/src/hotspot/share/classfile/javaClasses.hpp @@ -532,6 +532,9 @@ class java_lang_VirtualThread : AllStatic { static int _state_offset; static int _next_offset; static int _onWaitingList_offset; + static int _notified_offset; + static int _recheckInterval_offset; + static int _waitTimeout_offset; JFR_ONLY(static int _jfr_epoch_offset;) public: enum { @@ -550,6 +553,10 @@ class java_lang_VirtualThread : AllStatic { BLOCKING = 12, BLOCKED = 13, UNBLOCKED = 14, + WAITING = 15, + WAIT = 16, // waiting in Object.wait + TIMED_WAITING = 17, + TIMED_WAIT = 18, // waiting 
in timed-Object.wait TERMINATED = 99, // additional state bits @@ -574,6 +581,9 @@ class java_lang_VirtualThread : AllStatic { static oop next(oop vthread); static void set_next(oop vthread, oop next_vthread); static bool set_onWaitingList(oop vthread, OopHandle& list_head); + static jlong waitTimeout(oop vthread); + static void set_waitTimeout(oop vthread, jlong value); + static void set_notified(oop vthread, jboolean value); static bool is_preempted(oop vthread); static JavaThreadStatus map_state_to_thread_status(int state); }; diff --git a/src/hotspot/share/code/nmethod.cpp b/src/hotspot/share/code/nmethod.cpp index 7fb72997749dc..bb8f3ec1a9309 100644 --- a/src/hotspot/share/code/nmethod.cpp +++ b/src/hotspot/share/code/nmethod.cpp @@ -708,7 +708,7 @@ void nmethod::preserve_callee_argument_oops(frame fr, const RegisterMap *reg_map // handle the case of an anchor explicitly set in continuation code that doesn't have a callee JavaThread* thread = reg_map->thread(); - if (thread->has_last_Java_frame() && fr.sp() == thread->last_Java_sp()) { + if ((thread->has_last_Java_frame() && fr.sp() == thread->last_Java_sp()) JVMTI_ONLY(|| (method()->is_continuation_enter_intrinsic() && thread->on_monitor_waited_event()))) { return; } @@ -1298,7 +1298,7 @@ nmethod::nmethod( _comp_level = CompLevel_none; _compiler_type = type; _orig_pc_offset = 0; - _num_stack_arg_slots = _method->constMethod()->num_stack_arg_slots(); + _num_stack_arg_slots = 0; if (offsets->value(CodeOffsets::Exceptions) != -1) { // Continuation enter intrinsic diff --git a/src/hotspot/share/interpreter/oopMapCache.cpp b/src/hotspot/share/interpreter/oopMapCache.cpp index 87b124e9d7968..3406c85df7fe0 100644 --- a/src/hotspot/share/interpreter/oopMapCache.cpp +++ b/src/hotspot/share/interpreter/oopMapCache.cpp @@ -234,8 +234,10 @@ class MaskFillerForNative: public NativeSignatureIterator { private: uintptr_t * _mask; // the bit mask to be filled int _size; // the mask size in bits + int _num_oops; void set_one(int i) { + _num_oops++; i *= InterpreterOopMap::bits_per_entry; assert(0 <= i && i < _size, "offset out of bounds"); _mask[i / BitsPerWord] |= (((uintptr_t) 1 << InterpreterOopMap::oop_bit_number) << (i % BitsPerWord)); @@ -253,6 +255,7 @@ class MaskFillerForNative: public NativeSignatureIterator { MaskFillerForNative(const methodHandle& method, uintptr_t* mask, int size) : NativeSignatureIterator(method) { _mask = mask; _size = size; + _num_oops = 0; // initialize with 0 int i = (size + BitsPerWord - 1) / BitsPerWord; while (i-- > 0) _mask[i] = 0; @@ -261,6 +264,8 @@ class MaskFillerForNative: public NativeSignatureIterator { void generate() { iterate(); } + + int num_oops() { return _num_oops; } }; bool OopMapCacheEntry::verify_mask(CellTypeState* vars, CellTypeState* stack, int max_locals, int stack_top) { @@ -319,6 +324,7 @@ void OopMapCacheEntry::fill_for_native(const methodHandle& mh) { // fill mask for parameters MaskFillerForNative mf(mh, bit_mask(), mask_size()); mf.generate(); + _num_oops = mf.num_oops(); } diff --git a/src/hotspot/share/oops/method.cpp b/src/hotspot/share/oops/method.cpp index a1b380d364655..e686a529828fc 100644 --- a/src/hotspot/share/oops/method.cpp +++ b/src/hotspot/share/oops/method.cpp @@ -867,6 +867,10 @@ bool Method::needs_clinit_barrier() const { return is_static() && !method_holder()->is_initialized(); } +bool Method::is_object_wait0() const { + return name() == vmSymbols::wait_name(); +} + objArrayHandle Method::resolved_checked_exceptions_impl(Method* method, TRAPS) { int length = 
method->checked_exceptions_length(); if (length == 0) { // common case diff --git a/src/hotspot/share/oops/method.hpp b/src/hotspot/share/oops/method.hpp index 6ffaebcdfdab2..512ca9d0c1205 100644 --- a/src/hotspot/share/oops/method.hpp +++ b/src/hotspot/share/oops/method.hpp @@ -586,6 +586,9 @@ class Method : public Metadata { // returns true if the method name is bool is_object_initializer() const; + // returns true if the method name is wait0 + bool is_object_wait0() const; + // compiled code support // NOTE: code() is inherently racy as deopt can be clearing code // simultaneously. Use with caution. diff --git a/src/hotspot/share/oops/stackChunkOop.hpp b/src/hotspot/share/oops/stackChunkOop.hpp index 9e3ffd9e8f196..28e0576801ec1 100644 --- a/src/hotspot/share/oops/stackChunkOop.hpp +++ b/src/hotspot/share/oops/stackChunkOop.hpp @@ -102,6 +102,7 @@ class stackChunkOopDesc : public instanceOopDesc { inline void set_object_waiter(ObjectWaiter* obj_waiter); inline ObjectMonitor* current_pending_monitor() const; + inline ObjectMonitor* current_waiting_monitor() const; inline oop cont() const; template diff --git a/src/hotspot/share/oops/stackChunkOop.inline.hpp b/src/hotspot/share/oops/stackChunkOop.inline.hpp index d068dd62201a6..f503a361c03ad 100644 --- a/src/hotspot/share/oops/stackChunkOop.inline.hpp +++ b/src/hotspot/share/oops/stackChunkOop.inline.hpp @@ -186,7 +186,15 @@ inline void stackChunkOopDesc::set_preempted(bool value) { inline ObjectMonitor* stackChunkOopDesc::current_pending_monitor() const { ObjectWaiter* waiter = object_waiter(); - return waiter != nullptr ? waiter->monitor() : nullptr; + if (waiter != nullptr && (waiter->is_monitorenter() || (waiter->is_wait() && (waiter->at_reenter() || waiter->notified())))) { + return waiter->monitor(); + } + return nullptr; +} + +inline ObjectMonitor* stackChunkOopDesc::current_waiting_monitor() const { + ObjectWaiter* waiter = object_waiter(); + return waiter != nullptr && waiter->is_wait() ? 
waiter->monitor() : nullptr; } inline bool stackChunkOopDesc::has_lockstack() const { return is_flag(FLAG_HAS_LOCKSTACK); } diff --git a/src/hotspot/share/prims/jvmtiEnvBase.cpp b/src/hotspot/share/prims/jvmtiEnvBase.cpp index 486859ad199bd..6123642feb70b 100644 --- a/src/hotspot/share/prims/jvmtiEnvBase.cpp +++ b/src/hotspot/share/prims/jvmtiEnvBase.cpp @@ -1533,9 +1533,13 @@ JvmtiEnvBase::get_object_monitor_usage(JavaThread* calling_thread, jobject objec waiter != nullptr && (nWait == 0 || waiter != mon->first_waiter()); waiter = mon->next_waiter(waiter)) { JavaThread *w = mon->thread_of_waiter(waiter); - oop thread_oop = get_vthread_or_thread_oop(w); - if (thread_oop->is_a(vmClasses::BaseVirtualThread_klass())) { + if (w == nullptr) { skipped++; + } else { + oop thread_oop = get_vthread_or_thread_oop(w); + if (thread_oop->is_a(vmClasses::BaseVirtualThread_klass())) { + skipped++; + } } nWait++; } @@ -1583,15 +1587,19 @@ JvmtiEnvBase::get_object_monitor_usage(JavaThread* calling_thread, jobject objec jint skipped = 0; for (int i = 0; i < nWait; i++) { JavaThread *w = mon->thread_of_waiter(waiter); - oop thread_oop = get_vthread_or_thread_oop(w); - bool is_virtual = thread_oop->is_a(vmClasses::BaseVirtualThread_klass()); - assert(w != nullptr, "sanity check"); + bool is_virtual; + if (w == nullptr) { + is_virtual = true; + } else { + oop thread_oop = get_vthread_or_thread_oop(w); + is_virtual = thread_oop->is_a(vmClasses::BaseVirtualThread_klass()); + } if (is_virtual) { skipped++; } else { // If the thread was found on the ObjectWaiter list, then // it has not been notified. - Handle th(current_thread, get_vthread_or_thread_oop(w)); + Handle th(current_thread, w->threadObj()); ret.notify_waiters[i - skipped] = (jthread)jni_reference(calling_thread, th); } waiter = mon->next_waiter(waiter); diff --git a/src/hotspot/share/prims/jvmtiExport.cpp b/src/hotspot/share/prims/jvmtiExport.cpp index 0fb9b480d8a55..30150b4aa8641 100644 --- a/src/hotspot/share/prims/jvmtiExport.cpp +++ b/src/hotspot/share/prims/jvmtiExport.cpp @@ -2863,6 +2863,21 @@ void JvmtiExport::post_monitor_waited(JavaThread *thread, ObjectMonitor *obj_mnt } } +void JvmtiExport::vthread_post_monitor_waited(JavaThread *current, ObjectMonitor *obj_mntr, jboolean timed_out) { + Handle vthread(current, current->vthread()); + + // Finish the VTMS transition temporarily to post the event. + current->rebind_to_jvmti_thread_state_of(vthread()); + JvmtiVTMSTransitionDisabler::finish_VTMS_transition((jthread)vthread.raw_value(), /* is_mount */ true); + + // Post event. + JvmtiExport::post_monitor_waited(current, obj_mntr, timed_out); + + // Go back to VTMS transition state. 
+ JvmtiVTMSTransitionDisabler::start_VTMS_transition((jthread)vthread.raw_value(), /* is_mount */ true); + current->rebind_to_jvmti_thread_state_of(current->threadObj()); +} + void JvmtiExport::post_vm_object_alloc(JavaThread *thread, oop object) { if (object == nullptr) { return; diff --git a/src/hotspot/share/prims/jvmtiExport.hpp b/src/hotspot/share/prims/jvmtiExport.hpp index e98020aef1d3b..e0fb84f2c032f 100644 --- a/src/hotspot/share/prims/jvmtiExport.hpp +++ b/src/hotspot/share/prims/jvmtiExport.hpp @@ -397,6 +397,7 @@ class JvmtiExport : public AllStatic { static void post_monitor_contended_entered(JavaThread *thread, ObjectMonitor *obj_mntr) NOT_JVMTI_RETURN; static void post_monitor_wait(JavaThread *thread, oop obj, jlong timeout) NOT_JVMTI_RETURN; static void post_monitor_waited(JavaThread *thread, ObjectMonitor *obj_mntr, jboolean timed_out) NOT_JVMTI_RETURN; + static void vthread_post_monitor_waited(JavaThread *current, ObjectMonitor *obj_mntr, jboolean timed_out) NOT_JVMTI_RETURN; static void post_object_free(JvmtiEnv* env, GrowableArray* objects) NOT_JVMTI_RETURN; static void post_resource_exhausted(jint resource_exhausted_flags, const char* detail) NOT_JVMTI_RETURN; static void record_vm_internal_object_allocation(oop object) NOT_JVMTI_RETURN; diff --git a/src/hotspot/share/prims/jvmtiTagMap.cpp b/src/hotspot/share/prims/jvmtiTagMap.cpp index bc91c1050731a..97a4ae10970be 100644 --- a/src/hotspot/share/prims/jvmtiTagMap.cpp +++ b/src/hotspot/share/prims/jvmtiTagMap.cpp @@ -2306,10 +2306,11 @@ bool StackRefCollector::report_native_stack_refs(jmethodID method) { _blk->set_context(_thread_tag, _tid, _depth, method); if (_is_top_frame) { // JNI locals for the top frame. - assert(_java_thread != nullptr, "sanity"); - _java_thread->active_handles()->oops_do(_blk); - if (_blk->stopped()) { - return false; + if (_java_thread != nullptr) { + _java_thread->active_handles()->oops_do(_blk); + if (_blk->stopped()) { + return false; + } } } else { if (_last_entry_frame != nullptr) { diff --git a/src/hotspot/share/runtime/continuation.hpp b/src/hotspot/share/runtime/continuation.hpp index cd8e9cda8bb48..bf554c903c113 100644 --- a/src/hotspot/share/runtime/continuation.hpp +++ b/src/hotspot/share/runtime/continuation.hpp @@ -62,6 +62,11 @@ enum freeze_result { class Continuation : AllStatic { public: + enum preempt_kind { + freeze_on_monitorenter = 1, + freeze_on_wait = 2 + }; + enum thaw_kind { thaw_top = 0, thaw_return_barrier = 1, diff --git a/src/hotspot/share/runtime/continuationFreezeThaw.cpp b/src/hotspot/share/runtime/continuationFreezeThaw.cpp index 36d825c499ef5..0b9263e373bc5 100644 --- a/src/hotspot/share/runtime/continuationFreezeThaw.cpp +++ b/src/hotspot/share/runtime/continuationFreezeThaw.cpp @@ -441,6 +441,7 @@ class FreezeBase : public StackObj { NOINLINE freeze_result recurse_freeze_interpreted_frame(frame& f, frame& caller, int callee_argsize, bool callee_interpreted); freeze_result recurse_freeze_compiled_frame(frame& f, frame& caller, int callee_argsize, bool callee_interpreted); NOINLINE freeze_result recurse_freeze_stub_frame(frame& f, frame& caller); + NOINLINE freeze_result recurse_freeze_native_frame(frame& f, frame& caller); NOINLINE void finish_freeze(const frame& f, const frame& top); void freeze_lockstack(); @@ -761,17 +762,15 @@ void FreezeBase::freeze_fast_copy(stackChunkOop chunk, int chunk_start_sp CONT_J chunk->set_sp(chunk_new_sp); // set chunk->pc to the return address of the topmost frame in the chunk -#if defined (AARCH64) || defined(RISCV64) if 
(_preempt) { // On aarch64/riscv64, the return pc of the top frame won't necessarily be at sp[-1]. - // Get the top's frame last pc from the anchor instead. + // Also, on x64, if the top frame is the native wrapper frame, sp[-1] will not + // be the pc we used when creating the oopmap. Get the top's frame last pc from + // the anchor instead. address last_pc = _last_frame.pc(); ContinuationHelper::patch_return_address_at(chunk_top - frame::sender_sp_ret_address_offset(), last_pc); chunk->set_pc(last_pc); } else { -#else - { -#endif chunk->set_pc(ContinuationHelper::return_address_at( _cont_stack_top - frame::sender_sp_ret_address_offset())); } @@ -868,7 +867,8 @@ frame FreezeBase::freeze_start_frame_on_preempt() { // The parameter callee_argsize includes metadata that has to be part of caller/callee overlap. NOINLINE freeze_result FreezeBase::recurse_freeze(frame& f, frame& caller, int callee_argsize, bool callee_interpreted, bool top) { assert(f.unextended_sp() < _bottom_address, ""); // see recurse_freeze_java_frame - assert(f.is_interpreted_frame() || ((top && _preempt) == ContinuationHelper::Frame::is_stub(f.cb())), ""); + assert(f.is_interpreted_frame() || ((top && _preempt) == ContinuationHelper::Frame::is_stub(f.cb())) + || ((top && _preempt) == f.is_native_frame()), ""); if (stack_overflow()) { return freeze_exception; @@ -881,15 +881,11 @@ NOINLINE freeze_result FreezeBase::recurse_freeze(frame& f, frame& caller, int c } return recurse_freeze_compiled_frame(f, caller, callee_argsize, callee_interpreted); } else if (f.is_interpreted_frame()) { - assert((_preempt && top) || !f.interpreter_frame_method()->is_native(), ""); - if (_preempt && top && f.interpreter_frame_method()->is_native()) { - // int native entry - return freeze_pinned_native; - } + assert(!f.interpreter_frame_method()->is_native() || (top && _preempt), ""); return recurse_freeze_interpreted_frame(f, caller, callee_argsize, callee_interpreted); } else if (top && _preempt) { - assert(f.is_runtime_frame(), ""); - return recurse_freeze_stub_frame(f, caller); + assert(f.is_native_frame() || f.is_runtime_frame(), ""); + return f.is_native_frame() ? recurse_freeze_native_frame(f, caller) : recurse_freeze_stub_frame(f, caller); } else { return freeze_pinned_native; } @@ -1283,6 +1279,49 @@ NOINLINE freeze_result FreezeBase::recurse_freeze_stub_frame(frame& f, frame& ca return freeze_ok; } +NOINLINE freeze_result FreezeBase::recurse_freeze_native_frame(frame& f, frame& caller) { + if (!f.cb()->as_nmethod()->method()->is_object_wait0()) { + assert(f.cb()->as_nmethod()->method()->is_synchronized(), ""); + // Synchronized native method case. Unlike the interpreter native wrapper, the compiled + // native wrapper tries to acquire the monitor after marshalling the arguments from the + // caller into the native convention. This is so that we have a valid oopMap in case of + // having to block in the slow path. But that would require freezing those registers too + // and then fixing them back on thaw in case of oops. To avoid complicating things and + // given that this would be a rare case anyways just pin the vthread to the carrier. 
+ return freeze_pinned_native; + } + + intptr_t* const stack_frame_top = ContinuationHelper::NativeFrame::frame_top(f); + const int fsize = f.cb()->frame_size(); + + log_develop_trace(continuations)("recurse_freeze_native_frame %s _size: %d fsize: %d :: " INTPTR_FORMAT " - " INTPTR_FORMAT, + f.cb()->name(), _freeze_size, fsize, p2i(stack_frame_top), p2i(stack_frame_top+fsize)); + + freeze_result result = recurse_freeze_java_frame(f, caller, fsize, 0); + if (UNLIKELY(result > freeze_ok_bottom)) { + return result; + } + + assert(result == freeze_ok, "should have caller frame"); + DEBUG_ONLY(before_freeze_java_frame(f, caller, fsize, 0 /* argsize */, false /* is_bottom_frame */);) + + frame hf = new_heap_frame(f, caller); + intptr_t* heap_frame_top = ContinuationHelper::NativeFrame::frame_top(hf); + + copy_to_chunk(stack_frame_top, heap_frame_top, fsize); + + if (caller.is_interpreted_frame()) { + _total_align_size += frame::align_wiggle; + } + + patch(f, hf, caller, false /* is_bottom_frame */); + + DEBUG_ONLY(after_freeze_java_frame(hf, false /* is_bottom_frame */);) + + caller = hf; + return freeze_ok; +} + NOINLINE void FreezeBase::finish_freeze(const frame& f, const frame& top) { stackChunkOop chunk = _cont.tail(); @@ -1614,7 +1653,7 @@ bool FreezeBase::check_valid_fast_path() { map.set_include_argument_oops(false); int i = 0; for (frame f = freeze_start_frame(); Continuation::is_frame_in_continuation(ce, f); f = f.sender(&map), i++) { - if (!((f.is_compiled_frame() && !f.is_deoptimized_frame()) || (i == 0 && f.is_runtime_frame()))) { + if (!((f.is_compiled_frame() && !f.is_deoptimized_frame()) || (i == 0 && (f.is_runtime_frame() || f.is_native_frame())))) { return false; } } @@ -1880,9 +1919,10 @@ class ThawBase : public StackObj { inline void prefetch_chunk_pd(void* start, int size_words); void patch_return(intptr_t* sp, bool is_last); - intptr_t* handle_preempted_continuation(intptr_t* sp, bool fast_case); + intptr_t* handle_preempted_continuation(intptr_t* sp, Continuation::preempt_kind preempt_kind, bool fast_case); inline intptr_t* possibly_adjust_frame(frame& top); inline intptr_t* push_cleanup_continuation(); + void throw_interrupted_exception(JavaThread* current, frame& top); void recurse_thaw(const frame& heap_frame, frame& caller, int num_frames, bool top_on_preempt_case); void finish_thaw(frame& f); @@ -1901,6 +1941,7 @@ class ThawBase : public StackObj { NOINLINE void recurse_thaw_interpreted_frame(const frame& hf, frame& caller, int num_frames); void recurse_thaw_compiled_frame(const frame& hf, frame& caller, int num_frames, bool stub_caller); void recurse_thaw_stub_frame(const frame& hf, frame& caller, int num_frames); + void recurse_thaw_native_frame(const frame& hf, frame& caller, int num_frames); void push_return_frame(frame& f); inline frame new_entry_frame(); @@ -1932,7 +1973,7 @@ class Thaw : public ThawBase { inline intptr_t* thaw(Continuation::thaw_kind kind); template NOINLINE intptr_t* thaw_fast(stackChunkOop chunk); - NOINLINE intptr_t* thaw_slow(stackChunkOop chunk, bool return_barrier); + NOINLINE intptr_t* thaw_slow(stackChunkOop chunk, Continuation::thaw_kind kind); inline void patch_caller_links(intptr_t* sp, intptr_t* bottom); }; @@ -1948,7 +1989,7 @@ inline intptr_t* Thaw::thaw(Continuation::thaw_kind kind) { _barriers = chunk->requires_barriers(); return (LIKELY(can_thaw_fast(chunk))) ? 
thaw_fast(chunk) - : thaw_slow(chunk, kind != Continuation::thaw_top); + : thaw_slow(chunk, kind); } class ReconstructedStack : public StackObj { @@ -2157,17 +2198,19 @@ static inline void relativize_chunk_concurrently(stackChunkOop chunk) { } template -NOINLINE intptr_t* Thaw::thaw_slow(stackChunkOop chunk, bool return_barrier) { +NOINLINE intptr_t* Thaw::thaw_slow(stackChunkOop chunk, Continuation::thaw_kind kind) { + Continuation::preempt_kind preempt_kind; bool retry_fast_path = false; _preempted_case = chunk->preempted(); if (_preempted_case) { if (chunk->object_waiter() != nullptr) { // Mounted again after preemption. Resume the pending monitor operation, - // which will be either a monitorenter. - assert(chunk->current_pending_monitor() != nullptr, ""); + // which will be either a monitorenter or Object.wait() call. + assert(chunk->current_pending_monitor() != nullptr || chunk->current_waiting_monitor() != nullptr, ""); ObjectWaiter* waiter = chunk->object_waiter(); ObjectMonitor* mon = waiter->monitor(); + preempt_kind = waiter->is_wait() ? Continuation::freeze_on_wait : Continuation::freeze_on_monitorenter; bool mon_acquired = mon->resume_operation(_thread, waiter, _cont); assert(!mon_acquired || mon->is_owner(_thread), "invariant"); @@ -2175,9 +2218,11 @@ NOINLINE intptr_t* Thaw::thaw_slow(stackChunkOop chunk, bool return_bar // Failed to aquire monitor. Return to enterSpecial to unmount again. return push_cleanup_continuation(); } + chunk = _cont.tail(); // reload oop in case of safepoint in resume_operation (if posting JVMTI events). } else { // Preemption cancelled in moniterenter case. We actually acquired // the monitor after freezing all frames so nothing to do. + preempt_kind = Continuation::freeze_on_monitorenter; } // Call this first to avoid racing with GC threads later when modifying the chunk flags. relativize_chunk_concurrently(chunk); @@ -2188,8 +2233,8 @@ NOINLINE intptr_t* Thaw::thaw_slow(stackChunkOop chunk, bool return_bar } // On first thaw after freeze restore oops to the lockstack if any. - assert(chunk->lockstack_size() == 0 || !return_barrier, ""); - if (!return_barrier && chunk->lockstack_size() > 0) { + assert(chunk->lockstack_size() == 0 || kind == Continuation::thaw_top, ""); + if (kind == Continuation::thaw_top && chunk->lockstack_size() > 0) { int lockStackSize = chunk->lockstack_size(); assert(lockStackSize > 0, "should be"); @@ -2207,7 +2252,7 @@ NOINLINE intptr_t* Thaw::thaw_slow(stackChunkOop chunk, bool return_bar if (retry_fast_path && can_thaw_fast(chunk)) { intptr_t* sp = thaw_fast(chunk); if (_preempted_case) { - return handle_preempted_continuation(sp, true /* fast_case */); + return handle_preempted_continuation(sp, preempt_kind, true /* fast_case */); } return sp; } @@ -2215,7 +2260,7 @@ NOINLINE intptr_t* Thaw::thaw_slow(stackChunkOop chunk, bool return_bar LogTarget(Trace, continuations) lt; if (lt.develop_is_enabled()) { LogStream ls(lt); - ls.print_cr("thaw slow return_barrier: %d " INTPTR_FORMAT, return_barrier, p2i(chunk)); + ls.print_cr("thaw slow return_barrier: %d " INTPTR_FORMAT, kind, p2i(chunk)); chunk->print_on(true, &ls); } @@ -2229,7 +2274,7 @@ NOINLINE intptr_t* Thaw::thaw_slow(stackChunkOop chunk, bool return_bar DEBUG_ONLY(_frames = 0;) _align_size = 0; - int num_frames = (return_barrier ? 1 : 2); + int num_frames = kind == Continuation::thaw_top ? 
2 : 1; _stream = StackChunkFrameStream(chunk); _top_unextended_sp_before_thaw = _stream.unextended_sp(); @@ -2256,7 +2301,7 @@ NOINLINE intptr_t* Thaw::thaw_slow(stackChunkOop chunk, bool return_bar intptr_t* sp = caller.sp(); if (_preempted_case) { - return handle_preempted_continuation(sp, false /* fast_case */); + return handle_preempted_continuation(sp, preempt_kind, false /* fast_case */); } return sp; } @@ -2267,8 +2312,8 @@ void ThawBase::recurse_thaw(const frame& heap_frame, frame& caller, int num_fram assert(num_frames > 0, ""); assert(!heap_frame.is_empty(), ""); - if (top_on_preempt_case && heap_frame.is_runtime_frame()) { - recurse_thaw_stub_frame(heap_frame, caller, 2); + if (top_on_preempt_case && (heap_frame.is_native_frame() || heap_frame.is_runtime_frame())) { + heap_frame.is_native_frame() ? recurse_thaw_native_frame(heap_frame, caller, 2) : recurse_thaw_stub_frame(heap_frame, caller, 2); } else if (!heap_frame.is_interpreted_frame()) { recurse_thaw_compiled_frame(heap_frame, caller, num_frames, false); } else { @@ -2388,7 +2433,8 @@ void ThawBase::clear_bitmap_bits(address start, address end) { assert(effective_end == end || !chunk->bitmap().at(chunk->bit_index_for(effective_end)), "bit should not be set"); } -intptr_t* ThawBase::handle_preempted_continuation(intptr_t* sp, bool fast_case) { +intptr_t* ThawBase::handle_preempted_continuation(intptr_t* sp, Continuation::preempt_kind preempt_kind, bool fast_case) { + assert(preempt_kind == Continuation::freeze_on_wait || preempt_kind == Continuation::freeze_on_monitorenter, ""); frame top(sp); assert(top.pc() == *(address*)(sp - frame::sender_sp_ret_address_offset()), ""); @@ -2410,12 +2456,18 @@ intptr_t* ThawBase::handle_preempted_continuation(intptr_t* sp, bool fast_case) // If we thawed in the slow path the runtime stub/native wrapper frame already // has the correct fp (see ThawBase::new_stack_frame). On the fast path though, // we copied the original fp at the time of freeze which now will have to be fixed. - assert(top.is_runtime_frame(), ""); + assert(top.is_runtime_frame() || top.is_native_frame(), ""); int fsize = top.cb()->frame_size(); patch_pd(top, sp + fsize); } - if (top.is_runtime_frame()) { + if (preempt_kind == Continuation::freeze_on_wait) { + // Check now if we need to throw IE exception. + if (_thread->pending_interrupted_exception()) { + throw_interrupted_exception(_thread, top); + _thread->set_pending_interrupted_exception(false); + } + } else if (top.is_runtime_frame()) { // The continuation might now run on a different platform thread than the previous time so // we need to adjust the current thread saved in the stub frame before restoring registers. JavaThread** thread_addr = frame::saved_thread_address(top); @@ -2426,6 +2478,16 @@ intptr_t* ThawBase::handle_preempted_continuation(intptr_t* sp, bool fast_case) return sp; } +void ThawBase::throw_interrupted_exception(JavaThread* current, frame& top) { + ContinuationWrapper::SafepointOp so(current, _cont); + // Since we might safepoint set the anchor so that the stack can be walked.
+ set_anchor(current, top.sp()); + JRT_BLOCK + THROW(vmSymbols::java_lang_InterruptedException()); + JRT_BLOCK_END + clear_anchor(current); +} + NOINLINE void ThawBase::recurse_thaw_interpreted_frame(const frame& hf, frame& caller, int num_frames) { assert(hf.is_interpreted_frame(), ""); @@ -2469,7 +2531,9 @@ NOINLINE void ThawBase::recurse_thaw_interpreted_frame(const frame& hf, frame& c maybe_set_fastpath(f.sp()); - const int locals = hf.interpreter_frame_method()->max_locals(); + Method* m = hf.interpreter_frame_method(); + // For native frames we need to count parameters, possible alignment, plus the 2 extra words (temp oop/result handler). + const int locals = !m->is_native() ? m->max_locals() : m->size_of_parameters() + frame::align_wiggle + 2; if (!is_bottom_frame) { // can only fix caller once this frame is thawed (due to callee saved regs) @@ -2613,6 +2677,55 @@ void ThawBase::recurse_thaw_stub_frame(const frame& hf, frame& caller, int num_f caller = f; } +void ThawBase::recurse_thaw_native_frame(const frame& hf, frame& caller, int num_frames) { + assert(hf.is_native_frame(), ""); + assert(_preempted_case && hf.cb()->as_nmethod()->method()->is_object_wait0(), ""); + + if (UNLIKELY(seen_by_gc())) { // recurse_thaw_stub_frame already invoked our barriers with a full regmap + _cont.tail()->do_barriers(_stream, SmallRegisterMap::instance()); + } + + const bool is_bottom_frame = recurse_thaw_java_frame(caller, num_frames); + assert(!is_bottom_frame, ""); + + DEBUG_ONLY(before_thaw_java_frame(hf, caller, is_bottom_frame, num_frames);) + + assert(caller.sp() == caller.unextended_sp(), ""); + + if (caller.is_interpreted_frame()) { + _align_size += frame::align_wiggle; // we add one whether or not we've aligned because we add it in recurse_freeze_native_frame + } + + // new_stack_frame must construct the resulting frame using hf.pc() rather than hf.raw_pc() because the frame is not + // yet laid out in the stack, and so the original_pc is not stored in it. + // As a result, f.is_deoptimized_frame() is always false and we must test hf to know if the frame is deoptimized. 
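The new `locals` computation above sizes the bottom of a native method's frame from its parameter slots rather than max_locals, plus a possible alignment word and two extra words. A tiny sketch of that slot arithmetic for wait0(long), using standard JVM slot accounting; the constants and class are illustrative, not HotSpot's actual values or types:

```java
// Toy model of the slot arithmetic behind the new `locals` computation for native
// methods: parameter slots + a possible alignment word + two extra words.
public class NativeFrameLocalsModel {
    static final int ALIGN_WIGGLE = 1;   // assumption: one possible alignment word
    static final int EXTRA_WORDS  = 2;   // the temp oop and result handler words

    // Standard JVM slot accounting: long/double take two slots, everything else one,
    // plus one slot for the receiver of an instance method.
    static int sizeOfParameters(boolean isStatic, Class<?>... paramTypes) {
        int slots = isStatic ? 0 : 1;
        for (Class<?> p : paramTypes) {
            slots += (p == long.class || p == double.class) ? 2 : 1;
        }
        return slots;
    }

    public static void main(String[] args) {
        // Object.wait0(long): receiver + long timeout = 3 slots
        int params = sizeOfParameters(false, long.class);
        System.out.println("wait0 parameter slots = " + params
                + ", modeled locals = " + (params + ALIGN_WIGGLE + EXTRA_WORDS));
    }
}
```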
+ frame f = new_stack_frame(hf, caller, false /* bottom */); + intptr_t* const stack_frame_top = f.sp(); + intptr_t* const heap_frame_top = hf.unextended_sp(); + + int fsize = ContinuationHelper::NativeFrame::size(hf); + assert(fsize <= (int)(caller.unextended_sp() - f.unextended_sp()), ""); + + intptr_t* from = heap_frame_top - frame::metadata_words_at_bottom; + intptr_t* to = stack_frame_top - frame::metadata_words_at_bottom; + int sz = fsize + frame::metadata_words_at_bottom; + + copy_from_chunk(from, to, sz); // copying good oops because we invoked barriers above + + patch(f, caller, false /* bottom */); + + // f.is_deoptimized_frame() is always false and we must test hf.is_deoptimized_frame() (see comment above) + assert(!f.is_deoptimized_frame(), ""); + assert(!hf.is_deoptimized_frame(), ""); + assert(!f.cb()->as_nmethod()->is_marked_for_deoptimization(), ""); + + // can only fix caller once this frame is thawed (due to callee saved regs); this happens on the stack + _cont.tail()->fix_thawed_frame(caller, SmallRegisterMap::instance()); + + DEBUG_ONLY(after_thaw_java_frame(f, false /* bottom */);) + caller = f; +} + void ThawBase::finish_thaw(frame& f) { stackChunkOop chunk = _cont.tail(); diff --git a/src/hotspot/share/runtime/deoptimization.cpp b/src/hotspot/share/runtime/deoptimization.cpp index 85a978aa21144..b48acc048d558 100644 --- a/src/hotspot/share/runtime/deoptimization.cpp +++ b/src/hotspot/share/runtime/deoptimization.cpp @@ -122,6 +122,10 @@ DeoptimizationScope::~DeoptimizationScope() { } void DeoptimizationScope::mark(nmethod* nm, bool inc_recompile_counts) { + if (!nm->can_be_deoptimized()) { + return; + } + ConditionalMutexLocker ml(NMethodState_lock, !NMethodState_lock->owned_by_self(), Mutex::_no_safepoint_check_flag); // If it's already marked but we still need it to be deopted. 
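In the native-frame thaw path above, the frame's slots plus the metadata words just below its unextended SP are copied as one contiguous block from the heap chunk to the platform stack. A toy Java model of that from/to/size arithmetic; all sizes and indices here are invented for illustration:

```java
import java.util.Arrays;

// Toy model of the copy_from_chunk() step: the native frame's slots plus the metadata
// words below its unextended SP move as one block from the heap chunk to the stack.
public class ThawCopyModel {
    public static void main(String[] args) {
        int metadataWordsAtBottom = 2;   // assumption: e.g. return pc + saved fp
        int frameSize = 6;               // assumption: fsize of the native wrapper frame

        long[] heapChunk = new long[32]; // stand-in for the frozen stackChunk
        Arrays.setAll(heapChunk, i -> i);
        long[] stack = new long[32];     // stand-in for the platform stack

        int heapFrameTop  = 10;          // index playing the role of hf.unextended_sp()
        int stackFrameTop = 20;          // index playing the role of f.sp()

        int from = heapFrameTop - metadataWordsAtBottom;
        int to   = stackFrameTop - metadataWordsAtBottom;
        int sz   = frameSize + metadataWordsAtBottom;
        System.arraycopy(heapChunk, from, stack, to, sz);

        System.out.println("copied " + sz + " words: "
                + Arrays.toString(Arrays.copyOfRange(stack, to, to + sz)));
    }
}
```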
diff --git a/src/hotspot/share/runtime/javaThread.cpp b/src/hotspot/share/runtime/javaThread.cpp index c7411f97cecab..a1f9fd5aa30b7 100644 --- a/src/hotspot/share/runtime/javaThread.cpp +++ b/src/hotspot/share/runtime/javaThread.cpp @@ -457,6 +457,7 @@ JavaThread::JavaThread(MemTag mem_tag) : _is_disable_suspend(false), _VTMS_transition_mark(false), _pending_jvmti_unmount_event(false), + _on_monitor_waited_event(false), _contended_entered_monitor(nullptr), #ifdef ASSERT _is_VTMS_transition_disabler(false), @@ -500,6 +501,7 @@ JavaThread::JavaThread(MemTag mem_tag) : _preempt_alternate_return(nullptr), _preemption_cancelled(false), + _pending_interrupted_exception(false), _handshake(this), diff --git a/src/hotspot/share/runtime/javaThread.hpp b/src/hotspot/share/runtime/javaThread.hpp index c829689727304..b45c04179a98a 100644 --- a/src/hotspot/share/runtime/javaThread.hpp +++ b/src/hotspot/share/runtime/javaThread.hpp @@ -327,6 +327,7 @@ class JavaThread: public Thread { bool _is_disable_suspend; // JVMTI suspend is temporarily disabled; used on current thread only bool _VTMS_transition_mark; // used for sync between VTMS transitions and disablers bool _pending_jvmti_unmount_event; // When preempting we post unmount event at unmount end rather than start + bool _on_monitor_waited_event; // Avoid callee arg processing for enterSpecial when posting waited event ObjectMonitor* _contended_entered_monitor; // Monitor por pending monitor_contended_entered callback #ifdef ASSERT bool _is_VTMS_transition_disabler; // thread currently disabled VTMS transitions @@ -486,11 +487,17 @@ class JavaThread: public Thread { // set this field so that in the preempt stub we call thaw again // instead of unmounting. bool _preemption_cancelled; + // For Object.wait() we set this field to know if we need to + // throw IE at the end of thawing before returning to Java. 
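The `_on_monitor_waited_event` flag added a few lines up (paired with the `ThreadOnMonitorWaitedEvent` helper defined further down) follows a scoped-flag pattern: set on entry, always cleared on exit, so a stack walk taken while the MonitorWaited event is being posted can tell that the enterSpecial frame has no real callee. A Java analogue of that pattern using try/finally; the ThreadLocal and names are purely illustrative:

```java
import java.util.function.Supplier;

// Java analogue of the scoped-flag idea: set on entry, always cleared on exit, so code
// running inside the scope can be recognized. The real flag is a plain field on the
// VM-internal JavaThread, set and cleared by the ThreadOnMonitorWaitedEvent helper.
public class ScopedFlagDemo {
    private static final ThreadLocal<Boolean> ON_MONITOR_WAITED_EVENT =
            ThreadLocal.withInitial(() -> false);

    static <T> T withMonitorWaitedEvent(Supplier<T> body) {
        ON_MONITOR_WAITED_EVENT.set(true);
        try {
            return body.get();
        } finally {
            ON_MONITOR_WAITED_EVENT.set(false);   // always cleared, like the destructor
        }
    }

    public static void main(String[] args) {
        String r = withMonitorWaitedEvent(() -> {
            System.out.println("flag while posting: " + ON_MONITOR_WAITED_EVENT.get());
            return "posted";
        });
        System.out.println(r + ", flag after: " + ON_MONITOR_WAITED_EVENT.get());
    }
}
```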
+ bool _pending_interrupted_exception; public: bool preemption_cancelled() { return _preemption_cancelled; } void set_preemption_cancelled(bool b) { _preemption_cancelled = b; } + bool pending_interrupted_exception() { return _pending_interrupted_exception; } + void set_pending_interrupted_exception(bool b) { _pending_interrupted_exception = b; } + bool preempting() { return _preempt_alternate_return != nullptr; } void set_preempt_alternate_return(address val) { _preempt_alternate_return = val; } @@ -720,6 +727,9 @@ class JavaThread: public Thread { bool pending_jvmti_unmount_event() { return _pending_jvmti_unmount_event; } void set_pending_jvmti_unmount_event(bool val) { _pending_jvmti_unmount_event = val; } + bool on_monitor_waited_event() { return _on_monitor_waited_event; } + void set_on_monitor_waited_event(bool val) { _on_monitor_waited_event = val; } + bool pending_contended_entered_event() { return _contended_entered_monitor != nullptr; } ObjectMonitor* contended_entered_monitor() { return _contended_entered_monitor; } #ifdef ASSERT @@ -1318,4 +1328,13 @@ class NoPreemptMark { ~NoPreemptMark() { if (_unpin) _ce->unpin(); } }; +class ThreadOnMonitorWaitedEvent { + JavaThread* _thread; + public: + ThreadOnMonitorWaitedEvent(JavaThread* thread) : _thread(thread) { + JVMTI_ONLY(_thread->set_on_monitor_waited_event(true);) + } + ~ThreadOnMonitorWaitedEvent() { JVMTI_ONLY(_thread->set_on_monitor_waited_event(false);) } +}; + #endif // SHARE_RUNTIME_JAVATHREAD_HPP diff --git a/src/hotspot/share/runtime/objectMonitor.cpp b/src/hotspot/share/runtime/objectMonitor.cpp index 8203bfe64cd35..5060ad469b2ab 100644 --- a/src/hotspot/share/runtime/objectMonitor.cpp +++ b/src/hotspot/share/runtime/objectMonitor.cpp @@ -1067,7 +1067,13 @@ void ObjectMonitor::ReenterI(JavaThread* current, ObjectWaiter* currentNode) { OrderAccess::fence(); // see comments at the end of EnterI() } -bool ObjectMonitor::VThreadMonitorEnter(JavaThread* current) { +// This method is called from two places: +// - On monitorenter contention with a null waiter. +// - After Object.wait() times out or the target is interrupted to reenter the +// monitor, with the existing waiter. +// For the Object.wait() case we do not delete the ObjectWaiter in case we +// succesfully acquire the monitor since we are going to need it on return. +bool ObjectMonitor::VThreadMonitorEnter(JavaThread* current, ObjectWaiter* waiter) { if (TryLock(current) == TryLockResult::Success) { assert(is_owner(current), "invariant"); assert(!is_succesor(current), "invariant"); @@ -1075,7 +1081,7 @@ bool ObjectMonitor::VThreadMonitorEnter(JavaThread* current) { } oop vthread = current->vthread(); - ObjectWaiter* node = new ObjectWaiter(vthread, this); + ObjectWaiter* node = waiter != nullptr ? 
waiter : new ObjectWaiter(vthread, this); node->_prev = (ObjectWaiter*) 0xBAD; node->TState = ObjectWaiter::TS_CXQ; @@ -1090,7 +1096,7 @@ bool ObjectMonitor::VThreadMonitorEnter(JavaThread* current) { if (TryLock(current) == TryLockResult::Success) { assert(is_owner(current), "invariant"); assert(!is_succesor(current), "invariant"); - delete node; + if (waiter == nullptr) delete node; // for Object.wait() don't delete yet return true; } } @@ -1101,7 +1107,7 @@ bool ObjectMonitor::VThreadMonitorEnter(JavaThread* current) { assert(is_owner(current), "invariant"); UnlinkAfterAcquire(current, node); if (is_succesor(current)) clear_succesor(); - delete node; + if (waiter == nullptr) delete node; // for Object.wait() don't delete yet return true; } @@ -1120,6 +1126,11 @@ bool ObjectMonitor::VThreadMonitorEnter(JavaThread* current) { bool ObjectMonitor::resume_operation(JavaThread* current, ObjectWaiter* node, ContinuationWrapper& cont) { assert(java_lang_VirtualThread::state(current->vthread()) == java_lang_VirtualThread::RUNNING, "wrong state for vthread"); + if (node->is_wait() && !node->at_reenter()) { + bool acquired_monitor = VThreadWaitReenter(current, node, cont); + if (acquired_monitor) return true; + } + // Retry acquiring monitor... int state = node->TState; @@ -1155,6 +1166,16 @@ void ObjectMonitor::VThreadEpilog(JavaThread* current, ObjectWaiter* node) { guarantee(_recursions == 0, "invariant"); + if (node->is_wait()) { + _recursions = node->_recursions; // restore the old recursion count + _waiters--; // decrement the number of waiters + + if (node->_interrupted) { + // We will throw at thaw end after finishing the mount transition. + current->set_pending_interrupted_exception(true); + } + } + assert(node->TState == ObjectWaiter::TS_ENTER || node->TState == ObjectWaiter::TS_CXQ, ""); UnlinkAfterAcquire(current, node); delete node; @@ -1582,6 +1603,32 @@ static void post_monitor_wait_event(EventJavaMonitorWait* event, event->commit(); } +static void vthread_monitor_waited_event(JavaThread *current, ObjectWaiter* node, ContinuationWrapper& cont, EventJavaMonitorWait* event, jboolean timed_out) { + // Since we might safepoint set the anchor so that the stack can we walked. + assert(current->last_continuation() != nullptr, ""); + JavaFrameAnchor* anchor = current->frame_anchor(); + anchor->set_last_Java_sp(current->last_continuation()->entry_sp()); + anchor->set_last_Java_pc(current->last_continuation()->entry_pc()); + + ContinuationWrapper::SafepointOp so(current, cont); + + JRT_BLOCK + if (event->should_commit()) { + long timeout = java_lang_VirtualThread::waitTimeout(current->vthread()); + post_monitor_wait_event(event, node->_monitor, node->_notifier_tid, timeout, timed_out); + } + if (JvmtiExport::should_post_monitor_waited()) { + // We mark this call in case of an upcall to Java while posting the event. + // If somebody walks the stack in that case, processing the enterSpecial + // frame should not include processing callee arguments since there is no + // actual callee (see nmethod::preserve_callee_argument_oops()). 
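vthread_monitor_waited_event() above is what keeps the JFR JavaMonitorWait event and the JVMTI MonitorWaited callback working when the waiter is an unmounted virtual thread. A small observation harness for the JFR side, using only the public jdk.jfr.consumer streaming API; it assumes a JDK where the event is emitted for virtual threads, and the sleeps only make the timing likely:

```java
import jdk.jfr.consumer.RecordingStream;

// Starts a JFR stream, lets a virtual thread time out in Object.wait(), and prints the
// resulting jdk.JavaMonitorWait event (assumption: the event is emitted for vthreads).
public class MonitorWaitJfrDemo {
    public static void main(String[] args) throws Exception {
        final Object lock = new Object();
        try (RecordingStream rs = new RecordingStream()) {
            rs.enable("jdk.JavaMonitorWait").withoutThreshold();
            rs.onEvent("jdk.JavaMonitorWait", e ->
                    System.out.println("monitor wait: timeout=" + e.getDuration("timeout")
                            + " timedOut=" + e.getBoolean("timedOut")));
            rs.startAsync();

            Thread.ofVirtual().start(() -> {
                synchronized (lock) {
                    try {
                        lock.wait(50);   // nobody notifies, so this times out
                    } catch (InterruptedException ignored) { }
                }
            }).join();

            Thread.sleep(1000);          // give the stream time to deliver the event
        }
    }
}
```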
+ ThreadOnMonitorWaitedEvent tmwe(current); + JvmtiExport::vthread_post_monitor_waited(current, node->_monitor, timed_out); + } + JRT_BLOCK_END + current->frame_anchor()->clear(); +} + // ----------------------------------------------------------------------------- // Wait/Notify/NotifyAll // @@ -1621,6 +1668,16 @@ void ObjectMonitor::wait(jlong millis, bool interruptible, TRAPS) { current->set_current_waiting_monitor(this); + ContinuationEntry* ce = current->last_continuation(); + if (interruptible && ce != nullptr && ce->is_virtual_thread()) { + int result = Continuation::try_preempt(current, ce->cont_oop(current)); + if (result == freeze_ok) { + VThreadWait(current, millis); + current->set_current_waiting_monitor(nullptr); + return; + } + } + // create a node to be put into the queue // Critically, after we reset() the event but prior to park(), we must check // for a pending interrupt. @@ -1670,7 +1727,7 @@ void ObjectMonitor::wait(jlong millis, bool interruptible, TRAPS) { ThreadBlockInVMPreprocess tbivs(current, csos, true /* allow_suspend */); if (interrupted || HAS_PENDING_EXCEPTION) { // Intentionally empty - } else if (node._notified == 0) { + } else if (!node._notified) { if (millis <= 0) { current->_ParkEvent->park(); } else { @@ -1698,7 +1755,7 @@ void ObjectMonitor::wait(jlong millis, bool interruptible, TRAPS) { Thread::SpinAcquire(&_WaitSetLock, "WaitSet - unlink"); if (node.TState == ObjectWaiter::TS_WAIT) { DequeueSpecificWaiter(&node); // unlink from WaitSet - assert(node._notified == 0, "invariant"); + assert(!node._notified, "invariant"); node.TState = ObjectWaiter::TS_RUN; } Thread::SpinRelease(&_WaitSetLock); @@ -1724,7 +1781,7 @@ void ObjectMonitor::wait(jlong millis, bool interruptible, TRAPS) { if (JvmtiExport::should_post_monitor_waited()) { JvmtiExport::post_monitor_waited(current, this, ret == OS_TIMEOUT); - if (node._notified != 0 && is_succesor(current)) { + if (node._notified && is_succesor(current)) { // In this part of the monitor wait-notify-reenter protocol it // is possible (and normal) for another thread to do a fastpath // monitor enter-exit while this thread is still trying to get @@ -1753,6 +1810,9 @@ void ObjectMonitor::wait(jlong millis, bool interruptible, TRAPS) { assert(!is_owner(current), "invariant"); ObjectWaiter::TStates v = node.TState; if (v == ObjectWaiter::TS_RUN) { + // We use the NoPreemptMark for the very rare case where the previous + // preempt attempt failed due to OOM. The preempt on monitor contention + // could succeed but we can't unmount now. NoPreemptMark npm(current); enter(current); } else { @@ -1808,16 +1868,32 @@ void ObjectMonitor::INotify(JavaThread* current) { ObjectWaiter* iterator = DequeueWaiter(); if (iterator != nullptr) { guarantee(iterator->TState == ObjectWaiter::TS_WAIT, "invariant"); - guarantee(iterator->_notified == 0, "invariant"); + guarantee(!iterator->_notified, "invariant"); // Disposition - what might we do with iterator ? // a. add it directly to the EntryList - either tail (policy == 1) // or head (policy == 0). // b. push it onto the front of the _cxq (policy == 2). // For now we use (b). + if (iterator->is_vthread()) { + oop vthread = iterator->vthread(); + java_lang_VirtualThread::set_notified(vthread, true); + int old_state = java_lang_VirtualThread::state(vthread); + // If state is not WAIT/TIMED_WAIT then target could still be on + // unmount transition, or wait could have already timed-out or target + // could have been interrupted. 
In the first case, the target itself + // will set the state to BLOCKED at the end of the unmount transition. + // In the other cases the target would have been already unblocked so + // there is nothing to do. + if (old_state == java_lang_VirtualThread::WAIT || + old_state == java_lang_VirtualThread::TIMED_WAIT) { + java_lang_VirtualThread::cmpxchg_state(vthread, old_state, java_lang_VirtualThread::BLOCKED); + } + } + iterator->TState = ObjectWaiter::TS_ENTER; - iterator->_notified = 1; + iterator->_notified = true; iterator->_notifier_tid = JFR_THREAD_ID(current); ObjectWaiter* list = _EntryList; @@ -1849,8 +1925,9 @@ void ObjectMonitor::INotify(JavaThread* current) { // is the only thread that grabs _WaitSetLock. There's almost no contention // on _WaitSetLock so it's not profitable to reduce the length of the // critical section. - - iterator->wait_reenter_begin(this); + if (!iterator->is_vthread()) { + iterator->wait_reenter_begin(this); + } } Thread::SpinRelease(&_WaitSetLock); } @@ -1901,6 +1978,95 @@ void ObjectMonitor::notifyAll(TRAPS) { OM_PERFDATA_OP(Notifications, inc(tally)); } +void ObjectMonitor::VThreadWait(JavaThread* current, jlong millis) { + oop vthread = current->vthread(); + ObjectWaiter* node = new ObjectWaiter(vthread, this); + node->_is_wait = true; + node->TState = ObjectWaiter::TS_WAIT; + java_lang_VirtualThread::set_notified(vthread, false); // Reset notified flag + + // Enter the waiting queue, which is a circular doubly linked list in this case + // but it could be a priority queue or any data structure. + // _WaitSetLock protects the wait queue. Normally the wait queue is accessed only + // by the owner of the monitor *except* in the case where park() + // returns because of a timeout or interrupt. Contention is exceptionally rare + // so we use a simple spin-lock instead of a heavier-weight blocking lock. + + Thread::SpinAcquire(&_WaitSetLock, "WaitSet - add"); + AddWaiter(node); + Thread::SpinRelease(&_WaitSetLock); + + node->_recursions = _recursions; // record the old recursion count + _recursions = 0; // set the recursion level to be 0 + _waiters++; // increment the number of waiters + exit(current); // exit the monitor + guarantee(!is_owner(current), "invariant"); + + assert(java_lang_VirtualThread::state(vthread) == java_lang_VirtualThread::RUNNING, "wrong state for vthread"); + java_lang_VirtualThread::set_state(vthread, millis == 0 ? java_lang_VirtualThread::WAITING : java_lang_VirtualThread::TIMED_WAITING); + java_lang_VirtualThread::set_waitTimeout(vthread, millis); + + // Save the ObjectWaiter* in the chunk since we will need it when resuming execution. + oop cont = java_lang_VirtualThread::continuation(vthread); + stackChunkOop chunk = jdk_internal_vm_Continuation::tail(cont); + chunk->set_object_waiter(node); +} + +bool ObjectMonitor::VThreadWaitReenter(JavaThread* current, ObjectWaiter* node, ContinuationWrapper& cont) { + // First time we run after being preempted on Object.wait(). + // Check if we were interrupted or the wait timed-out, and in + // that case remove ourselves from the _WaitSet queue. 
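VThreadWait()/VThreadWaitReenter() split an Object.wait() by a virtual thread into an unmount and a later reenter, distinguishing three wake-up reasons: notified, timed out, and interrupted. A user-level check of those three outcomes in plain Java; the sleeps and timeouts only make the intended interleavings likely, not guaranteed:

```java
// Exercises the three wake-up reasons: notified, timed out, and interrupted.
public class VThreadWaitOutcomes {
    static void waitOn(Object lock, long millis, String label) {
        synchronized (lock) {
            try {
                lock.wait(millis);
                System.out.println(label + ": wait returned normally");
            } catch (InterruptedException e) {
                System.out.println(label + ": InterruptedException");
            }
        }
    }

    public static void main(String[] args) throws Exception {
        Object lock1 = new Object();    // 1. woken by notify
        Thread t1 = Thread.ofVirtual().start(() -> waitOn(lock1, 0, "notified"));
        Thread.sleep(100);
        synchronized (lock1) { lock1.notify(); }
        t1.join();

        Object lock2 = new Object();    // 2. times out
        Thread t2 = Thread.ofVirtual().start(() -> waitOn(lock2, 100, "timed out"));
        t2.join();

        Object lock3 = new Object();    // 3. interrupted while waiting
        Thread t3 = Thread.ofVirtual().start(() -> waitOn(lock3, 0, "interrupted"));
        Thread.sleep(100);
        t3.interrupt();
        t3.join();
    }
}
```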
+ if (node->TState == ObjectWaiter::TS_WAIT) { + Thread::SpinAcquire(&_WaitSetLock, "WaitSet - unlink"); + if (node->TState == ObjectWaiter::TS_WAIT) { + DequeueSpecificWaiter(node); // unlink from WaitSet + assert(!node->_notified, "invariant"); + node->TState = ObjectWaiter::TS_RUN; + } + Thread::SpinRelease(&_WaitSetLock); + } + + // If this was an interrupted case, set the _interrupted boolean so that + // once we re-acquire the monitor we know if we need to throw IE or not. + ObjectWaiter::TStates state = node->TState; + bool was_notified = state == ObjectWaiter::TS_ENTER || state == ObjectWaiter::TS_CXQ; + assert(was_notified || state == ObjectWaiter::TS_RUN, ""); + node->_interrupted = !was_notified && current->is_interrupted(false); + + // Post JFR and JVMTI events. + EventJavaMonitorWait event; + if (event.should_commit() || JvmtiExport::should_post_monitor_waited()) { + vthread_monitor_waited_event(current, node, cont, &event, !was_notified && !node->_interrupted); + } + + // Mark that we are at reenter so that we don't call this method again. + node->_at_reenter = true; + assert(!is_owner(current), "invariant"); + + if (!was_notified) { + bool acquired = VThreadMonitorEnter(current, node); + if (acquired) { + guarantee(_recursions == 0, "invariant"); + _recursions = node->_recursions; // restore the old recursion count + _waiters--; // decrement the number of waiters + + if (node->_interrupted) { + // We will throw at thaw end after finishing the mount transition. + current->set_pending_interrupted_exception(true); + } + + delete node; + stackChunkOop chunk = cont.tail(); + chunk->set_object_waiter(nullptr); + return true; + } + } else { + // Already moved to _cxq or _EntryList by notifier, so just add to contentions. + add_to_contentions(1); + } + return false; +} + // ----------------------------------------------------------------------------- // Adaptive Spinning Support // @@ -2165,8 +2331,12 @@ ObjectWaiter::ObjectWaiter(JavaThread* current) { _thread = current; _monitor = nullptr; _notifier_tid = 0; - _notified = 0; + _recursions = 0; TState = TS_RUN; + _notified = false; + _is_wait = false; + _at_reenter = false; + _interrupted = false; _active = false; } diff --git a/src/hotspot/share/runtime/objectMonitor.hpp b/src/hotspot/share/runtime/objectMonitor.hpp index 57fc8b0a36bcb..d6042af43a653 100644 --- a/src/hotspot/share/runtime/objectMonitor.hpp +++ b/src/hotspot/share/runtime/objectMonitor.hpp @@ -48,15 +48,19 @@ class ContinuationWrapper; class ObjectWaiter : public CHeapObj { public: - enum TStates { TS_UNDEF, TS_READY, TS_RUN, TS_WAIT, TS_ENTER, TS_CXQ }; + enum TStates : uint8_t { TS_UNDEF, TS_READY, TS_RUN, TS_WAIT, TS_ENTER, TS_CXQ }; ObjectWaiter* volatile _next; ObjectWaiter* volatile _prev; JavaThread* _thread; OopHandle _vthread; ObjectMonitor* _monitor; uint64_t _notifier_tid; - volatile int _notified; + int _recursions; volatile TStates TState; + volatile bool _notified; + bool _is_wait; + bool _at_reenter; + bool _interrupted; bool _active; // Contention monitoring is enabled public: ObjectWaiter(JavaThread* current); @@ -66,6 +70,10 @@ class ObjectWaiter : public CHeapObj { bool is_vthread() { return _thread == nullptr; } uint8_t state() { return TState; } ObjectMonitor* monitor() { return _monitor; } + bool is_monitorenter() { return !_is_wait; } + bool is_wait() { return _is_wait; } + bool notified() { return _notified; } + bool at_reenter() { return _at_reenter; } oop vthread(); void wait_reenter_begin(ObjectMonitor *mon); void 
wait_reenter_end(ObjectMonitor *mon); @@ -337,14 +345,13 @@ class ObjectMonitor : public CHeapObj { // Simply set _next_om field to new_value. void set_next_om(ObjectMonitor* new_value); - int waiters() const; - int contentions() const; void add_to_contentions(int value); intx recursions() const { return _recursions; } void set_recursions(size_t recursions); // JVM/TI GetObjectMonitorUsage() needs this: + int waiters() const; ObjectWaiter* first_waiter() { return _WaitSet; } ObjectWaiter* next_waiter(ObjectWaiter* o) { return o->_next; } JavaThread* thread_of_waiter(ObjectWaiter* o) { return o->_thread; } @@ -411,7 +418,9 @@ class ObjectMonitor : public CHeapObj { void ReenterI(JavaThread* current, ObjectWaiter* current_node); void UnlinkAfterAcquire(JavaThread* current, ObjectWaiter* current_node); - bool VThreadMonitorEnter(JavaThread* current); + bool VThreadMonitorEnter(JavaThread* current, ObjectWaiter* node = nullptr); + void VThreadWait(JavaThread* current, jlong millis); + bool VThreadWaitReenter(JavaThread* current, ObjectWaiter* node, ContinuationWrapper& cont); void VThreadEpilog(JavaThread* current, ObjectWaiter* node); enum class TryLockResult { Interference = -1, HasOwner = 0, Success = 1 }; diff --git a/src/hotspot/share/runtime/vframe.inline.hpp b/src/hotspot/share/runtime/vframe.inline.hpp index 4630e695ce92d..e7f9edbc232d7 100644 --- a/src/hotspot/share/runtime/vframe.inline.hpp +++ b/src/hotspot/share/runtime/vframe.inline.hpp @@ -122,6 +122,13 @@ inline vframeStream::vframeStream(JavaThread* thread, bool stop_at_java_call_stu if (thread->is_vthread_mounted()) { _frame = vthread_carrier ? _thread->carrier_last_frame(&_reg_map) : _thread->vthread_last_frame(); + if (Continuation::is_continuation_enterSpecial(_frame)) { + // This can happen when calling async_get_stack_trace() and catching the target + // vthread at the JRT_BLOCK_END in freeze_internal() or when posting the Monitor + // Waited event after target vthread was preempted. Since all continuation frames + // are freezed we get the top frame from the stackChunk instead. 
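The vframeStream change above makes stack walks work while the virtual thread's frames are still frozen in the stackChunk, for example when the MonitorWaited event is posted or an async stack trace is requested. A user-level probe of that path: sampling the stack of a virtual thread that is unmounted in Object.wait(). Standard APIs only; the sleep just makes it likely the thread is already waiting when sampled:

```java
import java.util.Arrays;

// Samples the state and stack trace of a virtual thread unmounted in Object.wait().
public class WaitStackTraceDemo {
    public static void main(String[] args) throws Exception {
        final Object lock = new Object();
        Thread vt = Thread.ofVirtual().start(() -> {
            synchronized (lock) {
                try { lock.wait(); } catch (InterruptedException ignored) { }
            }
        });
        Thread.sleep(200);
        System.out.println("state = " + vt.getState());           // expected: WAITING
        Arrays.stream(vt.getStackTrace()).limit(5)
              .forEach(f -> System.out.println("  at " + f));     // should include Object.wait
        synchronized (lock) { lock.notifyAll(); }
        vt.join();
    }
}
```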
+ _frame = Continuation::last_frame(java_lang_VirtualThread::continuation(_thread->vthread()), &_reg_map); + } } else { _frame = _thread->last_frame(); } diff --git a/src/java.base/share/classes/java/lang/Object.java b/src/java.base/share/classes/java/lang/Object.java index 7909f05304268..14170d4fc0e7e 100644 --- a/src/java.base/share/classes/java/lang/Object.java +++ b/src/java.base/share/classes/java/lang/Object.java @@ -25,7 +25,6 @@ package java.lang; -import jdk.internal.misc.Blocker; import jdk.internal.vm.annotation.IntrinsicCandidate; /** @@ -374,21 +373,24 @@ public final void wait() throws InterruptedException { * @see #wait(long, int) */ public final void wait(long timeoutMillis) throws InterruptedException { - if (!Thread.currentThread().isVirtual()) { - wait0(timeoutMillis); - return; + if (timeoutMillis < 0) { + throw new IllegalArgumentException("timeout value is negative"); } - // virtual thread waiting - boolean attempted = Blocker.begin(); - try { + if (Thread.currentThread() instanceof VirtualThread vthread) { + try { + wait0(timeoutMillis); + } catch (InterruptedException e) { + // virtual thread's interrupt status needs to be cleared + vthread.getAndClearInterrupt(); + throw e; + } finally { + if (timeoutMillis > 0) { + vthread.cancelWaitTimeout(); + } + } + } else { wait0(timeoutMillis); - } catch (InterruptedException e) { - // virtual thread's interrupt status needs to be cleared - Thread.currentThread().getAndClearInterrupt(); - throw e; - } finally { - Blocker.end(attempted); } } diff --git a/src/java.base/share/classes/java/lang/VirtualThread.java b/src/java.base/share/classes/java/lang/VirtualThread.java index 3a4a323dd9a61..ab58bc07e68a1 100644 --- a/src/java.base/share/classes/java/lang/VirtualThread.java +++ b/src/java.base/share/classes/java/lang/VirtualThread.java @@ -112,6 +112,16 @@ final class VirtualThread extends BaseVirtualThread { * BLOCKED -> UNBLOCKED // unblocked, may be scheduled to continue * UNBLOCKED -> RUNNING // continue execution after blocked on monitor enter * + * RUNNING -> WAITING // transitional state during wait on monitor + * WAITING -> WAITED // waiting on monitor + * WAITED -> BLOCKED // notified, waiting to be unblocked by monitor owner + * WAITED -> UNBLOCKED // timed-out/interrupted + * + * RUNNING -> TIMED_WAITING // transition state during timed-waiting on monitor + * TIMED_WAITING -> TIMED_WAITED // timed-waiting on monitor + * TIMED_WAITED -> BLOCKED // notified, waiting to be unblocked by monitor owner + * TIMED_WAITED -> UNBLOCKED // timed-out/interrupted + * * RUNNING -> YIELDING // Thread.yield * YIELDING -> YIELDED // cont.yield successful, may be scheduled to continue * YIELDING -> RUNNING // cont.yield failed @@ -139,6 +149,12 @@ final class VirtualThread extends BaseVirtualThread { private static final int BLOCKED = 13; // unmounted private static final int UNBLOCKED = 14; // unmounted but runnable + // monitor wait/timed-wait + private static final int WAITING = 15; + private static final int WAIT = 16; // waiting in Object.wait + private static final int TIMED_WAITING = 17; + private static final int TIMED_WAIT = 18; // waiting in timed-Object.wait + private static final int TERMINATED = 99; // final state // can be suspended from scheduling when unmounted @@ -156,6 +172,14 @@ final class VirtualThread extends BaseVirtualThread { // next virtual thread on the list of virtual threads waiting to be unblocked private volatile VirtualThread next; + // notified by Object.notify/notifyAll while waiting in Object.wait + 
private volatile boolean notified; + + // timed-wait support + private long waitTimeout; + private byte timedWaitNonce; + private volatile Future waitTimeoutTask; + // carrier thread when mounted, accessed by VM private volatile Thread carrierThread; @@ -608,6 +632,44 @@ private void afterYield() { return; } + // Object.wait + if (s == WAITING || s == TIMED_WAITING) { + byte nonce; + int newState; + if (s == WAITING) { + nonce = 0; // not used + setState(newState = WAIT); + } else { + // synchronize with timeout task (previous timed-wait may be running) + synchronized (timedWaitLock()) { + nonce = ++timedWaitNonce; + setState(newState = TIMED_WAIT); + } + } + + // may have been notified while in transition to wait state + if (notified && compareAndSetState(newState, BLOCKED)) { + // may have even been unblocked already + if (blockPermit && compareAndSetState(BLOCKED, UNBLOCKED)) { + submitRunContinuation(); + } + return; + } + + // may have been interrupted while in transition to wait state + if (interrupted && compareAndSetState(newState, UNBLOCKED)) { + submitRunContinuation(); + return; + } + + // schedule wakeup + if (newState == TIMED_WAIT) { + assert waitTimeout > 0; + waitTimeoutTask = schedule(() -> waitTimeoutExpired(nonce), waitTimeout, MILLISECONDS); + } + return; + } + assert false; } @@ -899,6 +961,58 @@ private void unblock() { } } + /** + * Invoked by timer thread when wait timeout for virtual thread has expired. + * If the virtual thread is in timed-wait then this method will unblock the thread + * and submit its task so that it continues and attempts to reenter the monitor. + * This method does nothing if the thread has been woken by notify or interrupt. + */ + private void waitTimeoutExpired(byte nounce) { + assert !Thread.currentThread().isVirtual(); + for (;;) { + boolean unblocked = false; + synchronized (timedWaitLock()) { + if (nounce != timedWaitNonce) { + // this timeout task is for a past timed-wait + return; + } + int s = state(); + if (s == TIMED_WAIT) { + unblocked = compareAndSetState(TIMED_WAIT, UNBLOCKED); + } else if (s != (TIMED_WAIT | SUSPENDED)) { + // notified or interrupted, no longer waiting + return; + } + } + if (unblocked) { + submitRunContinuation(); + return; + } + // need to retry when thread is suspended in time-wait + Thread.yield(); + } + } + + /** + * Invoked by Object.wait to cancel the wait timer. + */ + void cancelWaitTimeout() { + assert Thread.currentThread() == this; + Future timeoutTask = this.waitTimeoutTask; + if (timeoutTask != null) { + // Pin the continuation to prevent the virtual thread from unmounting + // when there is contention removing the task. This avoids deadlock that + // could arise due to carriers and virtual threads contending for a + // lock on the delay queue. + Continuation.pin(); + try { + timeoutTask.cancel(false); + } finally { + Continuation.unpin(); + } + } + } + /** * Attempts to yield the current virtual thread (Thread.yield). 
*/ @@ -1028,6 +1142,13 @@ public void interrupt() { // make available parking permit, unpark thread if parked unpark(); + + // if thread is waiting in Object.wait then schedule to try to reenter + int s = state(); + if ((s == WAIT || s == TIMED_WAIT) && compareAndSetState(s, UNBLOCKED)) { + submitRunContinuation(); + } + } else { interrupted = true; carrierThread.setInterrupt(); @@ -1095,14 +1216,18 @@ Thread.State threadState() { return Thread.State.RUNNABLE; case PARKING: case TIMED_PARKING: + case WAITING: + case TIMED_WAITING: case YIELDING: // runnable, in transition return Thread.State.RUNNABLE; case PARKED: case PINNED: + case WAIT: return Thread.State.WAITING; case TIMED_PARKED: case TIMED_PINNED: + case TIMED_WAIT: return Thread.State.TIMED_WAITING; case BLOCKING: case BLOCKED: @@ -1152,13 +1277,13 @@ private StackTraceElement[] tryGetStackTrace() { case RUNNING, PINNED, TIMED_PINNED -> { return null; // mounted } - case PARKED, TIMED_PARKED, BLOCKED -> { + case PARKED, TIMED_PARKED, BLOCKED, WAIT, TIMED_WAIT -> { // unmounted, not runnable } case UNPARKED, UNBLOCKED, YIELDED -> { // unmounted, runnable } - case PARKING, TIMED_PARKING, BLOCKING, YIELDING -> { + case PARKING, TIMED_PARKING, BLOCKING, YIELDING, WAITING, TIMED_WAITING -> { return null; // in transition } default -> throw new InternalError("" + initialState); @@ -1191,6 +1316,11 @@ private StackTraceElement[] tryGetStackTrace() { // resubmit if unblocked while suspended yield blockPermit && compareAndSetState(BLOCKED, UNBLOCKED); } + case WAIT, TIMED_WAIT -> { + // resubmit if notified or interrupted while waiting (Object.wait) + // waitTimeoutExpired will retry if the timed expired when suspended + yield (notified || interrupted) && compareAndSetState(initialState, UNBLOCKED); + } default -> throw new InternalError(); }; if (resubmit) { @@ -1285,6 +1415,14 @@ private Object carrierThreadAccessLock() { return interruptLock; } + /** + * Returns a lock object to coordinating timed-wait setup and timeout handling. + */ + private Object timedWaitLock() { + // use this object for now to avoid the overhead of introducing another lock + return runContinuation; + } + /** * Disallow the current thread be suspended or preempted. */
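The threadState() mapping above reports the new internal states through the standard Thread.State values: WAIT maps to WAITING, TIMED_WAIT to TIMED_WAITING, and the transitional states are still reported as RUNNABLE. A quick user-level check of that mapping; the sleep only makes the sampled states likely, not guaranteed:

```java
// A virtual thread in untimed Object.wait() should report WAITING, and a timed
// Object.wait() should report TIMED_WAITING.
public class WaitThreadStateDemo {
    public static void main(String[] args) throws Exception {
        final Object lock = new Object();

        Thread untimed = Thread.ofVirtual().start(() -> {
            synchronized (lock) {
                try { lock.wait(); } catch (InterruptedException ignored) { }
            }
        });
        Thread timed = Thread.ofVirtual().start(() -> {
            synchronized (lock) {
                try { lock.wait(10_000); } catch (InterruptedException ignored) { }
            }
        });

        Thread.sleep(200);
        System.out.println("untimed wait: " + untimed.getState()); // expected WAITING
        System.out.println("timed wait:   " + timed.getState());   // expected TIMED_WAITING

        synchronized (lock) { lock.notifyAll(); }
        untimed.join();
        timed.join();
    }
}
```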