diff --git a/core/cpu.c b/core/cpu.c
index aadea0e6..c2fe4d61 100644
--- a/core/cpu.c
+++ b/core/cpu.c
@@ -502,7 +502,15 @@ uint8 is_vmcs_loaded(struct vcpu_t *vcpu)
 
 int debug_vmcs_count = 0;
 
-void restore_host_cr4_vmxe(struct per_cpu_data *cpu_data);
+void check_vmxon_coherency(struct per_cpu_data *cpu_data)
+{
+    if ((cpu_data->host_cr4 & CR4_VMXE) &&
+        (cpu_data->vmm_flag & VMXON_HAX)) {
+        // TODO: Need to understand why this happens
+        // (on both Windows and macOS)
+        hax_debug("HAX: VMM flag (VMXON_HAX) is not clear!\n");
+    }
+}
 
 uint32 log_host_cr4_vmxe = 0;
 uint64 log_host_cr4 = 0;
@@ -548,13 +556,107 @@ void hax_panic_log(struct vcpu_t *vcpu)
     hax_error("log_vmoff_err %lx\n", log_vmxoff_err);
 }
 
+vmx_error_t vmxroot_enter(struct per_cpu_data *cpu_data);
+vmx_error_t vmxroot_leave(struct per_cpu_data *cpu_data);
+void restore_host_cr(struct per_cpu_data *cpu_data);
+
+/*
+ * Try to enter VMX root operation.
+ * Must be smp_call_function() safe.
+ * The success/failure logic is handled by the caller.
+ */
+vmx_error_t vmxroot_enter(struct per_cpu_data *cpu_data)
+{
+    uint64 fc_msr;
+    uint64 cr0, cr0_f0, cr0_f1;
+    uint64 cr4, cr4_f0, cr4_f1;
+
+    smp_mb();
+
+    // Always save the host CR0/CR4 values
+    cpu_data->host_cr0 = get_cr0();
+    cpu_data->host_cr4 = get_cr4();
+
+    // Set fixed bits in CR0 if needed
+    cr0 = cpu_data->host_cr0;
+    cr0_f0 = cpu_data->vmx_info._cr0_fixed_0;
+    cr0_f1 = cpu_data->vmx_info._cr0_fixed_1;
+
+    cr0 = (cr0 & cr0_f1) | cr0_f0;
+    if (cr0 != cpu_data->host_cr0) {
+        set_cr0(cr0);
+        cpu_data->vmm_flag |= VMXON_RESTORE_CR0;
+    }
+
+    // Set fixed bits in CR4 if needed
+    cr4 = cpu_data->host_cr4;
+    cr4_f0 = cpu_data->vmx_info._cr4_fixed_0;
+    cr4_f1 = cpu_data->vmx_info._cr4_fixed_1;
+
+    cr4 = (cr4 & cr4_f1) | cr4_f0;
+    if (cr4 != cpu_data->host_cr4) {
+        set_cr4(cr4);
+        cpu_data->vmm_flag |= VMXON_RESTORE_CR4;
+    }
+
+    /* HP & Mac systems workaround
+     * When resuming from S3, some HP/Mac systems set the IA32_FEATURE_CONTROL
+     * MSR to zero. Leaving the lock bit at zero & then doing 'vmxon' would
+     * cause a #GP. As a workaround, when we see this condition, we enable the
+     * bits so that we can launch vmxon & thereby hax.
+     * bit 0 - Lock bit
+     * bit 2 - Enable VMX outside SMX operation
+     *
+     * ********* To Do **************************************
+     * This is the workaround to fix the BSOD when resuming from S3.
+     * The best way is to add a power management handler, and set the
+     * IA32_FEATURE_CONTROL MSR in that PM S3 handler.
+     * *****************************************************
+     */
+    fc_msr = ia32_rdmsr(IA32_FEATURE_CONTROL);
+    if (!(fc_msr & FC_LOCKED))
+        ia32_wrmsr(IA32_FEATURE_CONTROL,
+                   fc_msr | FC_LOCKED | FC_VMXON_OUTSMX);
+
+    return __vmxon(hax_page_pa(cpu_data->vmxon_page));
+}
+
+/*
+ * Restore the host control registers to the values saved before entering
+ * VMX root operation. Must be smp_call_function() safe.
+ */
+void restore_host_cr(struct per_cpu_data *cpu_data)
+{
+    if (cpu_data->vmm_flag & VMXON_RESTORE_CR0)
+        set_cr0(cpu_data->host_cr0);
+
+    if (cpu_data->vmm_flag & VMXON_RESTORE_CR4)
+        set_cr4(cpu_data->host_cr4);
+}
+
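The (cr & fixed1) | fixed0 arithmetic in vmxroot_enter() above follows the SDM's fixed-bit convention for VMX operation: a CR0/CR4 bit must be 1 wherever the IA32_VMX_CR0_FIXED0 / IA32_VMX_CR4_FIXED0 MSR has a 1, and must be 0 wherever the corresponding FIXED1 MSR has a 0. A minimal standalone sketch of that clamping, using made-up values rather than real MSR reads:

    #include <stdint.h>
    #include <stdio.h>

    /* Clamp a control register to what VMX operation allows: mask off the
     * must-be-zero bits (FIXED1), then force on the must-be-one bits
     * (FIXED0). */
    static uint64_t vmx_clamp_cr(uint64_t cr, uint64_t fixed0, uint64_t fixed1)
    {
        return (cr & fixed1) | fixed0;
    }

    int main(void)
    {
        /* Illustrative values only: FIXED0 = 0x2000 marks bit 13 (CR4.VMXE)
         * as must-be-one; an all-ones FIXED1 imposes no must-be-zero bits. */
        uint64_t host_cr4 = 0x406e0;
        uint64_t clamped = vmx_clamp_cr(host_cr4, 0x2000, ~0ULL);

        /* clamped != host_cr4, so vmxroot_enter() would write the new value
         * with set_cr4() and set VMXON_RESTORE_CR4, letting restore_host_cr()
         * undo the change later. */
        printf("CR4 0x%llx -> 0x%llx\n", (unsigned long long)host_cr4,
               (unsigned long long)clamped);
        return 0;
    }
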
+/*
+ * Try to leave VMX root operation.
+ * Must be smp_call_function() safe.
+ * The success/failure logic is handled by the caller.
+ */
+vmx_error_t vmxroot_leave(struct per_cpu_data *cpu_data)
+{
+    vmx_error_t err;
+
+    smp_mb();
+
+    err = __vmxoff();
+    restore_host_cr(cpu_data);
+    return err;
+}
+
 uint32 load_vmcs(struct vcpu_t *vcpu, preempt_flag *flags)
 {
     struct per_cpu_data *cpu_data;
     paddr_t vmcs_phy;
     paddr_t curr_vmcs = VMCS_NONE;
     vmx_error_t err = 0;
-    uint64 fc_msr;
+    mword host_cr4_vmxe;
 
     hax_disable_preemption(flags);
 
@@ -570,37 +672,18 @@ uint32 load_vmcs(struct vcpu_t *vcpu, preempt_flag *flags)
         return 0;
     }
 
-    cpu_data->host_cr4_vmxe = (get_cr4() & CR4_VMXE);
-    if(cpu_data->host_cr4_vmxe) {
+    err = vmxroot_enter(cpu_data);
+    host_cr4_vmxe = cpu_data->host_cr4 & CR4_VMXE;
+
+    if (host_cr4_vmxe) {
         if (debug_vmcs_count % 100000 == 0) {
-            hax_debug("host VT has enabled!\n");
-            hax_debug("Cr4 value = 0x%lx\n", get_cr4());
+            hax_debug("host has VT enabled!\n");
+            hax_debug("Cr4 value = 0x%llx\n", cpu_data->host_cr4);
             log_host_cr4_vmxe = 1;
-            log_host_cr4 = get_cr4();
+            log_host_cr4 = cpu_data->host_cr4;
         }
         debug_vmcs_count++;
     }
-    set_cr4(get_cr4() | CR4_VMXE);
-    /* HP systems & Mac systems workaround
-     * When resuming from S3, some HP/Mac set the IA32_FEATURE_CONTROL MSR to
-     * zero. Setting the lock bit to zero & then doing 'vmxon' would cause a GP.
-     * As a workaround, when we see this condition, we enable the bits so that
-     * we can launch vmxon & thereby hax.
-     * bit 0 - Lock bit
-     * bit 2 - Enable VMX outside SMX operation
-     *
-     * ********* To Do **************************************
-     * This is the workground to fix BSOD when resume from S3
-     * The best way is to add one power management handler, and set
-     * IA32_FEATURE_CONTROL MSR in that PM S3 handler
-     * *****************************************************
-     */
-    fc_msr = ia32_rdmsr(IA32_FEATURE_CONTROL);
-    if (!(fc_msr & FC_LOCKED))
-        ia32_wrmsr(IA32_FEATURE_CONTROL,
-                   fc_msr | FC_LOCKED | FC_VMXON_OUTSMX);
-
-    err = __vmxon(hax_page_pa(cpu_data->vmxon_page));
 
     log_vmxon_err = err;
     log_vmxon_addr = hax_page_pa(cpu_data->vmxon_page);
 
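The failure paths below hinge on how a vmx_error_t reports VMX status. Per the SDM there are three outcomes: VMsucceed, VMfailInvalid (signalled through RFLAGS.CF, used when there is no current VMCS to report through), and VMfailValid (signalled through RFLAGS.ZF, with an error number left in the VM-instruction error field of the current VMCS). The macOS check in the next hunk keys off `err & VMX_FAIL_INVALID`. A small decoder sketch built only from names that appear in this patch, assuming VMX_FAIL_MASK covers both failure bits:

    /* Classify a vmx_error_t into the SDM's three VMX outcomes.
     * VMX_FAIL_MASK and VMX_FAIL_INVALID are the driver's names; the
     * three-way split mirrors the VMsucceed / VMfailInvalid / VMfailValid
     * convention. */
    static const char *vmx_err_str(vmx_error_t err)
    {
        if (!(err & VMX_FAIL_MASK))
            return "VMsucceed";
        if (err & VMX_FAIL_INVALID)
            return "VMfailInvalid"; /* no current VMCS to report through */
        return "VMfailValid";       /* error number in the VM-instruction
                                       error field of the current VMCS */
    }
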
@@ -611,7 +694,7 @@ uint32 load_vmcs(struct vcpu_t *vcpu, preempt_flag *flags)
         bool fatal = true;
 
 #ifdef __MACH__
-        if ((err & VMX_FAIL_INVALID) && cpu_data->host_cr4_vmxe) {
+        if ((err & VMX_FAIL_INVALID) && host_cr4_vmxe) {
             // On macOS, if VMXON fails with VMX_FAIL_INVALID and host CR4.VMXE
             // was already set, it is very likely that another VMM (VirtualBox
             // or any VMM based on macOS Hypervisor Framework, e.g. Docker) is
@@ -636,11 +719,12 @@ uint32 load_vmcs(struct vcpu_t *vcpu, preempt_flag *flags)
             }
         }
 #endif
+        check_vmxon_coherency(cpu_data);
+        restore_host_cr(cpu_data);
 
         if (fatal) {
             hax_error("VMXON failed for region 0x%llx (err=0x%x)\n",
                       hax_page_pa(cpu_data->vmxon_page), (uint32) err);
-            restore_host_cr4_vmxe(cpu_data);
             if (err & VMX_FAIL_INVALID) {
                 log_vmxon_err_type1 = 1;
             } else {
@@ -666,8 +750,10 @@ uint32 load_vmcs(struct vcpu_t *vcpu, preempt_flag *flags)
     if (__vmptrld(vmcs_phy) != VMX_SUCCEED) {
         hax_error("HAX: vmptrld failed (%08llx)\n", vmcs_phy);
         cpu_data->vmm_flag = 0;
-        __vmxoff();
-        restore_host_cr4_vmxe(cpu_data);
+        err = vmxroot_leave(cpu_data);
+        if (err & VMX_FAIL_MASK) {
+            hax_error("HAX: vmxoff failed (err=0x%lx)\n", err);
+        }
         log_vmxon_err_type3 = 1;
         hax_enable_preemption(flags);
         return VMPTRLD_FAIL;
@@ -683,20 +769,6 @@ uint32 load_vmcs(struct vcpu_t *vcpu, preempt_flag *flags)
     return VMXON_SUCCESS;
 }
 
-void restore_host_cr4_vmxe(struct per_cpu_data *cpu_data)
-{
-    if (cpu_data->host_cr4_vmxe) {
-        if (cpu_data->vmm_flag & VMXON_HAX) {
-            // TODO: Need to understand why this happens (on both Windows and
-            // macOS)
-            hax_debug("HAX: VMM flag (VMON_HAX) is not clear!\n");
-        }
-        set_cr4(get_cr4() | CR4_VMXE);
-    } else {
-        set_cr4(get_cr4() & (~CR4_VMXE));
-    }
-}
-
 uint32 put_vmcs(struct vcpu_t *vcpu, preempt_flag *flags)
 {
     int cpu_id = hax_cpuid();
@@ -722,10 +794,9 @@ uint32 put_vmcs(struct vcpu_t *vcpu, preempt_flag *flags)
     cpu_data->current_vcpu = NULL;
 
     if (cpu_data->vmm_flag & VMXON_HAX) {
-        err = __vmxoff();
-        if (!(err & VMX_FAIL_MASK)) {
-            restore_host_cr4_vmxe(cpu_data);
-        } else {
+        check_vmxon_coherency(cpu_data);
+        err = vmxroot_leave(cpu_data);
+        if (err & VMX_FAIL_MASK) {
             hax_error("VMXOFF Failed..........\n");
             vmxoff_err = err;
             log_vmxoff_err = err;
diff --git a/core/ept.c b/core/ept.c
index 0e8f6754..8a016995 100644
--- a/core/ept.c
+++ b/core/ept.c
@@ -330,23 +330,17 @@ static void invept_smpfunc(struct invept_bundle *bundle)
 
     smp_mb();
     cpu_data = current_cpu_data();
-    cpu_data->vmxon_err = VMX_SUCCEED;
-    cpu_data->vmxoff_err = VMX_SUCCEED;
-    cpu_data->invept_err = VMX_SUCCEED;
-
-    cpu_data->host_cr4_vmxe = get_cr4() & CR4_VMXE;
-    set_cr4(get_cr4() | CR4_VMXE);
-    cpu_data->vmxon_err = __vmxon(hax_page_pa(cpu_data->vmxon_page));
-
-    if (!(cpu_data->vmxon_err & VMX_FAIL_MASK)) {
-        cpu_data->invept_err = __invept(bundle->type, bundle->desc);
-        cpu_data->vmxoff_err = __vmxoff();
-        if (cpu_data->host_cr4_vmxe) {
-            set_cr4(get_cr4() | CR4_VMXE);
-        } else {
-            set_cr4(get_cr4() & ~CR4_VMXE);
-        }
+    cpu_data->vmxon_err = vmxroot_enter(cpu_data);
+
+    if (cpu_data->vmxon_err & VMX_FAIL_MASK) {
+        cpu_data->invept_err = VMX_SUCCEED;
+        cpu_data->vmxoff_err = VMX_SUCCEED;
+        restore_host_cr(cpu_data);
+        return;
     }
+
+    cpu_data->invept_err = __invept(bundle->type, bundle->desc);
+    cpu_data->vmxoff_err = vmxroot_leave(cpu_data);
 }
 
 void invept(hax_vm_t *hax_vm, uint type)
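invept_smpfunc() runs on every processor through an SMP cross-call, and a cross-call cannot return a value, which is why the rewritten function parks its three results in per_cpu_data rather than returning them. A hedged sketch of the initiator side: the bundle setup mirrors how invept() drives invept_smpfunc() in this file, while hax_smp_call_function(), cpu_online_map, hax_cpu_data[] and max_cpus are assumptions about the driver's surrounding plumbing:

    struct invept_bundle bundle;
    int cpu_id;

    bundle.type = type;
    bundle.desc = &desc;

    /* Fan the flush out to every online CPU; each CPU records its own
     * vmxon/invept/vmxoff verdict in its per_cpu_data slot. */
    hax_smp_call_function(&cpu_online_map,
                          (void (*)(void *))invept_smpfunc, &bundle);

    /* Sweep the per-CPU results afterwards. */
    for (cpu_id = 0; cpu_id < max_cpus; cpu_id++) {
        struct per_cpu_data *cpu_data = hax_cpu_data[cpu_id];

        if ((cpu_data->vmxon_err | cpu_data->invept_err |
             cpu_data->vmxoff_err) & VMX_FAIL_MASK) {
            hax_error("invept failed on CPU %d\n", cpu_id);
        }
    }
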
diff --git a/core/include/cpu.h b/core/include/cpu.h
index c9b8fa94..2a1cafb8 100644
--- a/core/include/cpu.h
+++ b/core/include/cpu.h
@@ -85,7 +85,9 @@ struct hstate_compare {
     uint64 gs_msr, rflags, rsp;
 };
 
-#define VMXON_HAX (1 << 0)
+#define VMXON_HAX         (1 << 0)
+#define VMXON_RESTORE_CR0 (1 << 1)
+#define VMXON_RESTORE_CR4 (1 << 2)
 
 struct per_cpu_data {
     struct hax_page *vmxon_page;
@@ -95,7 +97,13 @@ struct per_cpu_data {
     cpuid_t cpu_id;
     uint16 vmm_flag;
     uint16 nested;
-    mword host_cr4_vmxe;
+
+    /*
+     * These fields record the host CR0/CR4 values saved prior to entering
+     * VMX root operation, so that they can be restored when leaving
+     * VMX root operation.
+     */
+    uint64 host_cr0, host_cr4;
 
     /*
      * These fields are used to record the result of certain VMX instructions
diff --git a/core/include/vmx.h b/core/include/vmx.h
index 07123fab..ff88b64e 100644
--- a/core/include/vmx.h
+++ b/core/include/vmx.h
@@ -459,6 +459,11 @@ union vmcs_t {
 
 typedef union vmcs_t vmcs_t;
 
+struct per_cpu_data;
+extern vmx_error_t vmxroot_enter(struct per_cpu_data *cpu_data);
+extern vmx_error_t vmxroot_leave(struct per_cpu_data *cpu_data);
+extern void restore_host_cr(struct per_cpu_data *cpu_data);
+
 struct vcpu_t;
 extern void load_vmcs_common(struct vcpu_t *vcpu);
 extern uint32 load_vmcs(struct vcpu_t *vcpu, preempt_flag *flags);
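With these declarations exported, any translation unit can bracket a single VMX instruction between vmxroot_enter() and vmxroot_leave(), which is exactly the shape invept_smpfunc() takes above. One property of the pair is worth spelling out: vmxroot_leave() executes VMXOFF before restore_host_cr(), and that order is load-bearing, because clearing CR4.VMXE while the CPU is still in VMX operation raises #GP. A hypothetical consumer; flush_one() is a made-up name, and __invept()'s argument types (including struct invept_desc) are assumptions inferred from the ept.c hunk:

    #include "cpu.h"
    #include "vmx.h"

    /* Enter VMX root operation, run one VMX instruction, leave again.
     * A failed enter is unwound with restore_host_cr() alone, since
     * VMXON never took effect and VMXOFF itself would fault. */
    static vmx_error_t flush_one(uint type, struct invept_desc *desc)
    {
        struct per_cpu_data *cpu_data = current_cpu_data();
        vmx_error_t err = vmxroot_enter(cpu_data);

        if (err & VMX_FAIL_MASK) {
            restore_host_cr(cpu_data);
            return err;
        }
        err = __invept(type, desc);
        (void)vmxroot_leave(cpu_data); /* VMXOFF first, then CR restore */
        return err;
    }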