From abdbe69f5733e824cf7686db8d8a5e3d981fa6bd Mon Sep 17 00:00:00 2001 From: Guillaume Date: Fri, 1 Dec 2023 15:19:30 +0100 Subject: [PATCH] Call resume fast if an exception occurs during migration If an exception is raised when performing the migration of a VM we call resume fast to resume the VM on the original host if the VM is in suspended state. Signed-off-by: Guillaume --- ocaml/idl/datamodel_vm.ml | 16 ++++++++++++ ocaml/xapi-idl/xen/xenops_interface.ml | 4 +++ ocaml/xapi/api_server.ml | 1 + ocaml/xenopsd/lib/xenops_server.ml | 28 ++++++++++++++++++++- ocaml/xenopsd/lib/xenops_server_plugin.ml | 2 ++ ocaml/xenopsd/lib/xenops_server_skeleton.ml | 2 ++ ocaml/xenopsd/xc/domain.ml | 5 ++++ ocaml/xenopsd/xc/domain.mli | 3 +++ ocaml/xenopsd/xc/xenops_server_xen.ml | 9 +++++++ 9 files changed, 69 insertions(+), 1 deletion(-) diff --git a/ocaml/idl/datamodel_vm.ml b/ocaml/idl/datamodel_vm.ml index 014244ee41..4177549a59 100644 --- a/ocaml/idl/datamodel_vm.ml +++ b/ocaml/idl/datamodel_vm.ml @@ -827,6 +827,22 @@ let unpause = ] ~allowed_roles:_R_VM_OP () +(* VM.ResumeFast *) + +let resumeFast = + call ~in_product_since:rel_rio ~name:"resume_fast" + ~doc: + "Resume the specified VM cooperatively. This can only be called when the \ + specified VM is in the Suspended state." 
+ ~params:[(Ref _vm, "vm", "The VM to resume fast")] + ~errs: + [ + Api_errors.vm_bad_power_state + ; Api_errors.operation_not_allowed + ; Api_errors.vm_is_template + ] + ~allowed_roles:_R_VM_OP () + (* VM.CleanShutdown *) let cleanShutdown = diff --git a/ocaml/xapi-idl/xen/xenops_interface.ml b/ocaml/xapi-idl/xen/xenops_interface.ml index f472bafefe..12c76fd807 100644 --- a/ocaml/xapi-idl/xen/xenops_interface.ml +++ b/ocaml/xapi-idl/xen/xenops_interface.ml @@ -737,6 +737,10 @@ module XenopsAPI (R : RPC) = struct declare "VM.unpause" [] (debug_info_p @-> vm_id_p @-> returning task_id_p err) + let resume_fast = + declare "VM.resume_fast" [] + (debug_info_p @-> vm_id_p @-> returning task_id_p err) + let request_rdp = let enabled_p = Param.mk ~name:"enabled" Types.bool in declare "VM.request_rdp" [] diff --git a/ocaml/xapi/api_server.ml b/ocaml/xapi/api_server.ml index e7f414c626..a7863f4a26 100644 --- a/ocaml/xapi/api_server.ml +++ b/ocaml/xapi/api_server.ml @@ -197,6 +197,7 @@ let callback1 ?(json_rpc_version = Jsonrpc.V1) is_json req fd call = (* if we're a master or slave and whether the call came in on the unix domain socket or the tcp socket *) (* If we're a slave, and the call is from the unix domain socket or from the HIMN, and the call *isn't* *) (* in the whitelist, then forward *) + let () = D.debug "call.Rpc.name is %s" call.Rpc.name in let whitelisted = List.mem call.Rpc.name whitelist in let emergency_call = List.mem call.Rpc.name emergency_call_list in let is_slave = not (Pool_role.is_master ()) in diff --git a/ocaml/xenopsd/lib/xenops_server.ml b/ocaml/xenopsd/lib/xenops_server.ml index 52f3f2acdc..08004d696c 100644 --- a/ocaml/xenopsd/lib/xenops_server.ml +++ b/ocaml/xenopsd/lib/xenops_server.ml @@ -138,6 +138,7 @@ type atomic = | VM_pause of Vm.id | VM_softreboot of Vm.id | VM_unpause of Vm.id + | VM_resume_fast of Vm.id | VM_request_rdp of (Vm.id * bool) | VM_run_script of (Vm.id * string) | VM_set_domain_action_request of (Vm.id * 
domain_action_request option) @@ -231,6 +232,8 @@ let rec name_of_atomic = function "VM_softreboot" | VM_unpause _ -> "VM_unpause" + | VM_resume_fast _ -> + "VM_resume_fast" | VM_request_rdp _ -> "VM_request_rdp" | VM_run_script _ -> @@ -302,6 +305,7 @@ type operation = | VM_restore_vifs of Vm.id | VM_restore_devices of (Vm.id * bool) | VM_migrate of vm_migrate_op + | VM_resume_fast of Vm.id | VM_receive_memory of vm_receive_op | VBD_hotplug of Vbd.id | VBD_hotunplug of Vbd.id * bool @@ -336,6 +340,8 @@ let name_of_operation = function "VM_restore_devices" | VM_migrate _ -> "VM_migrate" + | VM_resume_fast _ -> + "VM_resume_fast" | VM_receive_memory _ -> "VM_receive_memory" | VBD_hotplug _ -> @@ -2195,6 +2201,16 @@ let rec perform_atomic ~progress_callback ?subtask:_ ?result (op : atomic) | _ -> info "VM %s is not paused" id ) + | VM_resume_fast id -> ( + debug "VM.resume_fast %s" id ; + let vm_t = VM_DB.read_exn id in + let power = (B.VM.get_state vm_t).Vm.power_state in + match power with + | Suspended -> + B.VM.resume_fast t vm_t ; VM_DB.signal id + | _ -> + info "VM %s is not suspended, fast resume is not allowed" id + ) | VM_request_rdp (id, enabled) -> debug "VM.request_rdp %s %b" id enabled ; B.VM.request_rdp (VM_DB.read_exn id) enabled @@ -2419,6 +2435,7 @@ let rec immediate_operation dbg _id op = and trigger_cleanup_after_failure op t = let dbg = (Xenops_task.to_interface_task t).Task.dbg in match op with + | VM_resume_fast _ | VM_check_state _ | PCI_check_state _ | VBD_check_state _ @@ -2439,7 +2456,8 @@ and trigger_cleanup_after_failure op t = immediate_operation dbg final_id (VM_check_state final_id) | VM_migrate {vmm_id; vmm_tmp_src_id; _} -> immediate_operation dbg vmm_id (VM_check_state vmm_id) ; - immediate_operation dbg vmm_tmp_src_id (VM_check_state vmm_tmp_src_id) + immediate_operation dbg vmm_tmp_src_id (VM_check_state vmm_tmp_src_id) ; + immediate_operation dbg vmm_id (VM_resume_fast vmm_id) | VBD_hotplug id | VBD_hotunplug (id, _) -> 
immediate_operation dbg (fst id) (VBD_check_state id) | VIF_hotplug id | VIF_hotunplug (id, _) -> @@ -2489,6 +2507,7 @@ and trigger_cleanup_after_failure_atom op t = | VM_set_memory_dynamic_range (id, _, _) | VM_pause id | VM_unpause id + | VM_resume_fast id | VM_request_rdp (id, _) | VM_run_script (id, _) | VM_set_domain_action_request (id, _) @@ -2569,6 +2588,10 @@ and perform_exn ?subtask ?result (op : operation) (t : Xenops_task.task_handle) | VIF_hotunplug (id, force) -> debug "VIF_hotplug %s.%s %b" (fst id) (snd id) force ; perform_atomics (atomics_of_operation op) t + | VM_resume_fast id -> + debug "VM.resume_fast %s" id ; + perform_atomics (atomics_of_operation op) t ; + VM_DB.signal id | VM_migrate vmm -> debug "VM.migrate %s -> %s" vmm.vmm_id vmm.vmm_url ; let id = vmm.vmm_id in @@ -3554,6 +3577,8 @@ module VM = struct let unpause _ dbg id = queue_operation dbg id (Atomic (VM_unpause id)) + let resume_fast _ dbg id = queue_operation dbg id (Atomic (VM_resume_fast id)) + let request_rdp _ dbg id enabled = queue_operation dbg id (Atomic (VM_request_rdp (id, enabled))) @@ -4137,6 +4162,7 @@ let _ = Server.VM.destroy (VM.destroy ()) ; Server.VM.pause (VM.pause ()) ; Server.VM.unpause (VM.unpause ()) ; + Server.VM.resume_fast (VM.resume_fast ()) ; Server.VM.request_rdp (VM.request_rdp ()) ; Server.VM.run_script (VM.run_script ()) ; Server.VM.set_xsdata (VM.set_xsdata ()) ; diff --git a/ocaml/xenopsd/lib/xenops_server_plugin.ml b/ocaml/xenopsd/lib/xenops_server_plugin.ml index fbeb78f364..3153645e24 100644 --- a/ocaml/xenopsd/lib/xenops_server_plugin.ml +++ b/ocaml/xenopsd/lib/xenops_server_plugin.ml @@ -118,6 +118,8 @@ module type S = sig val unpause : Xenops_task.task_handle -> Vm.t -> unit + val resume_fast : Xenops_task.task_handle -> Vm.t -> unit + val set_xsdata : Xenops_task.task_handle -> Vm.t -> (string * string) list -> unit diff --git a/ocaml/xenopsd/lib/xenops_server_skeleton.ml b/ocaml/xenopsd/lib/xenops_server_skeleton.ml index 
c688aa7923..030213828d 100644 --- a/ocaml/xenopsd/lib/xenops_server_skeleton.ml +++ b/ocaml/xenopsd/lib/xenops_server_skeleton.ml @@ -77,6 +77,8 @@ module VM = struct let unpause _ _ = unimplemented "VM.unpause" + let resume_fast _ _ = unimplemented "VM.resume_fast" + let set_xsdata _ _ _ = unimplemented "VM.set_xsdata" let set_vcpus _ _ _ = unimplemented "VM.set_vcpus" diff --git a/ocaml/xenopsd/xc/domain.ml b/ocaml/xenopsd/xc/domain.ml index 177d25937c..cfb08a5e02 100644 --- a/ocaml/xenopsd/xc/domain.ml +++ b/ocaml/xenopsd/xc/domain.ml @@ -770,6 +770,11 @@ let pause ~xc domid = Xenctrl.domain_pause xc domid let unpause ~xc domid = Xenctrl.domain_unpause xc domid +let resume_fast ~xc domid = + let uuid = get_uuid ~xc domid in + debug "VM = %s; domid = %d; calling resume fast" (Uuidx.to_string uuid) domid ; + Xenctrl.domain_resume_fast xc domid + let set_action_request ~xs domid x = let path = xs.Xs.getdomainpath domid ^ "/action-request" in match x with None -> xs.Xs.rm path | Some v -> xs.Xs.write path v diff --git a/ocaml/xenopsd/xc/domain.mli b/ocaml/xenopsd/xc/domain.mli index 0f18579ecf..b3f2ae266b 100644 --- a/ocaml/xenopsd/xc/domain.mli +++ b/ocaml/xenopsd/xc/domain.mli @@ -209,6 +209,9 @@ val pause : xc:Xenctrl.handle -> domid -> unit val unpause : xc:Xenctrl.handle -> domid -> unit (** Unpause a domain *) +val resume_fast : xc:Xenctrl.handle -> domid -> unit +(** Resume fast a domain *) + val set_action_request : xs:Xenstore.Xs.xsh -> domid -> string option -> unit (** [set_action_request xs domid None] declares this domain is fully intact. Any other string is a hint to the toolstack that the domain is still broken. 
*) diff --git a/ocaml/xenopsd/xc/xenops_server_xen.ml b/ocaml/xenopsd/xc/xenops_server_xen.ml index 4a83e9b18e..609e65018f 100644 --- a/ocaml/xenopsd/xc/xenops_server_xen.ml +++ b/ocaml/xenopsd/xc/xenops_server_xen.ml @@ -1786,6 +1786,15 @@ module VM = struct Domain.unpause ~xc di.Xenctrl.domid ) + (** Fast resume requires the domain to have been suspended; callers must + ensure that this is the case. Fast resume is a cooperative resume, so it + can only be used for guests which can handle the special return code. *) + let resume_fast t vm = + on_domain t vm (fun xc _ _ _ di -> + (* TODO: check if we can get info in xenstore that the VM supports fast resume *) + Domain.resume_fast ~xc di.Xenctrl.domid + ) + let set_xsdata task vm xsdata = on_domain task vm (fun _ xs _ _ di -> Domain.set_xsdata ~xs di.Xenctrl.domid xsdata