Skip to content

Commit

Permalink
Call resume fast if an exception occurs during migration
Browse files Browse the repository at this point in the history
If an exception is raised while migrating a VM, we call resume fast to
resume the VM on the original host, provided the VM is in the suspended
state.

Signed-off-by: Guillaume <[email protected]>
  • Loading branch information
gthvn1 committed Jan 4, 2024
1 parent d5f5b12 commit abdbe69
Show file tree
Hide file tree
Showing 9 changed files with 69 additions and 1 deletion.
16 changes: 16 additions & 0 deletions ocaml/idl/datamodel_vm.ml
Original file line number Diff line number Diff line change
Expand Up @@ -827,6 +827,22 @@ let unpause =
]
~allowed_roles:_R_VM_OP ()

(* VM.ResumeFast: datamodel declaration of the "resume_fast" call. It takes a
   single VM reference, is restricted to the _R_VM_OP roles, and may fail with
   vm_bad_power_state, operation_not_allowed or vm_is_template. *)

(* NOTE(review): this call is newly added but is declared with
   ~in_product_since:rel_rio — confirm the release it should be tagged with. *)
let resumeFast =
call ~in_product_since:rel_rio ~name:"resume_fast"
~doc:
"Resume the specified VM cooperatively. This can only be called when the \
specified VM is in the Suspended state."
~params:[(Ref _vm, "vm", "The VM to resume fast")]
~errs:
[
Api_errors.vm_bad_power_state
; Api_errors.operation_not_allowed
; Api_errors.vm_is_template
]
~allowed_roles:_R_VM_OP ()

(* VM.CleanShutdown *)

let cleanShutdown =
Expand Down
4 changes: 4 additions & 0 deletions ocaml/xapi-idl/xen/xenops_interface.ml
Original file line number Diff line number Diff line change
Expand Up @@ -737,6 +737,10 @@ module XenopsAPI (R : RPC) = struct
declare "VM.unpause" []
(debug_info_p @-> vm_id_p @-> returning task_id_p err)

(* RPC declaration for "VM.resume_fast": takes the debug info and a VM id and
   returns a task id (or [err]). *)
let resume_fast =
declare "VM.resume_fast" []
(debug_info_p @-> vm_id_p @-> returning task_id_p err)

let request_rdp =
let enabled_p = Param.mk ~name:"enabled" Types.bool in
declare "VM.request_rdp" []
Expand Down
1 change: 1 addition & 0 deletions ocaml/xapi/api_server.ml
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,7 @@ let callback1 ?(json_rpc_version = Jsonrpc.V1) is_json req fd call =
(* if we're a master or slave and whether the call came in on the unix domain socket or the tcp socket *)
(* If we're a slave, and the call is from the unix domain socket or from the HIMN, and the call *isn't* *)
(* in the whitelist, then forward *)
(* Trace the name of the incoming RPC. Logged at debug level so the info log
   is not flooded; bound with [let ()] so the logger call must be fully
   applied and of type unit. *)
let () = D.debug "call.Rpc.name is %s" call.Rpc.name in
let whitelisted = List.mem call.Rpc.name whitelist in
let emergency_call = List.mem call.Rpc.name emergency_call_list in
let is_slave = not (Pool_role.is_master ()) in
Expand Down
28 changes: 27 additions & 1 deletion ocaml/xenopsd/lib/xenops_server.ml
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@ type atomic =
| VM_pause of Vm.id
| VM_softreboot of Vm.id
| VM_unpause of Vm.id
| VM_resume_fast of Vm.id
| VM_request_rdp of (Vm.id * bool)
| VM_run_script of (Vm.id * string)
| VM_set_domain_action_request of (Vm.id * domain_action_request option)
Expand Down Expand Up @@ -231,6 +232,8 @@ let rec name_of_atomic = function
"VM_softreboot"
| VM_unpause _ ->
"VM_unpause"
| VM_resume_fast _ ->
"VM_resume_fast"
| VM_request_rdp _ ->
"VM_request_rdp"
| VM_run_script _ ->
Expand Down Expand Up @@ -302,6 +305,7 @@ type operation =
| VM_restore_vifs of Vm.id
| VM_restore_devices of (Vm.id * bool)
| VM_migrate of vm_migrate_op
| VM_resume_fast of Vm.id
| VM_receive_memory of vm_receive_op
| VBD_hotplug of Vbd.id
| VBD_hotunplug of Vbd.id * bool
Expand Down Expand Up @@ -336,6 +340,8 @@ let name_of_operation = function
"VM_restore_devices"
| VM_migrate _ ->
"VM_migrate"
| VM_resume_fast _ ->
    (* Name must match the constructor; it was misspelt "VM_resumte_fast". *)
    "VM_resume_fast"
| VM_receive_memory _ ->
"VM_receive_memory"
| VBD_hotplug _ ->
Expand Down Expand Up @@ -2195,6 +2201,16 @@ let rec perform_atomic ~progress_callback ?subtask:_ ?result (op : atomic)
| _ ->
info "VM %s is not paused" id
)
| VM_resume_fast id -> (
    (* Fast-resume VM [id] through the backend, but only when the backend
       reports its power state as Suspended; any other state is logged and
       otherwise ignored. On success the VM is signalled via [VM_DB.signal].
       NOTE(review): confirm that the backend reports [Suspended] for a domain
       that still exists, since the backend resume operates on a live domain. *)
    debug "VM.resume_fast %s" id ;
    let vm_t = VM_DB.read_exn id in
    match (B.VM.get_state vm_t).Vm.power_state with
    | Suspended ->
        B.VM.resume_fast t vm_t ; VM_DB.signal id
    | _ ->
        info "VM %s is not suspended, fast resume is not allowed" id
  )
| VM_request_rdp (id, enabled) ->
debug "VM.request_rdp %s %b" id enabled ;
B.VM.request_rdp (VM_DB.read_exn id) enabled
Expand Down Expand Up @@ -2419,6 +2435,7 @@ let rec immediate_operation dbg _id op =
and trigger_cleanup_after_failure op t =
let dbg = (Xenops_task.to_interface_task t).Task.dbg in
match op with
| VM_resume_fast _
| VM_check_state _
| PCI_check_state _
| VBD_check_state _
Expand All @@ -2439,7 +2456,8 @@ and trigger_cleanup_after_failure op t =
immediate_operation dbg final_id (VM_check_state final_id)
(* After a failed migration: re-check the state of both the VM id and the
   temporary source id, then try to fast-resume the VM under its original id.
   NOTE(review): the check_state operations run before the resume — confirm
   they cannot act on (e.g. tear down) the still-suspended domain first. *)
| VM_migrate {vmm_id; vmm_tmp_src_id; _} ->
immediate_operation dbg vmm_id (VM_check_state vmm_id) ;
immediate_operation dbg vmm_tmp_src_id (VM_check_state vmm_tmp_src_id) ;
immediate_operation dbg vmm_id (VM_resume_fast vmm_id)
| VBD_hotplug id | VBD_hotunplug (id, _) ->
immediate_operation dbg (fst id) (VBD_check_state id)
| VIF_hotplug id | VIF_hotunplug (id, _) ->
Expand Down Expand Up @@ -2489,6 +2507,7 @@ and trigger_cleanup_after_failure_atom op t =
| VM_set_memory_dynamic_range (id, _, _)
| VM_pause id
| VM_unpause id
| VM_resume_fast id
| VM_request_rdp (id, _)
| VM_run_script (id, _)
| VM_set_domain_action_request (id, _)
Expand Down Expand Up @@ -2569,6 +2588,10 @@ and perform_exn ?subtask ?result (op : operation) (t : Xenops_task.task_handle)
| VIF_hotunplug (id, force) ->
debug "VIF_hotplug %s.%s %b" (fst id) (snd id) force ;
perform_atomics (atomics_of_operation op) t
(* Run the atomics derived from the VM_resume_fast operation, then signal the
   VM.
   NOTE(review): this relies on [atomics_of_operation] having a case for
   VM_resume_fast; none is visible in this change — confirm it does not fall
   through to a default that yields no atomics. *)
| VM_resume_fast id ->
debug "VM.resume_fast %s" id ;
perform_atomics (atomics_of_operation op) t ;
VM_DB.signal id
| VM_migrate vmm ->
debug "VM.migrate %s -> %s" vmm.vmm_id vmm.vmm_url ;
let id = vmm.vmm_id in
Expand Down Expand Up @@ -3554,6 +3577,8 @@ module VM = struct

let unpause _ dbg id = queue_operation dbg id (Atomic (VM_unpause id))

(* Queue the atomic fast-resume of VM [id]; the first argument is ignored. *)
let resume_fast _ dbg id =
  let op = Atomic (VM_resume_fast id) in
  queue_operation dbg id op

let request_rdp _ dbg id enabled =
queue_operation dbg id (Atomic (VM_request_rdp (id, enabled)))

Expand Down Expand Up @@ -4137,6 +4162,7 @@ let _ =
Server.VM.destroy (VM.destroy ()) ;
Server.VM.pause (VM.pause ()) ;
Server.VM.unpause (VM.unpause ()) ;
Server.VM.resume_fast (VM.resume_fast ()) ;
Server.VM.request_rdp (VM.request_rdp ()) ;
Server.VM.run_script (VM.run_script ()) ;
Server.VM.set_xsdata (VM.set_xsdata ()) ;
Expand Down
2 changes: 2 additions & 0 deletions ocaml/xenopsd/lib/xenops_server_plugin.ml
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,8 @@ module type S = sig

val unpause : Xenops_task.task_handle -> Vm.t -> unit

(** [resume_fast task vm] asks the backend to fast-resume [vm]. *)
val resume_fast : Xenops_task.task_handle -> Vm.t -> unit

val set_xsdata :
Xenops_task.task_handle -> Vm.t -> (string * string) list -> unit

Expand Down
2 changes: 2 additions & 0 deletions ocaml/xenopsd/lib/xenops_server_skeleton.ml
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,8 @@ module VM = struct

let unpause _ _ = unimplemented "VM.unpause"

(* Skeleton placeholder: reports "VM.resume_fast" as unimplemented. *)
let resume_fast _ _ = unimplemented "VM.resume_fast"

let set_xsdata _ _ _ = unimplemented "VM.set_xsdata"

let set_vcpus _ _ _ = unimplemented "VM.set_vcpus"
Expand Down
5 changes: 5 additions & 0 deletions ocaml/xenopsd/xc/domain.ml
Original file line number Diff line number Diff line change
Expand Up @@ -770,6 +770,11 @@ let pause ~xc domid = Xenctrl.domain_pause xc domid

let unpause ~xc domid = Xenctrl.domain_unpause xc domid

(* Log the VM uuid and domid, then ask Xenctrl to fast-resume the domain. *)
let resume_fast ~xc domid =
  let vm_uuid = Uuidx.to_string (get_uuid ~xc domid) in
  debug "VM = %s; domid = %d; calling resume fast" vm_uuid domid ;
  Xenctrl.domain_resume_fast xc domid

let set_action_request ~xs domid x =
let path = xs.Xs.getdomainpath domid ^ "/action-request" in
match x with None -> xs.Xs.rm path | Some v -> xs.Xs.write path v
Expand Down
3 changes: 3 additions & 0 deletions ocaml/xenopsd/xc/domain.mli
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,9 @@ val pause : xc:Xenctrl.handle -> domid -> unit
val unpause : xc:Xenctrl.handle -> domid -> unit
(** Unpause a domain *)

val resume_fast : xc:Xenctrl.handle -> domid -> unit
(** [resume_fast ~xc domid] fast-resumes the domain [domid]. *)

val set_action_request : xs:Xenstore.Xs.xsh -> domid -> string option -> unit
(** [set_action_request xs domid None] declares this domain is fully intact. Any
other string is a hint to the toolstack that the domain is still broken. *)
Expand Down
9 changes: 9 additions & 0 deletions ocaml/xenopsd/xc/xenops_server_xen.ml
Original file line number Diff line number Diff line change
Expand Up @@ -1786,6 +1786,15 @@ module VM = struct
Domain.unpause ~xc di.Xenctrl.domid
)

(** Fast-resume the domain backing [vm] (fast resume is a cooperative resume).
    The domain must already have been suspended; this function does not check
    that itself, so the caller is expected to have done so. This can only be
    used for guests which can handle the special return code. *)
let resume_fast t vm =
on_domain t vm (fun xc _ _ _ di ->
(* TODO: check if we can get info in xenstore that the VM supports fast resume *)
Domain.resume_fast ~xc di.Xenctrl.domid
)

let set_xsdata task vm xsdata =
on_domain task vm (fun _ xs _ _ di ->
Domain.set_xsdata ~xs di.Xenctrl.domid xsdata
Expand Down

0 comments on commit abdbe69

Please sign in to comment.