Skip to content

Commit 0a8cd8e

Browse files
robhoesMarkSymsCtx
authored andcommitted
CA-209511: Don't remove private data path for devices during localhost migration
There can be multiple domains for a single VM during a localhost migration, and the private data path is indexed by UUID, not domid. When unplugging the devices for the old domain during a localhost migration, we must not remove the private data keys, because they are needed by the new domain. Signed-off-by: Rob Hoes <[email protected]>
1 parent b2c07f6 commit 0a8cd8e

File tree

7 files changed

+49
-26
lines changed

7 files changed

+49
-26
lines changed

xc/device.ml

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -449,12 +449,12 @@ let hard_shutdown_wait (task: Xenops_task.t) ~xs ~timeout x =
449449
let (_: bool) = cancellable_watch (Device x) [ Watch.map (fun _ -> ()) (hard_shutdown_complete ~xs x) ] [] task ~xs ~timeout () in
450450
()
451451

452-
let release (task: Xenops_task.t) ~xs (x: device) =
452+
let release (task: Xenops_task.t) ~xc ~xs (x: device) =
453453
debug "Device.Vbd.release %s" (string_of_device x);
454454
(* Make sure blktap/blkback fire the udev remove event by deleting the
455455
backend now *)
456456
Generic.safe_rm ~xs (backend_path_of_device ~xs x);
457-
Hotplug.release task ~xs x;
457+
Hotplug.release task ~xc ~xs x;
458458

459459
if !Xenopsd.run_hotplug_scripts
460460
then Hotplug.run_hotplug_script x [ "remove" ];
@@ -555,7 +555,7 @@ let add_async ~xs ~hvm x domid =
555555
Generic.add_device ~xs device back front priv [];
556556
device
557557

558-
let add_wait (task: Xenops_task.t) ~xs device =
558+
let add_wait (task: Xenops_task.t) ~xc ~xs device =
559559
if !Xenopsd.run_hotplug_scripts
560560
then Hotplug.run_hotplug_script device [ "add" ];
561561

@@ -577,15 +577,15 @@ let add_wait (task: Xenops_task.t) ~xs device =
577577
with Hotplug.Frontend_device_error _ as e ->
578578
debug "Caught Frontend_device_error: assuming it is safe to shutdown the backend";
579579
clean_shutdown task ~xs device; (* assumes double-failure isn't possible *)
580-
release task ~xs device;
580+
release task ~xc ~xs device;
581581
raise e
582582
end;
583583
device
584584

585585
(* Add the VBD to the domain, When this command returns, the device is ready. (This isn't as
586586
concurrent as xend-- xend allocates loopdevices via hotplug in parallel and then
587587
performs a 'waitForDevices') *)
588-
let add (task: Xenops_task.t) ~xs ~hvm x domid =
588+
let add (task: Xenops_task.t) ~xc ~xs ~hvm x domid =
589589
let device =
590590
let result = ref None in
591591
while !result = None do
@@ -597,7 +597,7 @@ let add (task: Xenops_task.t) ~xs ~hvm x domid =
597597
Thread.delay 0.1
598598
end else raise e (* permanent failure *)
599599
done; Opt.unbox !result in
600-
add_wait task ~xs device
600+
add_wait task ~xc ~xs device
601601

602602
let qemu_media_change ~xs device _type params =
603603
let backend_path = (backend_path_of_device ~xs device) in
@@ -739,15 +739,15 @@ let set_carrier ~xs (x: device) carrier =
739739
let disconnect_path = disconnect_path_of_device ~xs x in
740740
xs.Xs.write disconnect_path (if carrier then "0" else "1")
741741

742-
let release (task: Xenops_task.t) ~xs (x: device) =
742+
let release (task: Xenops_task.t) ~xc ~xs (x: device) =
743743
debug "Device.Vif.release %s" (string_of_device x);
744744

745745
if !Xenopsd.run_hotplug_scripts then begin
746746
let tap = { x with backend = { x.backend with kind = Tap } } in
747747
Hotplug.run_hotplug_script x [ "remove" ];
748748
Hotplug.run_hotplug_script tap [ "remove" ];
749749
end;
750-
Hotplug.release task ~xs x
750+
Hotplug.release task ~xc ~xs x
751751

752752
let move ~xs (x: device) bridge =
753753
let xs_bridge_path = Device_common.get_private_data_path_of_device x ^ "/bridge" in

xc/device.mli

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -57,9 +57,9 @@ sig
5757
backend_domid: int;
5858
}
5959

60-
val add : Xenops_task.t -> xs:Xenstore.Xs.xsh -> hvm:bool -> t -> Xenctrl.domid -> device
60+
val add : Xenops_task.t -> xc:Xenctrl.handle -> xs:Xenstore.Xs.xsh -> hvm:bool -> t -> Xenctrl.domid -> device
6161

62-
val release : Xenops_task.t -> xs:Xenstore.Xs.xsh -> device -> unit
62+
val release : Xenops_task.t -> xc:Xenctrl.handle -> xs:Xenstore.Xs.xsh -> device -> unit
6363
val media_eject : xs:Xenstore.Xs.xsh -> device -> unit
6464
val media_insert : xs:Xenstore.Xs.xsh -> phystype:physty -> params:string -> device -> unit
6565
val media_is_ejected : xs:Xenstore.Xs.xsh -> device -> bool
@@ -84,7 +84,7 @@ sig
8484
-> ?extra_xenserver_keys:(string * string) list -> Xenctrl.domid
8585
-> device
8686
val set_carrier : xs:Xenstore.Xs.xsh -> device -> bool -> unit
87-
val release : Xenops_task.t -> xs:Xenstore.Xs.xsh -> device -> unit
87+
val release : Xenops_task.t -> xc:Xenctrl.handle -> xs:Xenstore.Xs.xsh -> device -> unit
8888
val move : xs:Xenstore.Xs.xsh -> device -> string -> unit
8989
end
9090

xc/device_common.ml

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -132,12 +132,10 @@ let string_of_device (x: device) =
132132
* to take the UUID as an argument (and change the callers as well...) *)
133133
let uuid_of_domid domid =
134134
try
135-
Xenops_helpers.with_xc_and_xs (fun _ xs ->
136-
let vm = xs.Xs.getdomainpath domid ^ "/vm" in
137-
let vm_dir = xs.Xs.read vm in
138-
xs.Xs.read (vm_dir ^ "/uuid")
135+
with_xs (fun xs ->
136+
Uuidm.to_string (Xenops_helpers.uuid_of_domid ~xs domid)
139137
)
140-
with _ ->
138+
with Xenops_helpers.Domain_not_found ->
141139
error "uuid_of_domid failed for domid %d" domid;
142140
(* Returning a random string on error is not very neat, but we must avoid
143141
* exceptions here. This patch must be followed soon by a patch that changes the

xc/domain.ml

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -368,8 +368,7 @@ let destroy (task: Xenops_task.t) ~xc ~xs ~qemu_domid domid =
368368

369369
(* Any other domains with the same UUID as the one we are destroying.
370370
* There can be one during a localhost migration. *)
371-
let other_domains = List.filter (fun x -> Xenctrl_uuid.uuid_of_handle x.Xenctrl.handle = uuid)
372-
(Xenctrl.domain_getinfolist xc 0) in
371+
let other_domains = Xenops_helpers.domains_of_uuid ~xc uuid in
373372
debug "VM = %s; domid = %d; Domain.destroy: other domains with the same UUID = [ %a ]"
374373
(Uuid.to_string uuid) domid
375374
(fun () -> String.concat "; ")
@@ -423,7 +422,7 @@ let destroy (task: Xenops_task.t) ~xc ~xs ~qemu_domid domid =
423422
List.iter (fun x ->
424423
log_exn_continue ("waiting for hotplug for " ^ (string_of_device x))
425424
(fun () ->
426-
Hotplug.release task ~xs x; released := x :: !released
425+
Hotplug.release task ~xc ~xs x; released := x :: !released
427426
) ()
428427
) all_devices;
429428

xc/hotplug.ml

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -196,15 +196,24 @@ let wait_for_connect (task: Xenops_task.t) ~xs (x: device) =
196196
(* Wait for the device to be released by the backend driver (via udev) and
197197
then deallocate any resources which are registered (in our private bit of
198198
xenstore) *)
199-
let release (task:Xenops_task.t) ~xs (x: device) =
199+
let release (task:Xenops_task.t) ~xc ~xs (x: device) =
200200
debug "Hotplug.release: %s" (string_of_device x);
201201
wait_for_unplug task ~xs x;
202202
let hotplug_path = get_hotplug_path x in
203-
let private_data_path = get_private_data_path_of_device x in
203+
let private_data_path =
204+
(* Only remove the private data path if there isn't another domain on the host for the same VM.
205+
* There can be multiple during a localhost migration, and the private path is indexed by UUID, not domid. *)
206+
let vm_uuid = Xenops_helpers.uuid_of_domid ~xs x.frontend.domid in
207+
let domains_of_vm = Xenops_helpers.domains_of_uuid ~xc vm_uuid in
208+
if List.length domains_of_vm <= 1 then
209+
Some (get_private_data_path_of_device x)
210+
else
211+
None
212+
in
204213
let extra_xenserver_path = extra_xenserver_path_of_device xs x in
205214
Xs.transaction xs (fun t ->
206215
t.Xst.rm hotplug_path;
207-
t.Xst.rm private_data_path;
216+
Opt.iter t.Xst.rm private_data_path;
208217
t.Xst.rm extra_xenserver_path
209218
)
210219

xc/xenops_helpers.ml

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,3 +24,20 @@ let with_xc_and_xs f =
2424
let with_xc_and_xs_final f cf =
2525
with_xc_and_xs (fun xc xs -> finally (fun () -> f xc xs) cf)
2626

27+
exception Domain_not_found
28+
29+
let uuid_of_domid ~xs domid =
30+
try
31+
let vm = xs.Xs.getdomainpath domid ^ "/vm" in
32+
let vm_dir = xs.Xs.read vm in
33+
match Uuidm.of_string (xs.Xs.read (vm_dir ^ "/uuid")) with
34+
| Some uuid -> uuid
35+
| None -> raise Domain_not_found
36+
with _ ->
37+
raise Domain_not_found
38+
39+
let domains_of_uuid ~xc uuid =
40+
List.filter
41+
(fun x -> Xenctrl_uuid.uuid_of_handle x.Xenctrl.handle = uuid)
42+
(Xenctrl.domain_getinfolist xc 0)
43+

xc/xenops_server_xen.ml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -228,7 +228,7 @@ let create_vbd_frontend ~xc ~xs task frontend_domid vdi =
228228
backend_domid = backend_domid;
229229
} in
230230
let device = Xenops_task.with_subtask task "Vbd.add"
231-
(fun () -> Device.Vbd.add task ~xs ~hvm:false t frontend_domid) in
231+
(fun () -> Device.Vbd.add task ~xc ~xs ~hvm:false t frontend_domid) in
232232
Device device
233233

234234
let block_device_of_vbd_frontend = function
@@ -1987,7 +1987,7 @@ module VBD = struct
19871987
} in
19881988
let device =
19891989
Xenops_task.with_subtask task (Printf.sprintf "Vbd.add %s" (id_of vbd))
1990-
(fun () -> Device.Vbd.add task ~xs ~hvm x frontend_domid) in
1990+
(fun () -> Device.Vbd.add task ~xc ~xs ~hvm x frontend_domid) in
19911991

19921992
(* We store away the disk so we can implement VBD.stat *)
19931993
Opt.iter (fun disk -> xs.Xs.write (vdi_path_of_device ~xs device) (disk |> rpc_of_disk |> Jsonrpc.to_string)) vbd.backend;
@@ -2063,7 +2063,7 @@ module VBD = struct
20632063
Opt.iter
20642064
(fun device ->
20652065
Xenops_task.with_subtask task (Printf.sprintf "Vbd.release %s" (id_of vbd))
2066-
(fun () -> Device.Vbd.release task ~xs device);
2066+
(fun () -> Device.Vbd.release task ~xc ~xs device);
20672067
) device;
20682068
(* If we have a qemu frontend, detach this too. *)
20692069
Mutex.execute dB_m (fun () ->
@@ -2356,7 +2356,7 @@ module VIF = struct
23562356
Xenops_task.with_subtask task (Printf.sprintf "Vif.hard_shutdown %s" (id_of vif))
23572357
(fun () -> (if force then Device.hard_shutdown else Device.clean_shutdown) task ~xs device);
23582358
Xenops_task.with_subtask task (Printf.sprintf "Vif.release %s" (id_of vif))
2359-
(fun () -> Device.Vif.release task ~xs device) in
2359+
(fun () -> Device.Vif.release task ~xc ~xs device) in
23602360
destroy device;
23612361

23622362
Opt.iter (fun vm_t ->

0 commit comments

Comments
 (0)